aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-07-07 05:13:25 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-07-07 05:13:25 +0000
commit31557ec760365acee4625de6e25b57d5d382b3bf (patch)
tree10d072d3cc9f927a72050c5420635bde93dfa25a
parentfe9f7930146e6ad2645522d5c90d218a616adc3f (diff)
parent6b9ff13286194c7a2b38d624eeee38dc35987dc8 (diff)
downloadOpenCL-CTS-android14-mainline-uwb-release.tar.gz
Change-Id: Ic2583ca37fe8118671784cb9f028d161e35bbf09
-rw-r--r--.appveyor.yml54
-rw-r--r--.github/workflows/presubmit.yml35
-rw-r--r--Android.bp394
-rw-r--r--CMakeLists.txt70
l---------LICENSE1
-rw-r--r--METADATA22
-rw-r--r--MODULE_LICENSE_APACHE20
-rw-r--r--README.md117
-rwxr-xr-xcheck-format.sh4
-rw-r--r--dependencies/Android.bp1
-rw-r--r--dependencies/ocl-headers/CL/cl.h7
-rw-r--r--dependencies/ocl-headers/CL/cl_ext.h1356
-rw-r--r--dependencies/ocl-headers/CL/cl_gl.h25
-rw-r--r--dependencies/ocl-headers/CL/cl_gl_ext.h18
-rw-r--r--dependencies/ocl-headers/CL/cl_platform.h38
-rw-r--r--dependencies/ocl-stubs/apis_generator.py2
-rw-r--r--dependencies/ocl-stubs/stubs.cpp2
-rwxr-xr-xpresubmit.sh65
-rw-r--r--scripts/android_bp_head39
-rw-r--r--scripts/android_bp_tail18
-rw-r--r--scripts/generate_test_files.py16
-rw-r--r--test_common/CMakeLists.txt2
-rw-r--r--test_common/gl/helpers.cpp1
-rw-r--r--test_common/gl/setup_win32.cpp3
-rw-r--r--test_common/gl/setup_x11.cpp20
-rw-r--r--test_common/gles/helpers.cpp6
-rw-r--r--test_common/gles/helpers.h5
-rw-r--r--test_common/harness/ThreadPool.cpp47
-rw-r--r--test_common/harness/alloc.h4
-rw-r--r--test_common/harness/compat.h11
-rw-r--r--test_common/harness/conversions.cpp9
-rw-r--r--test_common/harness/deviceInfo.cpp34
-rw-r--r--test_common/harness/deviceInfo.h5
-rw-r--r--test_common/harness/errorHelpers.cpp37
-rw-r--r--test_common/harness/errorHelpers.h24
-rw-r--r--test_common/harness/fpcontrol.h16
-rw-r--r--test_common/harness/imageHelpers.cpp204
-rw-r--r--test_common/harness/imageHelpers.h12
-rw-r--r--test_common/harness/integer_ops_test_info.h92
-rw-r--r--test_common/harness/kernelHelpers.cpp40
-rw-r--r--test_common/harness/mt19937.cpp2
-rw-r--r--test_common/harness/mt19937.h36
-rw-r--r--test_common/harness/os_helpers.cpp6
-rw-r--r--test_common/harness/parseParameters.cpp32
-rw-r--r--test_common/harness/parseParameters.h2
-rw-r--r--test_common/harness/propertyHelpers.cpp10
-rw-r--r--test_common/harness/rounding_mode.cpp10
-rw-r--r--test_common/harness/rounding_mode.h2
-rw-r--r--test_common/harness/testHarness.cpp178
-rw-r--r--test_common/harness/threadTesting.cpp98
-rw-r--r--test_common/harness/threadTesting.h5
-rw-r--r--test_common/harness/typeWrappers.h246
-rw-r--r--test_conformance/CMakeLists.txt1
-rw-r--r--test_conformance/SVM/test_byte_granularity.cpp1
-rw-r--r--test_conformance/SVM/test_cross_buffer_pointers.cpp3
-rw-r--r--test_conformance/SVM/test_migrate.cpp3
-rw-r--r--test_conformance/SVM/test_shared_address_space_coarse_grain.cpp8
-rw-r--r--test_conformance/SVM/test_shared_address_space_fine_grain.cpp2
-rw-r--r--test_conformance/SVM/test_shared_sub_buffers.cpp3
-rw-r--r--test_conformance/allocations/allocation_fill.cpp6
-rw-r--r--test_conformance/allocations/allocation_functions.cpp4
-rw-r--r--test_conformance/allocations/main.cpp6
-rw-r--r--test_conformance/api/negative_platform.cpp13
-rw-r--r--test_conformance/api/test_api_min_max.cpp1769
-rw-r--r--test_conformance/api/test_context_destructor_callback.cpp7
-rw-r--r--test_conformance/api/test_kernel_arg_info.cpp45
-rw-r--r--test_conformance/api/test_kernel_attributes.cpp6
-rw-r--r--test_conformance/api/test_mem_object_info.cpp9
-rw-r--r--test_conformance/api/test_mem_objects.cpp7
-rw-r--r--test_conformance/api/test_null_buffer_arg.cpp1
-rw-r--r--test_conformance/api/test_queries.cpp147
-rw-r--r--test_conformance/api/test_sub_group_dispatch.cpp4
-rw-r--r--test_conformance/atomics/main.cpp7
-rw-r--r--test_conformance/atomics/procs.h49
-rw-r--r--test_conformance/atomics/testBase.h5
-rw-r--r--test_conformance/atomics/test_atomics.cpp1266
-rw-r--r--test_conformance/atomics/test_indexed_cases.cpp500
-rw-r--r--test_conformance/basic/test_arraycopy.cpp5
-rw-r--r--test_conformance/basic/test_async_copy2D.cpp238
-rw-r--r--test_conformance/basic/test_async_copy3D.cpp331
-rw-r--r--test_conformance/basic/test_enqueue_map.cpp2
-rw-r--r--test_conformance/basic/test_enqueued_local_size.cpp130
-rw-r--r--test_conformance/basic/test_fpmath_float.cpp2
-rw-r--r--test_conformance/basic/test_hiloeo.cpp2
-rw-r--r--test_conformance/basic/test_hostptr.cpp2
-rw-r--r--test_conformance/basic/test_multireadimageonefmt.cpp24
-rw-r--r--test_conformance/basic/test_preprocessors.cpp2
-rw-r--r--test_conformance/basic/test_progvar.cpp1740
-rw-r--r--test_conformance/basic/test_queue_priority.cpp6
-rw-r--r--test_conformance/basic/test_readimage3d.cpp2
-rw-r--r--test_conformance/basic/test_simple_image_pitch.cpp4
-rw-r--r--test_conformance/basic/test_sizeof.cpp47
-rw-r--r--test_conformance/basic/test_vector_swizzle.cpp58
-rw-r--r--test_conformance/basic/test_writeimage_fp32.cpp7
-rw-r--r--test_conformance/basic/test_writeimage_int16.cpp7
-rw-r--r--test_conformance/buffers/test_buffer_fill.cpp4
-rw-r--r--test_conformance/buffers/test_buffer_migrate.cpp2
-rw-r--r--test_conformance/buffers/test_buffer_read.cpp4
-rw-r--r--test_conformance/buffers/test_image_migrate.cpp1
-rw-r--r--test_conformance/buffers/test_sub_buffers.cpp25
-rw-r--r--test_conformance/c11_atomics/common.h2590
-rw-r--r--test_conformance/c11_atomics/test_atomics.cpp4897
-rw-r--r--test_conformance/commonfns/test_sign.cpp13
-rw-r--r--test_conformance/commonfns/test_step.cpp27
-rw-r--r--test_conformance/compiler/test_compiler_defines_for_extensions.cpp37
-rw-r--r--test_conformance/compiler/test_feature_macro.cpp98
-rw-r--r--test_conformance/computeinfo/CMakeLists.txt1
-rw-r--r--test_conformance/computeinfo/device_uuid.cpp2
-rw-r--r--test_conformance/computeinfo/main.cpp21
-rw-r--r--test_conformance/computeinfo/pci_bus_info.cpp53
-rw-r--r--test_conformance/contractions/contractions.cpp3
-rw-r--r--test_conformance/conversions/basic_test_conversions.cpp3
-rw-r--r--test_conformance/conversions/fplib.cpp4
-rw-r--r--test_conformance/conversions/test_conversions.cpp131
-rw-r--r--test_conformance/device_execution/enqueue_ndrange.cpp3
-rw-r--r--test_conformance/device_execution/host_queue_order.cpp3
-rw-r--r--test_conformance/events/action_classes.cpp529
-rw-r--r--test_conformance/events/action_classes.h430
-rw-r--r--test_conformance/events/main.cpp62
-rw-r--r--test_conformance/events/procs.h127
-rw-r--r--test_conformance/events/testBase.h5
-rw-r--r--test_conformance/events/test_callbacks.cpp397
-rw-r--r--test_conformance/events/test_event_dependencies.cpp542
-rw-r--r--test_conformance/events/test_events.cpp734
-rw-r--r--test_conformance/events/test_userevents.cpp426
-rw-r--r--test_conformance/events/test_userevents_multithreaded.cpp38
-rw-r--r--test_conformance/events/test_waitlists.cpp448
-rw-r--r--test_conformance/extensions/CMakeLists.txt1
-rw-r--r--test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt8
-rw-r--r--test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp588
-rw-r--r--test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h177
-rw-r--r--test_conformance/extensions/cl_khr_command_buffer/main.cpp35
-rw-r--r--test_conformance/extensions/cl_khr_command_buffer/procs.h35
-rw-r--r--test_conformance/gl/common.h27
-rw-r--r--test_conformance/gl/test_buffers.cpp415
-rw-r--r--test_conformance/gl/test_fence_sync.cpp624
-rw-r--r--test_conformance/gl/test_image_methods.cpp2
-rw-r--r--test_conformance/gl/test_images_getinfo_common.cpp17
-rw-r--r--test_conformance/gl/test_images_read_common.cpp14
-rw-r--r--test_conformance/gl/test_images_write_common.cpp9
-rw-r--r--test_conformance/gles/CMakeLists.txt8
-rw-r--r--test_conformance/gles/main.cpp2
-rw-r--r--test_conformance/gles/setup_egl.cpp5
-rw-r--r--test_conformance/gles/test_buffers.cpp11
-rw-r--r--test_conformance/gles/test_fence_sync.cpp10
-rw-r--r--test_conformance/gles/test_images_2D.cpp2
-rw-r--r--test_conformance/gles/test_renderbuffer.cpp2
-rw-r--r--test_conformance/half/Test_roundTrip.cpp7
-rw-r--r--test_conformance/half/Test_vLoadHalf.cpp21
-rw-r--r--test_conformance/half/Test_vStoreHalf.cpp54
-rw-r--r--test_conformance/half/main.cpp7
-rw-r--r--test_conformance/images/clCopyImage/test_copy_1D.cpp1
-rw-r--r--test_conformance/images/clCopyImage/test_copy_1D_array.cpp1
-rw-r--r--test_conformance/images/clCopyImage/test_copy_2D.cpp1
-rw-r--r--test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp1
-rw-r--r--test_conformance/images/clCopyImage/test_copy_2D_3D.cpp1
-rw-r--r--test_conformance/images/clCopyImage/test_copy_2D_array.cpp1
-rw-r--r--test_conformance/images/clCopyImage/test_copy_3D.cpp1
-rw-r--r--test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp1
-rw-r--r--test_conformance/images/clCopyImage/test_copy_generic.cpp28
-rw-r--r--test_conformance/images/clFillImage/test_fill_1D.cpp1
-rw-r--r--test_conformance/images/clFillImage/test_fill_1D_array.cpp1
-rw-r--r--test_conformance/images/clFillImage/test_fill_2D.cpp1
-rw-r--r--test_conformance/images/clFillImage/test_fill_2D_array.cpp1
-rw-r--r--test_conformance/images/clFillImage/test_fill_3D.cpp1
-rw-r--r--test_conformance/images/clFillImage/test_fill_generic.cpp28
-rw-r--r--test_conformance/images/clGetInfo/test_1D.cpp1
-rw-r--r--test_conformance/images/clGetInfo/test_1D_2D_array.cpp2
-rw-r--r--test_conformance/images/clGetInfo/test_2D.cpp1
-rw-r--r--test_conformance/images/clGetInfo/test_3D.cpp1
-rw-r--r--test_conformance/images/clReadWriteImage/test_read_1D.cpp21
-rw-r--r--test_conformance/images/clReadWriteImage/test_read_1D_array.cpp2
-rw-r--r--test_conformance/images/clReadWriteImage/test_read_2D.cpp2
-rw-r--r--test_conformance/images/clReadWriteImage/test_read_2D_array.cpp4
-rw-r--r--test_conformance/images/clReadWriteImage/test_read_3D.cpp2
-rw-r--r--test_conformance/images/kernel_image_methods/test_1D.cpp1
-rw-r--r--test_conformance/images/kernel_image_methods/test_1D_array.cpp1
-rw-r--r--test_conformance/images/kernel_image_methods/test_2D.cpp1
-rw-r--r--test_conformance/images/kernel_read_write/CMakeLists.txt6
-rw-r--r--test_conformance/images/kernel_read_write/main.cpp143
-rw-r--r--test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp124
-rw-r--r--test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp1013
-rw-r--r--test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp482
-rw-r--r--test_conformance/images/kernel_read_write/test_common.cpp515
-rw-r--r--test_conformance/images/kernel_read_write/test_common.h144
-rw-r--r--test_conformance/images/kernel_read_write/test_iterations.cpp156
-rw-r--r--test_conformance/images/kernel_read_write/test_loops.cpp2
-rw-r--r--test_conformance/images/kernel_read_write/test_read_1D.cpp124
-rw-r--r--test_conformance/images/kernel_read_write/test_read_1D_array.cpp133
-rw-r--r--test_conformance/images/kernel_read_write/test_read_2D_array.cpp146
-rw-r--r--test_conformance/images/kernel_read_write/test_read_3D.cpp59
-rw-r--r--test_conformance/images/kernel_read_write/test_write_1D.cpp112
-rw-r--r--test_conformance/images/kernel_read_write/test_write_1D_array.cpp115
-rw-r--r--test_conformance/images/kernel_read_write/test_write_2D_array.cpp134
-rw-r--r--test_conformance/images/kernel_read_write/test_write_3D.cpp133
-rw-r--r--test_conformance/images/kernel_read_write/test_write_image.cpp122
-rw-r--r--test_conformance/images/samplerlessReads/test_iterations.cpp1
-rw-r--r--test_conformance/images/samplerlessReads/test_read_1D.cpp1
-rw-r--r--test_conformance/images/samplerlessReads/test_read_1D_array.cpp1
-rw-r--r--test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp1
-rw-r--r--test_conformance/images/samplerlessReads/test_read_2D_array.cpp1
-rw-r--r--test_conformance/images/samplerlessReads/test_read_3D.cpp1
-rw-r--r--test_conformance/integer_ops/CMakeLists.txt1
-rw-r--r--test_conformance/integer_ops/main.cpp236
-rw-r--r--test_conformance/integer_ops/procs.h2
-rw-r--r--test_conformance/integer_ops/test_add_sat.cpp31
-rw-r--r--test_conformance/integer_ops/test_integer_dot_product.cpp442
-rw-r--r--test_conformance/integer_ops/test_integers.cpp60
-rw-r--r--test_conformance/integer_ops/test_sub_sat.cpp32
-rw-r--r--test_conformance/integer_ops/test_unary_ops.cpp2
-rw-r--r--test_conformance/math_brute_force/CMakeLists.txt11
-rw-r--r--test_conformance/math_brute_force/binary_double.cpp417
-rw-r--r--test_conformance/math_brute_force/binary_float.cpp425
-rw-r--r--test_conformance/math_brute_force/binary_i_double.cpp420
-rw-r--r--test_conformance/math_brute_force/binary_i_float.cpp427
-rw-r--r--test_conformance/math_brute_force/binary_operator_double.cpp412
-rw-r--r--test_conformance/math_brute_force/binary_operator_float.cpp425
-rw-r--r--test_conformance/math_brute_force/binary_two_results_i_double.cpp76
-rw-r--r--test_conformance/math_brute_force/binary_two_results_i_float.cpp72
-rw-r--r--test_conformance/math_brute_force/common.cpp170
-rw-r--r--test_conformance/math_brute_force/common.h68
-rw-r--r--test_conformance/math_brute_force/function_list.cpp1
-rw-r--r--test_conformance/math_brute_force/function_list.h16
-rw-r--r--test_conformance/math_brute_force/i_unary_double.cpp41
-rw-r--r--test_conformance/math_brute_force/i_unary_float.cpp41
-rw-r--r--test_conformance/math_brute_force/macro_binary_double.cpp394
-rw-r--r--test_conformance/math_brute_force/macro_binary_float.cpp389
-rw-r--r--test_conformance/math_brute_force/macro_unary_double.cpp353
-rw-r--r--test_conformance/math_brute_force/macro_unary_float.cpp349
-rw-r--r--test_conformance/math_brute_force/mad_double.cpp128
-rw-r--r--test_conformance/math_brute_force/mad_float.cpp126
-rw-r--r--test_conformance/math_brute_force/main.cpp43
-rw-r--r--test_conformance/math_brute_force/reference_math.cpp33
-rw-r--r--test_conformance/math_brute_force/ternary_double.cpp140
-rw-r--r--test_conformance/math_brute_force/ternary_float.cpp137
-rw-r--r--test_conformance/math_brute_force/unary_double.cpp371
-rw-r--r--test_conformance/math_brute_force/unary_float.cpp419
-rw-r--r--test_conformance/math_brute_force/unary_two_results_double.cpp41
-rw-r--r--test_conformance/math_brute_force/unary_two_results_float.cpp43
-rw-r--r--test_conformance/math_brute_force/unary_two_results_i_double.cpp47
-rw-r--r--test_conformance/math_brute_force/unary_two_results_i_float.cpp47
-rw-r--r--test_conformance/math_brute_force/unary_u_double.cpp53
-rw-r--r--test_conformance/math_brute_force/unary_u_float.cpp41
-rw-r--r--test_conformance/math_brute_force/utility.h33
-rw-r--r--test_conformance/multiple_device_context/test_multiple_devices.cpp5
-rw-r--r--test_conformance/pipes/test_pipe_info.cpp40
-rw-r--r--test_conformance/pipes/test_pipe_limits.cpp6
-rw-r--r--test_conformance/pipes/test_pipe_read_write.cpp51
-rw-r--r--test_conformance/pipes/test_pipe_subgroups.cpp5
-rw-r--r--test_conformance/printf/test_printf.cpp142
-rw-r--r--test_conformance/printf/util_printf.cpp2
-rw-r--r--test_conformance/profiling/execute.cpp16
-rw-r--r--test_conformance/profiling/writeImage.cpp4
-rwxr-xr-xtest_conformance/run_conformance.py585
-rw-r--r--test_conformance/select/test_select.cpp13
-rw-r--r--test_conformance/spir/main.cpp73
-rw-r--r--test_conformance/spir/run_services.cpp127
-rw-r--r--test_conformance/spir/run_services.h59
-rw-r--r--test_conformance/spir/sampler_enumeration.zipbin63216 -> 67926 bytes
-rw-r--r--test_conformance/spirv_new/main.cpp1
-rw-r--r--test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp437
-rw-r--r--test_conformance/spirv_new/test_op_fmath.cpp3
-rw-r--r--test_conformance/spirv_new/test_op_function.cpp1
-rw-r--r--test_conformance/spirv_new/test_op_negate.cpp1
-rw-r--r--test_conformance/spirv_new/test_op_opaque.cpp1
-rw-r--r--test_conformance/spirv_new/test_op_vector_times_scalar.cpp2
-rw-r--r--test_conformance/subgroups/CMakeLists.txt1
-rw-r--r--test_conformance/subgroups/main.cpp21
-rw-r--r--test_conformance/subgroups/procs.h4
-rw-r--r--test_conformance/subgroups/subgroup_common_kernels.cpp104
-rw-r--r--test_conformance/subgroups/subgroup_common_kernels.h12
-rw-r--r--test_conformance/subgroups/subgroup_common_templates.h327
-rw-r--r--test_conformance/subgroups/subhelpers.h580
-rw-r--r--test_conformance/subgroups/test_barrier.cpp27
-rw-r--r--test_conformance/subgroups/test_ifp.cpp46
-rw-r--r--test_conformance/subgroups/test_queries.cpp8
-rw-r--r--test_conformance/subgroups/test_subgroup.cpp77
-rw-r--r--test_conformance/subgroups/test_subgroup_ballot.cpp722
-rw-r--r--test_conformance/subgroups/test_subgroup_clustered_reduce.cpp240
-rw-r--r--test_conformance/subgroups/test_subgroup_extended_types.cpp60
-rw-r--r--test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp421
-rw-r--r--test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp164
-rw-r--r--test_conformance/subgroups/test_subgroup_rotate.cpp109
-rw-r--r--test_conformance/subgroups/test_subgroup_shuffle.cpp39
-rw-r--r--test_conformance/subgroups/test_subgroup_shuffle_relative.cpp40
-rw-r--r--test_conformance/subgroups/test_workitem.cpp20
-rw-r--r--test_conformance/submission_details_template.txt6
-rw-r--r--test_conformance/vectors/test_step.cpp2
-rw-r--r--test_conformance/vulkan/CMakeLists.txt50
-rw-r--r--test_conformance/vulkan/main.cpp346
-rw-r--r--test_conformance/vulkan/procs.h38
-rw-r--r--test_conformance/vulkan/shaders/buffer.comp28
-rw-r--r--test_conformance/vulkan/shaders/buffer.spvbin0 -> 2168 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D.comp31
-rw-r--r--test_conformance/vulkan/shaders/image2D_r16i.spvbin0 -> 3264 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_r16ui.spvbin0 -> 3264 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_r32f.spvbin0 -> 3268 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_r32i.spvbin0 -> 3256 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_r32ui.spvbin0 -> 3256 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_r8i.spvbin0 -> 3264 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_r8ui.spvbin0 -> 3264 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rg16i.spvbin0 -> 3264 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rg16ui.spvbin0 -> 3264 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rg32f.spvbin0 -> 3276 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rg32i.spvbin0 -> 3264 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rg32ui.spvbin0 -> 3264 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rg8i.spvbin0 -> 3264 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rg8ui.spvbin0 -> 3264 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rgba16i.spvbin0 -> 3256 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rgba16ui.spvbin0 -> 3256 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rgba32f.spvbin0 -> 3268 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rgba32i.spvbin0 -> 3256 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rgba32ui.spvbin0 -> 3256 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rgba8i.spvbin0 -> 3256 bytes
-rw-r--r--test_conformance/vulkan/shaders/image2D_rgba8ui.spvbin0 -> 3256 bytes
-rw-r--r--test_conformance/vulkan/test_vulkan_api_consistency.cpp568
-rw-r--r--test_conformance/vulkan/test_vulkan_interop_buffer.cpp1786
-rw-r--r--test_conformance/vulkan/test_vulkan_interop_image.cpp1596
-rw-r--r--test_conformance/vulkan/test_vulkan_platform_device_info.cpp146
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp853
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp131
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/vulkan_api_list.hpp195
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.cpp22
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp50
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp424
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp386
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp692
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp70
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp2072
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp580
-rw-r--r--test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp463
-rw-r--r--test_conformance/workgroups/CMakeLists.txt11
-rw-r--r--test_conformance/workgroups/main.cpp33
-rw-r--r--test_conformance/workgroups/procs.h18
-rw-r--r--test_conformance/workgroups/test_wg_broadcast.cpp6
-rw-r--r--test_conformance/workgroups/test_wg_reduce.cpp596
-rw-r--r--test_conformance/workgroups/test_wg_reduce_max.cpp632
-rw-r--r--test_conformance/workgroups/test_wg_reduce_min.cpp632
-rw-r--r--test_conformance/workgroups/test_wg_scan_exclusive_add.cpp604
-rw-r--r--test_conformance/workgroups/test_wg_scan_exclusive_max.cpp631
-rw-r--r--test_conformance/workgroups/test_wg_scan_exclusive_min.cpp632
-rw-r--r--test_conformance/workgroups/test_wg_scan_inclusive_add.cpp593
-rw-r--r--test_conformance/workgroups/test_wg_scan_inclusive_max.cpp595
-rw-r--r--test_conformance/workgroups/test_wg_scan_inclusive_min.cpp595
-rw-r--r--test_conformance/workgroups/test_wg_scan_reduce.cpp456
-rw-r--r--test_conformance/workgroups/test_wg_suggested_local_work_size.cpp611
346 files changed, 35047 insertions, 21226 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
deleted file mode 100644
index ea010778..00000000
--- a/.appveyor.yml
+++ /dev/null
@@ -1,54 +0,0 @@
-os:
- - Visual Studio 2017
-
-shallow_clone: true
-
-platform:
- - Win32
- - x64
-
-configuration:
- - Release
-
-environment:
- matrix:
- - SETARCH: i686
- - SETARCH: x86_64
-
-matrix:
- exclude:
- - platform: Win32
- SETARCH: x86_64
- - platform: x64
- SETARCH: i686
-
-before_build:
- # Setup environment:
- - ps: $env:TOP = $env:APPVEYOR_BUILD_FOLDER
- - ps: $env:TOP
- - echo %TOP%
- # Get the OpenCL Headers:
- - git clone --depth=1 https://github.com/KhronosGroup/OpenCL-Headers OpenCL-Headers
- # Get and build the OpenCL ICD Loader:
- - git clone --depth=1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
- - ps: cd OpenCL-ICD-Loader
- - ps: mkdir build
- - ps: cd build
- - cmake -A%PLATFORM% -DENABLE_OPENCL30_PROVISIONAL=1 -DOPENCL_ICD_LOADER_HEADERS_DIR=%TOP%/OpenCL-Headers/ ..
- - cmake --build . --config %CONFIGURATION%
- - ps: cd $env:TOP
- # Get the libclcxx standard library:
- - git clone --depth=1 https://github.com/KhronosGroup/libclcxx.git libclcxx
- # Generate the CTS solution file:
- - cmake -DCL_INCLUDE_DIR=%TOP%/OpenCL-Headers
- -DCL_LIB_DIR=%TOP%/OpenCL-ICD-Loader/build
- -DCL_LIBCLCXX_DIR=%TOP%/libclcxx
- -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin
- -DOPENCL_LIBRARIES="OpenCL"
- -H. -Bbuild_win -A%PLATFORM%
- -DD3D10_IS_SUPPORTED=ON -DD3D11_IS_SUPPORTED=ON -DARCH=%SETARCH%
-
-build:
- project: build_win\CLConform.sln
- parallel: true
- verbosity: normal
diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml
index 0c1778eb..1dfdb963 100644
--- a/.github/workflows/presubmit.yml
+++ b/.github/workflows/presubmit.yml
@@ -3,37 +3,62 @@ on: [push, pull_request]
jobs:
build:
- name: Build ${{ matrix.os }} ${{ matrix.name }}
+ name: Build ${{ matrix.os }} ${{ matrix.arch }}${{ matrix.extra }}
runs-on: ${{ matrix.os }}
env:
JOB_ARCHITECTURE: ${{ matrix.arch }}
JOB_ENABLE_GL: ${{ matrix.gl }}
+ JOB_ENABLE_DEBUG: ${{ matrix.debug }}
strategy:
+ fail-fast: false
matrix:
mainmatrix: [true]
- os: [ubuntu-20.04, macos-11.0]
+ os: [ubuntu-20.04, macos-latest, windows-latest]
include:
- os: ubuntu-20.04
mainmatrix: true
gl: 1
+ extra: " gl"
- os: ubuntu-20.04
mainmatrix: false
- name: Arm
arch: arm
- os: ubuntu-20.04
mainmatrix: false
- name: AArch64
arch: aarch64
+ debug: 1
+ extra: " debug"
steps:
- uses: actions/checkout@v2
+ - name: Setup Ninja
+ uses: seanmiddleditch/gha-setup-ninja@master
+ - name: Setup OpenGL build dependencies
+ if: ${{ matrix.gl }}
+ run: |
+ sudo apt-get update
+ sudo apt-get -y install libglu1-mesa-dev freeglut3-dev mesa-common-dev libglew-dev
+ - name: Setup MSVC with Ninja
+ uses: ilammy/msvc-dev-cmd@v1
+ - name: Setup ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ with:
+ variant: sccache
+ key: ${{ matrix.os }}-${{ matrix.arch }}
+ - name: Fetch OpenCL Headers
+ shell: bash
+ run: |
+ git clone https://github.com/KhronosGroup/OpenCL-Headers.git
+ cd OpenCL-Headers
+ ln -s CL OpenCL # For OSX builds
+ cd ..
- name: Build
+ shell: bash
run: ./presubmit.sh
formatcheck:
name: Check code format
runs-on: ubuntu-20.04
steps:
- name: Install packages
- run: sudo apt install -y clang-format
+ run: sudo apt install -y clang-format clang-format-9
- uses: actions/checkout@v2
with:
fetch-depth: 0
diff --git a/Android.bp b/Android.bp
index 37913dd9..2c9a3b6c 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,24 +1,7 @@
-// *** THIS PACKAGE HAS SPECIAL LICENSING CONDITIONS. PLEASE
-// CONSULT THE OWNERS AND opensource-licensing@google.com BEFORE
-// DEPENDING ON IT IN YOUR PROJECT. ***
package {
default_applicable_licenses: ["external_OpenCL-CTS_license"],
}
-// Added automatically by a large-scale-change that took the approach of
-// 'apply every license found to every target'. While this makes sure we respect
-// every license restriction, it may not be entirely correct.
-//
-// e.g. GPL in an MIT project might only apply to the contrib/ directory.
-//
-// Please consider splitting the single license below into multiple licenses,
-// taking care not to lose any license_kind information, and overriding the
-// default license using the 'licenses: [...]' property on targets as needed.
-//
-// For unused files, consider creating a 'fileGroup' with "//visibility:private"
-// to attach the license to, and including a comment whether the files may be
-// used in the current project.
-// See: http://go/android-license-faq
license {
name: "external_OpenCL-CTS_license",
visibility: [":__subpackages__"],
@@ -27,9 +10,6 @@ license {
"SPDX-license-identifier-BSD",
"SPDX-license-identifier-MIT",
"SPDX-license-identifier-Unlicense",
- "legacy_by_exception_only", // by exception only
- "legacy_proprietary", // by exception only
- "legacy_unencumbered",
],
license_text: [
"LICENSE.txt",
@@ -40,8 +20,8 @@ cc_library_headers {
name: "ocl-harness-headers",
export_include_dirs: [
"test_common/harness",
- "test_common"
- ]
+ "test_common",
+ ],
}
cc_defaults {
@@ -56,54 +36,36 @@ cc_defaults {
"-DCL_EXPERIMENTAL",
"-DCL_TARGET_OPENCL_VERSION=300",
"-Wno-#warnings",
- "-Wno-absolute-value",
- "-Wno-asm-operand-widths",
"-Wno-c++11-narrowing",
- "-Wno-dangling-else",
"-Wno-date-time",
"-Wno-deprecated-declarations",
"-Wno-format",
- "-Wno-ignored-pragmas",
"-Wno-ignored-qualifiers",
"-Wno-implicit-fallthrough",
- "-Wno-logical-op-parentheses",
- "-Wno-macro-redefined",
"-Wno-missing-braces",
- "-Wno-missing-declarations",
"-Wno-missing-field-initializers",
"-Wno-non-virtual-dtor",
"-Wno-overloaded-virtual",
- "-Wno-parentheses",
- "-Wno-parentheses-equality",
"-Wno-reorder-ctor",
- "-Wno-return-stack-address",
- "-Wno-shift-negative-value",
"-Wno-sometimes-uninitialized",
- "-Wno-switch",
- "-Wno-unknown-pragmas",
- "-Wno-unneeded-internal-declaration",
- "-Wno-unused-function",
- "-Wno-unused-label",
"-Wno-unused-parameter",
- "-Wno-unused-variable",
- "-Wno-writable-strings",
"-fexceptions",
],
static_libs: [
- "ocl-stubs"
+ "ocl-stubs",
],
}
cc_library {
name: "ocl-harness",
- srcs: [ "test_common/harness/*.cpp", ],
- defaults: [ "ocl-harness-defaults" ],
+ srcs: ["test_common/harness/*.cpp"],
+ defaults: ["ocl-harness-defaults"],
}
cc_defaults {
name: "ocl-test-defaults",
- defaults: [ "ocl-harness-defaults" ],
- static_libs: [ "ocl-harness" ],
+ defaults: ["ocl-harness-defaults"],
+ static_libs: ["ocl-harness"],
compile_multilib: "64",
multilib: {
lib64: {
@@ -114,398 +76,366 @@ cc_defaults {
cc_defaults {
name: "ocl-test-image-defaults",
- srcs: [ "test_conformance/images/common.cpp" ],
- export_include_dirs: [ "test_conformance/images" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/images/common.cpp"],
+ export_include_dirs: ["test_conformance/images"],
+ defaults: ["ocl-test-defaults"],
}
-
cc_test {
name: "ocl-test-allocations",
- srcs: [ "test_conformance/allocations/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/allocations/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-api",
- srcs: [ "test_conformance/api/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/api/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-atomics",
- srcs: [ "test_conformance/atomics/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/atomics/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-basic",
- srcs: [ "test_conformance/basic/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/basic/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-buffers",
- srcs: [ "test_conformance/buffers/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/buffers/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-c11-atomics",
- srcs: [ "test_conformance/c11_atomics/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/c11_atomics/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-commonfns",
- srcs: [ "test_conformance/commonfns/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/commonfns/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-compiler",
- srcs: [ "test_conformance/compiler/*.cpp" ],
- data: [ "test_conformance/compiler/includeTestDirectory/testIncludeFile.h", "test_conformance/compiler/secondIncludeTestDirectory/testIncludeFile.h" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/compiler/*.cpp"],
+ data: [
+ "test_conformance/compiler/includeTestDirectory/testIncludeFile.h",
+ "test_conformance/compiler/secondIncludeTestDirectory/testIncludeFile.h",
+ ],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-computeinfo",
- srcs: [ "test_conformance/computeinfo/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/computeinfo/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-contractions",
- srcs: [ "test_conformance/contractions/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/contractions/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-conversions",
- srcs: [ "test_conformance/conversions/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/conversions/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-device-execution",
- srcs: [ "test_conformance/device_execution/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/device_execution/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-device-partition",
- srcs: [ "test_conformance/device_partition/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/device_partition/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-device-timer",
- srcs: [ "test_conformance/device_timer/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/device_timer/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-events",
- srcs: [ "test_conformance/events/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/events/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-generic-address-space",
- srcs: [ "test_conformance/generic_address_space/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/generic_address_space/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-geometrics",
- srcs: [ "test_conformance/geometrics/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/geometrics/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-half",
- srcs: [ "test_conformance/half/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/half/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-integer-ops",
- srcs: [ "test_conformance/integer_ops/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/integer_ops/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-math-brute-force",
- srcs: [ "test_conformance/math_brute_force/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/math_brute_force/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-mem-host-flags",
- srcs: [ "test_conformance/mem_host_flags/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/mem_host_flags/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-multiple-device-context",
- srcs: [ "test_conformance/multiple_device_context/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/multiple_device_context/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-non-uniform-work-group",
- srcs: [ "test_conformance/non_uniform_work_group/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/non_uniform_work_group/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-pipes",
- srcs: [ "test_conformance/pipes/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/pipes/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-printf",
- srcs: [ "test_conformance/printf/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/printf/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-profiling",
- srcs: [ "test_conformance/profiling/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/profiling/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-relationals",
- srcs: [ "test_conformance/relationals/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/relationals/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-select",
- srcs: [ "test_conformance/select/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/select/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-spir",
- srcs: [ "test_conformance/spir/*.cpp", "test_conformance/math_brute_force/function_list.cpp", "test_common/miniz/miniz.c" ],
- data: [ "test_conformance/spir/*.zip" ],
- cflags: [ "-DFUNCTION_LIST_ULPS_ONLY", "-Wno-unused-private-field" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: [
+ "test_conformance/spir/*.cpp",
+ "test_conformance/math_brute_force/function_list.cpp",
+ "test_common/miniz/miniz.c",
+ ],
+ data: ["test_conformance/spir/*.zip"],
+ cflags: [
+ "-DFUNCTION_LIST_ULPS_ONLY",
+ "-Wno-unused-private-field",
+ ],
+ defaults: ["ocl-test-defaults"],
rtti: true,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-spirv-new",
- srcs: [ "test_conformance/spirv_new/*.cpp", "test_conformance/math_brute_force/reference_math.cpp", "test_conformance/math_brute_force/utility.cpp" ],
- data: [ "test_conformance/spirv_new/spirv_asm/*", "test_conformance/spirv_new/spirv_bin/*" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: [
+ "test_conformance/spirv_new/*.cpp",
+ "test_conformance/math_brute_force/reference_math.cpp",
+ "test_conformance/math_brute_force/utility.cpp",
+ ],
+ data: [
+ "test_conformance/spirv_new/spirv_asm/*",
+ "test_conformance/spirv_new/spirv_bin/*",
+ ],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-subgroups",
- srcs: [ "test_conformance/subgroups/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/subgroups/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-svm",
- srcs: [ "test_conformance/SVM/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/SVM/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-thread-dimensions",
- srcs: [ "test_conformance/thread_dimensions/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/thread_dimensions/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-vectors",
- srcs: [ "test_conformance/vectors/*.cpp" ],
- defaults: [ "ocl-test-defaults" ],
+ srcs: ["test_conformance/vectors/*.cpp"],
+ defaults: ["ocl-test-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-image-clcopyimage",
- srcs: [ "test_conformance/images/clCopyImage/*.cpp" ],
- defaults: [ "ocl-test-image-defaults" ],
+ srcs: ["test_conformance/images/clCopyImage/*.cpp"],
+ defaults: ["ocl-test-image-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-image-clfillimage",
- srcs: [ "test_conformance/images/clFillImage/*.cpp" ],
- defaults: [ "ocl-test-image-defaults" ],
+ srcs: ["test_conformance/images/clFillImage/*.cpp"],
+ defaults: ["ocl-test-image-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-image-clgetinfo",
- srcs: [ "test_conformance/images/clGetInfo/*.cpp" ],
- defaults: [ "ocl-test-image-defaults" ],
+ srcs: ["test_conformance/images/clGetInfo/*.cpp"],
+ defaults: ["ocl-test-image-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-image-clreadwriteimage",
- srcs: [ "test_conformance/images/clReadWriteImage/*.cpp" ],
- defaults: [ "ocl-test-image-defaults" ],
+ srcs: ["test_conformance/images/clReadWriteImage/*.cpp"],
+ defaults: ["ocl-test-image-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-image-kernel-image-methods",
- srcs: [ "test_conformance/images/kernel_image_methods/*.cpp" ],
- defaults: [ "ocl-test-image-defaults" ],
+ srcs: ["test_conformance/images/kernel_image_methods/*.cpp"],
+ defaults: ["ocl-test-image-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-image-kernel-read-write",
- srcs: [ "test_conformance/images/kernel_read_write/*.cpp" ],
- defaults: [ "ocl-test-image-defaults" ],
+ srcs: ["test_conformance/images/kernel_read_write/*.cpp"],
+ defaults: ["ocl-test-image-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
-
cc_test {
name: "ocl-test-image-samplerlessreads",
- srcs: [ "test_conformance/images/samplerlessReads/*.cpp" ],
- defaults: [ "ocl-test-image-defaults" ],
+ srcs: ["test_conformance/images/samplerlessReads/*.cpp"],
+ defaults: ["ocl-test-image-defaults"],
rtti: false,
- gtest: false
+ gtest: false,
}
python_test_host {
name: "opencl_cts",
main: "scripts/test_opencl_cts.py",
- srcs: [ "scripts/test_opencl_cts.py" ],
- data: [ "scripts/test_opencl_cts.xml" ],
+ srcs: ["scripts/test_opencl_cts.py"],
+ data: ["scripts/test_opencl_cts.xml"],
test_config: "scripts/test_opencl_cts.xml",
- version: {
- py2: {
- enabled: false,
- },
- py3: {
- enabled: true
- }
- },
test_options: {
unit_test: false,
},
@@ -514,15 +444,5 @@ python_test_host {
python_test {
name: "run_conformance",
main: "test_conformance/run_conformance.py",
- srcs: [ "test_conformance/run_conformance.py" ],
- version: {
- py2: {
- enabled: true,
- embedded_launcher: true,
- },
- py3: {
- enabled: false,
- }
- },
+ srcs: ["test_conformance/run_conformance.py"],
}
-
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 083ea96d..6a25d5b5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,12 +10,6 @@ set(CMAKE_C_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
-if(CMAKE_BUILD_TYPE STREQUAL "release")
- set (BUILD_FLAVOR "release")
-else(CMAKE_BUILD_TYPE STREQUAL "release")
- set (BUILD_FLAVOR "debug")
-endif(CMAKE_BUILD_TYPE STREQUAL "release")
-
add_definitions(-DCL_TARGET_OPENCL_VERSION=300)
add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_2_APIS=1)
add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_1_APIS=1)
@@ -29,14 +23,6 @@ if(USE_CL_EXPERIMENTAL)
add_definitions(-DCL_EXPERIMENTAL)
endif(USE_CL_EXPERIMENTAL)
-# Support both VS2008 and VS2012.
-set(BUILD_DIR "$ENV{ADRENO_DRIVER}/build")
-if(MSVC90)
- set(VS_BUILD_DIR "${BUILD_DIR}/vs2008")
-else(MSVC110)
- set(VS_BUILD_DIR "${BUILD_DIR}/vs2012")
-endif(MSVC90)
-
#-----------------------------------------------------------
# Default Configurable Test Set
#-----------------------------------------------------------
@@ -102,14 +88,14 @@ macro(add_cxx_flag_if_supported flag)
endmacro(add_cxx_flag_if_supported)
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
+ add_cxx_flag_if_supported(-Wmisleading-indentation)
+ add_cxx_flag_if_supported(-Wunused-variable)
add_cxx_flag_if_supported(-Wno-narrowing)
add_cxx_flag_if_supported(-Wno-format)
add_cxx_flag_if_supported(-Werror)
add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive
- add_cxx_flag_if_supported(-Wno-error=absolute-value) # Issue 783
add_cxx_flag_if_supported(-Wno-error=unknown-pragmas) # Issue #785
add_cxx_flag_if_supported(-Wno-error=asm-operand-widths) # Issue #784
- add_cxx_flag_if_supported(-Wno-error=overflow) # Fixed by #699
# -msse -mfpmath=sse to force gcc to use sse for float math,
# avoiding excess precision problems that cause tests like int2float
@@ -127,9 +113,24 @@ else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
endif()
+# Set a module's COMPILE_FLAGS if using gcc or clang.
+macro(set_gnulike_module_compile_flags flags)
+ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
+ SET_SOURCE_FILES_PROPERTIES(
+ ${${MODULE_NAME}_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS ${flags}
+ )
+ endif()
+endmacro(set_gnulike_module_compile_flags)
+
if(MSVC)
# Don't warn when using standard non-secure functions.
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
+ # Don't warn about using the portable "strdup" function.
+ add_compile_definitions(_CRT_NONSTDC_NO_DEPRECATE)
+ # Fix std::min and std::max handling with windows.harness.
+ add_compile_definitions(NOMINMAX)
endif()
if( WIN32 AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" )
@@ -152,10 +153,6 @@ if(LINK_PTHREAD)
list(APPEND CLConform_LIBRARIES pthread)
endif()
-if(DEFINED USE_GLES3)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGLES3")
-endif()
-
if(APPLE)
find_library(corefoundation CoreFoundation)
find_library(iokit IOKit)
@@ -169,38 +166,5 @@ include_directories(${CLConform_SOURCE_DIR}/test_common/harness
${CLConform_SOURCE_DIR}/test_common/gl
${CLConform_SOURCE_DIR}/test_common)
-if(CMAKE_BUILD_TYPE STREQUAL "release")
- set (BUILD_FLAVOR "release")
-elseif (CMAKE_BUILD_TYPE STREQUAL "debug")
- set (BUILD_FLAVOR "debug")
-endif(CMAKE_BUILD_TYPE STREQUAL "release")
-
-
add_subdirectory(test_common)
add_subdirectory(test_conformance)
-
-# Support both VS2008 and VS2012.
-set (DLL_FILES "${VS_BUILD_DIR}/Debug/*.dll")
-set (DST_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Debug/")
-
-if (WIN32)
- set (COPY "echo")
- add_custom_target(COPY_DLL${CONFORMANCE_SUFFIX} ALL
- COMMAND ${COPY} "${DLL_FILES}" "${DST_DIR}"
- COMMENT "Copying dll files.. ")
-else (WIN32)
- set (COPY cp)
- add_custom_target(COPY_DLL${CONFORMANCE_SUFFIX})
-endif(WIN32)
-
-set_property(TARGET COPY_DLL${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}")
-
-if(WIN32)
- add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} ALL
- COMMAND ${COPY} ${DLL_FILES} ${DST_DIR}
- COMMENT "Copying other files to output folder..." )
-else(WIN32)
- add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} )
-endif(WIN32)
-
-set_property(TARGET COPY_FILES${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}")
diff --git a/LICENSE b/LICENSE
new file mode 120000
index 00000000..85de3d45
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1 @@
+LICENSE.txt \ No newline at end of file
diff --git a/METADATA b/METADATA
index 1eaf99d3..235bc5aa 100644
--- a/METADATA
+++ b/METADATA
@@ -1,7 +1,19 @@
-# *** THIS PACKAGE HAS SPECIAL LICENSING CONDITIONS. PLEASE
-# CONSULT THE OWNERS AND opensource-licensing@google.com BEFORE
-# DEPENDING ON IT IN YOUR PROJECT. ***
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update OpenCL-CTS
+# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md
+
+name: "OpenCL-CTS"
+description: "OpenCL Conformance Tests"
third_party {
- license_note: "Khronos proprietary"
- license_type: BY_EXCEPTION_ONLY
+ url {
+ type: GIT
+ value: "https://github.com/KhronosGroup/OpenCL-CTS.git"
+ }
+ version: "90a5183ec499d5b4701f58f6134dd424d82c4dca"
+ license_type: NOTICE
+ last_upgrade_date {
+ year: 2022
+ month: 10
+ day: 26
+ }
}
diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/MODULE_LICENSE_APACHE2
diff --git a/README.md b/README.md
index b2d825fc..3d410644 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,115 @@
-# OpenCL-CTS [![Build Status](https://api.travis-ci.org/KhronosGroup/OpenCL-CTS.svg?branch=master)](https://travis-ci.org/KhronosGroup/OpenCL-CTS/branches)
-The OpenCL Conformance Tests
+# OpenCL Conformance Test Suite (CTS)
+
+This it the OpenCL CTS for all versions of the Khronos
+[OpenCL](https://www.khronos.org/opencl/) standard.
+
+## Building the CTS
+
+The CTS supports Linux, Windows, macOS, and Android platforms. In particular,
+GitHub Actions CI builds against Ubuntu 20.04, Windows-latest, and
+macos-latest.
+
+Compiling the CTS requires the following CMake configuration options to be set:
+
+* `CL_INCLUDE_DIR` Points to the unified
+ [OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers).
+* `CL_LIB_DIR` Directory containing the OpenCL library to build against.
+* `OPENCL_LIBRARIES` Name of the OpenCL library to link.
+
+It is advised that the [OpenCL ICD-Loader](https://github.com/KhronosGroup/OpenCL-ICD-Loader)
+is used as the OpenCL library to build against. Where `CL_LIB_DIR` points to a
+build of the ICD loader and `OPENCL_LIBRARIES` is "OpenCL".
+
+### Example Build
+
+Steps on a Linux platform to clone dependencies from GitHub sources, configure
+a build, and compile.
+
+```sh
+git clone https://github.com/KhronosGroup/OpenCL-CTS.git
+git clone https://github.com/KhronosGroup/OpenCL-Headers.git
+git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
+
+mkdir OpenCL-ICD-Loader/build
+cmake -S OpenCL-ICD-Loader -B OpenCL-ICD-Loader/build \
+ -DOPENCL_ICD_LOADER_HEADERS_DIR=$PWD/OpenCL-Headers
+cmake --build ./OpenCL-ICD-Loader/build --config Release
+
+mkdir OpenCL-CTS/build
+cmake -S OpenCL-CTS -B OpenCL-CTS/build \
+ -DCL_INCLUDE_DIR=$PWD/OpenCL-Headers \
+ -DCL_LIB_DIR=$PWD/OpenCL-ICD-Loader/build \
+ -DOPENCL_LIBRARIES=OpenCL
+cmake --build OpenCL-CTS/build --config Release
+```
+
+## Running the CTS
+
+A build of the CTS contains multiple executables representing the directories in
+the `test_conformance` folder. Each of these executables contains sub-tests, and
+possibly smaller granularities of testing within the sub-tests.
+
+See the `--help` output on each executable for the list of sub-tests available,
+as well as other options for configuring execution.
+
+If the OpenCL library built against is the ICD Loader, and the vendor library to
+be tested is not registered in the
+[default ICD Loader location](https://github.com/KhronosGroup/OpenCL-ICD-Loader#registering-icds)
+then the [OCL_ICD_FILENAMES](https://github.com/KhronosGroup/OpenCL-ICD-Loader#table-of-debug-environment-variables)
+environment variable will need to be set for the ICD Loader to detect the OpenCL
+library to use at runtime. For example, to run the basic tests on a Linux
+platform:
+
+```sh
+OCL_ICD_FILENAMES=/path/to/vendor_lib.so ./test_basic
+```
+
+### Offline Compilation
+
+Testing OpenCL drivers which do not have a runtime compiler can be done by using
+additional command line arguments provided by the test harness for tests which
+require compilation, these are:
+
+* `--compilation-mode` Selects if OpenCL-C source code should be compiled using
+ an external tool before being passed on to the OpenCL driver in that form for
+ testing. Online is the default mode, but also accepts the values `spir-v`, and
+ `binary`.
+
+* `--compilation-cache-mode` Controls how the compiled OpenCL-C source code
+ should be cached on disk.
+
+* `--compilation-cache-path` Accepts a path to a directory where the compiled
+ binary cache should be stored on disk.
+
+* `--compilation-program` Accepts a path to an executable (default:
+ cl_offline_compiler) invoked by the test harness to perform offline
+ compilation of OpenCL-C source code. This executable must match the
+ [interface description](test_common/harness/cl_offline_compiler-interface.txt).
+
+## Generating a Conformance Report
+
+The Khronos [Conformance Process Document](https://members.khronos.org/document/dl/911)
+details the steps required for a conformance submissions.
+In this repository [opencl_conformance_tests_full.csv](test_conformance/submission_details_template.txt)
+defines the full list of tests which must be run for conformance. The output log
+of which must be included alongside a filled in
+[submission details template](test_conformance/submission_details_template.txt).
+
+Utility script [run_conformance.py](test_conformance/run_conformance.py) can be
+used to help generating the submission log, although it is not required.
+
+Git [tags](https://github.com/KhronosGroup/OpenCL-CTS/tags) are used to define
+the version of the repository conformance submissions are made against.
+
+## Contributing
+
+Contributions are welcome to the project from Khronos members and non-members
+alike via GitHub Pull Requests (PR). Alternatively, if you've found a bug or have
+a questions please file an issue in the GitHub project. First time contributors
+will be required to sign the Khronos Contributor License Agreement (CLA) before
+their PR can be merged.
+
+PRs to the repository are required to be `clang-format` clean to pass CI.
+Developers can either use the `git-clang-format` tool locally to verify this
+before contributing, or update their PR based on the diff provided by a failing
+CI job.
diff --git a/check-format.sh b/check-format.sh
index 7de2bd2c..be8f9d78 100755
--- a/check-format.sh
+++ b/check-format.sh
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
-# Arg used to specify non-'origin/master' comparison branch
-ORIGIN_BRANCH=${1:-"origin/master"}
+# Arg used to specify non-'origin/main' comparison branch
+ORIGIN_BRANCH=${1:-"origin/main"}
CLANG_BINARY=${2:-"`which clang-format-9`"}
# Run git-clang-format to check for violations
diff --git a/dependencies/Android.bp b/dependencies/Android.bp
index a8dbeeea..e521ca8e 100644
--- a/dependencies/Android.bp
+++ b/dependencies/Android.bp
@@ -33,7 +33,6 @@ genrule {
"ocl-headers/CL/cl_gl.h",
"ocl-headers/CL/cl_egl.h",
"ocl-headers/CL/cl_ext.h",
- "ocl-headers/CL/cl_gl_ext.h",
],
cmd: "python3 $(location) $(in) > $(out)"
}
diff --git a/dependencies/ocl-headers/CL/cl.h b/dependencies/ocl-headers/CL/cl.h
index 0018a0f4..6c700ab1 100644
--- a/dependencies/ocl-headers/CL/cl.h
+++ b/dependencies/ocl-headers/CL/cl.h
@@ -141,6 +141,10 @@ typedef struct _cl_image_desc {
#pragma warning( push )
#pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 builds */
#endif
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc11-extensions" /* Prevents warning about nameless union being C11 extension*/
+#endif
#if defined(_MSC_VER) && defined(__STDC__)
/* Anonymous unions are not supported in /Za builds */
#else
@@ -158,6 +162,9 @@ typedef struct _cl_image_desc {
#if defined(_MSC_VER) && !defined(__STDC__)
#pragma warning( pop )
#endif
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
#endif
} cl_image_desc;
diff --git a/dependencies/ocl-headers/CL/cl_ext.h b/dependencies/ocl-headers/CL/cl_ext.h
index 80a81dea..3eba7ed1 100644
--- a/dependencies/ocl-headers/CL/cl_ext.h
+++ b/dependencies/ocl-headers/CL/cl_ext.h
@@ -26,6 +26,494 @@ extern "C" {
#include <CL/cl.h>
+/***************************************************************
+* cl_khr_command_buffer
+***************************************************************/
+#define cl_khr_command_buffer 1
+#define CL_KHR_COMMAND_BUFFER_EXTENSION_NAME \
+ "cl_khr_command_buffer"
+
+typedef cl_bitfield cl_device_command_buffer_capabilities_khr;
+typedef struct _cl_command_buffer_khr* cl_command_buffer_khr;
+typedef cl_uint cl_sync_point_khr;
+typedef cl_uint cl_command_buffer_info_khr;
+typedef cl_uint cl_command_buffer_state_khr;
+typedef cl_properties cl_command_buffer_properties_khr;
+typedef cl_bitfield cl_command_buffer_flags_khr;
+typedef cl_properties cl_ndrange_kernel_command_properties_khr;
+typedef struct _cl_mutable_command_khr* cl_mutable_command_khr;
+
+/* cl_device_info */
+#define CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR 0x12A9
+#define CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR 0x12AA
+
+/* cl_device_command_buffer_capabilities_khr - bitfield */
+#define CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR (1 << 0)
+#define CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR (1 << 1)
+#define CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR (1 << 2)
+#define CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR (1 << 3)
+
+/* cl_command_buffer_properties_khr */
+#define CL_COMMAND_BUFFER_FLAGS_KHR 0x1293
+
+/* cl_command_buffer_flags_khr */
+#define CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR (1 << 0)
+
+/* Error codes */
+#define CL_INVALID_COMMAND_BUFFER_KHR -1138
+#define CL_INVALID_SYNC_POINT_WAIT_LIST_KHR -1139
+#define CL_INCOMPATIBLE_COMMAND_QUEUE_KHR -1140
+
+/* cl_command_buffer_info_khr */
+#define CL_COMMAND_BUFFER_QUEUES_KHR 0x1294
+#define CL_COMMAND_BUFFER_NUM_QUEUES_KHR 0x1295
+#define CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR 0x1296
+#define CL_COMMAND_BUFFER_STATE_KHR 0x1297
+#define CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR 0x1298
+
+/* cl_command_buffer_state_khr */
+#define CL_COMMAND_BUFFER_STATE_RECORDING_KHR 0
+#define CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR 1
+#define CL_COMMAND_BUFFER_STATE_PENDING_KHR 2
+#define CL_COMMAND_BUFFER_STATE_INVALID_KHR 3
+
+/* cl_command_type */
+#define CL_COMMAND_COMMAND_BUFFER_KHR 0x12A8
+
+
+typedef cl_command_buffer_khr (CL_API_CALL *
+clCreateCommandBufferKHR_fn)(
+ cl_uint num_queues,
+ const cl_command_queue* queues,
+ const cl_command_buffer_properties_khr* properties,
+ cl_int* errcode_ret) ;
+
+typedef cl_int (CL_API_CALL *
+clFinalizeCommandBufferKHR_fn)(
+ cl_command_buffer_khr command_buffer) ;
+
+typedef cl_int (CL_API_CALL *
+clRetainCommandBufferKHR_fn)(
+ cl_command_buffer_khr command_buffer) ;
+
+typedef cl_int (CL_API_CALL *
+clReleaseCommandBufferKHR_fn)(
+ cl_command_buffer_khr command_buffer) ;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueCommandBufferKHR_fn)(
+ cl_uint num_queues,
+ cl_command_queue* queues,
+ cl_command_buffer_khr command_buffer,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
+
+typedef cl_int (CL_API_CALL *
+clCommandBarrierWithWaitListKHR_fn)(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+typedef cl_int (CL_API_CALL *
+clCommandCopyBufferKHR_fn)(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_buffer,
+ size_t src_offset,
+ size_t dst_offset,
+ size_t size,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+typedef cl_int (CL_API_CALL *
+clCommandCopyBufferRectKHR_fn)(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_buffer,
+ const size_t* src_origin,
+ const size_t* dst_origin,
+ const size_t* region,
+ size_t src_row_pitch,
+ size_t src_slice_pitch,
+ size_t dst_row_pitch,
+ size_t dst_slice_pitch,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+typedef cl_int (CL_API_CALL *
+clCommandCopyBufferToImageKHR_fn)(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_image,
+ size_t src_offset,
+ const size_t* dst_origin,
+ const size_t* region,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+typedef cl_int (CL_API_CALL *
+clCommandCopyImageKHR_fn)(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem src_image,
+ cl_mem dst_image,
+ const size_t* src_origin,
+ const size_t* dst_origin,
+ const size_t* region,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+typedef cl_int (CL_API_CALL *
+clCommandCopyImageToBufferKHR_fn)(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem src_image,
+ cl_mem dst_buffer,
+ const size_t* src_origin,
+ const size_t* region,
+ size_t dst_offset,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+typedef cl_int (CL_API_CALL *
+clCommandFillBufferKHR_fn)(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem buffer,
+ const void* pattern,
+ size_t pattern_size,
+ size_t offset,
+ size_t size,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+typedef cl_int (CL_API_CALL *
+clCommandFillImageKHR_fn)(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem image,
+ const void* fill_color,
+ const size_t* origin,
+ const size_t* region,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+typedef cl_int (CL_API_CALL *
+clCommandNDRangeKernelKHR_fn)(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ const cl_ndrange_kernel_command_properties_khr* properties,
+ cl_kernel kernel,
+ cl_uint work_dim,
+ const size_t* global_work_offset,
+ const size_t* global_work_size,
+ const size_t* local_work_size,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+typedef cl_int (CL_API_CALL *
+clGetCommandBufferInfoKHR_fn)(
+ cl_command_buffer_khr command_buffer,
+ cl_command_buffer_info_khr param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret) ;
+
+#ifndef CL_NO_PROTOTYPES
+
+extern CL_API_ENTRY cl_command_buffer_khr CL_API_CALL
+clCreateCommandBufferKHR(
+ cl_uint num_queues,
+ const cl_command_queue* queues,
+ const cl_command_buffer_properties_khr* properties,
+ cl_int* errcode_ret) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFinalizeCommandBufferKHR(
+ cl_command_buffer_khr command_buffer) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainCommandBufferKHR(
+ cl_command_buffer_khr command_buffer) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseCommandBufferKHR(
+ cl_command_buffer_khr command_buffer) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCommandBufferKHR(
+ cl_uint num_queues,
+ cl_command_queue* queues,
+ cl_command_buffer_khr command_buffer,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCommandBarrierWithWaitListKHR(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCommandCopyBufferKHR(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_buffer,
+ size_t src_offset,
+ size_t dst_offset,
+ size_t size,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCommandCopyBufferRectKHR(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_buffer,
+ const size_t* src_origin,
+ const size_t* dst_origin,
+ const size_t* region,
+ size_t src_row_pitch,
+ size_t src_slice_pitch,
+ size_t dst_row_pitch,
+ size_t dst_slice_pitch,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCommandCopyBufferToImageKHR(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_image,
+ size_t src_offset,
+ const size_t* dst_origin,
+ const size_t* region,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCommandCopyImageKHR(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem src_image,
+ cl_mem dst_image,
+ const size_t* src_origin,
+ const size_t* dst_origin,
+ const size_t* region,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCommandCopyImageToBufferKHR(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem src_image,
+ cl_mem dst_buffer,
+ const size_t* src_origin,
+ const size_t* region,
+ size_t dst_offset,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCommandFillBufferKHR(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem buffer,
+ const void* pattern,
+ size_t pattern_size,
+ size_t offset,
+ size_t size,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCommandFillImageKHR(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ cl_mem image,
+ const void* fill_color,
+ const size_t* origin,
+ const size_t* region,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCommandNDRangeKernelKHR(
+ cl_command_buffer_khr command_buffer,
+ cl_command_queue command_queue,
+ const cl_ndrange_kernel_command_properties_khr* properties,
+ cl_kernel kernel,
+ cl_uint work_dim,
+ const size_t* global_work_offset,
+ const size_t* global_work_size,
+ const size_t* local_work_size,
+ cl_uint num_sync_points_in_wait_list,
+ const cl_sync_point_khr* sync_point_wait_list,
+ cl_sync_point_khr* sync_point,
+ cl_mutable_command_khr* mutable_handle) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetCommandBufferInfoKHR(
+ cl_command_buffer_khr command_buffer,
+ cl_command_buffer_info_khr param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret) ;
+
+#endif /* CL_NO_PROTOTYPES */
+
+/***************************************************************
+* cl_khr_command_buffer_mutable_dispatch
+***************************************************************/
+#define cl_khr_command_buffer_mutable_dispatch 1
+#define CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME \
+ "cl_khr_command_buffer_mutable_dispatch"
+
+typedef cl_uint cl_command_buffer_structure_type_khr;
+typedef cl_bitfield cl_mutable_dispatch_fields_khr;
+typedef cl_uint cl_mutable_command_info_khr;
+typedef struct _cl_mutable_dispatch_arg_khr {
+ cl_uint arg_index;
+ size_t arg_size;
+ const void* arg_value;
+} cl_mutable_dispatch_arg_khr;
+typedef struct _cl_mutable_dispatch_exec_info_khr {
+ cl_uint param_name;
+ size_t param_value_size;
+ const void* param_value;
+} cl_mutable_dispatch_exec_info_khr;
+typedef struct _cl_mutable_dispatch_config_khr {
+ cl_command_buffer_structure_type_khr type;
+ const void* next;
+ cl_mutable_command_khr command;
+ cl_uint num_args;
+ cl_uint num_svm_args;
+ cl_uint num_exec_infos;
+ cl_uint work_dim;
+ const cl_mutable_dispatch_arg_khr* arg_list;
+ const cl_mutable_dispatch_arg_khr* arg_svm_list;
+ const cl_mutable_dispatch_exec_info_khr* exec_info_list;
+ const size_t* global_work_offset;
+ const size_t* global_work_size;
+ const size_t* local_work_size;
+} cl_mutable_dispatch_config_khr;
+typedef struct _cl_mutable_base_config_khr {
+ cl_command_buffer_structure_type_khr type;
+ const void* next;
+ cl_uint num_mutable_dispatch;
+ const cl_mutable_dispatch_config_khr* mutable_dispatch_list;
+} cl_mutable_base_config_khr;
+
+/* cl_command_buffer_flags_khr - bitfield */
+#define CL_COMMAND_BUFFER_MUTABLE_KHR (1 << 1)
+
+/* Error codes */
+#define CL_INVALID_MUTABLE_COMMAND_KHR -1141
+
+/* cl_device_info */
+#define CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0
+
+/* cl_ndrange_kernel_command_properties_khr */
+#define CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1
+
+/* cl_mutable_dispatch_fields_khr - bitfield */
+#define CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR (1 << 0)
+#define CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR (1 << 1)
+#define CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR (1 << 2)
+#define CL_MUTABLE_DISPATCH_ARGUMENTS_KHR (1 << 3)
+#define CL_MUTABLE_DISPATCH_EXEC_INFO_KHR (1 << 4)
+
+/* cl_mutable_command_info_khr */
+#define CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR 0x12A0
+#define CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR 0x12A1
+#define CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR 0x12AD
+#define CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR 0x12A2
+#define CL_MUTABLE_DISPATCH_KERNEL_KHR 0x12A3
+#define CL_MUTABLE_DISPATCH_DIMENSIONS_KHR 0x12A4
+#define CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR 0x12A5
+#define CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR 0x12A6
+#define CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR 0x12A7
+
+/* cl_command_buffer_structure_type_khr */
+#define CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR 0
+#define CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR 1
+
+
+typedef cl_int (CL_API_CALL *
+clUpdateMutableCommandsKHR_fn)(
+ cl_command_buffer_khr command_buffer,
+ const cl_mutable_base_config_khr* mutable_config) ;
+
+typedef cl_int (CL_API_CALL *
+clGetMutableCommandInfoKHR_fn)(
+ cl_mutable_command_khr command,
+ cl_mutable_command_info_khr param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret) ;
+
+#ifndef CL_NO_PROTOTYPES
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clUpdateMutableCommandsKHR(
+ cl_command_buffer_khr command_buffer,
+ const cl_mutable_base_config_khr* mutable_config) ;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetMutableCommandInfoKHR(
+ cl_mutable_command_khr command,
+ cl_mutable_command_info_khr param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret) ;
+
+#endif /* CL_NO_PROTOTYPES */
+
/* cl_khr_fp64 extension - no extension #define since it has no functions */
/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */
@@ -734,6 +1222,321 @@ clGetKernelSuggestedLocalWorkSizeKHR_fn)(
size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0;
+/***************************************************************
+* cl_khr_integer_dot_product
+***************************************************************/
+#define cl_khr_integer_dot_product 1
+
+typedef cl_bitfield cl_device_integer_dot_product_capabilities_khr;
+
+/* cl_device_integer_dot_product_capabilities_khr */
+#define CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR (1 << 0)
+#define CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR (1 << 1)
+
+typedef struct _cl_device_integer_dot_product_acceleration_properties_khr {
+ cl_bool signed_accelerated;
+ cl_bool unsigned_accelerated;
+ cl_bool mixed_signedness_accelerated;
+ cl_bool accumulating_saturating_signed_accelerated;
+ cl_bool accumulating_saturating_unsigned_accelerated;
+ cl_bool accumulating_saturating_mixed_signedness_accelerated;
+} cl_device_integer_dot_product_acceleration_properties_khr;
+
+/* cl_device_info */
+#define CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR 0x1073
+#define CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR 0x1074
+#define CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR 0x1075
+
+
+/***************************************************************
+* cl_khr_external_memory
+***************************************************************/
+#define cl_khr_external_memory 1
+
+typedef cl_uint cl_external_memory_handle_type_khr;
+
+/* cl_platform_info */
+#define CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x2044
+
+/* cl_device_info */
+#define CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x204F
+
+/* cl_mem_properties */
+#define CL_DEVICE_HANDLE_LIST_KHR 0x2051
+#define CL_DEVICE_HANDLE_LIST_END_KHR 0
+
+/* cl_command_type */
+#define CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR 0x2047
+#define CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR 0x2048
+
+
+typedef cl_int (CL_API_CALL *
+clEnqueueAcquireExternalMemObjectsKHR_fn)(
+ cl_command_queue command_queue,
+ cl_uint num_mem_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_API_SUFFIX__VERSION_3_0;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueReleaseExternalMemObjectsKHR_fn)(
+ cl_command_queue command_queue,
+ cl_uint num_mem_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_API_SUFFIX__VERSION_3_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireExternalMemObjectsKHR(
+ cl_command_queue command_queue,
+ cl_uint num_mem_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_API_SUFFIX__VERSION_3_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseExternalMemObjectsKHR(
+ cl_command_queue command_queue,
+ cl_uint num_mem_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_API_SUFFIX__VERSION_3_0;
+
+/***************************************************************
+* cl_khr_external_memory_dma_buf
+***************************************************************/
+#define cl_khr_external_memory_dma_buf 1
+
+/* cl_external_memory_handle_type_khr */
+#define CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR 0x2067
+
+/***************************************************************
+* cl_khr_external_memory_dx
+***************************************************************/
+#define cl_khr_external_memory_dx 1
+
+/* cl_external_memory_handle_type_khr */
+#define CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR 0x2063
+#define CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR 0x2064
+#define CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR 0x2065
+#define CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR 0x2066
+
+/***************************************************************
+* cl_khr_external_memory_opaque_fd
+***************************************************************/
+#define cl_khr_external_memory_opaque_fd 1
+
+/* cl_external_memory_handle_type_khr */
+#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR 0x2060
+
+/***************************************************************
+* cl_khr_external_memory_win32
+***************************************************************/
+#define cl_khr_external_memory_win32 1
+
+/* cl_external_memory_handle_type_khr */
+#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR 0x2061
+#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2062
+
+/***************************************************************
+* cl_khr_external_semaphore
+***************************************************************/
+#define cl_khr_external_semaphore 1
+
+typedef struct _cl_semaphore_khr * cl_semaphore_khr;
+typedef cl_uint cl_external_semaphore_handle_type_khr;
+
+/* cl_platform_info */
+#define CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x2037
+#define CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x2038
+
+/* cl_device_info */
+#define CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x204D
+#define CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x204E
+
+/* cl_semaphore_properties_khr */
+#define CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x203F
+#define CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR 0
+
+
+typedef cl_int (CL_API_CALL *
+clGetSemaphoreHandleForTypeKHR_fn)(
+ cl_semaphore_khr sema_object,
+ cl_device_id device,
+ cl_external_semaphore_handle_type_khr handle_type,
+ size_t handle_size,
+ void* handle_ptr,
+ size_t* handle_size_ret) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSemaphoreHandleForTypeKHR(
+ cl_semaphore_khr sema_object,
+ cl_device_id device,
+ cl_external_semaphore_handle_type_khr handle_type,
+ size_t handle_size,
+ void* handle_ptr,
+ size_t* handle_size_ret) CL_API_SUFFIX__VERSION_1_2;
+
+/***************************************************************
+* cl_khr_external_semaphore_dx_fence
+***************************************************************/
+#define cl_khr_external_semaphore_dx_fence 1
+
+/* cl_external_semaphore_handle_type_khr */
+#define CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR 0x2059
+
+/***************************************************************
+* cl_khr_external_semaphore_opaque_fd
+***************************************************************/
+#define cl_khr_external_semaphore_opaque_fd 1
+
+/* cl_external_semaphore_handle_type_khr */
+#define CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR 0x2055
+
+/***************************************************************
+* cl_khr_external_semaphore_sync_fd
+***************************************************************/
+#define cl_khr_external_semaphore_sync_fd 1
+
+/* cl_external_semaphore_handle_type_khr */
+#define CL_SEMAPHORE_HANDLE_SYNC_FD_KHR 0x2058
+
+/***************************************************************
+* cl_khr_external_semaphore_win32
+***************************************************************/
+#define cl_khr_external_semaphore_win32 1
+
+/* cl_external_semaphore_handle_type_khr */
+#define CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR 0x2056
+#define CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2057
+
+/***************************************************************
+* cl_khr_semaphore
+***************************************************************/
+#define cl_khr_semaphore 1
+
+/* type cl_semaphore_khr */
+typedef cl_properties cl_semaphore_properties_khr;
+typedef cl_uint cl_semaphore_info_khr;
+typedef cl_uint cl_semaphore_type_khr;
+typedef cl_ulong cl_semaphore_payload_khr;
+
+/* cl_semaphore_type */
+#define CL_SEMAPHORE_TYPE_BINARY_KHR 1
+
+/* cl_platform_info */
+#define CL_PLATFORM_SEMAPHORE_TYPES_KHR 0x2036
+
+/* cl_device_info */
+#define CL_DEVICE_SEMAPHORE_TYPES_KHR 0x204C
+
+/* cl_semaphore_info_khr */
+#define CL_SEMAPHORE_CONTEXT_KHR 0x2039
+#define CL_SEMAPHORE_REFERENCE_COUNT_KHR 0x203A
+#define CL_SEMAPHORE_PROPERTIES_KHR 0x203B
+#define CL_SEMAPHORE_PAYLOAD_KHR 0x203C
+
+/* cl_semaphore_info_khr or cl_semaphore_properties_khr */
+#define CL_SEMAPHORE_TYPE_KHR 0x203D
+/* enum CL_DEVICE_HANDLE_LIST_KHR */
+/* enum CL_DEVICE_HANDLE_LIST_END_KHR */
+
+/* cl_command_type */
+#define CL_COMMAND_SEMAPHORE_WAIT_KHR 0x2042
+#define CL_COMMAND_SEMAPHORE_SIGNAL_KHR 0x2043
+
+/* Error codes */
+#define CL_INVALID_SEMAPHORE_KHR -1142
+
+
+typedef cl_semaphore_khr (CL_API_CALL *
+clCreateSemaphoreWithPropertiesKHR_fn)(
+ cl_context context,
+ const cl_semaphore_properties_khr* sema_props,
+ cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueWaitSemaphoresKHR_fn)(
+ cl_command_queue command_queue,
+ cl_uint num_sema_objects,
+ const cl_semaphore_khr* sema_objects,
+ const cl_semaphore_payload_khr* sema_payload_list,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueSignalSemaphoresKHR_fn)(
+ cl_command_queue command_queue,
+ cl_uint num_sema_objects,
+ const cl_semaphore_khr* sema_objects,
+ const cl_semaphore_payload_khr* sema_payload_list,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_int (CL_API_CALL *
+clGetSemaphoreInfoKHR_fn)(
+ cl_semaphore_khr sema_object,
+ cl_semaphore_info_khr param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_int (CL_API_CALL *
+clReleaseSemaphoreKHR_fn)(
+ cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_int (CL_API_CALL *
+clRetainSemaphoreKHR_fn)(
+ cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_semaphore_khr CL_API_CALL
+clCreateSemaphoreWithPropertiesKHR(
+ cl_context context,
+ const cl_semaphore_properties_khr* sema_props,
+ cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWaitSemaphoresKHR(
+ cl_command_queue command_queue,
+ cl_uint num_sema_objects,
+ const cl_semaphore_khr* sema_objects,
+ const cl_semaphore_payload_khr* sema_payload_list,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSignalSemaphoresKHR(
+ cl_command_queue command_queue,
+ cl_uint num_sema_objects,
+ const cl_semaphore_khr* sema_objects,
+ const cl_semaphore_payload_khr* sema_payload_list,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSemaphoreInfoKHR(
+ cl_semaphore_khr sema_object,
+ cl_semaphore_info_khr param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseSemaphoreKHR(
+ cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainSemaphoreKHR(
+ cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2;
+
/**********************************
* cl_arm_import_memory extension *
**********************************/
@@ -941,12 +1744,20 @@ typedef cl_bitfield cl_device_scheduling_controls_capabilities_arm;
#define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_MODIFIER_ARM (1 << 2)
#define CL_DEVICE_SCHEDULING_DEFERRED_FLUSH_ARM (1 << 3)
#define CL_DEVICE_SCHEDULING_REGISTER_ALLOCATION_ARM (1 << 4)
+#define CL_DEVICE_SCHEDULING_WARP_THROTTLING_ARM (1 << 5)
+#define CL_DEVICE_SCHEDULING_COMPUTE_UNIT_BATCH_QUEUE_SIZE_ARM (1 << 6)
#define CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM 0x41EB
+#define CL_DEVICE_MAX_WARP_COUNT_ARM 0x41EA
/* cl_kernel_info */
+#define CL_KERNEL_MAX_WARP_COUNT_ARM 0x41E9
+
+/* cl_kernel_exec_info */
#define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM 0x41E5
#define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM 0x41E6
+#define CL_KERNEL_EXEC_INFO_WARP_COUNT_LIMIT_ARM 0x41E8
+#define CL_KERNEL_EXEC_INFO_COMPUTE_UNIT_MAX_QUEUED_BATCHES_ARM 0x41F1
/* cl_queue_properties */
#define CL_QUEUE_KERNEL_BATCHING_ARM 0x41E7
@@ -982,14 +1793,43 @@ typedef cl_uint cl_command_termination_reason_arm;
#define CL_COMMAND_TERMINATION_CONTROLLED_FAILURE_ARM 2
#define CL_COMMAND_TERMINATION_ERROR_ARM 3
-/***************************************
-* cl_intel_thread_local_exec extension *
-****************************************/
+/*************************************
+* cl_arm_protected_memory_allocation *
+*************************************/
+
+#define cl_arm_protected_memory_allocation 1
+
+#define CL_MEM_PROTECTED_ALLOC_ARM (1ULL << 36)
-#define cl_intel_thread_local_exec 1
+/******************************************
+* cl_intel_exec_by_local_thread extension *
+******************************************/
+
+#define cl_intel_exec_by_local_thread 1
#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31)
+/***************************************************************
+* cl_intel_device_attribute_query
+***************************************************************/
+
+#define cl_intel_device_attribute_query 1
+
+typedef cl_bitfield cl_device_feature_capabilities_intel;
+
+/* cl_device_feature_capabilities_intel */
+#define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL (1 << 0)
+#define CL_DEVICE_FEATURE_FLAG_DPAS_INTEL (1 << 1)
+
+/* cl_device_info */
+#define CL_DEVICE_IP_VERSION_INTEL 0x4250
+#define CL_DEVICE_ID_INTEL 0x4251
+#define CL_DEVICE_NUM_SLICES_INTEL 0x4252
+#define CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL 0x4253
+#define CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL 0x4254
+#define CL_DEVICE_NUM_THREADS_PER_EU_INTEL 0x4255
+#define CL_DEVICE_FEATURE_CAPABILITIES_INTEL 0x4256
+
/***********************************************
* cl_intel_device_partition_by_names extension *
************************************************/
@@ -1342,57 +2182,47 @@ typedef cl_uint cl_diagnostics_verbose_level;
/*******************************************
* cl_intel_unified_shared_memory extension *
********************************************/
-
-/* These APIs are in sync with Revision Q of the cl_intel_unified_shared_memory spec! */
-
#define cl_intel_unified_shared_memory 1
-/* cl_device_info */
-#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190
-#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191
-#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192
-#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193
-#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194
+typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel;
+typedef cl_properties cl_mem_properties_intel;
+typedef cl_bitfield cl_mem_alloc_flags_intel;
+typedef cl_uint cl_mem_info_intel;
+typedef cl_uint cl_unified_shared_memory_type_intel;
+typedef cl_uint cl_mem_advice_intel;
-typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel;
+/* cl_device_info */
+#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190
+#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191
+#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192
+#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193
+#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194
/* cl_device_unified_shared_memory_capabilities_intel - bitfield */
-#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0)
-#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1)
-#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2)
+#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0)
+#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1)
+#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2)
#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3)
-typedef cl_properties cl_mem_properties_intel;
-
/* cl_mem_properties_intel */
-#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195
-
-typedef cl_bitfield cl_mem_alloc_flags_intel;
+#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195
/* cl_mem_alloc_flags_intel - bitfield */
-#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0)
-
-typedef cl_uint cl_mem_info_intel;
+#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0)
+#define CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL (1 << 1)
+#define CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL (1 << 2)
/* cl_mem_alloc_info_intel */
-#define CL_MEM_ALLOC_TYPE_INTEL 0x419A
-#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B
-#define CL_MEM_ALLOC_SIZE_INTEL 0x419C
-#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D
-/* Enum values 0x419E-0x419F are reserved for future queries. */
-
-typedef cl_uint cl_unified_shared_memory_type_intel;
+#define CL_MEM_ALLOC_TYPE_INTEL 0x419A
+#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B
+#define CL_MEM_ALLOC_SIZE_INTEL 0x419C
+#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D
/* cl_unified_shared_memory_type_intel */
-#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196
-#define CL_MEM_TYPE_HOST_INTEL 0x4197
-#define CL_MEM_TYPE_DEVICE_INTEL 0x4198
-#define CL_MEM_TYPE_SHARED_INTEL 0x4199
-
-typedef cl_uint cl_mem_advice_intel;
-
-/* cl_mem_advice_intel */
-/* Enum values 0x4208-0x420F are reserved for future memory advices. */
+#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196
+#define CL_MEM_TYPE_HOST_INTEL 0x4197
+#define CL_MEM_TYPE_DEVICE_INTEL 0x4198
+#define CL_MEM_TYPE_SHARED_INTEL 0x4199
/* cl_kernel_exec_info */
#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200
@@ -1401,223 +2231,249 @@ typedef cl_uint cl_mem_advice_intel;
#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203
/* cl_command_type */
-#define CL_COMMAND_MEMFILL_INTEL 0x4204
-#define CL_COMMAND_MEMCPY_INTEL 0x4205
-#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206
-#define CL_COMMAND_MEMADVISE_INTEL 0x4207
+#define CL_COMMAND_MEMFILL_INTEL 0x4204
+#define CL_COMMAND_MEMCPY_INTEL 0x4205
+#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206
+#define CL_COMMAND_MEMADVISE_INTEL 0x4207
-extern CL_API_ENTRY void* CL_API_CALL
-clHostMemAllocINTEL(
- cl_context context,
- const cl_mem_properties_intel* properties,
- size_t size,
- cl_uint alignment,
- cl_int* errcode_ret);
typedef void* (CL_API_CALL *
clHostMemAllocINTEL_fn)(
- cl_context context,
- const cl_mem_properties_intel* properties,
- size_t size,
- cl_uint alignment,
- cl_int* errcode_ret);
-
-extern CL_API_ENTRY void* CL_API_CALL
-clDeviceMemAllocINTEL(
- cl_context context,
- cl_device_id device,
- const cl_mem_properties_intel* properties,
- size_t size,
- cl_uint alignment,
- cl_int* errcode_ret);
+ cl_context context,
+ const cl_mem_properties_intel* properties,
+ size_t size,
+ cl_uint alignment,
+ cl_int* errcode_ret) ;
typedef void* (CL_API_CALL *
clDeviceMemAllocINTEL_fn)(
- cl_context context,
- cl_device_id device,
- const cl_mem_properties_intel* properties,
- size_t size,
- cl_uint alignment,
- cl_int* errcode_ret);
-
-extern CL_API_ENTRY void* CL_API_CALL
-clSharedMemAllocINTEL(
- cl_context context,
- cl_device_id device,
- const cl_mem_properties_intel* properties,
- size_t size,
- cl_uint alignment,
- cl_int* errcode_ret);
+ cl_context context,
+ cl_device_id device,
+ const cl_mem_properties_intel* properties,
+ size_t size,
+ cl_uint alignment,
+ cl_int* errcode_ret) ;
typedef void* (CL_API_CALL *
clSharedMemAllocINTEL_fn)(
- cl_context context,
- cl_device_id device,
- const cl_mem_properties_intel* properties,
- size_t size,
- cl_uint alignment,
- cl_int* errcode_ret);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clMemFreeINTEL(
- cl_context context,
- void* ptr);
+ cl_context context,
+ cl_device_id device,
+ const cl_mem_properties_intel* properties,
+ size_t size,
+ cl_uint alignment,
+ cl_int* errcode_ret) ;
typedef cl_int (CL_API_CALL *
clMemFreeINTEL_fn)(
- cl_context context,
- void* ptr);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clMemBlockingFreeINTEL(
- cl_context context,
- void* ptr);
+ cl_context context,
+ void* ptr) ;
typedef cl_int (CL_API_CALL *
clMemBlockingFreeINTEL_fn)(
- cl_context context,
- void* ptr);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetMemAllocInfoINTEL(
- cl_context context,
- const void* ptr,
- cl_mem_info_intel param_name,
- size_t param_value_size,
- void* param_value,
- size_t* param_value_size_ret);
+ cl_context context,
+ void* ptr) ;
typedef cl_int (CL_API_CALL *
clGetMemAllocInfoINTEL_fn)(
- cl_context context,
- const void* ptr,
- cl_mem_info_intel param_name,
- size_t param_value_size,
- void* param_value,
- size_t* param_value_size_ret);
-
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelArgMemPointerINTEL(
- cl_kernel kernel,
- cl_uint arg_index,
- const void* arg_value);
+ cl_context context,
+ const void* ptr,
+ cl_mem_info_intel param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret) ;
typedef cl_int (CL_API_CALL *
clSetKernelArgMemPointerINTEL_fn)(
- cl_kernel kernel,
- cl_uint arg_index,
- const void* arg_value);
+ cl_kernel kernel,
+ cl_uint arg_index,
+ const void* arg_value) ;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMemsetINTEL(
- cl_command_queue command_queue,
- void* dst_ptr,
- cl_int value,
- size_t size,
- cl_uint num_events_in_wait_list,
- const cl_event* event_wait_list,
- cl_event* event);
+typedef cl_int (CL_API_CALL *
+clEnqueueMemFillINTEL_fn)(
+ cl_command_queue command_queue,
+ void* dst_ptr,
+ const void* pattern,
+ size_t pattern_size,
+ size_t size,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
typedef cl_int (CL_API_CALL *
-clEnqueueMemsetINTEL_fn)(
- cl_command_queue command_queue,
- void* dst_ptr,
- cl_int value,
- size_t size,
- cl_uint num_events_in_wait_list,
- const cl_event* event_wait_list,
- cl_event* event);
+clEnqueueMemcpyINTEL_fn)(
+ cl_command_queue command_queue,
+ cl_bool blocking,
+ void* dst_ptr,
+ const void* src_ptr,
+ size_t size,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueMemAdviseINTEL_fn)(
+ cl_command_queue command_queue,
+ const void* ptr,
+ size_t size,
+ cl_mem_advice_intel advice,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
+
+#ifndef CL_NO_PROTOTYPES
+
+extern CL_API_ENTRY void* CL_API_CALL
+clHostMemAllocINTEL(
+ cl_context context,
+ const cl_mem_properties_intel* properties,
+ size_t size,
+ cl_uint alignment,
+ cl_int* errcode_ret) ;
+
+extern CL_API_ENTRY void* CL_API_CALL
+clDeviceMemAllocINTEL(
+ cl_context context,
+ cl_device_id device,
+ const cl_mem_properties_intel* properties,
+ size_t size,
+ cl_uint alignment,
+ cl_int* errcode_ret) ;
+
+extern CL_API_ENTRY void* CL_API_CALL
+clSharedMemAllocINTEL(
+ cl_context context,
+ cl_device_id device,
+ const cl_mem_properties_intel* properties,
+ size_t size,
+ cl_uint alignment,
+ cl_int* errcode_ret) ;
extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMemFillINTEL(
- cl_command_queue command_queue,
- void* dst_ptr,
- const void* pattern,
- size_t pattern_size,
- size_t size,
- cl_uint num_events_in_wait_list,
- const cl_event* event_wait_list,
- cl_event* event);
+clMemFreeINTEL(
+ cl_context context,
+ void* ptr) ;
-typedef cl_int (CL_API_CALL *
-clEnqueueMemFillINTEL_fn)(
- cl_command_queue command_queue,
- void* dst_ptr,
- const void* pattern,
- size_t pattern_size,
- size_t size,
- cl_uint num_events_in_wait_list,
- const cl_event* event_wait_list,
- cl_event* event);
+extern CL_API_ENTRY cl_int CL_API_CALL
+clMemBlockingFreeINTEL(
+ cl_context context,
+ void* ptr) ;
extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMemcpyINTEL(
- cl_command_queue command_queue,
- cl_bool blocking,
- void* dst_ptr,
- const void* src_ptr,
- size_t size,
- cl_uint num_events_in_wait_list,
- const cl_event* event_wait_list,
- cl_event* event);
+clGetMemAllocInfoINTEL(
+ cl_context context,
+ const void* ptr,
+ cl_mem_info_intel param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret) ;
-typedef cl_int (CL_API_CALL *
-clEnqueueMemcpyINTEL_fn)(
- cl_command_queue command_queue,
- cl_bool blocking,
- void* dst_ptr,
- const void* src_ptr,
- size_t size,
- cl_uint num_events_in_wait_list,
- const cl_event* event_wait_list,
- cl_event* event);
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetKernelArgMemPointerINTEL(
+ cl_kernel kernel,
+ cl_uint arg_index,
+ const void* arg_value) ;
-#ifdef CL_VERSION_1_2
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMemFillINTEL(
+ cl_command_queue command_queue,
+ void* dst_ptr,
+ const void* pattern,
+ size_t pattern_size,
+ size_t size,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
-/* Because these APIs use cl_mem_migration_flags, they require
- OpenCL 1.2: */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMemcpyINTEL(
+ cl_command_queue command_queue,
+ cl_bool blocking,
+ void* dst_ptr,
+ const void* src_ptr,
+ size_t size,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMigrateMemINTEL(
- cl_command_queue command_queue,
- const void* ptr,
- size_t size,
- cl_mem_migration_flags flags,
- cl_uint num_events_in_wait_list,
- const cl_event* event_wait_list,
- cl_event* event);
+clEnqueueMemAdviseINTEL(
+ cl_command_queue command_queue,
+ const void* ptr,
+ size_t size,
+ cl_mem_advice_intel advice,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
+
+#endif /* CL_NO_PROTOTYPES */
+
+#if defined(CL_VERSION_1_2)
+/* Requires OpenCL 1.2 for cl_mem_migration_flags: */
typedef cl_int (CL_API_CALL *
clEnqueueMigrateMemINTEL_fn)(
- cl_command_queue command_queue,
- const void* ptr,
- size_t size,
- cl_mem_migration_flags flags,
- cl_uint num_events_in_wait_list,
- const cl_event* event_wait_list,
- cl_event* event);
+ cl_command_queue command_queue,
+ const void* ptr,
+ size_t size,
+ cl_mem_migration_flags flags,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
-#endif
+#ifndef CL_NO_PROTOTYPES
extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMemAdviseINTEL(
- cl_command_queue command_queue,
- const void* ptr,
- size_t size,
- cl_mem_advice_intel advice,
- cl_uint num_events_in_wait_list,
- const cl_event* event_wait_list,
- cl_event* event);
+clEnqueueMigrateMemINTEL(
+ cl_command_queue command_queue,
+ const void* ptr,
+ size_t size,
+ cl_mem_migration_flags flags,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
+
+#endif /* CL_NO_PROTOTYPES */
+
+#endif /* defined(CL_VERSION_1_2) */
+
+/* deprecated, use clEnqueueMemFillINTEL instead */
typedef cl_int (CL_API_CALL *
-clEnqueueMemAdviseINTEL_fn)(
- cl_command_queue command_queue,
- const void* ptr,
- size_t size,
- cl_mem_advice_intel advice,
- cl_uint num_events_in_wait_list,
- const cl_event* event_wait_list,
- cl_event* event);
+clEnqueueMemsetINTEL_fn)(
+ cl_command_queue command_queue,
+ void* dst_ptr,
+ cl_int value,
+ size_t size,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
+
+#ifndef CL_NO_PROTOTYPES
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMemsetINTEL(
+ cl_command_queue command_queue,
+ void* dst_ptr,
+ cl_int value,
+ size_t size,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event) ;
+
+#endif /* CL_NO_PROTOTYPES */
+
+/***************************************************************
+* cl_intel_mem_alloc_buffer_location
+***************************************************************/
+#define cl_intel_mem_alloc_buffer_location 1
+#define CL_INTEL_MEM_ALLOC_BUFFER_LOCATION_EXTENSION_NAME \
+ "cl_intel_mem_alloc_buffer_location"
+
+/* cl_mem_properties_intel */
+#define CL_MEM_ALLOC_BUFFER_LOCATION_INTEL 0x419E
+
+/* cl_mem_alloc_info_intel */
+/* enum CL_MEM_ALLOC_BUFFER_LOCATION_INTEL */
/***************************************************
* cl_intel_create_buffer_with_properties extension *
@@ -1700,6 +2556,76 @@ typedef struct _cl_queue_family_properties_intel {
#define CL_QUEUE_CAPABILITY_BARRIER_INTEL (1 << 25)
#define CL_QUEUE_CAPABILITY_KERNEL_INTEL (1 << 26)
+/***************************************************************
+* cl_intel_queue_no_sync_operations
+***************************************************************/
+
+#define cl_intel_queue_no_sync_operations 1
+
+/* addition to cl_command_queue_properties */
+#define CL_QUEUE_NO_SYNC_OPERATIONS_INTEL (1 << 29)
+
+/***************************************************************
+* cl_intel_sharing_format_query
+***************************************************************/
+#define cl_intel_sharing_format_query 1
+
+/***************************************************************
+* cl_ext_image_requirements_info
+***************************************************************/
+
+#ifdef CL_VERSION_3_0
+
+#define cl_ext_image_requirements_info 1
+
+typedef cl_uint cl_image_requirements_info_ext;
+
+#define CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT 0x1290
+#define CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT 0x1292
+#define CL_IMAGE_REQUIREMENTS_SIZE_EXT 0x12B2
+#define CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT 0x12B3
+#define CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT 0x12B4
+#define CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT 0x12B5
+#define CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT 0x12B6
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetImageRequirementsInfoEXT(
+ cl_context context,
+ const cl_mem_properties* properties,
+ cl_mem_flags flags,
+ const cl_image_format* image_format,
+ const cl_image_desc* image_desc,
+ cl_image_requirements_info_ext param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_3_0;
+
+typedef cl_int (CL_API_CALL *
+clGetImageRequirementsInfoEXT_fn)(
+ cl_context context,
+ const cl_mem_properties* properties,
+ cl_mem_flags flags,
+ const cl_image_format* image_format,
+ const cl_image_desc* image_desc,
+ cl_image_requirements_info_ext param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_3_0;
+
+#endif
+
+/***************************************************************
+* cl_ext_image_from_buffer
+***************************************************************/
+
+#ifdef CL_VERSION_3_0
+
+#define cl_ext_image_from_buffer 1
+
+#define CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT 0x1291
+
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/dependencies/ocl-headers/CL/cl_gl.h b/dependencies/ocl-headers/CL/cl_gl.h
index 5ea0fd8b..32774650 100644
--- a/dependencies/ocl-headers/CL/cl_gl.h
+++ b/dependencies/ocl-headers/CL/cl_gl.h
@@ -162,6 +162,31 @@ clCreateEventFromGLsyncKHR(cl_context context,
cl_GLsync sync,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1;
+/***************************************************************
+* cl_intel_sharing_format_query_gl
+***************************************************************/
+#define cl_intel_sharing_format_query_gl 1
+
+/* when cl_khr_gl_sharing is supported */
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedGLTextureFormatsINTEL(
+ cl_context context,
+ cl_mem_flags flags,
+ cl_mem_object_type image_type,
+ cl_uint num_entries,
+ cl_GLenum* gl_formats,
+ cl_uint* num_texture_formats) ;
+
+typedef cl_int (CL_API_CALL *
+clGetSupportedGLTextureFormatsINTEL_fn)(
+ cl_context context,
+ cl_mem_flags flags,
+ cl_mem_object_type image_type,
+ cl_uint num_entries,
+ cl_GLenum* gl_formats,
+ cl_uint* num_texture_formats) ;
+
#ifdef __cplusplus
}
#endif
diff --git a/dependencies/ocl-headers/CL/cl_gl_ext.h b/dependencies/ocl-headers/CL/cl_gl_ext.h
deleted file mode 100644
index 8ec81816..00000000
--- a/dependencies/ocl-headers/CL/cl_gl_ext.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2008-2021 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-#include <CL/cl_gl.h>
-#pragma message("All OpenGL-related extensions have been moved into cl_gl.h. Please include cl_gl.h directly.")
diff --git a/dependencies/ocl-headers/CL/cl_platform.h b/dependencies/ocl-headers/CL/cl_platform.h
index 8ae655d1..e7a0d6f4 100644
--- a/dependencies/ocl-headers/CL/cl_platform.h
+++ b/dependencies/ocl-headers/CL/cl_platform.h
@@ -135,6 +135,11 @@ extern "C" {
#if (defined (_WIN32) && defined(_MSC_VER))
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wlanguage-extension-token"
+#endif
+
/* intptr_t is used in cl.h and provided by stddef.h in Visual C++, but not in clang */
/* stdint.h was missing before Visual Studio 2010, include it for later versions and for clang */
#if defined(__clang__) || _MSC_VER >= 1600
@@ -155,6 +160,10 @@ typedef unsigned __int16 cl_half;
typedef float cl_float;
typedef double cl_double;
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
/* Macro names and corresponding values defined by OpenCL */
#define CL_CHAR_BIT 8
#define CL_SCHAR_MAX 127
@@ -501,25 +510,26 @@ typedef unsigned int cl_GLenum;
#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define __CL_HAS_ANON_STRUCT__ 1
#define __CL_ANON_STRUCT__
-#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+#elif defined(_WIN32) && defined(_MSC_VER) && !defined(__STDC__)
+#define __CL_HAS_ANON_STRUCT__ 1
+#define __CL_ANON_STRUCT__
+#elif defined(__GNUC__) && ! defined(__STRICT_ANSI__)
+#define __CL_HAS_ANON_STRUCT__ 1
+#define __CL_ANON_STRUCT__ __extension__
+#elif defined(__clang__)
#define __CL_HAS_ANON_STRUCT__ 1
#define __CL_ANON_STRUCT__ __extension__
-#elif defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__)
- #if _MSC_VER >= 1500
- /* Microsoft Developer Studio 2008 supports anonymous structs, but
- * complains by default. */
- #define __CL_HAS_ANON_STRUCT__ 1
- #define __CL_ANON_STRUCT__
- /* Disable warning C4201: nonstandard extension used : nameless
- * struct/union */
- #pragma warning( push )
- #pragma warning( disable : 4201 )
- #endif
#else
#define __CL_HAS_ANON_STRUCT__ 0
#define __CL_ANON_STRUCT__
#endif
+#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__
+ /* Disable warning C4201: nonstandard extension used : nameless struct/union */
+ #pragma warning( push )
+ #pragma warning( disable : 4201 )
+#endif
+
/* Define alignment keys */
#if defined( __GNUC__ ) || defined(__INTEGRITY)
#define CL_ALIGNED(_x) __attribute__ ((aligned(_x)))
@@ -1395,10 +1405,8 @@ typedef union
}
#endif
-#if defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__)
- #if _MSC_VER >=1500
+#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__
#pragma warning( pop )
- #endif
#endif
#endif /* __CL_PLATFORM_H */
diff --git a/dependencies/ocl-stubs/apis_generator.py b/dependencies/ocl-stubs/apis_generator.py
index 8cc09542..8cdbc403 100644
--- a/dependencies/ocl-stubs/apis_generator.py
+++ b/dependencies/ocl-stubs/apis_generator.py
@@ -80,7 +80,7 @@ def process_type(raw):
def parse_api(api_signature):
m = None
- api_signature = re.sub('extern', '', api_signature)
+ api_signature = re.sub(r'\bextern\b', '', api_signature)
api_signature = re.sub('CL_\w+', '', api_signature)
m = re.match(r'\s*(.*)\s+(\w+)\((.*)\)\s*;', api_signature)
diff --git a/dependencies/ocl-stubs/stubs.cpp b/dependencies/ocl-stubs/stubs.cpp
index 2cf37001..fe9a9126 100644
--- a/dependencies/ocl-stubs/stubs.cpp
+++ b/dependencies/ocl-stubs/stubs.cpp
@@ -2,7 +2,6 @@
#include <CL/cl_gl.h>
#include <CL/cl_egl.h>
#include <CL/cl_ext.h>
-#include <CL/cl_gl_ext.h>
#include <dlfcn.h>
@@ -31,4 +30,3 @@ rettype fname fargs {
#define CL_MACRO FUNC_SYM
#include "apis.h"
#undef CL_MACRO
-
diff --git a/presubmit.sh b/presubmit.sh
index 6fc037c8..ca39b9a2 100755
--- a/presubmit.sh
+++ b/presubmit.sh
@@ -14,8 +14,11 @@ TOOLCHAIN_FILE=${TOP}/toolchain.cmake
touch ${TOOLCHAIN_FILE}
BUILD_OPENGL_TEST="OFF"
+cmake --version
+echo
+
# Prepare toolchain if needed
-if [[ ${JOB_ARCHITECTURE} != "" ]]; then
+if [[ ${JOB_ARCHITECTURE} != "" && ${RUNNER_OS} != "Windows" ]]; then
TOOLCHAIN_URL_VAR=TOOLCHAIN_URL_${JOB_ARCHITECTURE}
TOOLCHAIN_URL=${!TOOLCHAIN_URL_VAR}
wget ${TOOLCHAIN_URL}
@@ -38,35 +41,67 @@ fi
if [[ ( ${JOB_ARCHITECTURE} == "" && ${JOB_ENABLE_GL} == "1" ) ]]; then
BUILD_OPENGL_TEST="ON"
- sudo apt-get update
- sudo apt-get -y install libglu1-mesa-dev freeglut3-dev mesa-common-dev libglew-dev
fi
-# Prepare headers
-git clone https://github.com/KhronosGroup/OpenCL-Headers.git
-cd OpenCL-Headers
-ln -s CL OpenCL # For OSX builds
-cd ..
+
+if [[ ${JOB_ENABLE_DEBUG} == 1 ]]; then
+ BUILD_CONFIG="Debug"
+else
+ BUILD_CONFIG="Release"
+fi
+
+#Vulkan Headers
+git clone https://github.com/KhronosGroup/Vulkan-Headers.git
# Get and build loader
git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git
cd ${TOP}/OpenCL-ICD-Loader
mkdir build
cd build
-cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ ..
-make
+cmake .. -G Ninja \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
+ -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/
+cmake --build . -j2
+
+#Vulkan Loader
+cd ${TOP}
+git clone https://github.com/KhronosGroup/Vulkan-Loader.git
+cd Vulkan-Loader
+mkdir build
+cd build
+python3 ../scripts/update_deps.py
+cmake .. -G Ninja \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
+ -DBUILD_WSI_XLIB_SUPPORT=OFF \
+ -DBUILD_WSI_XCB_SUPPORT=OFF \
+ -DBUILD_WSI_WAYLAND_SUPPORT=OFF \
+ -DUSE_GAS=OFF \
+ -C helper.cmake ..
+cmake --build . -j2
# Build CTS
cd ${TOP}
ls -l
mkdir build
cd build
-cmake -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \
+if [[ ${RUNNER_OS} == "Windows" ]]; then
+ CMAKE_OPENCL_LIBRARIES_OPTION="OpenCL"
+ CMAKE_CACHE_OPTIONS=""
+else
+ CMAKE_OPENCL_LIBRARIES_OPTION="-lOpenCL -lpthread"
+ CMAKE_CACHE_OPTIONS="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache"
+fi
+cmake .. -G Ninja \
+ -DCMAKE_BUILD_TYPE="${BUILD_CONFIG}" \
+ ${CMAKE_CACHE_OPTIONS} \
+ -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \
-DCL_LIB_DIR=${TOP}/OpenCL-ICD-Loader/build \
-DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
-DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \
- -DOPENCL_LIBRARIES="-lOpenCL -lpthread" \
+ -DOPENCL_LIBRARIES="${CMAKE_OPENCL_LIBRARIES_OPTION}" \
-DUSE_CL_EXPERIMENTAL=ON \
-DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} \
- ..
-make -j2
-
+ -DVULKAN_INCLUDE_DIR=${TOP}/Vulkan-Headers/include/ \
+ -DVULKAN_LIB_DIR=${TOP}/Vulkan-Loader/build/loader/
+cmake --build . -j3
diff --git a/scripts/android_bp_head b/scripts/android_bp_head
index c5cd3949..42c3e2c7 100644
--- a/scripts/android_bp_head
+++ b/scripts/android_bp_head
@@ -1,24 +1,7 @@
-// *** THIS PACKAGE HAS SPECIAL LICENSING CONDITIONS. PLEASE
-// CONSULT THE OWNERS AND opensource-licensing@google.com BEFORE
-// DEPENDING ON IT IN YOUR PROJECT. ***
package {
default_applicable_licenses: ["external_OpenCL-CTS_license"],
}
-// Added automatically by a large-scale-change that took the approach of
-// 'apply every license found to every target'. While this makes sure we respect
-// every license restriction, it may not be entirely correct.
-//
-// e.g. GPL in an MIT project might only apply to the contrib/ directory.
-//
-// Please consider splitting the single license below into multiple licenses,
-// taking care not to lose any license_kind information, and overriding the
-// default license using the 'licenses: [...]' property on targets as needed.
-//
-// For unused files, consider creating a 'fileGroup' with "//visibility:private"
-// to attach the license to, and including a comment whether the files may be
-// used in the current project.
-// See: http://go/android-license-faq
license {
name: "external_OpenCL-CTS_license",
visibility: [":__subpackages__"],
@@ -27,9 +10,6 @@ license {
"SPDX-license-identifier-BSD",
"SPDX-license-identifier-MIT",
"SPDX-license-identifier-Unlicense",
- "legacy_by_exception_only", // by exception only
- "legacy_proprietary", // by exception only
- "legacy_unencumbered",
],
license_text: [
"LICENSE.txt",
@@ -56,37 +36,19 @@ cc_defaults {
"-DCL_EXPERIMENTAL",
"-DCL_TARGET_OPENCL_VERSION=300",
"-Wno-#warnings",
- "-Wno-absolute-value",
- "-Wno-asm-operand-widths",
"-Wno-c++11-narrowing",
- "-Wno-dangling-else",
"-Wno-date-time",
"-Wno-deprecated-declarations",
"-Wno-format",
- "-Wno-ignored-pragmas",
"-Wno-ignored-qualifiers",
"-Wno-implicit-fallthrough",
- "-Wno-logical-op-parentheses",
- "-Wno-macro-redefined",
"-Wno-missing-braces",
- "-Wno-missing-declarations",
"-Wno-missing-field-initializers",
"-Wno-non-virtual-dtor",
"-Wno-overloaded-virtual",
- "-Wno-parentheses",
- "-Wno-parentheses-equality",
"-Wno-reorder-ctor",
- "-Wno-return-stack-address",
- "-Wno-shift-negative-value",
"-Wno-sometimes-uninitialized",
- "-Wno-switch",
- "-Wno-unknown-pragmas",
- "-Wno-unneeded-internal-declaration",
- "-Wno-unused-function",
- "-Wno-unused-label",
"-Wno-unused-parameter",
- "-Wno-unused-variable",
- "-Wno-writable-strings",
"-fexceptions",
],
static_libs: [
@@ -118,4 +80,3 @@ cc_defaults {
export_include_dirs: [ "test_conformance/images" ],
defaults: [ "ocl-test-defaults" ],
}
-
diff --git a/scripts/android_bp_tail b/scripts/android_bp_tail
index a073f337..c0488738 100644
--- a/scripts/android_bp_tail
+++ b/scripts/android_bp_tail
@@ -4,14 +4,6 @@ python_test_host {
srcs: [ "scripts/test_opencl_cts.py" ],
data: [ "scripts/test_opencl_cts.xml" ],
test_config: "scripts/test_opencl_cts.xml",
- version: {
- py2: {
- enabled: false,
- },
- py3: {
- enabled: true
- }
- },
test_options: {
unit_test: false,
},
@@ -21,14 +13,4 @@ python_test {
name: "run_conformance",
main: "test_conformance/run_conformance.py",
srcs: [ "test_conformance/run_conformance.py" ],
- version: {
- py2: {
- enabled: true,
- embedded_launcher: true,
- },
- py3: {
- enabled: false,
- }
- },
}
-
diff --git a/scripts/generate_test_files.py b/scripts/generate_test_files.py
index cdb10dbf..1155a0ce 100644
--- a/scripts/generate_test_files.py
+++ b/scripts/generate_test_files.py
@@ -1,6 +1,8 @@
import json
import os
import re
+import shutil
+import subprocess
from xml.dom import minidom
from xml.etree import ElementTree
@@ -45,7 +47,8 @@ cc_test {{
f.write(cc_test_string)
-def generate_android_bp():
+# Return value indicates whether the output should be formatted with bpfmt
+def generate_android_bp() -> bool:
android_bp_head_path = os.path.join(SCRIPT_DIR, 'android_bp_head')
android_bp_tail_path = os.path.join(SCRIPT_DIR, 'android_bp_tail')
@@ -61,6 +64,12 @@ def generate_android_bp():
with open(android_bp_tail_path, 'r') as android_bp_tail:
android_bp.write(android_bp_tail.read())
+ if shutil.which('bpfmt') is not None:
+ subprocess.run(['bpfmt', '-w', 'Android.bp'])
+ return True
+
+ return False
+
def create_subelement_with_attribs(element, tag, attribs):
subelement = ElementTree.SubElement(element, tag)
@@ -142,12 +151,15 @@ def generate_test_xml():
def main():
- generate_android_bp()
+ android_bp_formatted = generate_android_bp()
generate_test_xml()
print("Don't forget to move -")
print(" Android.bp -> {ANDROID_ROOT}/external/OpenCL-CTS/Android.bp")
print(" test_opencl_cts.xml -> {ANDROID_ROOT}/external/OpenCL-CTS/scripts/test_opencl_cts.xml")
+ if not android_bp_formatted:
+ print("then run the blueprint autoformatter:")
+ print(" bpfmt -w {ANDROID_ROOT}/external/OpenCL-CTS/Android.bp")
if __name__ == '__main__':
diff --git a/test_common/CMakeLists.txt b/test_common/CMakeLists.txt
index 2d4bc190..b0505345 100644
--- a/test_common/CMakeLists.txt
+++ b/test_common/CMakeLists.txt
@@ -1,6 +1,5 @@
set(HARNESS_SOURCES
- harness/threadTesting.cpp
harness/typeWrappers.cpp
harness/mt19937.cpp
harness/conversions.cpp
@@ -22,4 +21,3 @@ set(HARNESS_SOURCES
)
add_library(harness STATIC ${HARNESS_SOURCES})
-
diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp
index def78d75..b9f95a94 100644
--- a/test_common/gl/helpers.cpp
+++ b/test_common/gl/helpers.cpp
@@ -1381,7 +1381,6 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height,
//calculating colors
double color_delta = 1.0 / (total_layers * samples);
- double color = color_delta;
if (attachment != GL_DEPTH_ATTACHMENT && attachment != GL_DEPTH_STENCIL_ATTACHMENT) {
glDisable(GL_DEPTH_TEST);
diff --git a/test_common/gl/setup_win32.cpp b/test_common/gl/setup_win32.cpp
index b120a36d..708e681d 100644
--- a/test_common/gl/setup_win32.cpp
+++ b/test_common/gl/setup_win32.cpp
@@ -13,14 +13,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#define GL_GLEXT_PROTOTYPES
#include "setup.h"
#include "testBase.h"
#include "harness/errorHelpers.h"
-#include <GL/gl.h>
-#include <GL/glut.h>
#include <CL/cl_ext.h>
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
diff --git a/test_common/gl/setup_x11.cpp b/test_common/gl/setup_x11.cpp
index c54ecdec..abc065c9 100644
--- a/test_common/gl/setup_x11.cpp
+++ b/test_common/gl/setup_x11.cpp
@@ -13,16 +13,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#define GL_GLEXT_PROTOTYPES
#include "setup.h"
#include "testBase.h"
#include "harness/errorHelpers.h"
-#include <GL/gl.h>
-#include <GL/glut.h>
-#include <GL/glext.h>
-#include <GL/freeglut.h>
#include <GL/glx.h>
#include <CL/cl_ext.h>
@@ -90,10 +85,17 @@ public:
}
for (int i=0; i<(int)num_of_devices; i++) {
- if (!is_extension_available(devices[i], "cl_khr_gl_sharing ")) {
- log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
- } else {
- log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
+ if (!is_extension_available(devices[i], "cl_khr_gl_sharing"))
+ {
+ log_info("Device %d of %d does not support required extension "
+ "cl_khr_gl_sharing.\n",
+ i + 1, num_of_devices);
+ }
+ else
+ {
+ log_info("Device %d of %d supports required extension "
+ "cl_khr_gl_sharing.\n",
+ i + 1, num_of_devices);
found_valid_device = 1;
m_devices[m_device_count++] = devices[i];
}
diff --git a/test_common/gles/helpers.cpp b/test_common/gles/helpers.cpp
index 34f40b4c..57a4ddc1 100644
--- a/test_common/gles/helpers.cpp
+++ b/test_common/gles/helpers.cpp
@@ -22,7 +22,7 @@
{GLint __error = glGetError(); if(__error) {log_error( "GL ERROR: %s!\n", gluErrorString( err ));}}
#if defined(__linux__) || defined(GL_ES_VERSION_2_0)
-// On linux we dont link to GLU library to avoid comaptibility issues with
+// On linux we don't link to GLU library to avoid compatibility issues with
// libstdc++
// FIXME: Implement this
const GLubyte* gluErrorString (GLenum error)
@@ -271,8 +271,6 @@ void * ReadGLTexture( GLenum glTarget, GLuint glTexture,
// Read results from the GL texture
glBindTexture(get_base_gl_target(glTarget), glTexture);
- GLint realWidth, realHeight;
- GLint realInternalFormat;
GLenum readBackFormat = GL_RGBA;
GLenum readBackType = glType;
glFramebufferWrapper glFramebuffer;
@@ -301,7 +299,7 @@ void * ReadGLTexture( GLenum glTarget, GLuint glTexture,
GetGLFormatName(readBackFormat),
GetGLTypeName(readBackType));
- DumpGLBuffer(readBackType, realWidth, realHeight, (void*)outBuffer);
+ DumpGLBuffer(readBackType, outWidth, outHeight, (void *)outBuffer);
#endif
diff --git a/test_common/gles/helpers.h b/test_common/gles/helpers.h
index 5bd0fdf1..20768787 100644
--- a/test_common/gles/helpers.h
+++ b/test_common/gles/helpers.h
@@ -30,11 +30,10 @@
#if !defined (__APPLE__)
#include <CL/cl.h>
-#include "gl_headers.h"
#include <CL/cl_gl.h>
-#else
-#include "gl_headers.h"
+#include <CL/cl_half.h>
#endif
+#include "gl_headers.h"
#include "harness/errorHelpers.h"
#include "harness/kernelHelpers.h"
diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp
index 31985aa0..62798045 100644
--- a/test_common/harness/ThreadPool.cpp
+++ b/test_common/harness/ThreadPool.cpp
@@ -22,6 +22,8 @@
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
// or any other POSIX system
+#include <atomic>
+
#if defined(_WIN32)
#include <windows.h>
#if defined(_MSC_VER)
@@ -241,7 +243,7 @@ pthread_cond_t cond_var;
// Condition variable state. How many iterations on the function left to run,
// set to CL_INT_MAX to cause worker threads to exit. Note: this value might
// go negative.
-volatile cl_int gRunCount = 0;
+std::atomic<cl_int> gRunCount{ 0 };
// State that only changes when the threadpool is not working.
volatile TPFuncPtr gFunc_ptr = NULL;
@@ -261,19 +263,20 @@ pthread_cond_t caller_cond_var;
// # of threads intended to be running. Running threads will decrement this
// as they discover they've run out of work to do.
-volatile cl_int gRunning = 0;
+std::atomic<cl_int> gRunning{ 0 };
// The total number of threads launched.
-volatile cl_int gThreadCount = 0;
+std::atomic<cl_int> gThreadCount{ 0 };
+
#ifdef _WIN32
void ThreadPool_WorkerFunc(void *p)
#else
void *ThreadPool_WorkerFunc(void *p)
#endif
{
- cl_uint threadID = ThreadPool_AtomicAdd((volatile cl_int *)p, 1);
- cl_int item = ThreadPool_AtomicAdd(&gRunCount, -1);
- // log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
+ auto &tid = *static_cast<std::atomic<cl_uint> *>(p);
+ cl_uint threadID = tid++;
+ cl_int item = gRunCount--;
while (MAX_COUNT > item)
{
@@ -282,8 +285,6 @@ void *ThreadPool_WorkerFunc(void *p)
// check for more work to do
if (0 >= item)
{
- // log_info("Thread %d has run out of work.\n", threadID);
-
// No work to do. Attempt to block waiting for work
#if defined(_WIN32)
EnterCriticalSection(cond_lock);
@@ -298,9 +299,7 @@ void *ThreadPool_WorkerFunc(void *p)
}
#endif // !_WIN32
- cl_int remaining = ThreadPool_AtomicAdd(&gRunning, -1);
- // log_info("ThreadPool_WorkerFunc: gRunning = %d\n",
- // remaining - 1);
+ cl_int remaining = gRunning--;
if (1 == remaining)
{ // last thread out signal the main thread to wake up
#if defined(_WIN32)
@@ -350,7 +349,7 @@ void *ThreadPool_WorkerFunc(void *p)
#endif // !_WIN32
// try again to get a valid item id
- item = ThreadPool_AtomicAdd(&gRunCount, -1);
+ item = gRunCount--;
if (MAX_COUNT <= item) // exit if we are done
{
#if defined(_WIN32)
@@ -362,8 +361,7 @@ void *ThreadPool_WorkerFunc(void *p)
}
}
- ThreadPool_AtomicAdd(&gRunning, 1);
- // log_info("Thread %d has found work.\n", threadID);
+ gRunning++;
#if defined(_WIN32)
LeaveCriticalSection(cond_lock);
@@ -447,12 +445,12 @@ void *ThreadPool_WorkerFunc(void *p)
}
// get the next item
- item = ThreadPool_AtomicAdd(&gRunCount, -1);
+ item = gRunCount--;
}
exit:
log_info("ThreadPool: thread %d exiting.\n", threadID);
- ThreadPool_AtomicAdd(&gThreadCount, -1);
+ gThreadCount--;
#if !defined(_WIN32)
return NULL;
#endif
@@ -487,7 +485,7 @@ void ThreadPool_Init(void)
{
cl_int i;
int err;
- volatile cl_uint threadID = 0;
+ std::atomic<cl_uint> threadID{ 0 };
// Check for manual override of multithreading code. We add this for better
// debuggability.
@@ -523,7 +521,7 @@ void ThreadPool_Init(void)
{
// Count the number of bits in ProcessorMask (number of
// logical cores)
- ULONG mask = ptr->ProcessorMask;
+ ULONG_PTR mask = ptr->ProcessorMask;
while (mask)
{
++gThreadCount;
@@ -624,7 +622,7 @@ void ThreadPool_Init(void)
}
#endif // !_WIN32
- gRunning = gThreadCount;
+ gRunning = gThreadCount.load();
// init threads
for (i = 0; i < gThreadCount; i++)
{
@@ -688,7 +686,6 @@ static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter,
void ThreadPool_Exit(void)
{
- int err, count;
gRunCount = CL_INT_MAX;
#if defined(__GNUC__)
@@ -702,13 +699,13 @@ void ThreadPool_Exit(void)
#endif
// spin waiting for threads to die
- for (count = 0; 0 != gThreadCount && count < 1000; count++)
+ for (int count = 0; 0 != gThreadCount && count < 1000; count++)
{
#if defined(_WIN32)
_WakeAllConditionVariable(cond_var);
Sleep(1);
#else // !_WIN32
- if ((err = pthread_cond_broadcast(&cond_var)))
+ if (int err = pthread_cond_broadcast(&cond_var))
{
log_error("Error %d from pthread_cond_broadcast. Unable to wake up "
"work threads. ThreadPool_Exit failed.\n",
@@ -722,7 +719,7 @@ void ThreadPool_Exit(void)
if (gThreadCount)
log_error("Error: Thread pool timed out after 1 second with %d threads "
"still active.\n",
- gThreadCount);
+ gThreadCount.load());
else
log_info("Thread pool exited in a orderly fashion.\n");
}
@@ -738,7 +735,9 @@ void ThreadPool_Exit(void)
// all available then it would make more sense to use those features.
cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo)
{
+#ifndef _WIN32
cl_int newErr;
+#endif
cl_int err = 0;
// Lazily set up our threads
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
@@ -913,7 +912,9 @@ cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo)
err = jobError;
+#ifndef _WIN32
exit:
+#endif
// exit critical region
#if defined(_WIN32)
LeaveCriticalSection(gThreadPoolLock);
diff --git a/test_common/harness/alloc.h b/test_common/harness/alloc.h
index 653dde05..3b00d7c9 100644
--- a/test_common/harness/alloc.h
+++ b/test_common/harness/alloc.h
@@ -29,7 +29,7 @@
#include "mingw_compat.h"
#endif
-static void* align_malloc(size_t size, size_t alignment)
+inline void* align_malloc(size_t size, size_t alignment)
{
#if defined(_WIN32) && defined(_MSC_VER)
return _aligned_malloc(size, alignment);
@@ -53,7 +53,7 @@ static void* align_malloc(size_t size, size_t alignment)
#endif
}
-static void align_free(void* ptr)
+inline void align_free(void* ptr)
{
#if defined(_WIN32) && defined(_MSC_VER)
_aligned_free(ptr);
diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h
index 7aad15a0..4053b7ee 100644
--- a/test_common/harness/compat.h
+++ b/test_common/harness/compat.h
@@ -18,13 +18,13 @@
#if defined(_WIN32) && defined(_MSC_VER)
#include <Windows.h>
-#endif
-
+#else
#ifdef __cplusplus
#define EXTERN_C extern "C"
#else
#define EXTERN_C
#endif
+#endif
//
@@ -309,13 +309,6 @@ EXTERN_C int __builtin_clz(unsigned int pattern);
#endif
-#ifndef MIN
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-#endif
-#ifndef MAX
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-#endif
-
/*-----------------------------------------------------------------------------
WARNING: DO NOT USE THESE MACROS:
diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp
index fc3317c7..d52a2ac6 100644
--- a/test_common/harness/conversions.cpp
+++ b/test_common/harness/conversions.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "conversions.h"
+#include <cinttypes>
#include <limits.h>
#include <time.h>
#include <assert.h>
@@ -50,10 +51,10 @@ void print_type_to_string(ExplicitType type, void *data, char *string)
case kInt: sprintf(string, "%d", *((cl_int *)data)); return;
case kUInt:
case kUnsignedInt: sprintf(string, "%u", *((cl_uint *)data)); return;
- case kLong: sprintf(string, "%lld", *((cl_long *)data)); return;
+ case kLong: sprintf(string, "%" PRId64 "", *((cl_long *)data)); return;
case kULong:
case kUnsignedLong:
- sprintf(string, "%llu", *((cl_ulong *)data));
+ sprintf(string, "%" PRIu64 "", *((cl_ulong *)data));
return;
case kFloat: sprintf(string, "%f", *((cl_float *)data)); return;
case kHalf: sprintf(string, "half"); return;
@@ -181,8 +182,8 @@ static ULong sUpperLimits[kNumExplicitTypes] = {
0xffffffffLL,
0xffffffffLL,
0x7fffffffffffffffLL,
- 0xffffffffffffffffLL,
- 0xffffffffffffffffLL,
+ 0xffffffffffffffffULL,
+ 0xffffffffffffffffULL,
0,
0
}; // Last two values aren't stored here
diff --git a/test_common/harness/deviceInfo.cpp b/test_common/harness/deviceInfo.cpp
index 287a1423..97ab8c85 100644
--- a/test_common/harness/deviceInfo.cpp
+++ b/test_common/harness/deviceInfo.cpp
@@ -63,6 +63,40 @@ int is_extension_available(cl_device_id device, const char *extensionName)
return false;
}
+cl_version get_extension_version(cl_device_id device, const char *extensionName)
+{
+ cl_int err;
+ size_t size;
+
+ err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS_WITH_VERSION, 0, nullptr,
+ &size);
+ if (err != CL_SUCCESS)
+ {
+ throw std::runtime_error("clGetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_"
+ "VERSION) failed to return size\n");
+ }
+
+ std::vector<cl_name_version> extensions(size / sizeof(cl_name_version));
+ err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS_WITH_VERSION, size,
+ extensions.data(), &size);
+ if (err != CL_SUCCESS)
+ {
+ throw std::runtime_error("clGetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_"
+ "VERSION) failed to return value\n");
+ }
+
+ for (auto &ext : extensions)
+ {
+ if (!strcmp(extensionName, ext.name))
+ {
+ return ext.version;
+ }
+ }
+
+ throw std::runtime_error("Extension " + std::string(extensionName)
+ + " not supported by device!");
+}
+
/* Returns a string containing the supported extensions list for a device. */
std::string get_device_extensions_string(cl_device_id device)
{
diff --git a/test_common/harness/deviceInfo.h b/test_common/harness/deviceInfo.h
index f8c55805..912dd198 100644
--- a/test_common/harness/deviceInfo.h
+++ b/test_common/harness/deviceInfo.h
@@ -31,6 +31,11 @@ std::string get_device_info_string(cl_device_id device,
/* Determines if an extension is supported by a device. */
int is_extension_available(cl_device_id device, const char *extensionName);
+/* Returns the version of the extension the device supports or throws an
+ * exception if the extension is not supported by the device. */
+cl_version get_extension_version(cl_device_id device,
+ const char *extensionName);
+
/* Returns a string containing the supported extensions list for a device. */
std::string get_device_extensions_string(cl_device_id device);
diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp
index 22a2677d..eaccf641 100644
--- a/test_common/harness/errorHelpers.cpp
+++ b/test_common/harness/errorHelpers.cpp
@@ -18,9 +18,12 @@
#include <stdlib.h>
#include <string.h>
+#include <algorithm>
+
#include "errorHelpers.h"
#include "parseParameters.h"
+#include "testHarness.h"
#include <CL/cl_half.h>
@@ -300,10 +303,6 @@ const char *GetQueuePropertyName(cl_command_queue_properties property)
}
}
-#ifndef MAX
-#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
-#endif
-
#if defined(_MSC_VER)
#define scalbnf(_a, _i) ldexpf(_a, _i)
#define scalbn(_a, _i) ldexp(_a, _i)
@@ -356,7 +355,7 @@ static float Ulp_Error_Half_Float(float test, double reference)
// The unbiased exponent of the ulp unit place
int ulp_exp =
- HALF_MANT_DIG - 1 - MAX(ilogb(reference), HALF_MIN_EXP - 1);
+ HALF_MANT_DIG - 1 - std::max(ilogb(reference), HALF_MIN_EXP - 1);
// Scale the exponent of the error
return (float)scalbn(testVal - reference, ulp_exp);
@@ -364,7 +363,7 @@ static float Ulp_Error_Half_Float(float test, double reference)
// reference is a normal power of two or a zero
int ulp_exp =
- HALF_MANT_DIG - 1 - MAX(ilogb(reference) - 1, HALF_MIN_EXP - 1);
+ HALF_MANT_DIG - 1 - std::max(ilogb(reference) - 1, HALF_MIN_EXP - 1);
// Scale the exponent of the error
return (float)scalbn(testVal - reference, ulp_exp);
@@ -436,7 +435,8 @@ float Ulp_Error(float test, double reference)
return 0.0f; // if we are expecting a NaN, any NaN is fine
// The unbiased exponent of the ulp unit place
- int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference), FLT_MIN_EXP - 1);
+ int ulp_exp =
+ FLT_MANT_DIG - 1 - std::max(ilogb(reference), FLT_MIN_EXP - 1);
// Scale the exponent of the error
return (float)scalbn(testVal - reference, ulp_exp);
@@ -444,7 +444,8 @@ float Ulp_Error(float test, double reference)
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
- int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference) - 1, FLT_MIN_EXP - 1);
+ int ulp_exp =
+ FLT_MANT_DIG - 1 - std::max(ilogb(reference) - 1, FLT_MIN_EXP - 1);
// Scale the exponent of the error
return (float)scalbn(testVal - reference, ulp_exp);
@@ -512,7 +513,7 @@ float Ulp_Error_Double(double test, long double reference)
// The unbiased exponent of the ulp unit place
int ulp_exp =
- DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1);
+ DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1);
// Scale the exponent of the error
float result = (float)scalbnl(testVal - reference, ulp_exp);
@@ -528,7 +529,7 @@ float Ulp_Error_Double(double test, long double reference)
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
int ulp_exp =
- DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
+ DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
// Scale the exponent of the error
float result = (float)scalbnl(testVal - reference, ulp_exp);
@@ -564,7 +565,7 @@ cl_int OutputBuildLogs(cl_program program, cl_uint num_devices,
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
&size_ret);
test_error(error, "Unable to query context's device size");
- num_devices = size_ret / sizeof(cl_device_id);
+ num_devices = static_cast<cl_uint>(size_ret / sizeof(cl_device_id));
device_list = (cl_device_id *)malloc(size_ret);
if (device_list == NULL)
{
@@ -690,21 +691,19 @@ const char *subtests_to_skip_with_offline_compiler[] = {
"library_function"
};
-int check_functions_for_offline_compiler(const char *subtestname,
- cl_device_id device)
+bool check_functions_for_offline_compiler(const char *subtestname)
{
if (gCompilationMode != kOnline)
{
- int nNotRequiredWithOfflineCompiler =
- sizeof(subtests_to_skip_with_offline_compiler) / sizeof(char *);
- size_t i;
- for (i = 0; i < nNotRequiredWithOfflineCompiler; ++i)
+ size_t nNotRequiredWithOfflineCompiler =
+ ARRAY_SIZE(subtests_to_skip_with_offline_compiler);
+ for (size_t i = 0; i < nNotRequiredWithOfflineCompiler; ++i)
{
if (!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i]))
{
- return 1;
+ return false;
}
}
}
- return 0;
+ return true;
}
diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h
index 19446014..80eb3b58 100644
--- a/test_common/harness/errorHelpers.h
+++ b/test_common/harness/errorHelpers.h
@@ -56,17 +56,13 @@ static int vlog_win32(const char *format, ...);
#define vlog printf
#endif
-#define ct_assert(b) ct_assert_i(b, __LINE__)
-#define ct_assert_i(b, line) ct_assert_ii(b, line)
-#define ct_assert_ii(b, line) \
- int _compile_time_assertion_on_line_##line[b ? 1 : -1];
-
#define test_fail(msg, ...) \
{ \
log_error(msg, ##__VA_ARGS__); \
return TEST_FAIL; \
}
#define test_error(errCode, msg) test_error_ret(errCode, msg, errCode)
+#define test_error_fail(errCode, msg) test_error_ret(errCode, msg, TEST_FAIL)
#define test_error_ret(errCode, msg, retValue) \
{ \
auto errCodeResult = errCode; \
@@ -97,21 +93,6 @@ static int vlog_win32(const char *format, ...);
"the device version! (from %s:%d)\n", \
msg, __FILE__, __LINE__);
-#define test_missing_support_offline_cmpiler(errCode, msg) \
- test_missing_support_offline_cmpiler_ret(errCode, msg, errCode)
-// this macro should always return CL_SUCCESS, but print the skip message on
-// test not supported with offline compiler
-#define test_missing_support_offline_cmpiler_ret(errCode, msg, retValue) \
- { \
- if (errCode != CL_SUCCESS) \
- { \
- log_info("INFO: Subtest %s tests is not supported in offline " \
- "compiler execution path! (from %s:%d)\n", \
- msg, __FILE__, __LINE__); \
- return TEST_SKIP; \
- } \
- }
-
// expected error code vs. what we got
#define test_failure_error(errCode, expectedErrCode, msg) \
test_failure_error_ret(errCode, expectedErrCode, msg, \
@@ -186,8 +167,7 @@ extern const char *GetAddressModeName(cl_addressing_mode mode);
extern const char *GetQueuePropertyName(cl_command_queue_properties properties);
extern const char *GetDeviceTypeName(cl_device_type type);
-int check_functions_for_offline_compiler(const char *subtestname,
- cl_device_id device);
+bool check_functions_for_offline_compiler(const char *subtestname);
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices,
cl_device_id *device_list);
diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h
index 40826c5c..222aa2c4 100644
--- a/test_common/harness/fpcontrol.h
+++ b/test_common/harness/fpcontrol.h
@@ -16,6 +16,8 @@
#ifndef _fpcontrol_h
#define _fpcontrol_h
+#include <cstdint>
+
// In order to get tests for correctly rounded operations (e.g. multiply) to
// work properly we need to be able to set the reference hardware to FTZ mode if
// the device hardware is running in that mode. We have explored all other
@@ -30,7 +32,11 @@
// that rounding mode.
#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) \
|| defined(__MINGW32__)
+#ifdef _MSC_VER
typedef int FPU_mode_type;
+#else
+typedef int64_t FPU_mode_type;
+#endif
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
#include <xmmintrin.h>
@@ -39,7 +45,7 @@ typedef int FPU_mode_type;
extern __thread fpu_control_t fpu_control;
#endif
// Set the reference hardware floating point unit to FTZ mode
-static inline void ForceFTZ(FPU_mode_type *mode)
+inline void ForceFTZ(FPU_mode_type *mode)
{
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
@@ -55,7 +61,7 @@ static inline void ForceFTZ(FPU_mode_type *mode)
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
// Add 64 bit support
#elif defined(__aarch64__)
- unsigned fpscr;
+ uint64_t fpscr;
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
@@ -65,7 +71,7 @@ static inline void ForceFTZ(FPU_mode_type *mode)
}
// Disable the denorm flush to zero
-static inline void DisableFTZ(FPU_mode_type *mode)
+inline void DisableFTZ(FPU_mode_type *mode)
{
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
@@ -81,7 +87,7 @@ static inline void DisableFTZ(FPU_mode_type *mode)
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
// Add 64 bit support
#elif defined(__aarch64__)
- unsigned fpscr;
+ uint64_t fpscr;
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
@@ -91,7 +97,7 @@ static inline void DisableFTZ(FPU_mode_type *mode)
}
// Restore the reference hardware to floating point state indicated by *mode
-static inline void RestoreFPState(FPU_mode_type *mode)
+inline void RestoreFPState(FPU_mode_type *mode)
{
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp
index 72a2f0c0..f1694e88 100644
--- a/test_common/harness/imageHelpers.cpp
+++ b/test_common/harness/imageHelpers.cpp
@@ -23,6 +23,7 @@
#include <malloc.h>
#endif
#include <algorithm>
+#include <cinttypes>
#include <iterator>
#if !defined(_WIN32)
#include <cmath>
@@ -421,7 +422,7 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel,
(int)thirdDim, (int)imageInfo->rowPitch,
(int)imageInfo->rowPitch
- (int)imageInfo->width * (int)pixel_size);
- log_error("Failed at column: %ld ", where);
+ log_error("Failed at column: %zu ", where);
switch (pixel_size)
{
@@ -454,7 +455,7 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel,
((cl_ushort *)destPixel)[1], ((cl_ushort *)destPixel)[2]);
break;
case 8:
- log_error("*0x%16.16llx vs. 0x%16.16llx\n",
+ log_error("*0x%16.16" PRIx64 " vs. 0x%16.16" PRIx64 "\n",
((cl_ulong *)sourcePixel)[0], ((cl_ulong *)destPixel)[0]);
break;
case 12:
@@ -473,12 +474,53 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel,
((cl_uint *)destPixel)[2], ((cl_uint *)destPixel)[3]);
break;
default:
- log_error("Don't know how to print pixel size of %ld\n",
+ log_error("Don't know how to print pixel size of %zu\n",
pixel_size);
break;
}
}
+size_t compare_scanlines(const image_descriptor *imageInfo, const char *aPtr,
+ const char *bPtr)
+{
+ size_t pixel_size = get_pixel_size(imageInfo->format);
+ size_t column;
+
+ for (column = 0; column < imageInfo->width; column++)
+ {
+ switch (imageInfo->format->image_channel_data_type)
+ {
+ // If the data type is 101010, then ignore bits 31 and 32 when
+ // comparing the row
+ case CL_UNORM_INT_101010: {
+ cl_uint aPixel = *(cl_uint *)aPtr;
+ cl_uint bPixel = *(cl_uint *)bPtr;
+ if ((aPixel & 0x3fffffff) != (bPixel & 0x3fffffff))
+ return column;
+ }
+ break;
+
+ // If the data type is 555, ignore bit 15 when comparing the row
+ case CL_UNORM_SHORT_555: {
+ cl_ushort aPixel = *(cl_ushort *)aPtr;
+ cl_ushort bPixel = *(cl_ushort *)bPtr;
+ if ((aPixel & 0x7fff) != (bPixel & 0x7fff)) return column;
+ }
+ break;
+
+ default:
+ if (memcmp(aPtr, bPtr, pixel_size) != 0) return column;
+ break;
+ }
+
+ aPtr += pixel_size;
+ bPtr += pixel_size;
+ }
+
+ // If we didn't find a difference, return the width of the image
+ return column;
+}
+
int random_log_in_range(int minV, int maxV, MTdata d)
{
double v = log2(((double)genrand_int32(d) / (double)0xffffffff) + 1);
@@ -554,8 +596,8 @@ struct AddressingTable
{
AddressingTable()
{
- ct_assert((CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6));
- ct_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2);
+ static_assert(CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6, "");
+ static_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2, "");
mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE]
[CL_FILTER_NEAREST - CL_FILTER_NEAREST] = NoAddressFn;
@@ -649,9 +691,6 @@ int has_alpha(const cl_image_format *format)
_b ^= _a; \
_a ^= _b; \
} while (0)
-#ifndef MAX
-#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
-#endif
void get_max_sizes(
size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3],
@@ -719,7 +758,7 @@ void get_max_sizes(
if (usingMaxPixelSizeBuffer || raw_pixel_size == 12) raw_pixel_size = 16;
size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size;
- log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, "
+ log_info("Maximums: [%zu x %zu x %zu], raw pixel size %zu bytes, "
"per-allocation limit %gMB.\n",
maxWidth, maxHeight, isArray ? maxArraySize : maxDepth,
raw_pixel_size, (maxAllocSize / (1024.0 * 1024.0)));
@@ -760,10 +799,10 @@ void get_max_sizes(
if (image_type == CL_MEM_OBJECT_IMAGE1D)
{
- double M = maximum_sizes[0];
+ size_t M = maximum_sizes[0];
// Store the size
- sizes[(*numberOfSizes)][0] = (size_t)M;
+ sizes[(*numberOfSizes)][0] = M;
sizes[(*numberOfSizes)][1] = 1;
sizes[(*numberOfSizes)][2] = 1;
++(*numberOfSizes);
@@ -777,17 +816,17 @@ void get_max_sizes(
{
// Determine the size of the fixed dimension
- double M = maximum_sizes[fixed_dim];
- double A = max_pixels;
+ size_t M = maximum_sizes[fixed_dim];
+ size_t A = max_pixels;
int x0_dim = !fixed_dim;
- double x0 =
+ size_t x0 = static_cast<size_t>(
fmin(fmin(other_sizes[(other_size++) % num_other_sizes], A / M),
- maximum_sizes[x0_dim]);
+ maximum_sizes[x0_dim]));
// Store the size
- sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
- sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
+ sizes[(*numberOfSizes)][fixed_dim] = M;
+ sizes[(*numberOfSizes)][x0_dim] = x0;
sizes[(*numberOfSizes)][2] = 1;
++(*numberOfSizes);
}
@@ -802,16 +841,17 @@ void get_max_sizes(
{
// Determine the size of the fixed dimension
- double M = maximum_sizes[fixed_dim];
- double A = max_pixels;
+ size_t M = maximum_sizes[fixed_dim];
+ size_t A = max_pixels;
// Find two other dimensions, x0 and x1
int x0_dim = (fixed_dim == 0) ? 1 : 0;
int x1_dim = (fixed_dim == 2) ? 1 : 2;
// Choose two other sizes for these dimensions
- double x0 = fmin(fmin(A / M, maximum_sizes[x0_dim]),
- other_sizes[(other_size++) % num_other_sizes]);
+ size_t x0 = static_cast<size_t>(
+ fmin(fmin(A / M, maximum_sizes[x0_dim]),
+ other_sizes[(other_size++) % num_other_sizes]));
// GPUs have certain restrictions on minimum width (row alignment)
// of images which has given us issues testing small widths in this
// test (say we set width to 3 for testing, and compute size based
@@ -820,8 +860,9 @@ void get_max_sizes(
 // width of 16 which doesn't fit in vram). For this purpose we are
// not testing width < 16 for this test.
if (x0_dim == 0 && x0 < 16) x0 = 16;
- double x1 = fmin(fmin(A / M / x0, maximum_sizes[x1_dim]),
- other_sizes[(other_size++) % num_other_sizes]);
+ size_t x1 = static_cast<size_t>(
+ fmin(fmin(A / M / x0, maximum_sizes[x1_dim]),
+ other_sizes[(other_size++) % num_other_sizes]));
// Valid image sizes cannot be below 1. Due to the workaround for
 // the x0_dim where x0 is overridden to 16 there might not be enough
@@ -834,9 +875,9 @@ void get_max_sizes(
assert(x0 > 0 && M > 0);
// Store the size
- sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
- sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
- sizes[(*numberOfSizes)][x1_dim] = (size_t)x1;
+ sizes[(*numberOfSizes)][fixed_dim] = M;
+ sizes[(*numberOfSizes)][x0_dim] = x0;
+ sizes[(*numberOfSizes)][x1_dim] = x1;
++(*numberOfSizes);
}
}
@@ -847,20 +888,20 @@ void get_max_sizes(
switch (image_type)
{
case CL_MEM_OBJECT_IMAGE1D:
- log_info(" size[%d] = [%ld] (%g MB image)\n", j, sizes[j][0],
+ log_info(" size[%d] = [%zu] (%g MB image)\n", j, sizes[j][0],
raw_pixel_size * sizes[j][0] * sizes[j][1]
* sizes[j][2] / (1024.0 * 1024.0));
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D:
- log_info(" size[%d] = [%ld %ld] (%g MB image)\n", j,
+ log_info(" size[%d] = [%zu %zu] (%g MB image)\n", j,
sizes[j][0], sizes[j][1],
raw_pixel_size * sizes[j][0] * sizes[j][1]
* sizes[j][2] / (1024.0 * 1024.0));
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE3D:
- log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n", j,
+ log_info(" size[%d] = [%zu %zu %zu] (%g MB image)\n", j,
sizes[j][0], sizes[j][1], sizes[j][2],
raw_pixel_size * sizes[j][0] * sizes[j][1]
* sizes[j][2] / (1024.0 * 1024.0));
@@ -884,6 +925,8 @@ float get_max_absolute_error(const cl_image_format *format,
#ifdef CL_SFIXED14_APPLE
case CL_SFIXED14_APPLE: return 0x1.0p-14f;
#endif
+ case CL_UNORM_SHORT_555:
+ case CL_UNORM_SHORT_565: return 1.0f / 31.0f;
default: return 0.0f;
}
}
@@ -1124,12 +1167,13 @@ void escape_inf_nan_values(char *data, size_t allocSize)
char *generate_random_image_data(image_descriptor *imageInfo,
BufferOwningPtr<char> &P, MTdata d)
{
- size_t allocSize = get_image_size(imageInfo);
+ size_t allocSize = static_cast<size_t>(get_image_size(imageInfo));
size_t pixelRowBytes = imageInfo->width * get_pixel_size(imageInfo->format);
size_t i;
if (imageInfo->num_mip_levels > 1)
- allocSize = compute_mipmapped_image_size(*imageInfo);
+ allocSize =
+ static_cast<size_t>(compute_mipmapped_image_size(*imageInfo));
#if defined(__APPLE__)
char *data = NULL;
@@ -1161,7 +1205,7 @@ char *generate_random_image_data(image_descriptor *imageInfo,
if (data == NULL)
{
- log_error("ERROR: Unable to malloc %lu bytes for "
+ log_error("ERROR: Unable to malloc %zu bytes for "
"generate_random_image_data\n",
allocSize);
return 0;
@@ -1678,24 +1722,26 @@ bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset,
// At this point, we're dealing with non-normalized coordinates.
- outX = adFn(floorf(x), width);
+ outX = adFn(static_cast<int>(floorf(x)), width);
// 1D and 2D arrays require special care for the index coordinate:
switch (imageInfo->type)
{
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
- outY = calculate_array_index(y, (float)imageInfo->arraySize - 1.0f);
- outZ = 0.0f; /* don't care! */
+ outY = static_cast<int>(
+ calculate_array_index(y, (float)imageInfo->arraySize - 1.0f));
+ outZ = 0; /* don't care! */
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
- outY = adFn(floorf(y), height);
- outZ = calculate_array_index(z, (float)imageInfo->arraySize - 1.0f);
+ outY = adFn(static_cast<int>(floorf(y)), height);
+ outZ = static_cast<int>(
+ calculate_array_index(z, (float)imageInfo->arraySize - 1.0f));
break;
default:
// legacy path:
- if (height != 0) outY = adFn(floorf(y), height);
- if (depth != 0) outZ = adFn(floorf(z), depth);
+ if (height != 0) outY = adFn(static_cast<int>(floorf(y)), height);
+ if (depth != 0) outZ = adFn(static_cast<int>(floorf(z)), depth);
}
return !((int)refX == outX && (int)refY == outY && (int)refZ == outZ);
@@ -1766,7 +1812,7 @@ static float unnormalize_coordinate(const char *name, float coord, float offset,
switch (addressing_mode)
{
case CL_ADDRESS_REPEAT:
- ret = RepeatNormalizedAddressFn(coord, extent);
+ ret = RepeatNormalizedAddressFn(coord, static_cast<size_t>(extent));
if (verbose)
{
@@ -1790,7 +1836,8 @@ static float unnormalize_coordinate(const char *name, float coord, float offset,
break;
case CL_ADDRESS_MIRRORED_REPEAT:
- ret = MirroredRepeatNormalizedAddressFn(coord, extent);
+ ret = MirroredRepeatNormalizedAddressFn(
+ coord, static_cast<size_t>(extent));
if (verbose)
{
@@ -1948,7 +1995,7 @@ FloatPixel sample_image_pixel_float_offset(
break;
case CL_MEM_OBJECT_IMAGE1D:
case CL_MEM_OBJECT_IMAGE1D_BUFFER:
- log_info("Starting coordinate: %f\b", x);
+ log_info("Starting coordinate: %f\n", x);
break;
case CL_MEM_OBJECT_IMAGE2D:
log_info("Starting coordinate: %f, %f\n", x, y);
@@ -1968,13 +2015,13 @@ FloatPixel sample_image_pixel_float_offset(
// coordinates. Note that the array cases again require special
// care, per section 8.4 in the OpenCL 1.2 Specification.
- ix = adFn(floorf(x), width_lod);
+ ix = adFn(static_cast<int>(floorf(x)), width_lod);
switch (imageInfo->type)
{
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
- iy =
- calculate_array_index(y, (float)(imageInfo->arraySize - 1));
+ iy = static_cast<int>(calculate_array_index(
+ y, (float)(imageInfo->arraySize - 1)));
iz = 0;
if (verbose)
{
@@ -1982,18 +2029,18 @@ FloatPixel sample_image_pixel_float_offset(
}
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
- iy = adFn(floorf(y), height_lod);
- iz =
- calculate_array_index(z, (float)(imageInfo->arraySize - 1));
+ iy = adFn(static_cast<int>(floorf(y)), height_lod);
+ iz = static_cast<int>(calculate_array_index(
+ z, (float)(imageInfo->arraySize - 1)));
if (verbose)
{
log_info("\tArray index %f evaluates to %d\n", z, iz);
}
break;
default:
- iy = adFn(floorf(y), height_lod);
+ iy = adFn(static_cast<int>(floorf(y)), height_lod);
if (depth_lod != 0)
- iz = adFn(floorf(z), depth_lod);
+ iz = adFn(static_cast<int>(floorf(z)), depth_lod);
else
iz = 0;
}
@@ -2047,16 +2094,16 @@ FloatPixel sample_image_pixel_float_offset(
height = 1;
}
- int x1 = adFn(floorf(x - 0.5f), width);
+ int x1 = adFn(static_cast<int>(floorf(x - 0.5f)), width);
int y1 = 0;
- int x2 = adFn(floorf(x - 0.5f) + 1, width);
+ int x2 = adFn(static_cast<int>(floorf(x - 0.5f) + 1), width);
int y2 = 0;
if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
&& (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
&& (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER))
{
- y1 = adFn(floorf(y - 0.5f), height);
- y2 = adFn(floorf(y - 0.5f) + 1, height);
+ y1 = adFn(static_cast<int>(floorf(y - 0.5f)), height);
+ y2 = adFn(static_cast<int>(floorf(y - 0.5f) + 1), height);
}
else
{
@@ -2147,12 +2194,12 @@ FloatPixel sample_image_pixel_float_offset(
else
{
// 3D linear filtering
- int x1 = adFn(floorf(x - 0.5f), width_lod);
- int y1 = adFn(floorf(y - 0.5f), height_lod);
- int z1 = adFn(floorf(z - 0.5f), depth_lod);
- int x2 = adFn(floorf(x - 0.5f) + 1, width_lod);
- int y2 = adFn(floorf(y - 0.5f) + 1, height_lod);
- int z2 = adFn(floorf(z - 0.5f) + 1, depth_lod);
+ int x1 = adFn(static_cast<int>(floorf(x - 0.5f)), width_lod);
+ int y1 = adFn(static_cast<int>(floorf(y - 0.5f)), height_lod);
+ int z1 = adFn(static_cast<int>(floorf(z - 0.5f)), depth_lod);
+ int x2 = adFn(static_cast<int>(floorf(x - 0.5f) + 1), width_lod);
+ int y2 = adFn(static_cast<int>(floorf(y - 0.5f) + 1), height_lod);
+ int z2 = adFn(static_cast<int>(floorf(z - 0.5f) + 1), depth_lod);
if (verbose)
log_info("\tActual integer coords used (i = floor(x-.5)): "
@@ -2580,11 +2627,11 @@ void pack_image_pixel(int *srcVector, const cl_image_format *imageFormat,
}
}
-int round_to_even(float v)
+cl_int round_to_even(float v)
{
// clamp overflow
- if (v >= -(float)INT_MIN) return INT_MAX;
- if (v <= (float)INT_MIN) return INT_MIN;
+ if (v >= -(float)CL_INT_MIN) return CL_INT_MAX;
+ if (v <= (float)CL_INT_MIN) return CL_INT_MIN;
// round fractional values to integer value
if (fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23))
@@ -2596,7 +2643,7 @@ int round_to_even(float v)
v -= magicVal;
}
- return (int)v;
+ return (cl_int)v;
}
void pack_image_pixel(float *srcVector, const cl_image_format *imageFormat,
@@ -2721,10 +2768,7 @@ void pack_image_pixel(float *srcVector, const cl_image_format *imageFormat,
case CL_SIGNED_INT32: {
cl_int *ptr = (cl_int *)outData;
for (unsigned int i = 0; i < channelCount; i++)
- ptr[i] = (int)CONVERT_INT(
- srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31),
- MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23),
- CL_INT_MAX);
+ ptr[i] = round_to_even(srcVector[i]);
break;
}
case CL_UNSIGNED_INT8: {
@@ -2888,26 +2932,25 @@ void pack_image_pixel_error(const float *srcVector,
case CL_SIGNED_INT32: {
const cl_int *ptr = (const cl_int *)results;
for (unsigned int i = 0; i < channelCount; i++)
- errors[i] = (cl_float)(
- (cl_long)ptr[i]
- - (cl_long)CONVERT_INT(
- srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31),
- MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23),
- CL_INT_MAX));
+ errors[i] = (cl_float)((cl_long)ptr[i]
+ - (cl_long)round_to_even(srcVector[i]));
break;
}
case CL_UNSIGNED_INT8: {
const cl_uchar *ptr = (const cl_uchar *)results;
for (unsigned int i = 0; i < channelCount; i++)
- errors[i] = (cl_int)ptr[i]
- - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX);
+ errors[i] = static_cast<float>(
+ (cl_int)ptr[i]
+ - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX));
break;
}
case CL_UNSIGNED_INT16: {
const cl_ushort *ptr = (const cl_ushort *)results;
for (unsigned int i = 0; i < channelCount; i++)
- errors[i] = (cl_int)ptr[i]
- - (cl_int)CONVERT_UINT(srcVector[i], 32767.f, CL_USHRT_MAX);
+ errors[i] = static_cast<float>(
+ (cl_int)ptr[i]
+ - (cl_int)CONVERT_UINT(srcVector[i], 32767.f,
+ CL_USHRT_MAX));
break;
}
case CL_UNSIGNED_INT32: {
@@ -3228,7 +3271,7 @@ char *create_random_image_data(ExplicitType dataType,
if (data == NULL)
{
log_error(
- "ERROR: Unable to malloc %lu bytes for create_random_image_data\n",
+ "ERROR: Unable to malloc %zu bytes for create_random_image_data\n",
allocSize);
return NULL;
}
@@ -3988,7 +4031,8 @@ bool is_image_format_required(cl_image_format format, cl_mem_flags flags,
cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth)
{
- cl_uint retMaxMipLevels = 0, max_dim = 0;
+ cl_uint retMaxMipLevels = 0;
+ size_t max_dim = 0;
max_dim = width;
max_dim = height > max_dim ? height : max_dim;
diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h
index 848ec655..f8ae4fb9 100644
--- a/test_common/harness/imageHelpers.h
+++ b/test_common/harness/imageHelpers.h
@@ -63,7 +63,7 @@ typedef struct
bool normalized_coords;
} image_sampler_data;
-int round_to_even(float v);
+cl_int round_to_even(float v);
#define NORMALIZE(v, max) (v < 0 ? 0 : (v > 1.f ? max : round_to_even(v * max)))
#define NORMALIZE_UNROUNDED(v, max) (v < 0 ? 0 : (v > 1.f ? max : v * max))
@@ -139,6 +139,9 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel,
image_descriptor *imageInfo, size_t y,
size_t thirdDim);
+size_t compare_scanlines(const image_descriptor *imageInfo, const char *aPtr,
+ const char *bPtr);
+
void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
size_t sizes[][3], size_t maxWidth, size_t maxHeight,
size_t maxDepth, size_t maxArraySize,
@@ -479,6 +482,13 @@ void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x,
outData[2] = tempData[3];
outData[3] = tempData[0];
}
+ else if (format->image_channel_order == CL_ABGR)
+ {
+ outData[0] = tempData[3];
+ outData[1] = tempData[2];
+ outData[2] = tempData[1];
+ outData[3] = tempData[0];
+ }
else if ((format->image_channel_order == CL_BGRA)
|| (format->image_channel_order == CL_sBGRA))
{
diff --git a/test_common/harness/integer_ops_test_info.h b/test_common/harness/integer_ops_test_info.h
new file mode 100644
index 00000000..ad7b303b
--- /dev/null
+++ b/test_common/harness/integer_ops_test_info.h
@@ -0,0 +1,92 @@
+//
+// Copyright (c) 2021 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef INTEGER_OPS_TEST_INFO_H
+#define INTEGER_OPS_TEST_INFO_H
+
+#include "conversions.h"
+#include "testHarness.h"
+
+// TODO: expand usage to other tests.
+
+template <typename T> struct TestInfo
+{
+};
+template <> struct TestInfo<cl_char>
+{
+ static const ExplicitType explicitType = kChar;
+ static constexpr const char* deviceTypeName = "char";
+ static constexpr const char* deviceTypeNameSigned = "char";
+ static constexpr const char* deviceTypeNameUnsigned = "uchar";
+};
+template <> struct TestInfo<cl_uchar>
+{
+ static const ExplicitType explicitType = kUChar;
+ static constexpr const char* deviceTypeName = "uchar";
+ static constexpr const char* deviceTypeNameSigned = "char";
+ static constexpr const char* deviceTypeNameUnsigned = "uchar";
+};
+template <> struct TestInfo<cl_short>
+{
+ static const ExplicitType explicitType = kShort;
+ static constexpr const char* deviceTypeName = "short";
+ static constexpr const char* deviceTypeNameSigned = "short";
+ static constexpr const char* deviceTypeNameUnsigned = "ushort";
+};
+template <> struct TestInfo<cl_ushort>
+{
+ static const ExplicitType explicitType = kUShort;
+ static constexpr const char* deviceTypeName = "ushort";
+ static constexpr const char* deviceTypeNameSigned = "short";
+ static constexpr const char* deviceTypeNameUnsigned = "ushort";
+};
+template <> struct TestInfo<cl_int>
+{
+ static const ExplicitType explicitType = kInt;
+ static constexpr const char* deviceTypeName = "int";
+ static constexpr const char* deviceTypeNameSigned = "int";
+ static constexpr const char* deviceTypeNameUnsigned = "uint";
+};
+template <> struct TestInfo<cl_uint>
+{
+ static const ExplicitType explicitType = kUInt;
+ static constexpr const char* deviceTypeName = "uint";
+ static constexpr const char* deviceTypeNameSigned = "int";
+ static constexpr const char* deviceTypeNameUnsigned = "uint";
+};
+template <> struct TestInfo<cl_long>
+{
+ static const ExplicitType explicitType = kLong;
+ static constexpr const char* deviceTypeName = "long";
+ static constexpr const char* deviceTypeNameSigned = "long";
+ static constexpr const char* deviceTypeNameUnsigned = "ulong";
+};
+template <> struct TestInfo<cl_ulong>
+{
+ static const ExplicitType explicitType = kULong;
+ static constexpr const char* deviceTypeName = "ulong";
+ static constexpr const char* deviceTypeNameSigned = "long";
+ static constexpr const char* deviceTypeNameUnsigned = "ulong";
+};
+
+template <typename T>
+static void fill_vector_with_random_data(std::vector<T>& v)
+{
+ MTdataHolder d(gRandomSeed);
+ generate_random_data(TestInfo<T>::explicitType, v.size(), d, v.data());
+}
+
+#endif /* INTEGER_OPS_TEST_INFO_H */
diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp
index 95b9555e..13ebcbc9 100644
--- a/test_common/harness/kernelHelpers.cpp
+++ b/test_common/harness/kernelHelpers.cpp
@@ -530,7 +530,7 @@ static int get_offline_compiler_output(
sourceFilename, outputFilename);
if (error != CL_SUCCESS) return error;
- // read output file
+ // open output file for reading
ifs.open(outputFilename.c_str(), std::ios::binary);
if (!ifs.good())
{
@@ -540,6 +540,26 @@ static int get_offline_compiler_output(
}
}
}
+
+ if (compilationMode == kSpir_v && !gDisableSPIRVValidation)
+ {
+ std::string runString = gSPIRVValidator + " " + outputFilename;
+
+ int returnCode = system(runString.c_str());
+ if (returnCode == -1)
+ {
+ log_error("Error: failed to invoke SPIR-V validator\n");
+ return CL_COMPILE_PROGRAM_FAILURE;
+ }
+ else if (returnCode != 0)
+ {
+ log_error(
+ "Failed to validate SPIR-V file %s: system() returned 0x%x\n",
+ outputFilename.c_str(), returnCode);
+ return CL_COMPILE_PROGRAM_FAILURE;
+ }
+ }
+
return CL_SUCCESS;
}
@@ -579,7 +599,7 @@ static int create_single_kernel_helper_create_program_offline(
if (error != CL_SUCCESS) return error;
ifs.seekg(0, ifs.end);
- int length = ifs.tellg();
+ size_t length = static_cast<size_t>(ifs.tellg());
ifs.seekg(0, ifs.beg);
// treat modifiedProgram as input for clCreateProgramWithBinary
@@ -1226,7 +1246,7 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags,
list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
if (NULL == list)
{
- log_error("Error: unable to allocate %ld byte buffer for image format "
+ log_error("Error: unable to allocate %zu byte buffer for image format "
"list at %s:%d (err = %d)\n",
count * sizeof(cl_image_format), __FILE__, __LINE__, err);
return 0;
@@ -1641,8 +1661,10 @@ Version get_device_latest_cl_c_version(cl_device_id device)
Version max_supported_cl_c_version{};
for (const auto &name_version : name_versions)
{
- Version current_version{ CL_VERSION_MAJOR(name_version.version),
- CL_VERSION_MINOR(name_version.version) };
+ Version current_version{
+ static_cast<int>(CL_VERSION_MAJOR(name_version.version)),
+ static_cast<int>(CL_VERSION_MINOR(name_version.version))
+ };
max_supported_cl_c_version =
(current_version > max_supported_cl_c_version)
? current_version
@@ -1687,7 +1709,7 @@ Version get_max_OpenCL_C_for_context(cl_context context)
else
{
current_version =
- (std::min)(device_version, current_version);
+ std::min(device_version, current_version);
}
});
return current_version;
@@ -1725,8 +1747,10 @@ bool device_supports_cl_c_version(cl_device_id device, Version version)
for (const auto &name_version : name_versions)
{
- Version current_version{ CL_VERSION_MAJOR(name_version.version),
- CL_VERSION_MINOR(name_version.version) };
+ Version current_version{
+ static_cast<int>(CL_VERSION_MAJOR(name_version.version)),
+ static_cast<int>(CL_VERSION_MINOR(name_version.version))
+ };
if (current_version == version)
{
return true;
diff --git a/test_common/harness/mt19937.cpp b/test_common/harness/mt19937.cpp
index c32d9bac..f5665deb 100644
--- a/test_common/harness/mt19937.cpp
+++ b/test_common/harness/mt19937.cpp
@@ -277,3 +277,5 @@ double genrand_res53(MTdata d)
unsigned long a = genrand_int32(d) >> 5, b = genrand_int32(d) >> 6;
return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
}
+
+bool genrand_bool(MTdata d) { return ((cl_uint)genrand_int32(d) & 1); }
diff --git a/test_common/harness/mt19937.h b/test_common/harness/mt19937.h
index 35c84933..447ca25a 100644
--- a/test_common/harness/mt19937.h
+++ b/test_common/harness/mt19937.h
@@ -90,24 +90,46 @@ double genrand_res53(MTdata /*data*/);
#ifdef __cplusplus
+/* generates a random boolean */
+bool genrand_bool(MTdata /*data*/);
+
#include <cassert>
+#include <utility>
-struct MTdataHolder
-{
- MTdataHolder(cl_uint seed)
+class MTdataHolder {
+public:
+ MTdataHolder() = default;
+ explicit MTdataHolder(cl_uint seed)
{
m_mtdata = init_genrand(seed);
assert(m_mtdata != nullptr);
}
- MTdataHolder(MTdata mtdata): m_mtdata(mtdata) {}
+ // Forbid copy.
+ MTdataHolder(const MTdataHolder&) = delete;
+ MTdataHolder& operator=(const MTdataHolder&) = delete;
+
+ // Support move semantics.
+ MTdataHolder(MTdataHolder&& h) { std::swap(m_mtdata, h.m_mtdata); }
+ MTdataHolder& operator=(MTdataHolder&& h)
+ {
+ std::swap(m_mtdata, h.m_mtdata);
+ return *this;
+ }
- ~MTdataHolder() { free_mtdata(m_mtdata); }
+ ~MTdataHolder()
+ {
+ if (m_mtdata) free_mtdata(m_mtdata);
+ }
- operator MTdata() const { return m_mtdata; }
+ operator MTdata() const
+ {
+ assert(m_mtdata && "Object wasn't initialised");
+ return m_mtdata;
+ }
private:
- MTdata m_mtdata;
+ MTdata m_mtdata = nullptr;
};
#endif // #ifdef __cplusplus
diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp
index cd350cf8..8fc91108 100644
--- a/test_common/harness/os_helpers.cpp
+++ b/test_common/harness/os_helpers.cpp
@@ -333,9 +333,6 @@ std::string exe_dir()
#include <windows.h>
-#if defined(max)
-#undef max
-#endif
#include <cctype>
#include <algorithm>
@@ -404,7 +401,8 @@ std::string exe_path()
for (;;)
{
- DWORD len = GetModuleFileNameA(NULL, &path.front(), path.size());
+ DWORD len = GetModuleFileNameA(NULL, &path.front(),
+ static_cast<DWORD>(path.size()));
if (len == 0)
{
diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp
index b2ab5b02..e946d744 100644
--- a/test_common/harness/parseParameters.cpp
+++ b/test_common/harness/parseParameters.cpp
@@ -28,11 +28,14 @@
using namespace std;
#define DEFAULT_COMPILATION_PROGRAM "cl_offline_compiler"
+#define DEFAULT_SPIRV_VALIDATOR "spirv-val"
CompilationMode gCompilationMode = kOnline;
CompilationCacheMode gCompilationCacheMode = kCacheModeCompileIfAbsent;
std::string gCompilationCachePath = ".";
std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
+bool gDisableSPIRVValidation = false;
+std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR;
void helpInfo()
{
@@ -62,7 +65,14 @@ For offline compilation (binary and spir-v modes) only:
Path for offline compiler output and CL source
--compilation-program <prog>
Program to use for offline compilation, defaults to:
- )" DEFAULT_COMPILATION_PROGRAM "\n\n");
+ )" DEFAULT_COMPILATION_PROGRAM R"(
+
+For spir-v mode only:
+ --disable-spirv-validation
+ Disable validation of SPIR-V using the SPIR-V validator
+ --spirv-validator
+ Path for SPIR-V validator, defaults to )" DEFAULT_SPIRV_VALIDATOR "\n"
+ "\n");
}
int parseCustomParam(int argc, const char *argv[], const char *ignore)
@@ -198,6 +208,26 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
return -1;
}
}
+ else if (!strcmp(argv[i], "--disable-spirv-validation"))
+ {
+ delArg++;
+ gDisableSPIRVValidation = true;
+ }
+ else if (!strcmp(argv[i], "--spirv-validator"))
+ {
+ delArg++;
+ if ((i + 1) < argc)
+ {
+ delArg++;
+ gSPIRVValidator = argv[i + 1];
+ }
+ else
+ {
+ log_error("Program argument for --spirv-validator was not "
+ "specified.\n");
+ return -1;
+ }
+ }
// cleaning parameters from argv tab
for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg];
diff --git a/test_common/harness/parseParameters.h b/test_common/harness/parseParameters.h
index b0f8328a..437e12f9 100644
--- a/test_common/harness/parseParameters.h
+++ b/test_common/harness/parseParameters.h
@@ -38,6 +38,8 @@ extern CompilationMode gCompilationMode;
extern CompilationCacheMode gCompilationCacheMode;
extern std::string gCompilationCachePath;
extern std::string gCompilationProgram;
+extern bool gDisableSPIRVValidation;
+extern std::string gSPIRVValidator;
extern int parseCustomParam(int argc, const char *argv[],
const char *ignore = 0);
diff --git a/test_common/harness/propertyHelpers.cpp b/test_common/harness/propertyHelpers.cpp
index 3157ca80..6a10c076 100644
--- a/test_common/harness/propertyHelpers.cpp
+++ b/test_common/harness/propertyHelpers.cpp
@@ -19,6 +19,7 @@
#include <assert.h>
#include <algorithm>
+#include <cinttypes>
#include <vector>
static bool findProperty(const std::vector<cl_properties>& props,
@@ -97,14 +98,15 @@ int compareProperties(const std::vector<cl_properties>& queried,
if (!found)
{
- log_error("ERROR: expected property 0x%x not found!\n",
+ log_error("ERROR: expected property 0x%" PRIx64 " not found!\n",
check_prop);
return TEST_FAIL;
}
else if (check_value != queried_value)
{
- log_error("ERROR: mis-matched value for property 0x%x: wanted "
- "0x%x, got 0x%x\n",
+ log_error("ERROR: mis-matched value for property 0x%" PRIx64
+ ": wanted "
+ "0x%" PRIx64 ", got 0x%" PRIx64 "\n",
check_prop, check_value, queried_value);
return TEST_FAIL;
}
@@ -113,7 +115,7 @@ int compareProperties(const std::vector<cl_properties>& queried,
if (queried.size() > check.size())
{
log_error("ERROR: all properties found but there are extra "
- "properties: expected %d, got %d.\n",
+ "properties: expected %zu, got %zu.\n",
check.size(), queried.size());
return TEST_FAIL;
}
diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp
index 681ccdd8..1f531478 100644
--- a/test_common/harness/rounding_mode.cpp
+++ b/test_common/harness/rounding_mode.cpp
@@ -48,7 +48,7 @@ RoundingMode set_round(RoundingMode r, Type outType)
const int *p = int_rounds;
if (outType == kfloat || outType == kdouble) p = flt_rounds;
- int fpscr = 0;
+ int64_t fpscr = 0;
RoundingMode oldRound = get_round();
_FPU_GETCW(fpscr);
@@ -59,7 +59,7 @@ RoundingMode set_round(RoundingMode r, Type outType)
RoundingMode get_round(void)
{
- int fpscr;
+ int64_t fpscr;
int oldRound;
_FPU_GETCW(fpscr);
@@ -203,13 +203,13 @@ void *FlushToZero(void)
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
union {
- int i;
+ unsigned int i;
void *p;
} u = { _mm_getcsr() };
_mm_setcsr(u.i | 0x8040);
return u.p;
#elif defined(__arm__) || defined(__aarch64__)
- int fpscr;
+ int64_t fpscr;
_FPU_GETCW(fpscr);
_FPU_SETCW(fpscr | FPSCR_FZ);
return NULL;
@@ -239,7 +239,7 @@ void UnFlushToZero(void *p)
} u = { p };
_mm_setcsr(u.i);
#elif defined(__arm__) || defined(__aarch64__)
- int fpscr;
+ int64_t fpscr;
_FPU_GETCW(fpscr);
_FPU_SETCW(fpscr & ~FPSCR_FZ);
#elif defined(__PPC__)
diff --git a/test_common/harness/rounding_mode.h b/test_common/harness/rounding_mode.h
index 064a3a63..6f52f0a0 100644
--- a/test_common/harness/rounding_mode.h
+++ b/test_common/harness/rounding_mode.h
@@ -16,8 +16,6 @@
#ifndef __ROUNDING_MODE_H__
#define __ROUNDING_MODE_H__
-#pragma STDC FENV_ACCESS ON
-
#include "compat.h"
#if (defined(_WIN32) && defined(_MSC_VER))
diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index 1aec3d07..a309f53d 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp
@@ -60,6 +60,54 @@ bool gCoreILProgram = true;
#define DEFAULT_NUM_ELEMENTS 0x4000
+static int saveResultsToJson(const char *suiteName, test_definition testList[],
+ unsigned char selectedTestList[],
+ test_status resultTestList[], int testNum)
+{
+ char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
+ if (fileName == nullptr)
+ {
+ return EXIT_SUCCESS;
+ }
+
+ FILE *file = fopen(fileName, "w");
+ if (NULL == file)
+ {
+ log_error("ERROR: Failed to open '%s' for writing results.\n",
+ fileName);
+ return EXIT_FAILURE;
+ }
+
+ const char *save_map[] = { "success", "failure" };
+ const char *result_map[] = { "pass", "fail", "skip" };
+ const char *linebreak[] = { "", ",\n" };
+ int add_linebreak = 0;
+
+ fprintf(file, "{\n");
+ fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName);
+ fprintf(file, "\t\"results\": {\n");
+
+ for (int i = 0; i < testNum; ++i)
+ {
+ if (selectedTestList[i])
+ {
+ fprintf(file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak],
+ testList[i].name, result_map[(int)resultTestList[i]]);
+ add_linebreak = 1;
+ }
+ }
+ fprintf(file, "\n");
+
+ fprintf(file, "\t}\n");
+ fprintf(file, "}\n");
+
+ int ret = fclose(file) ? EXIT_FAILURE : EXIT_SUCCESS;
+
+ log_info("Saving results to %s: %s!\n", fileName, save_map[ret]);
+
+ return ret;
+}
+
int runTestHarness(int argc, const char *argv[], int testNum,
test_definition testList[], int forceNoContextCreation,
cl_command_queue_properties queueProps)
@@ -68,19 +116,28 @@ int runTestHarness(int argc, const char *argv[], int testNum,
forceNoContextCreation, queueProps, NULL);
}
-int skip_init_info(int count)
+int suite_did_not_pass_init(const char *suiteName, test_status status,
+ int testNum, test_definition testList[])
{
- log_info("Test skipped while initialization\n");
- log_info("SKIPPED %d of %d tests.\n", count, count);
- return EXIT_SUCCESS;
-}
+ std::vector<unsigned char> selectedTestList(testNum, 1);
+ std::vector<test_status> resultTestList(testNum, status);
-int fail_init_info(int count)
-{
- log_info("Test failed while initialization\n");
- log_info("FAILED %d of %d tests.\n", count, count);
- return EXIT_FAILURE;
+ int ret = saveResultsToJson(suiteName, testList, selectedTestList.data(),
+ resultTestList.data(), testNum);
+
+ log_info("Test %s while initialization\n",
+ status == TEST_SKIP ? "skipped" : "failed");
+ log_info("%s %d of %d tests.\n", status == TEST_SKIP ? "SKIPPED" : "FAILED",
+ testNum, testNum);
+
+ if (ret != EXIT_SUCCESS)
+ {
+ return ret;
+ }
+
+ return status == TEST_SKIP ? EXIT_SUCCESS : EXIT_FAILURE;
}
+
void version_expected_info(const char *test_name, const char *api_name,
const char *expected_version,
const char *device_version)
@@ -470,6 +527,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
log_error("Invalid device address bit size returned by device.\n");
return EXIT_FAILURE;
}
+ const char *suiteName = argv[0];
if (gCompilationMode == kSpir_v)
{
test_status spirv_readiness = check_spirv_compilation_readiness(device);
@@ -478,9 +536,15 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
switch (spirv_readiness)
{
case TEST_PASS: break;
- case TEST_FAIL: return fail_init_info(testNum);
- case TEST_SKIP: return skip_init_info(testNum);
- case TEST_SKIPPED_ITSELF: return skip_init_info(testNum);
+ case TEST_FAIL:
+ return suite_did_not_pass_init(suiteName, TEST_FAIL,
+ testNum, testList);
+ case TEST_SKIP:
+ return suite_did_not_pass_init(suiteName, TEST_SKIP,
+ testNum, testList);
+ case TEST_SKIPPED_ITSELF:
+ return suite_did_not_pass_init(suiteName, TEST_SKIP,
+ testNum, testList);
}
}
}
@@ -492,9 +556,15 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
switch (status)
{
case TEST_PASS: break;
- case TEST_FAIL: return fail_init_info(testNum);
- case TEST_SKIP: return skip_init_info(testNum);
- case TEST_SKIPPED_ITSELF: return skip_init_info(testNum);
+ case TEST_FAIL:
+ return suite_did_not_pass_init(suiteName, TEST_FAIL, testNum,
+ testList);
+ case TEST_SKIP:
+ return suite_did_not_pass_init(suiteName, TEST_SKIP, testNum,
+ testList);
+ case TEST_SKIPPED_ITSELF:
+ return suite_did_not_pass_init(suiteName, TEST_SKIP, testNum,
+ testList);
}
}
@@ -574,49 +644,6 @@ static int find_matching_tests(test_definition testList[],
return EXIT_SUCCESS;
}
-static int saveResultsToJson(const char *fileName, const char *suiteName,
- test_definition testList[],
- unsigned char selectedTestList[],
- test_status resultTestList[], int testNum)
-{
- FILE *file = fopen(fileName, "w");
- if (NULL == file)
- {
- log_error("ERROR: Failed to open '%s' for writing results.\n",
- fileName);
- return EXIT_FAILURE;
- }
-
- const char *save_map[] = { "success", "failure" };
- const char *result_map[] = { "pass", "fail", "skip" };
- const char *linebreak[] = { "", ",\n" };
- int add_linebreak = 0;
-
- fprintf(file, "{\n");
- fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName);
- fprintf(file, "\t\"results\": {\n");
-
- for (int i = 0; i < testNum; ++i)
- {
- if (selectedTestList[i])
- {
- fprintf(file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak],
- testList[i].name, result_map[(int)resultTestList[i]]);
- add_linebreak = 1;
- }
- }
- fprintf(file, "\n");
-
- fprintf(file, "\t}\n");
- fprintf(file, "}\n");
-
- int ret = fclose(file) ? 1 : 0;
-
- log_info("Saving results to %s: %s!\n", fileName, save_map[ret]);
-
- return ret;
-}
-
static void print_results(int failed, int count, const char *name)
{
if (count < failed)
@@ -658,7 +685,6 @@ int parseAndCallCommandLineTests(int argc, const char *argv[],
int ret = EXIT_SUCCESS;
unsigned char *selectedTestList = (unsigned char *)calloc(testNum, 1);
- test_status *resultTestList = NULL;
if (argc == 1)
{
@@ -697,24 +723,19 @@ int parseAndCallCommandLineTests(int argc, const char *argv[],
if (ret == EXIT_SUCCESS)
{
- resultTestList =
- (test_status *)calloc(testNum, sizeof(*resultTestList));
+ std::vector<test_status> resultTestList(testNum, TEST_PASS);
- callTestFunctions(testList, selectedTestList, resultTestList, testNum,
- device, forceNoContextCreation, num_elements,
+ callTestFunctions(testList, selectedTestList, resultTestList.data(),
+ testNum, device, forceNoContextCreation, num_elements,
queueProps);
print_results(gFailCount, gTestCount, "sub-test");
print_results(gTestsFailed, gTestsFailed + gTestsPassed, "test");
- char *filename = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
- if (filename != NULL)
- {
- ret = saveResultsToJson(filename, argv[0], testList,
- selectedTestList, resultTestList, testNum);
- }
+ ret = saveResultsToJson(argv[0], testList, selectedTestList,
+ resultTestList.data(), testNum);
- if (std::any_of(resultTestList, resultTestList + testNum,
+ if (std::any_of(resultTestList.begin(), resultTestList.end(),
[](test_status result) {
switch (result)
{
@@ -730,7 +751,6 @@ int parseAndCallCommandLineTests(int argc, const char *argv[],
}
free(selectedTestList);
- free(resultTestList);
return ret;
}
@@ -783,6 +803,14 @@ test_status callSingleTestFunction(test_definition test,
return TEST_SKIP;
}
+ if (!check_functions_for_offline_compiler(test.name))
+ {
+ log_info("Subtest %s tests is not supported in offline compiler "
+ "execution path!\n",
+ test.name);
+ return TEST_SKIP;
+ }
+
/* Create a context to work with, unless we're told not to */
if (!forceNoContextCreation)
{
@@ -812,14 +840,12 @@ test_status callSingleTestFunction(test_definition test,
if (queue == NULL)
{
print_error(error, "Unable to create testing command queue");
+ clReleaseContext(context);
return TEST_FAIL;
}
}
/* Run the test and print the result */
- error = check_functions_for_offline_compiler(test.name, deviceToUse);
- test_missing_support_offline_cmpiler(error, test.name);
-
if (test.func == NULL)
{
// Skip unimplemented test, can happen when all of the tests are
@@ -1172,7 +1198,7 @@ cl_platform_id getPlatformFromDevice(cl_device_id deviceID)
void PrintArch(void)
{
- vlog("sizeof( void*) = %ld\n", sizeof(void *));
+ vlog("sizeof( void*) = %zu\n", sizeof(void *));
#if defined(__ppc__)
vlog("ARCH:\tppc\n");
#elif defined(__ppc64__)
diff --git a/test_common/harness/threadTesting.cpp b/test_common/harness/threadTesting.cpp
deleted file mode 100644
index 875ee59b..00000000
--- a/test_common/harness/threadTesting.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "compat.h"
-#include "threadTesting.h"
-#include "errorHelpers.h"
-#include <stdio.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <pthread.h>
-#endif
-
-#if 0 // Disabed for now
-
-typedef struct
-{
- basefn mFunction;
- cl_device_id mDevice;
- cl_context mContext;
- int mNumElements;
-} TestFnArgs;
-
-////////////////////////////////////////////////////////////////////////////////
-// Thread-based testing. Spawns a new thread to run the given test function,
-// then waits for it to complete. The entire idea is that, if the thread crashes,
-// we can catch it and report it as a failure instead of crashing the entire suite
-////////////////////////////////////////////////////////////////////////////////
-
-void *test_thread_wrapper( void *data )
-{
- TestFnArgs *args;
- int retVal;
- cl_context context;
-
- args = (TestFnArgs *)data;
-
- /* Create a new context to use (contexts can't cross threads) */
- context = clCreateContext(NULL, args->mDeviceGroup);
- if( context == NULL )
- {
- log_error("clCreateContext failed for new thread\n");
- return (void *)(-1);
- }
-
- /* Call function */
- retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
-
- clReleaseContext( context );
-
- return (void *)retVal;
-}
-
-int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
- int error;
- pthread_t threadHdl;
- void *retVal;
- TestFnArgs args;
-
-
- args.mFunction = fnToTest;
- args.mDeviceGroup = deviceGroup;
- args.mDevice = device;
- args.mContext = context;
- args.mNumElements = numElements;
-
-
- error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
- if( error != 0 )
- {
- log_error( "ERROR: Unable to create thread for testing!\n" );
- return -1;
- }
-
- /* Thread has been started, now just wait for it to complete (or crash) */
- error = pthread_join( threadHdl, &retVal );
- if( error != 0 )
- {
- log_error( "ERROR: Unable to join testing thread!\n" );
- return -1;
- }
-
- return (int)((intptr_t)retVal);
-}
-#endif
diff --git a/test_common/harness/threadTesting.h b/test_common/harness/threadTesting.h
index 765eabcc..2f3c1873 100644
--- a/test_common/harness/threadTesting.h
+++ b/test_common/harness/threadTesting.h
@@ -24,8 +24,5 @@
typedef int (*basefn)(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements);
-extern int test_threaded_function(basefn fnToTest, cl_device_id device,
- cl_context context, cl_command_queue queue,
- int numElements);
-#endif // _threadTesting_h
+#endif // _threadTesting_h
\ No newline at end of file
diff --git a/test_common/harness/typeWrappers.h b/test_common/harness/typeWrappers.h
index 9a58a9d2..50c7c938 100644
--- a/test_common/harness/typeWrappers.h
+++ b/test_common/harness/typeWrappers.h
@@ -16,122 +16,134 @@
#ifndef _typeWrappers_h
#define _typeWrappers_h
-#include <stdio.h>
-#include <stdlib.h>
-
#if !defined(_WIN32)
#include <sys/mman.h>
#endif
#include "compat.h"
-#include <stdio.h>
#include "mt19937.h"
#include "errorHelpers.h"
#include "kernelHelpers.h"
-/* cl_context wrapper */
+#include <cstdlib>
+#include <type_traits>
-class clContextWrapper {
-public:
- clContextWrapper() { mContext = NULL; }
- clContextWrapper(cl_context program) { mContext = program; }
- ~clContextWrapper()
- {
- if (mContext != NULL) clReleaseContext(mContext);
- }
+namespace wrapper_details {
+
+// clRetain*() and clRelease*() functions share the same type.
+template <typename T> // T should be cl_context, cl_program, ...
+using RetainReleaseType = cl_int CL_API_CALL(T);
- clContextWrapper &operator=(const cl_context &rhs)
+// A generic wrapper class that follows OpenCL retain/release semantics.
+//
+// This Wrapper class implement copy and move semantics, which makes it
+// compatible with standard containers for example.
+//
+// Template parameters:
+// - T is the cl_* type (e.g. cl_context, cl_program, ...)
+// - Retain is the clRetain* function (e.g. clRetainContext, ...)
+// - Release is the clRelease* function (e.g. clReleaseContext, ...)
+template <typename T, RetainReleaseType<T> Retain, RetainReleaseType<T> Release>
+class Wrapper {
+ static_assert(std::is_pointer<T>::value, "T should be a pointer type.");
+ T object = nullptr;
+
+ void retain()
{
- mContext = rhs;
- return *this;
+ if (!object) return;
+
+ auto err = Retain(object);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err, "clRetain*() failed");
+ std::abort();
+ }
}
- operator cl_context() const { return mContext; }
- cl_context *operator&() { return &mContext; }
+ void release()
+ {
+ if (!object) return;
- bool operator==(const cl_context &rhs) { return mContext == rhs; }
+ auto err = Release(object);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err, "clRelease*() failed");
+ std::abort();
+ }
+ }
-protected:
- cl_context mContext;
-};
+public:
+ Wrapper() = default;
-/* cl_program wrapper */
+ // On initialisation, assume the object has a refcount of one.
+ Wrapper(T object): object(object) {}
-class clProgramWrapper {
-public:
- clProgramWrapper() { mProgram = NULL; }
- clProgramWrapper(cl_program program) { mProgram = program; }
- ~clProgramWrapper()
+ // On assignment, assume the object has a refcount of one.
+ Wrapper &operator=(T rhs)
{
- if (mProgram != NULL) clReleaseProgram(mProgram);
+ reset(rhs);
+ return *this;
}
- clProgramWrapper &operator=(const cl_program &rhs)
+ // Copy semantics, increase retain count.
+ Wrapper(Wrapper const &w) { *this = w; }
+ Wrapper &operator=(Wrapper const &w)
{
- mProgram = rhs;
+ reset(w.object);
+ retain();
return *this;
}
- operator cl_program() const { return mProgram; }
-
- cl_program *operator&() { return &mProgram; }
- bool operator==(const cl_program &rhs) { return mProgram == rhs; }
-
-protected:
- cl_program mProgram;
-};
-
-/* cl_kernel wrapper */
-
-class clKernelWrapper {
-public:
- clKernelWrapper() { mKernel = NULL; }
- clKernelWrapper(cl_kernel kernel) { mKernel = kernel; }
- ~clKernelWrapper()
+ // Move semantics, directly take ownership.
+ Wrapper(Wrapper &&w) { *this = std::move(w); }
+ Wrapper &operator=(Wrapper &&w)
{
- if (mKernel != NULL) clReleaseKernel(mKernel);
+ reset(w.object);
+ w.object = nullptr;
+ return *this;
}
- clKernelWrapper &operator=(const cl_kernel &rhs)
+ ~Wrapper() { reset(); }
+
+ // Release the existing object, if any, and own the new one, if any.
+ void reset(T new_object = nullptr)
{
- mKernel = rhs;
- return *this;
+ release();
+ object = new_object;
}
- operator cl_kernel() const { return mKernel; }
- cl_kernel *operator&() { return &mKernel; }
+ operator T() const { return object; }
- bool operator==(const cl_kernel &rhs) { return mKernel == rhs; }
-
-protected:
- cl_kernel mKernel;
+ // Ideally this function should not exist as it breaks encapsulation by
+ // allowing external mutation of the Wrapper internal state. However, too
+ // much code currently relies on this. For example, instead of using T* as
+ // output parameters, existing code can be updated to use Wrapper& instead.
+ T *operator&() { return &object; }
};
-/* cl_mem (stream) wrapper */
+} // namespace wrapper_details
-class clMemWrapper {
-public:
- clMemWrapper() { mMem = NULL; }
- clMemWrapper(cl_mem mem) { mMem = mem; }
- ~clMemWrapper()
- {
- if (mMem != NULL) clReleaseMemObject(mMem);
- }
+using clContextWrapper =
+ wrapper_details::Wrapper<cl_context, clRetainContext, clReleaseContext>;
- clMemWrapper &operator=(const cl_mem &rhs)
- {
- mMem = rhs;
- return *this;
- }
- operator cl_mem() const { return mMem; }
+using clProgramWrapper =
+ wrapper_details::Wrapper<cl_program, clRetainProgram, clReleaseProgram>;
- cl_mem *operator&() { return &mMem; }
+using clKernelWrapper =
+ wrapper_details::Wrapper<cl_kernel, clRetainKernel, clReleaseKernel>;
- bool operator==(const cl_mem &rhs) { return mMem == rhs; }
+using clMemWrapper =
+ wrapper_details::Wrapper<cl_mem, clRetainMemObject, clReleaseMemObject>;
-protected:
- cl_mem mMem;
-};
+using clCommandQueueWrapper =
+ wrapper_details::Wrapper<cl_command_queue, clRetainCommandQueue,
+ clReleaseCommandQueue>;
+
+using clSamplerWrapper =
+ wrapper_details::Wrapper<cl_sampler, clRetainSampler, clReleaseSampler>;
+
+using clEventWrapper =
+ wrapper_details::Wrapper<cl_event, clRetainEvent, clReleaseEvent>;
class clProtectedImage {
public:
@@ -183,92 +195,12 @@ public:
cl_mem *operator&() { return &image; }
- bool operator==(const cl_mem &rhs) { return image == rhs; }
-
protected:
void *backingStore;
size_t backingStoreSize;
cl_mem image;
};
-/* cl_command_queue wrapper */
-class clCommandQueueWrapper {
-public:
- clCommandQueueWrapper() { mMem = NULL; }
- clCommandQueueWrapper(cl_command_queue mem) { mMem = mem; }
- ~clCommandQueueWrapper()
- {
- if (mMem != NULL)
- {
- clReleaseCommandQueue(mMem);
- }
- }
-
- clCommandQueueWrapper &operator=(const cl_command_queue &rhs)
- {
- mMem = rhs;
- return *this;
- }
- operator cl_command_queue() const { return mMem; }
-
- cl_command_queue *operator&() { return &mMem; }
-
- bool operator==(const cl_command_queue &rhs) { return mMem == rhs; }
-
-protected:
- cl_command_queue mMem;
-};
-
-/* cl_sampler wrapper */
-class clSamplerWrapper {
-public:
- clSamplerWrapper() { mMem = NULL; }
- clSamplerWrapper(cl_sampler mem) { mMem = mem; }
- ~clSamplerWrapper()
- {
- if (mMem != NULL) clReleaseSampler(mMem);
- }
-
- clSamplerWrapper &operator=(const cl_sampler &rhs)
- {
- mMem = rhs;
- return *this;
- }
- operator cl_sampler() const { return mMem; }
-
- cl_sampler *operator&() { return &mMem; }
-
- bool operator==(const cl_sampler &rhs) { return mMem == rhs; }
-
-protected:
- cl_sampler mMem;
-};
-
-/* cl_event wrapper */
-class clEventWrapper {
-public:
- clEventWrapper() { mMem = NULL; }
- clEventWrapper(cl_event mem) { mMem = mem; }
- ~clEventWrapper()
- {
- if (mMem != NULL) clReleaseEvent(mMem);
- }
-
- clEventWrapper &operator=(const cl_event &rhs)
- {
- mMem = rhs;
- return *this;
- }
- operator cl_event() const { return mMem; }
-
- cl_event *operator&() { return &mMem; }
-
- bool operator==(const cl_event &rhs) { return mMem == rhs; }
-
-protected:
- cl_event mMem;
-};
-
/* Generic protected memory buffer, for verifying access within bounds */
class clProtectedArray {
public:
diff --git a/test_conformance/CMakeLists.txt b/test_conformance/CMakeLists.txt
index 363ece86..f9514f1e 100644
--- a/test_conformance/CMakeLists.txt
+++ b/test_conformance/CMakeLists.txt
@@ -52,6 +52,7 @@ add_subdirectory( pipes )
add_subdirectory( device_timer )
add_subdirectory( spirv_new )
add_subdirectory( spir )
+add_subdirectory( vulkan )
file(GLOB CSV_FILES "opencl_conformance_tests_*.csv")
diff --git a/test_conformance/SVM/test_byte_granularity.cpp b/test_conformance/SVM/test_byte_granularity.cpp
index 403528b9..6dbb3649 100644
--- a/test_conformance/SVM/test_byte_granularity.cpp
+++ b/test_conformance/SVM/test_byte_granularity.cpp
@@ -58,7 +58,6 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu
cl_uint num_devices = 0;
cl_int err = CL_SUCCESS;
- cl_int rval = CL_SUCCESS;
err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
diff --git a/test_conformance/SVM/test_cross_buffer_pointers.cpp b/test_conformance/SVM/test_cross_buffer_pointers.cpp
index c1caebb9..2baa7ad7 100644
--- a/test_conformance/SVM/test_cross_buffer_pointers.cpp
+++ b/test_conformance/SVM/test_cross_buffer_pointers.cpp
@@ -162,7 +162,8 @@ int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_contex
test_error(error, "clCreateBuffer failed.");
// this buffer holds the index into the nodes buffer that is used for node allocation
- clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
+ clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(size_t), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the count of correct nodes which is computed by the verify kernel.
diff --git a/test_conformance/SVM/test_migrate.cpp b/test_conformance/SVM/test_migrate.cpp
index 2a1ce051..f624bcd9 100644
--- a/test_conformance/SVM/test_migrate.cpp
+++ b/test_conformance/SVM/test_migrate.cpp
@@ -78,9 +78,6 @@ int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue
cl_uint amem[GLOBAL_SIZE];
cl_uint bmem[GLOBAL_SIZE];
cl_uint cmem[GLOBAL_SIZE];
- cl_uint ramem[GLOBAL_SIZE];
- cl_uint rbmem[GLOBAL_SIZE];
- cl_uint rcmem[GLOBAL_SIZE];
cl_event evs[20];
const size_t global_size = GLOBAL_SIZE;
diff --git a/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp b/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp
index f26981bc..12358167 100644
--- a/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp
+++ b/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp
@@ -98,7 +98,9 @@ cl_int create_linked_lists_on_device(int ci, cl_command_queue cmdq, cl_mem alloc
cl_int error = CL_SUCCESS;
log_info("SVM: creating linked list on device: %d ", ci);
- size_t *pAllocator = (size_t*) clEnqueueMapBuffer(cmdq, allocator, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
+ size_t *pAllocator = (size_t *)clEnqueueMapBuffer(
+ cmdq, allocator, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(size_t),
+ 0, NULL, NULL, &error);
test_error2(error, pAllocator, "clEnqueueMapBuffer failed");
// reset allocator index
*pAllocator = numLists; // the first numLists elements of the nodes array are already allocated (they hold the head of each list).
@@ -206,7 +208,9 @@ int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2
}
// this buffer holds an index into the nodes buffer, it is used for node allocation
- clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
+ clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(size_t), NULL, &error);
+
test_error(error, "clCreateBuffer failed.");
error = clGetMemObjectInfo(allocator, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0);
diff --git a/test_conformance/SVM/test_shared_address_space_fine_grain.cpp b/test_conformance/SVM/test_shared_address_space_fine_grain.cpp
index a98a880c..3350972e 100644
--- a/test_conformance/SVM/test_shared_address_space_fine_grain.cpp
+++ b/test_conformance/SVM/test_shared_address_space_fine_grain.cpp
@@ -47,7 +47,7 @@ int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context c
test_error2(error, pNodes, "malloc failed");
// this allocation holds an index into the nodes buffer, it is used for node allocation
- size_t* pAllocator = (size_t*) align_malloc(sizeof(cl_int), 128);
+ size_t *pAllocator = (size_t *)align_malloc(sizeof(size_t), 128);
test_error2(error, pAllocator, "malloc failed");
// this allocation holds the count of correct nodes, which is computed by the verify kernel.
diff --git a/test_conformance/SVM/test_shared_sub_buffers.cpp b/test_conformance/SVM/test_shared_sub_buffers.cpp
index a79484c9..2532886e 100644
--- a/test_conformance/SVM/test_shared_sub_buffers.cpp
+++ b/test_conformance/SVM/test_shared_sub_buffers.cpp
@@ -182,7 +182,8 @@ int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_c
// this buffer holds the index into the nodes buffer that is used for node allocation
- clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
+ clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(size_t), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the count of correct nodes which is computed by the verify kernel.
diff --git a/test_conformance/allocations/allocation_fill.cpp b/test_conformance/allocations/allocation_fill.cpp
index a7558942..b4ea3798 100644
--- a/test_conformance/allocations/allocation_fill.cpp
+++ b/test_conformance/allocations/allocation_fill.cpp
@@ -200,8 +200,10 @@ int fill_image_with_data(cl_context context, cl_device_id device_id, cl_command_
result = clFinish(*queue);
if (result != SUCCEEDED)
{
- print_error(error, "clFinish failed after successful enquing filling buffer with data.");
- return result;
+ print_error(error,
+ "clFinish failed after successful enqueuing filling "
+ "buffer with data.");
+ return result;
}
} else {
error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region, 0, 0, data, 0, NULL, &event);
diff --git a/test_conformance/allocations/allocation_functions.cpp b/test_conformance/allocations/allocation_functions.cpp
index 7182c727..827ee104 100644
--- a/test_conformance/allocations/allocation_functions.cpp
+++ b/test_conformance/allocations/allocation_functions.cpp
@@ -37,8 +37,8 @@ int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t
}
if (size_to_allocate == 0) {
- log_error("Trying to allcoate a zero sized image.\n");
- return FAILED_ABORT;
+ log_error("Trying to allocate a zero sized image.\n");
+ return FAILED_ABORT;
}
error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL );
diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp
index 0dec4c6d..43e81277 100644
--- a/test_conformance/allocations/main.cpp
+++ b/test_conformance/allocations/main.cpp
@@ -112,6 +112,8 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All
int number_of_mems_used;
cl_ulong max_individual_allocation_size = g_max_individual_allocation_size;
cl_ulong global_mem_size = g_global_mem_size ;
+ const bool allocate_image =
+ (alloc_type != BUFFER) && (alloc_type != BUFFER_NON_BLOCKING);
static const char* alloc_description[] = {
"buffer(s)",
@@ -123,7 +125,7 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All
};
// Skip image tests if we don't support images on the device
- if( alloc_type > BUFFER && checkForImageSupport( device ) )
+ if (allocate_image && checkForImageSupport(device))
{
log_info( "Can not test image allocation because device does not support images.\n" );
return 0;
@@ -132,7 +134,7 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All
// This section was added in order to fix a bug in the test
// If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much grater than CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT
// The test will fail in image allocations as the size requested for the allocation will be much grater than the maximum size allowed for image
- if( ( alloc_type != BUFFER ) && ( alloc_type != BUFFER_NON_BLOCKING ) )
+ if (allocate_image)
{
size_t max_width, max_height;
diff --git a/test_conformance/api/negative_platform.cpp b/test_conformance/api/negative_platform.cpp
index 7d9de5df..861d4748 100644
--- a/test_conformance/api/negative_platform.cpp
+++ b/test_conformance/api/negative_platform.cpp
@@ -42,18 +42,9 @@ int test_negative_get_platform_info(cl_device_id deviceID, cl_context context,
{
cl_platform_id platform = getPlatformFromDevice(deviceID);
- cl_int err =
- clGetPlatformInfo(reinterpret_cast<cl_platform_id>(deviceID),
- CL_PLATFORM_VERSION, sizeof(char*), nullptr, nullptr);
- test_failure_error_ret(
- err, CL_INVALID_PLATFORM,
- "clGetPlatformInfo should return CL_INVALID_PLATFORM when: \"platform "
- "is not a valid platform\" using a valid object which is NOT a "
- "platform",
- TEST_FAIL);
-
constexpr cl_platform_info INVALID_PARAM_VALUE = 0;
- err = clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
+ cl_int err =
+ clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
test_failure_error_ret(
err, CL_INVALID_VALUE,
"clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name "
diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp
index 9e981cd3..086008d7 100644
--- a/test_conformance/api/test_api_min_max.cpp
+++ b/test_conformance/api/test_api_min_max.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -22,33 +22,32 @@
const char *sample_single_param_kernel[] = {
"__kernel void sample_test(__global int *src)\n"
"{\n"
- " int tid = get_global_id(0);\n"
+ " size_t tid = get_global_id(0);\n"
"\n"
- "}\n" };
+ "}\n"
+};
-const char *sample_single_param_write_kernel[] = {
- "__kernel void sample_test(__global int *src)\n"
- "{\n"
- " int tid = get_global_id(0);\n"
- " src[tid] = tid;\n"
- "\n"
- "}\n" };
const char *sample_read_image_kernel_pattern[] = {
- "__kernel void sample_test( __global float *result, ", " )\n"
+ "__kernel void sample_test( __global float *result, ",
+ " )\n"
"{\n"
- " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"
- " int tid = get_global_id(0);\n"
+ " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | "
+ "CLK_FILTER_NEAREST;\n"
+ " size_t tid = get_global_id(0);\n"
" result[0] = 0.0f;\n",
"\n"
- "}\n" };
+ "}\n"
+};
const char *sample_write_image_kernel_pattern[] = {
- "__kernel void sample_test( ", " )\n"
+ "__kernel void sample_test( ",
+ " )\n"
"{\n"
- " int tid = get_global_id(0);\n",
+ " size_t tid = get_global_id(0);\n",
"\n"
- "}\n" };
+ "}\n"
+};
const char *sample_large_parmam_kernel_pattern[] = {
@@ -57,7 +56,8 @@ const char *sample_large_parmam_kernel_pattern[] = {
"result[0] = 0;\n"
"%s"
"\n"
- "}\n" };
+ "}\n"
+};
const char *sample_large_int_parmam_kernel_pattern[] = {
"__kernel void sample_test(%s, __global int *result)\n"
@@ -65,47 +65,55 @@ const char *sample_large_int_parmam_kernel_pattern[] = {
"result[0] = 0;\n"
"%s"
"\n"
- "}\n" };
+ "}\n"
+};
const char *sample_sampler_kernel_pattern[] = {
- "__kernel void sample_test( read_only image2d_t src, __global int4 *dst", ", sampler_t sampler%d", ")\n"
+ "__kernel void sample_test( read_only image2d_t src, __global int4 *dst",
+ ", sampler_t sampler%d",
+ ")\n"
"{\n"
- " int tid = get_global_id(0);\n",
- " dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n",
+ " size_t tid = get_global_id(0);\n",
+ " dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n",
"\n"
- "}\n" };
+ "}\n"
+};
const char *sample_const_arg_kernel[] = {
"__kernel void sample_test(__constant int *src1, __global int *dst)\n"
"{\n"
- " int tid = get_global_id(0);\n"
+ " size_t tid = get_global_id(0);\n"
"\n"
" dst[tid] = src1[tid];\n"
"\n"
- "}\n" };
+ "}\n"
+};
const char *sample_local_arg_kernel[] = {
- "__kernel void sample_test(__local int *src1, __global int *global_src, __global int *dst)\n"
+ "__kernel void sample_test(__local int *src1, __global int *global_src, "
+ "__global int *dst)\n"
"{\n"
- " int tid = get_global_id(0);\n"
+ " size_t tid = get_global_id(0);\n"
"\n"
" src1[tid] = global_src[tid];\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" dst[tid] = src1[tid];\n"
"\n"
- "}\n" };
+ "}\n"
+};
const char *sample_const_max_arg_kernel_pattern =
-"__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" dst[tid] = src1[tid];\n"
-"%s"
-"\n"
-"}\n";
-
-int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+ "__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n"
+ "{\n"
+ " int tid = get_global_id(0);\n"
+ "\n"
+ " dst[tid] = src1[tid];\n"
+ "%s"
+ "\n"
+ "}\n";
+
+int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error, retVal;
unsigned int maxThreadDim, threadDim, i;
@@ -118,19 +126,24 @@ int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl
/* Get the max thread dimensions */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxThreadDim ), &maxThreadDim, NULL );
- test_error( error, "Unable to get max work item dimensions from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
+ sizeof(maxThreadDim), &maxThreadDim, NULL);
+ test_error(error, "Unable to get max work item dimensions from device");
- if( maxThreadDim < 3 )
+ if (maxThreadDim < 3)
{
- log_error( "ERROR: Reported max work item dimensions is less than required! (%d)\n", maxThreadDim );
+ log_error("ERROR: Reported max work item dimensions is less than "
+ "required! (%d)\n",
+ maxThreadDim);
return -1;
}
log_info("Reported max thread dimensions of %d.\n", maxThreadDim);
/* Create a kernel to test with */
- if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_param_kernel, "sample_test" ) != 0 )
+ if (create_single_kernel_helper(context, &program, &kernel, 1,
+ sample_single_param_kernel, "sample_test")
+ != 0)
{
return -1;
}
@@ -138,105 +151,122 @@ int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl
/* Create some I/O streams */
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int) * 100, NULL, &error);
- if( streams[0] == NULL )
+ if (streams[0] == NULL)
{
log_error("ERROR: Creating test array failed!\n");
return -1;
}
/* Set the arguments */
- error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
- test_error( error, "Unable to set kernel arguments" );
+ error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+ test_error(error, "Unable to set kernel arguments");
retVal = 0;
/* Now try running the kernel with up to that many threads */
- for (threadDim=1; threadDim <= maxThreadDim; threadDim++)
+ for (threadDim = 1; threadDim <= maxThreadDim; threadDim++)
{
- threads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim );
- localThreads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim );
- for( i = 0; i < maxThreadDim; i++ )
+ threads = (size_t *)malloc(sizeof(size_t) * maxThreadDim);
+ localThreads = (size_t *)malloc(sizeof(size_t) * maxThreadDim);
+ for (i = 0; i < maxThreadDim; i++)
{
- threads[ i ] = 1;
+ threads[i] = 1;
localThreads[i] = 1;
}
- error = clEnqueueNDRangeKernel( queue, kernel, maxThreadDim, NULL, threads, localThreads, 0, NULL, &event );
- test_error( error, "Failed clEnqueueNDRangeKernel");
+ error = clEnqueueNDRangeKernel(queue, kernel, maxThreadDim, NULL,
+ threads, localThreads, 0, NULL, &event);
+ test_error(error, "Failed clEnqueueNDRangeKernel");
// Verify that the event does not return an error from the execution
error = clWaitForEvents(1, &event);
- test_error( error, "clWaitForEvent failed");
- error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
- test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
+ test_error(error, "clWaitForEvent failed");
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(event_status), &event_status, NULL);
+ test_error(
+ error,
+ "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
clReleaseEvent(event);
if (event_status < 0)
test_error(error, "Kernel execution event returned error");
/* All done */
- free( threads );
- free( localThreads );
+ free(threads);
+ free(localThreads);
}
return retVal;
}
-int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
size_t *deviceMaxWorkItemSize;
unsigned int maxWorkItemDim;
/* Get the max work item dimensions */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxWorkItemDim ), &maxWorkItemDim, NULL );
- test_error( error, "Unable to get max work item dimensions from device" );
-
- log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n", maxWorkItemDim);
- deviceMaxWorkItemSize = (size_t*)malloc(sizeof(size_t)*maxWorkItemDim);
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxWorkItemDim, deviceMaxWorkItemSize, NULL );
- test_error( error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
+ sizeof(maxWorkItemDim), &maxWorkItemDim, NULL);
+ test_error(error, "Unable to get max work item dimensions from device");
+
+ log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n",
+ maxWorkItemDim);
+ deviceMaxWorkItemSize = (size_t *)malloc(sizeof(size_t) * maxWorkItemDim);
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+ sizeof(size_t) * maxWorkItemDim,
+ deviceMaxWorkItemSize, NULL);
+ test_error(error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed");
unsigned int i;
int errors = 0;
- for(i=0; i<maxWorkItemDim; i++) {
- if (deviceMaxWorkItemSize[i]<1) {
- log_error("MAX_WORK_ITEM_SIZE in dimension %d is invalid: %lu\n", i, deviceMaxWorkItemSize[i]);
+ for (i = 0; i < maxWorkItemDim; i++)
+ {
+ if (deviceMaxWorkItemSize[i] < 1)
+ {
+ log_error("MAX_WORK_ITEM_SIZE in dimension %d is invalid: %lu\n", i,
+ deviceMaxWorkItemSize[i]);
errors++;
- } else {
- log_info("Dimension %d has max work item size %lu\n", i, deviceMaxWorkItemSize[i]);
+ }
+ else
+ {
+ log_info("Dimension %d has max work item size %lu\n", i,
+ deviceMaxWorkItemSize[i]);
}
}
free(deviceMaxWorkItemSize);
- if (errors)
- return -1;
+ if (errors) return -1;
return 0;
}
-
-int test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_work_group_size(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
size_t deviceMaxThreadSize;
/* Get the max thread dimensions */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( deviceMaxThreadSize ), &deviceMaxThreadSize, NULL );
- test_error( error, "Unable to get max work group size from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof(deviceMaxThreadSize), &deviceMaxThreadSize,
+ NULL);
+ test_error(error, "Unable to get max work group size from device");
log_info("Reported %ld max device work group size.\n", deviceMaxThreadSize);
- if( deviceMaxThreadSize == 0 )
+ if (deviceMaxThreadSize == 0)
{
- log_error( "ERROR: Max work group size is reported as zero!\n" );
+ log_error("ERROR: Max work group size is reported as zero!\n");
return -1;
}
return 0;
}
-int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_read_image_args(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
unsigned int maxReadImages, i;
@@ -245,48 +275,55 @@ int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_c
char readArgLine[128], *programSrc;
const char *readArgPattern = ", read_only image2d_t srcimg%d";
clKernelWrapper kernel;
- clMemWrapper *streams, result;
+ clMemWrapper *streams, result;
size_t threads[2];
- cl_image_format image_format_desc;
+ cl_image_format image_format_desc;
size_t maxParameterSize;
cl_event event;
cl_int event_status;
- cl_float image_data[4*4];
+ cl_float image_data[4 * 4];
float image_result = 0.0f;
float actual_image_result;
cl_uint minRequiredReadImages = gIsEmbedded ? 8 : 128;
cl_device_type deviceType;
- PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
+ PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
image_format_desc.image_channel_order = CL_RGBA;
image_format_desc.image_channel_data_type = CL_FLOAT;
/* Get the max read image arg count */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof( maxReadImages ), &maxReadImages, NULL );
- test_error( error, "Unable to get max read image arg count from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_READ_IMAGE_ARGS,
+ sizeof(maxReadImages), &maxReadImages, NULL);
+ test_error(error, "Unable to get max read image arg count from device");
- if( maxReadImages < minRequiredReadImages )
+ if (maxReadImages < minRequiredReadImages)
{
- log_error( "ERROR: Reported max read image arg count is less than required! (%d)\n", maxReadImages );
+ log_error("ERROR: Reported max read image arg count is less than "
+ "required! (%d)\n",
+ maxReadImages);
return -1;
}
log_info("Reported %d max read image args.\n", maxReadImages);
- error = clGetDeviceInfo( deviceID, CL_DEVICE_ADDRESS_BITS, sizeof( deviceAddressSize ), &deviceAddressSize, NULL );
- test_error( error, "Unable to query CL_DEVICE_ADDRESS_BITS for device" );
+ error =
+ clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS,
+ sizeof(deviceAddressSize), &deviceAddressSize, NULL);
+ test_error(error, "Unable to query CL_DEVICE_ADDRESS_BITS for device");
deviceAddressSize /= 8; // convert from bits to bytes
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL );
- test_error( error, "Unable to get max parameter size from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE,
+ sizeof(maxParameterSize), &maxParameterSize, NULL);
+ test_error(error, "Unable to get max parameter size from device");
if (!gIsEmbedded && maxReadImages >= 128 && maxParameterSize == 1024)
{
- error = clGetDeviceInfo( deviceID, CL_DEVICE_TYPE, sizeof( deviceType ), &deviceType, NULL );
- test_error( error, "Unable to get device type from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(deviceType),
+ &deviceType, NULL);
+ test_error(error, "Unable to get device type from device");
- if(deviceType != CL_DEVICE_TYPE_CUSTOM)
+ if (deviceType != CL_DEVICE_TYPE_CUSTOM)
{
maxReadImages = 127;
}
@@ -295,85 +332,107 @@ int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_c
maxParameterSize -= deviceAddressSize;
// Calculate the number we can use
- if (maxParameterSize/deviceAddressSize < maxReadImages) {
- log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/deviceAddressSize));
- maxReadImages = (unsigned int)(maxParameterSize/deviceAddressSize);
+ if (maxParameterSize / deviceAddressSize < maxReadImages)
+ {
+ log_info("WARNING: Max parameter size of %d bytes limits test to %d "
+ "max image arguments.\n",
+ (int)maxParameterSize,
+ (int)(maxParameterSize / deviceAddressSize));
+ maxReadImages = (unsigned int)(maxParameterSize / deviceAddressSize);
}
/* Create a program with that many read args */
- programSrc = (char *)malloc( strlen( sample_read_image_kernel_pattern[ 0 ] ) + ( strlen( readArgPattern ) + 6 ) * ( maxReadImages ) +
- strlen( sample_read_image_kernel_pattern[ 1 ] ) + 1 + 40240);
+ programSrc = (char *)malloc(strlen(sample_read_image_kernel_pattern[0])
+ + (strlen(readArgPattern) + 6) * (maxReadImages)
+ + strlen(sample_read_image_kernel_pattern[1])
+ + 1 + 40240);
- strcpy( programSrc, sample_read_image_kernel_pattern[ 0 ] );
- strcat( programSrc, "read_only image2d_t srcimg0" );
- for( i = 0; i < maxReadImages-1; i++ )
+ strcpy(programSrc, sample_read_image_kernel_pattern[0]);
+ strcat(programSrc, "read_only image2d_t srcimg0");
+ for (i = 0; i < maxReadImages - 1; i++)
{
- sprintf( readArgLine, readArgPattern, i+1 );
- strcat( programSrc, readArgLine );
+ sprintf(readArgLine, readArgPattern, i + 1);
+ strcat(programSrc, readArgLine);
}
- strcat( programSrc, sample_read_image_kernel_pattern[ 1 ] );
- for ( i = 0; i < maxReadImages; i++) {
- sprintf( readArgLine, "\tresult[0] += read_imagef( srcimg%d, sampler, (int2)(0,0)).x;\n", i);
- strcat( programSrc, readArgLine );
+ strcat(programSrc, sample_read_image_kernel_pattern[1]);
+ for (i = 0; i < maxReadImages; i++)
+ {
+ sprintf(
+ readArgLine,
+ "\tresult[0] += read_imagef( srcimg%d, sampler, (int2)(0,0)).x;\n",
+ i);
+ strcat(programSrc, readArgLine);
}
- strcat( programSrc, sample_read_image_kernel_pattern[ 2 ] );
+ strcat(programSrc, sample_read_image_kernel_pattern[2]);
- error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test");
- test_error( error, "Failed to create the program and kernel.");
- free( programSrc );
+ error =
+ create_single_kernel_helper(context, &program, &kernel, 1,
+ (const char **)&programSrc, "sample_test");
+ test_error(error, "Failed to create the program and kernel.");
+ free(programSrc);
result = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float), NULL,
&error);
- test_error( error, "clCreateBufer failed");
+ test_error(error, "clCreateBufer failed");
/* Create some I/O streams */
streams = new clMemWrapper[maxReadImages + 1];
- for( i = 0; i < maxReadImages; i++ )
+ for (i = 0; i < maxReadImages; i++)
{
- image_data[0]=i;
- image_result+= image_data[0];
- streams[i] = create_image_2d( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &image_format_desc, 4, 4, 0, image_data, &error );
- test_error( error, "Unable to allocate test image" );
+ image_data[0] = i;
+ image_result += image_data[0];
+ streams[i] =
+ create_image_2d(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+ &image_format_desc, 4, 4, 0, image_data, &error);
+ test_error(error, "Unable to allocate test image");
}
- error = clSetKernelArg( kernel, 0, sizeof( result ), &result );
- test_error( error, "Unable to set kernel arguments" );
+ error = clSetKernelArg(kernel, 0, sizeof(result), &result);
+ test_error(error, "Unable to set kernel arguments");
/* Set the arguments */
- for( i = 1; i < maxReadImages+1; i++ )
+ for (i = 1; i < maxReadImages + 1; i++)
{
- error = clSetKernelArg( kernel, i, sizeof( streams[i-1] ), &streams[i-1] );
- test_error( error, "Unable to set kernel arguments" );
+ error =
+ clSetKernelArg(kernel, i, sizeof(streams[i - 1]), &streams[i - 1]);
+ test_error(error, "Unable to set kernel arguments");
}
/* Now try running the kernel */
threads[0] = threads[1] = 1;
- error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event );
- test_error( error, "clEnqueueNDRangeKernel failed");
+ error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0,
+ NULL, &event);
+ test_error(error, "clEnqueueNDRangeKernel failed");
// Verify that the event does not return an error from the execution
error = clWaitForEvents(1, &event);
- test_error( error, "clWaitForEvent failed");
- error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
- test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
+ test_error(error, "clWaitForEvent failed");
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(event_status), &event_status, NULL);
+ test_error(error,
+ "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
clReleaseEvent(event);
if (event_status < 0)
test_error(error, "Kernel execution event returned error");
- error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float), &actual_image_result, 0, NULL, NULL);
+ error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float),
+ &actual_image_result, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
delete[] streams;
- if (actual_image_result != image_result) {
- log_error("Result failed to verify. Got %g, expected %g.\n", actual_image_result, image_result);
+ if (actual_image_result != image_result)
+ {
+ log_error("Result failed to verify. Got %g, expected %g.\n",
+ actual_image_result, image_result);
return 1;
}
return 0;
}
-int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_write_image_args(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
unsigned int maxWriteImages, i;
@@ -381,94 +440,117 @@ int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_
char writeArgLine[128], *programSrc;
const char *writeArgPattern = ", write_only image2d_t dstimg%d";
clKernelWrapper kernel;
- clMemWrapper *streams;
+ clMemWrapper *streams;
size_t threads[2];
- cl_image_format image_format_desc;
+ cl_image_format image_format_desc;
size_t maxParameterSize;
cl_event event;
cl_int event_status;
cl_uint minRequiredWriteImages = gIsEmbedded ? 1 : 8;
- PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
+ PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
image_format_desc.image_channel_order = CL_RGBA;
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
/* Get the max read image arg count */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof( maxWriteImages ), &maxWriteImages, NULL );
- test_error( error, "Unable to get max write image arg count from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS,
+ sizeof(maxWriteImages), &maxWriteImages, NULL);
+ test_error(error, "Unable to get max write image arg count from device");
- if( maxWriteImages == 0 )
+ if (maxWriteImages == 0)
{
- log_info( "WARNING: Device reports 0 for a max write image arg count (write image arguments unsupported). Skipping test (implicitly passes). This is only valid if the number of image formats is also 0.\n" );
+ log_info(
+ "WARNING: Device reports 0 for a max write image arg count (write "
+ "image arguments unsupported). Skipping test (implicitly passes). "
+ "This is only valid if the number of image formats is also 0.\n");
return 0;
}
- if( maxWriteImages < minRequiredWriteImages )
+ if (maxWriteImages < minRequiredWriteImages)
{
- log_error( "ERROR: Reported max write image arg count is less than required! (%d)\n", maxWriteImages );
+ log_error("ERROR: Reported max write image arg count is less than "
+ "required! (%d)\n",
+ maxWriteImages);
return -1;
}
log_info("Reported %d max write image args.\n", maxWriteImages);
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL );
- test_error( error, "Unable to get max parameter size from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE,
+ sizeof(maxParameterSize), &maxParameterSize, NULL);
+ test_error(error, "Unable to get max parameter size from device");
// Calculate the number we can use
- if (maxParameterSize/sizeof(cl_mem) < maxWriteImages) {
- log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem)));
- maxWriteImages = (unsigned int)(maxParameterSize/sizeof(cl_mem));
+ if (maxParameterSize / sizeof(cl_mem) < maxWriteImages)
+ {
+ log_info("WARNING: Max parameter size of %d bytes limits test to %d "
+ "max image arguments.\n",
+ (int)maxParameterSize,
+ (int)(maxParameterSize / sizeof(cl_mem)));
+ maxWriteImages = (unsigned int)(maxParameterSize / sizeof(cl_mem));
}
/* Create a program with that many write args + 1 */
- programSrc = (char *)malloc( strlen( sample_write_image_kernel_pattern[ 0 ] ) + ( strlen( writeArgPattern ) + 6 ) * ( maxWriteImages + 1 ) +
- strlen( sample_write_image_kernel_pattern[ 1 ] ) + 1 + 40240 );
+ programSrc = (char *)malloc(
+ strlen(sample_write_image_kernel_pattern[0])
+ + (strlen(writeArgPattern) + 6) * (maxWriteImages + 1)
+ + strlen(sample_write_image_kernel_pattern[1]) + 1 + 40240);
- strcpy( programSrc, sample_write_image_kernel_pattern[ 0 ] );
- strcat( programSrc, "write_only image2d_t dstimg0" );
- for( i = 1; i < maxWriteImages; i++ )
+ strcpy(programSrc, sample_write_image_kernel_pattern[0]);
+ strcat(programSrc, "write_only image2d_t dstimg0");
+ for (i = 1; i < maxWriteImages; i++)
{
- sprintf( writeArgLine, writeArgPattern, i );
- strcat( programSrc, writeArgLine );
+ sprintf(writeArgLine, writeArgPattern, i);
+ strcat(programSrc, writeArgLine);
}
- strcat( programSrc, sample_write_image_kernel_pattern[ 1 ] );
- for ( i = 0; i < maxWriteImages; i++) {
- sprintf( writeArgLine, "\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n", i);
- strcat( programSrc, writeArgLine );
+ strcat(programSrc, sample_write_image_kernel_pattern[1]);
+ for (i = 0; i < maxWriteImages; i++)
+ {
+ sprintf(writeArgLine,
+ "\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n",
+ i);
+ strcat(programSrc, writeArgLine);
}
- strcat( programSrc, sample_write_image_kernel_pattern[ 2 ] );
+ strcat(programSrc, sample_write_image_kernel_pattern[2]);
- error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test");
- test_error( error, "Failed to create the program and kernel.");
- free( programSrc );
+ error =
+ create_single_kernel_helper(context, &program, &kernel, 1,
+ (const char **)&programSrc, "sample_test");
+ test_error(error, "Failed to create the program and kernel.");
+ free(programSrc);
/* Create some I/O streams */
streams = new clMemWrapper[maxWriteImages + 1];
- for( i = 0; i < maxWriteImages; i++ )
+ for (i = 0; i < maxWriteImages; i++)
{
- streams[i] = create_image_2d( context, CL_MEM_READ_WRITE, &image_format_desc, 16, 16, 0, NULL, &error );
- test_error( error, "Unable to allocate test image" );
+ streams[i] =
+ create_image_2d(context, CL_MEM_READ_WRITE, &image_format_desc, 16,
+ 16, 0, NULL, &error);
+ test_error(error, "Unable to allocate test image");
}
/* Set the arguments */
- for( i = 0; i < maxWriteImages; i++ )
+ for (i = 0; i < maxWriteImages; i++)
{
- error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
- test_error( error, "Unable to set kernel arguments" );
+ error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]);
+ test_error(error, "Unable to set kernel arguments");
}
/* Now try running the kernel */
threads[0] = threads[1] = 16;
- error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event );
- test_error( error, "clEnqueueNDRangeKernel failed.");
+ error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0,
+ NULL, &event);
+ test_error(error, "clEnqueueNDRangeKernel failed.");
// Verify that the event does not return an error from the execution
error = clWaitForEvents(1, &event);
- test_error( error, "clWaitForEvent failed");
- error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
- test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
+ test_error(error, "clWaitForEvent failed");
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(event_status), &event_status, NULL);
+ test_error(error,
+ "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
clReleaseEvent(event);
if (event_status < 0)
test_error(error, "Kernel execution event returned error");
@@ -478,7 +560,8 @@ int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_
return 0;
}
-int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_ulong maxAllocSize, memSize, minSizeToTry;
@@ -492,61 +575,89 @@ int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_co
requiredAllocSize = 128 * 1024 * 1024;
/* Get the max mem alloc size */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
- test_error( error, "Unable to get max mem alloc size from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, NULL);
+ test_error(error, "Unable to get max mem alloc size from device");
- error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
- test_error( error, "Unable to get global memory size from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
+ sizeof(memSize), &memSize, NULL);
+ test_error(error, "Unable to get global memory size from device");
- if (memSize > (cl_ulong)SIZE_MAX) {
- memSize = (cl_ulong)SIZE_MAX;
+ if (memSize > (cl_ulong)SIZE_MAX)
+ {
+ memSize = (cl_ulong)SIZE_MAX;
}
- if( maxAllocSize < requiredAllocSize)
+ if (maxAllocSize < requiredAllocSize)
{
- log_error( "ERROR: Reported max allocation size is less than required %lldMB! (%llu or %lluMB, from a total mem size of %lldMB)\n", (requiredAllocSize / 1024) / 1024, maxAllocSize, (maxAllocSize / 1024)/1024, (memSize / 1024)/1024 );
+ log_error("ERROR: Reported max allocation size is less than required "
+ "%lldMB! (%llu or %lluMB, from a total mem size of %lldMB)\n",
+ (requiredAllocSize / 1024) / 1024, maxAllocSize,
+ (maxAllocSize / 1024) / 1024, (memSize / 1024) / 1024);
return -1;
}
- requiredAllocSize = ((memSize / 4) > (1024 * 1024 * 1024)) ? 1024 * 1024 * 1024 : memSize / 4;
+ requiredAllocSize = ((memSize / 4) > (1024 * 1024 * 1024))
+ ? 1024 * 1024 * 1024
+ : memSize / 4;
if (gIsEmbedded)
- requiredAllocSize = (requiredAllocSize < 1 * 1024 * 1024) ? 1 * 1024 * 1024 : requiredAllocSize;
+ requiredAllocSize = (requiredAllocSize < 1 * 1024 * 1024)
+ ? 1 * 1024 * 1024
+ : requiredAllocSize;
else
- requiredAllocSize = (requiredAllocSize < 128 * 1024 * 1024) ? 128 * 1024 * 1024 : requiredAllocSize;
+ requiredAllocSize = (requiredAllocSize < 128 * 1024 * 1024)
+ ? 128 * 1024 * 1024
+ : requiredAllocSize;
- if( maxAllocSize < requiredAllocSize )
+ if (maxAllocSize < requiredAllocSize)
{
- log_error( "ERROR: Reported max allocation size is less than required of total memory! (%llu or %lluMB, from a total mem size of %lluMB)\n", maxAllocSize, (maxAllocSize / 1024)/1024, (requiredAllocSize / 1024)/1024 );
+ log_error(
+ "ERROR: Reported max allocation size is less than required of "
+ "total memory! (%llu or %lluMB, from a total mem size of %lluMB)\n",
+ maxAllocSize, (maxAllocSize / 1024) / 1024,
+ (requiredAllocSize / 1024) / 1024);
return -1;
}
- log_info("Reported max allocation size of %lld bytes (%gMB) and global mem size of %lld bytes (%gMB).\n",
- maxAllocSize, maxAllocSize/(1024.0*1024.0), requiredAllocSize, requiredAllocSize/(1024.0*1024.0));
+ log_info("Reported max allocation size of %lld bytes (%gMB) and global mem "
+ "size of %lld bytes (%gMB).\n",
+ maxAllocSize, maxAllocSize / (1024.0 * 1024.0), requiredAllocSize,
+ requiredAllocSize / (1024.0 * 1024.0));
- if ( memSize < maxAllocSize ) {
- log_info("Global memory size is less than max allocation size, using that.\n");
+ if (memSize < maxAllocSize)
+ {
+ log_info("Global memory size is less than max allocation size, using "
+ "that.\n");
maxAllocSize = memSize;
}
- minSizeToTry = maxAllocSize/16;
- while (maxAllocSize > (maxAllocSize/4)) {
+ minSizeToTry = maxAllocSize / 16;
+ while (maxAllocSize > (maxAllocSize / 4))
+ {
- log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0));
- memHdl = clCreateBuffer( context, CL_MEM_READ_ONLY, (size_t)maxAllocSize, NULL, &error );
- if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE || error == CL_OUT_OF_RESOURCES || error == CL_OUT_OF_HOST_MEMORY) {
- log_info("\tAllocation failed at size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0));
+ log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n",
+ maxAllocSize, (double)maxAllocSize / (1024.0 * 1024.0));
+ memHdl = clCreateBuffer(context, CL_MEM_READ_ONLY, (size_t)maxAllocSize,
+ NULL, &error);
+ if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE
+ || error == CL_OUT_OF_RESOURCES || error == CL_OUT_OF_HOST_MEMORY)
+ {
+ log_info("\tAllocation failed at size of %lld bytes (%gMB).\n",
+ maxAllocSize, (double)maxAllocSize / (1024.0 * 1024.0));
maxAllocSize -= minSizeToTry;
continue;
}
- test_error( error, "clCreateBuffer failed for maximum sized buffer.");
+ test_error(error, "clCreateBuffer failed for maximum sized buffer.");
return 0;
}
- log_error("Failed to allocate even %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0));
+ log_error("Failed to allocate even %lld bytes (%gMB).\n", maxAllocSize,
+ (double)maxAllocSize / (1024.0 * 1024.0));
return -1;
}
-int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
size_t maxDimension;
@@ -554,10 +665,8 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co
cl_image_format image_format_desc;
cl_ulong maxAllocSize;
cl_uint minRequiredDimension;
- size_t length;
-
- PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
+ PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
auto version = get_device_cl_version(deviceID);
if (version == Version(1, 0))
@@ -571,16 +680,20 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co
/* Just get any ol format to test with */
- error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc );
- test_error( error, "Unable to obtain suitable image format to test with!" );
+ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D,
+ CL_MEM_READ_WRITE, 0, &image_format_desc);
+ test_error(error, "Unable to obtain suitable image format to test with!");
/* Get the max 2d image width */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL );
- test_error( error, "Unable to get max image 2d width from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH,
+ sizeof(maxDimension), &maxDimension, NULL);
+ test_error(error, "Unable to get max image 2d width from device");
- if( maxDimension < minRequiredDimension )
+ if (maxDimension < minRequiredDimension)
{
- log_error( "ERROR: Reported max image 2d width is less than required! (%d)\n", (int)maxDimension );
+ log_error(
+ "ERROR: Reported max image 2d width is less than required! (%d)\n",
+ (int)maxDimension);
return -1;
}
log_info("Max reported width is %ld.\n", maxDimension);
@@ -588,34 +701,42 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co
/* Verify we can use the format */
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
image_format_desc.image_channel_order = CL_RGBA;
- if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) {
+ if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
+ CL_MEM_OBJECT_IMAGE2D, &image_format_desc))
+ {
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
return -1;
}
/* Verify that we can actually allocate an image that large */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
- test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
- if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) {
- log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
- (cl_ulong)maxDimension*1*4, maxAllocSize);
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, NULL);
+ test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE.");
+ if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize)
+ {
+ log_error("Can not allocate a large enough image (min size: %lld "
+ "bytes, max allowed: %lld bytes) to test.\n",
+ (cl_ulong)maxDimension * 1 * 4, maxAllocSize);
return -1;
}
- log_info("Attempting to create an image of size %d x 1 = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0));
+ log_info("Attempting to create an image of size %d x 1 = %gMB.\n",
+ (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0));
/* Try to allocate a very big image */
- streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 0, NULL, &error );
- if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
+ streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &image_format_desc,
+ maxDimension, 1, 0, NULL, &error);
+ if ((streams[0] == NULL) || (error != CL_SUCCESS))
{
- print_error( error, "Image 2D creation failed for maximum width" );
+ print_error(error, "Image 2D creation failed for maximum width");
return -1;
}
return 0;
}
-int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
size_t maxDimension;
@@ -623,9 +744,8 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c
cl_image_format image_format_desc;
cl_ulong maxAllocSize;
cl_uint minRequiredDimension;
- size_t length;
- PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
+ PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
auto version = get_device_cl_version(deviceID);
if (version == Version(1, 0))
@@ -638,16 +758,20 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c
}
/* Just get any ol format to test with */
- error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc );
- test_error( error, "Unable to obtain suitable image format to test with!" );
+ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D,
+ CL_MEM_READ_WRITE, 0, &image_format_desc);
+ test_error(error, "Unable to obtain suitable image format to test with!");
/* Get the max 2d image width */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL );
- test_error( error, "Unable to get max image 2d height from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
+ sizeof(maxDimension), &maxDimension, NULL);
+ test_error(error, "Unable to get max image 2d height from device");
- if( maxDimension < minRequiredDimension )
+ if (maxDimension < minRequiredDimension)
{
- log_error( "ERROR: Reported max image 2d height is less than required! (%d)\n", (int)maxDimension );
+ log_error(
+ "ERROR: Reported max image 2d height is less than required! (%d)\n",
+ (int)maxDimension);
return -1;
}
log_info("Max reported height is %ld.\n", maxDimension);
@@ -655,56 +779,67 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c
/* Verify we can use the format */
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
image_format_desc.image_channel_order = CL_RGBA;
- if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) {
+ if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
+ CL_MEM_OBJECT_IMAGE2D, &image_format_desc))
+ {
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
return -1;
}
/* Verify that we can actually allocate an image that large */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
- test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
- if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) {
- log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
- (cl_ulong)maxDimension*1*4, maxAllocSize);
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, NULL);
+ test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE.");
+ if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize)
+ {
+ log_error("Can not allocate a large enough image (min size: %lld "
+ "bytes, max allowed: %lld bytes) to test.\n",
+ (cl_ulong)maxDimension * 1 * 4, maxAllocSize);
return -1;
}
- log_info("Attempting to create an image of size 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0));
+ log_info("Attempting to create an image of size 1 x %d = %gMB.\n",
+ (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0));
/* Try to allocate a very big image */
- streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 0, NULL, &error );
- if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
+ streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &image_format_desc,
+ 1, maxDimension, 0, NULL, &error);
+ if ((streams[0] == NULL) || (error != CL_SUCCESS))
{
- print_error( error, "Image 2D creation failed for maximum height" );
+ print_error(error, "Image 2D creation failed for maximum height");
return -1;
}
return 0;
}
-int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
size_t maxDimension;
clMemWrapper streams[1];
- cl_image_format image_format_desc;
+ cl_image_format image_format_desc;
cl_ulong maxAllocSize;
- PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID )
+ PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID)
/* Just get any ol format to test with */
error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
CL_MEM_READ_ONLY, 0, &image_format_desc);
- test_error( error, "Unable to obtain suitable image format to test with!" );
+ test_error(error, "Unable to obtain suitable image format to test with!");
/* Get the max 2d image width */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL );
- test_error( error, "Unable to get max image 3d width from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH,
+ sizeof(maxDimension), &maxDimension, NULL);
+ test_error(error, "Unable to get max image 3d width from device");
- if( maxDimension < 2048 )
+ if (maxDimension < 2048)
{
- log_error( "ERROR: Reported max image 3d width is less than required! (%d)\n", (int)maxDimension );
+ log_error(
+ "ERROR: Reported max image 3d width is less than required! (%d)\n",
+ (int)maxDimension);
return -1;
}
log_info("Max reported width is %ld.\n", maxDimension);
@@ -712,56 +847,68 @@ int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_co
/* Verify we can use the format */
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
image_format_desc.image_channel_order = CL_RGBA;
- if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) {
+ if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
+ CL_MEM_OBJECT_IMAGE3D, &image_format_desc))
+ {
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
return -1;
}
/* Verify that we can actually allocate an image that large */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
- test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
- if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) {
- log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
- (cl_ulong)maxDimension*2*4, maxAllocSize);
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, NULL);
+ test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE.");
+ if ((cl_ulong)maxDimension * 2 * 4 > maxAllocSize)
+ {
+ log_error("Can not allocate a large enough image (min size: %lld "
+ "bytes, max allowed: %lld bytes) to test.\n",
+ (cl_ulong)maxDimension * 2 * 4, maxAllocSize);
return -1;
}
- log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0));
+ log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n",
+ (int)maxDimension,
+ (2 * (float)maxDimension * 4 / 1024.0 / 1024.0));
/* Try to allocate a very big image */
- streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 2, 0, 0, NULL, &error );
- if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
+ streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc,
+ maxDimension, 1, 2, 0, 0, NULL, &error);
+ if ((streams[0] == NULL) || (error != CL_SUCCESS))
{
- print_error( error, "Image 3D creation failed for maximum width" );
+ print_error(error, "Image 3D creation failed for maximum width");
return -1;
}
return 0;
}
-int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
size_t maxDimension;
clMemWrapper streams[1];
- cl_image_format image_format_desc;
+ cl_image_format image_format_desc;
cl_ulong maxAllocSize;
- PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID )
+ PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID)
/* Just get any ol format to test with */
error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
CL_MEM_READ_ONLY, 0, &image_format_desc);
- test_error( error, "Unable to obtain suitable image format to test with!" );
+ test_error(error, "Unable to obtain suitable image format to test with!");
/* Get the max 2d image width */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL );
- test_error( error, "Unable to get max image 3d height from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
+ sizeof(maxDimension), &maxDimension, NULL);
+ test_error(error, "Unable to get max image 3d height from device");
- if( maxDimension < 2048 )
+ if (maxDimension < 2048)
{
- log_error( "ERROR: Reported max image 3d height is less than required! (%d)\n", (int)maxDimension );
+ log_error(
+ "ERROR: Reported max image 3d height is less than required! (%d)\n",
+ (int)maxDimension);
return -1;
}
log_info("Max reported height is %ld.\n", maxDimension);
@@ -769,27 +916,35 @@ int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_c
/* Verify we can use the format */
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
image_format_desc.image_channel_order = CL_RGBA;
- if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) {
+ if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
+ CL_MEM_OBJECT_IMAGE3D, &image_format_desc))
+ {
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
return -1;
}
/* Verify that we can actually allocate an image that large */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
- test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
- if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) {
- log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
- (cl_ulong)maxDimension*2*4, maxAllocSize);
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, NULL);
+ test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE.");
+ if ((cl_ulong)maxDimension * 2 * 4 > maxAllocSize)
+ {
+ log_error("Can not allocate a large enough image (min size: %lld "
+ "bytes, max allowed: %lld bytes) to test.\n",
+ (cl_ulong)maxDimension * 2 * 4, maxAllocSize);
return -1;
}
- log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0));
+ log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n",
+ (int)maxDimension,
+ (2 * (float)maxDimension * 4 / 1024.0 / 1024.0));
/* Try to allocate a very big image */
- streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 2, 0, 0, NULL, &error );
- if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
+ streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc,
+ 1, maxDimension, 2, 0, 0, NULL, &error);
+ if ((streams[0] == NULL) || (error != CL_SUCCESS))
{
- print_error( error, "Image 3D creation failed for maximum height" );
+ print_error(error, "Image 3D creation failed for maximum height");
return -1;
}
@@ -797,29 +952,33 @@ int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_c
}
-int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
size_t maxDimension;
clMemWrapper streams[1];
- cl_image_format image_format_desc;
+ cl_image_format image_format_desc;
cl_ulong maxAllocSize;
- PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID )
+ PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID)
/* Just get any ol format to test with */
error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
CL_MEM_READ_ONLY, 0, &image_format_desc);
- test_error( error, "Unable to obtain suitable image format to test with!" );
+ test_error(error, "Unable to obtain suitable image format to test with!");
/* Get the max 2d image width */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDimension ), &maxDimension, NULL );
- test_error( error, "Unable to get max image 3d depth from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH,
+ sizeof(maxDimension), &maxDimension, NULL);
+ test_error(error, "Unable to get max image 3d depth from device");
- if( maxDimension < 2048 )
+ if (maxDimension < 2048)
{
- log_error( "ERROR: Reported max image 3d depth is less than required! (%d)\n", (int)maxDimension );
+ log_error(
+ "ERROR: Reported max image 3d depth is less than required! (%d)\n",
+ (int)maxDimension);
return -1;
}
log_info("Max reported depth is %ld.\n", maxDimension);
@@ -827,55 +986,67 @@ int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_co
/* Verify we can use the format */
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
image_format_desc.image_channel_order = CL_RGBA;
- if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) {
+ if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
+ CL_MEM_OBJECT_IMAGE3D, &image_format_desc))
+ {
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
return -1;
}
/* Verify that we can actually allocate an image that large */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
- test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
- if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) {
- log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
- (cl_ulong)maxDimension*1*4, maxAllocSize);
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, NULL);
+ test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE.");
+ if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize)
+ {
+ log_error("Can not allocate a large enough image (min size: %lld "
+ "bytes, max allowed: %lld bytes) to test.\n",
+ (cl_ulong)maxDimension * 1 * 4, maxAllocSize);
return -1;
}
- log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0));
+ log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n",
+ (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0));
/* Try to allocate a very big image */
- streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error );
- if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
+ streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc,
+ 1, 1, maxDimension, 0, 0, NULL, &error);
+ if ((streams[0] == NULL) || (error != CL_SUCCESS))
{
- print_error( error, "Image 3D creation failed for maximum depth" );
+ print_error(error, "Image 3D creation failed for maximum depth");
return -1;
}
return 0;
}
-int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_image_array_size(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
size_t maxDimension;
clMemWrapper streams[1];
- cl_image_format image_format_desc;
+ cl_image_format image_format_desc;
cl_ulong maxAllocSize;
size_t minRequiredDimension = gIsEmbedded ? 256 : 2048;
- PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID );
+ PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID);
/* Just get any ol format to test with */
- error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_READ_WRITE, 0, &image_format_desc );
- test_error( error, "Unable to obtain suitable image format to test with!" );
+ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D_ARRAY,
+ CL_MEM_READ_WRITE, 0, &image_format_desc);
+ test_error(error, "Unable to obtain suitable image format to test with!");
/* Get the max image array width */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxDimension ), &maxDimension, NULL );
- test_error( error, "Unable to get max image array size from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE,
+ sizeof(maxDimension), &maxDimension, NULL);
+ test_error(error, "Unable to get max image array size from device");
- if( maxDimension < minRequiredDimension )
+ if (maxDimension < minRequiredDimension)
{
- log_error( "ERROR: Reported max image array size is less than required! (%d)\n", (int)maxDimension );
+ log_error("ERROR: Reported max image array size is less than required! "
+ "(%d)\n",
+ (int)maxDimension);
return -1;
}
log_info("Max reported image array size is %ld.\n", maxDimension);
@@ -883,96 +1054,127 @@ int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_
/* Verify we can use the format */
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
image_format_desc.image_channel_order = CL_RGBA;
- if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D_ARRAY, &image_format_desc)) {
+ if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
+ CL_MEM_OBJECT_IMAGE2D_ARRAY,
+ &image_format_desc))
+ {
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
return -1;
}
/* Verify that we can actually allocate an image that large */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
- test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
- if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) {
- log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
- (cl_ulong)maxDimension*1*4, maxAllocSize);
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, NULL);
+ test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE.");
+ if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize)
+ {
+ log_error("Can not allocate a large enough image (min size: %lld "
+ "bytes, max allowed: %lld bytes) to test.\n",
+ (cl_ulong)maxDimension * 1 * 4, maxAllocSize);
return -1;
}
- log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0));
+ log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n",
+ (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0));
/* Try to allocate a very big image */
- streams[0] = create_image_2d_array( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error );
- if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
+ streams[0] =
+ create_image_2d_array(context, CL_MEM_READ_ONLY, &image_format_desc, 1,
+ 1, maxDimension, 0, 0, NULL, &error);
+ if ((streams[0] == NULL) || (error != CL_SUCCESS))
{
- print_error( error, "2D Image Array creation failed for maximum array size" );
+ print_error(error,
+ "2D Image Array creation failed for maximum array size");
return -1;
}
return 0;
}
-int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
size_t maxDimensionPixels;
clMemWrapper streams[2];
- cl_image_format image_format_desc = {0};
+ cl_image_format image_format_desc = { 0 };
cl_ulong maxAllocSize;
size_t minRequiredDimension = gIsEmbedded ? 2048 : 65536;
unsigned int i = 0;
size_t pixelBytes = 0;
- PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID );
+ PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID);
/* Get the max memory allocation size */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
- test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, NULL);
+ test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE.");
/* Get the max image array width */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof( maxDimensionPixels ), &maxDimensionPixels, NULL );
- test_error( error, "Unable to get max image buffer size from device" );
+ error =
+ clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE,
+ sizeof(maxDimensionPixels), &maxDimensionPixels, NULL);
+ test_error(error, "Unable to get max image buffer size from device");
- if( maxDimensionPixels < minRequiredDimension )
+ if (maxDimensionPixels < minRequiredDimension)
{
- log_error( "ERROR: Reported max image buffer size is less than required! (%d)\n", (int)maxDimensionPixels );
+ log_error("ERROR: Reported max image buffer size is less than "
+ "required! (%d)\n",
+ (int)maxDimensionPixels);
return -1;
}
- log_info("Max reported image buffer size is %ld pixels.\n", maxDimensionPixels);
+ log_info("Max reported image buffer size is %ld pixels.\n",
+ maxDimensionPixels);
pixelBytes = maxAllocSize / maxDimensionPixels;
- if ( pixelBytes == 0 )
+ if (pixelBytes == 0)
{
- log_error( "Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image of maximum size!\n" );
+ log_error("Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than "
+ "CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image "
+ "of maximum size!\n");
return -1;
}
error = -1;
- for ( i = pixelBytes; i > 0; --i )
+ for (i = pixelBytes; i > 0; --i)
{
- error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE1D, CL_MEM_READ_ONLY, i, &image_format_desc );
- if ( error == CL_SUCCESS )
+ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE1D,
+ CL_MEM_READ_ONLY, i, &image_format_desc);
+ if (error == CL_SUCCESS)
{
pixelBytes = i;
break;
}
}
- test_error( error, "Device does not support format to be used to allocate image of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n" );
+ test_error(error,
+ "Device does not support format to be used to allocate image of "
+ "CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n");
- log_info("Attempting to create an 1D image with channel order %s from buffer of size %d = %gMB.\n",
- GetChannelOrderName( image_format_desc.image_channel_order ), (int)maxDimensionPixels, ((float)maxDimensionPixels*pixelBytes/1024.0/1024.0));
+ log_info("Attempting to create an 1D image with channel order %s from "
+ "buffer of size %d = %gMB.\n",
+ GetChannelOrderName(image_format_desc.image_channel_order),
+ (int)maxDimensionPixels,
+ ((float)maxDimensionPixels * pixelBytes / 1024.0 / 1024.0));
/* Try to allocate a buffer */
- streams[0] = clCreateBuffer( context, CL_MEM_READ_ONLY, maxDimensionPixels*pixelBytes, NULL, &error );
- if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
+ streams[0] = clCreateBuffer(context, CL_MEM_READ_ONLY,
+ maxDimensionPixels * pixelBytes, NULL, &error);
+ if ((streams[0] == NULL) || (error != CL_SUCCESS))
{
- print_error( error, "Buffer creation failed for maximum image buffer size" );
+ print_error(error,
+ "Buffer creation failed for maximum image buffer size");
return -1;
}
/* Try to allocate a 1D image array from buffer */
- streams[1] = create_image_1d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimensionPixels, 0, NULL, streams[0], &error );
- if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
- {
- print_error( error, "1D Image from buffer creation failed for maximum image buffer size" );
+ streams[1] =
+ create_image_1d(context, CL_MEM_READ_ONLY, &image_format_desc,
+ maxDimensionPixels, 0, NULL, streams[0], &error);
+ if ((streams[0] == NULL) || (error != CL_SUCCESS))
+ {
+ print_error(error,
+ "1D Image from buffer creation failed for maximum image "
+ "buffer size");
return -1;
}
@@ -980,8 +1182,8 @@ int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl
}
-
-int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_parameter_size(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error, retVal, i;
size_t maxSize;
@@ -1000,62 +1202,78 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co
/* Get the max param size */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxSize ), &maxSize, NULL );
- test_error( error, "Unable to get max parameter size from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE,
+ sizeof(maxSize), &maxSize, NULL);
+ test_error(error, "Unable to get max parameter size from device");
- if( ((!gIsEmbedded) && (maxSize < 1024)) || ((gIsEmbedded) && (maxSize < 256)) )
+ if (((!gIsEmbedded) && (maxSize < 1024))
+ || ((gIsEmbedded) && (maxSize < 256)))
{
- log_error( "ERROR: Reported max parameter size is less than required! (%d)\n", (int)maxSize );
+ log_error(
+ "ERROR: Reported max parameter size is less than required! (%d)\n",
+ (int)maxSize);
return -1;
}
/* The embedded profile without cles_khr_int64 extension does not require
* longs, so use ints */
if (embeddedNoLong)
- numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_int);
+ numberOfIntParametersToTry = numberExpected =
+ (maxSize - sizeof(cl_mem)) / sizeof(cl_int);
else
- numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_long);
+ numberOfIntParametersToTry = numberExpected =
+ (maxSize - sizeof(cl_mem)) / sizeof(cl_long);
- decrement = (size_t)(numberOfIntParametersToTry/8);
- if (decrement < 1)
- decrement = 1;
+ decrement = (size_t)(numberOfIntParametersToTry / 8);
+ if (decrement < 1) decrement = 1;
log_info("Reported max parameter size of %d bytes.\n", (int)maxSize);
- while (numberOfIntParametersToTry > 0) {
- // These need to be inside to be deallocated automatically on each loop iteration.
+ while (numberOfIntParametersToTry > 0)
+ {
+ // These need to be inside to be deallocated automatically on each loop
+ // iteration.
clProgramWrapper program;
clMemWrapper mem;
clKernelWrapper kernel;
if (embeddedNoLong)
{
- log_info("Trying a kernel with %ld int arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n",
- numberOfIntParametersToTry, sizeof(cl_int)*numberOfIntParametersToTry, sizeof(cl_mem),
- sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int));
+ log_info(
+ "Trying a kernel with %ld int arguments (%ld bytes) and one "
+ "cl_mem (%ld bytes) for %ld bytes total.\n",
+ numberOfIntParametersToTry,
+ sizeof(cl_int) * numberOfIntParametersToTry, sizeof(cl_mem),
+ sizeof(cl_mem) + numberOfIntParametersToTry * sizeof(cl_int));
}
else
{
- log_info("Trying a kernel with %ld long arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n",
- numberOfIntParametersToTry, sizeof(cl_long)*numberOfIntParametersToTry, sizeof(cl_mem),
- sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long));
+ log_info(
+ "Trying a kernel with %ld long arguments (%ld bytes) and one "
+ "cl_mem (%ld bytes) for %ld bytes total.\n",
+ numberOfIntParametersToTry,
+ sizeof(cl_long) * numberOfIntParametersToTry, sizeof(cl_mem),
+ sizeof(cl_mem) + numberOfIntParametersToTry * sizeof(cl_long));
}
// Allocate memory for the program storage
- data = malloc(sizeof(cl_long)*numberOfIntParametersToTry);
-
- argumentLine = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32);
- codeLines = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32);
- programSrc = (char*)malloc(sizeof(char)*(numberOfIntParametersToTry*64+1024));
+ data = malloc(sizeof(cl_long) * numberOfIntParametersToTry);
+
+ argumentLine =
+ (char *)malloc(sizeof(char) * numberOfIntParametersToTry * 32);
+ codeLines =
+ (char *)malloc(sizeof(char) * numberOfIntParametersToTry * 32);
+ programSrc = (char *)malloc(sizeof(char)
+ * (numberOfIntParametersToTry * 64 + 1024));
argumentLine[0] = '\0';
codeLines[0] = '\0';
programSrc[0] = '\0';
// Generate our results
expectedResult = 0;
- for (i=0; i<(int)numberOfIntParametersToTry; i++)
- {
- if( gHasLong )
+ for (i = 0; i < (int)numberOfIntParametersToTry; i++)
+ {
+ if (gHasLong)
{
((cl_long *)data)[i] = i;
expectedResult += i;
@@ -1068,30 +1286,35 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co
}
// Build the program
- if( gHasLong)
+ if (gHasLong)
sprintf(argumentLine, "%s", "long arg0");
else
sprintf(argumentLine, "%s", "int arg0");
sprintf(codeLines, "%s", "result[0] += arg0;");
- for (i=1; i<(int)numberOfIntParametersToTry; i++)
+ for (i = 1; i < (int)numberOfIntParametersToTry; i++)
{
- if( gHasLong)
- sprintf(argumentLine + strlen( argumentLine), ", long arg%d", i);
+ if (gHasLong)
+ sprintf(argumentLine + strlen(argumentLine), ", long arg%d", i);
else
- sprintf(argumentLine + strlen( argumentLine), ", int arg%d", i);
+ sprintf(argumentLine + strlen(argumentLine), ", int arg%d", i);
- sprintf(codeLines + strlen( codeLines), "\nresult[0] += arg%d;", i);
+ sprintf(codeLines + strlen(codeLines), "\nresult[0] += arg%d;", i);
}
/* Create a kernel to test with */
- sprintf( programSrc, gHasLong ? sample_large_parmam_kernel_pattern[0]:
- sample_large_int_parmam_kernel_pattern[0], argumentLine, codeLines);
+ sprintf(programSrc,
+ gHasLong ? sample_large_parmam_kernel_pattern[0]
+ : sample_large_int_parmam_kernel_pattern[0],
+ argumentLine, codeLines);
ptr = programSrc;
- if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" ) != 0 )
+ if (create_single_kernel_helper(context, &program, &kernel, 1,
+ (const char **)&ptr, "sample_test")
+ != 0)
{
- log_info("Create program failed, decrementing number of parameters to try.\n");
+ log_info("Create program failed, decrementing number of parameters "
+ "to try.\n");
numberOfIntParametersToTry -= decrement;
continue;
}
@@ -1103,88 +1326,119 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co
&error);
test_error(error, "clCreateBuffer failed");
- for (i=0; i<(int)numberOfIntParametersToTry; i++) {
- if(gHasLong)
- error = clSetKernelArg(kernel, i, sizeof(cl_long), &(((cl_long*)data)[i]));
+ for (i = 0; i < (int)numberOfIntParametersToTry; i++)
+ {
+ if (gHasLong)
+ error = clSetKernelArg(kernel, i, sizeof(cl_long),
+ &(((cl_long *)data)[i]));
else
- error = clSetKernelArg(kernel, i, sizeof(cl_int), &(((cl_int*)data)[i]));
+ error = clSetKernelArg(kernel, i, sizeof(cl_int),
+ &(((cl_int *)data)[i]));
- if (error != CL_SUCCESS) {
- log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error));
+ if (error != CL_SUCCESS)
+ {
+ log_info("clSetKernelArg failed (%s), decrementing number of "
+ "parameters to try.\n",
+ IGetErrorString(error));
numberOfIntParametersToTry -= decrement;
break;
}
}
- if (error != CL_SUCCESS)
- continue;
+ if (error != CL_SUCCESS) continue;
error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mem);
- if (error != CL_SUCCESS) {
- log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error));
+ if (error != CL_SUCCESS)
+ {
+ log_info("clSetKernelArg failed (%s), decrementing number of "
+ "parameters to try.\n",
+ IGetErrorString(error));
numberOfIntParametersToTry -= decrement;
continue;
}
- size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1};
- error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event);
- if (error != CL_SUCCESS) {
- log_info( "clEnqueueNDRangeKernel failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error));
+ size_t globalDim[3] = { 1, 1, 1 }, localDim[3] = { 1, 1, 1 };
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim,
+ localDim, 0, NULL, &event);
+ if (error != CL_SUCCESS)
+ {
+ log_info("clEnqueueNDRangeKernel failed (%s), decrementing number "
+ "of parameters to try.\n",
+ IGetErrorString(error));
numberOfIntParametersToTry -= decrement;
continue;
}
// Verify that the event does not return an error from the execution
error = clWaitForEvents(1, &event);
- test_error( error, "clWaitForEvent failed");
- error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
- test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
+ test_error(error, "clWaitForEvent failed");
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(event_status), &event_status, NULL);
+ test_error(
+ error,
+ "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
clReleaseEvent(event);
if (event_status < 0)
test_error(error, "Kernel execution event returned error");
- if(gHasLong)
- error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long), &long_result, 0, NULL, NULL);
+ if (gHasLong)
+ error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long),
+ &long_result, 0, NULL, NULL);
else
- error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int), &int_result, 0, NULL, NULL);
+ error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int),
+ &int_result, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed")
- free(data);
+ free(data);
free(argumentLine);
free(codeLines);
free(programSrc);
- if(gHasLong)
+ if (gHasLong)
{
- if (long_result != expectedResult) {
- log_error("Expected result (%lld) does not equal actual result (%lld).\n", expectedResult, long_result);
+ if (long_result != expectedResult)
+ {
+ log_error("Expected result (%lld) does not equal actual result "
+ "(%lld).\n",
+ expectedResult, long_result);
numberOfIntParametersToTry -= decrement;
continue;
- } else {
- log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long));
+ }
+ else
+ {
+ log_info("Results verified at %ld bytes of arguments.\n",
+ sizeof(cl_mem)
+ + numberOfIntParametersToTry * sizeof(cl_long));
break;
}
}
else
{
- if (int_result != expectedResult) {
- log_error("Expected result (%lld) does not equal actual result (%d).\n", expectedResult, int_result);
+ if (int_result != expectedResult)
+ {
+ log_error("Expected result (%lld) does not equal actual result "
+ "(%d).\n",
+ expectedResult, int_result);
numberOfIntParametersToTry -= decrement;
continue;
- } else {
- log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int));
+ }
+ else
+ {
+ log_info("Results verified at %ld bytes of arguments.\n",
+ sizeof(cl_mem)
+ + numberOfIntParametersToTry * sizeof(cl_int));
break;
}
}
}
- if (numberOfIntParametersToTry == (long)numberExpected)
- return 0;
+ if (numberOfIntParametersToTry == (long)numberExpected) return 0;
return -1;
}
-int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_samplers(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_uint maxSamplers, i;
@@ -1197,104 +1451,124 @@ int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_
cl_uint minRequiredSamplers = gIsEmbedded ? 8 : 16;
- PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
+ PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
/* Get the max value */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_SAMPLERS, sizeof( maxSamplers ), &maxSamplers, NULL );
- test_error( error, "Unable to get max sampler count from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_SAMPLERS,
+ sizeof(maxSamplers), &maxSamplers, NULL);
+ test_error(error, "Unable to get max sampler count from device");
- if( maxSamplers < minRequiredSamplers )
+ if (maxSamplers < minRequiredSamplers)
{
- log_error( "ERROR: Reported max sampler count is less than required! (%d)\n", (int)maxSamplers );
+ log_error(
+ "ERROR: Reported max sampler count is less than required! (%d)\n",
+ (int)maxSamplers);
return -1;
}
log_info("Reported max %d samplers.\n", maxSamplers);
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL );
- test_error( error, "Unable to get max parameter size from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE,
+ sizeof(maxParameterSize), &maxParameterSize, NULL);
+ test_error(error, "Unable to get max parameter size from device");
// Subtract the size of the result
- maxParameterSize -= 2*sizeof(cl_mem);
+ maxParameterSize -= 2 * sizeof(cl_mem);
// Calculate the number we can use
- if (maxParameterSize/sizeof(cl_sampler) < maxSamplers) {
- log_info("WARNING: Max parameter size of %d bytes limits test to %d max sampler arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_sampler)));
- maxSamplers = (unsigned int)(maxParameterSize/sizeof(cl_sampler));
+ if (maxParameterSize / sizeof(cl_sampler) < maxSamplers)
+ {
+ log_info("WARNING: Max parameter size of %d bytes limits test to %d "
+ "max sampler arguments.\n",
+ (int)maxParameterSize,
+ (int)(maxParameterSize / sizeof(cl_sampler)));
+ maxSamplers = (unsigned int)(maxParameterSize / sizeof(cl_sampler));
}
/* Create a kernel to test with */
- programSrc = (char *)malloc( ( strlen( sample_sampler_kernel_pattern[ 1 ] ) + 8 ) * ( maxSamplers ) +
- strlen( sample_sampler_kernel_pattern[ 0 ] ) + strlen( sample_sampler_kernel_pattern[ 2 ] ) +
- ( strlen( sample_sampler_kernel_pattern[ 3 ] ) + 8 ) * maxSamplers +
- strlen( sample_sampler_kernel_pattern[ 4 ] ) );
- strcpy( programSrc, sample_sampler_kernel_pattern[ 0 ] );
- for( i = 0; i < maxSamplers; i++ )
+ programSrc = (char *)malloc(
+ (strlen(sample_sampler_kernel_pattern[1]) + 8) * (maxSamplers)
+ + strlen(sample_sampler_kernel_pattern[0])
+ + strlen(sample_sampler_kernel_pattern[2])
+ + (strlen(sample_sampler_kernel_pattern[3]) + 8) * maxSamplers
+ + strlen(sample_sampler_kernel_pattern[4]));
+ strcpy(programSrc, sample_sampler_kernel_pattern[0]);
+ for (i = 0; i < maxSamplers; i++)
{
- sprintf( samplerLine, sample_sampler_kernel_pattern[ 1 ], i );
- strcat( programSrc, samplerLine );
+ sprintf(samplerLine, sample_sampler_kernel_pattern[1], i);
+ strcat(programSrc, samplerLine);
}
- strcat( programSrc, sample_sampler_kernel_pattern[ 2 ] );
- for( i = 0; i < maxSamplers; i++ )
+ strcat(programSrc, sample_sampler_kernel_pattern[2]);
+ for (i = 0; i < maxSamplers; i++)
{
- sprintf( samplerLine, sample_sampler_kernel_pattern[ 3 ], i );
- strcat( programSrc, samplerLine );
+ sprintf(samplerLine, sample_sampler_kernel_pattern[3], i);
+ strcat(programSrc, samplerLine);
}
- strcat( programSrc, sample_sampler_kernel_pattern[ 4 ] );
+ strcat(programSrc, sample_sampler_kernel_pattern[4]);
- error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test");
- test_error( error, "Failed to create the program and kernel.");
+ error =
+ create_single_kernel_helper(context, &program, &kernel, 1,
+ (const char **)&programSrc, "sample_test");
+ test_error(error, "Failed to create the program and kernel.");
// We have to set up some fake parameters so it'll work
clSamplerWrapper *samplers = new clSamplerWrapper[maxSamplers];
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- clMemWrapper image = create_image_2d( context, CL_MEM_READ_WRITE, &format, 16, 16, 0, NULL, &error );
- test_error( error, "Unable to create a test image" );
+ clMemWrapper image = create_image_2d(context, CL_MEM_READ_WRITE, &format,
+ 16, 16, 0, NULL, &error);
+ test_error(error, "Unable to create a test image");
clMemWrapper stream =
clCreateBuffer(context, CL_MEM_READ_WRITE, 16, NULL, &error);
- test_error( error, "Unable to create test buffer" );
+ test_error(error, "Unable to create test buffer");
- error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &image );
- error |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), &stream );
- test_error( error, "Unable to set kernel arguments" );
- for( i = 0; i < maxSamplers; i++ )
+ error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &image);
+ error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &stream);
+ test_error(error, "Unable to set kernel arguments");
+ for (i = 0; i < maxSamplers; i++)
{
- samplers[ i ] = clCreateSampler( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error );
- test_error( error, "Unable to create sampler" );
+ samplers[i] = clCreateSampler(context, CL_FALSE, CL_ADDRESS_NONE,
+ CL_FILTER_NEAREST, &error);
+ test_error(error, "Unable to create sampler");
- error = clSetKernelArg( kernel, 2 + i, sizeof( cl_sampler ), &samplers[ i ] );
- test_error( error, "Unable to set sampler argument" );
+ error = clSetKernelArg(kernel, 2 + i, sizeof(cl_sampler), &samplers[i]);
+ test_error(error, "Unable to set sampler argument");
}
- size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1};
- error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event);
- test_error(error, "clEnqueueNDRangeKernel failed with maximum number of samplers.");
+ size_t globalDim[3] = { 1, 1, 1 }, localDim[3] = { 1, 1, 1 };
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim,
+ 0, NULL, &event);
+ test_error(
+ error,
+ "clEnqueueNDRangeKernel failed with maximum number of samplers.");
// Verify that the event does not return an error from the execution
error = clWaitForEvents(1, &event);
- test_error( error, "clWaitForEvent failed");
- error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
- test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
+ test_error(error, "clWaitForEvent failed");
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(event_status), &event_status, NULL);
+ test_error(error,
+ "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
clReleaseEvent(event);
if (event_status < 0)
test_error(error, "Kernel execution event returned error");
- free( programSrc );
+ free(programSrc);
delete[] samplers;
return 0;
}
#define PASSING_FRACTION 4
-int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
- size_t threads[1], localThreads[1];
+ size_t threads[1], localThreads[1];
cl_int *constantData, *resultData;
cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize;
int i;
@@ -1303,48 +1577,56 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
MTdata d;
/* Verify our test buffer won't be bigger than allowed */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
- test_error( error, "Unable to get max constant buffer size" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
+ sizeof(maxSize), &maxSize, 0);
+ test_error(error, "Unable to get max constant buffer size");
- if( ( 0 == gIsEmbedded && maxSize < 64L * 1024L ) || maxSize < 1L * 1024L )
+ if ((0 == gIsEmbedded && maxSize < 64L * 1024L) || maxSize < 1L * 1024L)
{
- log_error( "ERROR: Reported max constant buffer size less than required by OpenCL 1.0 (reported %d KB)\n", (int)( maxSize / 1024L ) );
+ log_error("ERROR: Reported max constant buffer size less than required "
+ "by OpenCL 1.0 (reported %d KB)\n",
+ (int)(maxSize / 1024L));
return -1;
}
log_info("Reported max constant buffer size of %lld bytes.\n", maxSize);
// Limit test buffer size to 1/8 of CL_DEVICE_GLOBAL_MEM_SIZE
- error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
+ sizeof(maxGlobalSize), &maxGlobalSize, 0);
test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
- if (maxSize > maxGlobalSize / 8)
- maxSize = maxGlobalSize / 8;
+ if (maxSize > maxGlobalSize / 8) maxSize = maxGlobalSize / 8;
- error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, 0);
test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
-
- if (maxSize > maxAllocSize)
- maxSize = maxAllocSize;
-
+
+ if (maxSize > maxAllocSize) maxSize = maxAllocSize;
+
/* Create a kernel to test with */
- if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_arg_kernel, "sample_test" ) != 0 )
+ if (create_single_kernel_helper(context, &program, &kernel, 1,
+ sample_const_arg_kernel, "sample_test")
+ != 0)
{
return -1;
}
/* Try the returned max size and decrease it until we get one that works. */
- stepSize = maxSize/16;
+ stepSize = maxSize / 16;
currentSize = maxSize;
int allocPassed = 0;
- d = init_genrand( gRandomSeed );
- while (!allocPassed && currentSize >= maxSize/PASSING_FRACTION) {
- log_info("Attempting to allocate constant buffer of size %lld bytes\n", maxSize);
+ d = init_genrand(gRandomSeed);
+ while (!allocPassed && currentSize >= maxSize / PASSING_FRACTION)
+ {
+ log_info("Attempting to allocate constant buffer of size %lld bytes\n",
+ maxSize);
/* Create some I/O streams */
- size_t sizeToAllocate = ((size_t)currentSize/sizeof( cl_int ))*sizeof(cl_int);
- size_t numberOfInts = sizeToAllocate/sizeof(cl_int);
- constantData = (cl_int *)malloc( sizeToAllocate);
+ size_t sizeToAllocate =
+ ((size_t)currentSize / sizeof(cl_int)) * sizeof(cl_int);
+ size_t numberOfInts = sizeToAllocate / sizeof(cl_int);
+ constantData = (cl_int *)malloc(sizeToAllocate);
if (constantData == NULL)
{
log_error("Failed to allocate memory for constantData!\n");
@@ -1352,53 +1634,74 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
return EXIT_FAILURE;
}
- for(i=0; i<(int)(numberOfInts); i++)
+ for (i = 0; i < (int)(numberOfInts); i++)
constantData[i] = (int)genrand_int32(d);
clMemWrapper streams[3];
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
sizeToAllocate, constantData, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
/* Set the arguments */
- error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
- test_error( error, "Unable to set indexed kernel arguments" );
- error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
- test_error( error, "Unable to set indexed kernel arguments" );
+ error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+ test_error(error, "Unable to set indexed kernel arguments");
+ error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+ test_error(error, "Unable to set indexed kernel arguments");
/* Test running the kernel and verifying it */
threads[0] = numberOfInts;
localThreads[0] = 1;
- log_info("Filling constant buffer with %d cl_ints (%d bytes).\n", (int)threads[0], (int)(threads[0]*sizeof(cl_int)));
-
- error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event );
- /* If we failed due to a resource issue, reduce the size and try again. */
- if ((error == CL_OUT_OF_RESOURCES) || (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (error == CL_OUT_OF_HOST_MEMORY)) {
- log_info("Kernel enqueue failed at size %lld, trying at a reduced size.\n", currentSize);
+ log_info("Filling constant buffer with %d cl_ints (%d bytes).\n",
+ (int)threads[0], (int)(threads[0] * sizeof(cl_int)));
+
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, &event);
+ /* If we failed due to a resource issue, reduce the size and try again.
+ */
+ if ((error == CL_OUT_OF_RESOURCES)
+ || (error == CL_MEM_OBJECT_ALLOCATION_FAILURE)
+ || (error == CL_OUT_OF_HOST_MEMORY))
+ {
+ log_info("Kernel enqueue failed at size %lld, trying at a reduced "
+ "size.\n",
+ currentSize);
currentSize -= stepSize;
free(constantData);
continue;
}
- test_error( error, "clEnqueueNDRangeKernel with maximum constant buffer size failed.");
+ test_error(
+ error,
+ "clEnqueueNDRangeKernel with maximum constant buffer size failed.");
// Verify that the event does not return an error from the execution
error = clWaitForEvents(1, &event);
- test_error( error, "clWaitForEvent failed");
- error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
- test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
+ test_error(error, "clWaitForEvent failed");
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(event_status), &event_status, NULL);
+ test_error(
+ error,
+ "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
clReleaseEvent(event);
- if (event_status < 0) {
- if ((event_status == CL_OUT_OF_RESOURCES) || (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (event_status == CL_OUT_OF_HOST_MEMORY)) {
- log_info("Kernel event indicates failure at size %lld, trying at a reduced size.\n", currentSize);
+ if (event_status < 0)
+ {
+ if ((event_status == CL_OUT_OF_RESOURCES)
+ || (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE)
+ || (event_status == CL_OUT_OF_HOST_MEMORY))
+ {
+ log_info("Kernel event indicates failure at size %lld, trying "
+ "at a reduced size.\n",
+ currentSize);
currentSize -= stepSize;
free(constantData);
continue;
- } else {
+ }
+ else
+ {
test_error(error, "Kernel execution event returned error");
}
}
@@ -1415,30 +1718,41 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
return EXIT_FAILURE;
}
- error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, resultData, 0, NULL, NULL);
- test_error( error, "clEnqueueReadBuffer failed");
+ error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
+ sizeToAllocate, resultData, 0, NULL, NULL);
+ test_error(error, "clEnqueueReadBuffer failed");
- for(i=0; i<(int)(numberOfInts); i++)
- if (constantData[i] != resultData[i]) {
- log_error("Data failed to verify: constantData[%d]=%d != resultData[%d]=%d\n",
+ for (i = 0; i < (int)(numberOfInts); i++)
+ if (constantData[i] != resultData[i])
+ {
+ log_error("Data failed to verify: constantData[%d]=%d != "
+ "resultData[%d]=%d\n",
i, constantData[i], i, resultData[i]);
- free( constantData );
+ free(constantData);
free(resultData);
- free_mtdata(d); d = NULL;
+ free_mtdata(d);
+ d = NULL;
return -1;
}
- free( constantData );
+ free(constantData);
free(resultData);
}
- free_mtdata(d); d = NULL;
+ free_mtdata(d);
+ d = NULL;
- if (allocPassed) {
- if (currentSize < maxSize/PASSING_FRACTION) {
- log_error("Failed to allocate at least 1/8 of the reported constant size.\n");
+ if (allocPassed)
+ {
+ if (currentSize < maxSize / PASSING_FRACTION)
+ {
+ log_error("Failed to allocate at least 1/8 of the reported "
+ "constant size.\n");
return -1;
- } else if (currentSize != maxSize) {
- log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize);
+ }
+ else if (currentSize != maxSize)
+ {
+ log_info("Passed at reduced size. (%lld of %lld bytes)\n",
+ currentSize, maxSize);
return 0;
}
return 0;
@@ -1446,13 +1760,14 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
return -1;
}
-int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_constant_args(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
- clMemWrapper *streams;
- size_t threads[1], localThreads[1];
+ clMemWrapper *streams;
+ size_t threads[1], localThreads[1];
cl_uint i, maxArgs;
cl_ulong maxSize;
cl_ulong maxParameterSize;
@@ -1465,119 +1780,145 @@ int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_com
/* Verify our test buffer won't be bigger than allowed */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof( maxArgs ), &maxArgs, 0 );
- test_error( error, "Unable to get max constant arg count" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_ARGS,
+ sizeof(maxArgs), &maxArgs, 0);
+ test_error(error, "Unable to get max constant arg count");
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL );
- test_error( error, "Unable to get max parameter size from device" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE,
+ sizeof(maxParameterSize), &maxParameterSize, NULL);
+ test_error(error, "Unable to get max parameter size from device");
// Subtract the size of the result
maxParameterSize -= sizeof(cl_mem);
// Calculate the number we can use
- if (maxParameterSize/sizeof(cl_mem) < maxArgs) {
- log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem)));
- maxArgs = (unsigned int)(maxParameterSize/sizeof(cl_mem));
+ if (maxParameterSize / sizeof(cl_mem) < maxArgs)
+ {
+ log_info("WARNING: Max parameter size of %d bytes limits test to %d "
+ "max image arguments.\n",
+ (int)maxParameterSize,
+ (int)(maxParameterSize / sizeof(cl_mem)));
+ maxArgs = (unsigned int)(maxParameterSize / sizeof(cl_mem));
}
- if( maxArgs < (gIsEmbedded ? 4 : 8) )
+ if (maxArgs < (gIsEmbedded ? 4 : 8))
{
- log_error( "ERROR: Reported max constant arg count less than required by OpenCL 1.0 (reported %d)\n", (int)maxArgs );
+ log_error("ERROR: Reported max constant arg count less than required "
+ "by OpenCL 1.0 (reported %d)\n",
+ (int)maxArgs);
return -1;
}
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
- test_error( error, "Unable to get max constant buffer size" );
- individualBufferSize = ((int)maxSize/2)/maxArgs;
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
+ sizeof(maxSize), &maxSize, 0);
+ test_error(error, "Unable to get max constant buffer size");
+ individualBufferSize = (maxSize / 2) / maxArgs;
- log_info("Reported max constant arg count of %d and max constant buffer size of %d. Test will attempt to allocate half of that, or %d buffers of size %d.\n",
- (int)maxArgs, (int)maxSize, (int)maxArgs, (int)individualBufferSize);
+ log_info(
+ "Reported max constant arg count of %u and max constant buffer "
+ "size of %llu. Test will attempt to allocate half of that, or %llu "
+ "buffers of size %zu.\n",
+ maxArgs, maxSize, maxArgs, individualBufferSize);
- str2 = (char*)malloc(sizeof(char)*32*(maxArgs+2));
- constArgs = (char*)malloc(sizeof(char)*32*(maxArgs+2));
- programSrc = (char*)malloc(sizeof(char)*32*2*(maxArgs+2)+1024);
+ str2 = (char *)malloc(sizeof(char) * 32 * (maxArgs + 2));
+ constArgs = (char *)malloc(sizeof(char) * 32 * (maxArgs + 2));
+ programSrc = (char *)malloc(sizeof(char) * 32 * 2 * (maxArgs + 2) + 1024);
/* Create a test program */
constArgs[0] = 0;
str2[0] = 0;
- for( i = 0; i < maxArgs-1; i++ )
- {
- sprintf( str, ", __constant int *src%d", (int)( i + 2 ) );
- strcat( constArgs, str );
- sprintf( str2 + strlen( str2), "\tdst[tid] += src%d[tid];\n", (int)(i+2));
- if (strlen(str2) > (sizeof(char)*32*(maxArgs+2)-32) || strlen(constArgs) > (sizeof(char)*32*(maxArgs+2)-32)) {
- log_info("Limiting number of arguments tested to %d due to test program allocation size.\n", i);
+ for (i = 0; i < maxArgs - 1; i++)
+ {
+ sprintf(str, ", __constant int *src%d", (int)(i + 2));
+ strcat(constArgs, str);
+ sprintf(str2 + strlen(str2), "\tdst[tid] += src%d[tid];\n",
+ (int)(i + 2));
+ if (strlen(str2) > (sizeof(char) * 32 * (maxArgs + 2) - 32)
+ || strlen(constArgs) > (sizeof(char) * 32 * (maxArgs + 2) - 32))
+ {
+ log_info("Limiting number of arguments tested to %d due to test "
+ "program allocation size.\n",
+ i);
break;
}
}
- sprintf( programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2 );
+ sprintf(programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2);
/* Create a kernel to test with */
ptr = programSrc;
- if( create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" ) != 0 )
+ if (create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
+ "sample_test")
+ != 0)
{
return -1;
}
/* Create some I/O streams */
- streams = new clMemWrapper[ maxArgs + 1 ];
- for( i = 0; i < maxArgs + 1; i++ )
+ streams = new clMemWrapper[maxArgs + 1];
+ for (i = 0; i < maxArgs + 1; i++)
{
streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
individualBufferSize, NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
}
/* Set the arguments */
- for( i = 0; i < maxArgs + 1; i++ )
+ for (i = 0; i < maxArgs + 1; i++)
{
- error = clSetKernelArg(kernel, i, sizeof( streams[i] ), &streams[i]);
- test_error( error, "Unable to set kernel argument" );
+ error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]);
+ test_error(error, "Unable to set kernel argument");
}
/* Test running the kernel and verifying it */
threads[0] = (size_t)10;
- while (threads[0]*sizeof(cl_int) > individualBufferSize)
- threads[0]--;
+ while (threads[0] * sizeof(cl_int) > individualBufferSize) threads[0]--;
- error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
- test_error( error, "Unable to get work group size to use" );
+ error = get_max_common_work_group_size(context, kernel, threads[0],
+ &localThreads[0]);
+ test_error(error, "Unable to get work group size to use");
- error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event );
- test_error( error, "clEnqueueNDRangeKernel failed");
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, &event);
+ test_error(error, "clEnqueueNDRangeKernel failed");
// Verify that the event does not return an error from the execution
error = clWaitForEvents(1, &event);
- test_error( error, "clWaitForEvent failed");
- error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
- test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
+ test_error(error, "clWaitForEvent failed");
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(event_status), &event_status, NULL);
+ test_error(error,
+ "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
clReleaseEvent(event);
if (event_status < 0)
test_error(error, "Kernel execution event returned error");
error = clFinish(queue);
- test_error( error, "clFinish failed.");
+ test_error(error, "clFinish failed.");
- delete [] streams;
+ delete[] streams;
free(str2);
free(constArgs);
free(programSrc);
return 0;
}
-int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_compute_units(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_uint value;
- error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( value ), &value, 0 );
- test_error( error, "Unable to get compute unit count" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS,
+ sizeof(value), &value, 0);
+ test_error(error, "Unable to get compute unit count");
- if( value < 1 )
+ if (value < 1)
{
- log_error( "ERROR: Reported compute unit count less than required by OpenCL 1.0 (reported %d)\n", (int)value );
+ log_error("ERROR: Reported compute unit count less than required by "
+ "OpenCL 1.0 (reported %d)\n",
+ (int)value);
return -1;
}
@@ -1586,18 +1927,22 @@ int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_com
return 0;
}
-int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_address_bits(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_uint value;
- error = clGetDeviceInfo( deviceID, CL_DEVICE_ADDRESS_BITS, sizeof( value ), &value, 0 );
- test_error( error, "Unable to get address bit count" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS, sizeof(value),
+ &value, 0);
+ test_error(error, "Unable to get address bit count");
- if( value != 32 && value != 64 )
+ if (value != 32 && value != 64)
{
- log_error( "ERROR: Reported address bit count not valid by OpenCL 1.0 (reported %d)\n", (int)value );
+ log_error("ERROR: Reported address bit count not valid by OpenCL 1.0 "
+ "(reported %d)\n",
+ (int)value);
return -1;
}
@@ -1606,68 +1951,84 @@ int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_comm
return 0;
}
-int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_device_fp_config value;
char profile[128] = "";
- error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( value ), &value, 0 );
- test_error( error, "Unable to get device single fp config" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(value),
+ &value, 0);
+ test_error(error, "Unable to get device single fp config");
- //Check to see if we are an embedded profile device
- if((error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL )))
+ // Check to see if we are an embedded profile device
+ if ((error = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile),
+ profile, NULL)))
{
- log_error( "FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n", error );
+ log_error("FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n",
+ error);
return error;
}
- if( 0 == strcmp( profile, "EMBEDDED_PROFILE" ))
+ if (0 == strcmp(profile, "EMBEDDED_PROFILE"))
{ // embedded device
- if( 0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO)))
+ if (0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO)))
{
- log_error( "FAILURE: embedded device supports neither CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n" );
+ log_error("FAILURE: embedded device supports neither "
+ "CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n");
return -1;
}
}
else
{ // Full profile
- if( ( value & ( CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN )) != ( CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN ) )
+ if ((value & (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN))
+ != (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN))
{
- log_error( "ERROR: Reported single fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value );
+ log_error("ERROR: Reported single fp config doesn't meet minimum "
+ "set by OpenCL 1.0 (reported 0x%08x)\n",
+ (int)value);
return -1;
}
}
return 0;
}
-int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_device_fp_config value;
- error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( value ), &value, 0 );
- test_error( error, "Unable to get device double fp config" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(value),
+ &value, 0);
+ test_error(error, "Unable to get device double fp config");
- if (value == 0)
- return 0;
+ if (value == 0) return 0;
- if( ( value & (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) != ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM) )
+ if ((value
+ & (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO
+ | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM))
+ != (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO
+ | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM))
{
- log_error( "ERROR: Reported double fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value );
+ log_error("ERROR: Reported double fp config doesn't meet minimum set "
+ "by OpenCL 1.0 (reported 0x%08x)\n",
+ (int)value);
return -1;
}
return 0;
}
-int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
- clMemWrapper streams[3];
- size_t threads[1], localThreads[1];
+ clMemWrapper streams[3];
+ size_t threads[1], localThreads[1];
cl_int *localData, *resultData;
cl_ulong maxSize, kernelLocalUsage, min_max_local_mem_size;
Version device_version;
@@ -1676,8 +2037,9 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co
MTdata d;
/* Verify our test buffer won't be bigger than allowed */
- error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( maxSize ), &maxSize, 0 );
- test_error( error, "Unable to get max local buffer size" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(maxSize),
+ &maxSize, 0);
+ test_error(error, "Unable to get max local buffer size");
try
{
@@ -1709,65 +2071,80 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co
return -1;
}
- log_info("Reported max local buffer size for device: %lld bytes.\n", maxSize);
+ log_info("Reported max local buffer size for device: %lld bytes.\n",
+ maxSize);
/* Create a kernel to test with */
- if( create_single_kernel_helper( context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ) != 0 )
+ if (create_single_kernel_helper(context, &program, &kernel, 1,
+ sample_local_arg_kernel, "sample_test")
+ != 0)
{
return -1;
}
- error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernelLocalUsage), &kernelLocalUsage, NULL);
- test_error(error, "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed");
+ error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE,
+ sizeof(kernelLocalUsage),
+ &kernelLocalUsage, NULL);
+ test_error(error,
+ "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed");
- log_info("Reported local buffer usage for kernel (CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n", kernelLocalUsage);
+ log_info("Reported local buffer usage for kernel "
+ "(CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n",
+ kernelLocalUsage);
/* Create some I/O streams */
- size_t sizeToAllocate = ((size_t)(maxSize-kernelLocalUsage)/sizeof( cl_int ))*sizeof(cl_int);
- size_t numberOfInts = sizeToAllocate/sizeof(cl_int);
+ size_t sizeToAllocate =
+ ((size_t)(maxSize - kernelLocalUsage) / sizeof(cl_int))
+ * sizeof(cl_int);
+ size_t numberOfInts = sizeToAllocate / sizeof(cl_int);
- log_info("Attempting to use %lld bytes of local memory.\n", (cl_ulong)sizeToAllocate);
+ log_info("Attempting to use %zu bytes of local memory.\n", sizeToAllocate);
- localData = (cl_int *)malloc( sizeToAllocate );
- d = init_genrand( gRandomSeed );
- for(i=0; i<(int)(numberOfInts); i++)
+ localData = (cl_int *)malloc(sizeToAllocate);
+ d = init_genrand(gRandomSeed);
+ for (i = 0; i < (int)(numberOfInts); i++)
localData[i] = (int)genrand_int32(d);
- free_mtdata(d); d = NULL;
+ free_mtdata(d);
+ d = NULL;
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate,
localData, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
/* Set the arguments */
error = clSetKernelArg(kernel, 0, sizeToAllocate, NULL);
- test_error( error, "Unable to set indexed kernel arguments" );
- error = clSetKernelArg(kernel, 1, sizeof( streams[0] ), &streams[0]);
- test_error( error, "Unable to set indexed kernel arguments" );
- error = clSetKernelArg(kernel, 2, sizeof( streams[1] ), &streams[1]);
- test_error( error, "Unable to set indexed kernel arguments" );
+ test_error(error, "Unable to set indexed kernel arguments");
+ error = clSetKernelArg(kernel, 1, sizeof(streams[0]), &streams[0]);
+ test_error(error, "Unable to set indexed kernel arguments");
+ error = clSetKernelArg(kernel, 2, sizeof(streams[1]), &streams[1]);
+ test_error(error, "Unable to set indexed kernel arguments");
/* Test running the kernel and verifying it */
threads[0] = numberOfInts;
localThreads[0] = 1;
- log_info("Creating local buffer with %d cl_ints (%d bytes).\n", (int)numberOfInts, (int)sizeToAllocate);
+ log_info("Creating local buffer with %zu cl_ints (%zu bytes).\n",
+ numberOfInts, sizeToAllocate);
cl_event evt;
- cl_int evt_err;
- error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &evt );
+ cl_int evt_err;
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, &evt);
test_error(error, "clEnqueueNDRangeKernel failed");
error = clFinish(queue);
- test_error( error, "clFinish failed");
+ test_error(error, "clFinish failed");
- error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof evt_err, &evt_err, NULL);
- test_error( error, "clGetEventInfo with maximum local buffer size failed.");
+ error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof evt_err, &evt_err, NULL);
+ test_error(error, "clGetEventInfo with maximum local buffer size failed.");
- if (evt_err != CL_COMPLETE) {
+ if (evt_err != CL_COMPLETE)
+ {
print_error(evt_err, "Kernel event returned error");
clReleaseEvent(evt);
return -1;
@@ -1775,95 +2152,118 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co
resultData = (cl_int *)malloc(sizeToAllocate);
- error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, resultData, 0, NULL, NULL);
- test_error( error, "clEnqueueReadBuffer failed");
+ error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate,
+ resultData, 0, NULL, NULL);
+ test_error(error, "clEnqueueReadBuffer failed");
- for(i=0; i<(int)(numberOfInts); i++)
- if (localData[i] != resultData[i]) {
+ for (i = 0; i < (int)(numberOfInts); i++)
+ if (localData[i] != resultData[i])
+ {
clReleaseEvent(evt);
- free( localData );
+ free(localData);
free(resultData);
log_error("Results failed to verify.\n");
return -1;
}
clReleaseEvent(evt);
- free( localData );
+ free(localData);
free(resultData);
return err;
}
-int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_kernel_preferred_work_group_size_multiple(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements)
{
- int err;
+ int err;
clProgramWrapper program;
clKernelWrapper kernel;
size_t max_local_workgroup_size[3];
size_t max_workgroup_size = 0, preferred_workgroup_size = 0;
- err = create_single_kernel_helper(context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" );
+ err = create_single_kernel_helper(context, &program, &kernel, 1,
+ sample_local_arg_kernel, "sample_test");
test_error(err, "Failed to build kernel/program.");
err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE,
- sizeof(max_workgroup_size), &max_workgroup_size, NULL);
+ sizeof(max_workgroup_size),
+ &max_workgroup_size, NULL);
test_error(err, "clGetKernelWorkgroupInfo failed.");
- err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
- sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL);
+ err = clGetKernelWorkGroupInfo(
+ kernel, deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
+ sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL);
test_error(err, "clGetKernelWorkgroupInfo failed.");
- err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
+ err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+ sizeof(max_local_workgroup_size),
+ max_local_workgroup_size, NULL);
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
- // Since the preferred size is only a performance hint, we can only really check that we get a sane value
- // back
- log_info( "size: %ld preferred: %ld max: %ld\n", max_workgroup_size, preferred_workgroup_size, max_local_workgroup_size[0] );
+ // Since the preferred size is only a performance hint, we can only really
+ // check that we get a sane value back
+ log_info("size: %ld preferred: %ld max: %ld\n", max_workgroup_size,
+ preferred_workgroup_size, max_local_workgroup_size[0]);
- if( preferred_workgroup_size > max_workgroup_size )
+ if (preferred_workgroup_size > max_workgroup_size)
{
- log_error( "ERROR: Reported preferred workgroup multiple larger than max workgroup size (preferred %ld, max %ld)\n", preferred_workgroup_size, max_workgroup_size );
+ log_error("ERROR: Reported preferred workgroup multiple larger than "
+ "max workgroup size (preferred %ld, max %ld)\n",
+ preferred_workgroup_size, max_workgroup_size);
return -1;
}
return 0;
}
-int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_execution_capabilities(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int error;
cl_device_exec_capabilities value;
- error = clGetDeviceInfo( deviceID, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof( value ), &value, 0 );
- test_error( error, "Unable to get execution capabilities" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_EXECUTION_CAPABILITIES,
+ sizeof(value), &value, 0);
+ test_error(error, "Unable to get execution capabilities");
- if( ( value & CL_EXEC_KERNEL ) != CL_EXEC_KERNEL )
+ if ((value & CL_EXEC_KERNEL) != CL_EXEC_KERNEL)
{
- log_error( "ERROR: Reported execution capabilities less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value );
+ log_error("ERROR: Reported execution capabilities less than required "
+ "by OpenCL 1.0 (reported 0x%08x)\n",
+ (int)value);
return -1;
}
return 0;
}
-int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_queue_properties(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_command_queue_properties value;
- error = clGetDeviceInfo( deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( value ), &value, 0 );
- test_error( error, "Unable to get queue properties" );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
+ sizeof(value), &value, 0);
+ test_error(error, "Unable to get queue properties");
- if( ( value & CL_QUEUE_PROFILING_ENABLE ) != CL_QUEUE_PROFILING_ENABLE )
+ if ((value & CL_QUEUE_PROFILING_ENABLE) != CL_QUEUE_PROFILING_ENABLE)
{
- log_error( "ERROR: Reported queue properties less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value );
+ log_error("ERROR: Reported queue properties less than required by "
+ "OpenCL 1.0 (reported 0x%08x)\n",
+ (int)value);
return -1;
}
return 0;
}
-int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_device_version(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
// Query for the device version.
Version device_cl_version = get_device_cl_version(deviceID);
@@ -1959,84 +2359,101 @@ int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_co
return 0;
}
-int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_min_max_language_version(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_int error;
- cl_char buffer[ 4098 ];
+ cl_char buffer[4098];
size_t length;
// Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*"
- error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( buffer ), buffer, &length );
- test_error( error, "Unable to get device opencl c version string" );
- if( memcmp( buffer, "OpenCL C ", strlen( "OpenCL C " ) ) != 0 )
- {
- log_error( "ERROR: Initial part of device language version string does not match required format! (returned: \"%s\")\n", (char *)buffer );
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_OPENCL_C_VERSION,
+ sizeof(buffer), buffer, &length);
+ test_error(error, "Unable to get device opencl c version string");
+ if (memcmp(buffer, "OpenCL C ", strlen("OpenCL C ")) != 0)
+ {
+ log_error("ERROR: Initial part of device language version string does "
+ "not match required format! (returned: \"%s\")\n",
+ (char *)buffer);
return -1;
}
log_info("Returned version \"%s\".\n", buffer);
- char *p1 = (char *)buffer + strlen( "OpenCL C " );
- while( *p1 == ' ' )
- p1++;
+ char *p1 = (char *)buffer + strlen("OpenCL C ");
+ while (*p1 == ' ') p1++;
char *p2 = p1;
- if( ! isdigit(*p2) )
+ if (!isdigit(*p2))
{
- log_error( "ERROR: Major revision number must follow space behind OpenCL C! (returned %s)\n", (char*) buffer );
+ log_error("ERROR: Major revision number must follow space behind "
+ "OpenCL C! (returned %s)\n",
+ (char *)buffer);
return -1;
}
- while( isdigit( *p2 ) )
- p2++;
- if( *p2 != '.' )
+ while (isdigit(*p2)) p2++;
+ if (*p2 != '.')
{
- log_error( "ERROR: Version number must contain a decimal point! (returned: %s)\n", (char *)buffer );
+ log_error("ERROR: Version number must contain a decimal point! "
+ "(returned: %s)\n",
+ (char *)buffer);
return -1;
}
char *p3 = p2 + 1;
- if( ! isdigit(*p3) )
+ if (!isdigit(*p3))
{
- log_error( "ERROR: Minor revision number is missing or does not abut the decimal point! (returned %s)\n", (char*) buffer );
+ log_error("ERROR: Minor revision number is missing or does not abut "
+ "the decimal point! (returned %s)\n",
+ (char *)buffer);
return -1;
}
- while( isdigit( *p3 ) )
- p3++;
- if( *p3 != ' ' )
+ while (isdigit(*p3)) p3++;
+ if (*p3 != ' ')
{
- log_error( "ERROR: A space must appear after the minor version! (returned: %s)\n", (char *)buffer );
+ log_error("ERROR: A space must appear after the minor version! "
+ "(returned: %s)\n",
+ (char *)buffer);
return -1;
}
*p2 = ' '; // Put in a space for atoi below.
p2++;
- int major = atoi( p1 );
- int minor = atoi( p2 );
+ int major = atoi(p1);
+ int minor = atoi(p2);
int minor_revision = 2;
- if( major * 10 + minor < 10 + minor_revision )
+ if (major * 10 + minor < 10 + minor_revision)
{
- // If the language version did not match, check to see if OPENCL_1_0_DEVICE is set.
- if( getenv("OPENCL_1_0_DEVICE"))
+ // If the language version did not match, check to see if
+ // OPENCL_1_0_DEVICE is set.
+ if (getenv("OPENCL_1_0_DEVICE"))
{
- log_info( "WARNING: This test was run with OPENCL_1_0_DEVICE defined! This is not a OpenCL 1.1 or OpenCL 1.2 compatible device!!!\n" );
+ log_info("WARNING: This test was run with OPENCL_1_0_DEVICE "
+ "defined! This is not a OpenCL 1.1 or OpenCL 1.2 "
+ "compatible device!!!\n");
}
- else if( getenv("OPENCL_1_1_DEVICE"))
+ else if (getenv("OPENCL_1_1_DEVICE"))
{
- log_info( "WARNING: This test was run with OPENCL_1_1_DEVICE defined! This is not a OpenCL 1.2 compatible device!!!\n" );
+ log_info(
+ "WARNING: This test was run with OPENCL_1_1_DEVICE defined! "
+ "This is not a OpenCL 1.2 compatible device!!!\n");
}
else
{
- log_error( "ERROR: OpenCL device language version returned is less than 1.%d! (Returned: %s)\n", minor_revision, (char *)buffer );
- return -1;
+ log_error("ERROR: OpenCL device language version returned is less "
+ "than 1.%d! (Returned: %s)\n",
+ minor_revision, (char *)buffer);
+ return -1;
}
}
// Sanity checks on the returned values
- if( length != (strlen( (char *)buffer ) + 1 ))
+ if (length != (strlen((char *)buffer) + 1))
{
- log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer ), (int)length );
+ log_error("ERROR: Returned length of version string does not match "
+ "actual length (actual: %d, returned: %d)\n",
+ (int)strlen((char *)buffer), (int)length);
return -1;
}
return 0;
}
-
diff --git a/test_conformance/api/test_context_destructor_callback.cpp b/test_conformance/api/test_context_destructor_callback.cpp
index 1d73a3c4..d29d9039 100644
--- a/test_conformance/api/test_context_destructor_callback.cpp
+++ b/test_conformance/api/test_context_destructor_callback.cpp
@@ -52,12 +52,7 @@ int test_context_destructor_callback(cl_device_id deviceID, cl_context context,
test_error(error, "Unable to set destructor callback");
// Now release the context, which SHOULD call the callbacks
- error = clReleaseContext(localContext);
- test_error(error, "Unable to release local context");
-
- // Note: since we manually released the context, we need to set it to NULL
- // to prevent a double-release
- localContext = NULL;
+ localContext.reset();
// At this point, all three callbacks should have already been called
int numErrors = 0;
diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp
index 8073e0de..d0681dfd 100644
--- a/test_conformance/api/test_kernel_arg_info.cpp
+++ b/test_conformance/api/test_kernel_arg_info.cpp
@@ -22,11 +22,8 @@
#define MINIMUM_OPENCL_PIPE_VERSION Version(2, 0)
-static constexpr size_t CL_VERSION_LENGTH = 128;
static constexpr size_t KERNEL_ARGUMENT_LENGTH = 128;
static constexpr char KERNEL_ARGUMENT_NAME[] = "argument";
-static constexpr size_t KERNEL_ARGUMENT_NAME_LENGTH =
- sizeof(KERNEL_ARGUMENT_NAME) + 1;
static constexpr int SINGLE_KERNEL_ARG_NUMBER = 0;
static constexpr int MAX_NUMBER_OF_KERNEL_ARGS = 128;
@@ -167,7 +164,8 @@ static std::string generate_argument(const KernelArgInfo& kernel_arg)
/* This function generates a kernel source and allows for multiple arguments to
* be passed in and subsequently queried. */
static std::string generate_kernel(const std::vector<KernelArgInfo>& all_args,
- const bool supports_3d_image_writes = false)
+ const bool supports_3d_image_writes = false,
+ const bool kernel_uses_half_type = false)
{
std::string ret;
@@ -175,10 +173,13 @@ static std::string generate_kernel(const std::vector<KernelArgInfo>& all_args,
{
ret += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable\n";
}
+ if (kernel_uses_half_type)
+ {
+ ret += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+ }
ret += "kernel void get_kernel_arg_info(\n";
for (int i = 0; i < all_args.size(); ++i)
{
- const KernelArgInfo& arg = all_args[i];
ret += generate_argument(all_args[i]);
if (i == all_args.size() - 1)
{
@@ -537,6 +538,7 @@ size_t get_param_size(const std::string& arg_type, cl_device_id deviceID,
cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS,
sizeof(device_address_bits),
&device_address_bits, NULL);
+ test_error_ret(err, "clGetDeviceInfo", 0);
return (device_address_bits / 8);
}
@@ -673,8 +675,8 @@ static int run_scalar_vector_tests(cl_context context, cl_device_id deviceID)
if (param_size + total_param_size >= max_param_size
|| all_args.size() == MAX_NUMBER_OF_KERNEL_ARGS)
{
- const std::string kernel_src =
- generate_kernel(all_args);
+ const std::string kernel_src = generate_kernel(
+ all_args, false, device_supports_half(deviceID));
failed_tests += compare_kernel_with_expected(
context, deviceID, kernel_src.c_str(),
expected_args);
@@ -696,7 +698,8 @@ static int run_scalar_vector_tests(cl_context context, cl_device_id deviceID)
}
}
}
- const std::string kernel_src = generate_kernel(all_args);
+ const std::string kernel_src =
+ generate_kernel(all_args, false, device_supports_half(deviceID));
failed_tests += compare_kernel_with_expected(
context, deviceID, kernel_src.c_str(), expected_args);
return failed_tests;
@@ -808,8 +811,34 @@ static int run_image_tests(cl_context context, cl_device_id deviceID)
cl_kernel_arg_address_qualifier address_qualifier =
CL_KERNEL_ARG_ADDRESS_GLOBAL;
+ Version version = get_device_cl_version(deviceID);
+ bool supports_read_write_images = false;
+ if (version >= Version(3, 0))
+ {
+ cl_uint maxReadWriteImageArgs = 0;
+ cl_int error = clGetDeviceInfo(
+ deviceID, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
+ sizeof(maxReadWriteImageArgs), &maxReadWriteImageArgs, NULL);
+ test_error(error,
+ "Unable to query "
+ "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS");
+
+ // read-write images are supported if MAX_READ_WRITE_IMAGE_ARGS is
+ // nonzero
+ supports_read_write_images = maxReadWriteImageArgs != 0;
+ }
+ else if (version >= Version(2, 0))
+ {
+ // read-write images are required for OpenCL 2.x
+ supports_read_write_images = true;
+ }
+
for (auto access_qualifier : access_qualifiers)
{
+ if (access_qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE
+ && !supports_read_write_images)
+ continue;
+
bool is_write =
(access_qualifier == CL_KERNEL_ARG_ACCESS_WRITE_ONLY
|| access_qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE);
diff --git a/test_conformance/api/test_kernel_attributes.cpp b/test_conformance/api/test_kernel_attributes.cpp
index 2e4e0a7f..ad4baa0f 100644
--- a/test_conformance/api/test_kernel_attributes.cpp
+++ b/test_conformance/api/test_kernel_attributes.cpp
@@ -275,16 +275,16 @@ static bool run_test(cl_context context, cl_device_id deviceID,
clKernelWrapper kernel;
cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
&kernel_src, "test_kernel");
- test_error(err, "create_single_kernel_helper");
+ test_error_ret(err, "create_single_kernel_helper", false);
// Get the size of the kernel attribute string returned
size_t size = 0;
err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, 0, nullptr, &size);
- test_error(err, "clGetKernelInfo");
+ test_error_ret(err, "clGetKernelInfo", false);
std::vector<char> attributes(size);
err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, attributes.size(),
attributes.data(), nullptr);
- test_error(err, "clGetKernelInfo");
+ test_error_ret(err, "clGetKernelInfo", false);
std::string attribute_string(attributes.data());
attribute_string.erase(
std::remove(attribute_string.begin(), attribute_string.end(), ' '),
diff --git a/test_conformance/api/test_mem_object_info.cpp b/test_conformance/api/test_mem_object_info.cpp
index ccfeaafa..8dc8f6cf 100644
--- a/test_conformance/api/test_mem_object_info.cpp
+++ b/test_conformance/api/test_mem_object_info.cpp
@@ -348,14 +348,7 @@ int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_
TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (cl_mem)bufferObject, "associated mem object", "%p", void * )
TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_OFFSET, offset, (size_t)( addressAlign ), "offset", "%ld", size_t )
-
- clReleaseMemObject( subBufferObject );
- subBufferObject = NULL;
-
}
-
- clReleaseMemObject( bufferObject );
- bufferObject = NULL;
}
return CL_SUCCESS;
@@ -370,8 +363,6 @@ int test_get_imageObject_info( cl_mem * image, cl_mem_flags objectFlags, cl_imag
cl_mem_flags flags;
cl_uint mapCount;
cl_uint refCount;
- size_t rowPitchMultiplier;
- size_t slicePitchMultiplier;
cl_context otherCtx;
size_t offset;
size_t sz;
diff --git a/test_conformance/api/test_mem_objects.cpp b/test_conformance/api/test_mem_objects.cpp
index c29613f9..f1a4e993 100644
--- a/test_conformance/api/test_mem_objects.cpp
+++ b/test_conformance/api/test_mem_objects.cpp
@@ -48,12 +48,7 @@ int test_mem_object_destructor_callback_single(clMemWrapper &memObject)
test_error(error, "Unable to set destructor callback");
// Now release the buffer, which SHOULD call the callbacks
- error = clReleaseMemObject(memObject);
- test_error(error, "Unable to release test buffer");
-
- // Note: since we manually released the mem wrapper, we need to set it to
- // NULL to prevent a double-release
- memObject = NULL;
+ memObject.reset();
// At this point, all three callbacks should have already been called
int numErrors = 0;
diff --git a/test_conformance/api/test_null_buffer_arg.cpp b/test_conformance/api/test_null_buffer_arg.cpp
index d412d4ea..75bdd479 100644
--- a/test_conformance/api/test_null_buffer_arg.cpp
+++ b/test_conformance/api/test_null_buffer_arg.cpp
@@ -149,7 +149,6 @@ int test_null_buffer_arg(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
unsigned int test_success = 0;
- unsigned int i;
unsigned int buffer_size;
cl_int status;
cl_program program;
diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp
index 469a1934..a7703a76 100644
--- a/test_conformance/api/test_queries.cpp
+++ b/test_conformance/api/test_queries.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -19,6 +19,7 @@
#include <stdlib.h>
#include <ctype.h>
#include <algorithm>
+#include <vector>
int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
@@ -345,87 +346,100 @@ int command_queue_param_test(cl_command_queue queue,
return 0;
}
-#define MIN_NUM_COMMAND_QUEUE_PROPERTIES 2
-#define OOO_NUM_COMMAND_QUEUE_PROPERTIES 4
-static cl_command_queue_properties property_options[] = {
- 0,
-
- CL_QUEUE_PROFILING_ENABLE,
-
- CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-
- CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-
- CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-
- CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE
- | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-
- CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT
- | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-
- CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT
- | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
-};
-
int check_get_command_queue_info_params(cl_device_id deviceID,
cl_context context,
bool is_compatibility)
{
- int error;
- size_t size;
+ const cl_command_queue_properties host_optional[] = {
+ CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
+ CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
+ };
+
+ const cl_command_queue_properties device_required[] = {
+ CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
+ CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE
+ | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
+ CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT
+ | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
+ CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE
+ | CL_QUEUE_ON_DEVICE_DEFAULT
+ | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
+ };
+
+ const size_t host_optional_size = ARRAY_SIZE(host_optional);
+ const size_t device_required_size = ARRAY_SIZE(device_required);
+
+ Version version = get_device_cl_version(deviceID);
+
+ const cl_device_info host_queue_query = version >= Version(2, 0)
+ ? CL_DEVICE_QUEUE_ON_HOST_PROPERTIES
+ : CL_DEVICE_QUEUE_PROPERTIES;
- cl_queue_properties host_queue_props, device_queue_props;
- cl_queue_properties queue_props[] = { CL_QUEUE_PROPERTIES, 0, 0 };
+ cl_queue_properties host_queue_props = 0;
+ int error =
+ clGetDeviceInfo(deviceID, host_queue_query, sizeof(host_queue_props),
+ &host_queue_props, NULL);
+ test_error(error, "clGetDeviceInfo failed");
+ log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", host_queue_props);
- clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
- sizeof(host_queue_props), &host_queue_props, NULL);
- log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n",
- (int)host_queue_props);
- clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
- sizeof(device_queue_props), &device_queue_props, NULL);
- log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n",
- (int)device_queue_props);
+ cl_queue_properties device_queue_props = 0;
+ if (version >= Version(2, 0))
+ {
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
+ sizeof(device_queue_props), &device_queue_props,
+ NULL);
+ test_error(error, "clGetDeviceInfo failed");
+ log_info("CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES is %d\n",
+ device_queue_props);
+ }
- auto version = get_device_cl_version(deviceID);
+ bool out_of_order_supported =
+ host_queue_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
- // Are on device queues supported
bool on_device_supported =
(version >= Version(2, 0) && version < Version(3, 0))
|| (version >= Version(3, 0) && device_queue_props != 0);
- int num_test_options = MIN_NUM_COMMAND_QUEUE_PROPERTIES;
- if (host_queue_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
+ // test device queues if the device and the API under test support it
+ bool test_on_device = on_device_supported && !is_compatibility;
+
+ std::vector<cl_queue_properties> queue_props{ 0,
+ CL_QUEUE_PROFILING_ENABLE };
+
+ if (out_of_order_supported)
{
- // Test out-of-order queues properties if supported
- num_test_options = OOO_NUM_COMMAND_QUEUE_PROPERTIES;
- }
- if (on_device_supported && !is_compatibility)
+ queue_props.insert(queue_props.end(), &host_optional[0],
+ &host_optional[host_optional_size]);
+ };
+
+ cl_queue_properties queue_props_arg[] = { CL_QUEUE_PROPERTIES, 0, 0 };
+
+ if (test_on_device)
{
- // Test queue on device if supported (in this case out-of-order must
- // also be supported)
- num_test_options = ARRAY_SIZE(property_options);
- }
+ queue_props.insert(queue_props.end(), &device_required[0],
+ &device_required[device_required_size]);
+ };
- for (int i = 0; i < num_test_options; i++)
+ for (cl_queue_properties props : queue_props)
{
- queue_props[1] = property_options[i];
- clCommandQueueWrapper queue;
+ queue_props_arg[1] = props;
+
+ clCommandQueueWrapper queue;
if (is_compatibility)
{
- queue =
- clCreateCommandQueue(context, deviceID, queue_props[1], &error);
+ queue = clCreateCommandQueue(context, deviceID, props, &error);
test_error(error, "Unable to create command queue to test with");
}
else
{
queue = clCreateCommandQueueWithProperties(context, deviceID,
- &queue_props[0], &error);
+ queue_props_arg, &error);
test_error(error, "Unable to create command queue to test with");
}
cl_uint refCount;
+ size_t size;
error = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT,
sizeof(refCount), &refCount, &size);
test_error(error, "Unable to get command queue reference count");
@@ -442,11 +456,12 @@ int check_get_command_queue_info_params(cl_device_id deviceID,
test_error(error, "param checking failed");
error = command_queue_param_test(queue, CL_QUEUE_PROPERTIES,
- queue_props[1], "properties");
+ queue_props_arg[1], "properties");
test_error(error, "param checking failed");
}
return 0;
}
+
int test_get_command_queue_info(cl_device_id deviceID, cl_context context,
cl_command_queue ignoreQueue, int num_elements)
{
@@ -511,26 +526,6 @@ void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
free( data );
}
-// All possible combinations of valid cl_mem_flags.
-static cl_mem_flags all_flags[16] = {
- 0,
- CL_MEM_READ_WRITE,
- CL_MEM_READ_ONLY,
- CL_MEM_WRITE_ONLY,
- CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
- CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
- CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
- CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
- CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
- CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
- CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
- CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
- CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
-};
-
#define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast ) \
error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get device " name ); \
@@ -824,5 +819,3 @@ int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, c
return 0;
}
-
-
diff --git a/test_conformance/api/test_sub_group_dispatch.cpp b/test_conformance/api/test_sub_group_dispatch.cpp
index 01d0ffa3..61d9a524 100644
--- a/test_conformance/api/test_sub_group_dispatch.cpp
+++ b/test_conformance/api/test_sub_group_dispatch.cpp
@@ -56,11 +56,9 @@ cl_int get_sub_group_num(cl_command_queue queue, cl_kernel kernel, clMemWrapper&
int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
- static const size_t gsize0 = 80;
- int i, error;
+ int error;
size_t realSize;
size_t kernel_max_subgroup_size, kernel_subgroup_count;
- size_t global[] = {1,1,1};
size_t max_local;
cl_platform_id platform;
diff --git a/test_conformance/atomics/main.cpp b/test_conformance/atomics/main.cpp
index afdea376..987d6bfa 100644
--- a/test_conformance/atomics/main.cpp
+++ b/test_conformance/atomics/main.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -24,6 +24,7 @@
#include <unistd.h>
#endif
+// clang-format off
test_definition test_list[] = {
ADD_TEST( atomic_add ),
ADD_TEST( atomic_sub ),
@@ -40,11 +41,11 @@ test_definition test_list[] = {
ADD_TEST( atomic_add_index ),
ADD_TEST( atomic_add_index_bin ),
};
+// clang-format on
-const int test_num = ARRAY_SIZE( test_list );
+const int test_num = ARRAY_SIZE(test_list);
int main(int argc, const char *argv[])
{
return runTestHarness(argc, argv, test_num, test_list, false, 0);
}
-
diff --git a/test_conformance/atomics/procs.h b/test_conformance/atomics/procs.h
index bf053f25..fa85aad5 100644
--- a/test_conformance/atomics/procs.h
+++ b/test_conformance/atomics/procs.h
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -18,22 +18,35 @@
#include "harness/threadTesting.h"
#include "harness/typeWrappers.h"
-extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);
-
-extern int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-
-extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-
+extern int create_program_and_kernel(const char *source,
+ const char *kernel_name,
+ cl_program *program_ret,
+ cl_kernel *kernel_ret);
+extern int test_atomic_add(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_sub(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_xchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_min(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_max(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_inc(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_dec(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_and(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_or(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_xor(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_add_index(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
diff --git a/test_conformance/atomics/testBase.h b/test_conformance/atomics/testBase.h
index ba67d140..22bce1d2 100644
--- a/test_conformance/atomics/testBase.h
+++ b/test_conformance/atomics/testBase.h
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -26,6 +26,3 @@
#include "procs.h"
#endif // _testBase_h
-
-
-
diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp
index 34b34ed3..caa4b78f 100644
--- a/test_conformance/atomics/test_atomics.cpp
+++ b/test_conformance/atomics/test_atomics.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -19,10 +19,12 @@
#include <unistd.h>
#endif
+#include <cinttypes>
+
#define INT_TEST_VALUE 402258822
#define LONG_TEST_VALUE 515154531254381446LL
-
+// clang-format off
const char *atomic_global_pattern[] = {
"__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n"
"{\n"
@@ -36,19 +38,20 @@ const char *atomic_local_pattern[] = {
"__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n"
"{\n"
" int tid = get_global_id(0);\n"
- " int dstItemIdx;\n"
+ " int dstItemIdx;\n"
"\n"
" // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n"
- " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
+ " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
" destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n"
" barrier( CLK_LOCAL_MEM_FENCE );\n"
"\n"
,
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" // Finally, write out the last value. Again, we're synced, so everyone will be writing the same value\n"
- " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
+ " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
" finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n"
"}\n" };
+// clang-format on
#define TEST_COUNT 128 * 1024
@@ -56,41 +59,48 @@ const char *atomic_local_pattern[] = {
struct TestFns
{
- cl_int mIntStartValue;
- cl_long mLongStartValue;
+ cl_int mIntStartValue;
+ cl_long mLongStartValue;
- size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType );
+ size_t (*NumResultsFn)(size_t threadSize, ExplicitType dataType);
// Integer versions
- cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue );
- void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d );
- bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue );
+ cl_int (*ExpectedValueIntFn)(size_t size, cl_int *startRefValues,
+ size_t whichDestValue);
+ void (*GenerateRefsIntFn)(size_t size, cl_int *startRefValues, MTdata d);
+ bool (*VerifyRefsIntFn)(size_t size, cl_int *refValues, cl_int finalValue);
// Long versions
- cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue );
- void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d );
- bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue );
+ cl_long (*ExpectedValueLongFn)(size_t size, cl_long *startRefValues,
+ size_t whichDestValue);
+ void (*GenerateRefsLongFn)(size_t size, cl_long *startRefValues, MTdata d);
+ bool (*VerifyRefsLongFn)(size_t size, cl_long *refValues,
+ cl_long finalValue);
// Float versions
- cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue );
- void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d );
- bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue );
+ cl_float (*ExpectedValueFloatFn)(size_t size, cl_float *startRefValues,
+ size_t whichDestValue);
+ void (*GenerateRefsFloatFn)(size_t size, cl_float *startRefValues,
+ MTdata d);
+ bool (*VerifyRefsFloatFn)(size_t size, cl_float *refValues,
+ cl_float finalValue);
};
-bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType )
+bool check_atomic_support(cl_device_id device, bool extended, bool isLocal,
+ ExplicitType dataType)
{
+ // clang-format off
const char *extensionNames[8] = {
"cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics",
"cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics",
"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics",
"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics" // this line intended to be the same as the last one
};
+ // clang-format on
size_t index = 0;
- if( extended )
- index += 1;
- if( isLocal )
- index += 2;
+ if (extended) index += 1;
+ if (isLocal) index += 2;
Version version = get_device_cl_version(device);
@@ -98,26 +108,28 @@ bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, Exp
{
case kInt:
case kUInt:
- if( version >= Version(1,1) )
- return 1;
+ if (version >= Version(1, 1)) return 1;
break;
case kLong:
- case kULong:
- index += 4;
- break;
- case kFloat: // this has to stay separate since the float atomics arent in the 1.0 extensions
- return version >= Version(1,1);
+ case kULong: index += 4; break;
+ case kFloat: // this has to stay separate since the float atomics arent
+ // in the 1.0 extensions
+ return version >= Version(1, 1);
default:
- log_error( "ERROR: Unsupported data type (%d) in check_atomic_support\n", dataType );
+ log_error(
+ "ERROR: Unsupported data type (%d) in check_atomic_support\n",
+ dataType);
return 0;
}
- return is_extension_available( device, extensionNames[index] );
+ return is_extension_available(device, extensionNames[index]);
}
-int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
- TestFns testFns,
- bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize )
+int test_atomic_function(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ const char *programCore, TestFns testFns,
+ bool extended, bool isLocal, ExplicitType dataType,
+ bool matchGroupSize)
{
clProgramWrapper program;
clKernelWrapper kernel;
@@ -127,55 +139,65 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q
void *refValues, *startRefValues;
size_t threadSize, groupSize;
const char *programLines[4];
- char pragma[ 512 ];
- char programHeader[ 512 ];
+ char pragma[512];
+ char programHeader[512];
MTdata d;
- size_t typeSize = get_explicit_type_size( dataType );
+ size_t typeSize = get_explicit_type_size(dataType);
// Verify we can run first
- bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt );
- if( !check_atomic_support( deviceID, extended, isLocal, dataType ) )
+ bool isUnsigned = (dataType == kULong) || (dataType == kUInt);
+ if (!check_atomic_support(deviceID, extended, isLocal, dataType))
{
- // Only print for the signed (unsigned comes right after, and if signed isn't supported, unsigned isn't either)
- if( dataType == kFloat )
- log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" );
- else if( !isUnsigned )
- log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 );
+ // Only print for the signed (unsigned comes right after, and if signed
+ // isn't supported, unsigned isn't either)
+ if (dataType == kFloat)
+ log_info("\t%s float not supported\n",
+ isLocal ? "Local" : "Global");
+ else if (!isUnsigned)
+ log_info("\t%s %sint%d not supported\n",
+ isLocal ? "Local" : "Global", isUnsigned ? "u" : "",
+ (int)typeSize * 8);
// Since we don't support the operation, they implicitly pass
return 0;
}
else
{
- if( dataType == kFloat )
- log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" );
+ if (dataType == kFloat)
+ log_info("\t%s float%s...", isLocal ? "local" : "global",
+ isLocal ? " " : "");
else
- log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "",
- (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? " " : "" );
+ log_info("\t%s %sint%d%s%s...", isLocal ? "local" : "global",
+ isUnsigned ? "u" : "", (int)typeSize * 8,
+ isUnsigned ? "" : " ", isLocal ? " " : "");
}
//// Set up the kernel code
// Create the pragma line for this kernel
- bool isLong = ( dataType == kLong || dataType == kULong );
- sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
- isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32",
- extended ? "extended" : "base" );
+ bool isLong = (dataType == kLong || dataType == kULong);
+ sprintf(pragma,
+ "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
+ isLong ? "" : (isLocal ? "_local" : "_global"),
+ isLong ? "64" : "32", extended ? "extended" : "base");
// Now create the program header
- const char *typeName = get_explicit_type_name( dataType );
- if( isLocal )
- sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName );
+ const char *typeName = get_explicit_type_name(dataType);
+ if (isLocal)
+ sprintf(programHeader, atomic_local_pattern[0], typeName, typeName,
+ typeName);
else
- sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName );
+ sprintf(programHeader, atomic_global_pattern[0], typeName, typeName);
// Set up our entire program now
- programLines[ 0 ] = pragma;
- programLines[ 1 ] = programHeader;
- programLines[ 2 ] = programCore;
- programLines[ 3 ] = ( isLocal ) ? atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ];
-
- if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) )
+ programLines[0] = pragma;
+ programLines[1] = programHeader;
+ programLines[2] = programCore;
+ programLines[3] =
+ (isLocal) ? atomic_local_pattern[1] : atomic_global_pattern[1];
+
+ if (create_single_kernel_helper(context, &program, &kernel, 4, programLines,
+ "test_atomic_fn"))
{
return -1;
}
@@ -183,25 +205,37 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q
//// Set up to actually run
threadSize = num_elements;
- error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize );
- test_error( error, "Unable to get thread group max size" );
+ error =
+ get_max_common_work_group_size(context, kernel, threadSize, &groupSize);
+ test_error(error, "Unable to get thread group max size");
- if( matchGroupSize )
+ if (matchGroupSize)
// HACK because xchg and cmpxchg apparently are limited by hardware
threadSize = groupSize;
- if( isLocal )
+ if (isLocal)
{
- size_t maxSizes[3] = {0, 0, 0};
- error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3*sizeof(size_t), maxSizes, 0);
- test_error( error, "Unable to obtain max work item sizes for the device" );
+ size_t maxSizes[3] = { 0, 0, 0 };
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+ 3 * sizeof(size_t), maxSizes, 0);
+ test_error(error,
+ "Unable to obtain max work item sizes for the device");
size_t workSize;
- error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL );
- test_error( error, "Unable to obtain max work group size for device and kernel combo" );
-
- // "workSize" is limited to that of the first dimension as only a 1DRange is executed.
- if( maxSizes[0] < workSize )
+ error = clGetKernelWorkGroupInfo(kernel, deviceID,
+ CL_KERNEL_WORK_GROUP_SIZE,
+ sizeof(workSize), &workSize, NULL);
+ test_error(
+ error,
+ "Unable to obtain max work group size for device and kernel combo");
+
+ // Limit workSize to avoid extremely large local buffer size and slow
+ // run.
+ if (workSize > 65536) workSize = 65536;
+
+ // "workSize" is limited to that of the first dimension as only a
+ // 1DRange is executed.
+ if (maxSizes[0] < workSize)
{
workSize = maxSizes[0];
}
@@ -210,38 +244,43 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q
}
- log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize );
+ log_info("\t(thread count %d, group size %d)\n", (int)threadSize,
+ (int)groupSize);
- refValues = (cl_int *)malloc( typeSize * threadSize );
+ refValues = (cl_int *)malloc(typeSize * threadSize);
- if( testFns.GenerateRefsIntFn != NULL )
+ if (testFns.GenerateRefsIntFn != NULL)
{
// We have a ref generator provided
- d = init_genrand( gRandomSeed );
- startRefValues = malloc( typeSize * threadSize );
- if( typeSize == 4 )
- testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d );
+ d = init_genrand(gRandomSeed);
+ startRefValues = malloc(typeSize * threadSize);
+ if (typeSize == 4)
+ testFns.GenerateRefsIntFn(threadSize, (cl_int *)startRefValues, d);
else
- testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d );
+ testFns.GenerateRefsLongFn(threadSize, (cl_long *)startRefValues,
+ d);
free_mtdata(d);
d = NULL;
}
else
startRefValues = NULL;
- // If we're given a num_results function, we need to determine how many result objects we need. If
- // we don't have it, we assume it's just 1
- size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1;
+ // If we're given a num_results function, we need to determine how many
+ // result objects we need. If we don't have it, we assume it's just 1
+ size_t numDestItems = (testFns.NumResultsFn != NULL)
+ ? testFns.NumResultsFn(threadSize, dataType)
+ : 1;
- char * destItems = new char[ typeSize * numDestItems ];
- if( destItems == NULL )
+ char *destItems = new char[typeSize * numDestItems];
+ if (destItems == NULL)
{
- log_error( "ERROR: Unable to allocate memory!\n" );
+ log_error("ERROR: Unable to allocate memory!\n");
return -1;
}
- void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue;
- for( size_t i = 0; i < numDestItems; i++ )
- memcpy( destItems + i * typeSize, startValue, typeSize );
+ void *startValue = (typeSize == 4) ? (void *)&testFns.mIntStartValue
+ : (void *)&testFns.mLongStartValue;
+ for (size_t i = 0; i < numDestItems; i++)
+ memcpy(destItems + i * typeSize, startValue, typeSize);
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
typeSize * numDestItems, destItems, NULL);
@@ -261,82 +300,97 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q
}
/* Set the arguments */
- error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
- test_error( error, "Unable to set indexed kernel arguments" );
- error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
- test_error( error, "Unable to set indexed kernel arguments" );
+ error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+ test_error(error, "Unable to set indexed kernel arguments");
+ error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+ test_error(error, "Unable to set indexed kernel arguments");
- if( isLocal )
+ if (isLocal)
{
- error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL );
- test_error( error, "Unable to set indexed local kernel argument" );
+ error = clSetKernelArg(kernel, 2, typeSize * numDestItems, NULL);
+ test_error(error, "Unable to set indexed local kernel argument");
cl_int numDestItemsInt = (cl_int)numDestItems;
- error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt );
- test_error( error, "Unable to set indexed kernel argument" );
+ error = clSetKernelArg(kernel, 3, sizeof(cl_int), &numDestItemsInt);
+ test_error(error, "Unable to set indexed kernel argument");
}
/* Run the kernel */
threads[0] = threadSize;
- error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL );
- test_error( error, "Unable to execute test kernel" );
-
- error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
- test_error( error, "Unable to read result value!" );
-
- error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL );
- test_error( error, "Unable to read reference values!" );
-
- // If we have an expectedFn, then we need to generate a final value to compare against. If we don't
- // have one, it's because we're comparing ref values only
- if( testFns.ExpectedValueIntFn != NULL )
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, &groupSize,
+ 0, NULL, NULL);
+ test_error(error, "Unable to execute test kernel");
+
+ error =
+ clEnqueueReadBuffer(queue, streams[0], true, 0, typeSize * numDestItems,
+ destItems, 0, NULL, NULL);
+ test_error(error, "Unable to read result value!");
+
+ error =
+ clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize * threadSize,
+ refValues, 0, NULL, NULL);
+ test_error(error, "Unable to read reference values!");
+
+ // If we have an expectedFn, then we need to generate a final value to
+ // compare against. If we don't have one, it's because we're comparing ref
+ // values only
+ if (testFns.ExpectedValueIntFn != NULL)
{
- for( size_t i = 0; i < numDestItems; i++ )
+ for (size_t i = 0; i < numDestItems; i++)
{
- char expected[ 8 ];
+ char expected[8];
cl_int intVal;
cl_long longVal;
- if( typeSize == 4 )
+ if (typeSize == 4)
{
// Int version
- intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i );
- memcpy( expected, &intVal, sizeof( intVal ) );
+ intVal = testFns.ExpectedValueIntFn(
+ threadSize, (cl_int *)startRefValues, i);
+ memcpy(expected, &intVal, sizeof(intVal));
}
else
{
// Long version
- longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i );
- memcpy( expected, &longVal, sizeof( longVal ) );
+ longVal = testFns.ExpectedValueLongFn(
+ threadSize, (cl_long *)startRefValues, i);
+ memcpy(expected, &longVal, sizeof(longVal));
}
- if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 )
+ if (memcmp(expected, destItems + i * typeSize, typeSize) != 0)
{
- if( typeSize == 4 )
+ if (typeSize == 4)
{
- cl_int *outValue = (cl_int *)( destItems + i * typeSize );
- log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue );
+ cl_int *outValue = (cl_int *)(destItems + i * typeSize);
+ log_error("ERROR: Result %zu from kernel does not "
+ "validate! (should be %d, was %d)\n",
+ i, intVal, *outValue);
cl_int *startRefs = (cl_int *)startRefValues;
cl_int *refs = (cl_int *)refValues;
- for( i = 0; i < threadSize; i++ )
+ for (i = 0; i < threadSize; i++)
{
- if( startRefs != NULL )
- log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] );
+ if (startRefs != NULL)
+ log_info(" --- %zu - %d --- %d\n", i, startRefs[i],
+ refs[i]);
else
- log_info( " --- %ld --- %d\n", i, refs[i] );
+ log_info(" --- %zu --- %d\n", i, refs[i]);
}
}
else
{
- cl_long *outValue = (cl_long *)( destItems + i * typeSize );
- log_error( "ERROR: Result %ld from kernel does not validate! (should be %lld, was %lld)\n", i, longVal, *outValue );
+ cl_long *outValue = (cl_long *)(destItems + i * typeSize);
+ log_error("ERROR: Result %zu from kernel does not "
+ "validate! (should be %" PRId64 ", was %" PRId64
+ ")\n",
+ i, longVal, *outValue);
cl_long *startRefs = (cl_long *)startRefValues;
cl_long *refs = (cl_long *)refValues;
- for( i = 0; i < threadSize; i++ )
+ for (i = 0; i < threadSize; i++)
{
- if( startRefs != NULL )
- log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] );
+ if (startRefs != NULL)
+ log_info(" --- %zu - %" PRId64 " --- %" PRId64 "\n",
+ i, startRefs[i], refs[i]);
else
- log_info( " --- %ld --- %lld\n", i, refs[i] );
+ log_info(" --- %zu --- %" PRId64 "\n", i, refs[i]);
}
}
return -1;
@@ -344,104 +398,141 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q
}
}
- if( testFns.VerifyRefsIntFn != NULL )
+ if (testFns.VerifyRefsIntFn != NULL)
{
/* Use the verify function to also check the results */
- if( dataType == kFloat )
+ if (dataType == kFloat)
{
cl_float *outValue = (cl_float *)destItems;
- if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) != 0 )
+ if (!testFns.VerifyRefsFloatFn(threadSize, (cl_float *)refValues,
+ *outValue)
+ != 0)
{
- log_error( "ERROR: Reference values did not validate!\n" );
+ log_error("ERROR: Reference values did not validate!\n");
return -1;
}
}
- else if( typeSize == 4 )
+ else if (typeSize == 4)
{
cl_int *outValue = (cl_int *)destItems;
- if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) != 0 )
+ if (!testFns.VerifyRefsIntFn(threadSize, (cl_int *)refValues,
+ *outValue)
+ != 0)
{
- log_error( "ERROR: Reference values did not validate!\n" );
+ log_error("ERROR: Reference values did not validate!\n");
return -1;
}
}
else
{
cl_long *outValue = (cl_long *)destItems;
- if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) != 0 )
+ if (!testFns.VerifyRefsLongFn(threadSize, (cl_long *)refValues,
+ *outValue)
+ != 0)
{
- log_error( "ERROR: Reference values did not validate!\n" );
+ log_error("ERROR: Reference values did not validate!\n");
return -1;
}
}
}
- else if( testFns.ExpectedValueIntFn == NULL )
+ else if (testFns.ExpectedValueIntFn == NULL)
{
- log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" );
+ log_error("ERROR: Test doesn't check total or refs; no values are "
+ "verified!\n");
return -1;
}
/* Re-write the starting value */
- for( size_t i = 0; i < numDestItems; i++ )
- memcpy( destItems + i * typeSize, startValue, typeSize );
- error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
- test_error( error, "Unable to write starting values!" );
-
- /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */
+ for (size_t i = 0; i < numDestItems; i++)
+ memcpy(destItems + i * typeSize, startValue, typeSize);
+ error =
+ clEnqueueWriteBuffer(queue, streams[0], true, 0,
+ typeSize * numDestItems, destItems, 0, NULL, NULL);
+ test_error(error, "Unable to write starting values!");
+
+ /* Run the kernel once for a single thread, so we can verify that the
+ * returned value is the original one */
threads[0] = 1;
- error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL );
- test_error( error, "Unable to execute test kernel" );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, threads, 0,
+ NULL, NULL);
+ test_error(error, "Unable to execute test kernel");
- error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL );
- test_error( error, "Unable to read reference values!" );
+ error = clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize, refValues,
+ 0, NULL, NULL);
+ test_error(error, "Unable to read reference values!");
- if( memcmp( refValues, destItems, typeSize ) != 0 )
+ if (memcmp(refValues, destItems, typeSize) != 0)
{
- if( typeSize == 4 )
+ if (typeSize == 4)
{
cl_int *s = (cl_int *)destItems;
cl_int *r = (cl_int *)refValues;
- log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
- " (should have been %d, returned %d)!\n", *s, *r );
+ log_error("ERROR: atomic function operated correctly but did NOT "
+ "return correct 'old' value "
+ " (should have been %d, returned %d)!\n",
+ *s, *r);
}
else
{
cl_long *s = (cl_long *)destItems;
cl_long *r = (cl_long *)refValues;
- log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
- " (should have been %lld, returned %lld)!\n", *s, *r );
+ log_error("ERROR: atomic function operated correctly but did NOT "
+ "return correct 'old' value "
+ " (should have been %" PRId64 ", returned %" PRId64
+ ")!\n",
+ *s, *r);
}
return -1;
}
- delete [] destItems;
- free( refValues );
- if( startRefValues != NULL )
- free( startRefValues );
+ delete[] destItems;
+ free(refValues);
+ if (startRefValues != NULL) free(startRefValues);
return 0;
}
-int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
- TestFns testFns,
- bool extended, bool matchGroupSize, bool usingAtomicPrefix )
+int test_atomic_function_set(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ const char *programCore, TestFns testFns,
+ bool extended, bool matchGroupSize,
+ bool usingAtomicPrefix)
{
- log_info(" Testing %s functions...\n", usingAtomicPrefix ? "atomic_" : "atom_");
+ log_info(" Testing %s functions...\n",
+ usingAtomicPrefix ? "atomic_" : "atom_");
int errors = 0;
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kInt, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kUInt, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kInt, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kUInt, matchGroupSize );
-
- // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 bit functions still use the "atom" prefix.
- // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic" prefix.
- if (!usingAtomicPrefix) {
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kLong, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kULong, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kLong, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kULong, matchGroupSize );
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false, kInt,
+ matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false, kUInt,
+ matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true, kInt,
+ matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true, kUInt,
+ matchGroupSize);
+
+ // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64
+ // bit functions still use the "atom" prefix. The argument usingAtomicPrefix
+ // is set to true if programCore was generated with the "atomic" prefix.
+ if (!usingAtomicPrefix)
+ {
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false,
+ kLong, matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false,
+ kULong, matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true,
+ kLong, matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true,
+ kULong, matchGroupSize);
}
return errors;
@@ -450,265 +541,346 @@ int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_comma
#pragma mark ---- add
const char atom_add_core[] =
-" oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
-" atom_add( &destMemory[0], tid + 3 );\n"
-" atom_add( &destMemory[0], tid + 3 );\n"
-" atom_add( &destMemory[0], tid + 3 );\n";
+ " oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
+ " atom_add( &destMemory[0], tid + 3 );\n"
+ " atom_add( &destMemory[0], tid + 3 );\n"
+ " atom_add( &destMemory[0], tid + 3 );\n";
const char atomic_add_core[] =
-" oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
-" atomic_add( &destMemory[0], tid + 3 );\n"
-" atomic_add( &destMemory[0], tid + 3 );\n"
-" atomic_add( &destMemory[0], tid + 3 );\n";
+ " oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
+ " atomic_add( &destMemory[0], tid + 3 );\n"
+ " atomic_add( &destMemory[0], tid + 3 );\n"
+ " atomic_add( &destMemory[0], tid + 3 );\n";
-cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_add_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = 0;
- for( size_t i = 0; i < size; i++ )
- total += ( (cl_int)i + 3 ) * 4;
+ for (size_t i = 0; i < size; i++) total += ((cl_int)i + 3) * 4;
return total;
}
-cl_long test_atomic_add_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_add_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = 0;
- for( size_t i = 0; i < size; i++ )
- total += ( ( i + 3 ) * 4 );
+ for (size_t i = 0; i < size; i++) total += ((i + 3) * 4);
return total;
}
-int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_add(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { 0,
+ 0LL,
+ NULL,
+ test_atomic_add_result_int,
+ NULL,
+ NULL,
+ test_atomic_add_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_add_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
+ return -1;
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_add_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
- return -1;
return 0;
}
#pragma mark ---- sub
-const char atom_sub_core[] = " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";
+const char atom_sub_core[] =
+ " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";
-const char atomic_sub_core[] = " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";
+const char atomic_sub_core[] =
+ " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";
-cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_sub_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = INT_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total -= (cl_int)i + 3;
+ for (size_t i = 0; i < size; i++) total -= (cl_int)i + 3;
return total;
}
-cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_sub_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = LONG_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total -= i + 3;
+ for (size_t i = 0; i < size; i++) total -= i + 3;
return total;
}
-int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_sub(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_sub_result_int,
+ NULL,
+ NULL,
+ test_atomic_sub_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_sub_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_sub_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
#pragma mark ---- xchg
-const char atom_xchg_core[] = " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";
+const char atom_xchg_core[] =
+ " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";
-const char atomic_xchg_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
-const char atomic_xchg_float_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
+const char atomic_xchg_core[] =
+ " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
+const char atomic_xchg_float_core[] =
+ " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
-bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue )
+bool test_atomic_xchg_verify_int(size_t size, cl_int *refValues,
+ cl_int finalValue)
{
- /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+ /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+ * array, and ONLY one entry */
char *valids;
size_t i;
char originalValidCount = 0;
- valids = (char *)malloc( sizeof( char ) * size );
- memset( valids, 0, sizeof( char ) * size );
+ valids = (char *)malloc(sizeof(char) * size);
+ memset(valids, 0, sizeof(char) * size);
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( refValues[ i ] == INT_TEST_VALUE )
+ if (refValues[i] == INT_TEST_VALUE)
{
// Special initial value
originalValidCount++;
continue;
}
- if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+ if (refValues[i] < 0 || (size_t)refValues[i] >= size)
{
- log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] );
+ log_error(
+ "ERROR: Reference value %zu outside of valid range! (%d)\n", i,
+ refValues[i]);
return false;
}
- valids[ refValues[ i ] ] ++;
+ valids[refValues[i]]++;
}
- /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
- the final value outputted */
- if( valids[ finalValue ] > 0 )
+ /* Note: ONE entry will have zero count. It'll be the last one that
+ executed, because that value should be the final value outputted */
+ if (valids[finalValue] > 0)
{
- log_error( "ERROR: Final value %d was also in ref list!\n", finalValue );
+ log_error("ERROR: Final value %d was also in ref list!\n", finalValue);
return false;
}
else
- valids[ finalValue ] = 1; // So the following loop will be okay
+ valids[finalValue] = 1; // So the following loop will be okay
/* Now check that every entry has one and only one count */
- if( originalValidCount != 1 )
+ if (originalValidCount != 1)
{
- log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+ log_error("ERROR: Starting reference value %d did not occur "
+ "once-and-only-once (occurred %d)\n",
+ 65191, originalValidCount);
return false;
}
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( valids[ i ] != 1 )
+ if (valids[i] != 1)
{
- log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
- for( size_t j = 0; j < size; j++ )
- log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+ log_error("ERROR: Reference value %zu did not occur "
+ "once-and-only-once (occurred %d)\n",
+ i, valids[i]);
+ for (size_t j = 0; j < size; j++)
+ log_info("%d: %d\n", (int)j, (int)valids[j]);
return false;
}
}
- free( valids );
+ free(valids);
return true;
}
-bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue )
+bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues,
+ cl_long finalValue)
{
- /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+ /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+ * array, and ONLY one entry */
char *valids;
size_t i;
char originalValidCount = 0;
- valids = (char *)malloc( sizeof( char ) * size );
- memset( valids, 0, sizeof( char ) * size );
+ valids = (char *)malloc(sizeof(char) * size);
+ memset(valids, 0, sizeof(char) * size);
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( refValues[ i ] == LONG_TEST_VALUE )
+ if (refValues[i] == LONG_TEST_VALUE)
{
// Special initial value
originalValidCount++;
continue;
}
- if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+ if (refValues[i] < 0 || (size_t)refValues[i] >= size)
{
- log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] );
+ log_error(
+ "ERROR: Reference value %zu outside of valid range! (%" PRId64
+ ")\n",
+ i, refValues[i]);
return false;
}
- valids[ refValues[ i ] ] ++;
+ valids[refValues[i]]++;
}
- /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
- the final value outputted */
- if( valids[ finalValue ] > 0 )
+ /* Note: ONE entry will have zero count. It'll be the last one that
+ executed, because that value should be the final value outputted */
+ if (valids[finalValue] > 0)
{
- log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue );
+ log_error("ERROR: Final value %" PRId64 " was also in ref list!\n",
+ finalValue);
return false;
}
else
- valids[ finalValue ] = 1; // So the following loop will be okay
+ valids[finalValue] = 1; // So the following loop will be okay
/* Now check that every entry has one and only one count */
- if( originalValidCount != 1 )
+ if (originalValidCount != 1)
{
- log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+ log_error("ERROR: Starting reference value %d did not occur "
+ "once-and-only-once (occurred %d)\n",
+ 65191, originalValidCount);
return false;
}
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( valids[ i ] != 1 )
+ if (valids[i] != 1)
{
- log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
- for( size_t j = 0; j < size; j++ )
- log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+ log_error("ERROR: Reference value %zu did not occur "
+ "once-and-only-once (occurred %d)\n",
+ i, valids[i]);
+ for (size_t j = 0; j < size; j++)
+ log_info("%d: %d\n", (int)j, (int)valids[j]);
return false;
}
}
- free( valids );
+ free(valids);
return true;
}
-bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue )
+bool test_atomic_xchg_verify_float(size_t size, cl_float *refValues,
+ cl_float finalValue)
{
- /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+ /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+ * array, and ONLY one entry */
char *valids;
size_t i;
char originalValidCount = 0;
- valids = (char *)malloc( sizeof( char ) * size );
- memset( valids, 0, sizeof( char ) * size );
+ valids = (char *)malloc(sizeof(char) * size);
+ memset(valids, 0, sizeof(char) * size);
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- cl_int *intRefValue = (cl_int *)( &refValues[ i ] );
- if( *intRefValue == INT_TEST_VALUE )
+ cl_int *intRefValue = (cl_int *)(&refValues[i]);
+ if (*intRefValue == INT_TEST_VALUE)
{
// Special initial value
originalValidCount++;
continue;
}
- if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+ if (refValues[i] < 0 || (size_t)refValues[i] >= size)
{
- log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] );
+ log_error(
+ "ERROR: Reference value %zu outside of valid range! (%a)\n", i,
+ refValues[i]);
return false;
}
- valids[ (int)refValues[ i ] ] ++;
+ valids[(int)refValues[i]]++;
}
- /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
- the final value outputted */
- if( valids[ (int)finalValue ] > 0 )
+ /* Note: ONE entry will have zero count. It'll be the last one that
+ executed, because that value should be the final value outputted */
+ if (valids[(int)finalValue] > 0)
{
- log_error( "ERROR: Final value %a was also in ref list!\n", finalValue );
+ log_error("ERROR: Final value %a was also in ref list!\n", finalValue);
return false;
}
else
- valids[ (int)finalValue ] = 1; // So the following loop will be okay
+ valids[(int)finalValue] = 1; // So the following loop will be okay
/* Now check that every entry has one and only one count */
- if( originalValidCount != 1 )
+ if (originalValidCount != 1)
{
- log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+ log_error("ERROR: Starting reference value %d did not occur "
+ "once-and-only-once (occurred %d)\n",
+ 65191, originalValidCount);
return false;
}
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( valids[ i ] != 1 )
+ if (valids[i] != 1)
{
- log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
- for( size_t j = 0; j < size; j++ )
- log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+ log_error("ERROR: Reference value %zu did not occur "
+ "once-and-only-once (occurred %d)\n",
+ i, valids[i]);
+ for (size_t j = 0; j < size; j++)
+ log_info("%d: %d\n", (int)j, (int)valids[j]);
return false;
}
}
- free( valids );
+ free(valids);
return true;
}
-int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_xchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float };
-
- int errors = test_atomic_function_set( deviceID, context, queue, num_elements, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false );
- errors |= test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true );
-
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, false, kFloat, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, true, kFloat, true );
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ NULL,
+ NULL,
+ test_atomic_xchg_verify_int,
+ NULL,
+ NULL,
+ test_atomic_xchg_verify_long,
+ NULL,
+ NULL,
+ test_atomic_xchg_verify_float };
+
+ int errors = test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_xchg_core, set, false,
+ true, /*usingAtomicPrefix*/ false);
+ errors |= test_atomic_function_set(deviceID, context, queue, num_elements,
+ atomic_xchg_core, set, false, true,
+ /*usingAtomicPrefix*/ true);
+
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_xchg_float_core, set, false, false,
+ kFloat, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_xchg_float_core, set, false, true,
+ kFloat, true);
return errors;
}
@@ -716,51 +888,71 @@ int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue
#pragma mark ---- min
-const char atom_min_core[] = " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";
+const char atom_min_core[] =
+ " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";
-const char atomic_min_core[] = " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";
+const char atomic_min_core[] =
+ " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";
-cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_min_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = 0x7fffffffL;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] < total )
- total = startRefValues[ i ];
+ if (startRefValues[i] < total) total = startRefValues[i];
}
return total;
}
-void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d )
+void test_atomic_min_gen_int(size_t size, cl_int *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff;
}
-cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_min_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = 0x7fffffffffffffffLL;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] < total )
- total = startRefValues[ i ];
+ if (startRefValues[i] < total) total = startRefValues[i];
}
return total;
}
-void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d )
+void test_atomic_min_gen_long(size_t size, cl_long *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_long)(genrand_int32(d)
+ | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16));
}
-int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_min(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { 0x7fffffffL,
+ 0x7fffffffffffffffLL,
+ NULL,
+ test_atomic_min_result_int,
+ test_atomic_min_gen_int,
+ NULL,
+ test_atomic_min_result_long,
+ test_atomic_min_gen_long,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_min_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_min_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -768,79 +960,118 @@ int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue
#pragma mark ---- max
-const char atom_max_core[] = " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";
+const char atom_max_core[] =
+ " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";
-const char atomic_max_core[] = " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";
+const char atomic_max_core[] =
+ " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";
-cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_max_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = 0;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] > total )
- total = startRefValues[ i ];
+ if (startRefValues[i] > total) total = startRefValues[i];
}
return total;
}
-void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d )
+void test_atomic_max_gen_int(size_t size, cl_int *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff;
}
-cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_max_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = 0;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] > total )
- total = startRefValues[ i ];
+ if (startRefValues[i] > total) total = startRefValues[i];
}
return total;
}
-void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d )
+void test_atomic_max_gen_long(size_t size, cl_long *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_long)(genrand_int32(d)
+ | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16));
}
-int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_max(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { 0,
+ 0,
+ NULL,
+ test_atomic_max_result_int,
+ test_atomic_max_gen_int,
+ NULL,
+ test_atomic_max_result_long,
+ test_atomic_max_gen_long,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_max_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
+ return -1;
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_max_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
- return -1;
return 0;
}
#pragma mark ---- inc
-const char atom_inc_core[] = " oldValues[tid] = atom_inc( &destMemory[0] );\n";
+const char atom_inc_core[] =
+ " oldValues[tid] = atom_inc( &destMemory[0] );\n";
-const char atomic_inc_core[] = " oldValues[tid] = atomic_inc( &destMemory[0] );\n";
+const char atomic_inc_core[] =
+ " oldValues[tid] = atomic_inc( &destMemory[0] );\n";
-cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_inc_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
return INT_TEST_VALUE + (cl_int)size;
}
-cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_inc_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
return LONG_TEST_VALUE + size;
}
-int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_inc(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_inc_result_int,
+ NULL,
+ NULL,
+ test_atomic_inc_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_inc_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_inc_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -848,27 +1079,46 @@ int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue
#pragma mark ---- dec
-const char atom_dec_core[] = " oldValues[tid] = atom_dec( &destMemory[0] );\n";
+const char atom_dec_core[] =
+ " oldValues[tid] = atom_dec( &destMemory[0] );\n";
-const char atomic_dec_core[] = " oldValues[tid] = atomic_dec( &destMemory[0] );\n";
+const char atomic_dec_core[] =
+ " oldValues[tid] = atomic_dec( &destMemory[0] );\n";
-cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_dec_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
return INT_TEST_VALUE - (cl_int)size;
}
-cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_dec_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
return LONG_TEST_VALUE - size;
}
-int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_dec(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_dec_result_int,
+ NULL,
+ NULL,
+ test_atomic_dec_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_dec_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_dec_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -877,129 +1127,159 @@ int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue
#pragma mark ---- cmpxchg
/* We test cmpxchg by implementing (the long way) atom_add */
+// clang-format off
const char atom_cmpxchg_core[] =
-" int oldValue, origValue, newValue;\n"
-" do { \n"
-" origValue = destMemory[0];\n"
-" newValue = origValue + tid + 2;\n"
-" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
-" } while( oldValue != origValue );\n"
-" oldValues[tid] = oldValue;\n"
-;
+ " int oldValue, origValue, newValue;\n"
+ " do { \n"
+ " origValue = destMemory[0];\n"
+ " newValue = origValue + tid + 2;\n"
+ " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
+ " } while( oldValue != origValue );\n"
+ " oldValues[tid] = oldValue;\n";
const char atom_cmpxchg64_core[] =
-" long oldValue, origValue, newValue;\n"
-" do { \n"
-" origValue = destMemory[0];\n"
-" newValue = origValue + tid + 2;\n"
-" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
-" } while( oldValue != origValue );\n"
-" oldValues[tid] = oldValue;\n"
-;
+ " long oldValue, origValue, newValue;\n"
+ " do { \n"
+ " origValue = destMemory[0];\n"
+ " newValue = origValue + tid + 2;\n"
+ " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
+ " } while( oldValue != origValue );\n"
+ " oldValues[tid] = oldValue;\n";
const char atomic_cmpxchg_core[] =
-" int oldValue, origValue, newValue;\n"
-" do { \n"
-" origValue = destMemory[0];\n"
-" newValue = origValue + tid + 2;\n"
-" oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
-" } while( oldValue != origValue );\n"
-" oldValues[tid] = oldValue;\n"
-;
-
-cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+ " int oldValue, origValue, newValue;\n"
+ " do { \n"
+ " origValue = destMemory[0];\n"
+ " newValue = origValue + tid + 2;\n"
+ " oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
+ " } while( oldValue != origValue );\n"
+ " oldValues[tid] = oldValue;\n";
+// clang-format on
+
+cl_int test_atomic_cmpxchg_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = INT_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total += (cl_int)i + 2;
+ for (size_t i = 0; i < size; i++) total += (cl_int)i + 2;
return total;
}
-cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_cmpxchg_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = LONG_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total += i + 2;
+ for (size_t i = 0; i < size; i++) total += i + 2;
return total;
}
-int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL };
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_cmpxchg_result_int,
+ NULL,
+ NULL,
+ test_atomic_cmpxchg_result_long,
+ NULL,
+ NULL };
int errors = 0;
log_info(" Testing atom_ functions...\n");
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kUInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kUInt, true );
-
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kLong, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kULong, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kLong, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kULong, true );
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, false, kInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, false, kUInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, true, kInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, true, kUInt, true);
+
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, false,
+ kLong, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, false,
+ kULong, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, true, kLong,
+ true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, true,
+ kULong, true);
log_info(" Testing atomic_ functions...\n");
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kUInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kUInt, true );
-
- if( errors )
- return -1;
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, false, kInt,
+ true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, false,
+ kUInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, true, kInt, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, true, kUInt,
+ true);
+
+ if (errors) return -1;
return 0;
}
#pragma mark -------- Bitwise functions
-size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType )
+size_t test_bitwise_num_results(size_t threadCount, ExplicitType dataType)
{
- size_t numBits = get_explicit_type_size( dataType ) * 8;
+ size_t numBits = get_explicit_type_size(dataType) * 8;
- return ( threadCount + numBits - 1 ) / numBits;
+ return (threadCount + numBits - 1) / numBits;
}
#pragma mark ---- and
+// clang-format off
const char atom_and_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
-;
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n";
const char atomic_and_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
-;
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n";
+// clang-format on
-cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+cl_int test_atomic_and_result_int(size_t size, cl_int *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 31 ) / 32;
- if( whichResult < numThreads - 1 )
- return 0;
+ size_t numThreads = ((size_t)size + 31) / 32;
+ if (whichResult < numThreads - 1) return 0;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 32;
cl_int bits = (cl_int)0xffffffffL;
- for( size_t i = 0; i < numBits; i++ )
- bits &= ~( 1 << i );
+ for (size_t i = 0; i < numBits; i++) bits &= ~(1 << i);
return bits;
}
-cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_and_result_long(size_t size, cl_long *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 63 ) / 64;
- if( whichResult < numThreads - 1 )
- return 0;
+ size_t numThreads = ((size_t)size + 63) / 64;
+ if (whichResult < numThreads - 1) return 0;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 64;
@@ -1009,14 +1289,28 @@ cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_
return bits;
}
-int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_and(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results,
- test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { 0xffffffff,
+ 0xffffffffffffffffLL,
+ test_bitwise_num_results,
+ test_atomic_and_result_int,
+ NULL,
+ NULL,
+ test_atomic_and_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_and_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_and_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -1024,59 +1318,68 @@ int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue
#pragma mark ---- or
+// clang-format off
const char atom_or_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
-;
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n";
const char atomic_or_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
-;
-
-cl_int test_atomic_or_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n";
+// clang-format on
+
+cl_int test_atomic_or_result_int(size_t size, cl_int *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 31 ) / 32;
- if( whichResult < numThreads - 1 )
- return 0xffffffff;
+ size_t numThreads = ((size_t)size + 31) / 32;
+ if (whichResult < numThreads - 1) return 0xffffffff;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 32;
cl_int bits = 0;
- for( size_t i = 0; i < numBits; i++ )
- bits |= ( 1 << i );
+ for (size_t i = 0; i < numBits; i++) bits |= (1 << i);
return bits;
}
-cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_or_result_long(size_t size, cl_long *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 63 ) / 64;
- if( whichResult < numThreads - 1 )
- return 0x0ffffffffffffffffLL;
+ size_t numThreads = ((size_t)size + 63) / 64;
+ if (whichResult < numThreads - 1) return 0x0ffffffffffffffffLL;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 64;
cl_long bits = 0;
- for( size_t i = 0; i < numBits; i++ )
- bits |= ( 1LL << i );
+ for (size_t i = 0; i < numBits; i++) bits |= (1LL << i);
return bits;
}
-int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_or(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL };
+ TestFns set = {
+ 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int,
+ NULL, NULL, test_atomic_or_result_long, NULL,
+ NULL
+ };
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_or_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_or_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -1096,33 +1399,44 @@ const char atomic_xor_core[] =
"\n"
" oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n";
-cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+cl_int test_atomic_xor_result_int(size_t size, cl_int *startRefValues,
+ size_t whichResult)
{
cl_int total = 0x2f08ab41;
- for( size_t i = 0; i < size; i++ )
- total ^= ( 1 << ( i & 31 ) );
+ for (size_t i = 0; i < size; i++) total ^= (1 << (i & 31));
return total;
}
-cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_xor_result_long(size_t size, cl_long *startRefValues,
+ size_t whichResult)
{
cl_long total = 0x2f08ab418ba0541LL;
- for( size_t i = 0; i < size; i++ )
- total ^= ( 1LL << ( i & 63 ) );
+ for (size_t i = 0; i < size; i++) total ^= (1LL << (i & 63));
return total;
}
-int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_xor(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { 0x2f08ab41,
+ 0x2f08ab418ba0541LL,
+ NULL,
+ test_atomic_xor_result_int,
+ NULL,
+ NULL,
+ test_atomic_xor_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_xor_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_xor_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
-
-
-
-
diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp
index b85e3d24..2bba3e24 100644
--- a/test_conformance/atomics/test_indexed_cases.cpp
+++ b/test_conformance/atomics/test_indexed_cases.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -16,48 +16,55 @@
#include "testBase.h"
#include "harness/conversions.h"
-const char * atomic_index_source =
-"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
-"// Counter keeps track of which index in counts we are using.\n"
-"// We get that value, increment it, and then set that index in counts to our thread ID.\n"
-"// At the end of this we should have all thread IDs in some random location in counts\n"
-"// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
-"// will be missing some.\n"
-"\n"
-"__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
-" int tid = get_global_id(0);\n"
-" \n"
-" int counter_to_use = atom_add(counter, 1);\n"
-" counts[counter_to_use] = tid;\n"
-"}";
-
-int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+// clang-format off
+const char *atomic_index_source =
+ "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
+ "// Counter keeps track of which index in counts we are using.\n"
+ "// We get that value, increment it, and then set that index in counts to our thread ID.\n"
+ "// At the end of this we should have all thread IDs in some random location in counts\n"
+ "// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
+ "// will be missing some.\n"
+ "\n"
+ "__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
+ " int tid = get_global_id(0);\n"
+ " \n"
+ " int counter_to_use = atom_add(counter, 1);\n"
+ " counts[counter_to_use] = tid;\n"
+ "}";
+// clang-format on
+
+int test_atomic_add_index(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper counter, counters;
size_t numGlobalThreads, numLocalThreads;
- int fail = 0, succeed = 0, err;
+ int fail = 0, err;
- /* Check if atomics are supported. */
- if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
- log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
- return 0;
- }
+ /* Check if atomics are supported. */
+ if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
+ {
+ log_info("Base atomics not supported "
+ "(cl_khr_global_int32_base_atomics). Skipping test.\n");
+ return 0;
+ }
//===== add_index test
// The index test replicates what particles does.
- // It uses one memory location to keep track of the current index and then each thread
- // does an atomic add to it to get its new location. The threads then write to their
- // assigned location. At the end we check to make sure that each thread's ID shows up
- // exactly once in the output.
+ // It uses one memory location to keep track of the current index and then
+ // each thread does an atomic add to it to get its new location. The threads
+ // then write to their assigned location. At the end we check to make sure
+ // that each thread's ID shows up exactly once in the output.
numGlobalThreads = 2048;
- if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) )
+ if (create_single_kernel_helper(context, &program, &kernel, 1,
+ &atomic_index_source, "add_index_test"))
return -1;
- if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) )
+ if (get_max_common_work_group_size(context, kernel, numGlobalThreads,
+ &numLocalThreads))
return -1;
log_info("Execute global_threads:%d local_threads:%d\n",
@@ -72,103 +79,148 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_
sizeof(cl_int) * numGlobalThreads, NULL, NULL);
// Reset all those locations to -1 to indciate they have not been used.
- cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads);
- if (values == NULL) {
- log_error("add_index_test FAILED to allocate memory for initial values.\n");
- fail = 1; succeed = -1;
- } else {
+ cl_int *values = (cl_int *)malloc(sizeof(cl_int) * numGlobalThreads);
+ if (values == NULL)
+ {
+ log_error(
+ "add_index_test FAILED to allocate memory for initial values.\n");
+ fail = 1;
+ }
+ else
+ {
memset(values, -1, numLocalThreads);
- unsigned int i=0;
- for (i=0; i<numGlobalThreads; i++)
- values[i] = -1;
- int init=0;
- err = clEnqueueWriteBuffer(queue, counters, true, 0, numGlobalThreads*sizeof(cl_int), values, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(queue, counter, true, 0,1*sizeof(cl_int), &init, 0, NULL, NULL);
- if (err) {
- log_error("add_index_test FAILED to write initial values to arrays: %d\n", err);
- fail=1; succeed=-1;
- } else {
+ unsigned int i = 0;
+ for (i = 0; i < numGlobalThreads; i++) values[i] = -1;
+ int init = 0;
+ err = clEnqueueWriteBuffer(queue, counters, true, 0,
+ numGlobalThreads * sizeof(cl_int), values, 0,
+ NULL, NULL);
+ err |= clEnqueueWriteBuffer(queue, counter, true, 0, 1 * sizeof(cl_int),
+ &init, 0, NULL, NULL);
+ if (err)
+ {
+ log_error(
+ "add_index_test FAILED to write initial values to arrays: %d\n",
+ err);
+ fail = 1;
+ }
+ else
+ {
err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
- if (err) {
- log_error("add_index_test FAILED to set kernel arguments: %d\n", err);
- fail=1; succeed=-1;
- } else {
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numGlobalThreads, &numLocalThreads, 0, NULL, NULL );
- if (err) {
- log_error("add_index_test FAILED to execute kernel: %d\n", err);
- fail=1; succeed=-1;
- } else {
- err = clEnqueueReadBuffer( queue, counters, true, 0, sizeof(cl_int)*numGlobalThreads, values, 0, NULL, NULL );
- if (err) {
- log_error("add_index_test FAILED to read back results: %d\n", err);
- fail = 1; succeed=-1;
- } else {
+ if (err)
+ {
+ log_error("add_index_test FAILED to set kernel arguments: %d\n",
+ err);
+ fail = 1;
+ }
+ else
+ {
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
+ &numGlobalThreads,
+ &numLocalThreads, 0, NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_test FAILED to execute kernel: %d\n",
+ err);
+ fail = 1;
+ }
+ else
+ {
+ err = clEnqueueReadBuffer(queue, counters, true, 0,
+ sizeof(cl_int) * numGlobalThreads,
+ values, 0, NULL, NULL);
+ if (err)
+ {
+ log_error(
+ "add_index_test FAILED to read back results: %d\n",
+ err);
+ fail = 1;
+ }
+ else
+ {
unsigned int looking_for, index;
- for (looking_for=0; looking_for<numGlobalThreads; looking_for++) {
- int instances_found=0;
- for (index=0; index<numGlobalThreads; index++) {
- if (values[index]==(int)looking_for)
+ for (looking_for = 0; looking_for < numGlobalThreads;
+ looking_for++)
+ {
+ int instances_found = 0;
+ for (index = 0; index < numGlobalThreads; index++)
+ {
+ if (values[index] == (int)looking_for)
instances_found++;
}
- if (instances_found != 1) {
- log_error("add_index_test FAILED: wrong number of instances (%d!=1) for counter %d.\n", instances_found, looking_for);
- fail = 1; succeed=-1;
+ if (instances_found != 1)
+ {
+ log_error(
+ "add_index_test FAILED: wrong number of "
+ "instances (%d!=1) for counter %d.\n",
+ instances_found, looking_for);
+ fail = 1;
}
}
}
}
}
}
- if (!fail) {
- log_info("add_index_test passed. Each thread used exactly one index.\n");
+ if (!fail)
+ {
+ log_info(
+ "add_index_test passed. Each thread used exactly one index.\n");
}
free(values);
}
return fail;
}
+// clang-format off
const char *add_index_bin_kernel[] = {
-"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
-"// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
-"// using an atomic add to keep track of the current location to write into in each bin.\n"
-"// This is the same as the memory update for the particles demo.\n"
-"\n"
-"__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int location = bin_assignments[tid];\n"
-" int counter = atom_add(&bin_counters[location], 1);\n"
-" bins[location*max_counts_per_bin + counter] = tid;\n"
-"}" };
-
-// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel
-// using an atomic add to keep track of the current location to write into in each bin.
-// This is the same as the memory update for the particles demo.
-int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_context context, MTdata d)
+ "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
+ "// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
+ "// using an atomic add to keep track of the current location to write into in each bin.\n"
+ "// This is the same as the memory update for the particles demo.\n"
+ "\n"
+ "__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
+ " int tid = get_global_id(0);\n"
+ "\n"
+ " int location = bin_assignments[tid];\n"
+ " int counter = atom_add(&bin_counters[location], 1);\n"
+ " bins[location*max_counts_per_bin + counter] = tid;\n"
+ "}" };
+// clang-format on
+
+// This test assigns a bunch of values to bins and then tries to put them in the
+// bins in parallel using an atomic add to keep track of the current location to
+// write into in each bin. This is the same as the memory update for the
+// particles demo.
+int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
+ cl_context context, MTdata d)
{
int number_of_items = (int)global_threads[0];
size_t local_threads[1];
int divisor = 12;
- int number_of_bins = number_of_items/divisor;
- int max_counts_per_bin = divisor*2;
+ int number_of_bins = number_of_items / divisor;
+ int max_counts_per_bin = divisor * 2;
int fail = 0;
- int succeed = 0;
int err;
clProgramWrapper program;
clKernelWrapper kernel;
- // log_info("add_index_bin_test: %d items, into %d bins, with a max of %d items per bin (bins is %d long).\n",
- // number_of_items, number_of_bins, max_counts_per_bin, number_of_bins*max_counts_per_bin);
+ // log_info("add_index_bin_test: %d items, into %d bins, with a max of %d
+ // items per bin (bins is %d long).\n",
+ // number_of_items, number_of_bins, max_counts_per_bin,
+ // number_of_bins*max_counts_per_bin);
//===== add_index_bin test
// The index test replicates what particles does.
- err = create_single_kernel_helper(context, &program, &kernel, 1, add_index_bin_kernel, "add_index_bin_test" );
- test_error( err, "Unable to create testing kernel" );
+ err =
+ create_single_kernel_helper(context, &program, &kernel, 1,
+ add_index_bin_kernel, "add_index_bin_test");
+ test_error(err, "Unable to create testing kernel");
- if( get_max_common_work_group_size( context, kernel, global_threads[0], &local_threads[0] ) )
+ if (get_max_common_work_group_size(context, kernel, global_threads[0],
+ &local_threads[0]))
return -1;
log_info("Execute global_threads:%d local_threads:%d\n",
@@ -185,152 +237,228 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_contex
clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(cl_int) * number_of_items, NULL, NULL);
- if (bin_counters == NULL) {
+ if (bin_counters == NULL)
+ {
log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
return -1;
}
- if (bins == NULL) {
+ if (bins == NULL)
+ {
log_error("add_index_bin_test FAILED to allocate bins.\n");
return -1;
}
- if (bin_assignments == NULL) {
+ if (bin_assignments == NULL)
+ {
log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
return -1;
}
// Initialize our storage
- cl_int *l_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
- if (!l_bin_counts) {
- log_error("add_index_bin_test FAILED to allocate initial values for bin_counters.\n");
+ cl_int *l_bin_counts = (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
+ if (!l_bin_counts)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "bin_counters.\n");
return -1;
}
int i;
- for (i=0; i<number_of_bins; i++)
- l_bin_counts[i] = 0;
- err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, l_bin_counts, 0, NULL, NULL);
- if (err) {
- log_error("add_index_bin_test FAILED to set initial values for bin_counters: %d\n", err);
+ for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
+ err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
+ sizeof(cl_int) * number_of_bins, l_bin_counts, 0,
+ NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to set initial values for "
+ "bin_counters: %d\n",
+ err);
return -1;
}
- cl_int *values = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
- if (!values) {
- log_error("add_index_bin_test FAILED to allocate initial values for bins.\n");
+ cl_int *values =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
+ if (!values)
+ {
+ log_error(
+ "add_index_bin_test FAILED to allocate initial values for bins.\n");
return -1;
}
- for (i=0; i<number_of_bins*max_counts_per_bin; i++)
- values[i] = -1;
- err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, values, 0, NULL, NULL);
- if (err) {
- log_error("add_index_bin_test FAILED to set initial values for bins: %d\n", err);
+ for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1;
+ err = clEnqueueWriteBuffer(queue, bins, true, 0,
+ sizeof(cl_int) * number_of_bins
+ * max_counts_per_bin,
+ values, 0, NULL, NULL);
+ if (err)
+ {
+ log_error(
+ "add_index_bin_test FAILED to set initial values for bins: %d\n",
+ err);
return -1;
}
free(values);
- cl_int *l_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_items);
- if (!l_bin_assignments) {
- log_error("add_index_bin_test FAILED to allocate initial values for l_bin_assignments.\n");
+ cl_int *l_bin_assignments =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_items);
+ if (!l_bin_assignments)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "l_bin_assignments.\n");
return -1;
}
- for (i=0; i<number_of_items; i++) {
- int bin = random_in_range(0, number_of_bins-1, d);
- while (l_bin_counts[bin] >= max_counts_per_bin) {
- bin = random_in_range(0, number_of_bins-1, d);
+ for (i = 0; i < number_of_items; i++)
+ {
+ int bin = random_in_range(0, number_of_bins - 1, d);
+ while (l_bin_counts[bin] >= max_counts_per_bin)
+ {
+ bin = random_in_range(0, number_of_bins - 1, d);
}
if (bin >= number_of_bins)
- log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins);
- if (l_bin_counts[bin]+1 > max_counts_per_bin)
- log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin);
+ log_error("add_index_bin_test internal error generating bin "
+ "assignments: bin %d >= number_of_bins %d.\n",
+ bin, number_of_bins);
+ if (l_bin_counts[bin] + 1 > max_counts_per_bin)
+ log_error(
+ "add_index_bin_test internal error generating bin assignments: "
+ "bin %d has more entries (%d) than max_counts_per_bin (%d).\n",
+ bin, l_bin_counts[bin], max_counts_per_bin);
l_bin_counts[bin]++;
l_bin_assignments[i] = bin;
- // log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]);
+ // log_info("item %d assigned to bin %d (%d items)\n", i, bin,
+ // l_bin_counts[bin]);
}
- err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL);
- if (err) {
- log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", err);
+ err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
+ sizeof(cl_int) * number_of_items,
+ l_bin_assignments, 0, NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to set initial values for "
+ "bin_assignments: %d\n",
+ err);
return -1;
}
// Setup the kernel
err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
- err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin);
- if (err) {
- log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err);
- fail=1; succeed=-1;
+ err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin),
+ &max_counts_per_bin);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to set kernel arguments: %d\n",
+ err);
+ fail = 1;
return -1;
}
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
- if (err) {
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads,
+ local_threads, 0, NULL, NULL);
+ if (err)
+ {
log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
- fail=1; succeed=-1;
+ fail = 1;
}
- cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
- if (!final_bin_assignments) {
- log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n");
+ cl_int *final_bin_assignments =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
+ if (!final_bin_assignments)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "final_bin_assignments.\n");
return -1;
}
- err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL );
- if (err) {
+ err = clEnqueueReadBuffer(queue, bins, true, 0,
+ sizeof(cl_int) * number_of_bins
+ * max_counts_per_bin,
+ final_bin_assignments, 0, NULL, NULL);
+ if (err)
+ {
log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
- fail = 1; succeed=-1;
+ fail = 1;
}
- cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
- if (!final_bin_counts) {
- log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n");
+ cl_int *final_bin_counts =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
+ if (!final_bin_counts)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "final_bin_counts.\n");
return -1;
}
- err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL );
- if (err) {
- log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err);
- fail = 1; succeed=-1;
+ err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
+ sizeof(cl_int) * number_of_bins, final_bin_counts,
+ 0, NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
+ err);
+ fail = 1;
}
// Verification.
- int errors=0;
+ int errors = 0;
int current_bin;
int search;
// Print out all the contents of the bins.
// for (current_bin=0; current_bin<number_of_bins; current_bin++)
// for (search=0; search<max_counts_per_bin; search++)
- // log_info("[bin %d, entry %d] = %d\n", current_bin, search, final_bin_assignments[current_bin*max_counts_per_bin+search]);
+ // log_info("[bin %d, entry %d] = %d\n", current_bin, search,
+ // final_bin_assignments[current_bin*max_counts_per_bin+search]);
// First verify that there are the correct number in each bin.
- for (current_bin=0; current_bin<number_of_bins; current_bin++) {
+ for (current_bin = 0; current_bin < number_of_bins; current_bin++)
+ {
int expected_number = l_bin_counts[current_bin];
int actual_number = final_bin_counts[current_bin];
- if (expected_number != actual_number) {
- log_error("add_index_bin_test FAILED: bin %d reported %d entries when %d were expected.\n", current_bin, actual_number, expected_number);
+ if (expected_number != actual_number)
+ {
+ log_error("add_index_bin_test FAILED: bin %d reported %d entries "
+ "when %d were expected.\n",
+ current_bin, actual_number, expected_number);
errors++;
}
- for (search=0; search<expected_number; search++) {
- if (final_bin_assignments[current_bin*max_counts_per_bin+search] == -1) {
- log_error("add_index_bin_test FAILED: bin %d had no entry at position %d when it should have had %d entries.\n", current_bin, search, expected_number);
+ for (search = 0; search < expected_number; search++)
+ {
+ if (final_bin_assignments[current_bin * max_counts_per_bin + search]
+ == -1)
+ {
+ log_error("add_index_bin_test FAILED: bin %d had no entry at "
+ "position %d when it should have had %d entries.\n",
+ current_bin, search, expected_number);
errors++;
}
}
- for (search=expected_number; search<max_counts_per_bin; search++) {
- if (final_bin_assignments[current_bin*max_counts_per_bin+search] != -1) {
- log_error("add_index_bin_test FAILED: bin %d had an extra entry at position %d when it should have had only %d entries.\n", current_bin, search, expected_number);
+ for (search = expected_number; search < max_counts_per_bin; search++)
+ {
+ if (final_bin_assignments[current_bin * max_counts_per_bin + search]
+ != -1)
+ {
+ log_error(
+ "add_index_bin_test FAILED: bin %d had an extra entry at "
+ "position %d when it should have had only %d entries.\n",
+ current_bin, search, expected_number);
errors++;
}
}
}
// Now verify that the correct ones are in each bin
int index;
- for (index=0; index<number_of_items; index++) {
+ for (index = 0; index < number_of_items; index++)
+ {
int expected_bin = l_bin_assignments[index];
int found_it = 0;
- for (search=0; search<l_bin_counts[expected_bin]; search++) {
- if (final_bin_assignments[expected_bin*max_counts_per_bin+search] == index) {
+ for (search = 0; search < l_bin_counts[expected_bin]; search++)
+ {
+ if (final_bin_assignments[expected_bin * max_counts_per_bin
+ + search]
+ == index)
+ {
found_it = 1;
}
}
- if (found_it == 0) {
- log_error("add_index_bin_test FAILED: did not find item %d in bin %d.\n", index, expected_bin);
+ if (found_it == 0)
+ {
+ log_error(
+ "add_index_bin_test FAILED: did not find item %d in bin %d.\n",
+ index, expected_bin);
errors++;
}
}
@@ -341,41 +469,49 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_contex
clReleaseMemObject(bin_counters);
clReleaseMemObject(bins);
clReleaseMemObject(bin_assignments);
- if (errors == 0) {
- log_info("add_index_bin_test passed. Each item was put in the correct bin in parallel.\n");
+ if (errors == 0)
+ {
+ log_info("add_index_bin_test passed. Each item was put in the correct "
+ "bin in parallel.\n");
return 0;
- } else {
+ }
+ else
+ {
log_error("add_index_bin_test FAILED: %d errors.\n", errors);
return -1;
}
}
-int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
//===== add_index_bin test
size_t numGlobalThreads = 2048;
- int iteration=0;
+ int iteration = 0;
int err, failed = 0;
- MTdata d = init_genrand( gRandomSeed );
-
- /* Check if atomics are supported. */
- if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
- log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
- free_mtdata( d );
- return 0;
- }
+ MTdata d = init_genrand(gRandomSeed);
+
+ /* Check if atomics are supported. */
+ if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
+ {
+ log_info("Base atomics not supported "
+ "(cl_khr_global_int32_base_atomics). Skipping test.\n");
+ free_mtdata(d);
+ return 0;
+ }
- for(iteration=0; iteration<10; iteration++) {
- log_info("add_index_bin_test with %d elements:\n", (int)numGlobalThreads);
- err = add_index_bin_test(&numGlobalThreads, queue, context, d);
- if (err) {
+ for (iteration = 0; iteration < 10; iteration++)
+ {
+ log_info("add_index_bin_test with %d elements:\n",
+ (int)numGlobalThreads);
+ err = add_index_bin_test(&numGlobalThreads, queue, context, d);
+ if (err)
+ {
failed++;
break;
}
- numGlobalThreads*=2;
+ numGlobalThreads *= 2;
}
- free_mtdata( d );
+ free_mtdata(d);
return failed;
}
-
-
diff --git a/test_conformance/basic/test_arraycopy.cpp b/test_conformance/basic/test_arraycopy.cpp
index 5a352869..d9dbcc1b 100644
--- a/test_conformance/basic/test_arraycopy.cpp
+++ b/test_conformance/basic/test_arraycopy.cpp
@@ -181,9 +181,8 @@ test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue,
}
}
- // Keep track of multiple errors.
- if (error_count != 0)
- err = error_count;
+ // Keep track of multiple errors.
+ if (error_count != 0) err = error_count;
if (err)
log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n");
diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp
index 9fbdcb6e..bf3f1552 100644
--- a/test_conformance/basic/test_async_copy2D.cpp
+++ b/test_conformance/basic/test_async_copy2D.cpp
@@ -25,77 +25,81 @@
#include "../../test_common/harness/conversions.h"
#include "procs.h"
-static const char *async_global_to_local_kernel2D =
- "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
- "%s\n" // optional pragma string
- "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
- "%s *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int "
- "lineCopiesPerWorkItem, int srcStride, int dstStride )\n"
- "{\n"
- " int i, j;\n"
- // Zero the local storage first
- " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
- " for(j=0; j<numElementsPerLine; j++)\n"
- " localBuffer[ (get_local_id( 0 "
- ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ] = "
- "(%s)(%s)0;\n"
- // Do this to verify all kernels are done zeroing the local buffer before we
- // try the copy
- " barrier( CLK_LOCAL_MEM_FENCE );\n"
- " event_t event;\n"
- " event = async_work_group_copy_2D2D( (__local %s*)localBuffer, "
- "(__global const "
- "%s*)(src+lineCopiesPerWorkgroup*get_group_id(0)*(numElementsPerLine + "
- "srcStride)), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, "
- "srcStride, dstStride, 0 );\n"
- // Wait for the copy to complete, then verify by manually copying to the
- // dest
- " wait_group_events( 1, &event );\n"
- " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
- " for(j=0; j<numElementsPerLine; j++)\n"
- " dst[ (get_global_id( 0 "
- ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ] = "
- "localBuffer[ (get_local_id( 0 "
- ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ];\n"
- "}\n";
-
-static const char *async_local_to_global_kernel2D =
- "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
- "%s\n" // optional pragma string
- "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
- "%s *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int "
- "lineCopiesPerWorkItem, int srcStride, int dstStride )\n"
- "{\n"
- " int i, j;\n"
- // Zero the local storage first
- " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
- " for(j=0; j<numElementsPerLine; j++)\n"
- " localBuffer[ (get_local_id( 0 "
- ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + srcStride)+j ] = "
- "(%s)(%s)0;\n"
- // Do this to verify all kernels are done zeroing the local buffer before we
- // try the copy
- " barrier( CLK_LOCAL_MEM_FENCE );\n"
- " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
- " for(j=0; j<numElementsPerLine; j++)\n"
- " localBuffer[ (get_local_id( 0 "
- ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + srcStride)+j ] = src[ "
- "(get_global_id( 0 )*lineCopiesPerWorkItem+i)*(numElementsPerLine + "
- "srcStride)+j ];\n"
- // Do this to verify all kernels are done copying to the local buffer before
- // we try the copy
- " barrier( CLK_LOCAL_MEM_FENCE );\n"
- " event_t event;\n"
- " event = async_work_group_copy_2D2D((__global "
- "%s*)(dst+lineCopiesPerWorkgroup*get_group_id(0)*(numElementsPerLine + "
- "dstStride)), (__local const %s*)localBuffer, (size_t)numElementsPerLine, "
- "(size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0 );\n"
- " wait_group_events( 1, &event );\n"
- "}\n";
+static const char *async_global_to_local_kernel2D = R"OpenCLC(
+#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
+%s // optional pragma string
+
+__kernel void test_fn(const __global %s *src, __global %s *dst,
+ __local %s *localBuffer, int numElementsPerLine,
+ int lineCopiesPerWorkgroup, int lineCopiesPerWorkItem,
+ int srcStride, int dstStride) {
+ // Zero the local storage first
+ for (int i = 0; i < lineCopiesPerWorkItem; i++) {
+ for (int j = 0; j < numElementsPerLine; j++) {
+ const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
+ localBuffer[index] = (%s)(%s)0;
+ }
+ }
+
+ // Do this to verify all kernels are done zeroing the local buffer before we
+ // try the copy
+ barrier( CLK_LOCAL_MEM_FENCE );
+ event_t event = async_work_group_copy_2D2D(localBuffer, 0, src,
+ lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(%s),
+ (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0);
+
+ // Wait for the copy to complete, then verify by manually copying to the dest
+ wait_group_events(1, &event);
+
+ for (int i = 0; i < lineCopiesPerWorkItem; i++) {
+ for (int j = 0; j < numElementsPerLine; j++) {
+ const int local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
+ const int global_index = (get_global_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
+ dst[global_index] = localBuffer[local_index];
+ }
+ }
+}
+)OpenCLC";
+
+static const char *async_local_to_global_kernel2D = R"OpenCLC(
+#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
+%s // optional pragma string
+
+__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
+ int numElementsPerLine, int lineCopiesPerWorkgroup,
+ int lineCopiesPerWorkItem, int srcStride, int dstStride) {
+ // Zero the local storage first
+ for (int i = 0; i < lineCopiesPerWorkItem; i++) {
+ for (int j = 0; j < numElementsPerLine; j++) {
+ const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * srcStride + j;
+ localBuffer[index] = (%s)(%s)0;
+ }
+ }
+
+ // Do this to verify all kernels are done zeroing the local buffer before we try the copy
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ for (int i = 0; i < lineCopiesPerWorkItem; i++) {
+ for (int j = 0; j < numElementsPerLine; j++) {
+ const int local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * srcStride + j;
+ const int global_index = (get_global_id(0)*lineCopiesPerWorkItem + i) * srcStride + j;
+ localBuffer[local_index] = src[global_index];
+ }
+ }
+
+ // Do this to verify all kernels are done copying to the local buffer before we try the copy
+ barrier(CLK_LOCAL_MEM_FENCE);
+ event_t event = async_work_group_copy_2D2D(dst, lineCopiesPerWorkgroup * get_group_id(0) * dstStride,
+ localBuffer, 0, sizeof(%s), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride,
+ dstStride, 0 );
+
+ wait_group_events(1, &event);
+};
+)OpenCLC";
int test_copy2D(cl_device_id deviceID, cl_context context,
cl_command_queue queue, const char *kernelCode,
- ExplicitType vecType, int vecSize, int srcStride, int dstStride,
+ ExplicitType vecType, int vecSize, int srcMargin, int dstMargin,
bool localIsDst)
{
int error;
@@ -114,8 +118,8 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
vecSize);
size_t elementSize = get_explicit_type_size(vecType) * vecSize;
- log_info("Testing %s with srcStride = %d, dstStride = %d\n", vecNameString,
- srcStride, dstStride);
+ log_info("Testing %s with srcMargin = %d, dstMargin = %d\n", vecNameString,
+ srcMargin, dstMargin);
cl_long max_local_mem_size;
error =
@@ -153,7 +157,7 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
: "",
vecNameString, vecNameString, vecNameString, vecNameString,
- get_explicit_type_name(vecType), vecNameString, vecNameString);
+ get_explicit_type_name(vecType), vecNameString);
// log_info("program: %s\n", programSource);
programPtr = programSource;
@@ -180,12 +184,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
if (max_workgroup_size > max_local_workgroup_size[0])
max_workgroup_size = max_local_workgroup_size[0];
- size_t numElementsPerLine = 10;
- size_t lineCopiesPerWorkItem = 13;
+ const size_t numElementsPerLine = 10;
+ const cl_int dstStride = numElementsPerLine + dstMargin;
+ const cl_int srcStride = numElementsPerLine + srcMargin;
+
elementSize =
get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
- size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem * elementSize
- * (numElementsPerLine + (localIsDst ? dstStride : srcStride));
+
+ const size_t lineCopiesPerWorkItem = 13;
+ const size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem
+ * elementSize * (localIsDst ? dstStride : srcStride);
+
size_t maxLocalWorkgroupSize =
(((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
@@ -199,34 +208,39 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
if (maxLocalWorkgroupSize > max_workgroup_size)
localWorkgroupSize = max_workgroup_size;
- size_t maxTotalLinesIn = (max_alloc_size / elementSize + srcStride)
- / (numElementsPerLine + srcStride);
- size_t maxTotalLinesOut = (max_alloc_size / elementSize + dstStride)
- / (numElementsPerLine + dstStride);
- size_t maxTotalLines = (std::min)(maxTotalLinesIn, maxTotalLinesOut);
- size_t maxLocalWorkgroups =
+
+ const size_t maxTotalLinesIn =
+ (max_alloc_size / elementSize + srcMargin) / srcStride;
+ const size_t maxTotalLinesOut =
+ (max_alloc_size / elementSize + dstMargin) / dstStride;
+ const size_t maxTotalLines = std::min(maxTotalLinesIn, maxTotalLinesOut);
+ const size_t maxLocalWorkgroups =
maxTotalLines / (localWorkgroupSize * lineCopiesPerWorkItem);
- size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem
- - (localIsDst ? dstStride : srcStride);
- size_t numberOfLocalWorkgroups = (std::min)(1111, (int)maxLocalWorkgroups);
- size_t totalLines =
+ const size_t localBufferSize =
+ localWorkgroupSize * localStorageSpacePerWorkitem
+ - (localIsDst ? dstMargin : srcMargin);
+ const size_t numberOfLocalWorkgroups =
+ std::min(1111, (int)maxLocalWorkgroups);
+ const size_t totalLines =
numberOfLocalWorkgroups * localWorkgroupSize * lineCopiesPerWorkItem;
- size_t inBufferSize = elementSize
- * (totalLines * numElementsPerLine + (totalLines - 1) * srcStride);
- size_t outBufferSize = elementSize
- * (totalLines * numElementsPerLine + (totalLines - 1) * dstStride);
- size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
+ const size_t inBufferSize = elementSize
+ * (totalLines * numElementsPerLine + (totalLines - 1) * srcMargin);
+ const size_t outBufferSize = elementSize
+ * (totalLines * numElementsPerLine + (totalLines - 1) * dstMargin);
+ const size_t globalWorkgroupSize =
+ numberOfLocalWorkgroups * localWorkgroupSize;
inBuffer = (void *)malloc(inBufferSize);
outBuffer = (void *)malloc(outBufferSize);
outBufferCopy = (void *)malloc(outBufferSize);
- cl_int lineCopiesPerWorkItemInt, numElementsPerLineInt,
- lineCopiesPerWorkgroup;
- lineCopiesPerWorkItemInt = (int)lineCopiesPerWorkItem;
- numElementsPerLineInt = (int)numElementsPerLine;
- lineCopiesPerWorkgroup = (int)(lineCopiesPerWorkItem * localWorkgroupSize);
+ const cl_int lineCopiesPerWorkItemInt =
+ static_cast<cl_int>(lineCopiesPerWorkItem);
+ const cl_int numElementsPerLineInt =
+ static_cast<cl_int>(numElementsPerLine);
+ const cl_int lineCopiesPerWorkgroup =
+ static_cast<cl_int>(lineCopiesPerWorkItem * localWorkgroupSize);
log_info(
"Global: %d, local %d, local buffer %db, global in buffer %db, "
@@ -296,8 +310,8 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
for (int j = 0; j < (int)numElementsPerLine * elementSize;
j += elementSize)
{
- int inIdx = i * (numElementsPerLine + srcStride) + j;
- int outIdx = i * (numElementsPerLine + dstStride) + j;
+ int inIdx = i * srcStride + j;
+ int outIdx = i * dstStride + j;
if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx,
typeSize)
!= 0)
@@ -332,11 +346,10 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
if (i < (int)(globalWorkgroupSize * lineCopiesPerWorkItem - 1)
* elementSize)
{
- int outIdx = i * (numElementsPerLine + dstStride)
- + numElementsPerLine * elementSize;
+ int outIdx = i * dstStride + numElementsPerLine * elementSize;
if (memcmp(((char *)outBuffer) + outIdx,
((char *)outBufferCopy) + outIdx,
- dstStride * elementSize)
+ dstMargin * elementSize)
!= 0)
{
if (failuresPrinted == 0)
@@ -373,9 +386,12 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong,
kULong, kFloat, kDouble, kNumExplicitTypes
};
+ // The margins below represent the number of elements between the end of
+ // one line and the start of the next. The strides are equivalent to the
+ // length of the line plus the chosen margin.
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
- unsigned int smallTypesStrideSizes[] = { 0, 10, 100 };
- unsigned int size, typeIndex, srcStride, dstStride;
+ unsigned int smallTypesMarginSizes[] = { 0, 10, 100 };
+ unsigned int size, typeIndex, srcMargin, dstMargin;
int errors = 0;
@@ -401,19 +417,19 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
<= 2) // small type
{
- for (srcStride = 0; srcStride < sizeof(smallTypesStrideSizes)
- / sizeof(smallTypesStrideSizes[0]);
- srcStride++)
+ for (srcMargin = 0; srcMargin < sizeof(smallTypesMarginSizes)
+ / sizeof(smallTypesMarginSizes[0]);
+ srcMargin++)
{
- for (dstStride = 0;
- dstStride < sizeof(smallTypesStrideSizes)
- / sizeof(smallTypesStrideSizes[0]);
- dstStride++)
+ for (dstMargin = 0;
+ dstMargin < sizeof(smallTypesMarginSizes)
+ / sizeof(smallTypesMarginSizes[0]);
+ dstMargin++)
{
if (test_copy2D(deviceID, context, queue, kernelCode,
vecType[typeIndex], vecSizes[size],
- smallTypesStrideSizes[srcStride],
- smallTypesStrideSizes[dstStride],
+ smallTypesMarginSizes[srcMargin],
+ smallTypesMarginSizes[dstMargin],
localIsDst))
{
errors++;
diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp
index 252159bc..5eb41ebc 100644
--- a/test_conformance/basic/test_async_copy3D.cpp
+++ b/test_conformance/basic/test_async_copy3D.cpp
@@ -25,96 +25,95 @@
#include "../../test_common/harness/conversions.h"
#include "procs.h"
-static const char *async_global_to_local_kernel3D =
- "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
- "%s\n" // optional pragma string
- "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
- "%s *localBuffer, int numElementsPerLine, int numLines, int "
- "planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
- "int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
- "{\n"
- " int i, j, k;\n"
- // Zero the local storage first
- " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
- " for(j=0; j<numLines; j++)\n"
- " for(k=0; k<numElementsPerLine; k++)\n"
- " localBuffer[ (get_local_id( 0 "
- ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
- "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
- "dstLineStride) + k ] = (%s)(%s)0;\n"
- // Do this to verify all kernels are done zeroing the local buffer before we
- // try the copy
- " barrier( CLK_LOCAL_MEM_FENCE );\n"
- " event_t event;\n"
- " event = async_work_group_copy_3D3D( (__local %s*)localBuffer, "
- "(__global const "
- "%s*)(src+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
- "numElementsPerLine + numLines*srcLineStride + srcPlaneStride)), "
- "(size_t)numElementsPerLine, (size_t)numLines, srcLineStride, "
- "dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, dstPlaneStride, "
- "0 );\n"
- // Wait for the copy to complete, then verify by manually copying to the
- // dest
- " wait_group_events( 1, &event );\n"
- " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
- " for(j=0; j<numLines; j++)\n"
- " for(k=0; k<numElementsPerLine; k++)\n"
- " dst[ (get_global_id( 0 "
- ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
- "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
- "dstLineStride) + k ] = localBuffer[ (get_local_id( 0 "
- ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
- "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
- "dstLineStride) + k ];\n"
- "}\n";
-
-static const char *async_local_to_global_kernel3D =
- "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
- "%s\n" // optional pragma string
- "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
- "%s *localBuffer, int numElementsPerLine, int numLines, int "
- "planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
- "int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
- "{\n"
- " int i, j, k;\n"
- // Zero the local storage first
- " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
- " for(j=0; j<numLines; j++)\n"
- " for(k=0; k<numElementsPerLine; k++)\n"
- " localBuffer[ (get_local_id( 0 "
- ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
- "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
- "srcLineStride) + k ] = (%s)(%s)0;\n"
- // Do this to verify all kernels are done zeroing the local buffer before we
- // try the copy
- " barrier( CLK_LOCAL_MEM_FENCE );\n"
- " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
- " for(j=0; j<numLines; j++)\n"
- " for(k=0; k<numElementsPerLine; k++)\n"
- " localBuffer[ (get_local_id( 0 "
- ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
- "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
- "srcLineStride) + k ] = src[ (get_global_id( 0 "
- ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
- "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
- "srcLineStride) + k ];\n"
- // Do this to verify all kernels are done copying to the local buffer before
- // we try the copy
- " barrier( CLK_LOCAL_MEM_FENCE );\n"
- " event_t event;\n"
- " event = async_work_group_copy_3D3D((__global "
- "%s*)(dst+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
- "numElementsPerLine + numLines*dstLineStride + dstPlaneStride)), (__local "
- "const %s*)localBuffer, (size_t)numElementsPerLine, (size_t)numLines, "
- "srcLineStride, dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, "
- "dstPlaneStride, 0 );\n"
- " wait_group_events( 1, &event );\n"
- "}\n";
+static const char *async_global_to_local_kernel3D = R"OpenCLC(
+#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
+%s // optional pragma string
+
+__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
+ int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup,
+ int planesCopiesPerWorkItem, int srcLineStride,
+ int dstLineStride, int srcPlaneStride, int dstPlaneStride ) {
+ // Zero the local storage first
+ for (int i = 0; i < planesCopiesPerWorkItem; i++) {
+ for (int j = 0; j < numLines; j++) {
+ for (int k = 0; k < numElementsPerLine; k++) {
+ const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k;
+ localBuffer[index] = (%s)(%s)0;
+ }
+ }
+ }
+
+ // Do this to verify all kernels are done zeroing the local buffer before we try the copy
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ event_t event = async_work_group_copy_3D3D(localBuffer, 0, src,
+ planesCopiesPerWorkgroup * get_group_id(0) * srcPlaneStride,
+ sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines,
+ planesCopiesPerWorkgroup, srcLineStride, srcPlaneStride, dstLineStride,
+ dstPlaneStride, 0);
+
+ // Wait for the copy to complete, then verify by manually copying to the dest
+ wait_group_events(1, &event);
+
+ for (int i = 0; i < planesCopiesPerWorkItem; i++) {
+ for (int j = 0; j < numLines; j++) {
+ for(int k = 0; k < numElementsPerLine; k++) {
+ const int local_index = (get_local_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k;
+ const int global_index = (get_global_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k;
+ dst[global_index] = localBuffer[local_index];
+ }
+ }
+ }
+}
+)OpenCLC";
+
+static const char *async_local_to_global_kernel3D = R"OpenCLC(
+#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
+%s // optional pragma string
+
+__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
+ int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup,
+ int planesCopiesPerWorkItem, int srcLineStride,
+ int dstLineStride, int srcPlaneStride, int dstPlaneStride) {
+ // Zero the local storage first
+ for (int i = 0; i < planesCopiesPerWorkItem; i++) {
+ for (int j = 0; j < numLines; j++) {
+ for (int k = 0; k < numElementsPerLine; k++) {
+ const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j * srcLineStride + k;
+ localBuffer[index] = (%s)(%s)0;
+ }
+ }
+ }
+
+ // Do this to verify all kernels are done zeroing the local buffer before we try the copy
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ for (int i=0; i < planesCopiesPerWorkItem; i++) {
+ for (int j=0; j < numLines; j++) {
+ for (int k=0; k < numElementsPerLine; k++) {
+ const int local_index = (get_local_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j * srcLineStride + k;
+ const int global_index = (get_global_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j*srcLineStride + k;
+ localBuffer[local_index] = src[global_index];
+ }
+ }
+ }
+
+ // Do this to verify all kernels are done copying to the local buffer before we try the copy
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ event_t event = async_work_group_copy_3D3D(dst,
+ planesCopiesPerWorkgroup * get_group_id(0) * dstPlaneStride, localBuffer, 0,
+ sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup,
+ srcLineStride, srcPlaneStride, dstLineStride, dstPlaneStride, 0);
+
+ wait_group_events(1, &event);
+}
+)OpenCLC";
int test_copy3D(cl_device_id deviceID, cl_context context,
cl_command_queue queue, const char *kernelCode,
- ExplicitType vecType, int vecSize, int srcLineStride,
- int dstLineStride, int srcPlaneStride, int dstPlaneStride,
+ ExplicitType vecType, int vecSize, int srcLineMargin,
+ int dstLineMargin, int srcPlaneMargin, int dstPlaneMargin,
bool localIsDst)
{
int error;
@@ -133,10 +132,10 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
vecSize);
size_t elementSize = get_explicit_type_size(vecType) * vecSize;
- log_info("Testing %s with srcLineStride = %d, dstLineStride = %d, "
- "srcPlaneStride = %d, dstPlaneStride = %d\n",
- vecNameString, srcLineStride, dstLineStride, srcPlaneStride,
- dstPlaneStride);
+ log_info("Testing %s with srcLineMargin = %d, dstLineMargin = %d, "
+ "srcPlaneMargin = %d, dstPlaneMargin = %d\n",
+ vecNameString, srcLineMargin, dstLineMargin, srcPlaneMargin,
+ dstPlaneMargin);
cl_long max_local_mem_size;
error =
@@ -201,16 +200,20 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
if (max_workgroup_size > max_local_workgroup_size[0])
max_workgroup_size = max_local_workgroup_size[0];
- size_t numElementsPerLine = 10;
- size_t numLines = 13;
- size_t planesCopiesPerWorkItem = 2;
+ const size_t numElementsPerLine = 10;
+ const cl_int dstLineStride = numElementsPerLine + dstLineMargin;
+ const cl_int srcLineStride = numElementsPerLine + srcLineMargin;
+
+ const size_t numLines = 13;
+ const cl_int dstPlaneStride = (numLines * dstLineStride) + dstPlaneMargin;
+ const cl_int srcPlaneStride = (numLines * srcLineStride) + srcPlaneMargin;
+
elementSize =
get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
- size_t localStorageSpacePerWorkitem = elementSize
- * (planesCopiesPerWorkItem
- * (numLines * numElementsPerLine
- + numLines * (localIsDst ? dstLineStride : srcLineStride)
- + (localIsDst ? dstPlaneStride : srcPlaneStride)));
+ const size_t planesCopiesPerWorkItem = 2;
+ const size_t localStorageSpacePerWorkitem = elementSize
+ * planesCopiesPerWorkItem
+ * (localIsDst ? dstPlaneStride : srcPlaneStride);
size_t maxLocalWorkgroupSize =
(((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
@@ -224,42 +227,41 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
if (maxLocalWorkgroupSize > max_workgroup_size)
localWorkgroupSize = max_workgroup_size;
- size_t maxTotalPlanesIn = ((max_alloc_size / elementSize) + srcPlaneStride)
- / ((numLines * numElementsPerLine + numLines * srcLineStride)
- + srcPlaneStride);
- size_t maxTotalPlanesOut = ((max_alloc_size / elementSize) + dstPlaneStride)
- / ((numLines * numElementsPerLine + numLines * dstLineStride)
- + dstPlaneStride);
- size_t maxTotalPlanes = (std::min)(maxTotalPlanesIn, maxTotalPlanesOut);
- size_t maxLocalWorkgroups =
+ const size_t maxTotalPlanesIn =
+ ((max_alloc_size / elementSize) + srcPlaneMargin) / srcPlaneStride;
+ const size_t maxTotalPlanesOut =
+ ((max_alloc_size / elementSize) + dstPlaneMargin) / dstPlaneStride;
+ const size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut);
+ const size_t maxLocalWorkgroups =
maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem);
- size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem
- - (localIsDst ? dstPlaneStride : srcPlaneStride);
- size_t numberOfLocalWorkgroups = (std::min)(1111, (int)maxLocalWorkgroups);
- size_t totalPlanes =
+ const size_t localBufferSize =
+ localWorkgroupSize * localStorageSpacePerWorkitem
+ - (localIsDst ? dstPlaneMargin : srcPlaneMargin);
+ const size_t numberOfLocalWorkgroups =
+ std::min(1111, (int)maxLocalWorkgroups);
+ const size_t totalPlanes =
numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem;
- size_t inBufferSize = elementSize
- * (totalPlanes
- * (numLines * numElementsPerLine + numLines * srcLineStride)
- + (totalPlanes - 1) * srcPlaneStride);
- size_t outBufferSize = elementSize
- * (totalPlanes
- * (numLines * numElementsPerLine + numLines * dstLineStride)
- + (totalPlanes - 1) * dstPlaneStride);
- size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
+ const size_t inBufferSize = elementSize
+ * (totalPlanes * numLines * srcLineStride
+ + (totalPlanes - 1) * srcPlaneMargin);
+ const size_t outBufferSize = elementSize
+ * (totalPlanes * numLines * dstLineStride
+ + (totalPlanes - 1) * dstPlaneMargin);
+ const size_t globalWorkgroupSize =
+ numberOfLocalWorkgroups * localWorkgroupSize;
inBuffer = (void *)malloc(inBufferSize);
outBuffer = (void *)malloc(outBufferSize);
outBufferCopy = (void *)malloc(outBufferSize);
- cl_int planesCopiesPerWorkItemInt, numElementsPerLineInt, numLinesInt,
- planesCopiesPerWorkgroup;
- planesCopiesPerWorkItemInt = (int)planesCopiesPerWorkItem;
- numElementsPerLineInt = (int)numElementsPerLine;
- numLinesInt = (int)numLines;
- planesCopiesPerWorkgroup =
- (int)(planesCopiesPerWorkItem * localWorkgroupSize);
+ const cl_int planesCopiesPerWorkItemInt =
+ static_cast<cl_int>(planesCopiesPerWorkItem);
+ const cl_int numElementsPerLineInt =
+ static_cast<cl_int>(numElementsPerLine);
+ const cl_int numLinesInt = static_cast<cl_int>(numLines);
+ const cl_int planesCopiesPerWorkgroup =
+ static_cast<cl_int>(planesCopiesPerWorkItem * localWorkgroupSize);
log_info("Global: %d, local %d, local buffer %db, global in buffer %db, "
"global out buffer %db, each work group will copy %d planes and "
@@ -336,14 +338,8 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
for (int k = 0; k < (int)numElementsPerLine * elementSize;
k += elementSize)
{
- int inIdx = i
- * (numLines * numElementsPerLine
- + numLines * srcLineStride + srcPlaneStride)
- + j * (numElementsPerLine + srcLineStride) + k;
- int outIdx = i
- * (numLines * numElementsPerLine
- + numLines * dstLineStride + dstPlaneStride)
- + j * (numElementsPerLine + dstLineStride) + k;
+ int inIdx = i * srcPlaneStride + j * srcLineStride + k;
+ int outIdx = i * dstPlaneStride + j * dstLineStride + k;
if (memcmp(((char *)inBuffer) + inIdx,
((char *)outBuffer) + outIdx, typeSize)
!= 0)
@@ -378,14 +374,11 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
}
if (j < (int)numLines * elementSize)
{
- int outIdx = i
- * (numLines * numElementsPerLine
- + numLines * dstLineStride + dstPlaneStride)
- + j * (numElementsPerLine + dstLineStride)
+ int outIdx = i * dstPlaneStride + j * dstLineStride
+ numElementsPerLine * elementSize;
if (memcmp(((char *)outBuffer) + outIdx,
((char *)outBufferCopy) + outIdx,
- dstLineStride * elementSize)
+ dstLineMargin * elementSize)
!= 0)
{
if (failuresPrinted == 0)
@@ -409,14 +402,11 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
if (i < (int)(globalWorkgroupSize * planesCopiesPerWorkItem - 1)
* elementSize)
{
- int outIdx = i
- * (numLines * numElementsPerLine + numLines * dstLineStride
- + dstPlaneStride)
- + (numLines * elementSize) * (numElementsPerLine)
- + (numLines * elementSize) * (dstLineStride);
+ int outIdx =
+ i * dstPlaneStride + numLines * dstLineStride * elementSize;
if (memcmp(((char *)outBuffer) + outIdx,
((char *)outBufferCopy) + outIdx,
- dstPlaneStride * elementSize)
+ dstPlaneMargin * elementSize)
!= 0)
{
if (failuresPrinted == 0)
@@ -453,10 +443,13 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong,
kULong, kFloat, kDouble, kNumExplicitTypes
};
+ // The margins below represent the number of elements between the end of
+ // one line or plane and the start of the next. The strides are equivalent
+ // to the size of the line or plane plus the chosen margin.
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
- unsigned int smallTypesStrideSizes[] = { 0, 10, 100 };
- unsigned int size, typeIndex, srcLineStride, dstLineStride, srcPlaneStride,
- dstPlaneStride;
+ unsigned int smallTypesMarginSizes[] = { 0, 10, 100 };
+ unsigned int size, typeIndex, srcLineMargin, dstLineMargin, srcPlaneMargin,
+ dstPlaneMargin;
int errors = 0;
@@ -482,33 +475,33 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
<= 2) // small type
{
- for (srcLineStride = 0;
- srcLineStride < sizeof(smallTypesStrideSizes)
- / sizeof(smallTypesStrideSizes[0]);
- srcLineStride++)
+ for (srcLineMargin = 0;
+ srcLineMargin < sizeof(smallTypesMarginSizes)
+ / sizeof(smallTypesMarginSizes[0]);
+ srcLineMargin++)
{
- for (dstLineStride = 0;
- dstLineStride < sizeof(smallTypesStrideSizes)
- / sizeof(smallTypesStrideSizes[0]);
- dstLineStride++)
+ for (dstLineMargin = 0;
+ dstLineMargin < sizeof(smallTypesMarginSizes)
+ / sizeof(smallTypesMarginSizes[0]);
+ dstLineMargin++)
{
- for (srcPlaneStride = 0;
- srcPlaneStride < sizeof(smallTypesStrideSizes)
- / sizeof(smallTypesStrideSizes[0]);
- srcPlaneStride++)
+ for (srcPlaneMargin = 0;
+ srcPlaneMargin < sizeof(smallTypesMarginSizes)
+ / sizeof(smallTypesMarginSizes[0]);
+ srcPlaneMargin++)
{
- for (dstPlaneStride = 0;
- dstPlaneStride < sizeof(smallTypesStrideSizes)
- / sizeof(smallTypesStrideSizes[0]);
- dstPlaneStride++)
+ for (dstPlaneMargin = 0;
+ dstPlaneMargin < sizeof(smallTypesMarginSizes)
+ / sizeof(smallTypesMarginSizes[0]);
+ dstPlaneMargin++)
{
if (test_copy3D(
deviceID, context, queue, kernelCode,
vecType[typeIndex], vecSizes[size],
- smallTypesStrideSizes[srcLineStride],
- smallTypesStrideSizes[dstLineStride],
- smallTypesStrideSizes[srcPlaneStride],
- smallTypesStrideSizes[dstPlaneStride],
+ smallTypesMarginSizes[srcLineMargin],
+ smallTypesMarginSizes[dstLineMargin],
+ smallTypesMarginSizes[srcPlaneMargin],
+ smallTypesMarginSizes[dstPlaneMargin],
localIsDst))
{
errors++;
diff --git a/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/basic/test_enqueue_map.cpp
index 3702726f..d28f7e41 100644
--- a/test_conformance/basic/test_enqueue_map.cpp
+++ b/test_conformance/basic/test_enqueue_map.cpp
@@ -146,7 +146,7 @@ int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command
clMemWrapper memObject;
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
- generate_random_data(kUInt, (unsigned int)(imageSize * imageSize), d,
+ generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), d,
hostPtrData);
memcpy(referenceData, hostPtrData, imageDataSize);
diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp
index f52162a8..ea95df68 100644
--- a/test_conformance/basic/test_enqueued_local_size.cpp
+++ b/test_conformance/basic/test_enqueued_local_size.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -14,42 +14,45 @@
// limitations under the License.
//
#include "harness/compat.h"
+#include "harness/rounding_mode.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include "harness/rounding_mode.h"
+
+#include <algorithm>
#include "procs.h"
-static const char *enqueued_local_size_2d_code =
-"__kernel void test_enqueued_local_size_2d(global int *dst)\n"
-"{\n"
-" if ((get_global_id(0) == 0) && (get_global_id(1) == 0))\n"
-" {\n"
-" dst[0] = (int)get_enqueued_local_size(0)\n;"
-" dst[1] = (int)get_enqueued_local_size(1)\n;"
-" }\n"
-"}\n";
-
-static const char *enqueued_local_size_1d_code =
-"__kernel void test_enqueued_local_size_1d(global int *dst)\n"
-"{\n"
-" int tid_x = get_global_id(0);\n"
-" if (get_global_id(0) == 0)\n"
-" {\n"
-" dst[tid_x] = (int)get_enqueued_local_size(0)\n;"
-" }\n"
-"}\n";
-
-
-static int
-verify_enqueued_local_size(int *result, size_t *expected, int n)
+static const char *enqueued_local_size_2d_code = R"(
+__kernel void test_enqueued_local_size_2d(global int *dst)
+{
+ if ((get_global_id(0) == 0) && (get_global_id(1) == 0))
+ {
+ dst[0] = (int)get_enqueued_local_size(0);
+ dst[1] = (int)get_enqueued_local_size(1);
+ }
+}
+)";
+
+static const char *enqueued_local_size_1d_code = R"(
+__kernel void test_enqueued_local_size_1d(global int *dst)
+{
+ int tid_x = get_global_id(0);
+ if (get_global_id(0) == 0)
+ {
+ dst[tid_x] = (int)get_enqueued_local_size(0);
+ }
+}
+)";
+
+
+static int verify_enqueued_local_size(int *result, size_t *expected, int n)
{
int i;
- for (i=0; i<n; i++)
+ for (i = 0; i < n; i++)
{
if (result[i] != (int)expected[i])
{
@@ -62,14 +65,14 @@ verify_enqueued_local_size(int *result, size_t *expected, int n)
}
-int
-test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_enqueued_local_size(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- cl_mem streams;
- cl_program program[2];
- cl_kernel kernel[2];
+ clMemWrapper stream;
+ clProgramWrapper program[2];
+ clKernelWrapper kernel[2];
- int *output_ptr;
+ cl_int output_ptr[2];
size_t globalsize[2];
size_t localsize[2];
int err;
@@ -95,37 +98,36 @@ test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_que
}
}
- output_ptr = (int*)malloc(2 * sizeof(int));
-
- streams =
- clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * sizeof(int), NULL, &err);
- test_error( err, "clCreateBuffer failed.");
+ stream = clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * sizeof(cl_int),
+ nullptr, &err);
+ test_error(err, "clCreateBuffer failed.");
std::string cl_std = "-cl-std=CL";
cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
err = create_single_kernel_helper_with_build_options(
context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
"test_enqueued_local_size_1d", cl_std.c_str());
- test_error( err, "create_single_kernel_helper failed");
+ test_error(err, "create_single_kernel_helper failed");
err = create_single_kernel_helper_with_build_options(
context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
"test_enqueued_local_size_2d", cl_std.c_str());
- test_error( err, "create_single_kernel_helper failed");
+ test_error(err, "create_single_kernel_helper failed");
- err = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
- test_error( err, "clSetKernelArgs failed.");
- err = clSetKernelArg(kernel[1], 0, sizeof streams, &streams);
- test_error( err, "clSetKernelArgs failed.");
+ err = clSetKernelArg(kernel[0], 0, sizeof stream, &stream);
+ test_error(err, "clSetKernelArgs failed.");
+ err = clSetKernelArg(kernel[1], 0, sizeof stream, &stream);
+ test_error(err, "clSetKernelArgs failed.");
- globalsize[0] = (size_t)num_elements;
- globalsize[1] = (size_t)num_elements;
+ globalsize[0] = static_cast<size_t>(num_elements);
+ globalsize[1] = static_cast<size_t>(num_elements);
size_t max_wgs;
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_wgs), &max_wgs, NULL);
- test_error( err, "clGetDeviceInfo failed.");
+ err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof(max_wgs), &max_wgs, nullptr);
+ test_error(err, "clGetDeviceInfo failed.");
- localsize[0] = MIN(16, max_wgs);
- localsize[1] = MIN(11, max_wgs / localsize[0]);
+ localsize[0] = std::min<size_t>(16, max_wgs);
+ localsize[1] = std::min<size_t>(11, max_wgs / localsize[0]);
// If we need to use uniform workgroups because non-uniform workgroups are
// not supported, round up to the next global size that is divisible by the
// local size.
@@ -141,35 +143,31 @@ test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_que
}
}
- err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, globalsize, localsize, 0, NULL, NULL);
- test_error( err, "clEnqueueNDRangeKernel failed.");
+ err = clEnqueueNDRangeKernel(queue, kernel[1], 2, nullptr, globalsize,
+ localsize, 0, nullptr, nullptr);
+ test_error(err, "clEnqueueNDRangeKernel failed.");
- err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
- test_error( err, "clEnqueueReadBuffer failed.");
+ err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(int),
+ output_ptr, 0, nullptr, nullptr);
+ test_error(err, "clEnqueueReadBuffer failed.");
err = verify_enqueued_local_size(output_ptr, localsize, 2);
- globalsize[0] = (size_t)num_elements;
+ globalsize[0] = static_cast<size_t>(num_elements);
localsize[0] = 9;
if (use_uniform_work_groups && (globalsize[0] % localsize[0]))
{
globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
}
- err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, globalsize, localsize, 0, NULL, NULL);
- test_error( err, "clEnqueueNDRangeKernel failed.");
+ err = clEnqueueNDRangeKernel(queue, kernel[1], 1, nullptr, globalsize,
+ localsize, 0, nullptr, nullptr);
+ test_error(err, "clEnqueueNDRangeKernel failed.");
- err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
- test_error( err, "clEnqueueReadBuffer failed.");
+ err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(int),
+ output_ptr, 0, nullptr, nullptr);
+ test_error(err, "clEnqueueReadBuffer failed.");
err = verify_enqueued_local_size(output_ptr, localsize, 1);
- // cleanup
- clReleaseMemObject(streams);
- clReleaseKernel(kernel[0]);
- clReleaseKernel(kernel[1]);
- clReleaseProgram(program[0]);
- clReleaseProgram(program[1]);
- free(output_ptr);
-
return err;
}
diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp
index 6e5deb4b..60d509b0 100644
--- a/test_conformance/basic/test_fpmath_float.cpp
+++ b/test_conformance/basic/test_fpmath_float.cpp
@@ -49,8 +49,6 @@ static const char *fpmul_kernel_code =
"}\n";
-static const float MAX_ERR = 1e-5f;
-
static int
verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
{
diff --git a/test_conformance/basic/test_hiloeo.cpp b/test_conformance/basic/test_hiloeo.cpp
index 4cdf2ac7..3470ad00 100644
--- a/test_conformance/basic/test_hiloeo.cpp
+++ b/test_conformance/basic/test_hiloeo.cpp
@@ -43,8 +43,6 @@ static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4};
// input type name is strcat(gentype, vector_size_names[i]);
// and output type name is
// strcat(gentype, vector_size_names[out_vector_idx[i]]);
-static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4,
- -1,-1,-1,-1,-1,-1,-1,5};
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"};
static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
diff --git a/test_conformance/basic/test_hostptr.cpp b/test_conformance/basic/test_hostptr.cpp
index 65af5c3c..dee78675 100644
--- a/test_conformance/basic/test_hostptr.cpp
+++ b/test_conformance/basic/test_hostptr.cpp
@@ -32,8 +32,6 @@ const char *hostptr_kernel_code =
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
-static const float MAX_ERR = 1e-5f;
-
static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n)
{
cl_float r;
diff --git a/test_conformance/basic/test_multireadimageonefmt.cpp b/test_conformance/basic/test_multireadimageonefmt.cpp
index b37c8414..c230e67a 100644
--- a/test_conformance/basic/test_multireadimageonefmt.cpp
+++ b/test_conformance/basic/test_multireadimageonefmt.cpp
@@ -153,14 +153,14 @@ int test_mri_one(cl_device_id device, cl_context context, cl_command_queue queue
err = clSetKernelArg(kernel, 0, sizeof i, &i);
err |= clSetKernelArg(kernel, 1, sizeof err, &err);
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
- for (i=0; i<8; i++)
- err |= clSetKernelArg(kernel, 3+i, sizeof streams[i], &streams[i]);
+ for (i = 0; i < 8; i++)
+ err |= clSetKernelArg(kernel, 3 + i, sizeof streams[i], &streams[i]);
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
+ if (err != CL_SUCCESS)
+ {
+ log_error("clSetKernelArgs failed\n");
+ return -1;
+ }
threads[0] = (unsigned int)img_width;
threads[1] = (unsigned int)img_height;
@@ -182,15 +182,13 @@ int test_mri_one(cl_device_id device, cl_context context, cl_command_queue queue
// cleanup
clReleaseSampler(sampler);
- for (i=0; i<8; i++)
- clReleaseMemObject(streams[i]);
+ for (i = 0; i < 8; i++) clReleaseMemObject(streams[i]);
clReleaseKernel(kernel);
clReleaseProgram(program);
- for (i=0; i<7; i++)
- free(input_ptr[i]);
- free(output_ptr);
+ for (i = 0; i < 7; i++) free(input_ptr[i]);
+ free(output_ptr);
- return err;
+ return err;
}
diff --git a/test_conformance/basic/test_preprocessors.cpp b/test_conformance/basic/test_preprocessors.cpp
index 2038d150..e67487eb 100644
--- a/test_conformance/basic/test_preprocessors.cpp
+++ b/test_conformance/basic/test_preprocessors.cpp
@@ -97,10 +97,10 @@ int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, c
char programSource[4096];
char curFileName[512];
char *programPtr = programSource;
- int i = 0;
snprintf(curFileName, 512, "%s", __FILE__);
#ifdef _WIN32
// Replace "\" with "\\"
+ int i = 0;
while(curFileName[i] != '\0') {
if (curFileName[i] == '\\') {
int j = i + 1;
diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp
index 62c0a6be..e202d276 100644
--- a/test_conformance/basic/test_progvar.cpp
+++ b/test_conformance/basic/test_progvar.cpp
@@ -15,12 +15,13 @@
//
#include "harness/compat.h"
-// Bug: Missing in spec: atomic_intptr_t is always supported if device is 32-bits.
+// Bug: Missing in spec: atomic_intptr_t is always supported if device is
+// 32-bits.
// Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE
#define FLUSH fflush(stdout)
-#define MAX_STR 16*1024
+#define MAX_STR 16 * 1024
#define ALIGNMENT 128
@@ -66,7 +67,11 @@ static int l_host_is_big_endian = 1;
static size_t l_max_global_id0 = 0;
static cl_bool l_linker_available = false;
-#define check_error(errCode,msg,...) ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0)
+#define check_error(errCode, msg, ...) \
+ ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", \
+ ##__VA_ARGS__, __FILE__, __LINE__), \
+ 1) \
+ : 0)
////////////////////
// Info about types we can use for program scope variables.
@@ -75,110 +80,135 @@ static cl_bool l_linker_available = false;
class TypeInfo {
public:
- TypeInfo() :
- name(""),
- m_buf_elem_type(""),
- m_is_vecbase(false),
- m_is_atomic(false),
- m_is_like_size_t(false),
- m_is_bool(false),
- m_elem_type(0), m_num_elem(0),
- m_size(0),
- m_value_size(0)
- {}
- TypeInfo(const char* name_arg) :
- name(name_arg),
- m_buf_elem_type(name_arg),
- m_is_vecbase(false),
- m_is_atomic(false),
- m_is_like_size_t(false),
- m_is_bool(false),
- m_elem_type(0), m_num_elem(0),
- m_size(0),
- m_value_size(0)
- { }
+ TypeInfo()
+ : name(""), m_buf_elem_type(""), m_is_vecbase(false),
+ m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
+ m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0)
+ {}
+ TypeInfo(const char* name_arg)
+ : name(name_arg), m_buf_elem_type(name_arg), m_is_vecbase(false),
+ m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
+ m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0)
+ {}
// Vectors
- TypeInfo( TypeInfo* elem_type, int num_elem ) :
- m_is_vecbase(false),
- m_is_atomic(false),
- m_is_like_size_t(false),
- m_is_bool(false),
- m_elem_type(elem_type),
- m_num_elem(num_elem)
- {
- char the_name[10]; // long enough for longest vector type name "double16"
- snprintf(the_name,sizeof(the_name),"%s%d",elem_type->get_name_c_str(),m_num_elem);
+ TypeInfo(TypeInfo* elem_type, int num_elem)
+ : m_is_vecbase(false), m_is_atomic(false), m_is_like_size_t(false),
+ m_is_bool(false), m_elem_type(elem_type), m_num_elem(num_elem)
+ {
+ char
+ the_name[10]; // long enough for longest vector type name "double16"
+ snprintf(the_name, sizeof(the_name), "%s%d",
+ elem_type->get_name_c_str(), m_num_elem);
this->name = std::string(the_name);
this->m_buf_elem_type = std::string(the_name);
this->m_value_size = num_elem * elem_type->get_size();
- if ( m_num_elem == 3 ) {
+ if (m_num_elem == 3)
+ {
this->m_size = 4 * elem_type->get_size();
- } else {
+ }
+ else
+ {
this->m_size = num_elem * elem_type->get_size();
}
}
const std::string& get_name(void) const { return name; }
const char* get_name_c_str(void) const { return name.c_str(); }
- TypeInfo& set_vecbase(void) { this->m_is_vecbase = true; return *this; }
- TypeInfo& set_atomic(void) { this->m_is_atomic = true; return *this; }
- TypeInfo& set_like_size_t(void) {
+ TypeInfo& set_vecbase(void)
+ {
+ this->m_is_vecbase = true;
+ return *this;
+ }
+ TypeInfo& set_atomic(void)
+ {
+ this->m_is_atomic = true;
+ return *this;
+ }
+ TypeInfo& set_like_size_t(void)
+ {
this->m_is_like_size_t = true;
- this->set_size( l_64bit_device ? 8 : 4 );
+ this->set_size(l_64bit_device ? 8 : 4);
this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint";
return *this;
}
- TypeInfo& set_bool(void) { this->m_is_bool = true; return *this; }
- TypeInfo& set_size(size_t n) { this->m_value_size = this->m_size = n; return *this; }
- TypeInfo& set_buf_elem_type( const char* name ) { this->m_buf_elem_type = std::string(name); return *this; }
+ TypeInfo& set_bool(void)
+ {
+ this->m_is_bool = true;
+ return *this;
+ }
+ TypeInfo& set_size(size_t n)
+ {
+ this->m_value_size = this->m_size = n;
+ return *this;
+ }
+ TypeInfo& set_buf_elem_type(const char* name)
+ {
+ this->m_buf_elem_type = std::string(name);
+ return *this;
+ }
const TypeInfo* elem_type(void) const { return m_elem_type; }
int num_elem(void) const { return m_num_elem; }
- bool is_vecbase(void) const {return m_is_vecbase;}
- bool is_atomic(void) const {return m_is_atomic;}
- bool is_atomic_64bit(void) const {return m_is_atomic && m_size == 8;}
- bool is_like_size_t(void) const {return m_is_like_size_t;}
- bool is_bool(void) const {return m_is_bool;}
- size_t get_size(void) const {return m_size;}
- size_t get_value_size(void) const {return m_value_size;}
+ bool is_vecbase(void) const { return m_is_vecbase; }
+ bool is_atomic(void) const { return m_is_atomic; }
+ bool is_atomic_64bit(void) const { return m_is_atomic && m_size == 8; }
+ bool is_like_size_t(void) const { return m_is_like_size_t; }
+ bool is_bool(void) const { return m_is_bool; }
+ size_t get_size(void) const { return m_size; }
+ size_t get_value_size(void) const { return m_value_size; }
// When passing values of this type to a kernel, what buffer type
// should be used?
- const char* get_buf_elem_type(void) const { return m_buf_elem_type.c_str(); }
+ const char* get_buf_elem_type(void) const
+ {
+ return m_buf_elem_type.c_str();
+ }
- std::string as_string(const cl_uchar* value_ptr) const {
+ std::string as_string(const cl_uchar* value_ptr) const
+ {
// This method would be shorter if I had a real handle to element
// vector type.
- if ( this->is_bool() ) {
- std::string result( name );
+ if (this->is_bool())
+ {
+ std::string result(name);
result += "<";
result += (*value_ptr ? "true" : "false");
result += ", ";
char buf[10];
- sprintf(buf,"%02x",*value_ptr);
+ sprintf(buf, "%02x", *value_ptr);
result += buf;
result += ">";
return result;
- } else if ( this->num_elem() ) {
- std::string result( name );
+ }
+ else if (this->num_elem())
+ {
+ std::string result(name);
result += "<";
- for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) {
+ for (unsigned ielem = 0; ielem < this->num_elem(); ielem++)
+ {
char buf[MAX_STR];
- if ( ielem ) result += ", ";
- for ( unsigned ibyte = 0; ibyte < this->m_elem_type->get_size() ; ibyte++ ) {
- sprintf(buf + 2*ibyte,"%02x", value_ptr[ ielem * this->m_elem_type->get_size() + ibyte ] );
+ if (ielem) result += ", ";
+ for (unsigned ibyte = 0; ibyte < this->m_elem_type->get_size();
+ ibyte++)
+ {
+ sprintf(buf + 2 * ibyte, "%02x",
+ value_ptr[ielem * this->m_elem_type->get_size()
+ + ibyte]);
}
result += buf;
}
result += ">";
return result;
- } else {
- std::string result( name );
+ }
+ else
+ {
+ std::string result(name);
result += "<";
char buf[MAX_STR];
- for ( unsigned ibyte = 0; ibyte < this->get_size() ; ibyte++ ) {
- sprintf(buf + 2*ibyte,"%02x", value_ptr[ ibyte ] );
+ for (unsigned ibyte = 0; ibyte < this->get_size(); ibyte++)
+ {
+ sprintf(buf + 2 * ibyte, "%02x", value_ptr[ibyte]);
}
result += buf;
result += ">";
@@ -189,51 +219,71 @@ public:
// Initialize the given buffer to a constant value initialized as if it
// were from the INIT_VAR macro below.
// Only needs to support values 0 and 1.
- void init( cl_uchar* buf, cl_uchar val) const {
- if ( this->num_elem() ) {
- for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) {
+ void init(cl_uchar* buf, cl_uchar val) const
+ {
+ if (this->num_elem())
+ {
+ for (unsigned ielem = 0; ielem < this->num_elem(); ielem++)
+ {
// Delegate!
- this->init_elem( buf + ielem * this->get_value_size()/this->num_elem(), val );
+ this->init_elem(
+ buf + ielem * this->get_value_size() / this->num_elem(),
+ val);
}
- } else {
- init_elem( buf, val );
+ }
+ else
+ {
+ init_elem(buf, val);
}
}
private:
- void init_elem( cl_uchar* buf, cl_uchar val ) const {
- size_t elem_size = this->num_elem() ? this->get_value_size()/this->num_elem() : this->get_size();
- memset(buf,0,elem_size);
- if ( val ) {
- if ( strstr( name.c_str(), "float" ) ) {
+ void init_elem(cl_uchar* buf, cl_uchar val) const
+ {
+ size_t elem_size = this->num_elem()
+ ? this->get_value_size() / this->num_elem()
+ : this->get_size();
+ memset(buf, 0, elem_size);
+ if (val)
+ {
+ if (strstr(name.c_str(), "float"))
+ {
*(float*)buf = (float)val;
return;
}
- if ( strstr( name.c_str(), "double" ) ) {
+ if (strstr(name.c_str(), "double"))
+ {
*(double*)buf = (double)val;
return;
}
- if ( this->is_bool() ) { *buf = (bool)val; return; }
+ if (this->is_bool())
+ {
+ *buf = (bool)val;
+ return;
+ }
// Write a single character value to the correct spot,
// depending on host endianness.
- if ( l_host_is_big_endian ) *(buf + elem_size-1) = (cl_uchar)val;
- else *buf = (cl_uchar)val;
+ if (l_host_is_big_endian)
+ *(buf + elem_size - 1) = (cl_uchar)val;
+ else
+ *buf = (cl_uchar)val;
}
}
-public:
- void dump(FILE* fp) const {
- fprintf(fp,"Type %s : <%d,%d,%s> ", name.c_str(),
- (int)m_size,
- (int)m_value_size,
- m_buf_elem_type.c_str() );
- if ( this->m_elem_type ) fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), this->num_elem() );
- if ( this->m_is_vecbase ) fprintf(fp, " vecbase");
- if ( this->m_is_bool ) fprintf(fp, " bool");
- if ( this->m_is_like_size_t ) fprintf(fp, " like-size_t");
- if ( this->m_is_atomic ) fprintf(fp, " atomic");
- fprintf(fp,"\n");
+public:
+ void dump(FILE* fp) const
+ {
+ fprintf(fp, "Type %s : <%d,%d,%s> ", name.c_str(), (int)m_size,
+ (int)m_value_size, m_buf_elem_type.c_str());
+ if (this->m_elem_type)
+ fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(),
+ this->num_elem());
+ if (this->m_is_vecbase) fprintf(fp, " vecbase");
+ if (this->m_is_bool) fprintf(fp, " bool");
+ if (this->m_is_like_size_t) fprintf(fp, " like-size_t");
+ if (this->m_is_atomic) fprintf(fp, " atomic");
+ fprintf(fp, "\n");
fflush(fp);
}
@@ -246,7 +296,8 @@ private:
bool m_is_like_size_t;
bool m_is_bool;
size_t m_size; // Number of bytes of storage occupied by this type.
- size_t m_value_size; // Number of bytes of value significant for this type. Differs for vec3.
+ size_t m_value_size; // Number of bytes of value significant for this type.
+ // Differs for vec3.
// When passing values of this type to a kernel, what buffer type
// should be used?
@@ -256,46 +307,65 @@ private:
};
-#define NUM_SCALAR_TYPES (8+2) // signed and unsigned integral types, float and double
-#define NUM_VECTOR_SIZES (5) // 2,3,4,8,16
-#define NUM_PLAIN_TYPES \
- 5 /*boolean and size_t family */ \
- + NUM_SCALAR_TYPES \
- + NUM_SCALAR_TYPES*NUM_VECTOR_SIZES \
- + 10 /* atomic types */
+#define NUM_SCALAR_TYPES \
+ (8 + 2) // signed and unsigned integral types, float and double
+#define NUM_VECTOR_SIZES (5) // 2,3,4,8,16
+#define NUM_PLAIN_TYPES \
+ 5 /*boolean and size_t family */ \
+ + NUM_SCALAR_TYPES + NUM_SCALAR_TYPES* NUM_VECTOR_SIZES \
+ + 10 /* atomic types */
// Need room for plain, array, pointer, struct
-#define MAX_TYPES (4*NUM_PLAIN_TYPES)
+#define MAX_TYPES (4 * NUM_PLAIN_TYPES)
static TypeInfo type_info[MAX_TYPES];
static int num_type_info = 0; // Number of valid entries in type_info[]
-
-
// A helper class to form kernel source arguments for clCreateProgramWithSource.
class StringTable {
public:
- StringTable() : m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() {}
+ StringTable(): m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings()
+ {}
~StringTable() { release_frozen(); }
- void add(std::string s) { release_frozen(); m_strings.push_back(s); }
+ void add(std::string s)
+ {
+ release_frozen();
+ m_strings.push_back(s);
+ }
- const size_t num_str() { freeze(); return m_strings.size(); }
- const char** strs() { freeze(); return m_c_strs; }
- const size_t* lengths() { freeze(); return m_lengths; }
+ const size_t num_str()
+ {
+ freeze();
+ return m_strings.size();
+ }
+ const char** strs()
+ {
+ freeze();
+ return m_c_strs;
+ }
+ const size_t* lengths()
+ {
+ freeze();
+ return m_lengths;
+ }
private:
- void freeze(void) {
- if ( !m_frozen ) {
+ void freeze(void)
+ {
+ if (!m_frozen)
+ {
release_frozen();
- m_c_strs = (const char**) malloc(sizeof(const char*) * m_strings.size());
- m_lengths = (size_t*) malloc(sizeof(size_t) * m_strings.size());
- assert( m_c_strs );
- assert( m_lengths );
+ m_c_strs =
+ (const char**)malloc(sizeof(const char*) * m_strings.size());
+ m_lengths = (size_t*)malloc(sizeof(size_t) * m_strings.size());
+ assert(m_c_strs);
+ assert(m_lengths);
- for ( size_t i = 0; i < m_strings.size() ; i++ ) {
+ for (size_t i = 0; i < m_strings.size(); i++)
+ {
m_c_strs[i] = m_strings[i].c_str();
m_lengths[i] = strlen(m_c_strs[i]);
}
@@ -303,9 +373,18 @@ private:
m_frozen = true;
}
}
- void release_frozen(void) {
- if ( m_c_strs ) { free(m_c_strs); m_c_strs = 0; }
- if ( m_lengths ) { free(m_lengths); m_lengths = 0; }
+ void release_frozen(void)
+ {
+ if (m_c_strs)
+ {
+ free(m_c_strs);
+ m_c_strs = 0;
+ }
+ if (m_lengths)
+ {
+ free(m_lengths);
+ m_lengths = 0;
+ }
m_frozen = false;
}
@@ -325,11 +404,15 @@ static const char* l_get_fp64_pragma(void);
static const char* l_get_cles_int64_pragma(void);
static int l_build_type_table(cl_device_id device);
-static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret);
+static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
+ size_t* pref_size_ret);
-static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state );
-static int l_compare( const cl_uchar* expected, const cl_uchar* received, unsigned num_values, const TypeInfo&ti );
-static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti );
+static void l_set_randomly(cl_uchar* buf, size_t buf_size,
+ RandomSeed& rand_state);
+static int l_compare(const cl_uchar* expected, const cl_uchar* received,
+ unsigned num_values, const TypeInfo& ti);
+static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
+ unsigned src_idx, const TypeInfo& ti);
static std::string conversion_functions(const TypeInfo& ti);
static std::string global_decls(const TypeInfo& ti, bool with_init);
@@ -337,90 +420,123 @@ static std::string global_check_function(const TypeInfo& ti);
static std::string writer_function(const TypeInfo& ti);
static std::string reader_function(const TypeInfo& ti);
-static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue );
-static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state );
+static int l_write_read(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+static int l_write_read_for_type(cl_device_id device, cl_context context,
+ cl_command_queue queue, const TypeInfo& ti,
+ RandomSeed& rand_state);
-static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue );
-static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state );
-
-static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size );
-static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size, bool separate_compilation );
+static int l_init_write_read(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+static int l_init_write_read_for_type(cl_device_id device, cl_context context,
+ cl_command_queue queue,
+ const TypeInfo& ti,
+ RandomSeed& rand_state);
+static int l_capacity(cl_device_id device, cl_context context,
+ cl_command_queue queue, size_t max_size);
+static int l_user_type(cl_device_id device, cl_context context,
+ cl_command_queue queue, size_t max_size,
+ bool separate_compilation);
////////////////////
// File scope function definitions
-static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device_id *device_list, cl_uint count, const char **strings, const size_t *lengths, const char* options)
+static cl_int print_build_log(cl_program program, cl_uint num_devices,
+ cl_device_id* device_list, cl_uint count,
+ const char** strings, const size_t* lengths,
+ const char* options)
{
cl_uint i;
cl_int error;
BufferOwningPtr<cl_device_id> devices;
- if(num_devices == 0 || device_list == NULL)
+ if (num_devices == 0 || device_list == NULL)
{
- error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL);
+ error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
+ sizeof(num_devices), &num_devices, NULL);
test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
- device_list = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices);
+ device_list = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devices);
devices.reset(device_list);
memset(device_list, 0, sizeof(cl_device_id) * num_devices);
- error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * num_devices, device_list, NULL);
+ error = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
+ sizeof(cl_device_id) * num_devices,
+ device_list, NULL);
test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
}
cl_uint z;
bool sourcePrinted = false;
- for(z = 0; z < num_devices; z++)
+ for (z = 0; z < num_devices; z++)
{
char deviceName[4096] = "";
- error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
- check_error(error, "Device \"%d\" failed to return a name. clGetDeviceInfo CL_DEVICE_NAME failed", z);
+ error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME,
+ sizeof(deviceName), deviceName, NULL);
+ check_error(error,
+ "Device \"%d\" failed to return a name. clGetDeviceInfo "
+ "CL_DEVICE_NAME failed",
+ z);
cl_build_status buildStatus;
- error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
- check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
+ error = clGetProgramBuildInfo(program, device_list[z],
+ CL_PROGRAM_BUILD_STATUS,
+ sizeof(buildStatus), &buildStatus, NULL);
+ check_error(error,
+ "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
- if(buildStatus != CL_BUILD_SUCCESS)
+ if (buildStatus != CL_BUILD_SUCCESS)
{
- if(!sourcePrinted)
+ if (!sourcePrinted)
{
log_error("Build options: %s\n", options);
- if(count && strings)
+ if (count && strings)
{
log_error("Original source is: ------------\n");
- for(i = 0; i < count; i++) log_error("%s", strings[i]);
+ for (i = 0; i < count; i++) log_error("%s", strings[i]);
}
sourcePrinted = true;
}
char statusString[64] = "";
if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
- sprintf(statusString, "CL_BUILD_SUCCESS");
+ sprintf(statusString, "CL_BUILD_SUCCESS");
else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
- sprintf(statusString, "CL_BUILD_NONE");
+ sprintf(statusString, "CL_BUILD_NONE");
else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
- sprintf(statusString, "CL_BUILD_ERROR");
+ sprintf(statusString, "CL_BUILD_ERROR");
else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
- sprintf(statusString, "CL_BUILD_IN_PROGRESS");
+ sprintf(statusString, "CL_BUILD_IN_PROGRESS");
else
- sprintf(statusString, "UNKNOWN (%d)", buildStatus);
+ sprintf(statusString, "UNKNOWN (%d)", buildStatus);
- log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
+ log_error("Build not successful for device \"%s\", status: %s\n",
+ deviceName, statusString);
size_t paramSize = 0;
- error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, 0, NULL, &paramSize);
- if(check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed")) break;
+ error = clGetProgramBuildInfo(program, device_list[z],
+ CL_PROGRAM_BUILD_LOG, 0, NULL,
+ &paramSize);
+ if (check_error(
+ error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"))
+ break;
std::string log;
- log.resize(paramSize/sizeof(char));
-
- error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL);
- if(check_error(error, "Device %d (%s) failed to return a build log", z, deviceName)) break;
- if(log[0] == 0) log_error("clGetProgramBuildInfo returned an empty log.\n");
+ log.resize(paramSize / sizeof(char));
+
+ error = clGetProgramBuildInfo(program, device_list[z],
+ CL_PROGRAM_BUILD_LOG, paramSize,
+ &log[0], NULL);
+ if (check_error(error,
+ "Device %d (%s) failed to return a build log", z,
+ deviceName))
+ break;
+ if (log[0] == 0)
+ log_error("clGetProgramBuildInfo returned an empty log.\n");
else
{
log_error("Build log:\n", deviceName);
@@ -433,25 +549,29 @@ static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device
static void l_load_abilities(cl_device_id device)
{
- l_has_half = is_extension_available(device,"cl_khr_fp16");
- l_has_double = is_extension_available(device,"cl_khr_fp64");
- l_has_cles_int64 = is_extension_available(device,"cles_khr_int64");
+ l_has_half = is_extension_available(device, "cl_khr_fp16");
+ l_has_double = is_extension_available(device, "cl_khr_fp64");
+ l_has_cles_int64 = is_extension_available(device, "cles_khr_int64");
- l_has_int64_atomics
- = is_extension_available(device,"cl_khr_int64_base_atomics")
- && is_extension_available(device,"cl_khr_int64_extended_atomics");
+ l_has_int64_atomics =
+ is_extension_available(device, "cl_khr_int64_base_atomics")
+ && is_extension_available(device, "cl_khr_int64_extended_atomics");
{
int status = CL_SUCCESS;
cl_uint addr_bits = 32;
- status = clGetDeviceInfo(device,CL_DEVICE_ADDRESS_BITS,sizeof(addr_bits),&addr_bits,0);
- l_64bit_device = ( status == CL_SUCCESS && addr_bits == 64 );
+ status = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
+ sizeof(addr_bits), &addr_bits, 0);
+ l_64bit_device = (status == CL_SUCCESS && addr_bits == 64);
}
// 32-bit devices always have intptr atomics.
l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics;
- union { char c[4]; int i; } probe;
+ union {
+ char c[4];
+ int i;
+ } probe;
probe.i = 1;
l_host_is_big_endian = !probe.c[0];
@@ -459,33 +579,40 @@ static void l_load_abilities(cl_device_id device)
{
int status = CL_SUCCESS;
cl_uint max_dim = 0;
- status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(max_dim),&max_dim,0);
- assert( status == CL_SUCCESS );
- assert( max_dim > 0 );
+ status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
+ sizeof(max_dim), &max_dim, 0);
+ assert(status == CL_SUCCESS);
+ assert(max_dim > 0);
size_t max_id[3];
max_id[0] = 0;
- status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,max_dim*sizeof(size_t),&max_id[0],0);
- assert( status == CL_SUCCESS );
+ status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+ max_dim * sizeof(size_t), &max_id[0], 0);
+ assert(status == CL_SUCCESS);
l_max_global_id0 = max_id[0];
}
{ // Is separate compilation supported?
int status = CL_SUCCESS;
l_linker_available = false;
- status = clGetDeviceInfo(device,CL_DEVICE_LINKER_AVAILABLE,sizeof(l_linker_available),&l_linker_available,0);
- assert( status == CL_SUCCESS );
+ status =
+ clGetDeviceInfo(device, CL_DEVICE_LINKER_AVAILABLE,
+ sizeof(l_linker_available), &l_linker_available, 0);
+ assert(status == CL_SUCCESS);
}
}
static const char* l_get_fp64_pragma(void)
{
- return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" : "";
+ return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+ : "";
}
static const char* l_get_cles_int64_pragma(void)
{
- return l_has_cles_int64 ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" : "";
+ return l_has_cles_int64
+ ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n"
+ : "";
}
static const char* l_get_int64_atomic_pragma(void)
@@ -500,89 +627,83 @@ static int l_build_type_table(cl_device_id device)
size_t iscalar = 0;
size_t ivecsize = 0;
int vecsizes[] = { 2, 3, 4, 8, 16 };
- const char* vecbase[] = {
- "uchar", "char",
- "ushort", "short",
- "uint", "int",
- "ulong", "long",
- "float",
- "double"
- };
- int vecbase_size[] = {
- 1, 1,
- 2, 2,
- 4, 4,
- 8, 8,
- 4,
- 8
- };
- const char* like_size_t[] = {
- "intptr_t",
- "uintptr_t",
- "size_t",
- "ptrdiff_t"
- };
+ const char* vecbase[] = { "uchar", "char", "ushort", "short", "uint",
+ "int", "ulong", "long", "float", "double" };
+ int vecbase_size[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
+ const char* like_size_t[] = { "intptr_t", "uintptr_t", "size_t",
+ "ptrdiff_t" };
const char* atomics[] = {
- "atomic_int", "atomic_uint",
- "atomic_long", "atomic_ulong",
- "atomic_float",
- "atomic_double",
- };
- int atomics_size[] = {
- 4, 4,
- 8, 8,
- 4,
- 8
- };
- const char* intptr_atomics[] = {
- "atomic_intptr_t",
- "atomic_uintptr_t",
- "atomic_size_t",
- "atomic_ptrdiff_t"
+ "atomic_int", "atomic_uint", "atomic_long",
+ "atomic_ulong", "atomic_float", "atomic_double",
};
+ int atomics_size[] = { 4, 4, 8, 8, 4, 8 };
+ const char* intptr_atomics[] = { "atomic_intptr_t", "atomic_uintptr_t",
+ "atomic_size_t", "atomic_ptrdiff_t" };
l_load_abilities(device);
num_type_info = 0;
// Boolean.
- type_info[ num_type_info++ ] = TypeInfo( "bool" ).set_bool().set_size(1).set_buf_elem_type("uchar");
+ type_info[num_type_info++] =
+ TypeInfo("bool").set_bool().set_size(1).set_buf_elem_type("uchar");
// Vector types, and the related scalar element types.
- for ( iscalar=0; iscalar < sizeof(vecbase)/sizeof(vecbase[0]) ; ++iscalar ) {
- if ( !gHasLong && strstr(vecbase[iscalar],"long") ) continue;
- if ( !l_has_double && strstr(vecbase[iscalar],"double") ) continue;
+ for (iscalar = 0; iscalar < sizeof(vecbase) / sizeof(vecbase[0]); ++iscalar)
+ {
+ if (!gHasLong && strstr(vecbase[iscalar], "long")) continue;
+ if (!l_has_double && strstr(vecbase[iscalar], "double")) continue;
// Scalar
TypeInfo* elem_type = type_info + num_type_info++;
- *elem_type = TypeInfo( vecbase[iscalar] ).set_vecbase().set_size( vecbase_size[iscalar] );
+ *elem_type = TypeInfo(vecbase[iscalar])
+ .set_vecbase()
+ .set_size(vecbase_size[iscalar]);
// Vector
- for ( ivecsize=0; ivecsize < sizeof(vecsizes)/sizeof(vecsizes[0]) ; ivecsize++ ) {
- type_info[ num_type_info++ ] = TypeInfo( elem_type, vecsizes[ivecsize] );
+ for (ivecsize = 0; ivecsize < sizeof(vecsizes) / sizeof(vecsizes[0]);
+ ivecsize++)
+ {
+ type_info[num_type_info++] =
+ TypeInfo(elem_type, vecsizes[ivecsize]);
}
}
// Size_t-like types
- for ( iscalar=0; iscalar < sizeof(like_size_t)/sizeof(like_size_t[0]) ; ++iscalar ) {
- type_info[ num_type_info++ ] = TypeInfo( like_size_t[iscalar] ).set_like_size_t();
+ for (iscalar = 0; iscalar < sizeof(like_size_t) / sizeof(like_size_t[0]);
+ ++iscalar)
+ {
+ type_info[num_type_info++] =
+ TypeInfo(like_size_t[iscalar]).set_like_size_t();
}
// Atomic types.
- for ( iscalar=0; iscalar < sizeof(atomics)/sizeof(atomics[0]) ; ++iscalar ) {
- if ( !l_has_int64_atomics && strstr(atomics[iscalar],"long") ) continue;
- if ( !(l_has_int64_atomics && l_has_double) && strstr(atomics[iscalar],"double") ) continue;
+ for (iscalar = 0; iscalar < sizeof(atomics) / sizeof(atomics[0]); ++iscalar)
+ {
+ if (!l_has_int64_atomics && strstr(atomics[iscalar], "long")) continue;
+ if (!(l_has_int64_atomics && l_has_double)
+ && strstr(atomics[iscalar], "double"))
+ continue;
// The +7 is used to skip over the "atomic_" prefix.
const char* buf_type = atomics[iscalar] + 7;
- type_info[ num_type_info++ ] = TypeInfo( atomics[iscalar] ).set_atomic().set_size( atomics_size[iscalar] ).set_buf_elem_type( buf_type );
+ type_info[num_type_info++] = TypeInfo(atomics[iscalar])
+ .set_atomic()
+ .set_size(atomics_size[iscalar])
+ .set_buf_elem_type(buf_type);
}
- if ( l_has_intptr_atomics ) {
- for ( iscalar=0; iscalar < sizeof(intptr_atomics)/sizeof(intptr_atomics[0]) ; ++iscalar ) {
- type_info[ num_type_info++ ] = TypeInfo( intptr_atomics[iscalar] ).set_atomic().set_like_size_t();
+ if (l_has_intptr_atomics)
+ {
+ for (iscalar = 0;
+ iscalar < sizeof(intptr_atomics) / sizeof(intptr_atomics[0]);
+ ++iscalar)
+ {
+ type_info[num_type_info++] = TypeInfo(intptr_atomics[iscalar])
+ .set_atomic()
+ .set_like_size_t();
}
}
- assert( num_type_info <= MAX_TYPES ); // or increase MAX_TYPES
+ assert(num_type_info <= MAX_TYPES); // or increase MAX_TYPES
#if 0
for ( size_t i = 0 ; i < num_type_info ; i++ ) {
@@ -594,7 +715,7 @@ static int l_build_type_table(cl_device_id device)
return status;
}
-static const TypeInfo& l_find_type( const char* name )
+static const TypeInfo& l_find_type(const char* name)
{
auto itr =
std::find_if(type_info, type_info + num_type_info,
@@ -604,36 +725,54 @@ static const TypeInfo& l_find_type( const char* name )
}
+// Populate return parameters for max program variable size, preferred program
+// variable size.
-// Populate return parameters for max program variable size, preferred program variable size.
-
-static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret)
+static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
+ size_t* pref_size_ret)
{
int err = CL_SUCCESS;
size_t return_size = 0;
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(*max_size_ret), max_size_ret, &return_size);
- if ( err != CL_SUCCESS ) {
- log_error("Error: Failed to get device info for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n");
+ err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
+ sizeof(*max_size_ret), max_size_ret, &return_size);
+ if (err != CL_SUCCESS)
+ {
+ log_error("Error: Failed to get device info for "
+ "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n");
return err;
}
- if ( return_size != sizeof(size_t) ) {
- log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size );
+ if (return_size != sizeof(size_t))
+ {
+ log_error("Error: Invalid size %d returned for "
+ "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n",
+ (int)return_size);
return 1;
}
- if ( return_size != sizeof(size_t) ) {
- log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size );
+ if (return_size != sizeof(size_t))
+ {
+ log_error("Error: Invalid size %d returned for "
+ "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n",
+ (int)return_size);
return 1;
}
return_size = 0;
- err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(*pref_size_ret), pref_size_ret, &return_size);
- if ( err != CL_SUCCESS ) {
- log_error("Error: Failed to get device info for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",err);
+ err =
+ clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE,
+ sizeof(*pref_size_ret), pref_size_ret, &return_size);
+ if (err != CL_SUCCESS)
+ {
+ log_error("Error: Failed to get device info for "
+ "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",
+ err);
return err;
}
- if ( return_size != sizeof(size_t) ) {
- log_error("Error: Invalid size %d returned for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", (int)return_size );
+ if (return_size != sizeof(size_t))
+ {
+ log_error("Error: Invalid size %d returned for "
+ "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n",
+ (int)return_size);
return 1;
}
@@ -641,11 +780,13 @@ static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t*
}
-static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state )
+static void l_set_randomly(cl_uchar* buf, size_t buf_size,
+ RandomSeed& rand_state)
{
- assert( 0 == (buf_size % sizeof(cl_uint) ) );
- for ( size_t i = 0; i < buf_size ; i += sizeof(cl_uint) ) {
- *( (cl_uint*)(buf + i) ) = genrand_int32( rand_state );
+ assert(0 == (buf_size % sizeof(cl_uint)));
+ for (size_t i = 0; i < buf_size; i += sizeof(cl_uint))
+ {
+ *((cl_uint*)(buf + i)) = genrand_int32(rand_state);
}
#if 0
for ( size_t i = 0; i < buf_size ; i++ ) {
@@ -657,20 +798,23 @@ static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_sta
// Return num_value values of the given type.
// Returns CL_SUCCESS if they compared as equal.
-static int l_compare( const char* test_name, const cl_uchar* expected, const cl_uchar* received, size_t num_values, const TypeInfo&ti )
+static int l_compare(const char* test_name, const cl_uchar* expected,
+ const cl_uchar* received, size_t num_values,
+ const TypeInfo& ti)
{
// Compare only the valid returned bytes.
- for ( unsigned value_idx = 0; value_idx < num_values; value_idx++ ) {
+ for (unsigned value_idx = 0; value_idx < num_values; value_idx++)
+ {
const cl_uchar* expv = expected + value_idx * ti.get_size();
const cl_uchar* gotv = received + value_idx * ti.get_size();
- if ( memcmp( expv, gotv, ti.get_value_size() ) ) {
- std::string exp_str = ti.as_string( expv );
- std::string got_str = ti.as_string( gotv );
- log_error("Error: %s test for type %s, at index %d: Expected %s got %s\n",
- test_name,
- ti.get_name_c_str(), value_idx,
- exp_str.c_str(),
- got_str.c_str() );
+ if (memcmp(expv, gotv, ti.get_value_size()))
+ {
+ std::string exp_str = ti.as_string(expv);
+ std::string got_str = ti.as_string(gotv);
+ log_error(
+ "Error: %s test for type %s, at index %d: Expected %s got %s\n",
+ test_name, ti.get_name_c_str(), value_idx, exp_str.c_str(),
+ got_str.c_str());
return 1;
}
}
@@ -678,11 +822,12 @@ static int l_compare( const char* test_name, const cl_uchar* expected, const cl_
}
// Copy a target value from src[idx] to dest[idx]
-static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti )
+static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
+ unsigned src_idx, const TypeInfo& ti)
{
- cl_uchar* raw_dest = dest + dest_idx * ti.get_size();
- const cl_uchar* raw_src = src + src_idx * ti.get_size();
- memcpy( raw_dest, raw_src, ti.get_value_size() );
+ cl_uchar* raw_dest = dest + dest_idx * ti.get_size();
+ const cl_uchar* raw_src = src + src_idx * ti.get_size();
+ memcpy(raw_dest, raw_src, ti.get_value_size());
return 0;
}
@@ -694,59 +839,70 @@ static std::string conversion_functions(const TypeInfo& ti)
static char buf[MAX_STR];
int num_printed = 0;
// The atomic types just use the base type.
- if ( ti.is_atomic() || 0 == strcmp( ti.get_buf_elem_type(), ti.get_name_c_str() ) ) {
+ if (ti.is_atomic()
+ || 0 == strcmp(ti.get_buf_elem_type(), ti.get_name_c_str()))
+ {
// The type is represented in a buffer by itself.
- num_printed = snprintf(buf,MAX_STR,
- "%s from_buf(%s a) { return a; }\n"
- "%s to_buf(%s a) { return a; }\n",
- ti.get_buf_elem_type(), ti.get_buf_elem_type(),
- ti.get_buf_elem_type(), ti.get_buf_elem_type() );
- } else {
+ num_printed = snprintf(buf, MAX_STR,
+ "%s from_buf(%s a) { return a; }\n"
+ "%s to_buf(%s a) { return a; }\n",
+ ti.get_buf_elem_type(), ti.get_buf_elem_type(),
+ ti.get_buf_elem_type(), ti.get_buf_elem_type());
+ }
+ else
+ {
// Just use C-style cast.
- num_printed = snprintf(buf,MAX_STR,
- "%s from_buf(%s a) { return (%s)a; }\n"
- "%s to_buf(%s a) { return (%s)a; }\n",
- ti.get_name_c_str(), ti.get_buf_elem_type(), ti.get_name_c_str(),
- ti.get_buf_elem_type(), ti.get_name_c_str(), ti.get_buf_elem_type() );
+ num_printed = snprintf(buf, MAX_STR,
+ "%s from_buf(%s a) { return (%s)a; }\n"
+ "%s to_buf(%s a) { return (%s)a; }\n",
+ ti.get_name_c_str(), ti.get_buf_elem_type(),
+ ti.get_name_c_str(), ti.get_buf_elem_type(),
+ ti.get_name_c_str(), ti.get_buf_elem_type());
}
// Add initializations.
- if ( ti.is_atomic() ) {
- num_printed += snprintf( buf + num_printed, MAX_STR-num_printed,
- "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n" );
- } else {
+ if (ti.is_atomic())
+ {
+ num_printed += snprintf(buf + num_printed, MAX_STR - num_printed,
+ "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n");
+ }
+ else
+ {
// This cast works even if the target type is a vector type.
- num_printed += snprintf( buf + num_printed, MAX_STR-num_printed,
- "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str());
+ num_printed +=
+ snprintf(buf + num_printed, MAX_STR - num_printed,
+ "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str());
}
- assert( num_printed < MAX_STR ); // or increase MAX_STR
+ assert(num_printed < MAX_STR); // or increase MAX_STR
result = buf;
return result;
}
-static std::string global_decls(const TypeInfo& ti, bool with_init )
+static std::string global_decls(const TypeInfo& ti, bool with_init)
{
const char* tn = ti.get_name_c_str();
const char* vol = (ti.is_atomic() ? " volatile " : " ");
static char decls[MAX_STR];
int num_printed = 0;
- if ( with_init ) {
- const char *decls_template_with_init =
+ if (with_init)
+ {
+ const char* decls_template_with_init =
"%s %s var = INIT_VAR(0);\n"
"global %s %s g_var = INIT_VAR(1);\n"
"%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n"
"volatile global %s %s* p_var = &a_var[1];\n\n";
- num_printed = snprintf(decls,sizeof(decls),decls_template_with_init,
- vol,tn,vol,tn,vol,tn,vol,tn);
- } else {
- const char *decls_template_no_init =
- "%s %s var;\n"
- "global %s %s g_var;\n"
- "%s %s a_var[2];\n"
- "global %s %s* p_var;\n\n";
- num_printed = snprintf(decls,sizeof(decls),decls_template_no_init,
- vol,tn,vol,tn,vol,tn,vol,tn);
- }
- assert( num_printed < sizeof(decls) );
+ num_printed = snprintf(decls, sizeof(decls), decls_template_with_init,
+ vol, tn, vol, tn, vol, tn, vol, tn);
+ }
+ else
+ {
+ const char* decls_template_no_init = "%s %s var;\n"
+ "global %s %s g_var;\n"
+ "%s %s a_var[2];\n"
+ "global %s %s* p_var;\n\n";
+ num_printed = snprintf(decls, sizeof(decls), decls_template_no_init,
+ vol, tn, vol, tn, vol, tn, vol, tn);
+ }
+ assert(num_printed < sizeof(decls));
return std::string(decls);
}
@@ -761,18 +917,26 @@ static std::string global_check_function(const TypeInfo& ti)
// all() should only be used on vector inputs. For scalar comparison, the
// result of the equality operator can be used as a bool value.
- const bool is_scalar = ti.num_elem() == 0; // 0 is used to represent scalar types, not 1.
+ const bool is_scalar =
+ ti.num_elem() == 0; // 0 is used to represent scalar types, not 1.
const std::string is_equality_true = is_scalar ? "" : "all";
std::string code = "kernel void global_check(global int* out) {\n";
code += " const " + type_name + " zero = ((" + type_name + ")0);\n";
code += " bool status = true;\n";
- if (ti.is_atomic()) {
- code += " status &= " + is_equality_true + "(atomic_load(&var) == zero);\n";
- code += " status &= " + is_equality_true + "(atomic_load(&g_var) == zero);\n";
- code += " status &= " + is_equality_true + "(atomic_load(&a_var[0]) == zero);\n";
- code += " status &= " + is_equality_true + "(atomic_load(&a_var[1]) == zero);\n";
- } else {
+ if (ti.is_atomic())
+ {
+ code += " status &= " + is_equality_true
+ + "(atomic_load(&var) == zero);\n";
+ code += " status &= " + is_equality_true
+ + "(atomic_load(&g_var) == zero);\n";
+ code += " status &= " + is_equality_true
+ + "(atomic_load(&a_var[0]) == zero);\n";
+ code += " status &= " + is_equality_true
+ + "(atomic_load(&a_var[1]) == zero);\n";
+ }
+ else
+ {
code += " status &= " + is_equality_true + "(var == zero);\n";
code += " status &= " + is_equality_true + "(g_var == zero);\n";
code += " status &= " + is_equality_true + "(a_var[0] == zero);\n";
@@ -792,7 +956,8 @@ static std::string writer_function(const TypeInfo& ti)
{
static char writer_src[MAX_STR];
int num_printed = 0;
- if ( !ti.is_atomic() ) {
+ if (!ti.is_atomic())
+ {
const char* writer_template_normal =
"kernel void writer( global %s* src, uint idx ) {\n"
" var = from_buf(src[0]);\n"
@@ -801,8 +966,11 @@ static std::string writer_function(const TypeInfo& ti)
" a_var[1] = from_buf(src[3]);\n"
" p_var = a_var + idx;\n"
"}\n\n";
- num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_normal,ti.get_buf_elem_type());
- } else {
+ num_printed = snprintf(writer_src, sizeof(writer_src),
+ writer_template_normal, ti.get_buf_elem_type());
+ }
+ else
+ {
const char* writer_template_atomic =
"kernel void writer( global %s* src, uint idx ) {\n"
" atomic_store( &var, from_buf(src[0]) );\n"
@@ -811,9 +979,10 @@ static std::string writer_function(const TypeInfo& ti)
" atomic_store( &a_var[1], from_buf(src[3]) );\n"
" p_var = a_var + idx;\n"
"}\n\n";
- num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_atomic,ti.get_buf_elem_type());
+ num_printed = snprintf(writer_src, sizeof(writer_src),
+ writer_template_atomic, ti.get_buf_elem_type());
}
- assert( num_printed < sizeof(writer_src) );
+ assert(num_printed < sizeof(writer_src));
std::string result = writer_src;
return result;
}
@@ -826,7 +995,8 @@ static std::string reader_function(const TypeInfo& ti)
{
static char reader_src[MAX_STR];
int num_printed = 0;
- if ( !ti.is_atomic() ) {
+ if (!ti.is_atomic())
+ {
const char* reader_template_normal =
"kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
" *p_var = from_buf(ptr_write_val);\n"
@@ -835,8 +1005,12 @@ static std::string reader_function(const TypeInfo& ti)
" dest[2] = to_buf(a_var[0]);\n"
" dest[3] = to_buf(a_var[1]);\n"
"}\n\n";
- num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_normal,ti.get_buf_elem_type(),ti.get_buf_elem_type());
- } else {
+ num_printed =
+ snprintf(reader_src, sizeof(reader_src), reader_template_normal,
+ ti.get_buf_elem_type(), ti.get_buf_elem_type());
+ }
+ else
+ {
const char* reader_template_atomic =
"kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
" atomic_store( p_var, from_buf(ptr_write_val) );\n"
@@ -845,40 +1019,53 @@ static std::string reader_function(const TypeInfo& ti)
" dest[2] = to_buf( atomic_load( &a_var[0] ) );\n"
" dest[3] = to_buf( atomic_load( &a_var[1] ) );\n"
"}\n\n";
- num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_atomic,ti.get_buf_elem_type(),ti.get_buf_elem_type());
+ num_printed =
+ snprintf(reader_src, sizeof(reader_src), reader_template_atomic,
+ ti.get_buf_elem_type(), ti.get_buf_elem_type());
}
- assert( num_printed < sizeof(reader_src) );
+ assert(num_printed < sizeof(reader_src));
std::string result = reader_src;
return result;
}
// Check that all globals where appropriately default-initialized.
-static int check_global_initialization(cl_context context, cl_program program, cl_command_queue queue)
+static int check_global_initialization(cl_context context, cl_program program,
+ cl_command_queue queue)
{
int status = CL_SUCCESS;
// Create a buffer on device to store a unique integer.
cl_int is_init_valid = 0;
- clMemWrapper buffer(clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(is_init_valid), &is_init_valid, &status));
+ clMemWrapper buffer(
+ clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+ sizeof(is_init_valid), &is_init_valid, &status));
test_error_ret(status, "Failed to allocate buffer", status);
// Create, setup and invoke kernel.
- clKernelWrapper global_check(clCreateKernel(program, "global_check", &status));
+ clKernelWrapper global_check(
+ clCreateKernel(program, "global_check", &status));
test_error_ret(status, "Failed to create global_check kernel", status);
status = clSetKernelArg(global_check, 0, sizeof(cl_mem), &buffer);
- test_error_ret(status, "Failed to set up argument for the global_check kernel", status);
+ test_error_ret(status,
+ "Failed to set up argument for the global_check kernel",
+ status);
const cl_uint work_dim = 1;
const size_t global_work_offset[] = { 0 };
const size_t global_work_size[] = { 1 };
- status = clEnqueueNDRangeKernel(queue, global_check, work_dim, global_work_offset, global_work_size, nullptr, 0, nullptr, nullptr);
+ status = clEnqueueNDRangeKernel(queue, global_check, work_dim,
+ global_work_offset, global_work_size,
+ nullptr, 0, nullptr, nullptr);
test_error_ret(status, "Failed to run global_check kernel", status);
status = clFinish(queue);
test_error_ret(status, "clFinish() failed", status);
// Read back the memory buffer from the device.
- status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid), &is_init_valid, 0, nullptr, nullptr);
+ status =
+ clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid),
+ &is_init_valid, 0, nullptr, nullptr);
test_error_ret(status, "Failed to read buffer from device", status);
- if (is_init_valid == 0) {
+ if (is_init_valid == 0)
+ {
log_error("Unexpected default values were detected");
return 1;
}
@@ -887,58 +1074,75 @@ static int check_global_initialization(cl_context context, cl_program program, c
}
// Check write-then-read.
-static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue )
+static int l_write_read(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
int status = CL_SUCCESS;
int itype;
- RandomSeed rand_state( gRandomSeed );
+ RandomSeed rand_state(gRandomSeed);
- for ( itype = 0; itype < num_type_info ; itype++ ) {
- status = status | l_write_read_for_type(device,context,queue,type_info[itype], rand_state );
+ for (itype = 0; itype < num_type_info; itype++)
+ {
+ status = status
+ | l_write_read_for_type(device, context, queue, type_info[itype],
+ rand_state);
FLUSH;
}
return status;
}
-static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state )
+static int l_write_read_for_type(cl_device_id device, cl_context context,
+ cl_command_queue queue, const TypeInfo& ti,
+ RandomSeed& rand_state)
{
int err = CL_SUCCESS;
- std::string type_name( ti.get_name() );
+ std::string type_name(ti.get_name());
const char* tn = type_name.c_str();
- log_info(" %s ",tn);
+ log_info(" %s ", tn);
StringTable ksrc;
- ksrc.add( l_get_fp64_pragma() );
- ksrc.add( l_get_cles_int64_pragma() );
- if (ti.is_atomic_64bit())
- ksrc.add( l_get_int64_atomic_pragma() );
- ksrc.add( conversion_functions(ti) );
- ksrc.add( global_decls(ti,false) );
- ksrc.add( global_check_function(ti) );
- ksrc.add( writer_function(ti) );
- ksrc.add( reader_function(ti) );
+ ksrc.add(l_get_fp64_pragma());
+ ksrc.add(l_get_cles_int64_pragma());
+ if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma());
+ ksrc.add(conversion_functions(ti));
+ ksrc.add(global_decls(ti, false));
+ ksrc.add(global_check_function(ti));
+ ksrc.add(writer_function(ti));
+ ksrc.add(reader_function(ti));
int status = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper writer;
- status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS);
- test_error_ret(status,"Failed to create program for read-after-write test",status);
+ status = create_single_kernel_helper_with_build_options(
+ context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer",
+ OPTIONS);
+ test_error_ret(status, "Failed to create program for read-after-write test",
+ status);
- clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
- test_error_ret(status,"Failed to create reader kernel for read-after-write test",status);
+ clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+ test_error_ret(status,
+ "Failed to create reader kernel for read-after-write test",
+ status);
// Check size query.
size_t used_bytes = 0;
- status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
- test_error_ret(status,"Failed to query global variable total size",status);
- size_t expected_used_bytes =
- (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements.
- + ( l_64bit_device ? 8 : 4 ); // The pointer
- if ( used_bytes < expected_used_bytes ) {
- log_error("Error program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes );
+ status = clGetProgramBuildInfo(program, device,
+ CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+ sizeof(used_bytes), &used_bytes, 0);
+ test_error_ret(status, "Failed to query global variable total size",
+ status);
+ size_t expected_used_bytes = (NUM_TESTED_VALUES - 1)
+ * ti.get_size() // Two regular variables and an array of 2 elements.
+ + (l_64bit_device ? 8 : 4); // The pointer
+ if (used_bytes < expected_used_bytes)
+ {
+ log_error("Error program query for global variable total size query "
+ "failed: Expected at least %llu but got %llu\n",
+ (unsigned long long)expected_used_bytes,
+ (unsigned long long)used_bytes);
err |= 1;
}
@@ -951,90 +1155,131 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
- clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
- test_error_ret(status,"Failed to allocate write buffer",status);
- clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
- test_error_ret(status,"Failed to allocate read buffer",status);
+ clMemWrapper write_mem(clCreateBuffer(
+ context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status));
+ test_error_ret(status, "Failed to allocate write buffer", status);
+ clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ read_data_size, read_data, &status));
+ test_error_ret(status, "Failed to allocate read buffer", status);
- status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem);
+ test_error_ret(status, "set arg", status);
// Boolean random data needs to be massaged a bit more.
- const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS;
+ const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES) : NUM_ROUNDS;
unsigned bool_iter = 0;
- for ( int iround = 0; iround < num_rounds ; iround++ ) {
- for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer
+ for (int iround = 0; iround < num_rounds; iround++)
+ {
+ for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++)
+ { // Index into array, to write via pointer
// Generate new random data to push through.
- // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
+ // Generate 5 * 128 bytes all the time, even though the test for
+ // many types use less than all that.
- cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
+ cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0,
+ 0, 0, 0);
- if ( ti.is_bool() ) {
+ if (ti.is_bool())
+ {
// For boolean, random data cast to bool isn't very random.
// So use the bottom bit of bool_value_iter to get true
// diversity.
- for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) {
- write_data[value_idx] = (1<<value_idx) & bool_iter;
- //printf(" %s", (write_data[value_idx] ? "true" : "false" ));
+ for (unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES;
+ value_idx++)
+ {
+ write_data[value_idx] = (1 << value_idx) & bool_iter;
+ // printf(" %s", (write_data[value_idx] ? "true" : "false"
+ // ));
}
bool_iter++;
- } else {
- l_set_randomly( write_data, write_data_size, rand_state );
}
- status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
+ else
+ {
+ l_set_randomly(write_data, write_data_size, rand_state);
+ }
+ status = clSetKernelArg(writer, 1, sizeof(cl_uint), &iptr_idx);
+ test_error_ret(status, "set arg", status);
// The value to write via the pointer should be taken from the
// 5th typed slot of the write_data.
- status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(
+ reader, 1, ti.get_size(),
+ write_data + (NUM_TESTED_VALUES - 1) * ti.get_size());
+ test_error_ret(status, "set arg", status);
// Determine the expected values.
cl_uchar expected[read_data_size];
- memset( expected, -1, sizeof(expected) );
- l_copy( expected, 0, write_data, 0, ti );
- l_copy( expected, 1, write_data, 1, ti );
- l_copy( expected, 2, write_data, 2, ti );
- l_copy( expected, 3, write_data, 3, ti );
- // But we need to take into account the value from the pointer write.
- // The 2 represents where the "a" array values begin in our read-back.
- l_copy( expected, 2 + iptr_idx, write_data, 4, ti );
+ memset(expected, -1, sizeof(expected));
+ l_copy(expected, 0, write_data, 0, ti);
+ l_copy(expected, 1, write_data, 1, ti);
+ l_copy(expected, 2, write_data, 2, ti);
+ l_copy(expected, 3, write_data, 3, ti);
+ // But we need to take into account the value from the pointer
+ // write. The 2 represents where the "a" array values begin in our
+ // read-back.
+ l_copy(expected, 2 + iptr_idx, write_data, 4, ti);
clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
- if ( ti.is_bool() ) {
+ if (ti.is_bool())
+ {
// Collapse down to one bit.
- for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
+ for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+ expected[i] = (bool)expected[i];
}
- cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
+ cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+ 0, 0);
memset(read_data, -1, read_data_size);
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
// Now run the kernel
const size_t one = 1;
- status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
- status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
-
- read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
-
- if ( ti.is_bool() ) {
+ status =
+ clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue writer", status);
+ status =
+ clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue reader", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
+
+ read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+ 0, 0);
+
+ if (ti.is_bool())
+ {
// Collapse down to one bit.
- for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i];
+ for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+ read_data[i] = (bool)read_data[i];
}
// Compare only the valid returned bytes.
- int compare_result = l_compare( "read-after-write", expected, read_data, NUM_TESTED_VALUES-1, ti );
- // log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result );
+ int compare_result =
+ l_compare("read-after-write", expected, read_data,
+ NUM_TESTED_VALUES - 1, ti);
+ // log_info("Compared %d values each of size %llu. Result %d\n",
+ // NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(),
+ // compare_result );
err |= compare_result;
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
- if ( err ) break;
+ if (err) break;
}
}
- if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+ if (CL_SUCCESS == err)
+ {
+ log_info("OK\n");
+ FLUSH;
+ }
align_free(write_data);
align_free(read_data);
return err;
@@ -1042,74 +1287,97 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
// Check initialization, then, read, then write, then read.
-static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue )
+static int l_init_write_read(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
int status = CL_SUCCESS;
int itype;
- RandomSeed rand_state( gRandomSeed );
+ RandomSeed rand_state(gRandomSeed);
- for ( itype = 0; itype < num_type_info ; itype++ ) {
- status = status | l_init_write_read_for_type(device,context,queue,type_info[itype], rand_state );
+ for (itype = 0; itype < num_type_info; itype++)
+ {
+ status = status
+ | l_init_write_read_for_type(device, context, queue,
+ type_info[itype], rand_state);
}
return status;
}
-static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state )
+static int l_init_write_read_for_type(cl_device_id device, cl_context context,
+ cl_command_queue queue,
+ const TypeInfo& ti,
+ RandomSeed& rand_state)
{
int err = CL_SUCCESS;
- std::string type_name( ti.get_name() );
+ std::string type_name(ti.get_name());
const char* tn = type_name.c_str();
- log_info(" %s ",tn);
+ log_info(" %s ", tn);
StringTable ksrc;
- ksrc.add( l_get_fp64_pragma() );
- ksrc.add( l_get_cles_int64_pragma() );
- if (ti.is_atomic_64bit())
- ksrc.add( l_get_int64_atomic_pragma() );
- ksrc.add( conversion_functions(ti) );
- ksrc.add( global_decls(ti,true) );
- ksrc.add( writer_function(ti) );
- ksrc.add( reader_function(ti) );
+ ksrc.add(l_get_fp64_pragma());
+ ksrc.add(l_get_cles_int64_pragma());
+ if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma());
+ ksrc.add(conversion_functions(ti));
+ ksrc.add(global_decls(ti, true));
+ ksrc.add(writer_function(ti));
+ ksrc.add(reader_function(ti));
int status = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper writer;
- status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS);
- test_error_ret(status,"Failed to create program for init-read-after-write test",status);
+ status = create_single_kernel_helper_with_build_options(
+ context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer",
+ OPTIONS);
+ test_error_ret(status,
+ "Failed to create program for init-read-after-write test",
+ status);
- clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
- test_error_ret(status,"Failed to create reader kernel for init-read-after-write test",status);
+ clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+ test_error_ret(
+ status, "Failed to create reader kernel for init-read-after-write test",
+ status);
// Check size query.
size_t used_bytes = 0;
- status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
- test_error_ret(status,"Failed to query global variable total size",status);
- size_t expected_used_bytes =
- (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements.
- + ( l_64bit_device ? 8 : 4 ); // The pointer
- if ( used_bytes < expected_used_bytes ) {
- log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes );
+ status = clGetProgramBuildInfo(program, device,
+ CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+ sizeof(used_bytes), &used_bytes, 0);
+ test_error_ret(status, "Failed to query global variable total size",
+ status);
+ size_t expected_used_bytes = (NUM_TESTED_VALUES - 1)
+ * ti.get_size() // Two regular variables and an array of 2 elements.
+ + (l_64bit_device ? 8 : 4); // The pointer
+ if (used_bytes < expected_used_bytes)
+ {
+ log_error("Error: program query for global variable total size query "
+ "failed: Expected at least %llu but got %llu\n",
+ (unsigned long long)expected_used_bytes,
+ (unsigned long long)used_bytes);
err |= 1;
}
// We need to create 5 random values of the given type,
// and read 4 of them back.
const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
- const size_t read_data_size = (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16);
+ const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16);
cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
- clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
- test_error_ret(status,"Failed to allocate write buffer",status);
- clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
- test_error_ret(status,"Failed to allocate read buffer",status);
-
- status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status);
+ clMemWrapper write_mem(clCreateBuffer(
+ context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status));
+ test_error_ret(status, "Failed to allocate write buffer", status);
+ clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ read_data_size, read_data, &status));
+ test_error_ret(status, "Failed to allocate read buffer", status);
+
+ status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem);
+ test_error_ret(status, "set arg", status);
// Boolean random data needs to be massaged a bit more.
- const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS;
+ const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES) : NUM_ROUNDS;
unsigned bool_iter = 0;
// We need to count iterations. We do something *different on the
@@ -1117,107 +1385,152 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
// values.
unsigned iteration = 0;
- for ( int iround = 0; iround < num_rounds ; iround++ ) {
- for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer
+ for (int iround = 0; iround < num_rounds; iround++)
+ {
+ for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++)
+ { // Index into array, to write via pointer
// Generate new random data to push through.
- // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
+ // Generate 5 * 128 bytes all the time, even though the test for
+ // many types use less than all that.
- cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
+ cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0,
+ 0, 0, 0);
- if ( ti.is_bool() ) {
+ if (ti.is_bool())
+ {
// For boolean, random data cast to bool isn't very random.
// So use the bottom bit of bool_value_iter to get true
// diversity.
- for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) {
- write_data[value_idx] = (1<<value_idx) & bool_iter;
- //printf(" %s", (write_data[value_idx] ? "true" : "false" ));
+ for (unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES;
+ value_idx++)
+ {
+ write_data[value_idx] = (1 << value_idx) & bool_iter;
+ // printf(" %s", (write_data[value_idx] ? "true" : "false"
+ // ));
}
bool_iter++;
- } else {
- l_set_randomly( write_data, write_data_size, rand_state );
}
- status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
+ else
+ {
+ l_set_randomly(write_data, write_data_size, rand_state);
+ }
+ status = clSetKernelArg(writer, 1, sizeof(cl_uint), &iptr_idx);
+ test_error_ret(status, "set arg", status);
- if ( !iteration ) {
+ if (!iteration)
+ {
// On first iteration, the value we write via the last arg
// to the "reader" function is 0.
// It's way easier to code the test this way.
- ti.init( write_data + (NUM_TESTED_VALUES-1)*ti.get_size(), 0 );
+ ti.init(write_data + (NUM_TESTED_VALUES - 1) * ti.get_size(),
+ 0);
}
// The value to write via the pointer should be taken from the
// 5th typed slot of the write_data.
- status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(
+ reader, 1, ti.get_size(),
+ write_data + (NUM_TESTED_VALUES - 1) * ti.get_size());
+ test_error_ret(status, "set arg", status);
// Determine the expected values.
cl_uchar expected[read_data_size];
- memset( expected, -1, sizeof(expected) );
- if ( iteration ) {
- l_copy( expected, 0, write_data, 0, ti );
- l_copy( expected, 1, write_data, 1, ti );
- l_copy( expected, 2, write_data, 2, ti );
- l_copy( expected, 3, write_data, 3, ti );
- // But we need to take into account the value from the pointer write.
- // The 2 represents where the "a" array values begin in our read-back.
- // But we need to take into account the value from the pointer write.
- l_copy( expected, 2 + iptr_idx, write_data, 4, ti );
- } else {
+ memset(expected, -1, sizeof(expected));
+ if (iteration)
+ {
+ l_copy(expected, 0, write_data, 0, ti);
+ l_copy(expected, 1, write_data, 1, ti);
+ l_copy(expected, 2, write_data, 2, ti);
+ l_copy(expected, 3, write_data, 3, ti);
+ // But we need to take into account the value from the pointer
+ // write. The 2 represents where the "a" array values begin in
+ // our read-back. But we need to take into account the value
+ // from the pointer write.
+ l_copy(expected, 2 + iptr_idx, write_data, 4, ti);
+ }
+ else
+ {
// On first iteration, expect these initialized values!
// See the decls_template_with_init above.
- ti.init( expected, 0 );
- ti.init( expected + ti.get_size(), 1 );
- ti.init( expected + 2*ti.get_size(), 1 );
+ ti.init(expected, 0);
+ ti.init(expected + ti.get_size(), 1);
+ ti.init(expected + 2 * ti.get_size(), 1);
// Emulate the effect of the write via the pointer.
// The value is 0, not 1 (see above).
// The pointer is always initialized to the second element
// of the array. So it goes into slot 3 of the "expected" array.
- ti.init( expected + 3*ti.get_size(), 0 );
+ ti.init(expected + 3 * ti.get_size(), 0);
}
- if ( ti.is_bool() ) {
+ if (ti.is_bool())
+ {
// Collapse down to one bit.
- for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
+ for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+ expected[i] = (bool)expected[i];
}
clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
- cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
- memset( read_data, -1, read_data_size );
+ cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+ 0, 0);
+ memset(read_data, -1, read_data_size);
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
// Now run the kernel
const size_t one = 1;
- if ( iteration ) {
- status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
- } else {
+ if (iteration)
+ {
+ status = clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0,
+ 0, 0);
+ test_error_ret(status, "enqueue writer", status);
+ }
+ else
+ {
// On first iteration, we should be picking up the
// initialized value. So don't enqueue the writer.
}
- status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
+ status =
+ clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue reader", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
- read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
+ read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+ 0, 0);
- if ( ti.is_bool() ) {
+ if (ti.is_bool())
+ {
// Collapse down to one bit.
- for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i];
+ for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+ read_data[i] = (bool)read_data[i];
}
// Compare only the valid returned bytes.
- //log_info(" Round %d ptr_idx %u\n", iround, iptr_idx );
- int compare_result = l_compare( "init-write-read", expected, read_data, NUM_TESTED_VALUES-1, ti );
- //log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result );
+ // log_info(" Round %d ptr_idx %u\n", iround, iptr_idx );
+ int compare_result =
+ l_compare("init-write-read", expected, read_data,
+ NUM_TESTED_VALUES - 1, ti);
+ // log_info("Compared %d values each of size %llu. Result %d\n",
+ // NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(),
+ // compare_result );
err |= compare_result;
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
- if ( err ) break;
+ if (err) break;
iteration++;
}
}
- if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+ if (CL_SUCCESS == err)
+ {
+ log_info("OK\n");
+ FLUSH;
+ }
align_free(write_data);
align_free(read_data);
@@ -1226,12 +1539,14 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
// Check that we can make at least one variable with size
-// max_size which is returned from the device info property : CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE.
-static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size )
+// max_size which is returned from the device info property :
+// CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE.
+static int l_capacity(cl_device_id device, cl_context context,
+ cl_command_queue queue, size_t max_size)
{
int err = CL_SUCCESS;
// Just test one type.
- const TypeInfo ti( l_find_type("uchar") );
+ const TypeInfo ti(l_find_type("uchar"));
log_info(" l_capacity...");
const char prog_src_template[] =
@@ -1254,83 +1569,132 @@ static int l_capacity( cl_device_id device, cl_context context, cl_command_queue
" dest[get_global_linear_id()] = var[get_global_id(0)];\n"
"}\n\n";
char prog_src[MAX_STR];
- int num_printed = snprintf(prog_src,sizeof(prog_src),prog_src_template,max_size, max_size);
- assert( num_printed < MAX_STR ); // or increase MAX_STR
+ int num_printed = snprintf(prog_src, sizeof(prog_src), prog_src_template,
+ max_size, max_size);
+ assert(num_printed < MAX_STR); // or increase MAX_STR
+ (void)num_printed;
StringTable ksrc;
- ksrc.add( prog_src );
+ ksrc.add(prog_src);
int status = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper get_max_size;
- status = create_single_kernel_helper_with_build_options(context, &program, &get_max_size, ksrc.num_str(), ksrc.strs(), "get_max_size", OPTIONS);
- test_error_ret(status,"Failed to create program for capacity test",status);
+ status = create_single_kernel_helper_with_build_options(
+ context, &program, &get_max_size, ksrc.num_str(), ksrc.strs(),
+ "get_max_size", OPTIONS);
+ test_error_ret(status, "Failed to create program for capacity test",
+ status);
// Check size query.
size_t used_bytes = 0;
- status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
- test_error_ret(status,"Failed to query global variable total size",status);
- if ( used_bytes < max_size ) {
- log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)max_size, (unsigned long long)used_bytes );
+ status = clGetProgramBuildInfo(program, device,
+ CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+ sizeof(used_bytes), &used_bytes, 0);
+ test_error_ret(status, "Failed to query global variable total size",
+ status);
+ if (used_bytes < max_size)
+ {
+ log_error("Error: program query for global variable total size query "
+ "failed: Expected at least %llu but got %llu\n",
+ (unsigned long long)max_size, (unsigned long long)used_bytes);
err |= 1;
}
// Prepare to execute
- clKernelWrapper writer( clCreateKernel( program, "writer", &status ) );
- test_error_ret(status,"Failed to create writer kernel for capacity test",status);
- clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
- test_error_ret(status,"Failed to create reader kernel for capacity test",status);
+ clKernelWrapper writer(clCreateKernel(program, "writer", &status));
+ test_error_ret(status, "Failed to create writer kernel for capacity test",
+ status);
+ clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+ test_error_ret(status, "Failed to create reader kernel for capacity test",
+ status);
cl_ulong max_size_ret = 0;
- const size_t arr_size = 10*1024*1024;
- cl_uchar* buffer = (cl_uchar*) align_malloc( arr_size, ALIGNMENT );
+ const size_t arr_size = 10 * 1024 * 1024;
+ cl_uchar* buffer = (cl_uchar*)align_malloc(arr_size, ALIGNMENT);
- if ( !buffer ) { log_error("Failed to allocate buffer\n"); return 1; }
-
- clMemWrapper max_size_ret_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(max_size_ret), &max_size_ret, &status ) );
- test_error_ret(status,"Failed to allocate size query buffer",status);
- clMemWrapper buffer_mem( clCreateBuffer( context, CL_MEM_READ_WRITE, arr_size, 0, &status ) );
- test_error_ret(status,"Failed to allocate write buffer",status);
+ if (!buffer)
+ {
+ log_error("Failed to allocate buffer\n");
+ return 1;
+ }
- status = clSetKernelArg(get_max_size,0,sizeof(cl_mem),&max_size_ret_mem); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(writer,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(reader,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status);
+ clMemWrapper max_size_ret_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ sizeof(max_size_ret),
+ &max_size_ret, &status));
+ test_error_ret(status, "Failed to allocate size query buffer", status);
+ clMemWrapper buffer_mem(
+ clCreateBuffer(context, CL_MEM_READ_WRITE, arr_size, 0, &status));
+ test_error_ret(status, "Failed to allocate write buffer", status);
+
+ status = clSetKernelArg(get_max_size, 0, sizeof(cl_mem), &max_size_ret_mem);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(writer, 0, sizeof(cl_mem), &buffer_mem);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(reader, 0, sizeof(cl_mem), &buffer_mem);
+ test_error_ret(status, "set arg", status);
// Check the macro value of CL_DEVICE_MAX_GLOBAL_VARIABLE
const size_t one = 1;
- status = clEnqueueNDRangeKernel(queue,get_max_size,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue size query",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
-
- cl_uchar *max_size_ret_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, max_size_ret_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(max_size_ret), 0, 0, 0, 0);
- if ( max_size_ret != max_size ) {
- log_error("Error: preprocessor definition for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE is %llu and does not match device query value %llu\n",
- (unsigned long long) max_size_ret,
- (unsigned long long) max_size );
+ status =
+ clEnqueueNDRangeKernel(queue, get_max_size, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue size query", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
+
+ cl_uchar* max_size_ret_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, max_size_ret_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(max_size_ret),
+ 0, 0, 0, 0);
+ if (max_size_ret != max_size)
+ {
+ log_error("Error: preprocessor definition for "
+ "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE is %llu and does not "
+ "match device query value %llu\n",
+ (unsigned long long)max_size_ret,
+ (unsigned long long)max_size);
err |= 1;
}
clEnqueueUnmapMemObject(queue, max_size_ret_mem, max_size_ret_ptr, 0, 0, 0);
- RandomSeed rand_state_write( gRandomSeed );
- for ( size_t offset = 0; offset < max_size ; offset += arr_size ) {
- size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
- l_set_randomly( buffer, curr_size, rand_state_write );
- status = clEnqueueWriteBuffer (queue, buffer_mem, CL_TRUE, 0, curr_size, buffer, 0, 0, 0);test_error_ret(status,"populate buffer_mem object",status);
- status = clEnqueueNDRangeKernel(queue,writer,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue writer",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
- }
-
- RandomSeed rand_state_read( gRandomSeed );
- for ( size_t offset = 0; offset < max_size ; offset += arr_size ) {
- size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
- status = clEnqueueNDRangeKernel(queue,reader,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue reader",status);
- cl_uchar* read_mem_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, buffer_mem, CL_TRUE, CL_MAP_READ, 0, curr_size, 0, 0, 0, &status);test_error_ret(status,"map read data",status);
- l_set_randomly( buffer, curr_size, rand_state_read );
- err |= l_compare( "capacity", buffer, read_mem_ptr, curr_size, ti );
+ RandomSeed rand_state_write(gRandomSeed);
+ for (size_t offset = 0; offset < max_size; offset += arr_size)
+ {
+ size_t curr_size =
+ (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
+ l_set_randomly(buffer, curr_size, rand_state_write);
+ status = clEnqueueWriteBuffer(queue, buffer_mem, CL_TRUE, 0, curr_size,
+ buffer, 0, 0, 0);
+ test_error_ret(status, "populate buffer_mem object", status);
+ status = clEnqueueNDRangeKernel(queue, writer, 1, &offset, &curr_size,
+ 0, 0, 0, 0);
+ test_error_ret(status, "enqueue writer", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
+ }
+
+ RandomSeed rand_state_read(gRandomSeed);
+ for (size_t offset = 0; offset < max_size; offset += arr_size)
+ {
+ size_t curr_size =
+ (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
+ status = clEnqueueNDRangeKernel(queue, reader, 1, &offset, &curr_size,
+ 0, 0, 0, 0);
+ test_error_ret(status, "enqueue reader", status);
+ cl_uchar* read_mem_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, buffer_mem, CL_TRUE, CL_MAP_READ, 0, curr_size, 0, 0, 0,
+ &status);
+ test_error_ret(status, "map read data", status);
+ l_set_randomly(buffer, curr_size, rand_state_read);
+ err |= l_compare("capacity", buffer, read_mem_ptr, curr_size, ti);
clEnqueueUnmapMemObject(queue, buffer_mem, read_mem_ptr, 0, 0, 0);
}
- if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+ if (CL_SUCCESS == err)
+ {
+ log_info("OK\n");
+ FLUSH;
+ }
align_free(buffer);
return err;
@@ -1338,32 +1702,33 @@ static int l_capacity( cl_device_id device, cl_context context, cl_command_queue
// Check operation on a user type.
-static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, bool separate_compile )
+static int l_user_type(cl_device_id device, cl_context context,
+ cl_command_queue queue, bool separate_compile)
{
int err = CL_SUCCESS;
// Just test one type.
- const TypeInfo ti( l_find_type("uchar") );
- log_info(" l_user_type %s...", separate_compile ? "separate compilation" : "single source compilation" );
+ const TypeInfo ti(l_find_type("uchar"));
+ log_info(" l_user_type %s...",
+ separate_compile ? "separate compilation"
+ : "single source compilation");
- if ( separate_compile && ! l_linker_available ) {
+ if (separate_compile && !l_linker_available)
+ {
log_info("Separate compilation is not supported. Skipping test\n");
return err;
}
const char type_src[] =
"typedef struct { uchar c; uint i; } my_struct_t;\n\n";
- const char def_src[] =
- "my_struct_t var = { 'a', 42 };\n\n";
- const char decl_src[] =
- "extern my_struct_t var;\n\n";
+ const char def_src[] = "my_struct_t var = { 'a', 42 };\n\n";
+ const char decl_src[] = "extern my_struct_t var;\n\n";
// Don't use a host struct. We can't guarantee that the host
// compiler has the same structure layout as the device compiler.
- const char writer_src[] =
- "kernel void writer( uchar c, uint i ) {\n"
- " var.c = c;\n"
- " var.i = i;\n"
- "}\n\n";
+ const char writer_src[] = "kernel void writer( uchar c, uint i ) {\n"
+ " var.c = c;\n"
+ " var.i = i;\n"
+ "}\n\n";
const char reader_src[] =
"kernel void reader( global uchar* C, global uint* I ) {\n"
" *C = var.c;\n"
@@ -1372,36 +1737,53 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
clProgramWrapper program;
- if ( separate_compile ) {
+ if (separate_compile)
+ {
// Separate compilation flow.
StringTable wksrc;
- wksrc.add( type_src );
- wksrc.add( def_src );
- wksrc.add( writer_src );
+ wksrc.add(type_src);
+ wksrc.add(def_src);
+ wksrc.add(writer_src);
StringTable rksrc;
- rksrc.add( type_src );
- rksrc.add( decl_src );
- rksrc.add( reader_src );
+ rksrc.add(type_src);
+ rksrc.add(decl_src);
+ rksrc.add(reader_src);
int status = CL_SUCCESS;
- clProgramWrapper writer_program( clCreateProgramWithSource( context, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), &status ) );
- test_error_ret(status,"Failed to create writer program for user type test",status);
-
- status = clCompileProgram( writer_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 );
- if(check_error(status, "Failed to compile writer program for user type test (%s)", IGetErrorString(status)))
+ clProgramWrapper writer_program(clCreateProgramWithSource(
+ context, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), &status));
+ test_error_ret(status,
+ "Failed to create writer program for user type test",
+ status);
+
+ status = clCompileProgram(writer_program, 1, &device, OPTIONS, 0, 0, 0,
+ 0, 0);
+ if (check_error(
+ status,
+ "Failed to compile writer program for user type test (%s)",
+ IGetErrorString(status)))
{
- print_build_log(writer_program, 1, &device, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), OPTIONS);
+ print_build_log(writer_program, 1, &device, wksrc.num_str(),
+ wksrc.strs(), wksrc.lengths(), OPTIONS);
return status;
}
- clProgramWrapper reader_program( clCreateProgramWithSource( context, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), &status ) );
- test_error_ret(status,"Failed to create reader program for user type test",status);
-
- status = clCompileProgram( reader_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 );
- if(check_error(status, "Failed to compile reader program for user type test (%s)", IGetErrorString(status)))
+ clProgramWrapper reader_program(clCreateProgramWithSource(
+ context, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), &status));
+ test_error_ret(status,
+ "Failed to create reader program for user type test",
+ status);
+
+ status = clCompileProgram(reader_program, 1, &device, OPTIONS, 0, 0, 0,
+ 0, 0);
+ if (check_error(
+ status,
+ "Failed to compile reader program for user type test (%s)",
+ IGetErrorString(status)))
{
- print_build_log(reader_program, 1, &device, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), OPTIONS);
+ print_build_log(reader_program, 1, &device, rksrc.num_str(),
+ rksrc.strs(), rksrc.lengths(), OPTIONS);
return status;
}
@@ -1409,33 +1791,45 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
progs[0] = writer_program;
progs[1] = reader_program;
- program = clLinkProgram( context, 1, &device, "", 2, progs, 0, 0, &status );
- if(check_error(status, "Failed to link program for user type test (%s)", IGetErrorString(status)))
+ program =
+ clLinkProgram(context, 1, &device, "", 2, progs, 0, 0, &status);
+ if (check_error(status,
+ "Failed to link program for user type test (%s)",
+ IGetErrorString(status)))
{
print_build_log(program, 1, &device, 0, NULL, NULL, "");
return status;
}
- } else {
+ }
+ else
+ {
// Single compilation flow.
StringTable ksrc;
- ksrc.add( type_src );
- ksrc.add( def_src );
- ksrc.add( writer_src );
- ksrc.add( reader_src );
+ ksrc.add(type_src);
+ ksrc.add(def_src);
+ ksrc.add(writer_src);
+ ksrc.add(reader_src);
int status = CL_SUCCESS;
- status = create_single_kernel_helper_create_program(context, &program, ksrc.num_str(), ksrc.strs(), OPTIONS);
- if(check_error(status, "Failed to build program for user type test (%s)", IGetErrorString(status)))
+ status = create_single_kernel_helper_create_program(
+ context, &program, ksrc.num_str(), ksrc.strs(), OPTIONS);
+ if (check_error(status,
+ "Failed to build program for user type test (%s)",
+ IGetErrorString(status)))
{
- print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
+ print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
+ ksrc.lengths(), OPTIONS);
return status;
}
status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0);
- if(check_error(status, "Failed to compile program for user type test (%s)", IGetErrorString(status)))
+ if (check_error(status,
+ "Failed to compile program for user type test (%s)",
+ IGetErrorString(status)))
{
- print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
+ print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
+ ksrc.lengths(), OPTIONS);
return status;
}
}
@@ -1443,48 +1837,71 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
// Check size query.
size_t used_bytes = 0;
- int status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
- test_error_ret(status,"Failed to query global variable total size",status);
+ int status = clGetProgramBuildInfo(
+ program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+ sizeof(used_bytes), &used_bytes, 0);
+ test_error_ret(status, "Failed to query global variable total size",
+ status);
size_t expected_size = sizeof(cl_uchar) + sizeof(cl_uint);
- if ( used_bytes < expected_size ) {
- log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes );
+ if (used_bytes < expected_size)
+ {
+ log_error("Error: program query for global variable total size query "
+ "failed: Expected at least %llu but got %llu\n",
+ (unsigned long long)expected_size,
+ (unsigned long long)used_bytes);
err |= 1;
}
// Prepare to execute
- clKernelWrapper writer( clCreateKernel( program, "writer", &status ) );
- test_error_ret(status,"Failed to create writer kernel for user type test",status);
- clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
- test_error_ret(status,"Failed to create reader kernel for user type test",status);
+ clKernelWrapper writer(clCreateKernel(program, "writer", &status));
+ test_error_ret(status, "Failed to create writer kernel for user type test",
+ status);
+ clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+ test_error_ret(status, "Failed to create reader kernel for user type test",
+ status);
// Set up data.
cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT);
cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT);
- clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status ) );
- test_error_ret(status,"Failed to allocate uchar buffer",status);
- clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uint), uint_data, &status ) );
- test_error_ret(status,"Failed to allocate uint buffer",status);
+ clMemWrapper uchar_mem(clCreateBuffer(
+ context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status));
+ test_error_ret(status, "Failed to allocate uchar buffer", status);
+ clMemWrapper uint_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ sizeof(cl_uint), uint_data, &status));
+ test_error_ret(status, "Failed to allocate uint buffer", status);
- status = clSetKernelArg(reader,0,sizeof(cl_mem),&uchar_mem); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(reader,1,sizeof(cl_mem),&uint_mem); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(reader, 0, sizeof(cl_mem), &uchar_mem);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(reader, 1, sizeof(cl_mem), &uint_mem);
+ test_error_ret(status, "set arg", status);
cl_uchar expected_uchar = 'a';
cl_uint expected_uint = 42;
- for ( unsigned iter = 0; iter < 5 ; iter++ ) { // Must go around at least twice
+ for (unsigned iter = 0; iter < 5; iter++)
+ { // Must go around at least twice
// Read back data
*uchar_data = -1;
*uint_data = -1;
const size_t one = 1;
- status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
-
- cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, 0, 0, 0);
- cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0, 0, 0);
-
- if ( expected_uchar != *uchar_data || expected_uint != *uint_data ) {
- log_error("FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
- iter, (int)*uchar_data, *uint_data, (int)expected_uchar, expected_uint );
+ status = clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue reader", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
+
+ cl_uchar* uint_data_ptr =
+ (cl_uchar*)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ,
+ 0, sizeof(cl_uint), 0, 0, 0, 0);
+ cl_uchar* uchar_data_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0,
+ 0, 0);
+
+ if (expected_uchar != *uchar_data || expected_uint != *uint_data)
+ {
+ log_error(
+ "FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
+ iter, (int)*uchar_data, *uint_data, (int)expected_uchar,
+ expected_uint);
err |= 1;
}
@@ -1498,13 +1915,21 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
// Write the new values into persistent store.
*uchar_data = expected_uchar;
*uint_data = expected_uint;
- status = clSetKernelArg(writer,0,sizeof(cl_uchar),uchar_data); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(writer,1,sizeof(cl_uint),uint_data); test_error_ret(status,"set arg",status);
- status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
+ status = clSetKernelArg(writer, 0, sizeof(cl_uchar), uchar_data);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(writer, 1, sizeof(cl_uint), uint_data);
+ test_error_ret(status, "set arg", status);
+ status = clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue writer", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
}
- if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+ if (CL_SUCCESS == err)
+ {
+ log_info("OK\n");
+ FLUSH;
+ }
align_free(uchar_data);
align_free(uint_data);
return err;
@@ -1539,7 +1964,8 @@ static cl_int should_skip(cl_device_id device, cl_bool& skip)
// Test support for variables at program scope. Miscellaneous
-int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_prog_scope_misc(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_bool skip{ CL_FALSE };
auto error = should_skip(device, skip);
@@ -1558,19 +1984,20 @@ int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_com
cl_int err = CL_SUCCESS;
- err = l_get_device_info( device, &max_size, &pref_size );
- err |= l_build_type_table( device );
+ err = l_get_device_info(device, &max_size, &pref_size);
+ err |= l_build_type_table(device);
- err |= l_capacity( device, context, queue, max_size );
- err |= l_user_type( device, context, queue, false );
- err |= l_user_type( device, context, queue, true );
+ err |= l_capacity(device, context, queue, max_size);
+ err |= l_user_type(device, context, queue, false);
+ err |= l_user_type(device, context, queue, true);
return err;
}
// Test support for variables at program scope. Uninitialized data
-int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_bool skip{ CL_FALSE };
auto error = should_skip(device, skip);
@@ -1590,16 +2017,17 @@ int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_c
cl_int err = CL_SUCCESS;
- err = l_get_device_info( device, &max_size, &pref_size );
- err |= l_build_type_table( device );
+ err = l_get_device_info(device, &max_size, &pref_size);
+ err |= l_build_type_table(device);
- err |= l_write_read( device, context, queue );
+ err |= l_write_read(device, context, queue);
return err;
}
// Test support for variables at program scope. Initialized data.
-int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_prog_scope_init(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_bool skip{ CL_FALSE };
auto error = should_skip(device, skip);
@@ -1618,17 +2046,18 @@ int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_com
cl_int err = CL_SUCCESS;
- err = l_get_device_info( device, &max_size, &pref_size );
- err |= l_build_type_table( device );
+ err = l_get_device_info(device, &max_size, &pref_size);
+ err |= l_build_type_table(device);
- err |= l_init_write_read( device, context, queue );
+ err |= l_init_write_read(device, context, queue);
return err;
}
// A simple test for support of static variables inside a kernel.
-int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_func_scope(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_bool skip{ CL_FALSE };
auto error = should_skip(device, skip);
@@ -1642,56 +2071,70 @@ int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_
"supported on this device\n");
return TEST_SKIPPED_ITSELF;
}
- size_t max_size = 0;
- size_t pref_size = 0;
cl_int err = CL_SUCCESS;
// Deliberately have two variables with the same name but in different
// scopes.
// Also, use a large initialized structure in both cases.
+ // clang-format off
const char prog_src[] =
"typedef struct { char c; int16 i; } mystruct_t;\n"
- "kernel void test_bump( global int* value, int which ) {\n"
- " if ( which ) {\n"
+ "kernel void test_bump(global int* value, int which) {\n"
+ " if (which) {\n"
// Explicit address space.
// Last element set to 0
- " static global mystruct_t persistent = {'a',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n"
+ " static global mystruct_t persistent = { 'a', (int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n"
" *value = persistent.i.sf++;\n"
" } else {\n"
// Implicitly global
// Last element set to 100
- " static mystruct_t persistent = {'b',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n"
+ " static mystruct_t persistent = { 'b' , (int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n"
" *value = persistent.i.sf++;\n"
" }\n"
"}\n";
+ // clang-format on
StringTable ksrc;
- ksrc.add( prog_src );
+ ksrc.add(prog_src);
int status = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper test_bump;
- status = create_single_kernel_helper_with_build_options(context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump", OPTIONS);
- test_error_ret(status, "Failed to create program for function static variable test", status);
+ status = create_single_kernel_helper_with_build_options(
+ context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump",
+ OPTIONS);
+ test_error_ret(status,
+ "Failed to create program for function static variable test",
+ status);
// Check size query.
size_t used_bytes = 0;
- status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
- test_error_ret(status,"Failed to query global variable total size",status);
+ status = clGetProgramBuildInfo(program, device,
+ CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+ sizeof(used_bytes), &used_bytes, 0);
+ test_error_ret(status, "Failed to query global variable total size",
+ status);
size_t expected_size = 2 * sizeof(cl_int); // Two ints.
- if ( used_bytes < expected_size ) {
- log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes );
+ if (used_bytes < expected_size)
+ {
+ log_error("Error: program query for global variable total size query "
+ "failed: Expected at least %llu but got %llu\n",
+ (unsigned long long)expected_size,
+ (unsigned long long)used_bytes);
err |= 1;
}
// Prepare the data.
cl_int counter_value = 0;
- clMemWrapper counter_value_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(counter_value), &counter_value, &status ) );
- test_error_ret(status,"Failed to allocate counter query buffer",status);
+ clMemWrapper counter_value_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ sizeof(counter_value),
+ &counter_value, &status));
+ test_error_ret(status, "Failed to allocate counter query buffer", status);
- status = clSetKernelArg(test_bump,0,sizeof(cl_mem),&counter_value_mem); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(test_bump, 0, sizeof(cl_mem), &counter_value_mem);
+ test_error_ret(status, "set arg", status);
// Go a few rounds, alternating between the two counters in the kernel.
@@ -1701,26 +2144,41 @@ int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_
cl_int expected_counter[2] = { 100, 0 };
const size_t one = 1;
- for ( int iround = 0; iround < 5 ; iround++ ) { // Must go at least twice around
- for ( int iwhich = 0; iwhich < 2 ; iwhich++ ) { // Cover both counters
- status = clSetKernelArg(test_bump,1,sizeof(iwhich),&iwhich); test_error_ret(status,"set arg",status);
- status = clEnqueueNDRangeKernel(queue,test_bump,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue test_bump",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
-
- cl_uchar *counter_value_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(counter_value), 0, 0, 0, 0);
-
- if ( counter_value != expected_counter[iwhich] ) {
- log_error("Error: Round %d on counter %d: Expected %d but got %d\n",
- iround, iwhich, expected_counter[iwhich], counter_value );
+ for (int iround = 0; iround < 5; iround++)
+ { // Must go at least twice around
+ for (int iwhich = 0; iwhich < 2; iwhich++)
+ { // Cover both counters
+ status = clSetKernelArg(test_bump, 1, sizeof(iwhich), &iwhich);
+ test_error_ret(status, "set arg", status);
+ status = clEnqueueNDRangeKernel(queue, test_bump, 1, 0, &one, 0, 0,
+ 0, 0);
+ test_error_ret(status, "enqueue test_bump", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
+
+ cl_uchar* counter_value_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0,
+ sizeof(counter_value), 0, 0, 0, 0);
+
+ if (counter_value != expected_counter[iwhich])
+ {
+ log_error(
+ "Error: Round %d on counter %d: Expected %d but got %d\n",
+ iround, iwhich, expected_counter[iwhich], counter_value);
err |= 1;
}
expected_counter[iwhich]++; // Emulate behaviour of the kernel.
- clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr, 0, 0, 0);
+ clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr,
+ 0, 0, 0);
}
}
- if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+ if (CL_SUCCESS == err)
+ {
+ log_info("OK\n");
+ FLUSH;
+ }
return err;
}
diff --git a/test_conformance/basic/test_queue_priority.cpp b/test_conformance/basic/test_queue_priority.cpp
index 57ce5041..ff6283cd 100644
--- a/test_conformance/basic/test_queue_priority.cpp
+++ b/test_conformance/basic/test_queue_priority.cpp
@@ -48,13 +48,9 @@ static const char *fpmul_kernel_code =
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
-
-static const float MAX_ERR = 1e-5f;
-
static int
verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
{
- float r;
int i;
float * reference_ptr = (float *)malloc(n * sizeof(float));
@@ -82,7 +78,6 @@ verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
static int
verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
{
- float r;
int i;
float * reference_ptr = (float *)malloc(n * sizeof(float));
@@ -110,7 +105,6 @@ verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
static int
verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
{
- float r;
int i;
float * reference_ptr = (float *)malloc(n * sizeof(float));
diff --git a/test_conformance/basic/test_readimage3d.cpp b/test_conformance/basic/test_readimage3d.cpp
index 1337c9fb..5fd7d109 100644
--- a/test_conformance/basic/test_readimage3d.cpp
+++ b/test_conformance/basic/test_readimage3d.cpp
@@ -142,7 +142,7 @@ int test_readimage3d(cl_device_id device, cl_context context, cl_command_queue q
int img_width = 64;
int img_height = 64;
int img_depth = 64;
- int i, err;
+ int err;
size_t origin[3] = {0, 0, 0};
size_t region[3] = {img_width, img_height, img_depth};
size_t length = img_width * img_height * img_depth * 4 * sizeof(float);
diff --git a/test_conformance/basic/test_simple_image_pitch.cpp b/test_conformance/basic/test_simple_image_pitch.cpp
index 1cd82b6f..2eb43b3a 100644
--- a/test_conformance/basic/test_simple_image_pitch.cpp
+++ b/test_conformance/basic/test_simple_image_pitch.cpp
@@ -83,7 +83,7 @@ int test_simple_read_image_pitch(cl_device_id device, cl_context cl_context_, cl
free(host_image);
free(host_buffer);
- return CL_SUCCESS;
+ return errors == 0 ? TEST_PASS : TEST_FAIL;
}
int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements)
@@ -149,5 +149,5 @@ int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, c
free(host_image);
free(host_buffer);
- return CL_SUCCESS;
+ return errors == 0 ? TEST_PASS : TEST_FAIL;
}
diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp
index 66a6c563..e980ed68 100644
--- a/test_conformance/basic/test_sizeof.cpp
+++ b/test_conformance/basic/test_sizeof.cpp
@@ -35,9 +35,9 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty
"}\n"
};
- cl_program p;
- cl_kernel k;
- cl_mem m;
+ clProgramWrapper p;
+ clKernelWrapper k;
+ clMemWrapper m;
cl_uint temp;
@@ -51,42 +51,19 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty
}
cl_int err = create_single_kernel_helper_with_build_options(
context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr);
- if( err )
- return err;
+ test_error(err, "Failed to build kernel/program.");
m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err );
- if( NULL == m )
- {
- clReleaseProgram( p );
- clReleaseKernel( k );
- log_error("\nclCreateBuffer FAILED\n");
- return err;
- }
+ test_error(err, "clCreateBuffer failed.");
err = clSetKernelArg( k, 0, sizeof( cl_mem ), &m );
- if( err )
- {
- clReleaseProgram( p );
- clReleaseKernel( k );
- clReleaseMemObject( m );
- log_error("\nclSetKernelArg FAILED\n");
- return err;
- }
+ test_error(err, "clSetKernelArg failed.");
err = clEnqueueTask( queue, k, 0, NULL, NULL );
- clReleaseProgram( p );
- clReleaseKernel( k );
- if( err )
- {
- clReleaseMemObject( m );
- log_error( "\nclEnqueueTask FAILED\n" );
- return err;
- }
+ test_error(err, "clEnqueueTask failed.");
err = clEnqueueReadBuffer( queue, m, CL_TRUE, 0, sizeof( cl_uint ), &temp, 0, NULL, NULL );
- clReleaseMemObject( m );
- if( err )
- log_error( "\nclEnqueueReadBuffer FAILED\n" );
+ test_error(err, "clEnqueueReadBuffer failed.");
*size = (cl_ulong) temp;
@@ -292,11 +269,11 @@ int test_sizeof(cl_device_id device, cl_context context, cl_command_queue queue,
continue;
}
- if( gIsEmbedded &&
- 0 == strcmp(other_types[i], "image3d_t") &&
- checkFor3DImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
+ if (0 == strcmp(other_types[i], "image3d_t")
+ && checkFor3DImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
{
- log_info("\n3D images are not supported by this device. Skipping test.\t");
+ log_info("\n3D images are not supported by this device. "
+ "Skipping test.\t");
continue;
}
diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp
index 5ab3ea4f..884bcf36 100644
--- a/test_conformance/basic/test_vector_swizzle.cpp
+++ b/test_conformance/basic/test_vector_swizzle.cpp
@@ -610,9 +610,6 @@ static int test_vectype(const char* type_name, cl_device_id device,
cl_int error = CL_SUCCESS;
int result = TEST_PASS;
- clProgramWrapper program;
- clKernelWrapper kernel;
-
std::string buildOptions{ "-DTYPE=" };
buildOptions += type_name;
buildOptions += std::to_string(N);
@@ -628,35 +625,50 @@ static int test_vectype(const char* type_name, cl_device_id device,
makeReference<T, N, S>(reference);
// XYZW swizzles:
+ {
+ clProgramWrapper program;
+ clKernelWrapper kernel;
- const char* xyzw_source = TestInfo<N>::kernel_source_xyzw;
- error = create_single_kernel_helper(
- context, &program, &kernel, 1, &xyzw_source, "test_vector_swizzle_xyzw",
- buildOptions.c_str());
- test_error(error, "Unable to create xyzw test kernel");
+ const char* xyzw_source = TestInfo<N>::kernel_source_xyzw;
+ error = create_single_kernel_helper(
+ context, &program, &kernel, 1, &xyzw_source,
+ "test_vector_swizzle_xyzw", buildOptions.c_str());
+ test_error(error, "Unable to create xyzw test kernel");
- result |= test_vectype_case(value, reference, context, kernel, queue);
+ result |= test_vectype_case(value, reference, context, kernel, queue);
+ }
// sN swizzles:
- const char* sN_source = TestInfo<N>::kernel_source_sN;
- error = create_single_kernel_helper(context, &program, &kernel, 1,
- &sN_source, "test_vector_swizzle_sN",
- buildOptions.c_str());
- test_error(error, "Unable to create sN test kernel");
+ {
+ clProgramWrapper program;
+ clKernelWrapper kernel;
+
+ const char* sN_source = TestInfo<N>::kernel_source_sN;
+ error = create_single_kernel_helper(
+ context, &program, &kernel, 1, &sN_source, "test_vector_swizzle_sN",
+ buildOptions.c_str());
+ test_error(error, "Unable to create sN test kernel");
- result |= test_vectype_case(value, reference, context, kernel, queue);
+ result |= test_vectype_case(value, reference, context, kernel, queue);
+ }
// RGBA swizzles for OpenCL 3.0 and newer:
- const Version device_version = get_device_cl_version(device);
- if (device_version >= Version(3, 0))
{
- const char* rgba_source = TestInfo<N>::kernel_source_rgba;
- error = create_single_kernel_helper(
- context, &program, &kernel, 1, &rgba_source,
- "test_vector_swizzle_rgba", buildOptions.c_str());
- test_error(error, "Unable to create rgba test kernel");
+ clProgramWrapper program;
+ clKernelWrapper kernel;
- result |= test_vectype_case(value, reference, context, kernel, queue);
+ const Version device_version = get_device_cl_version(device);
+ if (device_version >= Version(3, 0))
+ {
+ const char* rgba_source = TestInfo<N>::kernel_source_rgba;
+ error = create_single_kernel_helper(
+ context, &program, &kernel, 1, &rgba_source,
+ "test_vector_swizzle_rgba", buildOptions.c_str());
+ test_error(error, "Unable to create rgba test kernel");
+
+ result |=
+ test_vectype_case(value, reference, context, kernel, queue);
+ }
}
return result;
diff --git a/test_conformance/basic/test_writeimage_fp32.cpp b/test_conformance/basic/test_writeimage_fp32.cpp
index fef71874..c68463ac 100644
--- a/test_conformance/basic/test_writeimage_fp32.cpp
+++ b/test_conformance/basic/test_writeimage_fp32.cpp
@@ -122,9 +122,10 @@ int test_writeimage_fp32(cl_device_id device, cl_context context, cl_command_que
return -1;
}
- err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgbaFFFF_write_kernel_code, "test_rgbaFFFF_write" );
- if (err)
- return -1;
+ err = create_single_kernel_helper(context, &program, &kernel[0], 1,
+ &rgbaFFFF_write_kernel_code,
+ "test_rgbaFFFF_write");
+ if (err) return -1;
kernel[1] = clCreateKernel(program, "test_rgbaFFFF_write", NULL);
if (!kernel[1])
{
diff --git a/test_conformance/basic/test_writeimage_int16.cpp b/test_conformance/basic/test_writeimage_int16.cpp
index 8afb77a9..d863a3a3 100644
--- a/test_conformance/basic/test_writeimage_int16.cpp
+++ b/test_conformance/basic/test_writeimage_int16.cpp
@@ -128,9 +128,10 @@ int test_writeimage_int16(cl_device_id device, cl_context context, cl_command_qu
return -1;
}
- err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgba16_write_kernel_code, "test_rgba16_write" );
- if (err)
- return -1;
+ err = create_single_kernel_helper(context, &program, &kernel[0], 1,
+ &rgba16_write_kernel_code,
+ "test_rgba16_write");
+ if (err) return -1;
kernel[1] = clCreateKernel(program, "test_rgba16_write", NULL);
if (!kernel[1])
{
diff --git a/test_conformance/buffers/test_buffer_fill.cpp b/test_conformance/buffers/test_buffer_fill.cpp
index 9c9c7d17..92079794 100644
--- a/test_conformance/buffers/test_buffer_fill.cpp
+++ b/test_conformance/buffers/test_buffer_fill.cpp
@@ -703,8 +703,6 @@ int test_buffer_fill( cl_device_id deviceID, cl_context context, cl_command_queu
int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
TestStruct pattern;
- clProgramWrapper program;
- clKernelWrapper kernel;
size_t ptrSize = sizeof( TestStruct );
size_t global_work_size[3];
int n, err;
@@ -720,6 +718,8 @@ int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_comma
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
+ clProgramWrapper program;
+ clKernelWrapper kernel;
log_info("Testing with cl_mem_flags: %s\n",
flag_set_names[src_flag_id]);
diff --git a/test_conformance/buffers/test_buffer_migrate.cpp b/test_conformance/buffers/test_buffer_migrate.cpp
index f3098366..6cdc271b 100644
--- a/test_conformance/buffers/test_buffer_migrate.cpp
+++ b/test_conformance/buffers/test_buffer_migrate.cpp
@@ -80,7 +80,7 @@ static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues
static cl_int restoreBuffer(cl_command_queue *queues, cl_mem *buffers, cl_uint num_devices, cl_mem_migration_flags *flags, cl_uint *buffer)
{
- cl_uint i, j;
+ cl_uint i;
cl_int err;
// If the buffer was previously migrated with undefined content, reload the content.
diff --git a/test_conformance/buffers/test_buffer_read.cpp b/test_conformance/buffers/test_buffer_read.cpp
index 39cf3297..49a57f92 100644
--- a/test_conformance/buffers/test_buffer_read.cpp
+++ b/test_conformance/buffers/test_buffer_read.cpp
@@ -763,7 +763,6 @@ int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_comman
{
clProgramWrapper program[5];
clKernelWrapper kernel[5];
- clEventWrapper event;
void *outptr[5];
void *inptr[5];
size_t global_work_size[3];
@@ -805,6 +804,7 @@ int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_comman
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
clMemWrapper buffer;
+ clEventWrapper event;
outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
if ( ! outptr[i] ){
log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
@@ -900,7 +900,6 @@ int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, c
{
clProgramWrapper program[5];
clKernelWrapper kernel[5];
- clEventWrapper event;
void *outptr[5], *inptr[5];
size_t global_work_size[3];
cl_int err;
@@ -941,6 +940,7 @@ int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, c
for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
{
clMemWrapper buffer;
+ clEventWrapper event;
outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
if ( ! outptr[i] ){
log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
diff --git a/test_conformance/buffers/test_image_migrate.cpp b/test_conformance/buffers/test_image_migrate.cpp
index dbdca9cc..6c8acdce 100644
--- a/test_conformance/buffers/test_image_migrate.cpp
+++ b/test_conformance/buffers/test_image_migrate.cpp
@@ -128,7 +128,6 @@ int test_image_migrate(cl_device_id deviceID, cl_context context, cl_command_que
cl_mem_migration_flags *flagsA, *flagsB, *flagsC;
cl_device_partition_property property[] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0, 0};
cl_mem *imageA, *imageB, *imageC;
- cl_mem_flags flags;
cl_image_format format;
cl_sampler sampler = NULL;
cl_program program = NULL;
diff --git a/test_conformance/buffers/test_sub_buffers.cpp b/test_conformance/buffers/test_sub_buffers.cpp
index 3e50121a..d6ab111e 100644
--- a/test_conformance/buffers/test_sub_buffers.cpp
+++ b/test_conformance/buffers/test_sub_buffers.cpp
@@ -15,6 +15,8 @@
//
#include "procs.h"
+#include <algorithm>
+
// Design:
// To test sub buffers, we first create one main buffer. We then create several sub-buffers and
// queue Actions on each one. Each Action is encapsulated in a class so it can keep track of
@@ -39,7 +41,8 @@ public:
region.size = mSize;
cl_int error;
- mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &error );
+ reset(clCreateSubBuffer(mParentBuffer, flags,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error));
return error;
}
};
@@ -100,13 +103,6 @@ public:
}
};
-#ifndef MAX
-#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) )
-#endif
-#ifndef MIN
-#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) )
-#endif
-
class CopyAction : public Action
{
public:
@@ -116,7 +112,8 @@ public:
virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
{
// Copy from sub-buffer 1 to sub-buffer 2
- size_t size = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() );
+ size_t size = get_random_size_t(
+ 0, std::min(buffer1.mSize, buffer2.mSize), GetRandSeed());
size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() );
@@ -265,7 +262,11 @@ int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queue
endRange = mainSize;
size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign;
- size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign;
+ size_t size =
+ get_random_size_t(
+ 1, (std::min(mainSize / 8, mainSize - offset)) / addressAlign,
+ Action::GetRandSeed())
+ * addressAlign;
error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size );
test_error( error, "Unable to allocate sub buffer" );
@@ -442,7 +443,7 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context
error = get_reasonable_buffer_size( otherDevice, maxBuffer2 );
test_error( error, "Unable to get buffer size for secondary device" );
- maxBuffer1 = MIN( maxBuffer1, maxBuffer2 );
+ maxBuffer1 = std::min(maxBuffer1, maxBuffer2);
cl_uint addressAlign1Bits, addressAlign2Bits;
error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL );
@@ -451,7 +452,7 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context
error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL );
test_error( error, "Unable to get secondary device's address alignment" );
- cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8;
+ cl_uint addressAlign1 = std::max(addressAlign1Bits, addressAlign2Bits) / 8;
// Finally time to run!
return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 );
diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index bbcc68c6..6c7d0b12 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -28,10 +28,9 @@
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
#define MAX_HOST_THREADS GetThreadCount()
-#define EXECUTE_TEST(error, test)\
- error |= test;\
- if(error && !gContinueOnError)\
- return error;
+#define EXECUTE_TEST(error, test) \
+ error |= test; \
+ if (error && !gContinueOnError) return error;
enum TExplicitAtomicType
{
@@ -57,764 +56,918 @@ enum TExplicitMemoryScopeType
MEMORY_SCOPE_ALL_SVM_DEVICES
};
-extern bool gHost; // temporary flag for testing native host threads (test verification)
+extern bool
+ gHost; // temporary flag for testing native host threads (test verification)
extern bool gOldAPI; // temporary flag for testing with old API (OpenCL 1.2)
extern bool gContinueOnError; // execute all cases even when errors detected
-extern bool gNoGlobalVariables; // disable cases with global atomics in program scope
+extern bool
+ gNoGlobalVariables; // disable cases with global atomics in program scope
extern bool gNoGenericAddressSpace; // disable cases with generic address space
extern bool gUseHostPtr; // use malloc/free instead of clSVMAlloc/clSVMFree
extern bool gDebug; // print OpenCL kernel code
-extern int gInternalIterations; // internal test iterations for atomic operation, sufficient to verify atomicity
-extern int gMaxDeviceThreads; // maximum number of threads executed on OCL device
+extern int gInternalIterations; // internal test iterations for atomic
+ // operation, sufficient to verify atomicity
+extern int
+ gMaxDeviceThreads; // maximum number of threads executed on OCL device
extern cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device
-extern const char *get_memory_order_type_name(TExplicitMemoryOrderType orderType);
-extern const char *get_memory_scope_type_name(TExplicitMemoryScopeType scopeType);
+extern const char *
+get_memory_order_type_name(TExplicitMemoryOrderType orderType);
+extern const char *
+get_memory_scope_type_name(TExplicitMemoryScopeType scopeType);
extern cl_int getSupportedMemoryOrdersAndScopes(
cl_device_id device, std::vector<TExplicitMemoryOrderType> &memoryOrders,
std::vector<TExplicitMemoryScopeType> &memoryScopes);
-class AtomicTypeInfo
-{
+class AtomicTypeInfo {
public:
- TExplicitAtomicType _type;
- AtomicTypeInfo(TExplicitAtomicType type): _type(type) {}
- cl_uint Size(cl_device_id device);
- const char* AtomicTypeName();
- const char* RegularTypeName();
- const char* AddSubOperandTypeName();
- int IsSupported(cl_device_id device);
+ TExplicitAtomicType _type;
+ AtomicTypeInfo(TExplicitAtomicType type): _type(type) {}
+ cl_uint Size(cl_device_id device);
+ const char *AtomicTypeName();
+ const char *RegularTypeName();
+ const char *AddSubOperandTypeName();
+ int IsSupported(cl_device_id device);
};
-template<typename HostDataType>
-class AtomicTypeExtendedInfo : public AtomicTypeInfo
-{
+template <typename HostDataType>
+class AtomicTypeExtendedInfo : public AtomicTypeInfo {
public:
- AtomicTypeExtendedInfo(TExplicitAtomicType type) : AtomicTypeInfo(type) {}
- HostDataType MinValue();
- HostDataType MaxValue();
- HostDataType SpecialValue(cl_uchar x)
- {
- HostDataType tmp;
- cl_uchar *ptr = (cl_uchar*)&tmp;
- for(cl_uint i = 0; i < sizeof(HostDataType)/sizeof(cl_uchar); i++)
- ptr[i] = x;
- return tmp;
- }
- HostDataType SpecialValue(cl_ushort x)
- {
- HostDataType tmp;
- cl_ushort *ptr = (cl_ushort*)&tmp;
- for(cl_uint i = 0; i < sizeof(HostDataType)/sizeof(cl_ushort); i++)
- ptr[i] = x;
- return tmp;
- }
+ AtomicTypeExtendedInfo(TExplicitAtomicType type): AtomicTypeInfo(type) {}
+ HostDataType MinValue();
+ HostDataType MaxValue();
+ HostDataType SpecialValue(cl_uchar x)
+ {
+ HostDataType tmp;
+ cl_uchar *ptr = (cl_uchar *)&tmp;
+ for (cl_uint i = 0; i < sizeof(HostDataType) / sizeof(cl_uchar); i++)
+ ptr[i] = x;
+ return tmp;
+ }
+ HostDataType SpecialValue(cl_ushort x)
+ {
+ HostDataType tmp;
+ cl_ushort *ptr = (cl_ushort *)&tmp;
+ for (cl_uint i = 0; i < sizeof(HostDataType) / sizeof(cl_ushort); i++)
+ ptr[i] = x;
+ return tmp;
+ }
};
-class CTest {
+class CTest {
public:
- virtual int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) = 0;
+ virtual int Execute(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements) = 0;
};
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTest : CTest
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTest : CTest {
public:
- typedef struct {
- CBasicTest *test;
- cl_uint tid;
- cl_uint threadCount;
- volatile HostAtomicType *destMemory;
- HostDataType *oldValues;
- } THostThreadContext;
- static cl_int HostThreadFunction(cl_uint job_id, cl_uint thread_id, void *userInfo)
- {
- THostThreadContext *threadContext = ((THostThreadContext*)userInfo)+job_id;
- threadContext->test->HostFunction(threadContext->tid, threadContext->threadCount, threadContext->destMemory, threadContext->oldValues);
- return 0;
- }
- CBasicTest(TExplicitAtomicType dataType, bool useSVM) : CTest(),
- _maxDeviceThreads(MAX_DEVICE_THREADS),
- _dataType(dataType), _useSVM(useSVM), _startValue(255),
- _localMemory(false), _declaredInProgram(false),
- _usedInFunction(false), _genericAddrSpace(false),
- _oldValueCheck(true), _localRefValues(false),
- _maxGroupSize(0), _passCount(0), _iterations(gInternalIterations)
- {
- }
- virtual ~CBasicTest()
- {
- if(_passCount)
- log_info(" %u tests executed successfully for %s\n", _passCount, DataType().AtomicTypeName());
- }
- virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
- {
- return 1;
- }
- virtual cl_uint NumNonAtomicVariablesPerThread()
- {
- return 1;
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- return false;
- }
- virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
- {
- return false;
- }
- virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
- {
- return false;
- }
- virtual std::string PragmaHeader(cl_device_id deviceID);
- virtual std::string ProgramHeader(cl_uint maxNumDestItems);
- virtual std::string FunctionCode();
- virtual std::string KernelCode(cl_uint maxNumDestItems);
- virtual std::string ProgramCore() = 0;
- virtual std::string SingleTestName()
- {
- std::string testName = LocalMemory() ? "local" : "global";
- testName += " ";
- testName += DataType().AtomicTypeName();
- if(DeclaredInProgram())
- {
- testName += " declared in program";
- }
- if(DeclaredInProgram() && UsedInFunction())
- testName += ",";
- if(UsedInFunction())
- {
- testName += " used in ";
- if(GenericAddrSpace())
- testName += "generic ";
- testName += "function";
- }
- return testName;
- }
- virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue);
- int ExecuteForEachPointerType(cl_device_id deviceID, cl_context context, cl_command_queue queue)
- {
- int error = 0;
- UsedInFunction(false);
- EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue));
- UsedInFunction(true);
- GenericAddrSpace(false);
- EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue));
- GenericAddrSpace(true);
- EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue));
- GenericAddrSpace(false);
- return error;
- }
- int ExecuteForEachDeclarationType(cl_device_id deviceID, cl_context context, cl_command_queue queue)
- {
- int error = 0;
- DeclaredInProgram(false);
- EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue));
- if(!UseSVM())
- {
- DeclaredInProgram(true);
- EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue));
- }
- return error;
- }
- virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
- {
- int error = 0;
- if(_maxDeviceThreads > 0 && !UseSVM())
- {
- LocalMemory(true);
- EXECUTE_TEST(error, ExecuteForEachDeclarationType(deviceID, context, queue));
- }
- if(_maxDeviceThreads+MaxHostThreads() > 0)
- {
- LocalMemory(false);
- EXECUTE_TEST(error, ExecuteForEachDeclarationType(deviceID, context, queue));
- }
- return error;
- }
- virtual int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
- {
- if(sizeof(HostAtomicType) != DataType().Size(deviceID))
- {
- log_info("Invalid test: Host atomic type size (%u) is different than OpenCL type size (%u)\n", (cl_uint)sizeof(HostAtomicType), DataType().Size(deviceID));
- return -1;
- }
- if(sizeof(HostAtomicType) != sizeof(HostDataType))
- {
- log_info("Invalid test: Host atomic type size (%u) is different than corresponding type size (%u)\n", (cl_uint)sizeof(HostAtomicType), (cl_uint)sizeof(HostDataType));
- return -1;
- }
- // Verify we can run first
- if(UseSVM() && !gUseHostPtr)
- {
- cl_device_svm_capabilities caps;
- cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(caps), &caps, 0);
- test_error(error, "clGetDeviceInfo failed");
- if((caps & CL_DEVICE_SVM_ATOMICS) == 0)
- {
- log_info("\t%s - SVM_ATOMICS not supported\n", DataType().AtomicTypeName());
- // implicit pass
+ typedef struct
+ {
+ CBasicTest *test;
+ cl_uint tid;
+ cl_uint threadCount;
+ volatile HostAtomicType *destMemory;
+ HostDataType *oldValues;
+ } THostThreadContext;
+ static cl_int HostThreadFunction(cl_uint job_id, cl_uint thread_id,
+ void *userInfo)
+ {
+ THostThreadContext *threadContext =
+ ((THostThreadContext *)userInfo) + job_id;
+ threadContext->test->HostFunction(
+ threadContext->tid, threadContext->threadCount,
+ threadContext->destMemory, threadContext->oldValues);
return 0;
- }
}
- if(!DataType().IsSupported(deviceID))
+ CBasicTest(TExplicitAtomicType dataType, bool useSVM)
+ : CTest(), _maxDeviceThreads(MAX_DEVICE_THREADS), _dataType(dataType),
+ _useSVM(useSVM), _startValue(255), _localMemory(false),
+ _declaredInProgram(false), _usedInFunction(false),
+ _genericAddrSpace(false), _oldValueCheck(true),
+ _localRefValues(false), _maxGroupSize(0), _passCount(0),
+ _iterations(gInternalIterations)
+ {}
+ virtual ~CBasicTest()
+ {
+ if (_passCount)
+ log_info(" %u tests executed successfully for %s\n", _passCount,
+ DataType().AtomicTypeName());
+ }
+ virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
+ {
+ return 1;
+ }
+ virtual cl_uint NumNonAtomicVariablesPerThread() { return 1; }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ return false;
+ }
+ virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
+ MTdata d)
+ {
+ return false;
+ }
+ virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
+ HostDataType *refValues,
+ HostAtomicType *finalValues)
+ {
+ return false;
+ }
+ virtual std::string PragmaHeader(cl_device_id deviceID);
+ virtual std::string ProgramHeader(cl_uint maxNumDestItems);
+ virtual std::string FunctionCode();
+ virtual std::string KernelCode(cl_uint maxNumDestItems);
+ virtual std::string ProgramCore() = 0;
+ virtual std::string SingleTestName()
+ {
+ std::string testName = LocalMemory() ? "local" : "global";
+ testName += " ";
+ testName += DataType().AtomicTypeName();
+ if (DeclaredInProgram())
+ {
+ testName += " declared in program";
+ }
+ if (DeclaredInProgram() && UsedInFunction()) testName += ",";
+ if (UsedInFunction())
+ {
+ testName += " used in ";
+ if (GenericAddrSpace()) testName += "generic ";
+ testName += "function";
+ }
+ return testName;
+ }
+ virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue);
+ int ExecuteForEachPointerType(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue)
+ {
+ int error = 0;
+ UsedInFunction(false);
+ EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue));
+ UsedInFunction(true);
+ GenericAddrSpace(false);
+ EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue));
+ GenericAddrSpace(true);
+ EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue));
+ GenericAddrSpace(false);
+ return error;
+ }
+ int ExecuteForEachDeclarationType(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue)
+ {
+ int error = 0;
+ DeclaredInProgram(false);
+ EXECUTE_TEST(error,
+ ExecuteForEachPointerType(deviceID, context, queue));
+ if (!UseSVM())
+ {
+ DeclaredInProgram(true);
+ EXECUTE_TEST(error,
+ ExecuteForEachPointerType(deviceID, context, queue));
+ }
+ return error;
+ }
+ virtual int ExecuteForEachParameterSet(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue)
+ {
+ int error = 0;
+ if (_maxDeviceThreads > 0 && !UseSVM())
+ {
+ LocalMemory(true);
+ EXECUTE_TEST(
+ error, ExecuteForEachDeclarationType(deviceID, context, queue));
+ }
+ if (_maxDeviceThreads + MaxHostThreads() > 0)
+ {
+ LocalMemory(false);
+ EXECUTE_TEST(
+ error, ExecuteForEachDeclarationType(deviceID, context, queue));
+ }
+ return error;
+ }
+ virtual int Execute(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
+ {
+ if (sizeof(HostAtomicType) != DataType().Size(deviceID))
+ {
+ log_info("Invalid test: Host atomic type size (%u) is different "
+ "than OpenCL type size (%u)\n",
+ (cl_uint)sizeof(HostAtomicType),
+ DataType().Size(deviceID));
+ return -1;
+ }
+ if (sizeof(HostAtomicType) != sizeof(HostDataType))
+ {
+ log_info("Invalid test: Host atomic type size (%u) is different "
+ "than corresponding type size (%u)\n",
+ (cl_uint)sizeof(HostAtomicType),
+ (cl_uint)sizeof(HostDataType));
+ return -1;
+ }
+ // Verify we can run first
+ if (UseSVM() && !gUseHostPtr)
+ {
+ cl_device_svm_capabilities caps;
+ cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES,
+ sizeof(caps), &caps, 0);
+ test_error(error, "clGetDeviceInfo failed");
+ if ((caps & CL_DEVICE_SVM_ATOMICS) == 0)
+ {
+ log_info("\t%s - SVM_ATOMICS not supported\n",
+ DataType().AtomicTypeName());
+ // implicit pass
+ return 0;
+ }
+ }
+ if (!DataType().IsSupported(deviceID))
+ {
+ log_info("\t%s not supported\n", DataType().AtomicTypeName());
+ // implicit pass or host test (debug feature)
+ if (UseSVM()) return 0;
+ _maxDeviceThreads = 0;
+ }
+ if (_maxDeviceThreads + MaxHostThreads() == 0) return 0;
+ return ExecuteForEachParameterSet(deviceID, context, queue);
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ log_info("Empty thread function %u\n", (cl_uint)tid);
+ }
+ AtomicTypeExtendedInfo<HostDataType> DataType() const
+ {
+ return AtomicTypeExtendedInfo<HostDataType>(_dataType);
+ }
+ cl_uint _maxDeviceThreads;
+ virtual cl_uint MaxHostThreads()
+ {
+ if (UseSVM() || gHost)
+ return MAX_HOST_THREADS;
+ else
+ return 0;
+ }
+
+ int CheckCapabilities(TExplicitMemoryScopeType memoryScope,
+ TExplicitMemoryOrderType memoryOrder)
{
- log_info("\t%s not supported\n", DataType().AtomicTypeName());
- // implicit pass or host test (debug feature)
- if(UseSVM())
+ /*
+ Differentiation between atomic fence and other atomic operations
+ does not need to occur here.
+
+ The initialisation of this test checks that the minimum required
+ capabilities are supported by this device.
+
+ The following switches allow the test to skip if optional
+ capabilites are not supported by the device.
+ */
+ switch (memoryScope)
+ {
+ case MEMORY_SCOPE_EMPTY: {
+ break;
+ }
+ case MEMORY_SCOPE_WORK_GROUP: {
+ if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0)
+ {
+ return TEST_SKIPPED_ITSELF;
+ }
+ break;
+ }
+ case MEMORY_SCOPE_DEVICE: {
+ if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0)
+ {
+ return TEST_SKIPPED_ITSELF;
+ }
+ break;
+ }
+ case MEMORY_SCOPE_ALL_DEVICES: // fallthough
+ case MEMORY_SCOPE_ALL_SVM_DEVICES: {
+ if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) == 0)
+ {
+ return TEST_SKIPPED_ITSELF;
+ }
+ break;
+ }
+ default: {
+ log_info("Invalid memory scope\n");
+ break;
+ }
+ }
+
+ switch (memoryOrder)
+ {
+ case MEMORY_ORDER_EMPTY: {
+ break;
+ }
+ case MEMORY_ORDER_RELAXED: {
+ if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0)
+ {
+ return TEST_SKIPPED_ITSELF;
+ }
+ break;
+ }
+ case MEMORY_ORDER_ACQUIRE:
+ case MEMORY_ORDER_RELEASE:
+ case MEMORY_ORDER_ACQ_REL: {
+ if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0)
+ {
+ return TEST_SKIPPED_ITSELF;
+ }
+ break;
+ }
+ case MEMORY_ORDER_SEQ_CST: {
+ if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) == 0)
+ {
+ return TEST_SKIPPED_ITSELF;
+ }
+ break;
+ }
+ default: {
+ log_info("Invalid memory order\n");
+ break;
+ }
+ }
+
return 0;
- _maxDeviceThreads = 0;
- }
- if(_maxDeviceThreads+MaxHostThreads() == 0)
- return 0;
- return ExecuteForEachParameterSet(deviceID, context, queue);
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- log_info("Empty thread function %u\n", (cl_uint)tid);
- }
- AtomicTypeExtendedInfo<HostDataType> DataType() const
- {
- return AtomicTypeExtendedInfo<HostDataType>(_dataType);
- }
- cl_uint _maxDeviceThreads;
- virtual cl_uint MaxHostThreads()
- {
- if(UseSVM() || gHost)
- return MAX_HOST_THREADS;
- else
- return 0;
- }
-
- int CheckCapabilities(TExplicitMemoryScopeType memoryScope,
- TExplicitMemoryOrderType memoryOrder)
- {
- /*
- Differentiation between atomic fence and other atomic operations
- does not need to occur here.
-
- The initialisation of this test checks that the minimum required
- capabilities are supported by this device.
-
- The following switches allow the test to skip if optional capabilites
- are not supported by the device.
- */
- switch (memoryScope)
- {
- case MEMORY_SCOPE_EMPTY: {
- break;
- }
- case MEMORY_SCOPE_WORK_GROUP: {
- if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0)
- {
- return TEST_SKIPPED_ITSELF;
- }
- break;
- }
- case MEMORY_SCOPE_DEVICE: {
- if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0)
- {
- return TEST_SKIPPED_ITSELF;
- }
- break;
- }
- case MEMORY_SCOPE_ALL_DEVICES: // fallthough
- case MEMORY_SCOPE_ALL_SVM_DEVICES: {
- if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) == 0)
- {
- return TEST_SKIPPED_ITSELF;
- }
- break;
- }
- default: {
- log_info("Invalid memory scope\n");
- break;
- }
- }
-
- switch (memoryOrder)
- {
- case MEMORY_ORDER_EMPTY: {
- break;
- }
- case MEMORY_ORDER_RELAXED: {
- if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0)
- {
- return TEST_SKIPPED_ITSELF;
- }
- break;
- }
- case MEMORY_ORDER_ACQUIRE:
- case MEMORY_ORDER_RELEASE:
- case MEMORY_ORDER_ACQ_REL: {
- if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0)
- {
- return TEST_SKIPPED_ITSELF;
- }
- break;
- }
- case MEMORY_ORDER_SEQ_CST: {
- if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) == 0)
- {
- return TEST_SKIPPED_ITSELF;
- }
- break;
- }
- default: {
- log_info("Invalid memory order\n");
- break;
- }
- }
-
- return 0;
- }
- virtual bool SVMDataBufferAllSVMConsistent() {return false;}
- bool UseSVM() {return _useSVM;}
- void StartValue(HostDataType startValue) {_startValue = startValue;}
- HostDataType StartValue() {return _startValue;}
- void LocalMemory(bool local) {_localMemory = local;}
- bool LocalMemory() {return _localMemory;}
- void DeclaredInProgram(bool declaredInProgram) {_declaredInProgram = declaredInProgram;}
- bool DeclaredInProgram() {return _declaredInProgram;}
- void UsedInFunction(bool local) {_usedInFunction = local;}
- bool UsedInFunction() {return _usedInFunction;}
- void GenericAddrSpace(bool genericAddrSpace) {_genericAddrSpace = genericAddrSpace;}
- bool GenericAddrSpace() {return _genericAddrSpace;}
- void OldValueCheck(bool check) {_oldValueCheck = check;}
- bool OldValueCheck() {return _oldValueCheck;}
- void LocalRefValues(bool localRefValues) {_localRefValues = localRefValues;}
- bool LocalRefValues() {return _localRefValues;}
- void MaxGroupSize(cl_uint maxGroupSize) {_maxGroupSize = maxGroupSize;}
- cl_uint MaxGroupSize() {return _maxGroupSize;}
- void CurrentGroupSize(cl_uint currentGroupSize)
- {
- if(MaxGroupSize() && MaxGroupSize() < currentGroupSize)
- _currentGroupSize = MaxGroupSize();
- else
- _currentGroupSize = currentGroupSize;
- }
- cl_uint CurrentGroupSize() {return _currentGroupSize;}
- virtual cl_uint CurrentGroupNum(cl_uint threadCount)
- {
- if(threadCount == 0)
- return 0;
- if(LocalMemory())
- return 1;
- return threadCount/CurrentGroupSize();
- }
- cl_int Iterations() {return _iterations;}
- std::string IterationsStr() {std::stringstream ss; ss << _iterations; return ss.str();}
+ }
+ virtual bool SVMDataBufferAllSVMConsistent() { return false; }
+ bool UseSVM() { return _useSVM; }
+ void StartValue(HostDataType startValue) { _startValue = startValue; }
+ HostDataType StartValue() { return _startValue; }
+ void LocalMemory(bool local) { _localMemory = local; }
+ bool LocalMemory() { return _localMemory; }
+ void DeclaredInProgram(bool declaredInProgram)
+ {
+ _declaredInProgram = declaredInProgram;
+ }
+ bool DeclaredInProgram() { return _declaredInProgram; }
+ void UsedInFunction(bool local) { _usedInFunction = local; }
+ bool UsedInFunction() { return _usedInFunction; }
+ void GenericAddrSpace(bool genericAddrSpace)
+ {
+ _genericAddrSpace = genericAddrSpace;
+ }
+ bool GenericAddrSpace() { return _genericAddrSpace; }
+ void OldValueCheck(bool check) { _oldValueCheck = check; }
+ bool OldValueCheck() { return _oldValueCheck; }
+ void LocalRefValues(bool localRefValues)
+ {
+ _localRefValues = localRefValues;
+ }
+ bool LocalRefValues() { return _localRefValues; }
+ void MaxGroupSize(cl_uint maxGroupSize) { _maxGroupSize = maxGroupSize; }
+ cl_uint MaxGroupSize() { return _maxGroupSize; }
+ void CurrentGroupSize(cl_uint currentGroupSize)
+ {
+ if (MaxGroupSize() && MaxGroupSize() < currentGroupSize)
+ _currentGroupSize = MaxGroupSize();
+ else
+ _currentGroupSize = currentGroupSize;
+ }
+ cl_uint CurrentGroupSize() { return _currentGroupSize; }
+ virtual cl_uint CurrentGroupNum(cl_uint threadCount)
+ {
+ if (threadCount == 0) return 0;
+ if (LocalMemory()) return 1;
+ return threadCount / CurrentGroupSize();
+ }
+ cl_int Iterations() { return _iterations; }
+ std::string IterationsStr()
+ {
+ std::stringstream ss;
+ ss << _iterations;
+ return ss.str();
+ }
+
private:
- const TExplicitAtomicType _dataType;
- const bool _useSVM;
- HostDataType _startValue;
- bool _localMemory;
- bool _declaredInProgram;
- bool _usedInFunction;
- bool _genericAddrSpace;
- bool _oldValueCheck;
- bool _localRefValues;
- cl_uint _maxGroupSize;
- cl_uint _currentGroupSize;
- cl_uint _passCount;
- const cl_int _iterations;
+ const TExplicitAtomicType _dataType;
+ const bool _useSVM;
+ HostDataType _startValue;
+ bool _localMemory;
+ bool _declaredInProgram;
+ bool _usedInFunction;
+ bool _genericAddrSpace;
+ bool _oldValueCheck;
+ bool _localRefValues;
+ cl_uint _maxGroupSize;
+ cl_uint _currentGroupSize;
+ cl_uint _passCount;
+ const cl_int _iterations;
};
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestMemOrderScope : public CBasicTest<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestMemOrderScope
+ : public CBasicTest<HostAtomicType, HostDataType> {
public:
- using CBasicTest<HostAtomicType, HostDataType>::LocalMemory;
- using CBasicTest<HostAtomicType, HostDataType>::MaxGroupSize;
- using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
- CBasicTestMemOrderScope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- }
- virtual std::string ProgramHeader(cl_uint maxNumDestItems)
- {
- std::string header;
- if(gOldAPI)
- {
- std::string s = MemoryScope() == MEMORY_SCOPE_EMPTY ? "" : ",s";
- header +=
- "#define atomic_store_explicit(x,y,o"+s+") atomic_store(x,y)\n"
- "#define atomic_load_explicit(x,o"+s+") atomic_load(x)\n"
- "#define atomic_exchange_explicit(x,y,o"+s+") atomic_exchange(x,y)\n"
- "#define atomic_compare_exchange_strong_explicit(x,y,z,os,of"+s+") atomic_compare_exchange_strong(x,y,z)\n"
- "#define atomic_compare_exchange_weak_explicit(x,y,z,os,of"+s+") atomic_compare_exchange_weak(x,y,z)\n"
- "#define atomic_fetch_add_explicit(x,y,o"+s+") atomic_fetch_add(x,y)\n"
- "#define atomic_fetch_sub_explicit(x,y,o"+s+") atomic_fetch_sub(x,y)\n"
- "#define atomic_fetch_or_explicit(x,y,o"+s+") atomic_fetch_or(x,y)\n"
- "#define atomic_fetch_xor_explicit(x,y,o"+s+") atomic_fetch_xor(x,y)\n"
- "#define atomic_fetch_and_explicit(x,y,o"+s+") atomic_fetch_and(x,y)\n"
- "#define atomic_fetch_min_explicit(x,y,o"+s+") atomic_fetch_min(x,y)\n"
- "#define atomic_fetch_max_explicit(x,y,o"+s+") atomic_fetch_max(x,y)\n"
- "#define atomic_flag_test_and_set_explicit(x,o"+s+") atomic_flag_test_and_set(x)\n"
- "#define atomic_flag_clear_explicit(x,o"+s+") atomic_flag_clear(x)\n";
- }
- return header+CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(maxNumDestItems);
- }
- virtual std::string SingleTestName()
- {
- std::string testName = CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
- if(MemoryOrder() != MEMORY_ORDER_EMPTY)
- {
- testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder())).substr(sizeof("memory"));
- }
- if(MemoryScope() != MEMORY_SCOPE_EMPTY)
- {
- testName += std::string(", ")+std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory"));
- }
- return testName;
- }
- virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
- {
- if(LocalMemory() &&
- MemoryScope() != MEMORY_SCOPE_EMPTY &&
- MemoryScope() != MEMORY_SCOPE_WORK_GROUP) //memory scope should only be used for global memory
- return 0;
- if(MemoryScope() == MEMORY_SCOPE_DEVICE)
- MaxGroupSize(16); // increase number of groups by forcing smaller group size
- else
- MaxGroupSize(0); // group size limited by device capabilities
-
- if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
- return 0; // skip test - not applicable
-
- return CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
- }
- virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
- {
- // repeat test for each reasonable memory order/scope combination
- std::vector<TExplicitMemoryOrderType> memoryOrder;
- std::vector<TExplicitMemoryScopeType> memoryScope;
- int error = 0;
-
- // For OpenCL-3.0 and later some orderings and scopes are optional, so here
- // we query for the supported ones.
- test_error_ret(
- getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope),
- "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL);
-
- for(unsigned oi = 0; oi < memoryOrder.size(); oi++)
- {
- for(unsigned si = 0; si < memoryScope.size(); si++)
- {
- if(memoryOrder[oi] == MEMORY_ORDER_EMPTY && memoryScope[si] != MEMORY_SCOPE_EMPTY)
- continue;
- MemoryOrder(memoryOrder[oi]);
- MemoryScope(memoryScope[si]);
- EXECUTE_TEST(error, (CBasicTest<HostAtomicType, HostDataType>::ExecuteForEachParameterSet(deviceID, context, queue)));
- }
- }
- return error;
- }
- void MemoryOrder(TExplicitMemoryOrderType memoryOrder) {_memoryOrder = memoryOrder;}
- TExplicitMemoryOrderType MemoryOrder() {return _memoryOrder;}
- std::string MemoryOrderStr()
- {
- if(MemoryOrder() != MEMORY_ORDER_EMPTY)
- return std::string(", ")+get_memory_order_type_name(MemoryOrder());
- return "";
- }
- void MemoryScope(TExplicitMemoryScopeType memoryScope) {_memoryScope = memoryScope;}
- TExplicitMemoryScopeType MemoryScope() {return _memoryScope;}
- std::string MemoryScopeStr()
- {
- if(MemoryScope() != MEMORY_SCOPE_EMPTY)
- return std::string(", ")+get_memory_scope_type_name(MemoryScope());
- return "";
- }
- std::string MemoryOrderScopeStr()
- {
- return MemoryOrderStr()+MemoryScopeStr();
- }
- virtual cl_uint CurrentGroupNum(cl_uint threadCount)
- {
- if(MemoryScope() == MEMORY_SCOPE_WORK_GROUP)
- return 1;
- return CBasicTest<HostAtomicType, HostDataType>::CurrentGroupNum(threadCount);
- }
- virtual cl_uint MaxHostThreads()
- {
- // block host threads execution for memory scope different than
- // memory_scope_all_svm_devices
- if (MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
- || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost)
- {
- return CBasicTest<HostAtomicType, HostDataType>::MaxHostThreads();
- }
- else
- {
- return 0;
- }
- }
+ using CBasicTest<HostAtomicType, HostDataType>::LocalMemory;
+ using CBasicTest<HostAtomicType, HostDataType>::MaxGroupSize;
+ using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
+ CBasicTestMemOrderScope(TExplicitAtomicType dataType, bool useSVM = false)
+ : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
+ {}
+ virtual std::string ProgramHeader(cl_uint maxNumDestItems)
+ {
+ std::string header;
+ if (gOldAPI)
+ {
+ std::string s = MemoryScope() == MEMORY_SCOPE_EMPTY ? "" : ",s";
+ header += "#define atomic_store_explicit(x,y,o" + s
+ + ") atomic_store(x,y)\n"
+ "#define atomic_load_explicit(x,o"
+ + s
+ + ") atomic_load(x)\n"
+ "#define atomic_exchange_explicit(x,y,o"
+ + s
+ + ") atomic_exchange(x,y)\n"
+ "#define atomic_compare_exchange_strong_explicit(x,y,z,os,of"
+ + s
+ + ") atomic_compare_exchange_strong(x,y,z)\n"
+ "#define atomic_compare_exchange_weak_explicit(x,y,z,os,of"
+ + s
+ + ") atomic_compare_exchange_weak(x,y,z)\n"
+ "#define atomic_fetch_add_explicit(x,y,o"
+ + s
+ + ") atomic_fetch_add(x,y)\n"
+ "#define atomic_fetch_sub_explicit(x,y,o"
+ + s
+ + ") atomic_fetch_sub(x,y)\n"
+ "#define atomic_fetch_or_explicit(x,y,o"
+ + s
+ + ") atomic_fetch_or(x,y)\n"
+ "#define atomic_fetch_xor_explicit(x,y,o"
+ + s
+ + ") atomic_fetch_xor(x,y)\n"
+ "#define atomic_fetch_and_explicit(x,y,o"
+ + s
+ + ") atomic_fetch_and(x,y)\n"
+ "#define atomic_fetch_min_explicit(x,y,o"
+ + s
+ + ") atomic_fetch_min(x,y)\n"
+ "#define atomic_fetch_max_explicit(x,y,o"
+ + s
+ + ") atomic_fetch_max(x,y)\n"
+ "#define atomic_flag_test_and_set_explicit(x,o"
+ + s
+ + ") atomic_flag_test_and_set(x)\n"
+ "#define atomic_flag_clear_explicit(x,o"
+ + s + ") atomic_flag_clear(x)\n";
+ }
+ return header
+ + CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(
+ maxNumDestItems);
+ }
+ virtual std::string SingleTestName()
+ {
+ std::string testName =
+ CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
+ if (MemoryOrder() != MEMORY_ORDER_EMPTY)
+ {
+ testName += std::string(", ")
+ + std::string(get_memory_order_type_name(MemoryOrder()))
+ .substr(sizeof("memory"));
+ }
+ if (MemoryScope() != MEMORY_SCOPE_EMPTY)
+ {
+ testName += std::string(", ")
+ + std::string(get_memory_scope_type_name(MemoryScope()))
+ .substr(sizeof("memory"));
+ }
+ return testName;
+ }
+ virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue)
+ {
+ if (LocalMemory() && MemoryScope() != MEMORY_SCOPE_EMPTY
+ && MemoryScope()
+ != MEMORY_SCOPE_WORK_GROUP) // memory scope should only be used
+ // for global memory
+ return 0;
+ if (MemoryScope() == MEMORY_SCOPE_DEVICE)
+ MaxGroupSize(
+ 16); // increase number of groups by forcing smaller group size
+ else
+ MaxGroupSize(0); // group size limited by device capabilities
+
+ if (CheckCapabilities(MemoryScope(), MemoryOrder())
+ == TEST_SKIPPED_ITSELF)
+ return 0; // skip test - not applicable
+
+ return CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
+ deviceID, context, queue);
+ }
+ virtual int ExecuteForEachParameterSet(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue)
+ {
+ // repeat test for each reasonable memory order/scope combination
+ std::vector<TExplicitMemoryOrderType> memoryOrder;
+ std::vector<TExplicitMemoryScopeType> memoryScope;
+ int error = 0;
+
+ // For OpenCL-3.0 and later some orderings and scopes are optional, so
+ // here we query for the supported ones.
+ test_error_ret(getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder,
+ memoryScope),
+ "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL);
+
+ for (unsigned oi = 0; oi < memoryOrder.size(); oi++)
+ {
+ for (unsigned si = 0; si < memoryScope.size(); si++)
+ {
+ if (memoryOrder[oi] == MEMORY_ORDER_EMPTY
+ && memoryScope[si] != MEMORY_SCOPE_EMPTY)
+ continue;
+ MemoryOrder(memoryOrder[oi]);
+ MemoryScope(memoryScope[si]);
+ EXECUTE_TEST(
+ error,
+ (CBasicTest<HostAtomicType, HostDataType>::
+ ExecuteForEachParameterSet(deviceID, context, queue)));
+ }
+ }
+ return error;
+ }
+ void MemoryOrder(TExplicitMemoryOrderType memoryOrder)
+ {
+ _memoryOrder = memoryOrder;
+ }
+ TExplicitMemoryOrderType MemoryOrder() { return _memoryOrder; }
+ std::string MemoryOrderStr()
+ {
+ if (MemoryOrder() != MEMORY_ORDER_EMPTY)
+ return std::string(", ")
+ + get_memory_order_type_name(MemoryOrder());
+ return "";
+ }
+ void MemoryScope(TExplicitMemoryScopeType memoryScope)
+ {
+ _memoryScope = memoryScope;
+ }
+ TExplicitMemoryScopeType MemoryScope() { return _memoryScope; }
+ std::string MemoryScopeStr()
+ {
+ if (MemoryScope() != MEMORY_SCOPE_EMPTY)
+ return std::string(", ")
+ + get_memory_scope_type_name(MemoryScope());
+ return "";
+ }
+ std::string MemoryOrderScopeStr()
+ {
+ return MemoryOrderStr() + MemoryScopeStr();
+ }
+ virtual cl_uint CurrentGroupNum(cl_uint threadCount)
+ {
+ if (MemoryScope() == MEMORY_SCOPE_WORK_GROUP) return 1;
+ return CBasicTest<HostAtomicType, HostDataType>::CurrentGroupNum(
+ threadCount);
+ }
+ virtual cl_uint MaxHostThreads()
+ {
+ // block host threads execution for memory scope different than
+ // memory_scope_all_svm_devices
+ if (MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
+ || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost)
+ {
+ return CBasicTest<HostAtomicType, HostDataType>::MaxHostThreads();
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
private:
- TExplicitMemoryOrderType _memoryOrder;
- TExplicitMemoryScopeType _memoryScope;
+ TExplicitMemoryOrderType _memoryOrder;
+ TExplicitMemoryScopeType _memoryScope;
};
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestMemOrder2Scope : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestMemOrder2Scope
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderStr;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
- using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
-
- CBasicTestMemOrder2Scope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- }
- virtual std::string SingleTestName()
- {
- std::string testName = CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
- if(MemoryOrder() != MEMORY_ORDER_EMPTY)
- testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder())).substr(sizeof("memory"));
- if(MemoryOrder2() != MEMORY_ORDER_EMPTY)
- testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder2())).substr(sizeof("memory"));
- if(MemoryScope() != MEMORY_SCOPE_EMPTY)
- testName += std::string(", ")+std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory"));
- return testName;
- }
- virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
- {
- // repeat test for each reasonable memory order/scope combination
- std::vector<TExplicitMemoryOrderType> memoryOrder;
- std::vector<TExplicitMemoryScopeType> memoryScope;
- int error = 0;
-
- // For OpenCL-3.0 and later some orderings and scopes are optional, so here
- // we query for the supported ones.
- test_error_ret(
- getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope),
- "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL);
-
- for(unsigned oi = 0; oi < memoryOrder.size(); oi++)
- {
- for(unsigned o2i = 0; o2i < memoryOrder.size(); o2i++)
- {
- for(unsigned si = 0; si < memoryScope.size(); si++)
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderStr;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
+ using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
+
+ CBasicTestMemOrder2Scope(TExplicitAtomicType dataType, bool useSVM = false)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {}
+ virtual std::string SingleTestName()
+ {
+ std::string testName =
+ CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
+ if (MemoryOrder() != MEMORY_ORDER_EMPTY)
+ testName += std::string(", ")
+ + std::string(get_memory_order_type_name(MemoryOrder()))
+ .substr(sizeof("memory"));
+ if (MemoryOrder2() != MEMORY_ORDER_EMPTY)
+ testName += std::string(", ")
+ + std::string(get_memory_order_type_name(MemoryOrder2()))
+ .substr(sizeof("memory"));
+ if (MemoryScope() != MEMORY_SCOPE_EMPTY)
+ testName += std::string(", ")
+ + std::string(get_memory_scope_type_name(MemoryScope()))
+ .substr(sizeof("memory"));
+ return testName;
+ }
+ virtual int ExecuteForEachParameterSet(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue)
+ {
+ // repeat test for each reasonable memory order/scope combination
+ std::vector<TExplicitMemoryOrderType> memoryOrder;
+ std::vector<TExplicitMemoryScopeType> memoryScope;
+ int error = 0;
+
+ // For OpenCL-3.0 and later some orderings and scopes are optional, so
+ // here we query for the supported ones.
+ test_error_ret(getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder,
+ memoryScope),
+ "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL);
+
+ for (unsigned oi = 0; oi < memoryOrder.size(); oi++)
{
- if((memoryOrder[oi] == MEMORY_ORDER_EMPTY || memoryOrder[o2i] == MEMORY_ORDER_EMPTY)
- && memoryOrder[oi] != memoryOrder[o2i])
- continue; // both memory order arguments must be set (or none)
- if((memoryOrder[oi] == MEMORY_ORDER_EMPTY || memoryOrder[o2i] == MEMORY_ORDER_EMPTY)
- && memoryScope[si] != MEMORY_SCOPE_EMPTY)
- continue; // memory scope without memory order is not allowed
- MemoryOrder(memoryOrder[oi]);
- MemoryOrder2(memoryOrder[o2i]);
- MemoryScope(memoryScope[si]);
-
- if (CheckCapabilities(MemoryScope(), MemoryOrder())
- == TEST_SKIPPED_ITSELF)
- continue; // skip test - not applicable
-
- if (CheckCapabilities(MemoryScope(), MemoryOrder2())
- == TEST_SKIPPED_ITSELF)
- continue; // skip test - not applicable
-
- EXECUTE_TEST(error, (CBasicTest<HostAtomicType, HostDataType>::ExecuteForEachParameterSet(deviceID, context, queue)));
+ for (unsigned o2i = 0; o2i < memoryOrder.size(); o2i++)
+ {
+ for (unsigned si = 0; si < memoryScope.size(); si++)
+ {
+ if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY
+ || memoryOrder[o2i] == MEMORY_ORDER_EMPTY)
+ && memoryOrder[oi] != memoryOrder[o2i])
+ continue; // both memory order arguments must be set (or
+ // none)
+ if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY
+ || memoryOrder[o2i] == MEMORY_ORDER_EMPTY)
+ && memoryScope[si] != MEMORY_SCOPE_EMPTY)
+ continue; // memory scope without memory order is not
+ // allowed
+ MemoryOrder(memoryOrder[oi]);
+ MemoryOrder2(memoryOrder[o2i]);
+ MemoryScope(memoryScope[si]);
+
+ if (CheckCapabilities(MemoryScope(), MemoryOrder())
+ == TEST_SKIPPED_ITSELF)
+ continue; // skip test - not applicable
+
+ if (CheckCapabilities(MemoryScope(), MemoryOrder2())
+ == TEST_SKIPPED_ITSELF)
+ continue; // skip test - not applicable
+
+ EXECUTE_TEST(error,
+ (CBasicTest<HostAtomicType, HostDataType>::
+ ExecuteForEachParameterSet(
+ deviceID, context, queue)));
+ }
+ }
}
- }
- }
- return error;
- }
- void MemoryOrder2(TExplicitMemoryOrderType memoryOrderFail) {_memoryOrder2 = memoryOrderFail;}
- TExplicitMemoryOrderType MemoryOrder2() {return _memoryOrder2;}
- std::string MemoryOrderFailStr()
- {
- if(MemoryOrder2() != MEMORY_ORDER_EMPTY)
- return std::string(", ")+get_memory_order_type_name(MemoryOrder2());
- return "";
- }
- std::string MemoryOrderScope()
- {
- return MemoryOrderStr()+MemoryOrderFailStr()+MemoryScopeStr();
- }
+ return error;
+ }
+ void MemoryOrder2(TExplicitMemoryOrderType memoryOrderFail)
+ {
+ _memoryOrder2 = memoryOrderFail;
+ }
+ TExplicitMemoryOrderType MemoryOrder2() { return _memoryOrder2; }
+ std::string MemoryOrderFailStr()
+ {
+ if (MemoryOrder2() != MEMORY_ORDER_EMPTY)
+ return std::string(", ")
+ + get_memory_order_type_name(MemoryOrder2());
+ return "";
+ }
+ std::string MemoryOrderScope()
+ {
+ return MemoryOrderStr() + MemoryOrderFailStr() + MemoryScopeStr();
+ }
+
private:
- TExplicitMemoryOrderType _memoryOrder2;
+ TExplicitMemoryOrderType _memoryOrder2;
};
-template<typename HostAtomicType, typename HostDataType>
-std::string CBasicTest<HostAtomicType, HostDataType>::PragmaHeader(cl_device_id deviceID)
+template <typename HostAtomicType, typename HostDataType>
+std::string
+CBasicTest<HostAtomicType, HostDataType>::PragmaHeader(cl_device_id deviceID)
{
- std::string pragma;
-
- if(gOldAPI)
- {
- pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n";
- pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n";
- pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n";
- pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n";
- }
- // Create the pragma lines for this kernel
- if(DataType().Size(deviceID) == 8)
- {
- pragma += "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n";
- pragma += "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n";
- }
- if(_dataType == TYPE_ATOMIC_DOUBLE)
- pragma += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
- return pragma;
+ std::string pragma;
+
+ if (gOldAPI)
+ {
+ pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : "
+ "enable\n";
+ pragma += "#pragma OPENCL EXTENSION "
+ "cl_khr_local_int32_extended_atomics : enable\n";
+ pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : "
+ "enable\n";
+ pragma += "#pragma OPENCL EXTENSION "
+ "cl_khr_global_int32_extended_atomics : enable\n";
+ }
+ // Create the pragma lines for this kernel
+ if (DataType().Size(deviceID) == 8)
+ {
+ pragma +=
+ "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n";
+ pragma +=
+ "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n";
+ }
+ if (_dataType == TYPE_ATOMIC_DOUBLE)
+ pragma += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+ return pragma;
}
-template<typename HostAtomicType, typename HostDataType>
-std::string CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
+template <typename HostAtomicType, typename HostDataType>
+std::string
+CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
{
- // Create the program header
- std::string header;
- std::string aTypeName = DataType().AtomicTypeName();
- std::string cTypeName = DataType().RegularTypeName();
- std::string argListForKernel;
- std::string argListForFunction;
- std::string argListNoTypes;
- std::string functionPrototype;
- std::string addressSpace = LocalMemory() ? "__local " : "__global ";
-
- if(gOldAPI)
- {
- header += std::string("#define ")+aTypeName+" "+cTypeName+"\n"
- "#define atomic_store(x,y) (*(x) = y)\n"
- "#define atomic_load(x) (*(x))\n"
- "#define ATOMIC_VAR_INIT(x) (x)\n"
- "#define ATOMIC_FLAG_INIT 0\n"
- "#define atomic_init(x,y) atomic_store(x,y)\n";
- if(aTypeName == "atomic_float")
- header += "#define atomic_exchange(x,y) atomic_xchg(x,y)\n";
- else if(aTypeName == "atomic_double")
- header += "double atomic_exchange(volatile "+addressSpace+"atomic_double *x, double y)\n"
- "{\n"
- " long tmp = *(long*)&y, res;\n"
- " volatile "+addressSpace+"long *tmpA = (volatile "+addressSpace+"long)x;\n"
- " res = atom_xchg(tmpA,tmp);\n"
- " return *(double*)&res;\n"
- "}\n";
- else
- header += "#define atomic_exchange(x,y) atom_xchg(x,y)\n";
- if(aTypeName != "atomic_float" && aTypeName != "atomic_double")
- header +=
- "bool atomic_compare_exchange_strong(volatile "+addressSpace+" "+aTypeName+" *a, "+cTypeName+" *expected, "+cTypeName+" desired)\n"
- "{\n"
- " "+cTypeName+" old = atom_cmpxchg(a, *expected, desired);\n"
- " if(old == *expected)\n"
- " return true;\n"
- " *expected = old;\n"
- " return false;\n"
- "}\n"
- "#define atomic_compare_exchange_weak atomic_compare_exchange_strong\n";
- header +=
- "#define atomic_fetch_add(x,y) atom_add(x,y)\n"
- "#define atomic_fetch_sub(x,y) atom_sub(x,y)\n"
- "#define atomic_fetch_or(x,y) atom_or(x,y)\n"
- "#define atomic_fetch_xor(x,y) atom_xor(x,y)\n"
- "#define atomic_fetch_and(x,y) atom_and(x,y)\n"
- "#define atomic_fetch_min(x,y) atom_min(x,y)\n"
- "#define atomic_fetch_max(x,y) atom_max(x,y)\n"
- "#define atomic_flag_test_and_set(x) atomic_exchange(x,1)\n"
- "#define atomic_flag_clear(x) atomic_store(x,0)\n"
- "\n";
- }
- if(!LocalMemory() && DeclaredInProgram())
- {
- // additional atomic variable for results copying (last thread will do this)
- header += "__global volatile atomic_uint finishedThreads = ATOMIC_VAR_INIT(0);\n";
- // atomic variables declared in program scope - test data
- std::stringstream ss;
- ss << maxNumDestItems;
- header += std::string("__global volatile ")+aTypeName+" destMemory["+ss.str()+"] = {\n";
- ss.str("");
- ss << _startValue;
- for(cl_uint i = 0; i < maxNumDestItems; i++)
- {
- if(aTypeName == "atomic_flag")
- header += " ATOMIC_FLAG_INIT";
- else
- header += " ATOMIC_VAR_INIT("+ss.str()+")";
- if(i+1 < maxNumDestItems)
- header += ",";
- header += "\n";
- }
- header+=
- "};\n"
- "\n";
- }
- return header;
+ // Create the program header
+ std::string header;
+ std::string aTypeName = DataType().AtomicTypeName();
+ std::string cTypeName = DataType().RegularTypeName();
+ std::string argListForKernel;
+ std::string argListForFunction;
+ std::string argListNoTypes;
+ std::string functionPrototype;
+ std::string addressSpace = LocalMemory() ? "__local " : "__global ";
+
+ if (gOldAPI)
+ {
+ header += std::string("#define ") + aTypeName + " " + cTypeName
+ + "\n"
+ "#define atomic_store(x,y) (*(x) "
+ "= y)\n"
+ "#define atomic_load(x) "
+ "(*(x))\n"
+ "#define ATOMIC_VAR_INIT(x) (x)\n"
+ "#define ATOMIC_FLAG_INIT 0\n"
+ "#define atomic_init(x,y) "
+ "atomic_store(x,y)\n";
+ if (aTypeName == "atomic_float")
+ header += "#define atomic_exchange(x,y) "
+ " atomic_xchg(x,y)\n";
+ else if (aTypeName == "atomic_double")
+ header += "double atomic_exchange(volatile " + addressSpace
+ + "atomic_double *x, double y)\n"
+ "{\n"
+ " long tmp = *(long*)&y, res;\n"
+ " volatile "
+ + addressSpace + "long *tmpA = (volatile " + addressSpace
+ + "long)x;\n"
+ " res = atom_xchg(tmpA,tmp);\n"
+ " return *(double*)&res;\n"
+ "}\n";
+ else
+ header += "#define atomic_exchange(x,y) "
+ " atom_xchg(x,y)\n";
+ if (aTypeName != "atomic_float" && aTypeName != "atomic_double")
+ header += "bool atomic_compare_exchange_strong(volatile "
+ + addressSpace + " " + aTypeName + " *a, " + cTypeName
+ + " *expected, " + cTypeName
+ + " desired)\n"
+ "{\n"
+ " "
+ + cTypeName
+ + " old = atom_cmpxchg(a, *expected, desired);\n"
+ " if(old == *expected)\n"
+ " return true;\n"
+ " *expected = old;\n"
+ " return false;\n"
+ "}\n"
+ "#define atomic_compare_exchange_weak "
+ "atomic_compare_exchange_strong\n";
+ header += "#define atomic_fetch_add(x,y) "
+ "atom_add(x,y)\n"
+ "#define atomic_fetch_sub(x,y) "
+ "atom_sub(x,y)\n"
+ "#define atomic_fetch_or(x,y) "
+ "atom_or(x,y)\n"
+ "#define atomic_fetch_xor(x,y) "
+ "atom_xor(x,y)\n"
+ "#define atomic_fetch_and(x,y) "
+ "atom_and(x,y)\n"
+ "#define atomic_fetch_min(x,y) "
+ "atom_min(x,y)\n"
+ "#define atomic_fetch_max(x,y) "
+ "atom_max(x,y)\n"
+ "#define atomic_flag_test_and_set(x) "
+ "atomic_exchange(x,1)\n"
+ "#define atomic_flag_clear(x) "
+ "atomic_store(x,0)\n"
+ "\n";
+ }
+ if (!LocalMemory() && DeclaredInProgram())
+ {
+ // additional atomic variable for results copying (last thread will do
+ // this)
+ header += "__global volatile atomic_uint finishedThreads = "
+ "ATOMIC_VAR_INIT(0);\n";
+ // atomic variables declared in program scope - test data
+ std::stringstream ss;
+ ss << maxNumDestItems;
+ header += std::string("__global volatile ") + aTypeName + " destMemory["
+ + ss.str() + "] = {\n";
+ ss.str("");
+ ss << _startValue;
+ for (cl_uint i = 0; i < maxNumDestItems; i++)
+ {
+ if (aTypeName == "atomic_flag")
+ header += " ATOMIC_FLAG_INIT";
+ else
+ header += " ATOMIC_VAR_INIT(" + ss.str() + ")";
+ if (i + 1 < maxNumDestItems) header += ",";
+ header += "\n";
+ }
+ header += "};\n"
+ "\n";
+ }
+ return header;
}
-template<typename HostAtomicType, typename HostDataType>
+template <typename HostAtomicType, typename HostDataType>
std::string CBasicTest<HostAtomicType, HostDataType>::FunctionCode()
{
- if(!UsedInFunction())
- return "";
- std::string addressSpace = LocalMemory() ? "__local " : "__global ";
- std::string code = "void test_atomic_function(uint tid, uint threadCount, uint numDestItems, volatile ";
- if(!GenericAddrSpace())
- code += addressSpace;
- code += std::string(DataType().AtomicTypeName())+" *destMemory, __global "+DataType().RegularTypeName()+
- " *oldValues";
- if(LocalRefValues())
- code += std::string(", __local ")+DataType().RegularTypeName()+" *localValues";
- code += ")\n"
- "{\n";
- code += ProgramCore();
- code += "}\n"
- "\n";
- return code;
+ if (!UsedInFunction()) return "";
+ std::string addressSpace = LocalMemory() ? "__local " : "__global ";
+ std::string code = "void test_atomic_function(uint tid, uint threadCount, "
+ "uint numDestItems, volatile ";
+ if (!GenericAddrSpace()) code += addressSpace;
+ code += std::string(DataType().AtomicTypeName()) + " *destMemory, __global "
+ + DataType().RegularTypeName() + " *oldValues";
+ if (LocalRefValues())
+ code += std::string(", __local ") + DataType().RegularTypeName()
+ + " *localValues";
+ code += ")\n"
+ "{\n";
+ code += ProgramCore();
+ code += "}\n"
+ "\n";
+ return code;
}
-template<typename HostAtomicType, typename HostDataType>
-std::string CBasicTest<HostAtomicType, HostDataType>::KernelCode(cl_uint maxNumDestItems)
+template <typename HostAtomicType, typename HostDataType>
+std::string
+CBasicTest<HostAtomicType, HostDataType>::KernelCode(cl_uint maxNumDestItems)
{
- std::string aTypeName = DataType().AtomicTypeName();
- std::string cTypeName = DataType().RegularTypeName();
- std::string addressSpace = LocalMemory() ? "__local " : "__global ";
- std::string code = "__kernel void test_atomic_kernel(uint threadCount, uint numDestItems, ";
-
- // prepare list of arguments for kernel
- if(LocalMemory())
- {
- code += std::string("__global ")+cTypeName+" *finalDest, __global "+cTypeName+" *oldValues,"
- " volatile "+addressSpace+aTypeName+" *"+(DeclaredInProgram() ? "notUsed" : "")+"destMemory";
- }
- else
- {
- code += "volatile "+addressSpace+(DeclaredInProgram() ? (cTypeName+" *finalDest") : (aTypeName+" *destMemory"))+
- ", __global "+cTypeName+" *oldValues";
- }
- if(LocalRefValues())
- code += std::string(", __local ")+cTypeName+" *localValues";
- code += ")\n"
- "{\n";
- if(LocalMemory() && DeclaredInProgram())
- {
- // local atomics declared in kernel scope
- std::stringstream ss;
- ss << maxNumDestItems;
- code += std::string(" __local volatile ")+aTypeName+" destMemory["+ss.str()+"];\n";
- }
- code += " uint tid = get_global_id(0);\n"
- "\n";
- if(LocalMemory())
- {
- // memory_order_relaxed is sufficient for these initialization operations
- // as the barrier below will act as a fence, providing an order to the
- // operations. memory_scope_work_group is sufficient as local memory is
- // only visible within the work-group.
- code += R"(
+ std::string aTypeName = DataType().AtomicTypeName();
+ std::string cTypeName = DataType().RegularTypeName();
+ std::string addressSpace = LocalMemory() ? "__local " : "__global ";
+ std::string code = "__kernel void test_atomic_kernel(uint threadCount, "
+ "uint numDestItems, ";
+
+ // prepare list of arguments for kernel
+ if (LocalMemory())
+ {
+ code += std::string("__global ") + cTypeName + " *finalDest, __global "
+ + cTypeName
+ + " *oldValues,"
+ " volatile "
+ + addressSpace + aTypeName + " *"
+ + (DeclaredInProgram() ? "notUsed" : "") + "destMemory";
+ }
+ else
+ {
+ code += "volatile " + addressSpace
+ + (DeclaredInProgram() ? (cTypeName + " *finalDest")
+ : (aTypeName + " *destMemory"))
+ + ", __global " + cTypeName + " *oldValues";
+ }
+ if (LocalRefValues())
+ code += std::string(", __local ") + cTypeName + " *localValues";
+ code += ")\n"
+ "{\n";
+ if (LocalMemory() && DeclaredInProgram())
+ {
+ // local atomics declared in kernel scope
+ std::stringstream ss;
+ ss << maxNumDestItems;
+ code += std::string(" __local volatile ") + aTypeName + " destMemory["
+ + ss.str() + "];\n";
+ }
+ code += " uint tid = get_global_id(0);\n"
+ "\n";
+ if (LocalMemory())
+ {
+ // memory_order_relaxed is sufficient for these initialization
+ // operations as the barrier below will act as a fence, providing an
+ // order to the operations. memory_scope_work_group is sufficient as
+ // local memory is only visible within the work-group.
+ code += R"(
// initialize atomics not reachable from host (first thread
// is doing this, other threads are waiting on barrier)
if(get_local_id(0) == 0)
for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)
{)";
- if (aTypeName == "atomic_flag")
- {
- code += R"(
+ if (aTypeName == "atomic_flag")
+ {
+ code += R"(
if(finalDest[dstItemIdx])
atomic_flag_test_and_set_explicit(destMemory+dstItemIdx,
memory_order_relaxed,
@@ -823,512 +976,595 @@ std::string CBasicTest<HostAtomicType, HostDataType>::KernelCode(cl_uint maxNumD
atomic_flag_clear_explicit(destMemory+dstItemIdx,
memory_order_relaxed,
memory_scope_work_group);)";
- }
- else
- {
- code += R"(
+ }
+ else
+ {
+ code += R"(
atomic_store_explicit(destMemory+dstItemIdx,
finalDest[dstItemIdx],
memory_order_relaxed,
memory_scope_work_group);)";
+ }
+ code += " }\n"
+ " barrier(CLK_LOCAL_MEM_FENCE);\n"
+ "\n";
}
- code +=
- " }\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- "\n";
- }
- if (LocalRefValues())
- {
- code +=
- " // Copy input reference values into local memory\n";
- if (NumNonAtomicVariablesPerThread() == 1)
- code += " localValues[get_local_id(0)] = oldValues[tid];\n";
- else
+ if (LocalRefValues())
{
- std::stringstream ss;
- ss << NumNonAtomicVariablesPerThread();
- code +=
- " for(uint rfId = 0; rfId < " + ss.str() + "; rfId++)\n"
- " localValues[get_local_id(0)*" + ss.str() + "+rfId] = oldValues[tid*" + ss.str() + "+rfId];\n";
- }
- code +=
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- "\n";
- }
- if (UsedInFunction())
- code += std::string(" test_atomic_function(tid, threadCount, numDestItems, destMemory, oldValues")+
- (LocalRefValues() ? ", localValues" : "")+");\n";
- else
- code += ProgramCore();
- code += "\n";
- if (LocalRefValues())
- {
- code +=
- " // Copy local reference values into output array\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n";
- if (NumNonAtomicVariablesPerThread() == 1)
- code += " oldValues[tid] = localValues[get_local_id(0)];\n";
+ code += " // Copy input reference values into local memory\n";
+ if (NumNonAtomicVariablesPerThread() == 1)
+ code += " localValues[get_local_id(0)] = oldValues[tid];\n";
+ else
+ {
+ std::stringstream ss;
+ ss << NumNonAtomicVariablesPerThread();
+ code += " for(uint rfId = 0; rfId < " + ss.str()
+ + "; rfId++)\n"
+ " localValues[get_local_id(0)*"
+ + ss.str() + "+rfId] = oldValues[tid*" + ss.str() + "+rfId];\n";
+ }
+ code += " barrier(CLK_LOCAL_MEM_FENCE);\n"
+ "\n";
+ }
+ if (UsedInFunction())
+ code += std::string(" test_atomic_function(tid, threadCount, "
+ "numDestItems, destMemory, oldValues")
+ + (LocalRefValues() ? ", localValues" : "") + ");\n";
else
+ code += ProgramCore();
+ code += "\n";
+ if (LocalRefValues())
{
- std::stringstream ss;
- ss << NumNonAtomicVariablesPerThread();
- code +=
- " for(uint rfId = 0; rfId < " + ss.str() + "; rfId++)\n"
- " oldValues[tid*" + ss.str() + "+rfId] = localValues[get_local_id(0)*" + ss.str() + "+rfId];\n";
+ code += " // Copy local reference values into output array\n"
+ " barrier(CLK_LOCAL_MEM_FENCE);\n";
+ if (NumNonAtomicVariablesPerThread() == 1)
+ code += " oldValues[tid] = localValues[get_local_id(0)];\n";
+ else
+ {
+ std::stringstream ss;
+ ss << NumNonAtomicVariablesPerThread();
+ code += " for(uint rfId = 0; rfId < " + ss.str()
+ + "; rfId++)\n"
+ " oldValues[tid*"
+ + ss.str() + "+rfId] = localValues[get_local_id(0)*" + ss.str()
+ + "+rfId];\n";
+ }
+ code += "\n";
}
- code += "\n";
- }
- if(LocalMemory() || DeclaredInProgram())
- {
- code += " // Copy final values to host reachable buffer\n";
- if(LocalMemory())
- code +=
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " if(get_local_id(0) == 0) // first thread in workgroup\n";
- else
- // global atomics declared in program scope
- code += R"(
- if(atomic_fetch_add_explicit(&finishedThreads, 1u,
- memory_order_relaxed,
- memory_scope_work_group)
- == get_global_size(0)-1) // last finished thread
- )";
- code +=
- " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n";
- if(aTypeName == "atomic_flag")
+ if (LocalMemory())
{
- code += R"(
+ code += " // Copy final values to host reachable buffer\n";
+ code += " barrier(CLK_LOCAL_MEM_FENCE);\n"
+ " if(get_local_id(0) == 0) // first thread in workgroup\n";
+ code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; "
+ "dstItemIdx++)\n";
+ if (aTypeName == "atomic_flag")
+ {
+ code += R"(
finalDest[dstItemIdx] =
atomic_flag_test_and_set_explicit(destMemory+dstItemIdx,
memory_order_relaxed,
memory_scope_work_group);)";
+ }
+ else
+ {
+ code += R"(
+ finalDest[dstItemIdx] =
+ atomic_load_explicit(destMemory+dstItemIdx,
+ memory_order_relaxed,
+ memory_scope_work_group);)";
+ }
}
- else
+ else if (DeclaredInProgram())
{
+ // global atomics declared in program scope
+ code += " // Copy final values to host reachable buffer\n";
code += R"(
+ if(atomic_fetch_add_explicit(&finishedThreads, 1u,
+ memory_order_acq_rel,
+ memory_scope_device)
+ == get_global_size(0)-1) // last finished thread
+ )";
+ code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; "
+ "dstItemIdx++)\n";
+ if (aTypeName == "atomic_flag")
+ {
+ code += R"(
+ finalDest[dstItemIdx] =
+ atomic_flag_test_and_set_explicit(destMemory+dstItemIdx,
+ memory_order_relaxed,
+ memory_scope_device);)";
+ }
+ else
+ {
+ code += R"(
finalDest[dstItemIdx] =
atomic_load_explicit(destMemory+dstItemIdx,
memory_order_relaxed,
- memory_scope_work_group);)";
+ memory_scope_device);)";
+ }
}
- }
- code += "}\n"
- "\n";
- return code;
+ code += "}\n"
+ "\n";
+ return code;
}
template <typename HostAtomicType, typename HostDataType>
-int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
+int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue)
{
- int error;
- clProgramWrapper program;
- clKernelWrapper kernel;
- size_t threadNum[1];
- clMemWrapper streams[2];
- std::vector<HostAtomicType> destItems;
- HostAtomicType *svmAtomicBuffer = 0;
- std::vector<HostDataType> refValues, startRefValues;
- HostDataType *svmDataBuffer = 0;
- cl_uint deviceThreadCount, hostThreadCount, threadCount;
- size_t groupSize = 0;
- std::string programSource;
- const char *programLine;
- MTdata d;
- size_t typeSize = DataType().Size(deviceID);
-
- deviceThreadCount = _maxDeviceThreads;
- hostThreadCount = MaxHostThreads();
- threadCount = deviceThreadCount+hostThreadCount;
-
- //log_info("\t%s %s%s...\n", local ? "local" : "global", DataType().AtomicTypeName(), memoryOrderScope.c_str());
- log_info("\t%s...\n", SingleTestName().c_str());
-
- if(!LocalMemory() && DeclaredInProgram() && gNoGlobalVariables) // no support for program scope global variables
- {
- log_info("\t\tTest disabled\n");
- return 0;
- }
- if(UsedInFunction() && GenericAddrSpace() && gNoGenericAddressSpace)
- {
- log_info("\t\tTest disabled\n");
- return 0;
- }
-
- // set up work sizes based on device capabilities and test configuration
- error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(groupSize), &groupSize, NULL);
- test_error(error, "Unable to obtain max work group size for device");
- CurrentGroupSize((cl_uint)groupSize);
- if(CurrentGroupSize() > deviceThreadCount)
- CurrentGroupSize(deviceThreadCount);
- if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI)
- deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount);
- threadCount = deviceThreadCount+hostThreadCount;
-
- // If we're given a num_results function, we need to determine how many result objects we need.
- // This is the first assessment for current maximum number of threads (exact thread count is not known here)
- // - needed for program source code generation (arrays of atomics declared in program)
- cl_uint numDestItems = NumResults(threadCount, deviceID);
-
- if(deviceThreadCount > 0)
- {
- // This loop iteratively reduces the workgroup size by 2 and then
- // re-generates the kernel with the reduced
- // workgroup size until we find a size which is admissible for the kernel
- // being run or reduce the wg size
- // to the trivial case of 1 (which was separately verified to be accurate
- // for the kernel being run)
-
- while ((CurrentGroupSize() > 1))
- {
- // Re-generate the kernel code with the current group size
- if (kernel) clReleaseKernel(kernel);
- if (program) clReleaseProgram(program);
- programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems)
- + FunctionCode() + KernelCode(numDestItems);
- programLine = programSource.c_str();
- if (create_single_kernel_helper_with_build_options(
- context, &program, &kernel, 1, &programLine,
- "test_atomic_kernel", gOldAPI ? "" : nullptr))
- {
- return -1;
- }
- // Get work group size for the new kernel
- error = clGetKernelWorkGroupInfo(kernel, deviceID,
- CL_KERNEL_WORK_GROUP_SIZE,
- sizeof(groupSize), &groupSize, NULL);
- test_error(error,
- "Unable to obtain max work group size for device and "
- "kernel combo");
-
- if (LocalMemory())
- {
- cl_ulong usedLocalMemory;
- cl_ulong totalLocalMemory;
- cl_uint maxWorkGroupSize;
-
- error = clGetKernelWorkGroupInfo(
- kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE,
- sizeof(usedLocalMemory), &usedLocalMemory, NULL);
- test_error(error, "clGetKernelWorkGroupInfo failed");
-
- error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE,
- sizeof(totalLocalMemory),
- &totalLocalMemory, NULL);
- test_error(error, "clGetDeviceInfo failed");
-
- // We know that each work-group is going to use typeSize *
- // deviceThreadCount bytes of local memory
- // so pick the maximum value for deviceThreadCount that uses all
- // the local memory.
- maxWorkGroupSize =
- ((totalLocalMemory - usedLocalMemory) / typeSize);
-
- if (maxWorkGroupSize < groupSize) groupSize = maxWorkGroupSize;
- }
- if (CurrentGroupSize() <= groupSize)
- break;
- else
- CurrentGroupSize(CurrentGroupSize() / 2);
- }
- if(CurrentGroupSize() > deviceThreadCount)
- CurrentGroupSize(deviceThreadCount);
- if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI)
- deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount);
- threadCount = deviceThreadCount+hostThreadCount;
- }
- if (gDebug)
- {
- log_info("Program source:\n");
- log_info("%s\n", programLine);
- }
- if(deviceThreadCount > 0)
- log_info("\t\t(thread count %u, group size %u)\n", deviceThreadCount, CurrentGroupSize());
- if(hostThreadCount > 0)
- log_info("\t\t(host threads %u)\n", hostThreadCount);
-
- refValues.resize(threadCount*NumNonAtomicVariablesPerThread());
-
- // Generate ref data if we have a ref generator provided
- d = init_genrand(gRandomSeed);
- startRefValues.resize(threadCount*NumNonAtomicVariablesPerThread());
- if(GenerateRefs(threadCount, &startRefValues[0], d))
- {
- //copy ref values for host threads
- memcpy(&refValues[0], &startRefValues[0], sizeof(HostDataType)*threadCount*NumNonAtomicVariablesPerThread());
- }
- else
- {
- startRefValues.resize(0);
- }
- free_mtdata(d);
- d = NULL;
-
- // If we're given a num_results function, we need to determine how many result objects we need. If
- // we don't have it, we assume it's just 1
- // This is final value (exact thread count is known in this place)
- numDestItems = NumResults(threadCount, deviceID);
-
- destItems.resize(numDestItems);
- for(cl_uint i = 0; i < numDestItems; i++)
- destItems[i] = _startValue;
-
- // Create main buffer with atomic variables (array size dependent on particular test)
- if(UseSVM())
- {
- if(gUseHostPtr)
- svmAtomicBuffer = (HostAtomicType*)malloc(typeSize * numDestItems);
- else
- svmAtomicBuffer = (HostAtomicType*)clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, typeSize * numDestItems, 0);
- if(!svmAtomicBuffer)
- {
- log_error("ERROR: clSVMAlloc failed!\n");
- return -1;
- }
- memcpy(svmAtomicBuffer, &destItems[0], typeSize * numDestItems);
- streams[0] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
- typeSize * numDestItems, svmAtomicBuffer, NULL);
- }
- else
- {
- streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
- typeSize * numDestItems, &destItems[0], NULL);
- }
- if (!streams[0])
- {
- log_error("ERROR: Creating output array failed!\n");
- return -1;
- }
- // Create buffer for per-thread input/output data
- if(UseSVM())
- {
- if(gUseHostPtr)
- svmDataBuffer = (HostDataType*)malloc(typeSize*threadCount*NumNonAtomicVariablesPerThread());
- else
- svmDataBuffer = (HostDataType*)clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | (SVMDataBufferAllSVMConsistent() ? CL_MEM_SVM_ATOMICS : 0), typeSize*threadCount*NumNonAtomicVariablesPerThread(), 0);
- if(!svmDataBuffer)
- {
- log_error("ERROR: clSVMAlloc failed!\n");
- return -1;
- }
- if(startRefValues.size())
- memcpy(svmDataBuffer, &startRefValues[0], typeSize*threadCount*NumNonAtomicVariablesPerThread());
- streams[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
- typeSize * threadCount
- * NumNonAtomicVariablesPerThread(),
- svmDataBuffer, NULL);
- }
- else
- {
- streams[1] = clCreateBuffer(
- context,
- ((startRefValues.size() ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)),
- typeSize * threadCount * NumNonAtomicVariablesPerThread(),
- startRefValues.size() ? &startRefValues[0] : 0, NULL);
- }
- if (!streams[1])
- {
- log_error("ERROR: Creating reference array failed!\n");
- return -1;
- }
- if(deviceThreadCount > 0)
- {
- cl_uint argInd = 0;
- /* Set the arguments */
- error = clSetKernelArg(kernel, argInd++, sizeof(threadCount), &threadCount);
- test_error(error, "Unable to set kernel argument");
- error = clSetKernelArg(kernel, argInd++, sizeof(numDestItems), &numDestItems);
- test_error(error, "Unable to set indexed kernel argument");
- error = clSetKernelArg(kernel, argInd++, sizeof(streams[0]), &streams[0]);
- test_error(error, "Unable to set indexed kernel arguments");
- error = clSetKernelArg(kernel, argInd++, sizeof(streams[1]), &streams[1]);
- test_error(error, "Unable to set indexed kernel arguments");
- if(LocalMemory())
- {
- error = clSetKernelArg(kernel, argInd++, typeSize * numDestItems, NULL);
- test_error(error, "Unable to set indexed local kernel argument");
- }
- if(LocalRefValues())
- {
- error = clSetKernelArg(kernel, argInd++, LocalRefValues() ? typeSize*CurrentGroupSize()*NumNonAtomicVariablesPerThread() : 1, NULL);
- test_error(error, "Unable to set indexed kernel argument");
- }
- }
- /* Configure host threads */
- std::vector<THostThreadContext> hostThreadContexts(hostThreadCount);
- for(unsigned int t = 0; t < hostThreadCount; t++)
- {
- hostThreadContexts[t].test = this;
- hostThreadContexts[t].tid = deviceThreadCount+t;
- hostThreadContexts[t].threadCount = threadCount;
- hostThreadContexts[t].destMemory = UseSVM() ? svmAtomicBuffer : &destItems[0];
- hostThreadContexts[t].oldValues = UseSVM() ? svmDataBuffer : &refValues[0];
- }
-
- if(deviceThreadCount > 0)
- {
- /* Run the kernel */
- threadNum[0] = deviceThreadCount;
- groupSize = CurrentGroupSize();
- error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, &groupSize, 0, NULL, NULL);
- test_error(error, "Unable to execute test kernel");
- /* start device threads */
- error = clFlush(queue);
- test_error(error, "clFlush failed");
- }
-
- /* Start host threads and wait for finish */
- if(hostThreadCount > 0)
- ThreadPool_Do(HostThreadFunction, hostThreadCount, &hostThreadContexts[0]);
-
- if(UseSVM())
- {
- error = clFinish(queue);
- test_error(error, "clFinish failed");
- memcpy(&destItems[0], svmAtomicBuffer, typeSize*numDestItems);
- memcpy(&refValues[0], svmDataBuffer, typeSize*threadCount*NumNonAtomicVariablesPerThread());
- }
- else
- {
- if(deviceThreadCount > 0)
- {
- error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, typeSize * numDestItems, &destItems[0], 0, NULL, NULL);
- test_error(error, "Unable to read result value!");
- error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize * deviceThreadCount*NumNonAtomicVariablesPerThread(), &refValues[0], 0, NULL, NULL);
- test_error(error, "Unable to read reference values!");
- }
- }
- bool dataVerified = false;
- // If we have an expectedFn, then we need to generate a final value to compare against. If we don't
- // have one, it's because we're comparing ref values only
- for(cl_uint i = 0; i < numDestItems; i++)
- {
- HostDataType expected;
-
- if(!ExpectedValue(expected, threadCount, startRefValues.size() ? &startRefValues[0] : 0, i))
- break; // no expected value function provided
-
- if(expected != destItems[i])
- {
- std::stringstream logLine;
- logLine << "ERROR: Result " << i << " from kernel does not validate! (should be " << expected << ", was " << destItems[i] << ")\n";
- log_error("%s", logLine.str().c_str());
- for(i = 0; i < threadCount; i++)
- {
- logLine.str("");
- logLine << " --- " << i << " - ";
- if(startRefValues.size())
- logLine << startRefValues[i] << " -> " << refValues[i];
- else
- logLine << refValues[i];
- logLine << " --- ";
- if(i < numDestItems)
- logLine << destItems[i];
- logLine << "\n";
- log_info("%s", logLine.str().c_str());
- }
- if(!gDebug)
- {
- log_info("Program source:\n");
- log_info("%s\n", programLine);
- }
- return -1;
- }
- dataVerified = true;
- }
-
- bool dataCorrect = false;
- /* Use the verify function (if provided) to also check the results */
- if(VerifyRefs(dataCorrect, threadCount, &refValues[0], &destItems[0]))
- {
- if(!dataCorrect)
- {
- log_error("ERROR: Reference values did not validate!\n");
- std::stringstream logLine;
- for(cl_uint i = 0; i < threadCount; i++)
- for (cl_uint j = 0; j < NumNonAtomicVariablesPerThread(); j++)
- {
- logLine.str("");
- logLine << " --- " << i << " - " << refValues[i*NumNonAtomicVariablesPerThread()+j] << " --- ";
- if(j == 0 && i < numDestItems)
- logLine << destItems[i];
- logLine << "\n";
- log_info("%s", logLine.str().c_str());
- }
- if(!gDebug)
- {
+ int error;
+ clProgramWrapper program;
+ clKernelWrapper kernel;
+ size_t threadNum[1];
+ clMemWrapper streams[2];
+ std::vector<HostAtomicType> destItems;
+ HostAtomicType *svmAtomicBuffer = 0;
+ std::vector<HostDataType> refValues, startRefValues;
+ HostDataType *svmDataBuffer = 0;
+ cl_uint deviceThreadCount, hostThreadCount, threadCount;
+ size_t groupSize = 0;
+ std::string programSource;
+ const char *programLine;
+ MTdata d;
+ size_t typeSize = DataType().Size(deviceID);
+
+ deviceThreadCount = _maxDeviceThreads;
+ hostThreadCount = MaxHostThreads();
+ threadCount = deviceThreadCount + hostThreadCount;
+
+ // log_info("\t%s %s%s...\n", local ? "local" : "global",
+ // DataType().AtomicTypeName(), memoryOrderScope.c_str());
+ log_info("\t%s...\n", SingleTestName().c_str());
+
+ if (!LocalMemory() && DeclaredInProgram()
+ && gNoGlobalVariables) // no support for program scope global variables
+ {
+ log_info("\t\tTest disabled\n");
+ return 0;
+ }
+ if (UsedInFunction() && GenericAddrSpace() && gNoGenericAddressSpace)
+ {
+ log_info("\t\tTest disabled\n");
+ return 0;
+ }
+ if (!LocalMemory() && DeclaredInProgram())
+ {
+ if (((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0)
+ || ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0))
+ {
+ log_info("\t\tTest disabled\n");
+ return 0;
+ }
+ }
+
+ // set up work sizes based on device capabilities and test configuration
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof(groupSize), &groupSize, NULL);
+ test_error(error, "Unable to obtain max work group size for device");
+ CurrentGroupSize((cl_uint)groupSize);
+ if (CurrentGroupSize() > deviceThreadCount)
+ CurrentGroupSize(deviceThreadCount);
+ if (CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI)
+ deviceThreadCount =
+ CurrentGroupSize() * CurrentGroupNum(deviceThreadCount);
+ threadCount = deviceThreadCount + hostThreadCount;
+
+ // If we're given a num_results function, we need to determine how many
+ // result objects we need. This is the first assessment for current maximum
+ // number of threads (exact thread count is not known here)
+ // - needed for program source code generation (arrays of atomics declared
+ // in program)
+ cl_uint numDestItems = NumResults(threadCount, deviceID);
+
+ if (deviceThreadCount > 0)
+ {
+ // This loop iteratively reduces the workgroup size by 2 and then
+ // re-generates the kernel with the reduced
+ // workgroup size until we find a size which is admissible for the
+ // kernel being run or reduce the wg size to the trivial case of 1
+ // (which was separately verified to be accurate for the kernel being
+ // run)
+
+ while ((CurrentGroupSize() > 1))
+ {
+ // Re-generate the kernel code with the current group size
+ if (kernel) clReleaseKernel(kernel);
+ if (program) clReleaseProgram(program);
+ programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems)
+ + FunctionCode() + KernelCode(numDestItems);
+ programLine = programSource.c_str();
+ if (create_single_kernel_helper_with_build_options(
+ context, &program, &kernel, 1, &programLine,
+ "test_atomic_kernel", gOldAPI ? "" : nullptr))
+ {
+ return -1;
+ }
+ // Get work group size for the new kernel
+ error = clGetKernelWorkGroupInfo(
+ kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(groupSize),
+ &groupSize, NULL);
+ test_error(error,
+ "Unable to obtain max work group size for device and "
+ "kernel combo");
+
+ if (LocalMemory())
+ {
+ cl_ulong usedLocalMemory;
+ cl_ulong totalLocalMemory;
+ cl_uint maxWorkGroupSize;
+
+ error = clGetKernelWorkGroupInfo(
+ kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE,
+ sizeof(usedLocalMemory), &usedLocalMemory, NULL);
+ test_error(error, "clGetKernelWorkGroupInfo failed");
+
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE,
+ sizeof(totalLocalMemory),
+ &totalLocalMemory, NULL);
+ test_error(error, "clGetDeviceInfo failed");
+
+ // We know that each work-group is going to use typeSize *
+ // deviceThreadCount bytes of local memory
+ // so pick the maximum value for deviceThreadCount that uses all
+ // the local memory.
+ maxWorkGroupSize =
+ ((totalLocalMemory - usedLocalMemory) / typeSize);
+
+ if (maxWorkGroupSize < groupSize) groupSize = maxWorkGroupSize;
+ }
+ if (CurrentGroupSize() <= groupSize)
+ break;
+ else
+ CurrentGroupSize(CurrentGroupSize() / 2);
+ }
+ if (CurrentGroupSize() > deviceThreadCount)
+ CurrentGroupSize(deviceThreadCount);
+ if (CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI)
+ deviceThreadCount =
+ CurrentGroupSize() * CurrentGroupNum(deviceThreadCount);
+ threadCount = deviceThreadCount + hostThreadCount;
+ }
+ if (gDebug)
+ {
log_info("Program source:\n");
log_info("%s\n", programLine);
- }
- return -1;
- }
- }
- else if(!dataVerified)
- {
- log_error("ERROR: Test doesn't check total or refs; no values are verified!\n");
- return -1;
- }
-
- if(OldValueCheck() &&
- !(DeclaredInProgram() && !LocalMemory())) // don't test for programs scope global atomics
- // 'old' value has been overwritten by previous clEnqueueNDRangeKernel
- {
- /* Re-write the starting value */
- for(size_t i = 0; i < numDestItems; i++)
- destItems[i] = _startValue;
- refValues[0] = 0;
- if(deviceThreadCount > 0)
- {
- error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, typeSize * numDestItems, &destItems[0], 0, NULL, NULL);
- test_error(error, "Unable to write starting values!");
-
- /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */
- threadNum[0] = 1;
- error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, threadNum, 0, NULL, NULL);
- test_error(error, "Unable to execute test kernel");
-
- error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize, &refValues[0], 0, NULL, NULL);
- test_error(error, "Unable to read reference values!");
+ }
+ if (deviceThreadCount > 0)
+ log_info("\t\t(thread count %u, group size %u)\n", deviceThreadCount,
+ CurrentGroupSize());
+ if (hostThreadCount > 0)
+ log_info("\t\t(host threads %u)\n", hostThreadCount);
+
+ refValues.resize(threadCount * NumNonAtomicVariablesPerThread());
+
+ // Generate ref data if we have a ref generator provided
+ d = init_genrand(gRandomSeed);
+ startRefValues.resize(threadCount * NumNonAtomicVariablesPerThread());
+ if (GenerateRefs(threadCount, &startRefValues[0], d))
+ {
+ // copy ref values for host threads
+ memcpy(&refValues[0], &startRefValues[0],
+ sizeof(HostDataType) * threadCount
+ * NumNonAtomicVariablesPerThread());
}
else
{
- /* Start host thread */
- HostFunction(0, 1, &destItems[0], &refValues[0]);
+ startRefValues.resize(0);
}
+ free_mtdata(d);
+ d = NULL;
- if(refValues[0] != _startValue)//destItems[0])
+ // If we're given a num_results function, we need to determine how many
+ // result objects we need. If we don't have it, we assume it's just 1 This
+ // is final value (exact thread count is known in this place)
+ numDestItems = NumResults(threadCount, deviceID);
+
+ destItems.resize(numDestItems);
+ for (cl_uint i = 0; i < numDestItems; i++) destItems[i] = _startValue;
+
+ // Create main buffer with atomic variables (array size dependent on
+ // particular test)
+ if (UseSVM())
{
- std::stringstream logLine;
- logLine << "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
- " (should have been " << destItems[0] << ", returned " << refValues[0] << ")!\n";
- log_error("%s", logLine.str().c_str());
- if(!gDebug)
- {
- log_info("Program source:\n");
- log_info("%s\n", programLine);
- }
- return -1;
- }
- }
- if(UseSVM())
- {
- // the buffer object must first be released before the SVM buffer is freed
- error = clReleaseMemObject(streams[0]);
- streams[0] = 0;
- test_error(error, "clReleaseMemObject failed");
- if(gUseHostPtr)
- free(svmAtomicBuffer);
+ if (gUseHostPtr)
+ svmAtomicBuffer = (HostAtomicType *)malloc(typeSize * numDestItems);
+ else
+ svmAtomicBuffer = (HostAtomicType *)clSVMAlloc(
+ context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
+ typeSize * numDestItems, 0);
+ if (!svmAtomicBuffer)
+ {
+ log_error("ERROR: clSVMAlloc failed!\n");
+ return -1;
+ }
+ memcpy(svmAtomicBuffer, &destItems[0], typeSize * numDestItems);
+ streams[0] =
+ clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ typeSize * numDestItems, svmAtomicBuffer, NULL);
+ }
else
- clSVMFree(context, svmAtomicBuffer);
- error = clReleaseMemObject(streams[1]);
- streams[1] = 0;
- test_error(error, "clReleaseMemObject failed");
- if(gUseHostPtr)
- free(svmDataBuffer);
+ {
+ streams[0] =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ typeSize * numDestItems, &destItems[0], NULL);
+ }
+ if (!streams[0])
+ {
+ log_error("ERROR: Creating output array failed!\n");
+ return -1;
+ }
+ // Create buffer for per-thread input/output data
+ if (UseSVM())
+ {
+ if (gUseHostPtr)
+ svmDataBuffer = (HostDataType *)malloc(
+ typeSize * threadCount * NumNonAtomicVariablesPerThread());
+ else
+ svmDataBuffer = (HostDataType *)clSVMAlloc(
+ context,
+ CL_MEM_SVM_FINE_GRAIN_BUFFER
+ | (SVMDataBufferAllSVMConsistent() ? CL_MEM_SVM_ATOMICS
+ : 0),
+ typeSize * threadCount * NumNonAtomicVariablesPerThread(), 0);
+ if (!svmDataBuffer)
+ {
+ log_error("ERROR: clSVMAlloc failed!\n");
+ return -1;
+ }
+ if (startRefValues.size())
+ memcpy(svmDataBuffer, &startRefValues[0],
+ typeSize * threadCount * NumNonAtomicVariablesPerThread());
+ streams[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ typeSize * threadCount
+ * NumNonAtomicVariablesPerThread(),
+ svmDataBuffer, NULL);
+ }
else
- clSVMFree(context, svmDataBuffer);
- }
- _passCount++;
- return 0;
+ {
+ streams[1] = clCreateBuffer(
+ context,
+ ((startRefValues.size() ? CL_MEM_COPY_HOST_PTR
+ : CL_MEM_READ_WRITE)),
+ typeSize * threadCount * NumNonAtomicVariablesPerThread(),
+ startRefValues.size() ? &startRefValues[0] : 0, NULL);
+ }
+ if (!streams[1])
+ {
+ log_error("ERROR: Creating reference array failed!\n");
+ return -1;
+ }
+ if (deviceThreadCount > 0)
+ {
+ cl_uint argInd = 0;
+ /* Set the arguments */
+ error =
+ clSetKernelArg(kernel, argInd++, sizeof(threadCount), &threadCount);
+ test_error(error, "Unable to set kernel argument");
+ error = clSetKernelArg(kernel, argInd++, sizeof(numDestItems),
+ &numDestItems);
+ test_error(error, "Unable to set indexed kernel argument");
+ error =
+ clSetKernelArg(kernel, argInd++, sizeof(streams[0]), &streams[0]);
+ test_error(error, "Unable to set indexed kernel arguments");
+ error =
+ clSetKernelArg(kernel, argInd++, sizeof(streams[1]), &streams[1]);
+ test_error(error, "Unable to set indexed kernel arguments");
+ if (LocalMemory())
+ {
+ error =
+ clSetKernelArg(kernel, argInd++, typeSize * numDestItems, NULL);
+ test_error(error, "Unable to set indexed local kernel argument");
+ }
+ if (LocalRefValues())
+ {
+ error =
+ clSetKernelArg(kernel, argInd++,
+ LocalRefValues() ? typeSize
+ * (CurrentGroupSize()
+ * NumNonAtomicVariablesPerThread())
+ : 1,
+ NULL);
+ test_error(error, "Unable to set indexed kernel argument");
+ }
+ }
+ /* Configure host threads */
+ std::vector<THostThreadContext> hostThreadContexts(hostThreadCount);
+ for (unsigned int t = 0; t < hostThreadCount; t++)
+ {
+ hostThreadContexts[t].test = this;
+ hostThreadContexts[t].tid = deviceThreadCount + t;
+ hostThreadContexts[t].threadCount = threadCount;
+ hostThreadContexts[t].destMemory =
+ UseSVM() ? svmAtomicBuffer : &destItems[0];
+ hostThreadContexts[t].oldValues =
+ UseSVM() ? svmDataBuffer : &refValues[0];
+ }
+
+ if (deviceThreadCount > 0)
+ {
+ /* Run the kernel */
+ threadNum[0] = deviceThreadCount;
+ groupSize = CurrentGroupSize();
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum,
+ &groupSize, 0, NULL, NULL);
+ test_error(error, "Unable to execute test kernel");
+ /* start device threads */
+ error = clFlush(queue);
+ test_error(error, "clFlush failed");
+ }
+
+ /* Start host threads and wait for finish */
+ if (hostThreadCount > 0)
+ ThreadPool_Do(HostThreadFunction, hostThreadCount,
+ &hostThreadContexts[0]);
+
+ if (UseSVM())
+ {
+ error = clFinish(queue);
+ test_error(error, "clFinish failed");
+ memcpy(&destItems[0], svmAtomicBuffer, typeSize * numDestItems);
+ memcpy(&refValues[0], svmDataBuffer,
+ typeSize * threadCount * NumNonAtomicVariablesPerThread());
+ }
+ else
+ {
+ if (deviceThreadCount > 0)
+ {
+ error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0,
+ typeSize * numDestItems, &destItems[0],
+ 0, NULL, NULL);
+ test_error(error, "Unable to read result value!");
+ error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
+ typeSize * deviceThreadCount
+ * NumNonAtomicVariablesPerThread(),
+ &refValues[0], 0, NULL, NULL);
+ test_error(error, "Unable to read reference values!");
+ }
+ }
+ bool dataVerified = false;
+ // If we have an expectedFn, then we need to generate a final value to
+ // compare against. If we don't have one, it's because we're comparing ref
+ // values only
+ for (cl_uint i = 0; i < numDestItems; i++)
+ {
+ HostDataType expected;
+
+ if (!ExpectedValue(expected, threadCount,
+ startRefValues.size() ? &startRefValues[0] : 0, i))
+ break; // no expected value function provided
+
+ if (expected != destItems[i])
+ {
+ std::stringstream logLine;
+ logLine << "ERROR: Result " << i
+ << " from kernel does not validate! (should be " << expected
+ << ", was " << destItems[i] << ")\n";
+ log_error("%s", logLine.str().c_str());
+ for (i = 0; i < threadCount; i++)
+ {
+ logLine.str("");
+ logLine << " --- " << i << " - ";
+ if (startRefValues.size())
+ logLine << startRefValues[i] << " -> " << refValues[i];
+ else
+ logLine << refValues[i];
+ logLine << " --- ";
+ if (i < numDestItems) logLine << destItems[i];
+ logLine << "\n";
+ log_info("%s", logLine.str().c_str());
+ }
+ if (!gDebug)
+ {
+ log_info("Program source:\n");
+ log_info("%s\n", programLine);
+ }
+ return -1;
+ }
+ dataVerified = true;
+ }
+
+ bool dataCorrect = false;
+ /* Use the verify function (if provided) to also check the results */
+ if (VerifyRefs(dataCorrect, threadCount, &refValues[0], &destItems[0]))
+ {
+ if (!dataCorrect)
+ {
+ log_error("ERROR: Reference values did not validate!\n");
+ std::stringstream logLine;
+ for (cl_uint i = 0; i < threadCount; i++)
+ for (cl_uint j = 0; j < NumNonAtomicVariablesPerThread(); j++)
+ {
+ logLine.str("");
+ logLine
+ << " --- " << i << " - "
+ << refValues[i * NumNonAtomicVariablesPerThread() + j]
+ << " --- ";
+ if (j == 0 && i < numDestItems) logLine << destItems[i];
+ logLine << "\n";
+ log_info("%s", logLine.str().c_str());
+ }
+ if (!gDebug)
+ {
+ log_info("Program source:\n");
+ log_info("%s\n", programLine);
+ }
+ return -1;
+ }
+ }
+ else if (!dataVerified)
+ {
+ log_error("ERROR: Test doesn't check total or refs; no values are "
+ "verified!\n");
+ return -1;
+ }
+
+ if (OldValueCheck()
+ && !(DeclaredInProgram()
+ && !LocalMemory())) // don't test for programs scope global atomics
+ // 'old' value has been overwritten by previous
+ // clEnqueueNDRangeKernel
+ {
+ /* Re-write the starting value */
+ for (size_t i = 0; i < numDestItems; i++) destItems[i] = _startValue;
+ refValues[0] = 0;
+ if (deviceThreadCount > 0)
+ {
+ error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0,
+ typeSize * numDestItems, &destItems[0],
+ 0, NULL, NULL);
+ test_error(error, "Unable to write starting values!");
+
+ /* Run the kernel once for a single thread, so we can verify that
+ * the returned value is the original one */
+ threadNum[0] = 1;
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum,
+ threadNum, 0, NULL, NULL);
+ test_error(error, "Unable to execute test kernel");
+
+ error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize,
+ &refValues[0], 0, NULL, NULL);
+ test_error(error, "Unable to read reference values!");
+ }
+ else
+ {
+ /* Start host thread */
+ HostFunction(0, 1, &destItems[0], &refValues[0]);
+ }
+
+ if (refValues[0] != _startValue) // destItems[0])
+ {
+ std::stringstream logLine;
+ logLine << "ERROR: atomic function operated correctly but did NOT "
+ "return correct 'old' value "
+ " (should have been "
+ << destItems[0] << ", returned " << refValues[0] << ")!\n";
+ log_error("%s", logLine.str().c_str());
+ if (!gDebug)
+ {
+ log_info("Program source:\n");
+ log_info("%s\n", programLine);
+ }
+ return -1;
+ }
+ }
+ if (UseSVM())
+ {
+ // the buffer object must first be released before the SVM buffer is
+ // freed. The Wrapper Class method reset() will do that
+ streams[0].reset();
+ if (gUseHostPtr)
+ free(svmAtomicBuffer);
+ else
+ clSVMFree(context, svmAtomicBuffer);
+ streams[1].reset();
+ if (gUseHostPtr)
+ free(svmDataBuffer);
+ else
+ clSVMFree(context, svmDataBuffer);
+ }
+ _passCount++;
+ return 0;
}
#endif //_COMMON_H_
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index c3a190b7..09c14ed1 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -23,2188 +23,3209 @@
#include <sstream>
#include <vector>
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestStore : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestStore
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
- CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- OldValueCheck(false);
- }
- virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
- {
- return threadCount;
- }
- virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
- {
- if(MemoryOrder() == MEMORY_ORDER_ACQUIRE ||
- MemoryOrder() == MEMORY_ORDER_ACQ_REL)
- return 0; //skip test - not applicable
-
- if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
- return 0; // skip test - not applicable
-
- return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- " atomic_store"+postfix+"(&destMemory[tid], tid"+memoryOrderScope+");\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder());
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- expected = (HostDataType)whichDestValue;
- return true;
- }
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
+ CBasicTestStore(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ OldValueCheck(false);
+ }
+ virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
+ {
+ return threadCount;
+ }
+ virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue)
+ {
+ if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
+ || MemoryOrder() == MEMORY_ORDER_ACQ_REL)
+ return 0; // skip test - not applicable
+
+ if (CheckCapabilities(MemoryScope(), MemoryOrder())
+ == TEST_SKIPPED_ITSELF)
+ return 0; // skip test - not applicable
+
+ return CBasicTestMemOrderScope<
+ HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
+ queue);
+ }
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return " atomic_store" + postfix + "(&destMemory[tid], tid"
+ + memoryOrderScope + ");\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder());
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ expected = (HostDataType)whichDestValue;
+ return true;
+ }
};
-int test_atomic_store_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_store_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestStore<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestStore<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestStore<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestStore<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- CBasicTestStore<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
- EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
- CBasicTestStore<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
- EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestStore<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestStore<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestStore<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestStore<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestStore<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestStore<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestStore<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestStore<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestStore<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestStore<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestStore<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestStore<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ CBasicTestStore<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_float.Execute(deviceID, context, queue, num_elements));
+ CBasicTestStore<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
+ TYPE_ATOMIC_DOUBLE, useSVM);
+ EXECUTE_TEST(error,
+ test_double.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestStore<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
+ TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestStore<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestStore<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestStore<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestStore<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
+ TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestStore<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestStore<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestStore<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_store(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_store_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_store_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_store(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_store_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_store_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestInit : public CBasicTest<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestInit : public CBasicTest<HostAtomicType, HostDataType> {
public:
- using CBasicTest<HostAtomicType, HostDataType>::OldValueCheck;
- CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- OldValueCheck(false);
- }
- virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
- {
- return threadCount;
- }
- virtual std::string ProgramCore()
- {
- return
- " atomic_init(&destMemory[tid], tid);\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- host_atomic_init(&destMemory[tid], (HostDataType)tid);
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- expected = (HostDataType)whichDestValue;
- return true;
- }
+ using CBasicTest<HostAtomicType, HostDataType>::OldValueCheck;
+ CBasicTestInit(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
+ {
+ OldValueCheck(false);
+ }
+ virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
+ {
+ return threadCount;
+ }
+ virtual std::string ProgramCore()
+ {
+ return " atomic_init(&destMemory[tid], tid);\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ host_atomic_init(&destMemory[tid], (HostDataType)tid);
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ expected = (HostDataType)whichDestValue;
+ return true;
+ }
};
-int test_atomic_init_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_init_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestInit<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestInit<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestInit<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestInit<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- CBasicTestInit<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
- EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
- CBasicTestInit<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
- EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestInit<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestInit<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestInit<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestInit<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestInit<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestInit<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestInit<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestInit<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestInit<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestInit<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestInit<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestInit<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ CBasicTestInit<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_float.Execute(deviceID, context, queue, num_elements));
+ CBasicTestInit<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
+ TYPE_ATOMIC_DOUBLE, useSVM);
+ EXECUTE_TEST(error,
+ test_double.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestInit<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
+ TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestInit<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestInit<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestInit<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestInit<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
+ TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestInit<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestInit<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestInit<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_init(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_init_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_init_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_init(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_init_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_init_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestLoad : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestLoad
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
- using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
- CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- OldValueCheck(false);
- }
- virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
- {
- return threadCount;
- }
- virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
- {
- if(MemoryOrder() == MEMORY_ORDER_RELEASE ||
- MemoryOrder() == MEMORY_ORDER_ACQ_REL)
- return 0; //skip test - not applicable
-
- if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
- return 0; // skip test - not applicable
-
- return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
- }
- virtual std::string ProgramCore()
- {
- // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store
- // should be MEMORY_ORDER_RELEASE
- std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
- std::string memoryOrderScopeStore =
- (MemoryOrder() == MEMORY_ORDER_ACQUIRE)
- ? (", memory_order_release" + MemoryScopeStr())
- : memoryOrderScopeLoad;
- std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
- return " atomic_store" + postfix + "(&destMemory[tid], tid"
- + memoryOrderScopeStore
- + ");\n"
- " oldValues[tid] = atomic_load"
- + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- host_atomic_store(&destMemory[tid], (HostDataType)tid, MEMORY_ORDER_SEQ_CST);
- oldValues[tid] = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[tid], MemoryOrder());
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- expected = (HostDataType)whichDestValue;
- return true;
- }
- virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
- {
- correct = true;
- for(cl_uint i = 0; i < threadCount; i++ )
- {
- if(refValues[i] != (HostDataType)i)
- {
- log_error("Invalid value for thread %u\n", (cl_uint)i);
- correct = false;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
+ using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
+ CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ OldValueCheck(false);
+ }
+ virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
+ {
+ return threadCount;
+ }
+ virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue)
+ {
+ if (MemoryOrder() == MEMORY_ORDER_RELEASE
+ || MemoryOrder() == MEMORY_ORDER_ACQ_REL)
+ return 0; // skip test - not applicable
+
+ if (CheckCapabilities(MemoryScope(), MemoryOrder())
+ == TEST_SKIPPED_ITSELF)
+ return 0; // skip test - not applicable
+
+ return CBasicTestMemOrderScope<
+ HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
+ queue);
+ }
+ virtual std::string ProgramCore()
+ {
+ // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store
+ // should be MEMORY_ORDER_RELEASE
+ std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
+ std::string memoryOrderScopeStore =
+ (MemoryOrder() == MEMORY_ORDER_ACQUIRE)
+ ? (", memory_order_release" + MemoryScopeStr())
+ : memoryOrderScopeLoad;
+ std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
+ return " atomic_store" + postfix + "(&destMemory[tid], tid"
+ + memoryOrderScopeStore
+ + ");\n"
+ " oldValues[tid] = atomic_load"
+ + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ host_atomic_store(&destMemory[tid], (HostDataType)tid,
+ MEMORY_ORDER_SEQ_CST);
+ oldValues[tid] = host_atomic_load<HostAtomicType, HostDataType>(
+ &destMemory[tid], MemoryOrder());
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ expected = (HostDataType)whichDestValue;
+ return true;
+ }
+ virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
+ HostDataType *refValues,
+ HostAtomicType *finalValues)
+ {
+ correct = true;
+ for (cl_uint i = 0; i < threadCount; i++)
+ {
+ if (refValues[i] != (HostDataType)i)
+ {
+ log_error("Invalid value for thread %u\n", (cl_uint)i);
+ correct = false;
+ return true;
+ }
+ }
return true;
- }
}
- return true;
- }
};
-int test_atomic_load_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_load_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestLoad<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestLoad<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestLoad<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestLoad<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- CBasicTestLoad<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
- EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
- CBasicTestLoad<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
- EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestLoad<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestLoad<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestLoad<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestLoad<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestLoad<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestLoad<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestLoad<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestLoad<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestLoad<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestLoad<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ CBasicTestLoad<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_float.Execute(deviceID, context, queue, num_elements));
+ CBasicTestLoad<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
+ TYPE_ATOMIC_DOUBLE, useSVM);
+ EXECUTE_TEST(error,
+ test_double.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestLoad<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
+ TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestLoad<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestLoad<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestLoad<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
+ TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestLoad<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestLoad<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_load(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_load_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_load_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_load(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_load_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_load_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestExchange : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestExchange
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
- CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- StartValue(123456);
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], tid"+memoryOrderScope+");\n"
- " for(int i = 0; i < "+IterationsStr()+"; i++)\n"
- " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], oldValues[tid]"+memoryOrderScope+");\n";
- }
-
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, MemoryOrder());
- for(int i = 0; i < Iterations(); i++)
- oldValues[tid] = host_atomic_exchange(&destMemory[0], oldValues[tid], MemoryOrder());
- }
- virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
- {
- OldValueCheck(Iterations()%2 == 0); //check is valid for even number of iterations only
- correct = true;
- /* We are expecting values from 0 to size-1 and initial value from atomic variable */
- /* These values must be distributed across refValues array and atomic variable finalVaue[0] */
- /* Any repeated value is treated as an error */
- std::vector<bool> tidFound(threadCount);
- bool startValueFound = false;
- cl_uint i;
-
- for(i = 0; i <= threadCount; i++)
- {
- cl_uint value;
- if(i == threadCount)
- value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written)
- else
- value = (cl_uint)refValues[i];
- if(value == (cl_uint)StartValue())
- {
- // Special initial value
- if(startValueFound)
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
+ CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ StartValue(123456);
+ }
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return " oldValues[tid] = atomic_exchange" + postfix
+ + "(&destMemory[0], tid" + memoryOrderScope
+ + ");\n"
+ " for(int i = 0; i < "
+ + IterationsStr()
+ + "; i++)\n"
+ " oldValues[tid] = atomic_exchange"
+ + postfix + "(&destMemory[0], oldValues[tid]" + memoryOrderScope
+ + ");\n";
+ }
+
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid,
+ MemoryOrder());
+ for (int i = 0; i < Iterations(); i++)
+ oldValues[tid] = host_atomic_exchange(
+ &destMemory[0], oldValues[tid], MemoryOrder());
+ }
+ virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
+ HostDataType *refValues,
+ HostAtomicType *finalValues)
+ {
+ OldValueCheck(
+ Iterations() % 2
+ == 0); // check is valid for even number of iterations only
+ correct = true;
+ /* We are expecting values from 0 to size-1 and initial value from
+ * atomic variable */
+ /* These values must be distributed across refValues array and atomic
+ * variable finalVaue[0] */
+ /* Any repeated value is treated as an error */
+ std::vector<bool> tidFound(threadCount);
+ bool startValueFound = false;
+ cl_uint i;
+
+ for (i = 0; i <= threadCount; i++)
{
- log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue());
- correct = false;
- return true;
+ cl_uint value;
+ if (i == threadCount)
+ value = (cl_uint)finalValues[0]; // additional value from atomic
+ // variable (last written)
+ else
+ value = (cl_uint)refValues[i];
+ if (value == (cl_uint)StartValue())
+ {
+ // Special initial value
+ if (startValueFound)
+ {
+ log_error("ERROR: Starting reference value (%u) occurred "
+ "more thane once\n",
+ (cl_uint)StartValue());
+ correct = false;
+ return true;
+ }
+ startValueFound = true;
+ continue;
+ }
+ if (value >= threadCount)
+ {
+ log_error(
+ "ERROR: Reference value %u outside of valid range! (%u)\n",
+ i, value);
+ correct = false;
+ return true;
+ }
+ if (tidFound[value])
+ {
+ log_error("ERROR: Value (%u) occurred more thane once\n",
+ value);
+ correct = false;
+ return true;
+ }
+ tidFound[value] = true;
}
- startValueFound = true;
- continue;
- }
- if(value >= threadCount)
- {
- log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value);
- correct = false;
- return true;
- }
- if(tidFound[value])
- {
- log_error("ERROR: Value (%u) occurred more thane once\n", value);
- correct = false;
return true;
- }
- tidFound[value] = true;
}
- return true;
- }
};
-int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestExchange<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestExchange<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestExchange<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestExchange<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- CBasicTestExchange<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
- EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
- CBasicTestExchange<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
- EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestExchange<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestExchange<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestExchange<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestExchange<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestExchange<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestExchange<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestExchange<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestExchange<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestExchange<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestExchange<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ CBasicTestExchange<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
+ TYPE_ATOMIC_FLOAT, useSVM);
+ EXECUTE_TEST(error,
+ test_float.Execute(deviceID, context, queue, num_elements));
+ CBasicTestExchange<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
+ TYPE_ATOMIC_DOUBLE, useSVM);
+ EXECUTE_TEST(error,
+ test_double.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestExchange<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestExchange<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestExchange<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestExchange<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestExchange<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestExchange<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_exchange(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_exchange_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_exchange_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_exchange_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_exchange_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestCompareStrong : public CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestCompareStrong
+ : public CBasicTestMemOrder2Scope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::OldValueCheck;
- using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
- using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrderScope;
- using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
- using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
- using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
- using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::IterationsStr;
- using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
- CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- StartValue(123456);
- OldValueCheck(false);
- }
- virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
- {
- if(MemoryOrder2() == MEMORY_ORDER_RELEASE ||
- MemoryOrder2() == MEMORY_ORDER_ACQ_REL)
- return 0; // not allowed as 'failure' argument
- if((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) ||
- (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST))
- return 0; // failure argument shall be no stronger than the success
-
- if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
- return 0; // skip test - not applicable
-
- if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF)
- return 0; // skip test - not applicable
-
- return CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScope();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- std::string(" ")+DataType().RegularTypeName()+" expected, previous;\n"
- " int successCount = 0;\n"
- " oldValues[tid] = tid;\n"
- " expected = tid; // force failure at the beginning\n"
- " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n"
- " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n"
- " else\n"
- " {\n"
- " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n"
- " {\n"
- " previous = expected;\n"
- " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n"
- " {\n"
- " oldValues[tid] = expected;\n"
- " successCount++;\n"
- " }\n"
- " else\n"
- " {\n"
- " if(previous == expected) // spurious failure - shouldn't occur for 'strong'\n"
- " {\n"
- " oldValues[tid] = threadCount; //mark fail with invalid value\n"
- " break;\n"
- " }\n"
- " }\n"
- " }\n"
- " }\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- HostDataType expected = (HostDataType)StartValue(), previous;
- oldValues[tid] = (HostDataType)tid;
- for(int i = 0; i < Iterations(); i++)
- {
- previous = expected;
- if(host_atomic_compare_exchange(&destMemory[0], &expected, oldValues[tid], MemoryOrder(), MemoryOrder2()))
- oldValues[tid] = expected;
- else
- {
- if(previous == expected) // shouldn't occur for 'strong'
+ using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::OldValueCheck;
+ using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
+ using CBasicTestMemOrder2Scope<HostAtomicType,
+ HostDataType>::MemoryOrderScope;
+ using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
+ using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
+ using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
+ using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::IterationsStr;
+ using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
+ CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ StartValue(123456);
+ OldValueCheck(false);
+ }
+ virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue)
+ {
+ if (MemoryOrder2() == MEMORY_ORDER_RELEASE
+ || MemoryOrder2() == MEMORY_ORDER_ACQ_REL)
+ return 0; // not allowed as 'failure' argument
+ if ((MemoryOrder() == MEMORY_ORDER_RELAXED
+ && MemoryOrder2() != MEMORY_ORDER_RELAXED)
+ || (MemoryOrder() != MEMORY_ORDER_SEQ_CST
+ && MemoryOrder2() == MEMORY_ORDER_SEQ_CST))
+ return 0; // failure argument shall be no stronger than the success
+
+ if (CheckCapabilities(MemoryScope(), MemoryOrder())
+ == TEST_SKIPPED_ITSELF)
+ return 0; // skip test - not applicable
+
+ if (CheckCapabilities(MemoryScope(), MemoryOrder2())
+ == TEST_SKIPPED_ITSELF)
+ return 0; // skip test - not applicable
+
+ return CBasicTestMemOrder2Scope<
+ HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
+ queue);
+ }
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScope();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return std::string(" ") + DataType().RegularTypeName()
+ + " expected, previous;\n"
+ " int successCount = 0;\n"
+ " oldValues[tid] = tid;\n"
+ " expected = tid; // force failure at the beginning\n"
+ " if(atomic_compare_exchange_strong"
+ + postfix + "(&destMemory[0], &expected, oldValues[tid]"
+ + memoryOrderScope
+ + ") || expected == tid)\n"
+ " oldValues[tid] = threadCount+1; //mark unexpected success "
+ "with invalid value\n"
+ " else\n"
+ " {\n"
+ " for(int i = 0; i < "
+ + IterationsStr()
+ + " || successCount == 0; i++)\n"
+ " {\n"
+ " previous = expected;\n"
+ " if(atomic_compare_exchange_strong"
+ + postfix + "(&destMemory[0], &expected, oldValues[tid]"
+ + memoryOrderScope
+ + "))\n"
+ " {\n"
+ " oldValues[tid] = expected;\n"
+ " successCount++;\n"
+ " }\n"
+ " else\n"
+ " {\n"
+ " if(previous == expected) // spurious failure - "
+ "shouldn't occur for 'strong'\n"
+ " {\n"
+ " oldValues[tid] = threadCount; //mark fail with "
+ "invalid value\n"
+ " break;\n"
+ " }\n"
+ " }\n"
+ " }\n"
+ " }\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ HostDataType expected = (HostDataType)StartValue(), previous;
+ oldValues[tid] = (HostDataType)tid;
+ for (int i = 0; i < Iterations(); i++)
{
- oldValues[tid] = threadCount; //mark fail with invalid value
+ previous = expected;
+ if (host_atomic_compare_exchange(&destMemory[0], &expected,
+ oldValues[tid], MemoryOrder(),
+ MemoryOrder2()))
+ oldValues[tid] = expected;
+ else
+ {
+ if (previous == expected) // shouldn't occur for 'strong'
+ {
+ oldValues[tid] = threadCount; // mark fail with invalid
+ // value
+ }
+ }
}
- }
- }
- }
- virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
- {
- correct = true;
- /* We are expecting values from 0 to size-1 and initial value from atomic variable */
- /* These values must be distributed across refValues array and atomic variable finalVaue[0] */
- /* Any repeated value is treated as an error */
- std::vector<bool> tidFound(threadCount);
- bool startValueFound = false;
- cl_uint i;
-
- for(i = 0; i <= threadCount; i++)
- {
- cl_uint value;
- if(i == threadCount)
- value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written)
- else
- value = (cl_uint)refValues[i];
- if(value == (cl_uint)StartValue())
- {
- // Special initial value
- if(startValueFound)
+ }
+ virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
+ HostDataType *refValues,
+ HostAtomicType *finalValues)
+ {
+ correct = true;
+ /* We are expecting values from 0 to size-1 and initial value from
+ * atomic variable */
+ /* These values must be distributed across refValues array and atomic
+ * variable finalVaue[0] */
+ /* Any repeated value is treated as an error */
+ std::vector<bool> tidFound(threadCount);
+ bool startValueFound = false;
+ cl_uint i;
+
+ for (i = 0; i <= threadCount; i++)
{
- log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue());
- correct = false;
- return true;
+ cl_uint value;
+ if (i == threadCount)
+ value = (cl_uint)finalValues[0]; // additional value from atomic
+ // variable (last written)
+ else
+ value = (cl_uint)refValues[i];
+ if (value == (cl_uint)StartValue())
+ {
+ // Special initial value
+ if (startValueFound)
+ {
+ log_error("ERROR: Starting reference value (%u) occurred "
+ "more thane once\n",
+ (cl_uint)StartValue());
+ correct = false;
+ return true;
+ }
+ startValueFound = true;
+ continue;
+ }
+ if (value >= threadCount)
+ {
+ if (value == threadCount)
+ log_error("ERROR: Spurious failure detected for "
+ "atomic_compare_exchange_strong\n");
+ log_error(
+ "ERROR: Reference value %u outside of valid range! (%u)\n",
+ i, value);
+ correct = false;
+ return true;
+ }
+ if (tidFound[value])
+ {
+ log_error("ERROR: Value (%u) occurred more thane once\n",
+ value);
+ correct = false;
+ return true;
+ }
+ tidFound[value] = true;
}
- startValueFound = true;
- continue;
- }
- if(value >= threadCount)
- {
- if(value == threadCount)
- log_error("ERROR: Spurious failure detected for atomic_compare_exchange_strong\n");
- log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value);
- correct = false;
return true;
- }
- if(tidFound[value])
- {
- log_error("ERROR: Value (%u) occurred more thane once\n", value);
- correct = false;
- return true;
- }
- tidFound[value] = true;
}
- return true;
- }
};
-int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements, bool useSVM)
{
- int error = 0;
- CBasicTestCompareStrong<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareStrong<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareStrong<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareStrong<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestCompareStrong<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareStrong<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
+ TYPE_ATOMIC_UINT, useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareStrong<HOST_ATOMIC_LONG, HOST_LONG> test_long(
+ TYPE_ATOMIC_LONG, useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareStrong<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32>
+ test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64>
+ test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_compare_exchange_strong(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
- return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_compare_exchange_strong_generic(deviceID, context, queue,
+ num_elements, false);
}
-int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
- return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_compare_exchange_strong_generic(deviceID, context, queue,
+ num_elements, true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestCompareWeak : public CBasicTestCompareStrong<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestCompareWeak
+ : public CBasicTestCompareStrong<HostAtomicType, HostDataType> {
public:
- using CBasicTestCompareStrong<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestCompareStrong<HostAtomicType, HostDataType>::MemoryOrderScope;
- using CBasicTestCompareStrong<HostAtomicType, HostDataType>::DataType;
- using CBasicTestCompareStrong<HostAtomicType, HostDataType>::Iterations;
- using CBasicTestCompareStrong<HostAtomicType, HostDataType>::IterationsStr;
- CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) : CBasicTestCompareStrong<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScope();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- std::string(" ")+DataType().RegularTypeName()+" expected , previous;\n"
- " int successCount = 0;\n"
- " oldValues[tid] = tid;\n"
- " expected = tid; // force failure at the beginning\n"
- " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n"
- " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n"
- " else\n"
- " {\n"
- " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n"
- " {\n"
- " previous = expected;\n"
- " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n"
- " {\n"
- " oldValues[tid] = expected;\n"
- " successCount++;\n"
- " }\n"
- " }\n"
- " }\n";
- }
+ using CBasicTestCompareStrong<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestCompareStrong<HostAtomicType,
+ HostDataType>::MemoryOrderScope;
+ using CBasicTestCompareStrong<HostAtomicType, HostDataType>::DataType;
+ using CBasicTestCompareStrong<HostAtomicType, HostDataType>::Iterations;
+ using CBasicTestCompareStrong<HostAtomicType, HostDataType>::IterationsStr;
+ CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestCompareStrong<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {}
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScope();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return std::string(" ") + DataType().RegularTypeName()
+ + " expected , previous;\n"
+ " int successCount = 0;\n"
+ " oldValues[tid] = tid;\n"
+ " expected = tid; // force failure at the beginning\n"
+ " if(atomic_compare_exchange_weak"
+ + postfix + "(&destMemory[0], &expected, oldValues[tid]"
+ + memoryOrderScope
+ + ") || expected == tid)\n"
+ " oldValues[tid] = threadCount+1; //mark unexpected success "
+ "with invalid value\n"
+ " else\n"
+ " {\n"
+ " for(int i = 0; i < "
+ + IterationsStr()
+ + " || successCount == 0; i++)\n"
+ " {\n"
+ " previous = expected;\n"
+ " if(atomic_compare_exchange_weak"
+ + postfix + "(&destMemory[0], &expected, oldValues[tid]"
+ + memoryOrderScope
+ + "))\n"
+ " {\n"
+ " oldValues[tid] = expected;\n"
+ " successCount++;\n"
+ " }\n"
+ " }\n"
+ " }\n";
+ }
};
-int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements, bool useSVM)
{
- int error = 0;
- CBasicTestCompareWeak<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareWeak<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareWeak<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareWeak<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestCompareWeak<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareWeak<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
+ TYPE_ATOMIC_UINT, useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareWeak<HOST_ATOMIC_LONG, HOST_LONG> test_long(
+ TYPE_ATOMIC_LONG, useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareWeak<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_compare_exchange_weak_generic(deviceID, context, queue,
+ num_elements, false);
}
-int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
- return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_compare_exchange_weak_generic(deviceID, context, queue,
+ num_elements, true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestFetchAdd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchAdd
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
- CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- " oldValues[tid] = atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"+
- " atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"
- " atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"
- " atomic_fetch_add"+postfix+"(&destMemory[0], (("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8"+memoryOrderScope+");\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- oldValues[tid] = host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
- host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
- host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
- host_atomic_fetch_add(&destMemory[0], ((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8, MemoryOrder());
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- expected = StartValue();
- for(cl_uint i = 0; i < threadCount; i++)
- expected += ((HostDataType)i+3)*3+(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8);
- return true;
- }
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+ CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {}
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return " oldValues[tid] = atomic_fetch_add" + postfix
+ + "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
+ + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add"
+ + postfix + "(&destMemory[0], ("
+ + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
+ + ");\n"
+ " atomic_fetch_add"
+ + postfix + "(&destMemory[0], ("
+ + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
+ + ");\n"
+ " atomic_fetch_add"
+ + postfix + "(&destMemory[0], (("
+ + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
+ + DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope
+ + ");\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ oldValues[tid] = host_atomic_fetch_add(
+ &destMemory[0], (HostDataType)tid + 3, MemoryOrder());
+ host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
+ MemoryOrder());
+ host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
+ MemoryOrder());
+ host_atomic_fetch_add(&destMemory[0],
+ ((HostDataType)tid + 3)
+ << (sizeof(HostDataType) - 1) * 8,
+ MemoryOrder());
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ expected = StartValue();
+ for (cl_uint i = 0; i < threadCount; i++)
+ expected += ((HostDataType)i + 3) * 3
+ + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
+ return true;
+ }
};
-int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestFetchAdd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAdd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAdd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAdd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestFetchAdd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAdd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAdd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAdd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_fetch_add(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestFetchSub : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchSub
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
- CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- " oldValues[tid] = atomic_fetch_sub"+postfix+"(&destMemory[0], tid + 3 +((("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8)"+memoryOrderScope+");\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- oldValues[tid] = host_atomic_fetch_sub(&destMemory[0], (HostDataType)tid + 3+(((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8), MemoryOrder());
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- expected = StartValue();
- for(cl_uint i = 0; i < threadCount; i++)
- expected -= (HostDataType)i + 3 +(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8);
- return true;
- }
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+ CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {}
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return " oldValues[tid] = atomic_fetch_sub" + postfix
+ + "(&destMemory[0], tid + 3 +((("
+ + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
+ + DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope
+ + ");\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ oldValues[tid] = host_atomic_fetch_sub(
+ &destMemory[0],
+ (HostDataType)tid + 3
+ + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
+ MemoryOrder());
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ expected = StartValue();
+ for (cl_uint i = 0; i < threadCount; i++)
+ expected -= (HostDataType)i + 3
+ + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
+ return true;
+ }
};
-int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestFetchSub<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchSub<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchSub<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchSub<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchSub<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchSub<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestFetchSub<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchSub<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchSub<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchSub<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchSub<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchSub<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestFetchOr : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchOr
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- StartValue(0);
- }
- virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
- {
- cl_uint numBits = DataType().Size(deviceID) * 8;
-
- return (threadCount + numBits - 1) / numBits;
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
- " int whichResult = tid / numBits;\n"
- " int bitIndex = tid - (whichResult * numBits);\n"
- "\n"
- " oldValues[tid] = atomic_fetch_or"+postfix+"(&destMemory[whichResult], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- size_t numBits = sizeof(HostDataType) * 8;
- size_t whichResult = tid / numBits;
- size_t bitIndex = tid - (whichResult * numBits);
-
- oldValues[tid] = host_atomic_fetch_or(&destMemory[whichResult], ((HostDataType)1 << bitIndex), MemoryOrder());
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8);
- if(whichDestValue < numValues - 1)
- {
- expected = ~(HostDataType)0;
- return true;
- }
- // Last item doesn't get or'ed on every bit, so we have to mask away
- cl_uint numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8);
- expected = StartValue();
- for(cl_uint i = 0; i < numBits; i++)
- expected |= ((HostDataType)1 << i);
- return true;
- }
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ StartValue(0);
+ }
+ virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
+ {
+ cl_uint numBits = DataType().Size(deviceID) * 8;
+
+ return (threadCount + numBits - 1) / numBits;
+ }
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return std::string(" size_t numBits = sizeof(")
+ + DataType().RegularTypeName()
+ + ") * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - (whichResult * numBits);\n"
+ "\n"
+ " oldValues[tid] = atomic_fetch_or"
+ + postfix + "(&destMemory[whichResult], (("
+ + DataType().RegularTypeName() + ")1 << bitIndex) "
+ + memoryOrderScope + ");\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ size_t numBits = sizeof(HostDataType) * 8;
+ size_t whichResult = tid / numBits;
+ size_t bitIndex = tid - (whichResult * numBits);
+
+ oldValues[tid] =
+ host_atomic_fetch_or(&destMemory[whichResult],
+ ((HostDataType)1 << bitIndex), MemoryOrder());
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1))
+ / (sizeof(HostDataType) * 8);
+ if (whichDestValue < numValues - 1)
+ {
+ expected = ~(HostDataType)0;
+ return true;
+ }
+ // Last item doesn't get or'ed on every bit, so we have to mask away
+ cl_uint numBits =
+ threadCount - whichDestValue * (sizeof(HostDataType) * 8);
+ expected = StartValue();
+ for (cl_uint i = 0; i < numBits; i++)
+ expected |= ((HostDataType)1 << i);
+ return true;
+ }
};
-int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestFetchOr<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOr<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOr<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOr<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOr<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOr<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestFetchOr<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOr<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOr<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOr<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOr<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOr<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_fetch_or(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestFetchXor : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchXor
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
- CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- StartValue((HostDataType)0x2f08ab418ba0541LL);
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- std::string(" int numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
- " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n"
- "\n"
- " oldValues[tid] = atomic_fetch_xor"+postfix+"(&destMemory[0], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- int numBits = sizeof(HostDataType) * 8;
- int bitIndex = (numBits-1)*(tid+1)/threadCount;
-
- oldValues[tid] = host_atomic_fetch_xor(&destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder());
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- int numBits = sizeof(HostDataType)*8;
- expected = StartValue();
- for(cl_uint i = 0; i < threadCount; i++)
- {
- int bitIndex = (numBits-1)*(i+1)/threadCount;
- expected ^= ((HostDataType)1 << bitIndex);
- }
- return true;
- }
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+ CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ StartValue((HostDataType)0x2f08ab418ba0541LL);
+ }
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return std::string(" int numBits = sizeof(")
+ + DataType().RegularTypeName()
+ + ") * 8;\n"
+ " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n"
+ "\n"
+ " oldValues[tid] = atomic_fetch_xor"
+ + postfix + "(&destMemory[0], ((" + DataType().RegularTypeName()
+ + ")1 << bitIndex) " + memoryOrderScope + ");\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ int numBits = sizeof(HostDataType) * 8;
+ int bitIndex = (numBits - 1) * (tid + 1) / threadCount;
+
+ oldValues[tid] = host_atomic_fetch_xor(
+ &destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder());
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ int numBits = sizeof(HostDataType) * 8;
+ expected = StartValue();
+ for (cl_uint i = 0; i < threadCount; i++)
+ {
+ int bitIndex = (numBits - 1) * (i + 1) / threadCount;
+ expected ^= ((HostDataType)1 << bitIndex);
+ }
+ return true;
+ }
};
-int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestFetchXor<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestFetchXor<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestFetchAnd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchAnd
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- StartValue(~(HostDataType)0);
- }
- virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
- {
- cl_uint numBits = DataType().Size(deviceID) * 8;
-
- return (threadCount + numBits - 1) / numBits;
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
- " int whichResult = tid / numBits;\n"
- " int bitIndex = tid - (whichResult * numBits);\n"
- "\n"
- " oldValues[tid] = atomic_fetch_and"+postfix+"(&destMemory[whichResult], ~(("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- size_t numBits = sizeof(HostDataType) * 8;
- size_t whichResult = tid / numBits;
- size_t bitIndex = tid - (whichResult * numBits);
-
- oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], ~((HostDataType)1 << bitIndex), MemoryOrder());
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8);
- if(whichDestValue < numValues - 1)
- {
- expected = 0;
- return true;
- }
- // Last item doesn't get and'ed on every bit, so we have to mask away
- size_t numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8);
- expected = StartValue();
- for(size_t i = 0; i < numBits; i++)
- expected &= ~((HostDataType)1 << i);
- return true;
- }
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ StartValue(~(HostDataType)0);
+ }
+ virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
+ {
+ cl_uint numBits = DataType().Size(deviceID) * 8;
+
+ return (threadCount + numBits - 1) / numBits;
+ }
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return std::string(" size_t numBits = sizeof(")
+ + DataType().RegularTypeName()
+ + ") * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - (whichResult * numBits);\n"
+ "\n"
+ " oldValues[tid] = atomic_fetch_and"
+ + postfix + "(&destMemory[whichResult], ~(("
+ + DataType().RegularTypeName() + ")1 << bitIndex) "
+ + memoryOrderScope + ");\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ size_t numBits = sizeof(HostDataType) * 8;
+ size_t whichResult = tid / numBits;
+ size_t bitIndex = tid - (whichResult * numBits);
+
+ oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult],
+ ~((HostDataType)1 << bitIndex),
+ MemoryOrder());
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1))
+ / (sizeof(HostDataType) * 8);
+ if (whichDestValue < numValues - 1)
+ {
+ expected = 0;
+ return true;
+ }
+ // Last item doesn't get and'ed on every bit, so we have to mask away
+ size_t numBits =
+ threadCount - whichDestValue * (sizeof(HostDataType) * 8);
+ expected = StartValue();
+ for (size_t i = 0; i < numBits; i++)
+ expected &= ~((HostDataType)1 << i);
+ return true;
+ }
};
-int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestFetchAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestFetchAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_fetch_and(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestFetchOrAnd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchOrAnd
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
- CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- StartValue(0);
- }
- virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
- {
- return 1+(threadCount-1)/(DataType().Size(deviceID)*8);
- }
- // each thread modifies (with OR and AND operations) and verifies
- // only one bit in atomic variable
- // other bits are modified by other threads but it must not affect current thread operation
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+
- " size_t valueInd = tid/bits;\n"
- " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n"
- " oldValues[tid] = 0;\n"
- " for(int i = 0; i < "+IterationsStr()+"; i++)\n"
- " {\n"
- " value = atomic_fetch_or"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
- " if(value & bitMask) // bit should be set to 0\n"
- " oldValues[tid]++;\n"
- " value = atomic_fetch_and"+postfix+"(destMemory+valueInd, ~bitMask"+memoryOrderScope+");\n"
- " if(!(value & bitMask)) // bit should be set to 1\n"
- " oldValues[tid]++;\n"
- " }\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- int bits = sizeof(HostDataType)*8;
- size_t valueInd = tid/bits;
- HostDataType value, bitMask = (HostDataType)1 << tid%bits;
- oldValues[tid] = 0;
- for(int i = 0; i < Iterations(); i++)
- {
- value = host_atomic_fetch_or(destMemory+valueInd, bitMask, MemoryOrder());
- if(value & bitMask) // bit should be set to 0
- oldValues[tid]++;
- value = host_atomic_fetch_and(destMemory+valueInd, ~bitMask, MemoryOrder());
- if(!(value & bitMask)) // bit should be set to 1
- oldValues[tid]++;
- }
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- expected = 0;
- return true;
- }
- virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
- {
- correct = true;
- for(cl_uint i = 0; i < threadCount; i++)
- {
- if(refValues[i] > 0)
- {
- log_error("Thread %d found %d mismatch(es)\n", i, (cl_uint)refValues[i]);
- correct = false;
- }
- }
- return true;
- }
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
+ CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ StartValue(0);
+ }
+ virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
+ {
+ return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8);
+ }
+ // each thread modifies (with OR and AND operations) and verifies
+ // only one bit in atomic variable
+ // other bits are modified by other threads but it must not affect current
+ // thread operation
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return std::string(" int bits = sizeof(")
+ + DataType().RegularTypeName() + ")*8;\n"
+ + " size_t valueInd = tid/bits;\n"
+ " "
+ + DataType().RegularTypeName() + " value, bitMask = ("
+ + DataType().RegularTypeName()
+ + ")1 << tid%bits;\n"
+ " oldValues[tid] = 0;\n"
+ " for(int i = 0; i < "
+ + IterationsStr()
+ + "; i++)\n"
+ " {\n"
+ " value = atomic_fetch_or"
+ + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
+ + ");\n"
+ " if(value & bitMask) // bit should be set to 0\n"
+ " oldValues[tid]++;\n"
+ " value = atomic_fetch_and"
+ + postfix + "(destMemory+valueInd, ~bitMask" + memoryOrderScope
+ + ");\n"
+ " if(!(value & bitMask)) // bit should be set to 1\n"
+ " oldValues[tid]++;\n"
+ " }\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ int bits = sizeof(HostDataType) * 8;
+ size_t valueInd = tid / bits;
+ HostDataType value, bitMask = (HostDataType)1 << tid % bits;
+ oldValues[tid] = 0;
+ for (int i = 0; i < Iterations(); i++)
+ {
+ value = host_atomic_fetch_or(destMemory + valueInd, bitMask,
+ MemoryOrder());
+ if (value & bitMask) // bit should be set to 0
+ oldValues[tid]++;
+ value = host_atomic_fetch_and(destMemory + valueInd, ~bitMask,
+ MemoryOrder());
+ if (!(value & bitMask)) // bit should be set to 1
+ oldValues[tid]++;
+ }
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ expected = 0;
+ return true;
+ }
+ virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
+ HostDataType *refValues,
+ HostAtomicType *finalValues)
+ {
+ correct = true;
+ for (cl_uint i = 0; i < threadCount; i++)
+ {
+ if (refValues[i] > 0)
+ {
+ log_error("Thread %d found %d mismatch(es)\n", i,
+ (cl_uint)refValues[i]);
+ correct = false;
+ }
+ }
+ return true;
+ }
};
-int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestFetchOrAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOrAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOrAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOrAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestFetchOrAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOrAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
+ TYPE_ATOMIC_UINT, useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOrAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(
+ TYPE_ATOMIC_LONG, useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOrAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_fetch_orand_generic(deviceID, context, queue,
+ num_elements, false);
}
-int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_fetch_orand_generic(deviceID, context, queue,
+ num_elements, true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestFetchXor2 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchXor2
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
- CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- StartValue(0);
- }
- virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
- {
- return 1+(threadCount-1)/(DataType().Size(deviceID)*8);
- }
- // each thread modifies (with XOR operation) and verifies
- // only one bit in atomic variable
- // other bits are modified by other threads but it must not affect current thread operation
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+
- " size_t valueInd = tid/bits;\n"
- " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n"
- " oldValues[tid] = 0;\n"
- " for(int i = 0; i < "+IterationsStr()+"; i++)\n"
- " {\n"
- " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
- " if(value & bitMask) // bit should be set to 0\n"
- " oldValues[tid]++;\n"
- " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
- " if(!(value & bitMask)) // bit should be set to 1\n"
- " oldValues[tid]++;\n"
- " }\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- int bits = sizeof(HostDataType)*8;
- size_t valueInd = tid/bits;
- HostDataType value, bitMask = (HostDataType)1 << tid%bits;
- oldValues[tid] = 0;
- for(int i = 0; i < Iterations(); i++)
- {
- value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder());
- if(value & bitMask) // bit should be set to 0
- oldValues[tid]++;
- value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder());
- if(!(value & bitMask)) // bit should be set to 1
- oldValues[tid]++;
- }
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- expected = 0;
- return true;
- }
- virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
- {
- correct = true;
- for(cl_uint i = 0; i < threadCount; i++)
- {
- if(refValues[i] > 0)
- {
- log_error("Thread %d found %d mismatches\n", i, (cl_uint)refValues[i]);
- correct = false;
- }
- }
- return true;
- }
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
+ CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ StartValue(0);
+ }
+ virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
+ {
+ return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8);
+ }
+ // each thread modifies (with XOR operation) and verifies
+ // only one bit in atomic variable
+ // other bits are modified by other threads but it must not affect current
+ // thread operation
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return std::string(" int bits = sizeof(")
+ + DataType().RegularTypeName() + ")*8;\n"
+ + " size_t valueInd = tid/bits;\n"
+ " "
+ + DataType().RegularTypeName() + " value, bitMask = ("
+ + DataType().RegularTypeName()
+ + ")1 << tid%bits;\n"
+ " oldValues[tid] = 0;\n"
+ " for(int i = 0; i < "
+ + IterationsStr()
+ + "; i++)\n"
+ " {\n"
+ " value = atomic_fetch_xor"
+ + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
+ + ");\n"
+ " if(value & bitMask) // bit should be set to 0\n"
+ " oldValues[tid]++;\n"
+ " value = atomic_fetch_xor"
+ + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
+ + ");\n"
+ " if(!(value & bitMask)) // bit should be set to 1\n"
+ " oldValues[tid]++;\n"
+ " }\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ int bits = sizeof(HostDataType) * 8;
+ size_t valueInd = tid / bits;
+ HostDataType value, bitMask = (HostDataType)1 << tid % bits;
+ oldValues[tid] = 0;
+ for (int i = 0; i < Iterations(); i++)
+ {
+ value = host_atomic_fetch_xor(destMemory + valueInd, bitMask,
+ MemoryOrder());
+ if (value & bitMask) // bit should be set to 0
+ oldValues[tid]++;
+ value = host_atomic_fetch_xor(destMemory + valueInd, bitMask,
+ MemoryOrder());
+ if (!(value & bitMask)) // bit should be set to 1
+ oldValues[tid]++;
+ }
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ expected = 0;
+ return true;
+ }
+ virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
+ HostDataType *refValues,
+ HostAtomicType *finalValues)
+ {
+ correct = true;
+ for (cl_uint i = 0; i < threadCount; i++)
+ {
+ if (refValues[i] > 0)
+ {
+ log_error("Thread %d found %d mismatches\n", i,
+ (cl_uint)refValues[i]);
+ correct = false;
+ }
+ }
+ return true;
+ }
};
-int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestFetchXor2<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor2<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor2<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor2<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestFetchXor2<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor2<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor2<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor2<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_fetch_xor2_generic(deviceID, context, queue,
+ num_elements, false);
}
-int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_fetch_xor2_generic(deviceID, context, queue,
+ num_elements, true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestFetchMin : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchMin
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- StartValue(DataType().MaxValue());
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- " oldValues[tid] = atomic_fetch_min"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], MemoryOrder());
- }
- virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
- {
- for(cl_uint i = 0; i < threadCount; i++)
- {
- startRefValues[i] = genrand_int32(d);
- if(sizeof(HostDataType) >= 8)
- startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
- }
- return true;
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- expected = StartValue();
- for(cl_uint i = 0; i < threadCount; i++)
- {
- if(startRefValues[ i ] < expected)
- expected = startRefValues[ i ];
- }
- return true;
- }
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ StartValue(DataType().MaxValue());
+ }
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return " oldValues[tid] = atomic_fetch_min" + postfix
+ + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid],
+ MemoryOrder());
+ }
+ virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
+ MTdata d)
+ {
+ for (cl_uint i = 0; i < threadCount; i++)
+ {
+ startRefValues[i] = genrand_int32(d);
+ if (sizeof(HostDataType) >= 8)
+ startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
+ }
+ return true;
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ expected = StartValue();
+ for (cl_uint i = 0; i < threadCount; i++)
+ {
+ if (startRefValues[i] < expected) expected = startRefValues[i];
+ }
+ return true;
+ }
};
-int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestFetchMin<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMin<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMin<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMin<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMin<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMin<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestFetchMin<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMin<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMin<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMin<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMin<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMin<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_fetch_min(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestFetchMax : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchMax
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- StartValue(DataType().MinValue());
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- return
- " oldValues[tid] = atomic_fetch_max"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n";
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], MemoryOrder());
- }
- virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
- {
- for(cl_uint i = 0; i < threadCount; i++)
- {
- startRefValues[i] = genrand_int32(d);
- if(sizeof(HostDataType) >= 8)
- startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
- }
- return true;
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- expected = StartValue();
- for(cl_uint i = 0; i < threadCount; i++)
- {
- if(startRefValues[ i ] > expected)
- expected = startRefValues[ i ];
- }
- return true;
- }
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ StartValue(DataType().MinValue());
+ }
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ return " oldValues[tid] = atomic_fetch_max" + postfix
+ + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n";
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid],
+ MemoryOrder());
+ }
+ virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
+ MTdata d)
+ {
+ for (cl_uint i = 0; i < threadCount; i++)
+ {
+ startRefValues[i] = genrand_int32(d);
+ if (sizeof(HostDataType) >= 8)
+ startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
+ }
+ return true;
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ expected = StartValue();
+ for (cl_uint i = 0; i < threadCount; i++)
+ {
+ if (startRefValues[i] > expected) expected = startRefValues[i];
+ }
+ return true;
+ }
};
-int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestFetchMax<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMax<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMax<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMax<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMax<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMax<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestFetchMax<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMax<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMax<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMax<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
+ TYPE_ATOMIC_ULONG, useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMax<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
+ test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMax<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_fetch_max(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestFlag : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
- static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000;
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFlag
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
+ static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000;
+
public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
- CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- StartValue(0);
- OldValueCheck(false);
- }
- virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
- {
- return threadCount;
- }
- TExplicitMemoryOrderType MemoryOrderForClear()
- {
- // Memory ordering for atomic_flag_clear function
- // ("shall not be memory_order_acquire nor memory_order_acq_rel")
- if(MemoryOrder() == MEMORY_ORDER_ACQUIRE)
- return MEMORY_ORDER_RELAXED;
- if (MemoryOrder() == MEMORY_ORDER_ACQ_REL)
- return MEMORY_ORDER_RELEASE;
- return MemoryOrder();
- }
- std::string MemoryOrderScopeStrForClear()
- {
- std::string orderStr;
- if (MemoryOrder() != MEMORY_ORDER_EMPTY)
- orderStr = std::string(", ") + get_memory_order_type_name(MemoryOrderForClear());
- return orderStr + MemoryScopeStr();
- }
-
- virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
- cl_command_queue queue)
- {
- // This test assumes support for the memory_scope_device scope in the case
- // that LocalMemory() == false. Therefore we should skip this test in that
- // configuration on a 3.0 driver since supporting the memory_scope_device
- // scope is optionaly.
- if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
- {
- if (!LocalMemory()
- && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
- {
- log_info(
- "Skipping atomic_flag test due to use of atomic_scope_device "
- "which is optionally not supported on this device\n");
- return 0; // skip test - not applicable
- }
- }
- return CBasicTestMemOrderScope<HostAtomicType,
- HostDataType>::ExecuteSingleTest(deviceID,
- context,
- queue);
- }
- virtual std::string ProgramCore()
- {
- std::string memoryOrderScope = MemoryOrderScopeStr();
- std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
- std::string program =
- " uint cnt, stop = 0;\n"
- " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n"
- " {\n"
- " bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n";
- if (MemoryOrder() == MEMORY_ORDER_RELAXED || MemoryOrder() == MEMORY_ORDER_RELEASE)
- program += " atomic_work_item_fence(" +
- std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
- "memory_order_acquire," +
- std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
- ");\n";
-
- program +=
- " if (!set)\n"
- " {\n";
-
- if (LocalMemory())
- program += " uint csIndex = get_enqueued_local_size(0)*get_group_id(0)+cnt;\n";
- else
- program += " uint csIndex = cnt;\n";
-
- std::ostringstream csNotVisited;
- csNotVisited << CRITICAL_SECTION_NOT_VISITED;
- program +=
- " // verify that thread is the first visitor\n"
- " if(oldValues[csIndex] == "+csNotVisited.str()+")\n"
- " {\n"
- " oldValues[csIndex] = tid; // set the winner id for this critical section\n"
- " stop = 1;\n"
- " }\n";
-
- if (MemoryOrder() == MEMORY_ORDER_ACQUIRE || MemoryOrder() == MEMORY_ORDER_RELAXED)
- program += " atomic_work_item_fence(" +
- std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
- "memory_order_release," +
- std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
- ");\n";
-
- program +=
- " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n"
- " }\n"
- " }\n";
- return program;
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- cl_uint cnt, stop = 0;
- for (cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n"
- {
- if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder()))
- {
- cl_uint csIndex = cnt;
- // verify that thread is the first visitor\n"
- if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED)
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::MemoryOrderScopeStr;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
+ CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
+ {
+ StartValue(0);
+ OldValueCheck(false);
+ }
+ virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
+ {
+ return threadCount;
+ }
+ TExplicitMemoryOrderType MemoryOrderForClear()
+ {
+ // Memory ordering for atomic_flag_clear function
+ // ("shall not be memory_order_acquire nor memory_order_acq_rel")
+ if (MemoryOrder() == MEMORY_ORDER_ACQUIRE) return MEMORY_ORDER_RELAXED;
+ if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) return MEMORY_ORDER_RELEASE;
+ return MemoryOrder();
+ }
+ std::string MemoryOrderScopeStrForClear()
+ {
+ std::string orderStr;
+ if (MemoryOrder() != MEMORY_ORDER_EMPTY)
+ orderStr = std::string(", ")
+ + get_memory_order_type_name(MemoryOrderForClear());
+ return orderStr + MemoryScopeStr();
+ }
+
+ virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue)
+ {
+ // This test assumes support for the memory_scope_device scope in the
+ // case that LocalMemory() == false. Therefore we should skip this test
+ // in that configuration on a 3.0 driver since supporting the
+ // memory_scope_device scope is optionaly.
+ if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
{
- oldValues[csIndex] = tid; // set the winner id for this critical section\n"
- stop = 1;
+ if (!LocalMemory()
+ && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
+ {
+ log_info("Skipping atomic_flag test due to use of "
+ "atomic_scope_device "
+ "which is optionally not supported on this device\n");
+ return 0; // skip test - not applicable
+ }
}
- host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear());
- }
- }
- }
- virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
- {
- expected = StartValue();
- return true;
- }
- virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
- {
- for(cl_uint i = 0 ; i < threadCount; i++)
- startRefValues[i] = CRITICAL_SECTION_NOT_VISITED;
- return true;
- }
- virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
- {
- correct = true;
- /* We are expecting unique values from 0 to threadCount-1 (each critical section must be visited) */
- /* These values must be distributed across refValues array */
- std::vector<bool> tidFound(threadCount);
- cl_uint i;
-
- for (i = 0; i < threadCount; i++)
- {
- cl_uint value = (cl_uint)refValues[i];
- if (value == CRITICAL_SECTION_NOT_VISITED)
- {
- // Special initial value
- log_error("ERROR: Critical section %u not visited\n", i);
- correct = false;
+ return CBasicTestMemOrderScope<
+ HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
+ queue);
+ }
+ virtual std::string ProgramCore()
+ {
+ std::string memoryOrderScope = MemoryOrderScopeStr();
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+ std::string program =
+ " uint cnt, stop = 0;\n"
+ " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread "
+ "must find critical section where it is the first visitor\n"
+ " {\n"
+ " bool set = atomic_flag_test_and_set"
+ + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n";
+ if (MemoryOrder() == MEMORY_ORDER_RELAXED
+ || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory())
+ program += " atomic_work_item_fence("
+ + std::string(
+ LocalMemory()
+ ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, "
+ : "CLK_GLOBAL_MEM_FENCE, ")
+ + "memory_order_acquire,"
+ + std::string(LocalMemory()
+ ? "memory_scope_work_group"
+ : (UseSVM() ? "memory_scope_all_svm_devices"
+ : "memory_scope_device"))
+ + ");\n";
+
+ program += " if (!set)\n"
+ " {\n";
+
+ if (LocalMemory())
+ program += " uint csIndex = "
+ "get_enqueued_local_size(0)*get_group_id(0)+cnt;\n";
+ else
+ program += " uint csIndex = cnt;\n";
+
+ std::ostringstream csNotVisited;
+ csNotVisited << CRITICAL_SECTION_NOT_VISITED;
+ program += " // verify that thread is the first visitor\n"
+ " if(oldValues[csIndex] == "
+ + csNotVisited.str()
+ + ")\n"
+ " {\n"
+ " oldValues[csIndex] = tid; // set the winner id for this "
+ "critical section\n"
+ " stop = 1;\n"
+ " }\n";
+
+ if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
+ || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory())
+ program += " atomic_work_item_fence("
+ + std::string(
+ LocalMemory()
+ ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, "
+ : "CLK_GLOBAL_MEM_FENCE, ")
+ + "memory_order_release,"
+ + std::string(LocalMemory()
+ ? "memory_scope_work_group"
+ : (UseSVM() ? "memory_scope_all_svm_devices"
+ : "memory_scope_device"))
+ + ");\n";
+
+ program += " atomic_flag_clear" + postfix + "(&destMemory[cnt]"
+ + MemoryOrderScopeStrForClear()
+ + ");\n"
+ " }\n"
+ " }\n";
+ return program;
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ cl_uint cnt, stop = 0;
+ for (cnt = 0; !stop && cnt < threadCount;
+ cnt++) // each thread must find critical section where it is the
+ // first visitor\n"
+ {
+ if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder()))
+ {
+ cl_uint csIndex = cnt;
+ // verify that thread is the first visitor\n"
+ if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED)
+ {
+ oldValues[csIndex] =
+ tid; // set the winner id for this critical section\n"
+ stop = 1;
+ }
+ host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear());
+ }
+ }
+ }
+ virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+ HostDataType *startRefValues,
+ cl_uint whichDestValue)
+ {
+ expected = StartValue();
return true;
- }
- if (value >= threadCount)
- {
- log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value);
- correct = false;
+ }
+ virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
+ MTdata d)
+ {
+ for (cl_uint i = 0; i < threadCount; i++)
+ startRefValues[i] = CRITICAL_SECTION_NOT_VISITED;
return true;
- }
- if (tidFound[value])
- {
- log_error("ERROR: Value (%u) occurred more thane once\n", value);
- correct = false;
+ }
+ virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
+ HostDataType *refValues,
+ HostAtomicType *finalValues)
+ {
+ correct = true;
+ /* We are expecting unique values from 0 to threadCount-1 (each critical
+ * section must be visited) */
+ /* These values must be distributed across refValues array */
+ std::vector<bool> tidFound(threadCount);
+ cl_uint i;
+
+ for (i = 0; i < threadCount; i++)
+ {
+ cl_uint value = (cl_uint)refValues[i];
+ if (value == CRITICAL_SECTION_NOT_VISITED)
+ {
+ // Special initial value
+ log_error("ERROR: Critical section %u not visited\n", i);
+ correct = false;
+ return true;
+ }
+ if (value >= threadCount)
+ {
+ log_error(
+ "ERROR: Reference value %u outside of valid range! (%u)\n",
+ i, value);
+ correct = false;
+ return true;
+ }
+ if (tidFound[value])
+ {
+ log_error("ERROR: Value (%u) occurred more thane once\n",
+ value);
+ correct = false;
+ return true;
+ }
+ tidFound[value] = true;
+ }
return true;
- }
- tidFound[value] = true;
}
- return true;
- }
};
-int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_flag_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestFlag<HOST_ATOMIC_FLAG, HOST_FLAG> test_flag(TYPE_ATOMIC_FLAG, useSVM);
- EXECUTE_TEST(error, test_flag.Execute(deviceID, context, queue, num_elements));
- return error;
+ int error = 0;
+ CBasicTestFlag<HOST_ATOMIC_FLAG, HOST_FLAG> test_flag(TYPE_ATOMIC_FLAG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_flag.Execute(deviceID, context, queue, num_elements));
+ return error;
}
-int test_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_flag(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_flag_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_flag_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_flag(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_flag_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_flag_generic(deviceID, context, queue, num_elements,
+ true);
}
-template<typename HostAtomicType, typename HostDataType>
-class CBasicTestFence : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
-{
- struct TestDefinition {
- bool op1IsFence;
- TExplicitMemoryOrderType op1MemOrder;
- bool op2IsFence;
- TExplicitMemoryOrderType op2MemOrder;
- };
-public:
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DeclaredInProgram;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UsedInFunction;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::CurrentGroupSize;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
- using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalRefValues;
- CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
- {
- StartValue(0);
- OldValueCheck(false);
- }
- virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
- {
- return threadCount;
- }
- virtual cl_uint NumNonAtomicVariablesPerThread()
- {
- if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
- return 1;
- if (LocalMemory())
- {
- if (gIsEmbedded)
- {
- if (CurrentGroupSize() > 1024)
- CurrentGroupSize(1024);
- return 1; //1KB of local memory required by spec. Clamp group size to 1k and allow 1 variable per thread
- }
- else
- return 32 * 1024 / 8 / CurrentGroupSize() - 1; //32KB of local memory required by spec
- }
- return 256;
- }
- virtual std::string SingleTestName()
- {
- std::string testName;
- if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
- testName += "seq_cst fence, ";
- else
- testName += std::string(get_memory_order_type_name(_subCase.op1MemOrder)).substr(sizeof("memory_order"))
- + (_subCase.op1IsFence ? " fence" : " atomic") + " synchronizes-with "
- + std::string(get_memory_order_type_name(_subCase.op2MemOrder)).substr(sizeof("memory_order"))
- + (_subCase.op2IsFence ? " fence" : " atomic") + ", ";
- testName += CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
- testName += std::string(", ") + std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory"));
- return testName;
- }
- virtual bool SVMDataBufferAllSVMConsistent()
- {
- // Although memory_scope_all_devices doesn't mention SVM it is just an
- // alias for memory_scope_all_svm_devices. So both scopes interact with
- // SVM allocations, on devices that support those, just the same.
- return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
- || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
- }
- virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
- {
- int error = 0;
- // execute 3 (maximum) sub cases for each memory order
- for (_subCaseId = 0; _subCaseId < 3; _subCaseId++)
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFence
+ : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
+ struct TestDefinition
{
- EXECUTE_TEST(error, (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteForEachParameterSet(deviceID, context, queue)));
- }
- return error;
- }
- virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
- {
- if(DeclaredInProgram() || UsedInFunction())
- return 0; //skip test - not applicable - no overloaded fence functions for different address spaces
- if(MemoryOrder() == MEMORY_ORDER_EMPTY ||
- MemoryScope() == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since opencl20-openclc-rev15
- return 0; //skip test - not applicable
- if((UseSVM() || gHost)
- && LocalMemory())
- return 0; // skip test - not applicable for SVM and local memory
- struct TestDefinition acqTests[] = {
- // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder}
- { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
- { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
- { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE }
+ bool op1IsFence;
+ TExplicitMemoryOrderType op1MemOrder;
+ bool op2IsFence;
+ TExplicitMemoryOrderType op2MemOrder;
};
- struct TestDefinition relTests[] = {
- { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE },
- { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }
- };
- struct TestDefinition arTests[] = {
- { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL },
- { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE },
- { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL }
- };
- switch (MemoryOrder())
- {
- case MEMORY_ORDER_ACQUIRE:
- if (_subCaseId >= sizeof(acqTests) / sizeof(struct TestDefinition))
- return 0;
- _subCase = acqTests[_subCaseId];
- break;
- case MEMORY_ORDER_RELEASE:
- if (_subCaseId >= sizeof(relTests) / sizeof(struct TestDefinition))
- return 0;
- _subCase = relTests[_subCaseId];
- break;
- case MEMORY_ORDER_ACQ_REL:
- if (_subCaseId >= sizeof(arTests) / sizeof(struct TestDefinition))
- return 0;
- _subCase = arTests[_subCaseId];
- break;
- case MEMORY_ORDER_SEQ_CST:
- if (_subCaseId != 0) // one special case only
- return 0;
- break;
- default:
- return 0;
- }
- LocalRefValues(LocalMemory());
- return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
- }
- virtual std::string ProgramHeader(cl_uint maxNumDestItems)
- {
- std::string header;
- if(gOldAPI)
- {
- if(MemoryScope() == MEMORY_SCOPE_EMPTY)
- {
- header += "#define atomic_work_item_fence(x,y) mem_fence(x)\n";
- }
- else
- {
- header += "#define atomic_work_item_fence(x,y,z) mem_fence(x)\n";
- }
- }
- return header+CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ProgramHeader(maxNumDestItems);
- }
- virtual std::string ProgramCore()
- {
- std::ostringstream naValues;
- naValues << NumNonAtomicVariablesPerThread();
- std::string program, fenceType, nonAtomic;
- if (LocalMemory())
- {
- program = " size_t myId = get_local_id(0), hisId = get_local_size(0)-1-myId;\n";
- fenceType = "CLK_LOCAL_MEM_FENCE";
- nonAtomic = "localValues";
- }
- else
+
+public:
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::DeclaredInProgram;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UsedInFunction;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+ using CBasicTestMemOrderScope<HostAtomicType,
+ HostDataType>::CurrentGroupSize;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
+ using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalRefValues;
+ CBasicTestFence(TExplicitAtomicType dataType, bool useSVM)
+ : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+ useSVM)
{
- program = " size_t myId = tid, hisId = threadCount-1-tid;\n";
- fenceType = "CLK_GLOBAL_MEM_FENCE";
- nonAtomic = "oldValues";
- }
- if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
- {
- // All threads are divided into pairs.
- // Each thread has its own atomic variable and performs the following actions:
- // - increments its own variable
- // - performs fence operation to propagate its value and to see value from other thread
- // - reads value from other thread's variable
- // - repeats the above steps when both values are the same (and less than 1000000)
- // - stores the last value read from other thread (in additional variable)
- // At the end of execution at least one thread should know the last value from other thread
- program += std::string("") +
- " " + DataType().RegularTypeName() + " myValue = 0, hisValue; \n"
- " do {\n"
- " myValue++;\n"
- " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n"
- " atomic_work_item_fence(" + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + "); \n"
- " hisValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n"
- " } while(myValue == hisValue && myValue < 1000000);\n"
- " " + nonAtomic + "[myId] = hisValue; \n";
+ StartValue(0);
+ OldValueCheck(false);
}
- else
+ virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
- // Each thread modifies one of its non-atomic variables, increments value of its atomic variable
- // and reads values from another thread in typical synchronizes-with scenario with:
- // - non-atomic variable (at index A) modification (value change from 0 to A)
- // - release operation (additional fence or within atomic) + atomic variable modification (value A)
- // - atomic variable read (value B) + acquire operation (additional fence or within atomic)
- // - non-atomic variable (at index B) read (value C)
- // Each thread verifies dependency between atomic and non-atomic value read from another thread
- // The following condition must be true: B == C
- program += std::string("") +
- " " + DataType().RegularTypeName() + " myValue = 0, hisAtomicValue, hisValue; \n"
- " do {\n"
- " myValue++;\n"
- " " + nonAtomic + "[myId*" + naValues.str() +"+myValue] = myValue;\n";
- if (_subCase.op1IsFence)
- program += std::string("") +
- " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + "); \n"
- " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n";
- else
- program += std::string("") +
- " atomic_store_explicit(&destMemory[myId], myValue, " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + ");\n";
- if (_subCase.op2IsFence)
- program += std::string("") +
- " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n"
- " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + "); \n";
- else
- program += std::string("") +
- " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + ");\n";
- program +=
- " hisValue = " + nonAtomic + "[hisId*" + naValues.str() + "+hisAtomicValue]; \n";
- if (LocalMemory())
- program += " hisId = (hisId+1)%get_local_size(0);\n";
- else
- program += " hisId = (hisId+1)%threadCount;\n";
- program +=
- " } while(hisAtomicValue == hisValue && myValue < "+naValues.str()+"-1);\n"
- " if(hisAtomicValue != hisValue)\n"
- " { // fail\n"
- " atomic_store(&destMemory[myId], myValue-1);\n";
- if (LocalMemory())
- program += " hisId = (hisId+get_local_size(0)-1)%get_local_size(0);\n";
- else
- program += " hisId = (hisId+threadCount-1)%threadCount;\n";
- program +=
- " if(myValue+1 < " + naValues.str() + ")\n"
- " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+1] = hisId;\n"
- " if(myValue+2 < " + naValues.str() + ")\n"
- " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+2] = hisAtomicValue;\n"
- " if(myValue+3 < " + naValues.str() + ")\n"
- " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+3] = hisValue;\n";
- if (gDebug)
- {
- program +=
- " printf(\"WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\\n\", tid, hisAtomicValue, hisId, hisValue);\n";
- }
- program +=
- " }\n";
- }
- return program;
- }
- virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
- {
- size_t myId = tid, hisId = threadCount - 1 - tid;
- if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
- {
- HostDataType myValue = 0, hisValue;
- // CPU thread typically starts faster - wait for GPU thread
- myValue++;
- host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST);
- while (host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_SEQ_CST) == 0);
- do {
- myValue++;
- host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
- host_atomic_thread_fence(MemoryOrder());
- hisValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_RELAXED);
- } while (myValue == hisValue && hisValue < 1000000);
- oldValues[tid] = hisValue;
+ return threadCount;
}
- else
+ virtual cl_uint NumNonAtomicVariablesPerThread()
{
- HostDataType myValue = 0, hisAtomicValue, hisValue;
- do {
- myValue++;
- oldValues[myId*NumNonAtomicVariablesPerThread()+myValue] = myValue;
- if (_subCase.op1IsFence)
+ if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) return 1;
+ if (LocalMemory())
{
- host_atomic_thread_fence(_subCase.op1MemOrder);
- host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
+ if (gIsEmbedded)
+ {
+ if (CurrentGroupSize() > 512) CurrentGroupSize(512);
+ return 2; // 1KB of local memory required by spec. Clamp group
+ // size to 512 and allow 2 variables per thread
+ }
+ else
+ return 32 * 1024 / 8 / CurrentGroupSize()
+ - 1; // 32KB of local memory required by spec
}
+ return 256;
+ }
+ virtual std::string SingleTestName()
+ {
+ std::string testName;
+ if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
+ testName += "seq_cst fence, ";
else
- host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, _subCase.op1MemOrder);
- if (_subCase.op2IsFence)
+ testName +=
+ std::string(get_memory_order_type_name(_subCase.op1MemOrder))
+ .substr(sizeof("memory_order"))
+ + (_subCase.op1IsFence ? " fence" : " atomic")
+ + " synchronizes-with "
+ + std::string(get_memory_order_type_name(_subCase.op2MemOrder))
+ .substr(sizeof("memory_order"))
+ + (_subCase.op2IsFence ? " fence" : " atomic") + ", ";
+ testName += CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
+ testName += std::string(", ")
+ + std::string(get_memory_scope_type_name(MemoryScope()))
+ .substr(sizeof("memory"));
+ return testName;
+ }
+ virtual bool SVMDataBufferAllSVMConsistent()
+ {
+ // Although memory_scope_all_devices doesn't mention SVM it is just an
+ // alias for memory_scope_all_svm_devices. So both scopes interact with
+ // SVM allocations, on devices that support those, just the same.
+ return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
+ || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
+ }
+ virtual int ExecuteForEachParameterSet(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue)
+ {
+ int error = 0;
+ // execute 3 (maximum) sub cases for each memory order
+ for (_subCaseId = 0; _subCaseId < 3; _subCaseId++)
{
- hisAtomicValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_RELAXED);
- host_atomic_thread_fence(_subCase.op2MemOrder);
+ EXECUTE_TEST(
+ error,
+ (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
+ ExecuteForEachParameterSet(deviceID, context, queue)));
}
- else
- hisAtomicValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], _subCase.op2MemOrder);
- hisValue = oldValues[hisId*NumNonAtomicVariablesPerThread() + hisAtomicValue];
- hisId = (hisId + 1) % threadCount;
- } while(hisAtomicValue == hisValue && myValue < (HostDataType)NumNonAtomicVariablesPerThread()-1);
- if(hisAtomicValue != hisValue)
- { // fail
- host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue-1, MEMORY_ORDER_SEQ_CST);
- if (gDebug)
+ return error;
+ }
+ virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue)
+ {
+ if (DeclaredInProgram() || UsedInFunction())
+ return 0; // skip test - not applicable - no overloaded fence
+ // functions for different address spaces
+ if (MemoryOrder() == MEMORY_ORDER_EMPTY
+ || MemoryScope()
+ == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since
+ // opencl20-openclc-rev15
+ return 0; // skip test - not applicable
+ if ((UseSVM() || gHost) && LocalMemory())
+ return 0; // skip test - not applicable for SVM and local memory
+ struct TestDefinition acqTests[] = {
+ // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder}
+ { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
+ { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
+ { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE }
+ };
+ struct TestDefinition relTests[] = {
+ { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE },
+ { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }
+ };
+ struct TestDefinition arTests[] = {
+ { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL },
+ { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE },
+ { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL }
+ };
+ switch (MemoryOrder())
{
- hisId = (hisId + threadCount - 1) % threadCount;
- printf("WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\n", tid, hisAtomicValue, hisId, hisValue);
+ case MEMORY_ORDER_ACQUIRE:
+ if (_subCaseId
+ >= sizeof(acqTests) / sizeof(struct TestDefinition))
+ return 0;
+ _subCase = acqTests[_subCaseId];
+ break;
+ case MEMORY_ORDER_RELEASE:
+ if (_subCaseId
+ >= sizeof(relTests) / sizeof(struct TestDefinition))
+ return 0;
+ _subCase = relTests[_subCaseId];
+ break;
+ case MEMORY_ORDER_ACQ_REL:
+ if (_subCaseId
+ >= sizeof(arTests) / sizeof(struct TestDefinition))
+ return 0;
+ _subCase = arTests[_subCaseId];
+ break;
+ case MEMORY_ORDER_SEQ_CST:
+ if (_subCaseId != 0) // one special case only
+ return 0;
+ break;
+ default: return 0;
}
- }
- }
- }
- virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
- {
- for(cl_uint i = 0 ; i < threadCount*NumNonAtomicVariablesPerThread(); i++)
- startRefValues[i] = 0;
- return true;
- }
- virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
- {
- correct = true;
- cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount;
- for(cl_uint workOffset = 0; workOffset < threadCount; workOffset+= workSize)
- {
- if(workOffset+workSize > threadCount)
- // last workgroup (host threads)
- workSize = threadCount-workOffset;
- for(cl_uint i = 0 ; i < workSize && workOffset+i < threadCount; i++)
- {
- HostAtomicType myValue = finalValues[workOffset + i];
- if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
+ LocalRefValues(LocalMemory());
+ return CBasicTestMemOrderScope<
+ HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
+ queue);
+ }
+ virtual std::string ProgramHeader(cl_uint maxNumDestItems)
+ {
+ std::string header;
+ if (gOldAPI)
{
- HostDataType hisValue = refValues[workOffset + i];
- if (myValue == hisValue)
- {
- // a draw - both threads should reach final value 1000000
- if (myValue != 1000000)
+ if (MemoryScope() == MEMORY_SCOPE_EMPTY)
{
- log_error("ERROR: Invalid reference value #%u (%d instead of 1000000)\n", workOffset + i, myValue);
- correct = false;
- return true;
+ header += "#define atomic_work_item_fence(x,y) "
+ " mem_fence(x)\n";
}
- }
- else
- {
- //slower thread (in total order of seq_cst operations) must know last value written by faster thread
- HostAtomicType hisRealValue = finalValues[workOffset + workSize - 1 - i];
- HostDataType myValueReadByHim = refValues[workOffset + workSize - 1 - i];
-
- // who is the winner? - thread with lower private counter value
- if (myValue == hisRealValue) // forbidden result - fence doesn't work
+ else
{
- log_error("ERROR: Atomic counter values #%u and #%u are the same (%u)\n", workOffset + i, workOffset + workSize - 1 - i, myValue);
- log_error("ERROR: Both threads have outdated values read from another thread (%u and %u)\n", hisValue, myValueReadByHim);
- correct = false;
- return true;
+ header += "#define atomic_work_item_fence(x,y,z) "
+ " mem_fence(x)\n";
}
- if (myValue > hisRealValue) // I'm slower
+ }
+ return header
+ + CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
+ ProgramHeader(maxNumDestItems);
+ }
+ virtual std::string ProgramCore()
+ {
+ std::ostringstream naValues;
+ naValues << NumNonAtomicVariablesPerThread();
+ std::string program, fenceType, nonAtomic;
+ if (LocalMemory())
+ {
+ program = " size_t myId = get_local_id(0), hisId = "
+ "get_local_size(0)-1-myId;\n";
+ fenceType = "CLK_LOCAL_MEM_FENCE";
+ nonAtomic = "localValues";
+ }
+ else
+ {
+ program = " size_t myId = tid, hisId = threadCount-1-tid;\n";
+ fenceType = "CLK_GLOBAL_MEM_FENCE";
+ nonAtomic = "oldValues";
+ }
+ if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
+ {
+ // All threads are divided into pairs.
+ // Each thread has its own atomic variable and performs the
+ // following actions:
+ // - increments its own variable
+ // - performs fence operation to propagate its value and to see
+ // value from other thread
+ // - reads value from other thread's variable
+ // - repeats the above steps when both values are the same (and less
+ // than 1000000)
+ // - stores the last value read from other thread (in additional
+ // variable) At the end of execution at least one thread should know
+ // the last value from other thread
+ program += std::string("") + " " + DataType().RegularTypeName()
+ + " myValue = 0, hisValue; \n"
+ " do {\n"
+ " myValue++;\n"
+ " atomic_store_explicit(&destMemory[myId], myValue, "
+ "memory_order_relaxed"
+ + MemoryScopeStr()
+ + ");\n"
+ " atomic_work_item_fence("
+ + fenceType + ", memory_order_seq_cst" + MemoryScopeStr()
+ + "); \n"
+ " hisValue = atomic_load_explicit(&destMemory[hisId], "
+ "memory_order_relaxed"
+ + MemoryScopeStr()
+ + ");\n"
+ " } while(myValue == hisValue && myValue < 1000000);\n"
+ " "
+ + nonAtomic + "[myId] = hisValue; \n";
+ }
+ else
+ {
+ // Each thread modifies one of its non-atomic variables, increments
+ // value of its atomic variable and reads values from another thread
+ // in typical synchronizes-with scenario with:
+ // - non-atomic variable (at index A) modification (value change
+ // from 0 to A)
+ // - release operation (additional fence or within atomic) + atomic
+ // variable modification (value A)
+ // - atomic variable read (value B) + acquire operation (additional
+ // fence or within atomic)
+ // - non-atomic variable (at index B) read (value C)
+ // Each thread verifies dependency between atomic and non-atomic
+ // value read from another thread The following condition must be
+ // true: B == C
+ program += std::string("") + " " + DataType().RegularTypeName()
+ + " myValue = 0, hisAtomicValue, hisValue; \n"
+ " do {\n"
+ " myValue++;\n"
+ " "
+ + nonAtomic + "[myId*" + naValues.str()
+ + "+myValue] = myValue;\n";
+ if (_subCase.op1IsFence)
+ program += std::string("") + " atomic_work_item_fence("
+ + fenceType + ", "
+ + get_memory_order_type_name(_subCase.op1MemOrder)
+ + MemoryScopeStr()
+ + "); \n"
+ " atomic_store_explicit(&destMemory[myId], myValue, "
+ "memory_order_relaxed"
+ + MemoryScopeStr() + ");\n";
+ else
+ program += std::string("")
+ + " atomic_store_explicit(&destMemory[myId], myValue, "
+ + get_memory_order_type_name(_subCase.op1MemOrder)
+ + MemoryScopeStr() + ");\n";
+ if (_subCase.op2IsFence)
+ program += std::string("")
+ + " hisAtomicValue = "
+ "atomic_load_explicit(&destMemory[hisId], "
+ "memory_order_relaxed"
+ + MemoryScopeStr()
+ + ");\n"
+ " atomic_work_item_fence("
+ + fenceType + ", "
+ + get_memory_order_type_name(_subCase.op2MemOrder)
+ + MemoryScopeStr() + "); \n";
+ else
+ program += std::string("")
+ + " hisAtomicValue = "
+ "atomic_load_explicit(&destMemory[hisId], "
+ + get_memory_order_type_name(_subCase.op2MemOrder)
+ + MemoryScopeStr() + ");\n";
+ program += " hisValue = " + nonAtomic + "[hisId*"
+ + naValues.str() + "+hisAtomicValue]; \n";
+ if (LocalMemory())
+ program += " hisId = (hisId+1)%get_local_size(0);\n";
+ else
+ program += " hisId = (hisId+1)%threadCount;\n";
+ program += " } while(hisAtomicValue == hisValue && myValue < "
+ + naValues.str()
+ + "-1);\n"
+ " if(hisAtomicValue != hisValue)\n"
+ " { // fail\n"
+ " atomic_store(&destMemory[myId], myValue-1);\n";
+ if (LocalMemory())
+ program += " hisId = "
+ "(hisId+get_local_size(0)-1)%get_local_size(0);\n";
+ else
+ program += " hisId = (hisId+threadCount-1)%threadCount;\n";
+ program += " if(myValue+1 < " + naValues.str()
+ + ")\n"
+ " "
+ + nonAtomic + "[myId*" + naValues.str()
+ + "+myValue+1] = hisId;\n"
+ " if(myValue+2 < "
+ + naValues.str()
+ + ")\n"
+ " "
+ + nonAtomic + "[myId*" + naValues.str()
+ + "+myValue+2] = hisAtomicValue;\n"
+ " if(myValue+3 < "
+ + naValues.str()
+ + ")\n"
+ " "
+ + nonAtomic + "[myId*" + naValues.str()
+ + "+myValue+3] = hisValue;\n";
+ if (gDebug)
{
- if (hisRealValue != hisValue)
- {
- log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + i, hisValue, hisRealValue);
- log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + i, workOffset + workSize - 1 - i);
- correct = false;
- return true;
- }
+ program += " printf(\"WI %d: atomic value (%d) at index %d "
+ "is different than non-atomic value (%d)\\n\", tid, "
+ "hisAtomicValue, hisId, hisValue);\n";
}
- else // I'm faster
+ program += " }\n";
+ }
+ return program;
+ }
+ virtual void HostFunction(cl_uint tid, cl_uint threadCount,
+ volatile HostAtomicType *destMemory,
+ HostDataType *oldValues)
+ {
+ size_t myId = tid, hisId = threadCount - 1 - tid;
+ if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
+ {
+ HostDataType myValue = 0, hisValue;
+ // CPU thread typically starts faster - wait for GPU thread
+ myValue++;
+ host_atomic_store<HostAtomicType, HostDataType>(
+ &destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST);
+ while (host_atomic_load<HostAtomicType, HostDataType>(
+ &destMemory[hisId], MEMORY_ORDER_SEQ_CST)
+ == 0)
+ ;
+ do
{
- if (myValueReadByHim != myValue)
- {
- log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + workSize - 1 - i, myValueReadByHim, myValue);
- log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + workSize - 1 - i, workOffset + i);
- correct = false;
- return true;
- }
- }
- }
+ myValue++;
+ host_atomic_store<HostAtomicType, HostDataType>(
+ &destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
+ host_atomic_thread_fence(MemoryOrder());
+ hisValue = host_atomic_load<HostAtomicType, HostDataType>(
+ &destMemory[hisId], MEMORY_ORDER_RELAXED);
+ } while (myValue == hisValue && hisValue < 1000000);
+ oldValues[tid] = hisValue;
}
else
{
- if (myValue != NumNonAtomicVariablesPerThread()-1)
- {
- log_error("ERROR: Invalid atomic value #%u (%d instead of %d)\n", workOffset + i, myValue, NumNonAtomicVariablesPerThread()-1);
- log_error("ERROR: Thread #%u observed invalid values in other thread's variables\n", workOffset + i, myValue);
- correct = false;
- return true;
- }
+ HostDataType myValue = 0, hisAtomicValue, hisValue;
+ do
+ {
+ myValue++;
+ oldValues[myId * NumNonAtomicVariablesPerThread() + myValue] =
+ myValue;
+ if (_subCase.op1IsFence)
+ {
+ host_atomic_thread_fence(_subCase.op1MemOrder);
+ host_atomic_store<HostAtomicType, HostDataType>(
+ &destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
+ }
+ else
+ host_atomic_store<HostAtomicType, HostDataType>(
+ &destMemory[myId], myValue, _subCase.op1MemOrder);
+ if (_subCase.op2IsFence)
+ {
+ hisAtomicValue =
+ host_atomic_load<HostAtomicType, HostDataType>(
+ &destMemory[hisId], MEMORY_ORDER_RELAXED);
+ host_atomic_thread_fence(_subCase.op2MemOrder);
+ }
+ else
+ hisAtomicValue =
+ host_atomic_load<HostAtomicType, HostDataType>(
+ &destMemory[hisId], _subCase.op2MemOrder);
+ hisValue = oldValues[hisId * NumNonAtomicVariablesPerThread()
+ + hisAtomicValue];
+ hisId = (hisId + 1) % threadCount;
+ } while (hisAtomicValue == hisValue
+ && myValue
+ < (HostDataType)NumNonAtomicVariablesPerThread() - 1);
+ if (hisAtomicValue != hisValue)
+ { // fail
+ host_atomic_store<HostAtomicType, HostDataType>(
+ &destMemory[myId], myValue - 1, MEMORY_ORDER_SEQ_CST);
+ if (gDebug)
+ {
+ hisId = (hisId + threadCount - 1) % threadCount;
+ printf("WI %d: atomic value (%d) at index %d is different "
+ "than non-atomic value (%d)\n",
+ tid, hisAtomicValue, hisId, hisValue);
+ }
+ }
+ }
+ }
+ virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
+ MTdata d)
+ {
+ for (cl_uint i = 0; i < threadCount * NumNonAtomicVariablesPerThread();
+ i++)
+ startRefValues[i] = 0;
+ return true;
+ }
+ virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
+ HostDataType *refValues,
+ HostAtomicType *finalValues)
+ {
+ correct = true;
+ cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount;
+ for (cl_uint workOffset = 0; workOffset < threadCount;
+ workOffset += workSize)
+ {
+ if (workOffset + workSize > threadCount)
+ // last workgroup (host threads)
+ workSize = threadCount - workOffset;
+ for (cl_uint i = 0; i < workSize && workOffset + i < threadCount;
+ i++)
+ {
+ HostAtomicType myValue = finalValues[workOffset + i];
+ if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
+ {
+ HostDataType hisValue = refValues[workOffset + i];
+ if (myValue == hisValue)
+ {
+ // a draw - both threads should reach final value
+ // 1000000
+ if (myValue != 1000000)
+ {
+ log_error("ERROR: Invalid reference value #%u (%d "
+ "instead of 1000000)\n",
+ workOffset + i, myValue);
+ correct = false;
+ return true;
+ }
+ }
+ else
+ {
+ // slower thread (in total order of seq_cst operations)
+ // must know last value written by faster thread
+ HostAtomicType hisRealValue =
+ finalValues[workOffset + workSize - 1 - i];
+ HostDataType myValueReadByHim =
+ refValues[workOffset + workSize - 1 - i];
+
+ // who is the winner? - thread with lower private
+ // counter value
+ if (myValue == hisRealValue) // forbidden result - fence
+ // doesn't work
+ {
+ log_error("ERROR: Atomic counter values #%u and "
+ "#%u are the same (%u)\n",
+ workOffset + i,
+ workOffset + workSize - 1 - i, myValue);
+ log_error(
+ "ERROR: Both threads have outdated values read "
+ "from another thread (%u and %u)\n",
+ hisValue, myValueReadByHim);
+ correct = false;
+ return true;
+ }
+ if (myValue > hisRealValue) // I'm slower
+ {
+ if (hisRealValue != hisValue)
+ {
+ log_error("ERROR: Invalid reference value #%u "
+ "(%d instead of %d)\n",
+ workOffset + i, hisValue,
+ hisRealValue);
+ log_error(
+ "ERROR: Slower thread #%u should know "
+ "value written by faster thread #%u\n",
+ workOffset + i,
+ workOffset + workSize - 1 - i);
+ correct = false;
+ return true;
+ }
+ }
+ else // I'm faster
+ {
+ if (myValueReadByHim != myValue)
+ {
+ log_error("ERROR: Invalid reference value #%u "
+ "(%d instead of %d)\n",
+ workOffset + workSize - 1 - i,
+ myValueReadByHim, myValue);
+ log_error(
+ "ERROR: Slower thread #%u should know "
+ "value written by faster thread #%u\n",
+ workOffset + workSize - 1 - i,
+ workOffset + i);
+ correct = false;
+ return true;
+ }
+ }
+ }
+ }
+ else
+ {
+ if (myValue != NumNonAtomicVariablesPerThread() - 1)
+ {
+ log_error("ERROR: Invalid atomic value #%u (%d instead "
+ "of %d)\n",
+ workOffset + i, myValue,
+ NumNonAtomicVariablesPerThread() - 1);
+ log_error("ERROR: Thread #%u observed invalid values "
+ "in other thread's variables\n",
+ workOffset + i, myValue);
+ correct = false;
+ return true;
+ }
+ }
+ }
}
- }
+ return true;
}
- return true;
- }
+
private:
- int _subCaseId;
- struct TestDefinition _subCase;
+ int _subCaseId;
+ struct TestDefinition _subCase;
};
-int test_atomic_fence_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
+int test_atomic_fence_generic(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ bool useSVM)
{
- int error = 0;
- CBasicTestFence<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
- EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
- CBasicTestFence<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
- EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
- CBasicTestFence<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
- EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
- CBasicTestFence<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
- EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
- if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
- {
- CBasicTestFence<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFence<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFence<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFence<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- else
- {
- CBasicTestFence<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
- EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFence<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
- EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFence<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
- EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
- CBasicTestFence<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
- EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
- }
- return error;
+ int error = 0;
+ CBasicTestFence<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_int.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFence<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_uint.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFence<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_long.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFence<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
+ useSVM);
+ EXECUTE_TEST(error,
+ test_ulong.Execute(deviceID, context, queue, num_elements));
+ if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
+ {
+ CBasicTestFence<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
+ TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFence<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFence<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFence<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ else
+ {
+ CBasicTestFence<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
+ TYPE_ATOMIC_INTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_intptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFence<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
+ test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_uintptr_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFence<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
+ TYPE_ATOMIC_SIZE_T, useSVM);
+ EXECUTE_TEST(
+ error, test_size_t.Execute(deviceID, context, queue, num_elements));
+ CBasicTestFence<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
+ test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
+ EXECUTE_TEST(
+ error,
+ test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
+ }
+ return error;
}
-int test_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_fence(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fence_generic(deviceID, context, queue, num_elements, false);
+ return test_atomic_fence_generic(deviceID, context, queue, num_elements,
+ false);
}
-int test_svm_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_svm_atomic_fence(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- return test_atomic_fence_generic(deviceID, context, queue, num_elements, true);
+ return test_atomic_fence_generic(deviceID, context, queue, num_elements,
+ true);
}
diff --git a/test_conformance/commonfns/test_sign.cpp b/test_conformance/commonfns/test_sign.cpp
index 1b842e35..6dba58da 100644
--- a/test_conformance/commonfns/test_sign.cpp
+++ b/test_conformance/commonfns/test_sign.cpp
@@ -223,14 +223,13 @@ test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n
free(input_ptr[0]);
free(output_ptr);
- if(err)
- return err;
+ if (err) return err;
- if( ! is_extension_available( device, "cl_khr_fp64"))
- {
- log_info( "skipping double test -- cl_khr_fp64 not supported.\n" );
- return 0;
- }
+ if (!is_extension_available(device, "cl_khr_fp64"))
+ {
+ log_info("skipping double test -- cl_khr_fp64 not supported.\n");
+ return 0;
+ }
return test_sign_double( device, context, queue, n_elems);
}
diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp
index 0e3cfe07..330083b2 100644
--- a/test_conformance/commonfns/test_step.cpp
+++ b/test_conformance/commonfns/test_step.cpp
@@ -158,23 +158,20 @@ test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
- if (err)
- return -1;
+ if (err) return -1;
err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
- if (err)
- return -1;
+ if (err) return -1;
err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
- if (err)
- return -1;
- err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" );
- if (err)
- return -1;
- err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" );
- if (err)
- return -1;
- err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" );
- if (err)
- return -1;
+ if (err) return -1;
+ err = create_single_kernel_helper(context, &program[3], &kernel[3], 1,
+ &step8_kernel_code, "test_step8");
+ if (err) return -1;
+ err = create_single_kernel_helper(context, &program[4], &kernel[4], 1,
+ &step16_kernel_code, "test_step16");
+ if (err) return -1;
+ err = create_single_kernel_helper(context, &program[5], &kernel[5], 1,
+ &step3_kernel_code, "test_step3");
+ if (err) return -1;
values[0] = streams[0];
values[1] = streams[1];
diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
index 483adac9..b95b0f53 100644
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
@@ -20,7 +20,7 @@
#include <unistd.h>
#endif
-
+// List should follow order in the extension spec
const char *known_extensions[] = {
"cl_khr_byte_addressable_store",
"cl_khr_3d_image_writes",
@@ -42,6 +42,7 @@ const char *known_extensions[] = {
"cl_khr_mipmap_image_writes",
"cl_khr_srgb_image_writes",
"cl_khr_subgroup_named_barrier",
+ "cl_khr_extended_async_copies",
"cl_khr_subgroup_extended_types",
"cl_khr_subgroup_non_uniform_vote",
"cl_khr_subgroup_ballot",
@@ -49,7 +50,9 @@ const char *known_extensions[] = {
"cl_khr_subgroup_shuffle",
"cl_khr_subgroup_shuffle_relative",
"cl_khr_subgroup_clustered_reduce",
-
+ "cl_khr_extended_bit_ops",
+ "cl_khr_integer_dot_product",
+ "cl_khr_subgroup_rotate",
// API-only extensions after this point. If you add above here, modify
// first_API_extension below.
"cl_khr_icd",
@@ -71,10 +74,23 @@ const char *known_extensions[] = {
"cl_khr_spirv_no_integer_wrap_decoration",
"cl_khr_extended_versioning",
"cl_khr_device_uuid",
+ "cl_khr_pci_bus_info",
+ "cl_khr_suggested_local_work_size",
+ "cl_khr_spirv_linkonce_odr",
+ "cl_khr_semaphore",
+ "cl_khr_external_semaphore",
+ "cl_khr_external_semaphore_win32",
+ "cl_khr_external_semaphore_sync_fd",
+ "cl_khr_external_semaphore_opaque_fd",
+ "cl_khr_external_memory",
+ "cl_khr_external_memory_win32",
+ "cl_khr_external_memory_opaque_fd",
+ "cl_khr_command_buffer",
+ "cl_khr_command_buffer_mutable_dispatch",
};
-size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*);
-size_t first_API_extension = 27;
+size_t num_known_extensions = ARRAY_SIZE(known_extensions);
+size_t first_API_extension = 31;
const char *known_embedded_extensions[] = {
"cles_khr_int64",
@@ -314,8 +330,15 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context
}
// Build the kernel
- char *kernel_code = (char*)malloc(1025*256*(num_not_supported_extensions+num_of_supported_extensions));
- memset(kernel_code, 0, 1025*256*(num_not_supported_extensions+num_of_supported_extensions));
+ char *kernel_code = (char *)malloc(
+ 1
+ + 1025 * 256
+ * (num_not_supported_extensions + num_of_supported_extensions));
+ memset(
+ kernel_code, 0,
+ 1
+ + 1025 * 256
+ * (num_not_supported_extensions + num_of_supported_extensions));
int i, index = 0;
strcat(kernel_code, kernel_strings[0]);
@@ -340,8 +363,6 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context
clProgramWrapper program;
clKernelWrapper kernel;
- Version version = get_device_cl_version(device);
-
error = create_single_kernel_helper(context, &program, &kernel, 1,
(const char **)&kernel_code, "test");
test_error(error, "create_single_kernel_helper failed");
diff --git a/test_conformance/compiler/test_feature_macro.cpp b/test_conformance/compiler/test_feature_macro.cpp
index ac355dd4..ef3c0028 100644
--- a/test_conformance/compiler/test_feature_macro.cpp
+++ b/test_conformance/compiler/test_feature_macro.cpp
@@ -579,6 +579,78 @@ int test_feature_macro_fp64(cl_device_id deviceID, cl_context context,
compiler_status, supported);
}
+int test_feature_macro_integer_dot_product_input_4x8bit_packed(
+ cl_device_id deviceID, cl_context context, std::string test_macro_name,
+ cl_bool& supported)
+{
+ cl_int error = TEST_FAIL;
+ cl_bool api_status;
+ cl_bool compiler_status;
+ log_info("\n%s ...\n", test_macro_name.c_str());
+
+ if (!is_extension_available(deviceID, "cl_khr_integer_dot_product"))
+ {
+ supported = false;
+ return TEST_PASS;
+ }
+
+ error = check_api_feature_info_capabilities<
+ cl_device_integer_dot_product_capabilities_khr>(
+ deviceID, context, api_status,
+ CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR,
+ CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR);
+ if (error != CL_SUCCESS)
+ {
+ return error;
+ }
+
+ error = check_compiler_feature_info(deviceID, context, test_macro_name,
+ compiler_status);
+ if (error != CL_SUCCESS)
+ {
+ return error;
+ }
+
+ return feature_macro_verify_results(test_macro_name, api_status,
+ compiler_status, supported);
+}
+
+int test_feature_macro_integer_dot_product_input_4x8bit(
+ cl_device_id deviceID, cl_context context, std::string test_macro_name,
+ cl_bool& supported)
+{
+ cl_int error = TEST_FAIL;
+ cl_bool api_status;
+ cl_bool compiler_status;
+ log_info("\n%s ...\n", test_macro_name.c_str());
+
+ if (!is_extension_available(deviceID, "cl_khr_integer_dot_product"))
+ {
+ supported = false;
+ return TEST_PASS;
+ }
+
+ error = check_api_feature_info_capabilities<
+ cl_device_integer_dot_product_capabilities_khr>(
+ deviceID, context, api_status,
+ CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR,
+ CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR);
+ if (error != CL_SUCCESS)
+ {
+ return error;
+ }
+
+ error = check_compiler_feature_info(deviceID, context, test_macro_name,
+ compiler_status);
+ if (error != CL_SUCCESS)
+ {
+ return error;
+ }
+
+ return feature_macro_verify_results(test_macro_name, api_status,
+ compiler_status, supported);
+}
+
int test_feature_macro_int64(cl_device_id deviceID, cl_context context,
std::string test_macro_name, cl_bool& supported)
{
@@ -686,15 +758,6 @@ int test_consistency_c_features_list(cl_device_id deviceID,
sort(vec_to_cmp.begin(), vec_to_cmp.end());
sort(vec_device_feature_names.begin(), vec_device_feature_names.end());
- if (vec_device_feature_names == vec_to_cmp)
- {
- log_info("Comparison list of features - passed\n");
- }
- else
- {
- log_info("Comparison list of features - failed\n");
- error = TEST_FAIL;
- }
log_info(
"Supported features based on CL_DEVICE_OPENCL_C_FEATURES API query:\n");
for (auto each_f : vec_device_feature_names)
@@ -703,11 +766,26 @@ int test_consistency_c_features_list(cl_device_id deviceID,
}
log_info("\nSupported features based on queries to API/compiler :\n");
+
for (auto each_f : vec_to_cmp)
{
log_info("%s\n", each_f.c_str());
}
+ for (auto each_f : vec_to_cmp)
+ {
+ if (find(vec_device_feature_names.begin(),
+ vec_device_feature_names.end(), each_f)
+ == vec_device_feature_names.end())
+ {
+ log_info("Comparison list of features - failed - missing %s\n",
+ each_f.c_str());
+ return TEST_FAIL;
+ }
+ }
+
+ log_info("Comparison list of features - passed\n");
+
return error;
}
@@ -748,6 +826,8 @@ int test_features_macro(cl_device_id deviceID, cl_context context,
NEW_FEATURE_MACRO_TEST(images);
NEW_FEATURE_MACRO_TEST(fp64);
NEW_FEATURE_MACRO_TEST(int64);
+ NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit);
+ NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit_packed);
error |= test_consistency_c_features_list(deviceID, supported_features_vec);
diff --git a/test_conformance/computeinfo/CMakeLists.txt b/test_conformance/computeinfo/CMakeLists.txt
index 207223a3..06f0599c 100644
--- a/test_conformance/computeinfo/CMakeLists.txt
+++ b/test_conformance/computeinfo/CMakeLists.txt
@@ -5,6 +5,7 @@ set(${MODULE_NAME}_SOURCES
device_uuid.cpp
extended_versioning.cpp
conforming_version.cpp
+ pci_bus_info.cpp
)
include(../CMakeCommon.txt)
diff --git a/test_conformance/computeinfo/device_uuid.cpp b/test_conformance/computeinfo/device_uuid.cpp
index 1ef9dad2..7f29d0b6 100644
--- a/test_conformance/computeinfo/device_uuid.cpp
+++ b/test_conformance/computeinfo/device_uuid.cpp
@@ -105,7 +105,7 @@ int test_device_uuid(cl_device_id deviceID, cl_context context,
if (!is_extension_available(deviceID, "cl_khr_device_uuid"))
{
log_info("cl_khr_device_uuid not supported. Skipping test...\n");
- return 0;
+ return TEST_SKIPPED_ITSELF;
}
int total_errors = 0;
diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp
index 4860b445..382cd6a3 100644
--- a/test_conformance/computeinfo/main.cpp
+++ b/test_conformance/computeinfo/main.cpp
@@ -95,8 +95,8 @@ typedef struct _version version_t;
struct _extensions
{
- int cl_khr_fp64;
- int cl_khr_fp16;
+ int has_cl_khr_fp64;
+ int has_cl_khr_fp16;
};
typedef struct _extensions extensions_t;
@@ -908,12 +908,6 @@ void dumpConfigInfo(config_info* info)
{
cl_name_version new_version_item =
info->config.cl_name_version_array[f];
- cl_version new_version_major =
- CL_VERSION_MAJOR_KHR(new_version_item.version);
- cl_version new_version_minor =
- CL_VERSION_MINOR_KHR(new_version_item.version);
- cl_version new_version_patch =
- CL_VERSION_PATCH_KHR(new_version_item.version);
log_info("\t\t\"%s\" %d.%d.%d\n", new_version_item.name,
CL_VERSION_MAJOR_KHR(new_version_item.version),
CL_VERSION_MINOR_KHR(new_version_item.version),
@@ -1069,11 +1063,11 @@ int parseExtensions(char const* str, extensions_t* extensions)
}
if (strncmp(begin, "cl_khr_fp64", length) == 0)
{
- extensions->cl_khr_fp64 = 1;
+ extensions->has_cl_khr_fp64 = 1;
}
if (strncmp(begin, "cl_khr_fp16", length) == 0)
{
- extensions->cl_khr_fp16 = 1;
+ extensions->has_cl_khr_fp16 = 1;
}
begin += length; // Skip word.
if (begin[0] == ' ')
@@ -1112,13 +1106,13 @@ int getConfigInfos(cl_device_id device)
// version 1.1, we have to check doubles are sopported. In
// OpenCL 1.2 CL_DEVICE_DOUBLE_FP_CONFIG should be reported
// unconditionally.
- get = extensions.cl_khr_fp64;
+ get = extensions.has_cl_khr_fp64;
};
if (info.opcode == CL_DEVICE_HALF_FP_CONFIG)
{
// CL_DEVICE_HALF_FP_CONFIG should be reported only when cl_khr_fp16
// extension is available
- get = extensions.cl_khr_fp16;
+ get = extensions.has_cl_khr_fp16;
};
if (get)
{
@@ -1421,15 +1415,16 @@ int test_computeinfo(cl_device_id deviceID, cl_context context,
extern int test_extended_versioning(cl_device_id, cl_context, cl_command_queue,
int);
extern int test_device_uuid(cl_device_id, cl_context, cl_command_queue, int);
-
extern int test_conformance_version(cl_device_id, cl_context, cl_command_queue,
int);
+extern int test_pci_bus_info(cl_device_id, cl_context, cl_command_queue, int);
test_definition test_list[] = {
ADD_TEST(computeinfo),
ADD_TEST(extended_versioning),
ADD_TEST(device_uuid),
ADD_TEST_VERSION(conformance_version, Version(3, 0)),
+ ADD_TEST(pci_bus_info),
};
const int test_num = ARRAY_SIZE(test_list);
diff --git a/test_conformance/computeinfo/pci_bus_info.cpp b/test_conformance/computeinfo/pci_bus_info.cpp
new file mode 100644
index 00000000..cd62ca05
--- /dev/null
+++ b/test_conformance/computeinfo/pci_bus_info.cpp
@@ -0,0 +1,53 @@
+//
+// Copyright (c) 2021 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/compat.h"
+
+#include <array>
+#include <bitset>
+
+#include "harness/testHarness.h"
+#include "harness/deviceInfo.h"
+
+int test_pci_bus_info(cl_device_id deviceID, cl_context context,
+ cl_command_queue ignoreQueue, int num_elements)
+{
+ if (!is_extension_available(deviceID, "cl_khr_pci_bus_info"))
+ {
+ log_info("cl_khr_pci_bus_info not supported. Skipping test...\n");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ cl_int error;
+
+ cl_device_pci_bus_info_khr info;
+
+ size_t size_ret;
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_PCI_BUS_INFO_KHR, 0, NULL,
+ &size_ret);
+ test_error(error, "Unable to query CL_DEVICE_PCI_BUS_INFO_KHR size");
+ test_assert_error(
+ size_ret == sizeof(info),
+ "Query for CL_DEVICE_PCI_BUS_INFO_KHR returned an unexpected size");
+
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_PCI_BUS_INFO_KHR, sizeof(info),
+ &info, NULL);
+ test_error(error, "Unable to query CL_DEVICE_PCI_BUS_INFO_KHR");
+
+ log_info("\tPCI Bus Info: %04x:%02x:%02x.%x\n", info.pci_domain,
+ info.pci_bus, info.pci_device, info.pci_function);
+
+ return TEST_PASS;
+}
diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp
index dddebb40..474fd364 100644
--- a/test_conformance/contractions/contractions.cpp
+++ b/test_conformance/contractions/contractions.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -434,7 +434,6 @@ static int ParseArgs( int argc, const char **argv )
gArgCount++;
}
}
- vlog( "\n\nTest binary built %s %s\n", __DATE__, __TIME__ );
PrintArch();
diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp
index 32998841..3ee072da 100644
--- a/test_conformance/conversions/basic_test_conversions.cpp
+++ b/test_conformance/conversions/basic_test_conversions.cpp
@@ -696,7 +696,8 @@ static void int2short( void *out, void *in){ ((cl_short*) out)[0] = ((cl_int*) i
static void int2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_int*) in)[0]; }
static void int2float( void *out, void *in)
{
- cl_int l = ((cl_int*) in)[0];
+ // Use volatile to prevent optimization by Clang compiler
+ volatile cl_int l = ((cl_int *)in)[0];
((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
}
static void int2double( void *out, void *in)
diff --git a/test_conformance/conversions/fplib.cpp b/test_conformance/conversions/fplib.cpp
index e739b9ae..3b19b56d 100644
--- a/test_conformance/conversions/fplib.cpp
+++ b/test_conformance/conversions/fplib.cpp
@@ -79,7 +79,6 @@ float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd)
uint32_t mantissa;
if (mantShift >= 0){
uint64_t temp = (uint64_t)data >> mantShift;
- uint64_t mask = (1 << mantShift) - 1;
if ((temp << mantShift) != data)
inExact = 1;
mantissa = (uint32_t)temp;
@@ -124,7 +123,6 @@ float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd)
uint32_t mantissa;
if (mantShift >= 0){
uint64_t temp = (uint64_t)data >> mantShift;
- uint64_t mask = (1 << mantShift) - 1;
if (temp << mantShift != data)
inExact = 1;
mantissa = (uint32_t)temp;
@@ -183,7 +181,6 @@ float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd)
uint32_t mantissa;
if (mantShift >= 0){
uint64_t temp = data >> mantShift;
- uint64_t mask = (1 << mantShift) - 1;
if (temp << mantShift != data)
inExact = 1;
mantissa = (uint32_t)temp;
@@ -209,7 +206,6 @@ float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd)
uint32_t mantissa;
if (mantShift >= 0){
uint64_t temp = (uint64_t)data >> mantShift;
- uint64_t mask = (1 << mantShift) - 1;
if (temp << mantShift != data)
inExact = 1;
mantissa = (uint32_t)temp;
diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp
index 87b8ead7..2b18b925 100644
--- a/test_conformance/conversions/test_conversions.cpp
+++ b/test_conformance/conversions/test_conversions.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -38,6 +38,7 @@
#include <sys/param.h>
#endif
+#include <sstream>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
@@ -47,6 +48,8 @@
#endif
#include <time.h>
+#include <algorithm>
+
#include "Sleep.h"
#include "basic_test_conversions.h"
@@ -340,7 +343,7 @@ int main (int argc, const char **argv )
static int ParseArgs( int argc, const char **argv )
{
int i;
- argList = (const char **)calloc( argc - 1, sizeof( char*) );
+ argList = (const char **)calloc(argc, sizeof(char *));
argCount = 0;
if( NULL == argList && argc > 1 )
@@ -481,8 +484,6 @@ static int ParseArgs( int argc, const char **argv )
vlog( "\n" );
- vlog( "Test binary built %s %s\n", __DATE__, __TIME__ );
-
PrintArch();
if( gWimpyMode )
@@ -1003,7 +1004,8 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod
uint64_t i;
gTestCount++;
- size_t blockCount = BUFFER_SIZE / MAX( gTypeSizes[ inType ], gTypeSizes[ outType ] );
+ size_t blockCount =
+ BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]);
size_t step = blockCount;
uint64_t lastCase = 1ULL << (8*gTypeSizes[ inType ]);
cl_event writeInputBuffer = NULL;
@@ -1078,7 +1080,7 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod
fflush(stdout);
}
- cl_uint count = (uint32_t) MIN( blockCount, lastCase - i );
+ cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i);
writeInputBufferInfo.count = count;
// Crate a user event to represent the status of the reference value computation completion
@@ -1556,84 +1558,40 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat,
cl_program program;
char testName[256];
int error = 0;
- const char **strings;
- size_t stringCount = 0;
+
+ std::ostringstream source;
+ if (outType == kdouble || inType == kdouble)
+ source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
// Create the program. This is a bit complicated because we are trying to avoid byte and short stores.
if (0 == vectorSize)
{
+ // Create the type names.
char inName[32];
char outName[32];
- const char *programSource[] =
- {
- "", // optional pragma
- "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " dest[i] = src[i];\n"
- "}\n"
- };
- stringCount = sizeof(programSource) / sizeof(programSource[0]);
- strings = programSource;
-
- if (outType == kdouble || inType == kdouble)
- programSource[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
-
- //create the type name
strncpy(inName, gTypeNames[inType], sizeof(inName));
strncpy(outName, gTypeNames[outType], sizeof(outName));
sprintf(testName, "test_implicit_%s_%s", outName, inName);
- vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], gTypeNames[outType]);
+
+ source << "__kernel void " << testName << "( __global " << inName
+ << " *src, __global " << outName << " *dest )\n";
+ source << "{\n";
+ source << " size_t i = get_global_id(0);\n";
+ source << " dest[i] = src[i];\n";
+ source << "}\n";
+
+ vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType],
+ gTypeNames[outType]);
fflush(stdout);
}
else
{
int vectorSizetmp = vectorSizes[vectorSize];
+ // Create the type names.
char convertString[128];
char inName[32];
char outName[32];
- const char *programSource[] =
- {
- "", // optional pragma
- "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " dest[i] = ", convertString, "( src[i] );\n"
- "}\n"
- };
- const char *programSourceV3[] =
- {
- "", // optional pragma
- "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " if( i + 1 < get_global_size(0))\n"
- " vstore3( ", convertString, "( vload3( i, src)), i, dest );\n"
- " else\n"
- " {\n"
- " ", inName, "3 in;\n"
- " ", outName, "3 out;\n"
- " if( 0 == (i & 1) )\n"
- " in.y = src[3*i+1];\n"
- " in.x = src[3*i];\n"
- " out = ", convertString, "( in ); \n"
- " dest[3*i] = out.x;\n"
- " if( 0 == (i & 1) )\n"
- " dest[3*i+1] = out.y;\n"
- " }\n"
- "}\n"
- };
- stringCount = 3 == vectorSizetmp ? sizeof(programSourceV3) / sizeof(programSourceV3[0]) :
- sizeof(programSource) / sizeof(programSource[0]);
- strings = 3 == vectorSizetmp ? programSourceV3 : programSource;
-
- if (outType == kdouble || inType == kdouble) {
- programSource[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
- programSourceV3[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
- }
-
- //create the type name
switch (vectorSizetmp)
{
case 1:
@@ -1658,8 +1616,40 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat,
vlog("Building %s( %s ) test\n", convertString, inName);
break;
}
-
fflush(stdout);
+
+ if (vectorSizetmp == 3)
+ {
+ source << "__kernel void " << testName << "( __global " << inName
+ << " *src, __global " << outName << " *dest )\n";
+ source << "{\n";
+ source << " size_t i = get_global_id(0);\n";
+ source << " if( i + 1 < get_global_size(0))\n";
+ source << " vstore3( " << convertString
+ << "( vload3( i, src)), i, dest );\n";
+ source << " else\n";
+ source << " {\n";
+ source << " " << inName << "3 in;\n";
+ source << " " << outName << "3 out;\n";
+ source << " if( 0 == (i & 1) )\n";
+ source << " in.y = src[3*i+1];\n";
+ source << " in.x = src[3*i];\n";
+ source << " out = " << convertString << "( in ); \n";
+ source << " dest[3*i] = out.x;\n";
+ source << " if( 0 == (i & 1) )\n";
+ source << " dest[3*i+1] = out.y;\n";
+ source << " }\n";
+ source << "}\n";
+ }
+ else
+ {
+ source << "__kernel void " << testName << "( __global " << inName
+ << " *src, __global " << outName << " *dest )\n";
+ source << "{\n";
+ source << " size_t i = get_global_id(0);\n";
+ source << " dest[i] = " << convertString << "( src[i] );\n";
+ source << "}\n";
+ }
}
*outKernel = NULL;
@@ -1668,11 +1658,12 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat,
flags = "-cl-denorms-are-zero";
// build it
- error = create_single_kernel_helper(gContext, &program, outKernel, (cl_uint)stringCount, strings, testName, flags);
+ std::string sourceString = source.str();
+ const char *programSource = sourceString.c_str();
+ error = create_single_kernel_helper(gContext, &program, outKernel, 1,
+ &programSource, testName, flags);
if (error)
{
- char buffer[2048] = "";
-
vlog_error("Failed to build kernel/program.\n", error);
clReleaseProgram(program);
return NULL;
diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp
index 8ced6629..f228f063 100644
--- a/test_conformance/device_execution/enqueue_ndrange.cpp
+++ b/test_conformance/device_execution/enqueue_ndrange.cpp
@@ -18,6 +18,7 @@
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
+#include <algorithm>
#include <vector>
#include "procs.h"
@@ -645,7 +646,7 @@ int test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_que
max_local_size = (max_local_size > MAX_GWS)? MAX_GWS: max_local_size;
if(gWimpyMode)
{
- max_local_size = MIN(8, max_local_size);
+ max_local_size = std::min((size_t)8, max_local_size);
}
cl_uint num = 10;
diff --git a/test_conformance/device_execution/host_queue_order.cpp b/test_conformance/device_execution/host_queue_order.cpp
index 2b5688d1..5376ea40 100644
--- a/test_conformance/device_execution/host_queue_order.cpp
+++ b/test_conformance/device_execution/host_queue_order.cpp
@@ -18,6 +18,7 @@
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
+#include <algorithm>
#include <vector>
#include "procs.h"
@@ -124,7 +125,7 @@ int test_host_queue_order(cl_device_id device, cl_context context, cl_command_qu
cl_uint num = arr_size(result);
if( gWimpyMode )
{
- num = MAX(num / 16, 4);
+ num = std::max(num / 16, 4U);
}
clMemWrapper res_mem;
diff --git a/test_conformance/events/action_classes.cpp b/test_conformance/events/action_classes.cpp
index d70d76bd..a84be6b6 100644
--- a/test_conformance/events/action_classes.cpp
+++ b/test_conformance/events/action_classes.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -19,7 +19,8 @@
const cl_uint BufferSizeReductionFactor = 20;
-cl_int Action::IGetPreferredImageSize2D( cl_device_id device, size_t &outWidth, size_t &outHeight )
+cl_int Action::IGetPreferredImageSize2D(cl_device_id device, size_t &outWidth,
+ size_t &outHeight)
{
cl_ulong maxAllocSize;
size_t maxWidth, maxHeight;
@@ -27,23 +28,27 @@ cl_int Action::IGetPreferredImageSize2D( cl_device_id device, size_t &outWidt
// Get the largest possible buffer we could allocate
- error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
- error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
- error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
- test_error( error, "Unable to get device config" );
+ error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, NULL);
+ error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH,
+ sizeof(maxWidth), &maxWidth, NULL);
+ error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
+ sizeof(maxHeight), &maxHeight, NULL);
+ test_error(error, "Unable to get device config");
// Create something of a decent size
- if( maxWidth * maxHeight * 4 > maxAllocSize / BufferSizeReductionFactor )
+ if (maxWidth * maxHeight * 4 > maxAllocSize / BufferSizeReductionFactor)
{
- float rootSize = sqrtf( (float)( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) );
+ float rootSize =
+ sqrtf((float)(maxAllocSize / (BufferSizeReductionFactor * 4)));
- if( (size_t)rootSize > maxWidth )
+ if ((size_t)rootSize > maxWidth)
outWidth = maxWidth;
else
outWidth = (size_t)rootSize;
- outHeight = (size_t)( ( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) / outWidth );
- if( outHeight > maxHeight )
- outHeight = maxHeight;
+ outHeight = (size_t)((maxAllocSize / (BufferSizeReductionFactor * 4))
+ / outWidth);
+ if (outHeight > maxHeight) outHeight = maxHeight;
}
else
{
@@ -51,19 +56,18 @@ cl_int Action::IGetPreferredImageSize2D( cl_device_id device, size_t &outWidt
outHeight = maxHeight;
}
- outWidth /=2;
- outHeight /=2;
+ outWidth /= 2;
+ outHeight /= 2;
- if (outWidth > 2048)
- outWidth = 2048;
- if (outHeight > 2048)
- outHeight = 2048;
+ if (outWidth > 2048) outWidth = 2048;
+ if (outHeight > 2048) outHeight = 2048;
log_info("\tImage size: %d x %d (%gMB)\n", (int)outWidth, (int)outHeight,
- (double)((int)outWidth*(int)outHeight*4)/(1024.0*1024.0));
+ (double)((int)outWidth * (int)outHeight * 4) / (1024.0 * 1024.0));
return CL_SUCCESS;
}
-cl_int Action::IGetPreferredImageSize3D( cl_device_id device, size_t &outWidth, size_t &outHeight, size_t &outDepth )
+cl_int Action::IGetPreferredImageSize3D(cl_device_id device, size_t &outWidth,
+ size_t &outHeight, size_t &outDepth)
{
cl_ulong maxAllocSize;
size_t maxWidth, maxHeight, maxDepth;
@@ -71,28 +75,34 @@ cl_int Action::IGetPreferredImageSize3D( cl_device_id device, size_t &outWidt
// Get the largest possible buffer we could allocate
- error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
- error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
- error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
- error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL );
- test_error( error, "Unable to get device config" );
+ error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, NULL);
+ error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
+ sizeof(maxWidth), &maxWidth, NULL);
+ error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
+ sizeof(maxHeight), &maxHeight, NULL);
+ error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
+ sizeof(maxDepth), &maxDepth, NULL);
+ test_error(error, "Unable to get device config");
// Create something of a decent size
- if( (cl_ulong)maxWidth * maxHeight * maxDepth > maxAllocSize / ( BufferSizeReductionFactor * 4 ) )
+ if ((cl_ulong)maxWidth * maxHeight * maxDepth
+ > maxAllocSize / (BufferSizeReductionFactor * 4))
{
- float rootSize = cbrtf( (float)( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) );
+ float rootSize =
+ cbrtf((float)(maxAllocSize / (BufferSizeReductionFactor * 4)));
- if( (size_t)rootSize > maxWidth )
+ if ((size_t)rootSize > maxWidth)
outWidth = maxWidth;
else
outWidth = (size_t)rootSize;
- if( (size_t)rootSize > maxHeight )
+ if ((size_t)rootSize > maxHeight)
outHeight = maxHeight;
else
outHeight = (size_t)rootSize;
- outDepth = (size_t)( ( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) / ( outWidth * outHeight ) );
- if( outDepth > maxDepth )
- outDepth = maxDepth;
+ outDepth = (size_t)((maxAllocSize / (BufferSizeReductionFactor * 4))
+ / (outWidth * outHeight));
+ if (outDepth > maxDepth) outDepth = maxDepth;
}
else
{
@@ -101,25 +111,25 @@ cl_int Action::IGetPreferredImageSize3D( cl_device_id device, size_t &outWidt
outDepth = maxDepth;
}
- outWidth /=2;
- outHeight /=2;
- outDepth /=2;
+ outWidth /= 2;
+ outHeight /= 2;
+ outDepth /= 2;
- if (outWidth > 512)
- outWidth = 512;
- if (outHeight > 512)
- outHeight = 512;
- if (outDepth > 512)
- outDepth = 512;
- log_info("\tImage size: %d x %d x %d (%gMB)\n", (int)outWidth, (int)outHeight, (int)outDepth,
- (double)((int)outWidth*(int)outHeight*(int)outDepth*4)/(1024.0*1024.0));
+ if (outWidth > 512) outWidth = 512;
+ if (outHeight > 512) outHeight = 512;
+ if (outDepth > 512) outDepth = 512;
+ log_info("\tImage size: %d x %d x %d (%gMB)\n", (int)outWidth,
+ (int)outHeight, (int)outDepth,
+ (double)((int)outWidth * (int)outHeight * (int)outDepth * 4)
+ / (1024.0 * 1024.0));
return CL_SUCCESS;
}
#pragma mark -------------------- Execution Sub-Classes -------------------------
-cl_int NDRangeKernelAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int NDRangeKernelAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
const char *long_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
@@ -132,101 +142,116 @@ cl_int NDRangeKernelAction::Setup( cl_device_id device, cl_context context, cl_c
" dst[tid] = (int)src[tid] * 3;\n"
" }\n"
"\n"
- "}\n" };
+ "}\n"
+ };
size_t threads[1] = { 1000 };
int error;
- if( create_single_kernel_helper( context, &mProgram, &mKernel, 1, long_kernel, "sample_test" ) )
+ if (create_single_kernel_helper(context, &mProgram, &mKernel, 1,
+ long_kernel, "sample_test"))
{
return -1;
}
- error = get_max_common_work_group_size( context, mKernel, threads[0], &mLocalThreads[0] );
- test_error( error, "Unable to get work group size to use" );
+ error = get_max_common_work_group_size(context, mKernel, threads[0],
+ &mLocalThreads[0]);
+ test_error(error, "Unable to get work group size to use");
mStreams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_float) * 1000, NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
mStreams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int) * 1000, NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
/* Set the arguments */
- error = clSetKernelArg( mKernel, 0, sizeof( mStreams[0] ), &mStreams[0] );
- test_error( error, "Unable to set kernel arguments" );
- error = clSetKernelArg( mKernel, 1, sizeof( mStreams[1] ), &mStreams[1] );
- test_error( error, "Unable to set kernel arguments" );
+ error = clSetKernelArg(mKernel, 0, sizeof(mStreams[0]), &mStreams[0]);
+ test_error(error, "Unable to set kernel arguments");
+ error = clSetKernelArg(mKernel, 1, sizeof(mStreams[1]), &mStreams[1]);
+ test_error(error, "Unable to set kernel arguments");
return CL_SUCCESS;
}
-cl_int NDRangeKernelAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int NDRangeKernelAction::Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent)
{
size_t threads[1] = { 1000 };
- cl_int error = clEnqueueNDRangeKernel( queue, mKernel, 1, NULL, threads, mLocalThreads, numWaits, waits, outEvent );
- test_error( error, "Unable to execute kernel" );
+ cl_int error =
+ clEnqueueNDRangeKernel(queue, mKernel, 1, NULL, threads, mLocalThreads,
+ numWaits, waits, outEvent);
+ test_error(error, "Unable to execute kernel");
return CL_SUCCESS;
}
#pragma mark -------------------- Buffer Sub-Classes -------------------------
-cl_int BufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue, bool allocate )
+cl_int BufferAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue, bool allocate)
{
cl_int error;
cl_ulong maxAllocSize;
// Get the largest possible buffer we could allocate
- error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+ error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, NULL);
- // Don't create a buffer quite that big, just so we have some space left over for other work
- mSize = (size_t)( maxAllocSize / BufferSizeReductionFactor );
+ // Don't create a buffer quite that big, just so we have some space left
+ // over for other work
+ mSize = (size_t)(maxAllocSize / BufferSizeReductionFactor);
// Cap at 128M so tests complete in a reasonable amount of time.
- if (mSize > 128 << 20)
- mSize = 128 << 20;
+ if (mSize > 128 << 20) mSize = 128 << 20;
- mSize /=2;
+ mSize /= 2;
- log_info("\tBuffer size: %gMB\n", (double)mSize/(1024.0*1024.0));
+ log_info("\tBuffer size: %gMB\n", (double)mSize / (1024.0 * 1024.0));
- mBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mSize, NULL, &error );
- test_error( error, "Unable to create buffer to test against" );
+ mBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+ mSize, NULL, &error);
+ test_error(error, "Unable to create buffer to test against");
- mOutBuffer = malloc( mSize );
- if( mOutBuffer == NULL )
+ mOutBuffer = malloc(mSize);
+ if (mOutBuffer == NULL)
{
- log_error( "ERROR: Unable to allocate temp buffer (out of memory)\n" );
+ log_error("ERROR: Unable to allocate temp buffer (out of memory)\n");
return CL_OUT_OF_RESOURCES;
}
return CL_SUCCESS;
}
-cl_int ReadBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int ReadBufferAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
- return BufferAction::Setup( device, context, queue, true );
+ return BufferAction::Setup(device, context, queue, true);
}
-cl_int ReadBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int ReadBufferAction::Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent)
{
- cl_int error = clEnqueueReadBuffer( queue, mBuffer, CL_FALSE, 0, mSize, mOutBuffer, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue buffer read" );
+ cl_int error = clEnqueueReadBuffer(queue, mBuffer, CL_FALSE, 0, mSize,
+ mOutBuffer, numWaits, waits, outEvent);
+ test_error(error, "Unable to enqueue buffer read");
return CL_SUCCESS;
}
-cl_int WriteBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int WriteBufferAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
- return BufferAction::Setup( device, context, queue, true );
+ return BufferAction::Setup(device, context, queue, true);
}
-cl_int WriteBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int WriteBufferAction::Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent)
{
- cl_int error = clEnqueueWriteBuffer( queue, mBuffer, CL_FALSE, 0, mSize, mOutBuffer, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue buffer write" );
+ cl_int error = clEnqueueWriteBuffer(queue, mBuffer, CL_FALSE, 0, mSize,
+ mOutBuffer, numWaits, waits, outEvent);
+ test_error(error, "Unable to enqueue buffer write");
return CL_SUCCESS;
}
@@ -234,40 +259,46 @@ cl_int WriteBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_
MapBufferAction::~MapBufferAction()
{
if (mQueue)
- clEnqueueUnmapMemObject( mQueue, mBuffer, mMappedPtr, 0, NULL, NULL );
+ clEnqueueUnmapMemObject(mQueue, mBuffer, mMappedPtr, 0, NULL, NULL);
}
-cl_int MapBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int MapBufferAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
- return BufferAction::Setup( device, context, queue, false );
+ return BufferAction::Setup(device, context, queue, false);
}
-cl_int MapBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int MapBufferAction::Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent)
{
cl_int error;
mQueue = queue;
- mMappedPtr = clEnqueueMapBuffer( queue, mBuffer, CL_FALSE, CL_MAP_READ, 0, mSize, numWaits, waits, outEvent, &error );
- test_error( error, "Unable to enqueue buffer map" );
+ mMappedPtr = clEnqueueMapBuffer(queue, mBuffer, CL_FALSE, CL_MAP_READ, 0,
+ mSize, numWaits, waits, outEvent, &error);
+ test_error(error, "Unable to enqueue buffer map");
return CL_SUCCESS;
}
-cl_int UnmapBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int UnmapBufferAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
- cl_int error = BufferAction::Setup( device, context, queue, false );
- if( error != CL_SUCCESS )
- return error;
+ cl_int error = BufferAction::Setup(device, context, queue, false);
+ if (error != CL_SUCCESS) return error;
- mMappedPtr = clEnqueueMapBuffer( queue, mBuffer, CL_TRUE, CL_MAP_READ, 0, mSize, 0, NULL, NULL, &error );
- test_error( error, "Unable to enqueue buffer map" );
+ mMappedPtr = clEnqueueMapBuffer(queue, mBuffer, CL_TRUE, CL_MAP_READ, 0,
+ mSize, 0, NULL, NULL, &error);
+ test_error(error, "Unable to enqueue buffer map");
return CL_SUCCESS;
}
-cl_int UnmapBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int UnmapBufferAction::Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent)
{
- cl_int error = clEnqueueUnmapMemObject( queue, mBuffer, mMappedPtr, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue buffer unmap" );
+ cl_int error = clEnqueueUnmapMemObject(queue, mBuffer, mMappedPtr, numWaits,
+ waits, outEvent);
+ test_error(error, "Unable to enqueue buffer unmap");
return CL_SUCCESS;
}
@@ -275,349 +306,410 @@ cl_int UnmapBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_
#pragma mark -------------------- Read/Write Image Classes -------------------------
-cl_int ReadImage2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int ReadImage2DAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) )
+ if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight)))
return error;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
+ mImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, 0, NULL, &error);
- test_error( error, "Unable to create image to test against" );
+ test_error(error, "Unable to create image to test against");
- mOutput = malloc( mWidth * mHeight * 4 );
- if( mOutput == NULL )
+ mOutput = malloc(mWidth * mHeight * 4);
+ if (mOutput == NULL)
{
- log_error( "ERROR: Unable to allocate buffer: out of memory\n" );
+ log_error("ERROR: Unable to allocate buffer: out of memory\n");
return CL_OUT_OF_RESOURCES;
}
return CL_SUCCESS;
}
-cl_int ReadImage2DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int ReadImage2DAction::Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent)
{
- size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 };
+ size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 };
- cl_int error = clEnqueueReadImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue image read" );
+ cl_int error = clEnqueueReadImage(queue, mImage, CL_FALSE, origin, region,
+ 0, 0, mOutput, numWaits, waits, outEvent);
+ test_error(error, "Unable to enqueue image read");
return CL_SUCCESS;
}
-cl_int ReadImage3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int ReadImage3DAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+ if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth)))
return error;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, mDepth, 0, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
- mOutput = malloc( mWidth * mHeight * mDepth * 4 );
- if( mOutput == NULL )
+ mOutput = malloc(mWidth * mHeight * mDepth * 4);
+ if (mOutput == NULL)
{
- log_error( "ERROR: Unable to allocate buffer: out of memory\n" );
+ log_error("ERROR: Unable to allocate buffer: out of memory\n");
return CL_OUT_OF_RESOURCES;
}
return CL_SUCCESS;
}
-cl_int ReadImage3DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int ReadImage3DAction::Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent)
{
- size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth };
+ size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth };
- cl_int error = clEnqueueReadImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue image read" );
+ cl_int error = clEnqueueReadImage(queue, mImage, CL_FALSE, origin, region,
+ 0, 0, mOutput, numWaits, waits, outEvent);
+ test_error(error, "Unable to enqueue image read");
return CL_SUCCESS;
}
-cl_int WriteImage2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int WriteImage2DAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) )
+ if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight)))
return error;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth,
+ mHeight, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
- mOutput = malloc( mWidth * mHeight * 4 );
- if( mOutput == NULL )
+ mOutput = malloc(mWidth * mHeight * 4);
+ if (mOutput == NULL)
{
- log_error( "ERROR: Unable to allocate buffer: out of memory\n" );
+ log_error("ERROR: Unable to allocate buffer: out of memory\n");
return CL_OUT_OF_RESOURCES;
}
return CL_SUCCESS;
}
-cl_int WriteImage2DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int WriteImage2DAction::Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent)
{
- size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 };
+ size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 };
- cl_int error = clEnqueueWriteImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue image write" );
+ cl_int error =
+ clEnqueueWriteImage(queue, mImage, CL_FALSE, origin, region, 0, 0,
+ mOutput, numWaits, waits, outEvent);
+ test_error(error, "Unable to enqueue image write");
return CL_SUCCESS;
}
-cl_int WriteImage3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int WriteImage3DAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+ if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth)))
return error;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, mDepth, 0, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
- mOutput = malloc( mWidth * mHeight * mDepth * 4 );
- if( mOutput == NULL )
+ mOutput = malloc(mWidth * mHeight * mDepth * 4);
+ if (mOutput == NULL)
{
- log_error( "ERROR: Unable to allocate buffer: out of memory\n" );
+ log_error("ERROR: Unable to allocate buffer: out of memory\n");
return CL_OUT_OF_RESOURCES;
}
return CL_SUCCESS;
}
-cl_int WriteImage3DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int WriteImage3DAction::Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent)
{
- size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth };
+ size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth };
- cl_int error = clEnqueueWriteImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue image write" );
+ cl_int error =
+ clEnqueueWriteImage(queue, mImage, CL_FALSE, origin, region, 0, 0,
+ mOutput, numWaits, waits, outEvent);
+ test_error(error, "Unable to enqueue image write");
return CL_SUCCESS;
}
#pragma mark -------------------- Copy Image Classes -------------------------
-cl_int CopyImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int CopyImageAction::Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent)
{
- size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth };
+ size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth };
- cl_int error = clEnqueueCopyImage( queue, mSrcImage, mDstImage, origin, origin, region, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue image copy" );
+ cl_int error =
+ clEnqueueCopyImage(queue, mSrcImage, mDstImage, origin, origin, region,
+ numWaits, waits, outEvent);
+ test_error(error, "Unable to enqueue image copy");
return CL_SUCCESS;
}
-cl_int CopyImage2Dto2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int CopyImage2Dto2DAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) )
+ if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight)))
return error;
mWidth /= 2;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mSrcImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mSrcImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
- mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mDstImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth,
+ mHeight, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
mDepth = 1;
return CL_SUCCESS;
}
-cl_int CopyImage2Dto3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int CopyImage2Dto3DAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+ if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth)))
return error;
mDepth /= 2;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mSrcImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mSrcImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
- mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mDstImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, mDepth, 0, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
mDepth = 1;
return CL_SUCCESS;
}
-cl_int CopyImage3Dto2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int CopyImage3Dto2DAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+ if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth)))
return error;
mDepth /= 2;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mSrcImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, mDepth, 0, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
- mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mDstImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth,
+ mHeight, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
mDepth = 1;
return CL_SUCCESS;
}
-cl_int CopyImage3Dto3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int CopyImage3Dto3DAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+ if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth)))
return error;
mDepth /= 2;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mSrcImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, mDepth, 0, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
- mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mDstImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, mDepth, 0, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
return CL_SUCCESS;
}
#pragma mark -------------------- Copy Image/Buffer Classes -------------------------
-cl_int Copy2DImageToBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int Copy2DImageToBufferAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) )
+ if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight)))
return error;
mWidth /= 2;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mSrcImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mSrcImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
- mDstBuffer = clCreateBuffer( context, CL_MEM_WRITE_ONLY, mWidth * mHeight * 4, NULL, &error );
- test_error( error, "Unable to create buffer to test against" );
+ mDstBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+ mWidth * mHeight * 4, NULL, &error);
+ test_error(error, "Unable to create buffer to test against");
return CL_SUCCESS;
}
-cl_int Copy2DImageToBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int Copy2DImageToBufferAction::Execute(cl_command_queue queue,
+ cl_uint numWaits, cl_event *waits,
+ cl_event *outEvent)
{
- size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 };
+ size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 };
- cl_int error = clEnqueueCopyImageToBuffer( queue, mSrcImage, mDstBuffer, origin, region, 0, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue image to buffer copy" );
+ cl_int error =
+ clEnqueueCopyImageToBuffer(queue, mSrcImage, mDstBuffer, origin, region,
+ 0, numWaits, waits, outEvent);
+ test_error(error, "Unable to enqueue image to buffer copy");
return CL_SUCCESS;
}
-cl_int Copy3DImageToBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int Copy3DImageToBufferAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+ if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth)))
return error;
mDepth /= 2;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mSrcImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, mDepth, 0, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
- mDstBuffer = clCreateBuffer( context, CL_MEM_WRITE_ONLY, mWidth * mHeight * mDepth * 4, NULL, &error );
- test_error( error, "Unable to create buffer to test against" );
+ mDstBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+ mWidth * mHeight * mDepth * 4, NULL, &error);
+ test_error(error, "Unable to create buffer to test against");
return CL_SUCCESS;
}
-cl_int Copy3DImageToBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int Copy3DImageToBufferAction::Execute(cl_command_queue queue,
+ cl_uint numWaits, cl_event *waits,
+ cl_event *outEvent)
{
- size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth };
+ size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth };
- cl_int error = clEnqueueCopyImageToBuffer( queue, mSrcImage, mDstBuffer, origin, region, 0, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue image to buffer copy" );
+ cl_int error =
+ clEnqueueCopyImageToBuffer(queue, mSrcImage, mDstBuffer, origin, region,
+ 0, numWaits, waits, outEvent);
+ test_error(error, "Unable to enqueue image to buffer copy");
return CL_SUCCESS;
}
-cl_int CopyBufferTo2DImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int CopyBufferTo2DImageAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) )
+ if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight)))
return error;
mWidth /= 2;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mSrcBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY, mWidth * mHeight * 4, NULL, &error );
- test_error( error, "Unable to create buffer to test against" );
+ mSrcBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, mWidth * mHeight * 4,
+ NULL, &error);
+ test_error(error, "Unable to create buffer to test against");
- mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mDstImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth,
+ mHeight, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
return CL_SUCCESS;
}
-cl_int CopyBufferTo2DImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int CopyBufferTo2DImageAction::Execute(cl_command_queue queue,
+ cl_uint numWaits, cl_event *waits,
+ cl_event *outEvent)
{
- size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 };
+ size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 };
- cl_int error = clEnqueueCopyBufferToImage( queue, mSrcBuffer, mDstImage, 0, origin, region, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue buffer to image copy" );
+ cl_int error =
+ clEnqueueCopyBufferToImage(queue, mSrcBuffer, mDstImage, 0, origin,
+ region, numWaits, waits, outEvent);
+ test_error(error, "Unable to enqueue buffer to image copy");
return CL_SUCCESS;
}
-cl_int CopyBufferTo3DImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int CopyBufferTo3DImageAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+ if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth)))
return error;
mDepth /= 2;
- mSrcBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY, mWidth * mHeight * mDepth * 4, NULL, &error );
- test_error( error, "Unable to create buffer to test against" );
+ mSrcBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY,
+ mWidth * mHeight * mDepth * 4, NULL, &error);
+ test_error(error, "Unable to create buffer to test against");
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mDstImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth,
+ mHeight, mDepth, 0, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
return CL_SUCCESS;
}
-cl_int CopyBufferTo3DImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int CopyBufferTo3DImageAction::Execute(cl_command_queue queue,
+ cl_uint numWaits, cl_event *waits,
+ cl_event *outEvent)
{
- size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth };
+ size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth };
- cl_int error = clEnqueueCopyBufferToImage( queue, mSrcBuffer, mDstImage, 0, origin, region, numWaits, waits, outEvent );
- test_error( error, "Unable to enqueue buffer to image copy" );
+ cl_int error =
+ clEnqueueCopyBufferToImage(queue, mSrcBuffer, mDstImage, 0, origin,
+ region, numWaits, waits, outEvent);
+ test_error(error, "Unable to enqueue buffer to image copy");
return CL_SUCCESS;
}
@@ -627,34 +719,39 @@ cl_int CopyBufferTo3DImageAction::Execute( cl_command_queue queue, cl_uint numWa
MapImageAction::~MapImageAction()
{
if (mQueue)
- clEnqueueUnmapMemObject( mQueue, mImage, mMappedPtr, 0, NULL, NULL );
+ clEnqueueUnmapMemObject(mQueue, mImage, mMappedPtr, 0, NULL, NULL);
}
-cl_int MapImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+cl_int MapImageAction::Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
cl_int error;
- if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) )
+ if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight)))
return error;
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
- mImage = create_image_2d( context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, &format, mWidth, mHeight, 0, NULL, &error );
- test_error( error, "Unable to create image to test against" );
+ mImage = create_image_2d(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+ &format, mWidth, mHeight, 0, NULL, &error);
+ test_error(error, "Unable to create image to test against");
return CL_SUCCESS;
}
-cl_int MapImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+cl_int MapImageAction::Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent)
{
cl_int error;
- size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 };
+ size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 };
size_t outPitch;
mQueue = queue;
- mMappedPtr = clEnqueueMapImage( queue, mImage, CL_FALSE, CL_MAP_READ, origin, region, &outPitch, NULL, numWaits, waits, outEvent, &error );
- test_error( error, "Unable to enqueue image map" );
+ mMappedPtr =
+ clEnqueueMapImage(queue, mImage, CL_FALSE, CL_MAP_READ, origin, region,
+ &outPitch, NULL, numWaits, waits, outEvent, &error);
+ test_error(error, "Unable to enqueue image map");
return CL_SUCCESS;
}
diff --git a/test_conformance/events/action_classes.h b/test_conformance/events/action_classes.h
index 069ed346..e528f11a 100644
--- a/test_conformance/events/action_classes.h
+++ b/test_conformance/events/action_classes.h
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -23,303 +23,319 @@
// it would potentially be possible for an implementation to make actions
// wait on one another based on their shared I/O, not because of their
// wait lists!
-class Action
-{
- public:
- Action() {}
- virtual ~Action() {}
-
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ) = 0;
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) = 0;
-
- virtual const char * GetName( void ) const = 0;
-
- protected:
-
- cl_int IGetPreferredImageSize2D( cl_device_id device, size_t &outWidth, size_t &outHeight );
- cl_int IGetPreferredImageSize3D( cl_device_id device, size_t &outWidth, size_t &outHeight, size_t &outDepth );
+class Action {
+public:
+ Action() {}
+ virtual ~Action() {}
+
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue) = 0;
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent) = 0;
+
+ virtual const char *GetName(void) const = 0;
+
+protected:
+ cl_int IGetPreferredImageSize2D(cl_device_id device, size_t &outWidth,
+ size_t &outHeight);
+ cl_int IGetPreferredImageSize3D(cl_device_id device, size_t &outWidth,
+ size_t &outHeight, size_t &outDepth);
};
// Simple NDRangeKernel execution that takes a noticable amount of time
-class NDRangeKernelAction : public Action
-{
- public:
- NDRangeKernelAction() {}
- virtual ~NDRangeKernelAction() {}
-
- size_t mLocalThreads[ 1 ];
- clMemWrapper mStreams[ 2 ];
- clProgramWrapper mProgram;
- clKernelWrapper mKernel;
-
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
-
- virtual const char * GetName( void ) const { return "NDRangeKernel"; }
+class NDRangeKernelAction : public Action {
+public:
+ NDRangeKernelAction() {}
+ virtual ~NDRangeKernelAction() {}
+
+ size_t mLocalThreads[1];
+ clMemWrapper mStreams[2];
+ clProgramWrapper mProgram;
+ clKernelWrapper mKernel;
+
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
+
+ virtual const char *GetName(void) const { return "NDRangeKernel"; }
};
// Base action for buffer actions
-class BufferAction : public Action
-{
- public:
- clMemWrapper mBuffer;
- size_t mSize;
- void *mOutBuffer;
+class BufferAction : public Action {
+public:
+ clMemWrapper mBuffer;
+ size_t mSize;
+ void *mOutBuffer;
- BufferAction() { mOutBuffer = NULL; }
- virtual ~BufferAction() { free( mOutBuffer ); }
+ BufferAction() { mOutBuffer = NULL; }
+ virtual ~BufferAction() { free(mOutBuffer); }
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue, bool allocate );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue, bool allocate);
};
-class ReadBufferAction : public BufferAction
-{
- public:
- ReadBufferAction() {}
- virtual ~ReadBufferAction() {}
+class ReadBufferAction : public BufferAction {
+public:
+ ReadBufferAction() {}
+ virtual ~ReadBufferAction() {}
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "ReadBuffer"; }
+ virtual const char *GetName(void) const { return "ReadBuffer"; }
};
-class WriteBufferAction : public BufferAction
-{
- public:
- WriteBufferAction() {}
- virtual ~WriteBufferAction() {}
+class WriteBufferAction : public BufferAction {
+public:
+ WriteBufferAction() {}
+ virtual ~WriteBufferAction() {}
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "WriteBuffer"; }
+ virtual const char *GetName(void) const { return "WriteBuffer"; }
};
-class MapBufferAction : public BufferAction
-{
- public:
- MapBufferAction() : mQueue(0) {}
+class MapBufferAction : public BufferAction {
+public:
+ MapBufferAction(): mQueue(0) {}
- cl_command_queue mQueue;
- void *mMappedPtr;
+ cl_command_queue mQueue;
+ void *mMappedPtr;
- virtual ~MapBufferAction();
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual ~MapBufferAction();
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "MapBuffer"; }
+ virtual const char *GetName(void) const { return "MapBuffer"; }
};
-class UnmapBufferAction : public BufferAction
-{
- public:
- UnmapBufferAction() {}
- virtual ~UnmapBufferAction() {}
+class UnmapBufferAction : public BufferAction {
+public:
+ UnmapBufferAction() {}
+ virtual ~UnmapBufferAction() {}
- void *mMappedPtr;
+ void *mMappedPtr;
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "UnmapBuffer"; }
+ virtual const char *GetName(void) const { return "UnmapBuffer"; }
};
-class ReadImage2DAction : public Action
-{
- public:
- ReadImage2DAction() { mOutput = NULL; }
- virtual ~ReadImage2DAction() { free( mOutput ); }
+class ReadImage2DAction : public Action {
+public:
+ ReadImage2DAction() { mOutput = NULL; }
+ virtual ~ReadImage2DAction() { free(mOutput); }
- clMemWrapper mImage;
- size_t mWidth, mHeight;
- void *mOutput;
+ clMemWrapper mImage;
+ size_t mWidth, mHeight;
+ void *mOutput;
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "ReadImage2D"; }
+ virtual const char *GetName(void) const { return "ReadImage2D"; }
};
-class ReadImage3DAction : public Action
-{
- public:
- ReadImage3DAction() { mOutput = NULL; }
- virtual ~ReadImage3DAction() { free( mOutput ); }
+class ReadImage3DAction : public Action {
+public:
+ ReadImage3DAction() { mOutput = NULL; }
+ virtual ~ReadImage3DAction() { free(mOutput); }
- clMemWrapper mImage;
- size_t mWidth, mHeight, mDepth;
- void *mOutput;
+ clMemWrapper mImage;
+ size_t mWidth, mHeight, mDepth;
+ void *mOutput;
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "ReadImage3D"; }
+ virtual const char *GetName(void) const { return "ReadImage3D"; }
};
-class WriteImage2DAction : public Action
-{
- public:
- clMemWrapper mImage;
- size_t mWidth, mHeight;
- void *mOutput;
+class WriteImage2DAction : public Action {
+public:
+ clMemWrapper mImage;
+ size_t mWidth, mHeight;
+ void *mOutput;
- WriteImage2DAction() { mOutput = NULL; }
- virtual ~WriteImage2DAction() { free( mOutput ); }
+ WriteImage2DAction() { mOutput = NULL; }
+ virtual ~WriteImage2DAction() { free(mOutput); }
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "WriteImage2D"; }
+ virtual const char *GetName(void) const { return "WriteImage2D"; }
};
-class WriteImage3DAction : public Action
-{
- public:
- clMemWrapper mImage;
- size_t mWidth, mHeight, mDepth;
- void *mOutput;
+class WriteImage3DAction : public Action {
+public:
+ clMemWrapper mImage;
+ size_t mWidth, mHeight, mDepth;
+ void *mOutput;
- WriteImage3DAction() { mOutput = NULL; }
- virtual ~WriteImage3DAction() { free( mOutput ); }
+ WriteImage3DAction() { mOutput = NULL; }
+ virtual ~WriteImage3DAction() { free(mOutput); }
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "WriteImage3D"; }
+ virtual const char *GetName(void) const { return "WriteImage3D"; }
};
-class CopyImageAction : public Action
-{
- public:
- CopyImageAction() {}
- virtual ~CopyImageAction() {}
+class CopyImageAction : public Action {
+public:
+ CopyImageAction() {}
+ virtual ~CopyImageAction() {}
- clMemWrapper mSrcImage, mDstImage;
- size_t mWidth, mHeight, mDepth;
+ clMemWrapper mSrcImage, mDstImage;
+ size_t mWidth, mHeight, mDepth;
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
};
-class CopyImage2Dto2DAction : public CopyImageAction
-{
- public:
- CopyImage2Dto2DAction() {}
- virtual ~CopyImage2Dto2DAction() {}
+class CopyImage2Dto2DAction : public CopyImageAction {
+public:
+ CopyImage2Dto2DAction() {}
+ virtual ~CopyImage2Dto2DAction() {}
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
- virtual const char * GetName( void ) const { return "CopyImage2Dto2D"; }
+ virtual const char *GetName(void) const { return "CopyImage2Dto2D"; }
};
-class CopyImage2Dto3DAction : public CopyImageAction
-{
- public:
- CopyImage2Dto3DAction() {}
- virtual ~CopyImage2Dto3DAction() {}
+class CopyImage2Dto3DAction : public CopyImageAction {
+public:
+ CopyImage2Dto3DAction() {}
+ virtual ~CopyImage2Dto3DAction() {}
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
- virtual const char * GetName( void ) const { return "CopyImage2Dto3D"; }
+ virtual const char *GetName(void) const { return "CopyImage2Dto3D"; }
};
-class CopyImage3Dto2DAction : public CopyImageAction
-{
- public:
- CopyImage3Dto2DAction() {}
- virtual ~CopyImage3Dto2DAction() {}
+class CopyImage3Dto2DAction : public CopyImageAction {
+public:
+ CopyImage3Dto2DAction() {}
+ virtual ~CopyImage3Dto2DAction() {}
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
- virtual const char * GetName( void ) const { return "CopyImage3Dto2D"; }
+ virtual const char *GetName(void) const { return "CopyImage3Dto2D"; }
};
-class CopyImage3Dto3DAction : public CopyImageAction
-{
- public:
- CopyImage3Dto3DAction() {}
- virtual ~CopyImage3Dto3DAction() {}
+class CopyImage3Dto3DAction : public CopyImageAction {
+public:
+ CopyImage3Dto3DAction() {}
+ virtual ~CopyImage3Dto3DAction() {}
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
- virtual const char * GetName( void ) const { return "CopyImage3Dto3D"; }
+ virtual const char *GetName(void) const { return "CopyImage3Dto3D"; }
};
-class Copy2DImageToBufferAction : public Action
-{
- public:
- Copy2DImageToBufferAction() {}
- virtual ~Copy2DImageToBufferAction() {}
+class Copy2DImageToBufferAction : public Action {
+public:
+ Copy2DImageToBufferAction() {}
+ virtual ~Copy2DImageToBufferAction() {}
- clMemWrapper mSrcImage, mDstBuffer;
- size_t mWidth, mHeight;
+ clMemWrapper mSrcImage, mDstBuffer;
+ size_t mWidth, mHeight;
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "Copy2DImageToBuffer"; }
+ virtual const char *GetName(void) const { return "Copy2DImageToBuffer"; }
};
-class Copy3DImageToBufferAction : public Action
-{
- public:
- Copy3DImageToBufferAction() {}
- virtual ~Copy3DImageToBufferAction() {}
+class Copy3DImageToBufferAction : public Action {
+public:
+ Copy3DImageToBufferAction() {}
+ virtual ~Copy3DImageToBufferAction() {}
- clMemWrapper mSrcImage, mDstBuffer;
- size_t mWidth, mHeight, mDepth;
+ clMemWrapper mSrcImage, mDstBuffer;
+ size_t mWidth, mHeight, mDepth;
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "Copy3DImageToBuffer"; }
+ virtual const char *GetName(void) const { return "Copy3DImageToBuffer"; }
};
-class CopyBufferTo2DImageAction : public Action
-{
- public:
- CopyBufferTo2DImageAction() {}
- virtual ~CopyBufferTo2DImageAction() {}
+class CopyBufferTo2DImageAction : public Action {
+public:
+ CopyBufferTo2DImageAction() {}
+ virtual ~CopyBufferTo2DImageAction() {}
- clMemWrapper mSrcBuffer, mDstImage;
- size_t mWidth, mHeight;
+ clMemWrapper mSrcBuffer, mDstImage;
+ size_t mWidth, mHeight;
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "CopyBufferTo2D"; }
+ virtual const char *GetName(void) const { return "CopyBufferTo2D"; }
};
-class CopyBufferTo3DImageAction : public Action
-{
- public:
- CopyBufferTo3DImageAction() {}
- virtual ~CopyBufferTo3DImageAction() {}
+class CopyBufferTo3DImageAction : public Action {
+public:
+ CopyBufferTo3DImageAction() {}
+ virtual ~CopyBufferTo3DImageAction() {}
- clMemWrapper mSrcBuffer, mDstImage;
- size_t mWidth, mHeight, mDepth;
+ clMemWrapper mSrcBuffer, mDstImage;
+ size_t mWidth, mHeight, mDepth;
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "CopyBufferTo3D"; }
+ virtual const char *GetName(void) const { return "CopyBufferTo3D"; }
};
-class MapImageAction : public Action
-{
- public:
- MapImageAction() : mQueue(0) {}
+class MapImageAction : public Action {
+public:
+ MapImageAction(): mQueue(0) {}
- clMemWrapper mImage;
- size_t mWidth, mHeight;
- void *mMappedPtr;
- cl_command_queue mQueue;
+ clMemWrapper mImage;
+ size_t mWidth, mHeight;
+ void *mMappedPtr;
+ cl_command_queue mQueue;
- virtual ~MapImageAction();
- virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
- virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+ virtual ~MapImageAction();
+ virtual cl_int Setup(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits,
+ cl_event *waits, cl_event *outEvent);
- virtual const char * GetName( void ) const { return "MapImage"; }
+ virtual const char *GetName(void) const { return "MapImage"; }
};
diff --git a/test_conformance/events/main.cpp b/test_conformance/events/main.cpp
index 777d2d36..74682f99 100644
--- a/test_conformance/events/main.cpp
+++ b/test_conformance/events/main.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -24,44 +24,44 @@
#endif
test_definition test_list[] = {
- ADD_TEST( event_get_execute_status ),
- ADD_TEST( event_get_write_array_status ),
- ADD_TEST( event_get_read_array_status ),
- ADD_TEST( event_get_info ),
- ADD_TEST( event_wait_for_execute ),
- ADD_TEST( event_wait_for_array ),
- ADD_TEST( event_flush ),
- ADD_TEST( event_finish_execute ),
- ADD_TEST( event_finish_array ),
- ADD_TEST( event_release_before_done ),
- ADD_TEST( event_enqueue_marker ),
+ ADD_TEST(event_get_execute_status),
+ ADD_TEST(event_get_write_array_status),
+ ADD_TEST(event_get_read_array_status),
+ ADD_TEST(event_get_info),
+ ADD_TEST(event_wait_for_execute),
+ ADD_TEST(event_wait_for_array),
+ ADD_TEST(event_flush),
+ ADD_TEST(event_finish_execute),
+ ADD_TEST(event_finish_array),
+ ADD_TEST(event_release_before_done),
+ ADD_TEST(event_enqueue_marker),
#ifdef CL_VERSION_1_2
- ADD_TEST( event_enqueue_marker_with_event_list ),
- ADD_TEST( event_enqueue_barrier_with_event_list ),
+ ADD_TEST(event_enqueue_marker_with_event_list),
+ ADD_TEST(event_enqueue_barrier_with_event_list),
#endif
- ADD_TEST( out_of_order_event_waitlist_single_queue ),
- ADD_TEST( out_of_order_event_waitlist_multi_queue ),
- ADD_TEST( out_of_order_event_waitlist_multi_queue_multi_device ),
- ADD_TEST( out_of_order_event_enqueue_wait_for_events_single_queue ),
- ADD_TEST( out_of_order_event_enqueue_wait_for_events_multi_queue ),
- ADD_TEST( out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device ),
- ADD_TEST( out_of_order_event_enqueue_marker_single_queue ),
- ADD_TEST( out_of_order_event_enqueue_marker_multi_queue ),
- ADD_TEST( out_of_order_event_enqueue_marker_multi_queue_multi_device ),
- ADD_TEST( out_of_order_event_enqueue_barrier_single_queue ),
+ ADD_TEST(out_of_order_event_waitlist_single_queue),
+ ADD_TEST(out_of_order_event_waitlist_multi_queue),
+ ADD_TEST(out_of_order_event_waitlist_multi_queue_multi_device),
+ ADD_TEST(out_of_order_event_enqueue_wait_for_events_single_queue),
+ ADD_TEST(out_of_order_event_enqueue_wait_for_events_multi_queue),
+ ADD_TEST(
+ out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device),
+ ADD_TEST(out_of_order_event_enqueue_marker_single_queue),
+ ADD_TEST(out_of_order_event_enqueue_marker_multi_queue),
+ ADD_TEST(out_of_order_event_enqueue_marker_multi_queue_multi_device),
+ ADD_TEST(out_of_order_event_enqueue_barrier_single_queue),
- ADD_TEST( waitlists ),
- ADD_TEST( userevents ),
- ADD_TEST( callbacks ),
- ADD_TEST( callbacks_simultaneous ),
- ADD_TEST( userevents_multithreaded ),
+ ADD_TEST(waitlists),
+ ADD_TEST(userevents),
+ ADD_TEST(callbacks),
+ ADD_TEST(callbacks_simultaneous),
+ ADD_TEST(userevents_multithreaded),
};
-const int test_num = ARRAY_SIZE( test_list );
+const int test_num = ARRAY_SIZE(test_list);
int main(int argc, const char *argv[])
{
return runTestHarness(argc, argv, test_num, test_list, false, 0);
}
-
diff --git a/test_conformance/events/procs.h b/test_conformance/events/procs.h
index f077c247..97309db3 100644
--- a/test_conformance/events/procs.h
+++ b/test_conformance/events/procs.h
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -18,44 +18,101 @@
#include "harness/typeWrappers.h"
#include "harness/clImageHelper.h"
-extern float random_float(float low, float high);
-extern float calculate_ulperror(float a, float b);
-
-
-extern int test_event_get_execute_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_event_get_write_array_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_event_get_read_array_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_event_get_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_event_wait_for_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_event_wait_for_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_event_flush(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_event_finish_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_event_finish_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_event_release_before_done(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_event_enqueue_marker(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-#ifdef CL_VERSION_1_2
-extern int test_event_enqueue_marker_with_event_list(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_event_enqueue_barrier_with_event_list(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-#endif
+extern float random_float(float low, float high);
+extern float calculate_ulperror(float a, float b);
-extern int test_out_of_order_event_waitlist_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_out_of_order_event_waitlist_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_out_of_order_event_waitlist_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_out_of_order_event_enqueue_wait_for_events_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_out_of_order_event_enqueue_wait_for_events_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_event_get_execute_status(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_event_get_write_array_status(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_event_get_read_array_status(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_event_get_info(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_event_wait_for_execute(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_event_wait_for_array(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_event_flush(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_event_finish_execute(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_event_finish_array(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_event_release_before_done(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_event_enqueue_marker(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+#ifdef CL_VERSION_1_2
+extern int test_event_enqueue_marker_with_event_list(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_event_enqueue_barrier_with_event_list(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+#endif
-extern int test_out_of_order_event_enqueue_barrier_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_out_of_order_event_waitlist_single_queue(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_out_of_order_event_waitlist_multi_queue(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_out_of_order_event_waitlist_multi_queue_multi_device(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements);
-extern int test_out_of_order_event_enqueue_marker_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_out_of_order_event_enqueue_marker_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_out_of_order_event_enqueue_marker_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_out_of_order_event_enqueue_wait_for_events_single_queue(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements);
+extern int test_out_of_order_event_enqueue_wait_for_events_multi_queue(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements);
+extern int
+test_out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements);
-extern int test_waitlists( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
-extern int test_userevents( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
-extern int test_callbacks( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
-extern int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
-extern int test_userevents_multithreaded( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
+extern int test_out_of_order_event_enqueue_barrier_single_queue(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements);
+extern int test_out_of_order_event_enqueue_marker_single_queue(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements);
+extern int test_out_of_order_event_enqueue_marker_multi_queue(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements);
+extern int test_out_of_order_event_enqueue_marker_multi_queue_multi_device(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements);
+extern int test_waitlists(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_userevents(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_callbacks(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_callbacks_simultaneous(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_userevents_multithreaded(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
diff --git a/test_conformance/events/testBase.h b/test_conformance/events/testBase.h
index 5b49bfd7..63086d7e 100644
--- a/test_conformance/events/testBase.h
+++ b/test_conformance/events/testBase.h
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -26,6 +26,3 @@
#include "procs.h"
#endif // _testBase_h
-
-
-
diff --git a/test_conformance/events/test_callbacks.cpp b/test_conformance/events/test_callbacks.cpp
index 2ffb9ca7..04481dec 100644
--- a/test_conformance/events/test_callbacks.cpp
+++ b/test_conformance/events/test_callbacks.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -18,28 +18,34 @@
#include "harness/conversions.h"
#include "harness/ThreadPool.h"
-#if !defined (_MSC_VER)
+#if !defined(_MSC_VER)
#include <unistd.h>
#endif // !_MSC_VER
-extern const char *IGetStatusString( cl_int status );
+extern const char *IGetStatusString(cl_int status);
#define PRINT_OPS 0
-// Yes, this is somewhat nasty, in that we're relying on the CPU (the real CPU, not the OpenCL device)
-// to be atomic w.r.t. boolean values. Although if it isn't, we'll just miss the check on this bool
-// until the next time around, so it's not that big of a deal. Ideally, we'd be using a semaphore with
-// a trywait on it, but then that introduces the fun issue of what to do on Win32, etc. This way is
-// far more portable, and worst case of failure is a slightly longer test run.
+// Yes, this is somewhat nasty, in that we're relying on the CPU (the real CPU,
+// not the OpenCL device) to be atomic w.r.t. boolean values. Although if it
+// isn't, we'll just miss the check on this bool until the next time around, so
+// it's not that big of a deal. Ideally, we'd be using a semaphore with a
+// trywait on it, but then that introduces the fun issue of what to do on Win32,
+// etc. This way is far more portable, and worst case of failure is a slightly
+// longer test run.
static bool sCallbackTriggered = false;
#define EVENT_CALLBACK_TYPE_TOTAL 3
-static bool sCallbackTriggered_flag[ EVENT_CALLBACK_TYPE_TOTAL ] ={ false,false, false };
-cl_int event_callback_types[EVENT_CALLBACK_TYPE_TOTAL] ={ CL_SUBMITTED, CL_RUNNING, CL_COMPLETE};
+static bool sCallbackTriggered_flag[EVENT_CALLBACK_TYPE_TOTAL] = { false, false,
+ false };
+cl_int event_callback_types[EVENT_CALLBACK_TYPE_TOTAL] = { CL_SUBMITTED,
+ CL_RUNNING,
+ CL_COMPLETE };
// Our callback function
-/*void CL_CALLBACK single_event_callback_function( cl_event event, cl_int commandStatus, void * userData )
+/*void CL_CALLBACK single_event_callback_function( cl_event event, cl_int
+commandStatus, void * userData )
{
int i=*static_cast<int *>(userData);
log_info( "\tEvent callback %d triggered\n", i);
@@ -47,295 +53,322 @@ cl_int event_callback_types[EVENT_CALLBACK_TYPE_TOTAL] ={ CL_SUBMITTED, CL_RUNNI
}*/
/* use struct as call back para */
-typedef struct { cl_int enevt_type; int index; } CALL_BACK_USER_DATA;
+typedef struct
+{
+ cl_int event_type;
+ int index;
+} CALL_BACK_USER_DATA;
-void CL_CALLBACK single_event_callback_function_flags( cl_event event, cl_int commandStatus, void * userData )
+void CL_CALLBACK single_event_callback_function_flags(cl_event event,
+ cl_int commandStatus,
+ void *userData)
{
- // int i=*static_cast<int *>(userData);
- CALL_BACK_USER_DATA *pdata= static_cast<CALL_BACK_USER_DATA *>(userData);
+ // int i=*static_cast<int *>(userData);
+ CALL_BACK_USER_DATA *pdata = static_cast<CALL_BACK_USER_DATA *>(userData);
- log_info( "\tEvent callback %d of type %d triggered\n", pdata->index, pdata->enevt_type);
- sCallbackTriggered_flag [pdata->index ] = true;
+ log_info("\tEvent callback %d of type %d triggered\n", pdata->index,
+ pdata->event_type);
+ sCallbackTriggered_flag[pdata->index] = true;
}
-int test_callback_event_single( cl_device_id device, cl_context context, cl_command_queue queue, Action *actionToTest )
+int test_callback_event_single(cl_device_id device, cl_context context,
+ cl_command_queue queue, Action *actionToTest)
{
- // Note: we don't use the waiting feature here. We just want to verify that we get a callback called
- // when the given event finishes
+ // Note: we don't use the waiting feature here. We just want to verify that
+ // we get a callback called when the given event finishes
- cl_int error = actionToTest->Setup( device, context, queue );
- test_error( error, "Unable to set up test action" );
+ cl_int error = actionToTest->Setup(device, context, queue);
+ test_error(error, "Unable to set up test action");
// Set up a user event, which we use as a gate for the second event
- clEventWrapper gateEvent = clCreateUserEvent( context, &error );
- test_error( error, "Unable to set up user gate event" );
+ clEventWrapper gateEvent = clCreateUserEvent(context, &error);
+ test_error(error, "Unable to set up user gate event");
// Set up the execution of the action with its actual event
clEventWrapper actualEvent;
- error = actionToTest->Execute( queue, 1, &gateEvent, &actualEvent );
- test_error( error, "Unable to set up action execution" );
+ error = actionToTest->Execute(queue, 1, &gateEvent, &actualEvent);
+ test_error(error, "Unable to set up action execution");
// Set up the callback on the actual event
- /* use struct as call back para */
- CALL_BACK_USER_DATA user_data[EVENT_CALLBACK_TYPE_TOTAL];
- int index [EVENT_CALLBACK_TYPE_TOTAL]={ 0,1,2};
- for( int i=0;i< EVENT_CALLBACK_TYPE_TOTAL; i++)
- {
- user_data[i].enevt_type=event_callback_types[i];
- user_data[i].index =i;
- error = clSetEventCallback( actualEvent, event_callback_types[i], single_event_callback_function_flags, user_data+i );
-
- }
+ /* use struct as call back para */
+ CALL_BACK_USER_DATA user_data[EVENT_CALLBACK_TYPE_TOTAL];
+ for (int i = 0; i < EVENT_CALLBACK_TYPE_TOTAL; i++)
+ {
+ user_data[i].event_type = event_callback_types[i];
+ user_data[i].index = i;
+ error = clSetEventCallback(actualEvent, event_callback_types[i],
+ single_event_callback_function_flags,
+ user_data + i);
+ }
// Now release the user event, which will allow our actual action to run
- error = clSetUserEventStatus( gateEvent, CL_COMPLETE );
- test_error( error, "Unable to trigger gate event" );
+ error = clSetUserEventStatus(gateEvent, CL_COMPLETE);
+ test_error(error, "Unable to trigger gate event");
- // Now we wait for completion. Note that we can actually wait on the event itself, at least at first
- error = clWaitForEvents( 1, &actualEvent );
- test_error( error, "Unable to wait for actual test event" );
+ // Now we wait for completion. Note that we can actually wait on the event
+ // itself, at least at first
+ error = clWaitForEvents(1, &actualEvent);
+ test_error(error, "Unable to wait for actual test event");
- // Note: we can check our callback now, and it MIGHT have been triggered, but that's not guaranteed
- if( sCallbackTriggered )
+ // Note: we can check our callback now, and it MIGHT have been triggered,
+ // but that's not guaranteed
+ if (sCallbackTriggered)
{
// We're all good, so return success
return 0;
}
- // The callback has not yet been called, but that doesn't mean it won't be. So wait for it
- log_info( "\tWaiting for callback..." );
- fflush( stdout );
- for( int i = 0; i < 10 * 10; i++ )
+ // The callback has not yet been called, but that doesn't mean it won't be.
+ // So wait for it
+ log_info("\tWaiting for callback...");
+ fflush(stdout);
+ for (int i = 0; i < 10 * 10; i++)
{
- usleep( 100000 ); // 1/10th second
+ usleep(100000); // 1/10th second
- int cc=0;
- for( int k=0;k< EVENT_CALLBACK_TYPE_TOTAL;k++)
- if (sCallbackTriggered_flag[k]) {
- cc++;
- }
+ int cc = 0;
+ for (int k = 0; k < EVENT_CALLBACK_TYPE_TOTAL; k++)
+ if (sCallbackTriggered_flag[k])
+ {
+ cc++;
+ }
- if (cc== EVENT_CALLBACK_TYPE_TOTAL )
+ if (cc == EVENT_CALLBACK_TYPE_TOTAL)
{
- log_info( "\n" );
+ log_info("\n");
return 0;
}
- log_info( "." );
- fflush( stdout );
+ log_info(".");
+ fflush(stdout);
}
// If we got here, we never got the callback
- log_error( "\nCallback not called within 10 seconds! (assuming failure)\n" );
+ log_error("\nCallback not called within 10 seconds! (assuming failure)\n");
return -1;
}
-#define TEST_ACTION( name ) \
-{ \
- name##Action action; \
- log_info( "-- Testing " #name "...\n" ); \
- if( ( error = test_callback_event_single( deviceID, context, queue, &action ) ) != CL_SUCCESS ) \
- retVal++; \
- clFinish( queue ); \
-}
+#define TEST_ACTION(name) \
+ { \
+ name##Action action; \
+ log_info("-- Testing " #name "...\n"); \
+ if ((error = test_callback_event_single(deviceID, context, queue, \
+ &action)) \
+ != CL_SUCCESS) \
+ retVal++; \
+ clFinish(queue); \
+ }
-int test_callbacks( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_callbacks(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_int error;
int retVal = 0;
- log_info( "\n" );
+ log_info("\n");
- TEST_ACTION( NDRangeKernel )
+ TEST_ACTION(NDRangeKernel)
- TEST_ACTION( ReadBuffer )
- TEST_ACTION( WriteBuffer )
- TEST_ACTION( MapBuffer )
- TEST_ACTION( UnmapBuffer )
+ TEST_ACTION(ReadBuffer)
+ TEST_ACTION(WriteBuffer)
+ TEST_ACTION(MapBuffer)
+ TEST_ACTION(UnmapBuffer)
- if( checkForImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED )
+ if (checkForImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
{
- log_info( "\nNote: device does not support images. Skipping remainder of callback tests...\n" );
+ log_info("\nNote: device does not support images. Skipping remainder "
+ "of callback tests...\n");
}
else
{
- TEST_ACTION( ReadImage2D )
- TEST_ACTION( WriteImage2D )
- TEST_ACTION( CopyImage2Dto2D )
- TEST_ACTION( Copy2DImageToBuffer )
- TEST_ACTION( CopyBufferTo2DImage )
- TEST_ACTION( MapImage )
-
- if( checkFor3DImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED )
- log_info( "\nNote: device does not support 3D images. Skipping remainder of waitlist tests...\n" );
+ TEST_ACTION(ReadImage2D)
+ TEST_ACTION(WriteImage2D)
+ TEST_ACTION(CopyImage2Dto2D)
+ TEST_ACTION(Copy2DImageToBuffer)
+ TEST_ACTION(CopyBufferTo2DImage)
+ TEST_ACTION(MapImage)
+
+ if (checkFor3DImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
+ log_info("\nNote: device does not support 3D images. Skipping "
+ "remainder of waitlist tests...\n");
else
{
- TEST_ACTION( ReadImage3D )
- TEST_ACTION( WriteImage3D )
- TEST_ACTION( CopyImage2Dto3D )
- TEST_ACTION( CopyImage3Dto2D )
- TEST_ACTION( CopyImage3Dto3D )
- TEST_ACTION( Copy3DImageToBuffer )
- TEST_ACTION( CopyBufferTo3DImage )
+ TEST_ACTION(ReadImage3D)
+ TEST_ACTION(WriteImage3D)
+ TEST_ACTION(CopyImage2Dto3D)
+ TEST_ACTION(CopyImage3Dto2D)
+ TEST_ACTION(CopyImage3Dto3D)
+ TEST_ACTION(Copy3DImageToBuffer)
+ TEST_ACTION(CopyBufferTo3DImage)
}
}
return retVal;
}
-#define SIMUTANEOUS_ACTION_TOTAL 18
-static bool sSimultaneousFlags[ 54 ];// for 18 actions with 3 callback status
+#define SIMUTANEOUS_ACTION_TOTAL 18
+static bool sSimultaneousFlags[54]; // for 18 actions with 3 callback status
static volatile int sSimultaneousCount;
-Action * actions[ 19 ] = { 0 };
+Action *actions[19] = { 0 };
// Callback for the simultaneous tests
-void CL_CALLBACK simultaneous_event_callback_function( cl_event event, cl_int commandStatus, void * userData )
+void CL_CALLBACK simultaneous_event_callback_function(cl_event event,
+ cl_int commandStatus,
+ void *userData)
{
int eventIndex = (int)(size_t)userData;
- int actionIndex = eventIndex/EVENT_CALLBACK_TYPE_TOTAL;
- int statusIndex = eventIndex%EVENT_CALLBACK_TYPE_TOTAL;
- log_info( "\tEvent callback triggered for action %s callback type %s \n", actions[actionIndex]->GetName(), IGetStatusString(statusIndex) );
- sSimultaneousFlags[ actionIndex ] = true;
- ThreadPool_AtomicAdd(&sSimultaneousCount,1);
+ int actionIndex = eventIndex / EVENT_CALLBACK_TYPE_TOTAL;
+ int statusIndex = eventIndex % EVENT_CALLBACK_TYPE_TOTAL;
+ log_info("\tEvent callback triggered for action %s callback type %s \n",
+ actions[actionIndex]->GetName(), IGetStatusString(statusIndex));
+ sSimultaneousFlags[actionIndex] = true;
+ ThreadPool_AtomicAdd(&sSimultaneousCount, 1);
}
-int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_callbacks_simultaneous(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_int error;
- // Unlike the singles test, in this one, we run a bunch of events all at once, to verify that
- // the callbacks do get called once-and-only-once for each event, even if the run out of order or
- // are dependent on each other
+ // Unlike the singles test, in this one, we run a bunch of events all at
+ // once, to verify that the callbacks do get called once-and-only-once for
+ // each event, even if the run out of order or are dependent on each other
// First, the list of actions to run
int actionCount = 0, index = 0;
- actions[ index++ ] = new NDRangeKernelAction();
- actions[ index++ ] = new ReadBufferAction();
- actions[ index++ ] = new WriteBufferAction();
- actions[ index++ ] = new MapBufferAction();
- actions[ index++ ] = new UnmapBufferAction();
+ actions[index++] = new NDRangeKernelAction();
+ actions[index++] = new ReadBufferAction();
+ actions[index++] = new WriteBufferAction();
+ actions[index++] = new MapBufferAction();
+ actions[index++] = new UnmapBufferAction();
- if( checkForImageSupport( deviceID ) != CL_IMAGE_FORMAT_NOT_SUPPORTED )
+ if (checkForImageSupport(deviceID) != CL_IMAGE_FORMAT_NOT_SUPPORTED)
{
- actions[ index++ ] = new ReadImage2DAction();
- actions[ index++ ] = new WriteImage2DAction();
- actions[ index++ ] = new CopyImage2Dto2DAction();
- actions[ index++ ] = new Copy2DImageToBufferAction();
- actions[ index++ ] = new CopyBufferTo2DImageAction();
- actions[ index++ ] = new MapImageAction();
-
- if( checkFor3DImageSupport( deviceID ) != CL_IMAGE_FORMAT_NOT_SUPPORTED )
+ actions[index++] = new ReadImage2DAction();
+ actions[index++] = new WriteImage2DAction();
+ actions[index++] = new CopyImage2Dto2DAction();
+ actions[index++] = new Copy2DImageToBufferAction();
+ actions[index++] = new CopyBufferTo2DImageAction();
+ actions[index++] = new MapImageAction();
+
+ if (checkFor3DImageSupport(deviceID) != CL_IMAGE_FORMAT_NOT_SUPPORTED)
{
- actions[ index++ ] = new ReadImage3DAction();
- actions[ index++ ] = new WriteImage3DAction();
- actions[ index++ ] = new CopyImage2Dto3DAction();
- actions[ index++ ] = new CopyImage3Dto2DAction();
- actions[ index++ ] = new CopyImage3Dto3DAction();
- actions[ index++ ] = new Copy3DImageToBufferAction();
- actions[ index++ ] = new CopyBufferTo3DImageAction();
+ actions[index++] = new ReadImage3DAction();
+ actions[index++] = new WriteImage3DAction();
+ actions[index++] = new CopyImage2Dto3DAction();
+ actions[index++] = new CopyImage3Dto2DAction();
+ actions[index++] = new CopyImage3Dto3DAction();
+ actions[index++] = new Copy3DImageToBufferAction();
+ actions[index++] = new CopyBufferTo3DImageAction();
}
}
actionCount = index;
- actions[ index++ ] = NULL;
+ actions[index++] = NULL;
// Now set them all up
- log_info( "\tSetting up test events...\n" );
- for( index = 0; actions[ index ] != NULL; index++ )
+ log_info("\tSetting up test events...\n");
+ for (index = 0; actions[index] != NULL; index++)
{
- error = actions[ index ]->Setup( deviceID, context, queue );
- test_error( error, "Unable to set up test action" );
- sSimultaneousFlags[ index ] = false;
+ error = actions[index]->Setup(deviceID, context, queue);
+ test_error(error, "Unable to set up test action");
+ sSimultaneousFlags[index] = false;
}
sSimultaneousCount = 0;
// Set up the user event to start them all
- clEventWrapper gateEvent = clCreateUserEvent( context, &error );
- test_error( error, "Unable to set up user gate event" );
+ clEventWrapper gateEvent = clCreateUserEvent(context, &error);
+ test_error(error, "Unable to set up user gate event");
// Start executing, all tied to the gate event
- //clEventWrapper actionEvents[ 18 ];// current actionCount is 18
- clEventWrapper *actionEvents= new clEventWrapper[actionCount];
+ // clEventWrapper actionEvents[ 18 ];// current actionCount is 18
+ clEventWrapper *actionEvents = new clEventWrapper[actionCount];
if (actionEvents == NULL)
{
log_error(" memory error in test_callbacks_simultaneous \n");
- for (size_t i=0;i<(sizeof(actions)/sizeof(actions[0]));++i)
- if (actions[i]) delete actions[i];
- return -1;
+ for (size_t i = 0; i < (sizeof(actions) / sizeof(actions[0])); ++i)
+ if (actions[i]) delete actions[i];
+ return -1;
}
- RandomSeed seed( gRandomSeed );
- for( index = 0; actions[ index ] != NULL; index++ )
+ RandomSeed seed(gRandomSeed);
+ for (index = 0; actions[index] != NULL; index++)
{
// Randomly choose to wait on the gate, or wait on the previous event
- cl_event * eventPtr = &gateEvent;
- if( ( index > 0 ) && ( random_in_range( 0, 255, seed ) & 1 ) )
- eventPtr = &actionEvents[ index - 1 ];
-
- error = actions[ index ]->Execute( queue, 1, eventPtr, &actionEvents[ index ] );
- test_error( error, "Unable to execute test action" );
+ cl_event *eventPtr = &gateEvent;
+ if ((index > 0) && (random_in_range(0, 255, seed) & 1))
+ eventPtr = &actionEvents[index - 1];
+ error =
+ actions[index]->Execute(queue, 1, eventPtr, &actionEvents[index]);
+ test_error(error, "Unable to execute test action");
- for( int k=0; k< EVENT_CALLBACK_TYPE_TOTAL; k++)
- {
- error = clSetEventCallback( actionEvents[index], event_callback_types[k], simultaneous_event_callback_function, (void *)(size_t)(index*EVENT_CALLBACK_TYPE_TOTAL+k ) );
- test_error( error, "Unable to set event callback function" );
- }
+ for (int k = 0; k < EVENT_CALLBACK_TYPE_TOTAL; k++)
+ {
+ error = clSetEventCallback(
+ actionEvents[index], event_callback_types[k],
+ simultaneous_event_callback_function,
+ (void *)(size_t)(index * EVENT_CALLBACK_TYPE_TOTAL + k));
+ test_error(error, "Unable to set event callback function");
+ }
}
- int total_callbacks= actionCount * EVENT_CALLBACK_TYPE_TOTAL;
+ int total_callbacks = actionCount * EVENT_CALLBACK_TYPE_TOTAL;
// Now release the user event, which will allow our actual action to run
- error = clSetUserEventStatus( gateEvent, CL_COMPLETE );
- test_error( error, "Unable to trigger gate event" );
+ error = clSetUserEventStatus(gateEvent, CL_COMPLETE);
+ test_error(error, "Unable to trigger gate event");
// Wait on the actual action events now
- log_info( "\tWaiting for test completions...\n" );
- error = clWaitForEvents( actionCount, &actionEvents[ 0 ] );
- test_error( error, "Unable to wait for actual test events" );
-
- // Note: we can check our callback now, and it MIGHT have been triggered, but that's not guaranteed
- int last_count = 0;
- if( ((last_count = sSimultaneousCount)) == total_callbacks)
+ log_info("\tWaiting for test completions...\n");
+ error = clWaitForEvents(actionCount, &actionEvents[0]);
+ test_error(error, "Unable to wait for actual test events");
+
+ // Note: we can check our callback now, and it MIGHT have been triggered,
+ // but that's not guaranteed
+ int last_count = 0;
+ if (((last_count = sSimultaneousCount)) == total_callbacks)
{
// We're all good, so return success
- log_info( "\t%d of %d callbacks received\n", sSimultaneousCount, total_callbacks );
+ log_info("\t%d of %d callbacks received\n", sSimultaneousCount,
+ total_callbacks);
- if (actionEvents) delete [] actionEvents;
- for (size_t i=0;i<(sizeof(actions)/sizeof(actions[0]));++i)
- if (actions[i]) delete actions[i];
+ if (actionEvents) delete[] actionEvents;
+ for (size_t i = 0; i < (sizeof(actions) / sizeof(actions[0])); ++i)
+ if (actions[i]) delete actions[i];
return 0;
}
// We haven't gotten (all) of the callbacks, so wait for them
- log_info( "\tWe've only received %d of the %d callbacks we expected; waiting for more...\n", last_count, total_callbacks );
+ log_info("\tWe've only received %d of the %d callbacks we expected; "
+ "waiting for more...\n",
+ last_count, total_callbacks);
- for( int i = 0; i < 10 * 10; i++ )
+ for (int i = 0; i < 10 * 10; i++)
{
- usleep( 100000 ); // 1/10th second
- if( ((last_count = sSimultaneousCount)) == total_callbacks )
+ usleep(100000); // 1/10th second
+ if (((last_count = sSimultaneousCount)) == total_callbacks)
{
- // All of the callbacks were executed
- if (actionEvents) delete [] actionEvents;
- for (size_t i=0;i<(sizeof(actions)/sizeof(actions[0]));++i)
- if (actions[i]) delete actions[i];
- return 0;
+ // All of the callbacks were executed
+ if (actionEvents) delete[] actionEvents;
+ for (size_t i = 0; i < (sizeof(actions) / sizeof(actions[0])); ++i)
+ if (actions[i]) delete actions[i];
+ return 0;
}
}
// If we got here, some of the callbacks did not occur in time
- log_error( "\nError: We only ever received %d of our %d callbacks!\n", last_count, total_callbacks );
- log_error( "Events that did not receive callbacks:\n" );
- for( index = 0; actions[ index ] != NULL; index++ )
+ log_error("\nError: We only ever received %d of our %d callbacks!\n",
+ last_count, total_callbacks);
+ log_error("Events that did not receive callbacks:\n");
+ for (index = 0; actions[index] != NULL; index++)
{
- if( !sSimultaneousFlags[ index ] )
- log_error( "\t%s\n", actions[ index ]->GetName() );
+ if (!sSimultaneousFlags[index])
+ log_error("\t%s\n", actions[index]->GetName());
}
- if (actionEvents) delete [] actionEvents;
+ if (actionEvents) delete[] actionEvents;
return -1;
-
}
-
diff --git a/test_conformance/events/test_event_dependencies.cpp b/test_conformance/events/test_event_dependencies.cpp
index 41136548..45b260a6 100644
--- a/test_conformance/events/test_event_dependencies.cpp
+++ b/test_conformance/events/test_event_dependencies.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -39,61 +39,79 @@ const char *write_kernels[] = {
/*
Tests event dependencies by running two kernels that use the same buffer.
If two_queues is set they are run in separate queues.
- If test_enqueue_wait_for_events is set then clEnqueueWaitForEvent is called between them.
- If test_barrier is set then clEnqueueBarrier is called between them (only for single queue).
- If neither are set, nothing is done to prevent them from executing in the wrong order. This can be used for verification.
+ If test_enqueue_wait_for_events is set then clEnqueueWaitForEvent is called
+ between them. If test_barrier is set then clEnqueueBarrier is called between
+ them (only for single queue). If neither are set, nothing is done to prevent
+ them from executing in the wrong order. This can be used for verification.
*/
-int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int two_queues, int two_devices,
- int test_enqueue_wait_for_events, int test_barrier, int use_waitlist, int use_marker)
+int test_event_enqueue_wait_for_events_run_test(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements, int two_queues, int two_devices,
+ int test_enqueue_wait_for_events, int test_barrier, int use_waitlist,
+ int use_marker)
{
cl_int error = CL_SUCCESS;
- size_t threads[3] = {TEST_SIZE,0,0};
+ size_t threads[3] = { TEST_SIZE, 0, 0 };
int i, loop_count, event_count, expected_value, failed;
int expected_if_only_queue[2];
int max_count = TEST_SIZE;
cl_platform_id platform;
- cl_command_queue queues[2]; // Not a wrapper so we don't autorelease if they are the same
- clCommandQueueWrapper queueWrappers[2]; // If they are different, we use the wrapper so it will auto release
+ cl_command_queue
+ queues[2]; // Not a wrapper so we don't autorelease if they are the same
+ clCommandQueueWrapper queueWrappers[2]; // If they are different, we use the
+ // wrapper so it will auto release
clContextWrapper context_to_use;
clMemWrapper data;
clProgramWrapper program;
clKernelWrapper kernel1[TEST_COUNT], kernel2[TEST_COUNT];
- clEventWrapper event[TEST_COUNT*4+2]; // If we usemarkers we get 2 more events per iteration
+ clEventWrapper event[TEST_COUNT * 4 + 2]; // If we usemarkers we get 2 more
+ // events per iteration
if (test_enqueue_wait_for_events)
- log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier function.\n");
+ log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier "
+ "function.\n");
if (test_barrier)
- log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier function.\n");
+ log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier "
+ "function.\n");
if (use_waitlist)
- log_info("\tTesting with waitlist-based depenednecies between kernels.\n");
+ log_info(
+ "\tTesting with waitlist-based depenednecies between kernels.\n");
if (use_marker)
log_info("\tTesting with clEnqueueMarker as a barrier function.\n");
- if (test_barrier && (two_queues || two_devices)) {
- log_error("\tTest requested with clEnqueueBarrier across two queues. This is not a valid combination.\n");
+ if (test_barrier && (two_queues || two_devices))
+ {
+ log_error("\tTest requested with clEnqueueBarrier across two queues. "
+ "This is not a valid combination.\n");
return -1;
}
error = clGetPlatformIDs(1, &platform, NULL);
test_error(error, "clGetPlatformIDs failed.");
- // If we are to use two devices, then get them and create a context with both.
+ // If we are to use two devices, then get them and create a context with
+ // both.
cl_device_id *two_device_ids;
- if (two_devices) {
- two_device_ids = (cl_device_id*)malloc(sizeof(cl_device_id)*2);
+ if (two_devices)
+ {
+ two_device_ids = (cl_device_id *)malloc(sizeof(cl_device_id) * 2);
cl_uint number_returned;
- error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, two_device_ids, &number_returned);
- test_error( error, "clGetDeviceIDs for CL_DEVICE_TYPE_ALL failed.");
- if (number_returned != 2) {
+ error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, two_device_ids,
+ &number_returned);
+ test_error(error, "clGetDeviceIDs for CL_DEVICE_TYPE_ALL failed.");
+ if (number_returned != 2)
+ {
log_info("Failed to obtain two devices. Test can not run.\n");
free(two_device_ids);
return 0;
}
- for (i=0; i<2; i++) {
+ for (i = 0; i < 2; i++)
+ {
cl_device_type type;
- error = clGetDeviceInfo(two_device_ids[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL);
- test_error( error, "clGetDeviceInfo failed.");
+ error = clGetDeviceInfo(two_device_ids[i], CL_DEVICE_TYPE,
+ sizeof(cl_device_type), &type, NULL);
+ test_error(error, "clGetDeviceInfo failed.");
if (type & CL_DEVICE_TYPE_CPU)
log_info("\tDevice %d is CL_DEVICE_TYPE_CPU.\n", i);
if (type & CL_DEVICE_TYPE_GPU)
@@ -104,12 +122,16 @@ int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_conte
log_info("\tDevice %d is CL_DEVICE_TYPE_DEFAULT.\n", i);
}
- context_to_use = clCreateContext(NULL, 2, two_device_ids, notify_callback, NULL, &error);
+ context_to_use = clCreateContext(NULL, 2, two_device_ids,
+ notify_callback, NULL, &error);
test_error(error, "clCreateContext failed for two devices.");
log_info("\tTesting with two devices.\n");
- } else {
- context_to_use = clCreateContext(NULL, 1, &deviceID, NULL, NULL, &error);
+ }
+ else
+ {
+ context_to_use =
+ clCreateContext(NULL, 1, &deviceID, NULL, NULL, &error);
test_error(error, "clCreateContext failed for one device.");
log_info("\tTesting with one device.\n");
@@ -117,41 +139,55 @@ int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_conte
// If we are using two queues then create them
cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
- if (two_queues) {
+ if (two_queues)
+ {
// Get a second queue
if (two_devices)
{
- if( !checkDeviceForQueueSupport( two_device_ids[ 0 ], props ) ||
- !checkDeviceForQueueSupport( two_device_ids[ 1 ], props ) )
+ if (!checkDeviceForQueueSupport(two_device_ids[0], props)
+ || !checkDeviceForQueueSupport(two_device_ids[1], props))
{
- log_info( "WARNING: One or more device for multi-device test does not support out-of-order exec mode; skipping test.\n" );
+ log_info(
+ "WARNING: One or more device for multi-device test does "
+ "not support out-of-order exec mode; skipping test.\n");
return -1942;
}
- queueWrappers[0] = clCreateCommandQueue(context_to_use, two_device_ids[0], props, &error);
- test_error(error, "clCreateCommandQueue for first queue on first device failed.");
- queueWrappers[1] = clCreateCommandQueue(context_to_use, two_device_ids[1], props, &error);
- test_error(error, "clCreateCommandQueue for second queue on second device failed.");
-
+ queueWrappers[0] = clCreateCommandQueue(
+ context_to_use, two_device_ids[0], props, &error);
+ test_error(
+ error,
+ "clCreateCommandQueue for first queue on first device failed.");
+ queueWrappers[1] = clCreateCommandQueue(
+ context_to_use, two_device_ids[1], props, &error);
+ test_error(error,
+ "clCreateCommandQueue for second queue on second device "
+ "failed.");
}
else
{
- // Single device has already been checked for out-of-order exec support
- queueWrappers[0] = clCreateCommandQueue(context_to_use, deviceID, props, &error);
+ // Single device has already been checked for out-of-order exec
+ // support
+ queueWrappers[0] =
+ clCreateCommandQueue(context_to_use, deviceID, props, &error);
test_error(error, "clCreateCommandQueue for first queue failed.");
- queueWrappers[1] = clCreateCommandQueue(context_to_use, deviceID, props, &error);
+ queueWrappers[1] =
+ clCreateCommandQueue(context_to_use, deviceID, props, &error);
test_error(error, "clCreateCommandQueue for second queue failed.");
}
- // Ugly hack to make sure we only have the wrapper auto-release if they are different queues
+ // Ugly hack to make sure we only have the wrapper auto-release if they
+ // are different queues
queues[0] = queueWrappers[0];
queues[1] = queueWrappers[1];
log_info("\tTesting with two queues.\n");
}
else
{
- // (Note: single device has already been checked for out-of-order exec support)
- // Otherwise create one queue and have the second one be the same
- queueWrappers[0] = clCreateCommandQueue(context_to_use, deviceID, props, &error);
+ // (Note: single device has already been checked for out-of-order exec
+ // support) Otherwise create one queue and have the second one be the
+ // same
+ queueWrappers[0] =
+ clCreateCommandQueue(context_to_use, deviceID, props, &error);
test_error(error, "clCreateCommandQueue for first queue failed.");
queues[0] = queueWrappers[0];
queues[1] = (cl_command_queue)queues[0];
@@ -160,236 +196,346 @@ int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_conte
// Setup - create a buffer and the two kernels
- data = clCreateBuffer(context_to_use, CL_MEM_READ_WRITE, TEST_SIZE*sizeof(cl_int), NULL, &error);
- test_error( error, "clCreateBuffer failed");
+ data = clCreateBuffer(context_to_use, CL_MEM_READ_WRITE,
+ TEST_SIZE * sizeof(cl_int), NULL, &error);
+ test_error(error, "clCreateBuffer failed");
// Initialize the values to zero
- cl_int *values = (cl_int*)malloc(TEST_SIZE*sizeof(cl_int));
- for (i=0; i<(int)TEST_SIZE; i++)
- values[i] = 0;
- error = clEnqueueWriteBuffer(queues[0], data, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), values, 0, NULL, NULL);
- test_error( error, "clEnqueueWriteBuffer failed");
+ cl_int *values = (cl_int *)malloc(TEST_SIZE * sizeof(cl_int));
+ for (i = 0; i < (int)TEST_SIZE; i++) values[i] = 0;
+ error =
+ clEnqueueWriteBuffer(queues[0], data, CL_TRUE, 0,
+ TEST_SIZE * sizeof(cl_int), values, 0, NULL, NULL);
+ test_error(error, "clEnqueueWriteBuffer failed");
expected_value = 0;
// Build the kernels
- if (create_single_kernel_helper( context_to_use, &program, &kernel1[0], 1, write_kernels, "write_up" ))
+ if (create_single_kernel_helper(context_to_use, &program, &kernel1[0], 1,
+ write_kernels, "write_up"))
return -1;
error = clSetKernelArg(kernel1[0], 0, sizeof(data), &data);
error |= clSetKernelArg(kernel1[0], 1, sizeof(max_count), &max_count);
- test_error( error, "clSetKernelArg 1 failed");
+ test_error(error, "clSetKernelArg 1 failed");
- for (i=1; i<TEST_COUNT; i++) {
+ for (i = 1; i < TEST_COUNT; i++)
+ {
kernel1[i] = clCreateKernel(program, "write_up", &error);
- test_error( error, "clCreateKernel 1 failed");
+ test_error(error, "clCreateKernel 1 failed");
error = clSetKernelArg(kernel1[i], 0, sizeof(data), &data);
error |= clSetKernelArg(kernel1[i], 1, sizeof(max_count), &max_count);
- test_error( error, "clSetKernelArg 1 failed");
+ test_error(error, "clSetKernelArg 1 failed");
}
- for (i=0; i<TEST_COUNT; i++) {
+ for (i = 0; i < TEST_COUNT; i++)
+ {
kernel2[i] = clCreateKernel(program, "write_down", &error);
- test_error( error, "clCreateKernel 2 failed");
+ test_error(error, "clCreateKernel 2 failed");
error = clSetKernelArg(kernel2[i], 0, sizeof(data), &data);
error |= clSetKernelArg(kernel2[i], 1, sizeof(max_count), &max_count);
- test_error( error, "clSetKernelArg 2 failed");
+ test_error(error, "clSetKernelArg 2 failed");
}
- // Execution - run the first kernel, then enqueue the wait on the events, then the second kernel
- // If clEnqueueBarrierWithWaitList works, the buffer will be filled with 1s, then multiplied by 4s,
- // then incremented to 5s, repeatedly. Otherwise the values may be 2s (if the first one doesn't work) or 8s
- // (if the second one doesn't work).
+ // Execution - run the first kernel, then enqueue the wait on the events,
+ // then the second kernel If clEnqueueBarrierWithWaitList works, the buffer
+ // will be filled with 1s, then multiplied by 4s, then incremented to 5s,
+ // repeatedly. Otherwise the values may be 2s (if the first one doesn't
+ // work) or 8s (if the second one doesn't work).
if (RANDOMIZE)
log_info("Queues chosen randomly for each kernel execution.\n");
else
log_info("Queues chosen alternatily for each kernel execution.\n");
event_count = 0;
- for (i=0; i<(int)TEST_SIZE; i++)
- values[i] = 1;
- error = clEnqueueWriteBuffer(queues[0], data, CL_FALSE, 0, TEST_SIZE*sizeof(cl_int), values, 0, NULL, &event[event_count]);
- test_error( error, "clEnqueueWriteBuffer 2 failed");
+ for (i = 0; i < (int)TEST_SIZE; i++) values[i] = 1;
+ error = clEnqueueWriteBuffer(queues[0], data, CL_FALSE, 0,
+ TEST_SIZE * sizeof(cl_int), values, 0, NULL,
+ &event[event_count]);
+ test_error(error, "clEnqueueWriteBuffer 2 failed");
expected_value = 1;
expected_if_only_queue[0] = 1;
expected_if_only_queue[1] = 1;
int queue_to_use = 1;
- if (test_enqueue_wait_for_events) {
- error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 1, &event[event_count], NULL );
- test_error( error, "Unable to queue wait for events" );
- } else if (test_barrier) {
- error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 0, NULL, NULL);
- test_error( error, "Unable to queue barrier" );
+ if (test_enqueue_wait_for_events)
+ {
+ error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 1,
+ &event[event_count], NULL);
+ test_error(error, "Unable to queue wait for events");
+ }
+ else if (test_barrier)
+ {
+ error =
+ clEnqueueBarrierWithWaitList(queues[queue_to_use], 0, NULL, NULL);
+ test_error(error, "Unable to queue barrier");
}
- for (loop_count=0; loop_count<TEST_COUNT; loop_count++) {
+ for (loop_count = 0; loop_count < TEST_COUNT; loop_count++)
+ {
// Execute kernel 1
event_count++;
- if (use_waitlist | use_marker) {
- if (DEBUG_OUT) log_info("clEnqueueNDRangeKernel(queues[%d], kernel1[%d], 1, NULL, threads, NULL, 1, &event[%d], &event[%d])\n", queue_to_use, loop_count, event_count-1, event_count);
- error = clEnqueueNDRangeKernel(queues[queue_to_use], kernel1[loop_count], 1, NULL, threads, NULL, 1, &event[event_count-1], &event[event_count]);
- } else {
- if (DEBUG_OUT) log_info("clEnqueueNDRangeKernel(queues[%d], kernel1[%d], 1, NULL, threads, NULL, 0, NULL, &event[%d])\n", queue_to_use, loop_count, event_count);
- error = clEnqueueNDRangeKernel(queues[queue_to_use], kernel1[loop_count], 1, NULL, threads, NULL, 0, NULL, &event[event_count]);
+ if (use_waitlist | use_marker)
+ {
+ if (DEBUG_OUT)
+ log_info("clEnqueueNDRangeKernel(queues[%d], kernel1[%d], 1, "
+ "NULL, threads, NULL, 1, &event[%d], &event[%d])\n",
+ queue_to_use, loop_count, event_count - 1,
+ event_count);
+ error = clEnqueueNDRangeKernel(
+ queues[queue_to_use], kernel1[loop_count], 1, NULL, threads,
+ NULL, 1, &event[event_count - 1], &event[event_count]);
}
- if (error) {
+ else
+ {
+ if (DEBUG_OUT)
+ log_info("clEnqueueNDRangeKernel(queues[%d], kernel1[%d], 1, "
+ "NULL, threads, NULL, 0, NULL, &event[%d])\n",
+ queue_to_use, loop_count, event_count);
+ error = clEnqueueNDRangeKernel(
+ queues[queue_to_use], kernel1[loop_count], 1, NULL, threads,
+ NULL, 0, NULL, &event[event_count]);
+ }
+ if (error)
+ {
log_info("\tLoop count %d\n", loop_count);
- print_error( error, "clEnqueueNDRangeKernel for kernel 1 failed");
+ print_error(error, "clEnqueueNDRangeKernel for kernel 1 failed");
return error;
}
expected_value *= 2;
expected_if_only_queue[queue_to_use] *= 2;
// If we are using a marker, it needs to go in the same queue
- if (use_marker) {
+ if (use_marker)
+ {
event_count++;
- if (DEBUG_OUT) log_info("clEnqueueMarker(queues[%d], event[%d])\n", queue_to_use, event_count);
-
- #ifdef CL_VERSION_1_2
- error = clEnqueueMarkerWithWaitList(queues[queue_to_use], 0, NULL, &event[event_count]);
- #else
- error = clEnqueueMarker(queues[queue_to_use], &event[event_count]);
- #endif
-
+ if (DEBUG_OUT)
+ log_info("clEnqueueMarker(queues[%d], event[%d])\n",
+ queue_to_use, event_count);
+
+#ifdef CL_VERSION_1_2
+ error = clEnqueueMarkerWithWaitList(queues[queue_to_use], 0, NULL,
+ &event[event_count]);
+#else
+ error = clEnqueueMarker(queues[queue_to_use], &event[event_count]);
+#endif
}
// Pick the next queue to run
if (RANDOMIZE)
- queue_to_use = rand()%2;
+ queue_to_use = rand() % 2;
else
- queue_to_use = (queue_to_use + 1)%2;
+ queue_to_use = (queue_to_use + 1) % 2;
// Put in a barrier if requested
- if (test_enqueue_wait_for_events) {
- if (DEBUG_OUT) log_info("clEnqueueBarrierWithWaitList(queues[%d], 1, &event[%d], NULL)\n", queue_to_use, event_count);
- error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 1, &event[event_count], NULL);
- test_error( error, "Unable to queue wait for events" );
- } else if (test_barrier) {
- if (DEBUG_OUT) log_info("clEnqueueBarrierWithWaitList(queues[%d])\n", queue_to_use);
- error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 0, NULL, NULL);
- test_error( error, "Unable to queue barrier" );
+ if (test_enqueue_wait_for_events)
+ {
+ if (DEBUG_OUT)
+ log_info("clEnqueueBarrierWithWaitList(queues[%d], 1, "
+ "&event[%d], NULL)\n",
+ queue_to_use, event_count);
+ error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 1,
+ &event[event_count], NULL);
+ test_error(error, "Unable to queue wait for events");
+ }
+ else if (test_barrier)
+ {
+ if (DEBUG_OUT)
+ log_info("clEnqueueBarrierWithWaitList(queues[%d])\n",
+ queue_to_use);
+ error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 0, NULL,
+ NULL);
+ test_error(error, "Unable to queue barrier");
}
// Execute Kernel 2
event_count++;
- if (use_waitlist | use_marker) {
- if (DEBUG_OUT) log_info("clEnqueueNDRangeKernel(queues[%d], kernel2[%d], 1, NULL, threads, NULL, 1, &event[%d], &event[%d])\n", queue_to_use, loop_count, event_count-1, event_count);
- error = clEnqueueNDRangeKernel(queues[queue_to_use], kernel2[loop_count], 1, NULL, threads, NULL, 1, &event[event_count-1], &event[event_count]);
- } else {
- if (DEBUG_OUT) log_info("clEnqueueNDRangeKernel(queues[%d], kernel2[%d], 1, NULL, threads, NULL, 0, NULL, &event[%d])\n", queue_to_use, loop_count, event_count);
- error = clEnqueueNDRangeKernel(queues[queue_to_use], kernel2[loop_count], 1, NULL, threads, NULL, 0, NULL, &event[event_count]);
+ if (use_waitlist | use_marker)
+ {
+ if (DEBUG_OUT)
+ log_info("clEnqueueNDRangeKernel(queues[%d], kernel2[%d], 1, "
+ "NULL, threads, NULL, 1, &event[%d], &event[%d])\n",
+ queue_to_use, loop_count, event_count - 1,
+ event_count);
+ error = clEnqueueNDRangeKernel(
+ queues[queue_to_use], kernel2[loop_count], 1, NULL, threads,
+ NULL, 1, &event[event_count - 1], &event[event_count]);
+ }
+ else
+ {
+ if (DEBUG_OUT)
+ log_info("clEnqueueNDRangeKernel(queues[%d], kernel2[%d], 1, "
+ "NULL, threads, NULL, 0, NULL, &event[%d])\n",
+ queue_to_use, loop_count, event_count);
+ error = clEnqueueNDRangeKernel(
+ queues[queue_to_use], kernel2[loop_count], 1, NULL, threads,
+ NULL, 0, NULL, &event[event_count]);
}
- if (error) {
+ if (error)
+ {
log_info("\tLoop count %d\n", loop_count);
- print_error( error, "clEnqueueNDRangeKernel for kernel 2 failed");
+ print_error(error, "clEnqueueNDRangeKernel for kernel 2 failed");
return error;
}
expected_value--;
expected_if_only_queue[queue_to_use]--;
// If we are using a marker, it needs to go in the same queue
- if (use_marker) {
+ if (use_marker)
+ {
event_count++;
- if (DEBUG_OUT) log_info("clEnqueueMarker(queues[%d], event[%d])\n", queue_to_use, event_count);
-
- #ifdef CL_VERSION_1_2
- error = clEnqueueMarkerWithWaitList(queues[queue_to_use], 0, NULL, &event[event_count]);
- #else
+ if (DEBUG_OUT)
+ log_info("clEnqueueMarker(queues[%d], event[%d])\n",
+ queue_to_use, event_count);
+
+#ifdef CL_VERSION_1_2
+ error = clEnqueueMarkerWithWaitList(queues[queue_to_use], 0, NULL,
+ &event[event_count]);
+#else
error = clEnqueueMarker(queues[queue_to_use], &event[event_count]);
- #endif
+#endif
}
// Pick the next queue to run
if (RANDOMIZE)
- queue_to_use = rand()%2;
+ queue_to_use = rand() % 2;
else
- queue_to_use = (queue_to_use + 1)%2;
+ queue_to_use = (queue_to_use + 1) % 2;
// Put in a barrier if requested
- if (test_enqueue_wait_for_events) {
- if (DEBUG_OUT) log_info("clEnqueueBarrierWithWaitList(queues[%d], 1, &event[%d], NULL)\n", queue_to_use, event_count);
- error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 1, &event[event_count], NULL );
- test_error( error, "Unable to queue wait for events" );
- } else if (test_barrier) {
- if (DEBUG_OUT) log_info("clEnqueueBarrierWithWaitList(queues[%d])\n", queue_to_use);
- error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 0, NULL, NULL);
- test_error( error, "Unable to queue barrier" );
+ if (test_enqueue_wait_for_events)
+ {
+ if (DEBUG_OUT)
+ log_info("clEnqueueBarrierWithWaitList(queues[%d], 1, "
+ "&event[%d], NULL)\n",
+ queue_to_use, event_count);
+ error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 1,
+ &event[event_count], NULL);
+ test_error(error, "Unable to queue wait for events");
+ }
+ else if (test_barrier)
+ {
+ if (DEBUG_OUT)
+ log_info("clEnqueueBarrierWithWaitList(queues[%d])\n",
+ queue_to_use);
+ error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 0, NULL,
+ NULL);
+ test_error(error, "Unable to queue barrier");
}
}
// Now finish up everything
- if (two_queues) {
+ if (two_queues)
+ {
error = clFlush(queues[1]);
- test_error( error, "clFlush[1] failed");
+ test_error(error, "clFlush[1] failed");
}
- error = clEnqueueReadBuffer(queues[0], data, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), values, 1, &event[event_count], NULL);
+ error = clEnqueueReadBuffer(queues[0], data, CL_TRUE, 0,
+ TEST_SIZE * sizeof(cl_int), values, 1,
+ &event[event_count], NULL);
test_error(error, "clEnqueueReadBuffer failed");
failed = 0;
- for (i=0; i<(int)TEST_SIZE; i++)
- if (values[i] != expected_value) {
+ for (i = 0; i < (int)TEST_SIZE; i++)
+ if (values[i] != expected_value)
+ {
failed = 1;
- log_info("\tvalues[%d] = %d, expected %d (If only queue 1 accessed memory: %d only queue 2 accessed memory: %d)\n",
- i, values[i], expected_value, expected_if_only_queue[0], expected_if_only_queue[1]);
+ log_info("\tvalues[%d] = %d, expected %d (If only queue 1 accessed "
+ "memory: %d only queue 2 accessed memory: %d)\n",
+ i, values[i], expected_value, expected_if_only_queue[0],
+ expected_if_only_queue[1]);
break;
}
free(values);
- if (two_devices)
- free(two_device_ids);
+ if (two_devices) free(two_device_ids);
return failed;
}
-int test( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
- int two_queues, int two_devices,
- int test_enqueue_wait_for_events, int test_barrier, int use_waitlists, int use_marker)
+int test(cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements, int two_queues, int two_devices,
+ int test_enqueue_wait_for_events, int test_barrier, int use_waitlists,
+ int use_marker)
{
- if( !checkDeviceForQueueSupport( deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) )
+ if (!checkDeviceForQueueSupport(deviceID,
+ CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE))
{
- log_info( "WARNING: Device does not support out-of-order exec mode; skipping test.\n" );
+ log_info("WARNING: Device does not support out-of-order exec mode; "
+ "skipping test.\n");
return 0;
}
- log_info("Running test for baseline results to determine if out-of-order execution can be detected...\n");
- int baseline_results = test_event_enqueue_wait_for_events_run_test(deviceID, context, queue, num_elements, two_queues, two_devices, 0, 0, 0, 0);
- if (baseline_results == 0) {
+ log_info("Running test for baseline results to determine if out-of-order "
+ "execution can be detected...\n");
+ int baseline_results = test_event_enqueue_wait_for_events_run_test(
+ deviceID, context, queue, num_elements, two_queues, two_devices, 0, 0,
+ 0, 0);
+ if (baseline_results == 0)
+ {
if (test_enqueue_wait_for_events)
- log_info("WARNING: could not detect any out-of-order execution without using clEnqueueBarrierWithWaitList, so this test is not a valid test of out-of-order event dependencies.\n");
+ log_info(
+ "WARNING: could not detect any out-of-order execution without "
+ "using clEnqueueBarrierWithWaitList, so this test is not a "
+ "valid test of out-of-order event dependencies.\n");
if (test_barrier)
- log_info("WARNING: could not detect any out-of-order execution without using clEnqueueBarrierWithWaitList, so this test is not a valid test of out-of-order event dependencies.\n");
+ log_info(
+ "WARNING: could not detect any out-of-order execution without "
+ "using clEnqueueBarrierWithWaitList, so this test is not a "
+ "valid test of out-of-order event dependencies.\n");
if (use_waitlists)
- log_info("WARNING: could not detect any out-of-order execution without using waitlists, so this test is not a valid test of out-of-order event dependencies.\n");
+ log_info("WARNING: could not detect any out-of-order execution "
+ "without using waitlists, so this test is not a valid "
+ "test of out-of-order event dependencies.\n");
if (use_marker)
- log_info("WARNING: could not detect any out-of-order execution without using clEnqueueMarker, so this test is not a valid test of out-of-order event dependencies.\n");
- } else if (baseline_results == 1) {
+ log_info("WARNING: could not detect any out-of-order execution "
+ "without using clEnqueueMarker, so this test is not a "
+ "valid test of out-of-order event dependencies.\n");
+ }
+ else if (baseline_results == 1)
+ {
if (test_enqueue_wait_for_events)
- log_info("Detected incorrect execution (possibly out-of-order) without clEnqueueBarrierWithWaitList. Test can be a valid test of out-of-order event dependencies.\n");
+ log_info("Detected incorrect execution (possibly out-of-order) "
+ "without clEnqueueBarrierWithWaitList. Test can be a "
+ "valid test of out-of-order event dependencies.\n");
if (test_barrier)
- log_info("Detected incorrect execution (possibly out-of-order) without clEnqueueBarrierWithWaitList. Test can be a valid test of out-of-order event dependencies.\n");
+ log_info("Detected incorrect execution (possibly out-of-order) "
+ "without clEnqueueBarrierWithWaitList. Test can be a "
+ "valid test of out-of-order event dependencies.\n");
if (use_waitlists)
- log_info("Detected incorrect execution (possibly out-of-order) without waitlists. Test can be a valid test of out-of-order event dependencies.\n");
+ log_info("Detected incorrect execution (possibly out-of-order) "
+ "without waitlists. Test can be a valid test of "
+ "out-of-order event dependencies.\n");
if (use_marker)
- log_info("Detected incorrect execution (possibly out-of-order) without clEnqueueMarker. Test can be a valid test of out-of-order event dependencies.\n");
- } else if( baseline_results == -1942 ) {
+ log_info("Detected incorrect execution (possibly out-of-order) "
+ "without clEnqueueMarker. Test can be a valid test of "
+ "out-of-order event dependencies.\n");
+ }
+ else if (baseline_results == -1942)
+ {
// Just ignore and return (out-of-order exec mode not supported)
return 0;
- } else {
+ }
+ else
+ {
print_error(baseline_results, "Baseline run failed");
return baseline_results;
}
log_info("Running test for actual results...\n");
- return test_event_enqueue_wait_for_events_run_test(deviceID, context, queue, num_elements, two_queues, two_devices,
- test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
+ return test_event_enqueue_wait_for_events_run_test(
+ deviceID, context, queue, num_elements, two_queues, two_devices,
+ test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
}
-int test_out_of_order_event_waitlist_single_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_out_of_order_event_waitlist_single_queue(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int two_queues = 0;
int two_devices = 0;
@@ -397,10 +543,15 @@ int test_out_of_order_event_waitlist_single_queue( cl_device_id deviceID, cl_con
int test_barrier = 0;
int use_waitlists = 1;
int use_marker = 0;
- return test(deviceID, context, queue, num_elements, two_queues, two_devices, test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
+ return test(deviceID, context, queue, num_elements, two_queues, two_devices,
+ test_enqueue_wait_for_events, test_barrier, use_waitlists,
+ use_marker);
}
-int test_out_of_order_event_waitlist_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_out_of_order_event_waitlist_multi_queue(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int two_queues = 1;
int two_devices = 0;
@@ -408,10 +559,14 @@ int test_out_of_order_event_waitlist_multi_queue( cl_device_id deviceID, cl_cont
int test_barrier = 0;
int use_waitlists = 1;
int use_marker = 0;
- return test(deviceID, context, queue, num_elements, two_queues, two_devices, test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
+ return test(deviceID, context, queue, num_elements, two_queues, two_devices,
+ test_enqueue_wait_for_events, test_barrier, use_waitlists,
+ use_marker);
}
-int test_out_of_order_event_waitlist_multi_queue_multi_device( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_out_of_order_event_waitlist_multi_queue_multi_device(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements)
{
int two_queues = 1;
int two_devices = 1;
@@ -419,11 +574,15 @@ int test_out_of_order_event_waitlist_multi_queue_multi_device( cl_device_id devi
int test_barrier = 0;
int use_waitlists = 1;
int use_marker = 0;
- return test(deviceID, context, queue, num_elements, two_queues, two_devices, test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
+ return test(deviceID, context, queue, num_elements, two_queues, two_devices,
+ test_enqueue_wait_for_events, test_barrier, use_waitlists,
+ use_marker);
}
-int test_out_of_order_event_enqueue_wait_for_events_single_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_out_of_order_event_enqueue_wait_for_events_single_queue(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements)
{
int two_queues = 0;
int two_devices = 0;
@@ -431,10 +590,14 @@ int test_out_of_order_event_enqueue_wait_for_events_single_queue( cl_device_id d
int test_barrier = 0;
int use_waitlists = 0;
int use_marker = 0;
- return test(deviceID, context, queue, num_elements, two_queues, two_devices, test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
+ return test(deviceID, context, queue, num_elements, two_queues, two_devices,
+ test_enqueue_wait_for_events, test_barrier, use_waitlists,
+ use_marker);
}
-int test_out_of_order_event_enqueue_wait_for_events_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_out_of_order_event_enqueue_wait_for_events_multi_queue(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements)
{
int two_queues = 1;
int two_devices = 0;
@@ -442,11 +605,15 @@ int test_out_of_order_event_enqueue_wait_for_events_multi_queue( cl_device_id de
int test_barrier = 0;
int use_waitlists = 0;
int use_marker = 0;
- return test(deviceID, context, queue, num_elements, two_queues, two_devices, test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
+ return test(deviceID, context, queue, num_elements, two_queues, two_devices,
+ test_enqueue_wait_for_events, test_barrier, use_waitlists,
+ use_marker);
}
-int test_out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements)
{
int two_queues = 1;
int two_devices = 1;
@@ -454,13 +621,16 @@ int test_out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device( cl
int test_barrier = 0;
int use_waitlists = 0;
int use_marker = 0;
- return test(deviceID, context, queue, num_elements, two_queues, two_devices, test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
+ return test(deviceID, context, queue, num_elements, two_queues, two_devices,
+ test_enqueue_wait_for_events, test_barrier, use_waitlists,
+ use_marker);
}
-
-
-int test_out_of_order_event_enqueue_barrier_single_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_out_of_order_event_enqueue_barrier_single_queue(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int two_queues = 0;
int two_devices = 0;
@@ -468,11 +638,16 @@ int test_out_of_order_event_enqueue_barrier_single_queue( cl_device_id deviceID,
int test_barrier = 1;
int use_waitlists = 0;
int use_marker = 0;
- return test(deviceID, context, queue, num_elements, two_queues, two_devices, test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
+ return test(deviceID, context, queue, num_elements, two_queues, two_devices,
+ test_enqueue_wait_for_events, test_barrier, use_waitlists,
+ use_marker);
}
-int test_out_of_order_event_enqueue_marker_single_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_out_of_order_event_enqueue_marker_single_queue(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int two_queues = 0;
int two_devices = 0;
@@ -480,10 +655,15 @@ int test_out_of_order_event_enqueue_marker_single_queue( cl_device_id deviceID,
int test_barrier = 0;
int use_waitlists = 0;
int use_marker = 1;
- return test(deviceID, context, queue, num_elements, two_queues, two_devices, test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
+ return test(deviceID, context, queue, num_elements, two_queues, two_devices,
+ test_enqueue_wait_for_events, test_barrier, use_waitlists,
+ use_marker);
}
-int test_out_of_order_event_enqueue_marker_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_out_of_order_event_enqueue_marker_multi_queue(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int two_queues = 1;
int two_devices = 0;
@@ -491,11 +671,15 @@ int test_out_of_order_event_enqueue_marker_multi_queue( cl_device_id deviceID, c
int test_barrier = 0;
int use_waitlists = 0;
int use_marker = 1;
- return test(deviceID, context, queue, num_elements, two_queues, two_devices, test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
+ return test(deviceID, context, queue, num_elements, two_queues, two_devices,
+ test_enqueue_wait_for_events, test_barrier, use_waitlists,
+ use_marker);
}
-int test_out_of_order_event_enqueue_marker_multi_queue_multi_device( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_out_of_order_event_enqueue_marker_multi_queue_multi_device(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements)
{
int two_queues = 1;
int two_devices = 1;
@@ -503,7 +687,7 @@ int test_out_of_order_event_enqueue_marker_multi_queue_multi_device( cl_device_i
int test_barrier = 0;
int use_waitlists = 0;
int use_marker = 1;
- return test(deviceID, context, queue, num_elements, two_queues, two_devices, test_enqueue_wait_for_events, test_barrier, use_waitlists, use_marker);
+ return test(deviceID, context, queue, num_elements, two_queues, two_devices,
+ test_enqueue_wait_for_events, test_barrier, use_waitlists,
+ use_marker);
}
-
-
diff --git a/test_conformance/events/test_events.cpp b/test_conformance/events/test_events.cpp
index 26693f99..34157fa0 100644
--- a/test_conformance/events/test_events.cpp
+++ b/test_conformance/events/test_events.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -15,97 +15,112 @@
//
#include "testBase.h"
-#if ! defined( _WIN32 )
- #include "unistd.h" // for "sleep" used in the "while (1)" busy wait loop in
+#if !defined(_WIN32)
+#include "unistd.h" // for "sleep" used in the "while (1)" busy wait loop in
#endif
// test_event_flush
const char *sample_long_test_kernel[] = {
-"__kernel void sample_test(__global float *src, __global int *dst)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-" int i;\n"
-"\n"
-" for( i = 0; i < 10000; i++ )\n"
-" {\n"
-" dst[tid] = (int)src[tid] * 3;\n"
-" }\n"
-"\n"
-"}\n" };
-
-int create_and_execute_kernel( cl_context inContext, cl_command_queue inQueue, cl_program *outProgram, cl_kernel *outKernel, cl_mem *streams,
- unsigned int lineCount, const char **lines, const char *kernelName, cl_event *outEvent )
+ "__kernel void sample_test(__global float *src, __global int *dst)\n"
+ "{\n"
+ " int tid = get_global_id(0);\n"
+ " int i;\n"
+ "\n"
+ " for( i = 0; i < 10000; i++ )\n"
+ " {\n"
+ " dst[tid] = (int)src[tid] * 3;\n"
+ " }\n"
+ "\n"
+ "}\n"
+};
+
+int create_and_execute_kernel(cl_context inContext, cl_command_queue inQueue,
+ cl_program *outProgram, cl_kernel *outKernel,
+ cl_mem *streams, unsigned int lineCount,
+ const char **lines, const char *kernelName,
+ cl_event *outEvent)
{
size_t threads[1] = { 1000 }, localThreads[1];
int error;
- if( create_single_kernel_helper( inContext, outProgram, outKernel, lineCount, lines, kernelName ) )
+ if (create_single_kernel_helper(inContext, outProgram, outKernel, lineCount,
+ lines, kernelName))
{
return -1;
}
- error = get_max_common_work_group_size( inContext, *outKernel, threads[0], &localThreads[0] );
- test_error( error, "Unable to get work group size to use" );
+ error = get_max_common_work_group_size(inContext, *outKernel, threads[0],
+ &localThreads[0]);
+ test_error(error, "Unable to get work group size to use");
streams[0] = clCreateBuffer(inContext, CL_MEM_READ_WRITE,
sizeof(cl_float) * 1000, NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
streams[1] = clCreateBuffer(inContext, CL_MEM_READ_WRITE,
sizeof(cl_int) * 1000, NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
/* Set the arguments */
- error = clSetKernelArg( *outKernel, 0, sizeof( streams[0] ), &streams[0] );
- test_error( error, "Unable to set kernel arguments" );
- error = clSetKernelArg( *outKernel, 1, sizeof( streams[1] ), &streams[1] );
- test_error( error, "Unable to set kernel arguments" );
+ error = clSetKernelArg(*outKernel, 0, sizeof(streams[0]), &streams[0]);
+ test_error(error, "Unable to set kernel arguments");
+ error = clSetKernelArg(*outKernel, 1, sizeof(streams[1]), &streams[1]);
+ test_error(error, "Unable to set kernel arguments");
- error = clEnqueueNDRangeKernel(inQueue, *outKernel, 1, NULL, threads, localThreads, 0, NULL, outEvent);
- test_error( error, "Unable to execute test kernel" );
+ error = clEnqueueNDRangeKernel(inQueue, *outKernel, 1, NULL, threads,
+ localThreads, 0, NULL, outEvent);
+ test_error(error, "Unable to execute test kernel");
return 0;
}
-#define SETUP_EVENT( c, q ) \
-clProgramWrapper program; \
-clKernelWrapper kernel; \
-clMemWrapper streams[2]; \
-clEventWrapper event; \
-int error; \
-if( create_and_execute_kernel( c, q, &program, &kernel, &streams[0], 1, sample_long_test_kernel, "sample_test", &event ) ) return -1;
+#define SETUP_EVENT(c, q) \
+ clProgramWrapper program; \
+ clKernelWrapper kernel; \
+ clMemWrapper streams[2]; \
+ clEventWrapper event; \
+ int error; \
+ if (create_and_execute_kernel(c, q, &program, &kernel, &streams[0], 1, \
+ sample_long_test_kernel, "sample_test", \
+ &event)) \
+ return -1;
#define FINISH_EVENT(_q) clFinish(_q)
-const char *IGetStatusString( cl_int status )
+const char *IGetStatusString(cl_int status)
{
- static char tempString[ 128 ];
- switch( status )
+ static char tempString[128];
+ switch (status)
{
- case CL_COMPLETE: return "CL_COMPLETE";
- case CL_RUNNING: return "CL_RUNNING";
- case CL_QUEUED: return "CL_QUEUED";
- case CL_SUBMITTED: return "CL_SUBMITTED";
+ case CL_COMPLETE: return "CL_COMPLETE";
+ case CL_RUNNING: return "CL_RUNNING";
+ case CL_QUEUED: return "CL_QUEUED";
+ case CL_SUBMITTED: return "CL_SUBMITTED";
default:
- sprintf( tempString, "<unknown: %d>", (int)status );
+ sprintf(tempString, "<unknown: %d>", (int)status);
return tempString;
}
}
/* Note: tests clGetEventStatus and clReleaseEvent (implicitly) */
-int test_event_get_execute_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_get_execute_status(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_int status;
- SETUP_EVENT( context, queue );
+ SETUP_EVENT(context, queue);
/* Now wait for it to be done */
- error = clWaitForEvents( 1, &event );
- test_error( error, "Unable to wait for event" );
-
- error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus to wait for event completion failed" );
- if( status != CL_COMPLETE )
+ error = clWaitForEvents(1, &event);
+ test_error(error, "Unable to wait for event");
+
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error,
+ "Calling clGetEventStatus to wait for event completion failed");
+ if (status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "after event complete (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
@@ -113,57 +128,75 @@ int test_event_get_execute_status( cl_device_id deviceID, cl_context context, cl
return 0;
}
-int test_event_get_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_get_info(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- SETUP_EVENT( context, queue );
+ SETUP_EVENT(context, queue);
/* Verify parameters of clGetEventInfo not already tested by other tests */
cl_command_queue otherQueue;
size_t size;
- error = clGetEventInfo( event, CL_EVENT_COMMAND_QUEUE, sizeof( otherQueue ), &otherQueue, &size );
- test_error( error, "Unable to get event info!" );
- // We can not check if this is the right queue because this is an opaque object.
- if( size != sizeof( queue ) )
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_QUEUE, sizeof(otherQueue),
+ &otherQueue, &size);
+ test_error(error, "Unable to get event info!");
+ // We can not check if this is the right queue because this is an opaque
+ // object.
+ if (size != sizeof(queue))
{
- log_error( "ERROR: Returned command queue size does not validate (expected %d, got %d)\n", (int)sizeof( queue ), (int)size );
+ log_error("ERROR: Returned command queue size does not validate "
+ "(expected %d, got %d)\n",
+ (int)sizeof(queue), (int)size);
return -1;
}
cl_command_type type;
- error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof( type ), &type, &size );
- test_error( error, "Unable to get event info!" );
- if( type != CL_COMMAND_NDRANGE_KERNEL )
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(type), &type,
+ &size);
+ test_error(error, "Unable to get event info!");
+ if (type != CL_COMMAND_NDRANGE_KERNEL)
{
- log_error( "ERROR: Returned command type does not validate (expected %d, got %d)\n", (int)CL_COMMAND_NDRANGE_KERNEL, (int)type );
+ log_error("ERROR: Returned command type does not validate (expected "
+ "%d, got %d)\n",
+ (int)CL_COMMAND_NDRANGE_KERNEL, (int)type);
return -1;
}
- if( size != sizeof( type ) )
+ if (size != sizeof(type))
{
- log_error( "ERROR: Returned command type size does not validate (expected %d, got %d)\n", (int)sizeof( type ), (int)size );
+ log_error("ERROR: Returned command type size does not validate "
+ "(expected %d, got %d)\n",
+ (int)sizeof(type), (int)size);
return -1;
}
cl_uint count;
- error = clGetEventInfo( event, CL_EVENT_REFERENCE_COUNT, sizeof( count ), &count, &size );
- test_error( error, "Unable to get event info for CL_EVENT_REFERENCE_COUNT!" );
- if( size != sizeof( count ) )
+ error = clGetEventInfo(event, CL_EVENT_REFERENCE_COUNT, sizeof(count),
+ &count, &size);
+ test_error(error, "Unable to get event info for CL_EVENT_REFERENCE_COUNT!");
+ if (size != sizeof(count))
{
- log_error( "ERROR: Returned command type size does not validate (expected %d, got %d)\n", (int)sizeof( type ), (int)size );
+ log_error("ERROR: Returned command type size does not validate "
+ "(expected %d, got %d)\n",
+ (int)sizeof(type), (int)size);
return -1;
}
cl_context testCtx;
- error = clGetEventInfo( event, CL_EVENT_CONTEXT, sizeof( testCtx ), &testCtx, &size );
- test_error( error, "Unable to get event context info!" );
- if( size != sizeof( context ) )
+ error = clGetEventInfo(event, CL_EVENT_CONTEXT, sizeof(testCtx), &testCtx,
+ &size);
+ test_error(error, "Unable to get event context info!");
+ if (size != sizeof(context))
{
- log_error( "ERROR: Returned context size does not validate (expected %d, got %d)\n", (int)sizeof( context ), (int)size );
+ log_error("ERROR: Returned context size does not validate (expected "
+ "%d, got %d)\n",
+ (int)sizeof(context), (int)size);
return -1;
}
- if( testCtx != context )
+ if (testCtx != context)
{
- log_error( "ERROR: Returned context does not match (expected %p, got %p)\n", (void *)context, (void *)testCtx );
+ log_error(
+ "ERROR: Returned context does not match (expected %p, got %p)\n",
+ (void *)context, (void *)testCtx);
return -1;
}
@@ -171,10 +204,11 @@ int test_event_get_info( cl_device_id deviceID, cl_context context, cl_command_q
return 0;
}
-int test_event_get_write_array_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_get_write_array_status(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_mem stream;
- cl_float testArray[ 1024 * 32 ];
+ cl_float testArray[1024 * 32];
cl_event event;
int error;
cl_int status;
@@ -182,34 +216,41 @@ int test_event_get_write_array_status( cl_device_id deviceID, cl_context context
stream = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_float) * 1024 * 32, NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
- error = clEnqueueWriteBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event);
- test_error( error, "Unable to set testing kernel data" );
+ error = clEnqueueWriteBuffer(queue, stream, CL_FALSE, 0,
+ sizeof(cl_float) * 1024 * 32,
+ (void *)testArray, 0, NULL, &event);
+ test_error(error, "Unable to set testing kernel data");
/* Now wait for it to be done */
- error = clWaitForEvents( 1, &event );
- test_error( error, "Unable to wait for event" );
-
- error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus to wait for event completion failed" );
- if( status != CL_COMPLETE )
+ error = clWaitForEvents(1, &event);
+ test_error(error, "Unable to wait for event");
+
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error,
+ "Calling clGetEventStatus to wait for event completion failed");
+ if (status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "after array write complete (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
- clReleaseMemObject( stream );
- clReleaseEvent( event );
+ clReleaseMemObject(stream);
+ clReleaseEvent(event);
return 0;
}
-int test_event_get_read_array_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_get_read_array_status(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_mem stream;
- cl_float testArray[ 1024 * 32 ];
+ cl_float testArray[1024 * 32];
cl_event event;
int error;
cl_int status;
@@ -217,58 +258,72 @@ int test_event_get_read_array_status( cl_device_id deviceID, cl_context context,
stream = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_float) * 1024 * 32, NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
- error = clEnqueueReadBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event);
- test_error( error, "Unable to get testing kernel data" );
+ error = clEnqueueReadBuffer(queue, stream, CL_FALSE, 0,
+ sizeof(cl_float) * 1024 * 32, (void *)testArray,
+ 0, NULL, &event);
+ test_error(error, "Unable to get testing kernel data");
/* It should still be running... */
- error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
- if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE)
+ if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED
+ && status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "during array read (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
/* Now wait for it to be done */
- error = clWaitForEvents( 1, &event );
- test_error( error, "Unable to wait for event" );
-
- error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus to wait for event completion failed" );
- if( status != CL_COMPLETE )
+ error = clWaitForEvents(1, &event);
+ test_error(error, "Unable to wait for event");
+
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error,
+ "Calling clGetEventStatus to wait for event completion failed");
+ if (status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "after array read complete (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
- clReleaseMemObject( stream );
- clReleaseEvent( event );
+ clReleaseMemObject(stream);
+ clReleaseEvent(event);
return 0;
}
/* clGetEventStatus not implemented yet */
-int test_event_wait_for_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_wait_for_execute(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_int status;
- SETUP_EVENT( context, queue );
+ SETUP_EVENT(context, queue);
/* Now we wait for it to be done, then test the status again */
- error = clWaitForEvents( 1, &event );
- test_error( error, "Unable to wait for execute event" );
+ error = clWaitForEvents(1, &event);
+ test_error(error, "Unable to wait for execute event");
/* Make sure it worked */
- error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
- if( status != CL_COMPLETE )
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
+ if (status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "after event complete (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
@@ -276,11 +331,12 @@ int test_event_wait_for_execute( cl_device_id deviceID, cl_context context, cl_c
return 0;
}
-int test_event_wait_for_array( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_wait_for_array(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_mem streams[2];
- cl_float readArray[ 1024 * 32 ];
- cl_float writeArray[ 1024 * 32 ];
+ cl_float readArray[1024 * 32];
+ cl_float writeArray[1024 * 32];
cl_event events[2];
int error;
cl_int status;
@@ -288,128 +344,155 @@ int test_event_wait_for_array( cl_device_id deviceID, cl_context context, cl_com
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_float) * 1024 * 32, NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_float) * 1024 * 32, NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
- error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]);
- test_error( error, "Unable to read testing kernel data" );
+ error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0,
+ sizeof(cl_float) * 1024 * 32, (void *)readArray,
+ 0, NULL, &events[0]);
+ test_error(error, "Unable to read testing kernel data");
- error = clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)writeArray, 0, NULL, &events[1]);
- test_error( error, "Unable to write testing kernel data" );
+ error = clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0,
+ sizeof(cl_float) * 1024 * 32,
+ (void *)writeArray, 0, NULL, &events[1]);
+ test_error(error, "Unable to write testing kernel data");
/* Both should still be running */
- error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
- if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE)
+ error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
+ if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED
+ && status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "during array read (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
- error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
- if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE)
+ error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
+ if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED
+ && status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array write (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "during array write (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
/* Now try waiting for both */
- error = clWaitForEvents( 2, events );
- test_error( error, "Unable to wait for array events" );
+ error = clWaitForEvents(2, events);
+ test_error(error, "Unable to wait for array events");
/* Double check status on both */
- error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
- if( status != CL_COMPLETE )
+ error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
+ if (status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "after array read complete (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
- error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
- if( status != CL_COMPLETE )
+ error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
+ if (status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "after array write complete (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
- clReleaseMemObject( streams[0] );
- clReleaseMemObject( streams[1] );
- clReleaseEvent( events[0] );
- clReleaseEvent( events[1] );
+ clReleaseMemObject(streams[0]);
+ clReleaseMemObject(streams[1]);
+ clReleaseEvent(events[0]);
+ clReleaseEvent(events[1]);
return 0;
}
-int test_event_flush( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_flush(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int loopCount = 0;
cl_int status;
- SETUP_EVENT( context, queue );
+ SETUP_EVENT(context, queue);
- /* Now flush. Note that we can't guarantee this actually lets the op finish, but we can guarantee it's no longer queued */
- error = clFlush( queue );
- test_error( error, "Unable to flush events" );
+ /* Now flush. Note that we can't guarantee this actually lets the op finish,
+ * but we can guarantee it's no longer queued */
+ error = clFlush(queue);
+ test_error(error, "Unable to flush events");
/* Make sure it worked */
- while (1) {
- error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS,
- sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
+ while (1)
+ {
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
- if( status != CL_QUEUED )
- break;
+ if (status != CL_QUEUED) break;
-#if ! defined( _WIN32 )
+#if !defined(_WIN32)
sleep(1); // give it some time here.
#else // _WIN32
- Sleep(1000);
+ Sleep(1000);
#endif
++loopCount;
- }
-
-/*
-CL_QUEUED (command has been enqueued in the command-queue),
-CL_SUBMITTED (enqueued command has been submitted by the host to the device associated with the command-queue),
-CL_RUNNING (device is currently executing this command),
-CL_COMPLETE (the command has completed), or
-Error code given by a negative integer value. (command was abnormally terminated – this may be caused by a bad memory access etc.).
-*/
- if(status != CL_COMPLETE && status != CL_SUBMITTED &&
- status != CL_RUNNING && status != CL_COMPLETE)
- {
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event flush (%d:%s)\n", status, IGetStatusString( status ) );
+ }
+
+ /*
+ CL_QUEUED (command has been enqueued in the command-queue),
+ CL_SUBMITTED (enqueued command has been submitted by the host to the device
+ associated with the command-queue), CL_RUNNING (device is currently
+ executing this command), CL_COMPLETE (the command has completed), or Error
+ code given by a negative integer value. (command was abnormally terminated –
+ this may be caused by a bad memory access etc.).
+ */
+ if (status != CL_COMPLETE && status != CL_SUBMITTED && status != CL_RUNNING
+ && status != CL_COMPLETE)
+ {
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "after event flush (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
/* Now wait */
- error = clFinish( queue );
- test_error( error, "Unable to finish events" );
+ error = clFinish(queue);
+ test_error(error, "Unable to finish events");
FINISH_EVENT(queue);
return 0;
}
-int test_event_finish_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_finish_execute(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_int status;
- SETUP_EVENT( context, queue );
+ SETUP_EVENT(context, queue);
/* Now flush and finish all ops */
- error = clFinish( queue );
- test_error( error, "Unable to finish all events" );
+ error = clFinish(queue);
+ test_error(error, "Unable to finish all events");
/* Make sure it worked */
- error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
- if( status != CL_COMPLETE )
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
+ if (status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "after event complete (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
@@ -417,11 +500,12 @@ int test_event_finish_execute( cl_device_id deviceID, cl_context context, cl_com
return 0;
}
-int test_event_finish_array( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_finish_array(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_mem streams[2];
- cl_float readArray[ 1024 * 32 ];
- cl_float writeArray[ 1024 * 32 ];
+ cl_float readArray[1024 * 32];
+ cl_float writeArray[1024 * 32];
cl_event events[2];
int error;
cl_int status;
@@ -429,59 +513,77 @@ int test_event_finish_array( cl_device_id deviceID, cl_context context, cl_comma
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_float) * 1024 * 32, NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_float) * 1024 * 32, NULL, &error);
- test_error( error, "Creating test array failed" );
+ test_error(error, "Creating test array failed");
- error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]);
- test_error( error, "Unable to read testing kernel data" );
+ error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0,
+ sizeof(cl_float) * 1024 * 32, (void *)readArray,
+ 0, NULL, &events[0]);
+ test_error(error, "Unable to read testing kernel data");
- error = clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)writeArray, 0, NULL, &events[1]);
- test_error( error, "Unable to write testing kernel data" );
+ error = clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0,
+ sizeof(cl_float) * 1024 * 32,
+ (void *)writeArray, 0, NULL, &events[1]);
+ test_error(error, "Unable to write testing kernel data");
/* Both should still be running */
- error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
- if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE)
+ error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
+ if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED
+ && status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "during array read (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
- error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
- if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE)
+ error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
+ if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED
+ && status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array write (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "during array write (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
/* Now try finishing all ops */
- error = clFinish( queue );
- test_error( error, "Unable to finish all events" );
+ error = clFinish(queue);
+ test_error(error, "Unable to finish all events");
/* Double check status on both */
- error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
- if( status != CL_COMPLETE )
+ error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
+ if (status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "after array read complete (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
- error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventStatus didn't work!" );
- if( status != CL_COMPLETE )
+ error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventStatus didn't work!");
+ if (status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetErrorStatus "
+ "after array write complete (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
- clReleaseMemObject( streams[0] );
- clReleaseMemObject( streams[1] );
- clReleaseEvent( events[0] );
- clReleaseEvent( events[1] );
+ clReleaseMemObject(streams[0]);
+ clReleaseMemObject(streams[1]);
+ clReleaseEvent(events[0]);
+ clReleaseEvent(events[1]);
return 0;
}
@@ -489,7 +591,8 @@ int test_event_finish_array( cl_device_id deviceID, cl_context context, cl_comma
#define NUM_EVENT_RUNS 100
-int test_event_release_before_done( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_release_before_done(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
// Create a kernel to run
clProgramWrapper program;
@@ -501,21 +604,24 @@ int test_event_release_before_done( cl_device_id deviceID, cl_context context, c
int error, i;
// Create a kernel
- if( create_single_kernel_helper( context, &program, &kernel[0], 1, sample_long_test_kernel, "sample_test" ) )
+ if (create_single_kernel_helper(context, &program, &kernel[0], 1,
+ sample_long_test_kernel, "sample_test"))
{
return -1;
}
- for( i = 1; i < NUM_EVENT_RUNS; i++ ) {
- kernel[i] = clCreateKernel(program, "sample_test", &error);
- test_error(error, "Unable to create kernel");
- }
+ for (i = 1; i < NUM_EVENT_RUNS; i++)
+ {
+ kernel[i] = clCreateKernel(program, "sample_test", &error);
+ test_error(error, "Unable to create kernel");
+ }
- error = get_max_common_work_group_size( context, kernel[0], 1024, &threads[0] );
- test_error( error, "Unable to get work group size to use" );
+ error =
+ get_max_common_work_group_size(context, kernel[0], 1024, &threads[0]);
+ test_error(error, "Unable to get work group size to use");
// Create a set of streams to use as arguments
- for( i = 0; i < NUM_EVENT_RUNS; i++ )
+ for (i = 0; i < NUM_EVENT_RUNS; i++)
{
streams[i][0] =
clCreateBuffer(context, CL_MEM_READ_WRITE,
@@ -523,77 +629,89 @@ int test_event_release_before_done( cl_device_id deviceID, cl_context context, c
streams[i][1] =
clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int) * threads[0], NULL, &error);
- if( ( streams[i][0] == NULL ) || ( streams[i][1] == NULL ) )
+ if ((streams[i][0] == NULL) || (streams[i][1] == NULL))
{
- log_error( "ERROR: Unable to allocate testing streams" );
+ log_error("ERROR: Unable to allocate testing streams");
return -1;
}
}
- // Execute the kernels one by one, hopefully making sure they won't be done by the time we get to the end
- for( i = 0; i < NUM_EVENT_RUNS; i++ )
+ // Execute the kernels one by one, hopefully making sure they won't be done
+ // by the time we get to the end
+ for (i = 0; i < NUM_EVENT_RUNS; i++)
{
- error = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), &streams[i][0] );
- error |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), &streams[i][1] );
- test_error( error, "Unable to set kernel arguments" );
+ error = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), &streams[i][0]);
+ error |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem), &streams[i][1]);
+ test_error(error, "Unable to set kernel arguments");
- error = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, threads, 0, NULL, &events[i]);
- test_error( error, "Unable to execute test kernel" );
+ error = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads,
+ threads, 0, NULL, &events[i]);
+ test_error(error, "Unable to execute test kernel");
}
// Free all but the last event
- for( i = 0; i < NUM_EVENT_RUNS - 1; i++ )
+ for (i = 0; i < NUM_EVENT_RUNS - 1; i++)
{
- clReleaseEvent( events[ i ] );
+ clReleaseEvent(events[i]);
}
// Get status on the last one, then free it
- error = clGetEventInfo( events[ NUM_EVENT_RUNS - 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Unable to get event status" );
+ error = clGetEventInfo(events[NUM_EVENT_RUNS - 1],
+ CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status),
+ &status, NULL);
+ test_error(error, "Unable to get event status");
- clReleaseEvent( events[ NUM_EVENT_RUNS - 1 ] );
+ clReleaseEvent(events[NUM_EVENT_RUNS - 1]);
// Was the status still-running?
- if( status == CL_COMPLETE )
+ if (status == CL_COMPLETE)
{
- log_info( "WARNING: Events completed before they could be released, so test is a null-op. Increase workload and try again." );
+ log_info("WARNING: Events completed before they could be released, so "
+ "test is a null-op. Increase workload and try again.");
}
- else if( status == CL_RUNNING || status == CL_QUEUED || status == CL_SUBMITTED )
+ else if (status == CL_RUNNING || status == CL_QUEUED
+ || status == CL_SUBMITTED)
{
- log_info( "Note: Event status was running or queued when released, so test was good.\n" );
+ log_info("Note: Event status was running or queued when released, so "
+ "test was good.\n");
}
// If we didn't crash by now, the test succeeded
- clFinish( queue );
+ clFinish(queue);
return 0;
}
-int test_event_enqueue_marker( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_enqueue_marker(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_int status;
- SETUP_EVENT( context, queue );
+ SETUP_EVENT(context, queue);
- /* Now we queue a marker and wait for that, which--since it queues afterwards--should guarantee the execute finishes too */
+ /* Now we queue a marker and wait for that, which--since it queues
+ * afterwards--should guarantee the execute finishes too */
clEventWrapper markerEvent;
- //error = clEnqueueMarker( queue, &markerEvent );
+ // error = clEnqueueMarker( queue, &markerEvent );
#ifdef CL_VERSION_1_2
- error = clEnqueueMarkerWithWaitList(queue, 0, NULL, &markerEvent );
+ error = clEnqueueMarkerWithWaitList(queue, 0, NULL, &markerEvent);
#else
- error = clEnqueueMarker( queue, &markerEvent );
+ error = clEnqueueMarker(queue, &markerEvent);
#endif
- test_error( error, "Unable to queue marker" );
+ test_error(error, "Unable to queue marker");
/* Now we wait for it to be done, then test the status again */
- error = clWaitForEvents( 1, &markerEvent );
- test_error( error, "Unable to wait for marker event" );
+ error = clWaitForEvents(1, &markerEvent);
+ test_error(error, "Unable to wait for marker event");
/* Check the status of the first event */
- error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Calling clGetEventInfo didn't work!" );
- if( status != CL_COMPLETE )
+ error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Calling clGetEventInfo didn't work!");
+ if (status != CL_COMPLETE)
{
- log_error( "ERROR: Incorrect status returned from clGetEventInfo after event complete (%d:%s)\n", status, IGetStatusString( status ) );
+ log_error("ERROR: Incorrect status returned from clGetEventInfo after "
+ "event complete (%d:%s)\n",
+ status, IGetStatusString(status));
return -1;
}
@@ -602,85 +720,101 @@ int test_event_enqueue_marker( cl_device_id deviceID, cl_context context, cl_com
}
#ifdef CL_VERSION_1_2
-int test_event_enqueue_marker_with_event_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_enqueue_marker_with_event_list(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
+ SETUP_EVENT(context, queue);
+ cl_event event_list[3] = { NULL, NULL, NULL };
- cl_int status;
- SETUP_EVENT( context, queue );
- cl_event event_list[3]={ NULL, NULL, NULL};
+ size_t threads[1] = { 10 }, localThreads[1] = { 1 };
+ cl_uint event_count = 2;
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, &event_list[0]);
+ test_error(error, " clEnqueueMarkerWithWaitList 1 ");
- size_t threads[1] = { 10 }, localThreads[1]={1};
- cl_uint event_count=2;
- error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]);
- test_error( error, " clEnqueueMarkerWithWaitList 1 " );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, &event_list[1]);
+ test_error(error, " clEnqueueMarkerWithWaitList 2");
- error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]);
- test_error( error, " clEnqueueMarkerWithWaitList 2" );
-
- error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, NULL);
- test_error( error, " clEnqueueMarkerWithWaitList 3" );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, NULL);
+ test_error(error, " clEnqueueMarkerWithWaitList 3");
// test the case event returned
- error =clEnqueueMarkerWithWaitList(queue, event_count, event_list, &event_list[2]);
- test_error( error, " clEnqueueMarkerWithWaitList " );
+ error = clEnqueueMarkerWithWaitList(queue, event_count, event_list,
+ &event_list[2]);
+ test_error(error, " clEnqueueMarkerWithWaitList ");
error = clReleaseEvent(event_list[0]);
error |= clReleaseEvent(event_list[1]);
- test_error( error, "clReleaseEvent" );
+ test_error(error, "clReleaseEvent");
- error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]);
- test_error( error, " clEnqueueMarkerWithWaitList 1 -1 " );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, &event_list[0]);
+ test_error(error, " clEnqueueMarkerWithWaitList 1 -1 ");
- error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]);
- test_error( error, " clEnqueueMarkerWithWaitList 2-2" );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, &event_list[1]);
+ test_error(error, " clEnqueueMarkerWithWaitList 2-2");
- // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error : clEnqueueMarkerWithWaitList failed: event is a NULL value
- error =clEnqueueMarkerWithWaitList(queue, event_count, event_list, NULL);
- test_error( error, " clEnqueueMarkerWithWaitList " );
+ // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error :
+ // clEnqueueMarkerWithWaitList failed: event is a NULL value
+ error = clEnqueueMarkerWithWaitList(queue, event_count, event_list, NULL);
+ test_error(error, " clEnqueueMarkerWithWaitList ");
error = clReleaseEvent(event_list[0]);
error |= clReleaseEvent(event_list[1]);
error |= clReleaseEvent(event_list[2]);
- test_error( error, "clReleaseEvent" );
+ test_error(error, "clReleaseEvent");
FINISH_EVENT(queue);
return 0;
}
-int test_event_enqueue_barrier_with_event_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_event_enqueue_barrier_with_event_list(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
+ SETUP_EVENT(context, queue);
+ cl_event event_list[3] = { NULL, NULL, NULL };
- cl_int status;
- SETUP_EVENT( context, queue );
- cl_event event_list[3]={ NULL, NULL, NULL};
-
- size_t threads[1] = { 10 }, localThreads[1]={1};
- cl_uint event_count=2;
- error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]);
- test_error( error, " clEnqueueBarrierWithWaitList 1 " );
+ size_t threads[1] = { 10 }, localThreads[1] = { 1 };
+ cl_uint event_count = 2;
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, &event_list[0]);
+ test_error(error, " clEnqueueBarrierWithWaitList 1 ");
- error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]);
- test_error( error, " clEnqueueBarrierWithWaitList 2" );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, &event_list[1]);
+ test_error(error, " clEnqueueBarrierWithWaitList 2");
- error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, NULL);
- test_error( error, " clEnqueueBarrierWithWaitList 20" );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, NULL);
+ test_error(error, " clEnqueueBarrierWithWaitList 20");
// test the case event returned
- error =clEnqueueBarrierWithWaitList(queue, event_count, event_list, &event_list[2]);
- test_error( error, " clEnqueueBarrierWithWaitList " );
+ error = clEnqueueBarrierWithWaitList(queue, event_count, event_list,
+ &event_list[2]);
+ test_error(error, " clEnqueueBarrierWithWaitList ");
clReleaseEvent(event_list[0]);
clReleaseEvent(event_list[1]);
- error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]);
- test_error( error, " clEnqueueBarrierWithWaitList 1 " );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, &event_list[0]);
+ test_error(error, " clEnqueueBarrierWithWaitList 1 ");
- error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]);
- test_error( error, " clEnqueueBarrierWithWaitList 2" );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, &event_list[1]);
+ test_error(error, " clEnqueueBarrierWithWaitList 2");
- // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error : clEnqueueMarkerWithWaitList failed: event is a NULL value
- error = clEnqueueBarrierWithWaitList(queue, event_count, event_list, NULL);
- test_error( error, " clEnqueueBarrierWithWaitList " );
+ // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error :
+ // clEnqueueMarkerWithWaitList failed: event is a NULL value
+ error = clEnqueueBarrierWithWaitList(queue, event_count, event_list, NULL);
+ test_error(error, " clEnqueueBarrierWithWaitList ");
clReleaseEvent(event_list[0]);
clReleaseEvent(event_list[1]);
diff --git a/test_conformance/events/test_userevents.cpp b/test_conformance/events/test_userevents.cpp
index 0a4954f9..1fdb4ea4 100644
--- a/test_conformance/events/test_userevents.cpp
+++ b/test_conformance/events/test_userevents.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -14,11 +14,11 @@
// limitations under the License.
//
#if defined(__APPLE__)
- #include <OpenCL/opencl.h>
- #include <mach/mach_time.h>
+#include <OpenCL/opencl.h>
+#include <mach/mach_time.h>
#else
- #include <CL/cl.h>
- #include <malloc.h>
+#include <CL/cl.h>
+#include <malloc.h>
#endif
#include <assert.h>
#include <stdio.h>
@@ -29,189 +29,261 @@
// CL error checking.
#if defined(_MSC_VER)
-#define CL_EXIT_ERROR(cmd,...) \
-{ \
-if ((cmd) != CL_SUCCESS) { \
-log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);\
-log_error(## __VA_ARGS__ );\
-log_error("\n");\
-return -1;\
-}\
-}
+#define CL_EXIT_ERROR(cmd, ...) \
+ { \
+ if ((cmd) != CL_SUCCESS) \
+ { \
+ log_error("CL ERROR: %s %u: ", __FILE__, __LINE__); \
+ log_error(##__VA_ARGS__); \
+ log_error("\n"); \
+ return -1; \
+ } \
+ }
#else
-#define CL_EXIT_ERROR(cmd,format,...) \
-{ \
-if ((cmd) != CL_SUCCESS) { \
-log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);\
-log_error(format,## __VA_ARGS__ );\
-log_error("\n");\
-return -1;\
-}\
-}
-#endif
-
-#define CL_EXIT_BUILD_ERROR(cmd,program,format,...) \
-{ \
-if ((cmd) != CL_SUCCESS) { \
-cl_uint num_devices_;\
-clGetProgramInfo(program,CL_PROGRAM_NUM_DEVICES,sizeof(num_devices_),&num_devices_,NULL);\
-cl_device_id *device_list;\
-device_list=(cl_device_id *)malloc(num_devices_*sizeof(cl_device_id));\
-clGetProgramInfo(program,CL_PROGRAM_DEVICES,num_devices_*sizeof(cl_device_id),device_list,NULL);\
-for (unsigned i=0;i<num_devices_;++i) {\
-size_t len;\
-char buffer[2048];\
-clGetProgramBuildInfo(program,device_list[i],CL_PROGRAM_BUILD_LOG,sizeof(buffer),buffer,&len);\
-log_error("DEVICE %u CL BUILD ERROR: %s(%u): ",i,__FILE__,__LINE__);\
-log_error(format,## __VA_ARGS__ );\
-log_error("\n");\
-}\
-free(device_list);\
-return -1;\
-}\
-}
-
-const char* src[] = {
- "__kernel void simple_task(__global float* output) {\n"
- " output[0] += 1;\n"
- "}\n"
-};
-
-enum { MaxDevices = 8 };
-
-int test_userevents( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-
- cl_int err;
-
- cl_event u1 = clCreateUserEvent( context, &err );
- CL_EXIT_ERROR(err,"clCreateUserEvent failed");
-
- // Test event properties.
- cl_int s;
- size_t sizeofs;
- CL_EXIT_ERROR(clGetEventInfo(u1, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof s, &s, &sizeofs),"clGetEventInfo failed");
- CL_EXIT_ERROR((sizeof s == sizeofs) ? CL_SUCCESS : -1,"clGetEventInfo returned wrong size for CL_EVENT_COMMAND_EXECUTION_STATUS");
- CL_EXIT_ERROR((s == CL_SUBMITTED) ? CL_SUCCESS : -1,"clGetEventInfo returned wrong value for CL_EVENT_COMMAND_EXECUTION_STATUS");
-
- cl_command_type t;
- size_t sizeoft;
- CL_EXIT_ERROR(clGetEventInfo(u1, CL_EVENT_COMMAND_TYPE, sizeof t, &t, &sizeoft),"clGetEventInfo failed");
- CL_EXIT_ERROR((sizeof t == sizeoft) ? CL_SUCCESS : -1,"clGetEventInfo returned wrong size for CL_EVENT_COMMAND_TYPE");
- CL_EXIT_ERROR((t == CL_COMMAND_USER) ? CL_SUCCESS : -1,"clGetEventInfo returned wrong value for CL_EVENT_COMMAND_TYPE");
-
- cl_command_queue q;
- size_t sizeofq;
- CL_EXIT_ERROR(clGetEventInfo(u1, CL_EVENT_COMMAND_QUEUE, sizeof q, &q, &sizeofq),"clGetEventInfo failed");
- CL_EXIT_ERROR((sizeof q == sizeofq) ? CL_SUCCESS : -1,"clGetEventInfo returned wrong size for CL_EVENT_COMMAND_QUEUE");
- CL_EXIT_ERROR((q == NULL) ? CL_SUCCESS : -1,"clGetEventInfo returned wrong value for CL_EVENT_COMMAND_QUEUE");
-
- cl_context c;
- size_t sizeofc;
- CL_EXIT_ERROR(clGetEventInfo(u1, CL_EVENT_CONTEXT, sizeof c, &c, &sizeofc),"clGetEventInfo failed");
- CL_EXIT_ERROR((sizeof c == sizeofc) ? CL_SUCCESS : -1,"clGetEventInfo returned wrong size for CL_EVENT_CONTEXT");
- CL_EXIT_ERROR((c == context) ? CL_SUCCESS : -1,"clGetEventInfo returned wrong value for CL_EVENT_CONTEXT");
-
- cl_ulong p;
- err = clGetEventProfilingInfo(u1,CL_PROFILING_COMMAND_QUEUED,sizeof p,&p,0);
- CL_EXIT_ERROR((err != CL_SUCCESS) ? CL_SUCCESS : -1,"clGetEventProfilingInfo returned wrong error.");
-
- // Test semantics.
- cl_program program;
- err = create_single_kernel_helper_create_program(context, &program, 1, src);
- CL_EXIT_ERROR(err,"clCreateProgramWithSource failed");
-
- CL_EXIT_BUILD_ERROR(clBuildProgram(program,0,NULL,"",NULL,NULL),program,"Building program from inline src:\t%s",src[0]);
-
- cl_kernel k0 = clCreateKernel(program,"simple_task",&err);
- CL_EXIT_ERROR(err,"clCreateKernel failed");
-
- float buffer[1];
- cl_mem output = clCreateBuffer(context,CL_MEM_USE_HOST_PTR,sizeof buffer, buffer, &err);
- CL_EXIT_ERROR(err,"clCreateBuffer failed.");
-
- CL_EXIT_ERROR(clSetKernelArg(k0,0,sizeof(output),&output),"clSetKernelArg failed");
-
-
- // Successful case. //////////////////////////////////////////////////////////////////////////////////////
- {
- cl_event e[4];
- cl_uint N = sizeof e / sizeof(cl_event);
-
- log_info("Enqueuing tasks\n");
- for (cl_uint i = 0; i != N; ++i)
- CL_EXIT_ERROR(clEnqueueTask(queue,k0,1,&u1,&e[i]),"clEnqueueTaskFailed");
-
- log_info("Checking task status before setting user event status\n");
- for (cl_uint i = 0; i != N; ++i) {
- CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed");
- CL_EXIT_ERROR((s >= CL_SUBMITTED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status before user event",i);
+#define CL_EXIT_ERROR(cmd, format, ...) \
+ { \
+ if ((cmd) != CL_SUCCESS) \
+ { \
+ log_error("CL ERROR: %s %u: ", __FILE__, __LINE__); \
+ log_error(format, ##__VA_ARGS__); \
+ log_error("\n"); \
+ return -1; \
+ } \
}
+#endif
- log_info("Setting user event status to complete\n");
- CL_EXIT_ERROR(clSetUserEventStatus(u1,CL_COMPLETE),"clSetUserEventStatus failed");
-
- log_info("Waiting for tasks to finish executing\n");
- CL_EXIT_ERROR(clWaitForEvents( 1, &e[N-1] ),"clWaitForEvent failed");
-
- log_info("Checking task status after setting user event status\n");
- for (cl_uint i = 0; i != N; ++i) {
- CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed");
- CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %04x after successful user event",i,s);
+#define CL_EXIT_BUILD_ERROR(cmd, program, format, ...) \
+ { \
+ if ((cmd) != CL_SUCCESS) \
+ { \
+ cl_uint num_devices_; \
+ clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, \
+ sizeof(num_devices_), &num_devices_, NULL); \
+ cl_device_id *device_list; \
+ device_list = \
+ (cl_device_id *)malloc(num_devices_ * sizeof(cl_device_id)); \
+ clGetProgramInfo(program, CL_PROGRAM_DEVICES, \
+ num_devices_ * sizeof(cl_device_id), device_list, \
+ NULL); \
+ for (unsigned i = 0; i < num_devices_; ++i) \
+ { \
+ size_t len; \
+ char buffer[2048]; \
+ clGetProgramBuildInfo(program, device_list[i], \
+ CL_PROGRAM_BUILD_LOG, sizeof(buffer), \
+ buffer, &len); \
+ log_error("DEVICE %u CL BUILD ERROR: %s(%u): ", i, __FILE__, \
+ __LINE__); \
+ log_error(format, ##__VA_ARGS__); \
+ log_error("\n"); \
+ } \
+ free(device_list); \
+ return -1; \
+ } \
}
- CL_EXIT_ERROR(clReleaseEvent(u1),"clReleaseEvent failed");
-
- for (cl_uint i = 0; i != N; ++i)
- CL_EXIT_ERROR(clReleaseEvent(e[i]),"clReleaseEvent failed");
-
- log_info("Successful user event case passed.\n");
-
- }
+const char *src[] = { "__kernel void simple_task(__global float* output) {\n"
+ " output[0] += 1;\n"
+ "}\n" };
- // Test unsuccessful user event case. ///////////////////////////////////////////////////////////////////
- {
- cl_event u2 = clCreateUserEvent( context, &err );
- CL_EXIT_ERROR(err,"clCreateUserEvent failed");
-
- cl_event e[4];
- cl_uint N = sizeof e / sizeof(cl_event);
+enum
+{
+ MaxDevices = 8
+};
- log_info("Enqueuing tasks\n");
- for (cl_uint i = 0; i != N; ++i)
- CL_EXIT_ERROR(clEnqueueTask(queue,k0,1,&u2,&e[i]),"clEnqueueTaskFailed");
+int test_userevents(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
- log_info("Checking task status before setting user event status\n");
- for (cl_uint i = 0; i != N; ++i) {
- CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed");
- CL_EXIT_ERROR((s == CL_QUEUED || s == CL_SUBMITTED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %d before user event",i, (int) s);
+ cl_int err;
+
+ cl_event u1 = clCreateUserEvent(context, &err);
+ CL_EXIT_ERROR(err, "clCreateUserEvent failed");
+
+ // Test event properties.
+ cl_int s;
+ size_t sizeofs;
+ CL_EXIT_ERROR(clGetEventInfo(u1, CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof s, &s, &sizeofs),
+ "clGetEventInfo failed");
+ CL_EXIT_ERROR((sizeof s == sizeofs) ? CL_SUCCESS : -1,
+ "clGetEventInfo returned wrong size for "
+ "CL_EVENT_COMMAND_EXECUTION_STATUS");
+ CL_EXIT_ERROR((s == CL_SUBMITTED) ? CL_SUCCESS : -1,
+ "clGetEventInfo returned wrong value for "
+ "CL_EVENT_COMMAND_EXECUTION_STATUS");
+
+ cl_command_type t;
+ size_t sizeoft;
+ CL_EXIT_ERROR(
+ clGetEventInfo(u1, CL_EVENT_COMMAND_TYPE, sizeof t, &t, &sizeoft),
+ "clGetEventInfo failed");
+ CL_EXIT_ERROR(
+ (sizeof t == sizeoft) ? CL_SUCCESS : -1,
+ "clGetEventInfo returned wrong size for CL_EVENT_COMMAND_TYPE");
+ CL_EXIT_ERROR(
+ (t == CL_COMMAND_USER) ? CL_SUCCESS : -1,
+ "clGetEventInfo returned wrong value for CL_EVENT_COMMAND_TYPE");
+
+ cl_command_queue q;
+ size_t sizeofq;
+ CL_EXIT_ERROR(
+ clGetEventInfo(u1, CL_EVENT_COMMAND_QUEUE, sizeof q, &q, &sizeofq),
+ "clGetEventInfo failed");
+ CL_EXIT_ERROR(
+ (sizeof q == sizeofq) ? CL_SUCCESS : -1,
+ "clGetEventInfo returned wrong size for CL_EVENT_COMMAND_QUEUE");
+ CL_EXIT_ERROR(
+ (q == NULL) ? CL_SUCCESS : -1,
+ "clGetEventInfo returned wrong value for CL_EVENT_COMMAND_QUEUE");
+
+ cl_context c;
+ size_t sizeofc;
+ CL_EXIT_ERROR(clGetEventInfo(u1, CL_EVENT_CONTEXT, sizeof c, &c, &sizeofc),
+ "clGetEventInfo failed");
+ CL_EXIT_ERROR((sizeof c == sizeofc) ? CL_SUCCESS : -1,
+ "clGetEventInfo returned wrong size for CL_EVENT_CONTEXT");
+ CL_EXIT_ERROR((c == context) ? CL_SUCCESS : -1,
+ "clGetEventInfo returned wrong value for CL_EVENT_CONTEXT");
+
+ cl_ulong p;
+ err = clGetEventProfilingInfo(u1, CL_PROFILING_COMMAND_QUEUED, sizeof p, &p,
+ 0);
+ CL_EXIT_ERROR((err != CL_SUCCESS) ? CL_SUCCESS : -1,
+ "clGetEventProfilingInfo returned wrong error.");
+
+ // Test semantics.
+ cl_program program;
+ err = create_single_kernel_helper_create_program(context, &program, 1, src);
+ CL_EXIT_ERROR(err, "clCreateProgramWithSource failed");
+
+ CL_EXIT_BUILD_ERROR(clBuildProgram(program, 0, NULL, "", NULL, NULL),
+ program, "Building program from inline src:\t%s",
+ src[0]);
+
+ cl_kernel k0 = clCreateKernel(program, "simple_task", &err);
+ CL_EXIT_ERROR(err, "clCreateKernel failed");
+
+ float buffer[1];
+ cl_mem output = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof buffer,
+ buffer, &err);
+ CL_EXIT_ERROR(err, "clCreateBuffer failed.");
+
+ CL_EXIT_ERROR(clSetKernelArg(k0, 0, sizeof(output), &output),
+ "clSetKernelArg failed");
+
+
+ // Successful case.
+ // //////////////////////////////////////////////////////////////////////////////////////
+ {
+ cl_event e[4];
+ cl_uint N = sizeof e / sizeof(cl_event);
+
+ log_info("Enqueuing tasks\n");
+ for (cl_uint i = 0; i != N; ++i)
+ CL_EXIT_ERROR(clEnqueueTask(queue, k0, 1, &u1, &e[i]),
+ "clEnqueueTaskFailed");
+
+ log_info("Checking task status before setting user event status\n");
+ for (cl_uint i = 0; i != N; ++i)
+ {
+ CL_EXIT_ERROR(clGetEventInfo(e[i],
+ CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof s, &s, 0),
+ "clGetEventInfo failed");
+ CL_EXIT_ERROR(
+ (s >= CL_SUBMITTED) ? CL_SUCCESS : -1,
+ "clGetEventInfo %u returned wrong status before user event", i);
+ }
+
+ log_info("Setting user event status to complete\n");
+ CL_EXIT_ERROR(clSetUserEventStatus(u1, CL_COMPLETE),
+ "clSetUserEventStatus failed");
+
+ log_info("Waiting for tasks to finish executing\n");
+ CL_EXIT_ERROR(clWaitForEvents(1, &e[N - 1]), "clWaitForEvent failed");
+
+ log_info("Checking task status after setting user event status\n");
+ for (cl_uint i = 0; i != N; ++i)
+ {
+ CL_EXIT_ERROR(clGetEventInfo(e[i],
+ CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof s, &s, 0),
+ "clGetEventInfo failed");
+ CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1,
+ "clGetEventInfo %u returned wrong status %04x after "
+ "successful user event",
+ i, s);
+ }
+
+ CL_EXIT_ERROR(clReleaseEvent(u1), "clReleaseEvent failed");
+
+ for (cl_uint i = 0; i != N; ++i)
+ CL_EXIT_ERROR(clReleaseEvent(e[i]), "clReleaseEvent failed");
+
+ log_info("Successful user event case passed.\n");
}
- log_info("Setting user event status to unsuccessful result\n");
- CL_EXIT_ERROR(clSetUserEventStatus(u2,-1),"clSetUserEventStatus failed");
-
- log_info("Waiting for tasks to finish executing\n");
- CL_EXIT_ERROR((clWaitForEvents( N, &e[0] )!=CL_SUCCESS) ? CL_SUCCESS : -1,"clWaitForEvent succeeded when it should have failed");
-
- log_info("Checking task status after setting user event status\n");
- for (cl_uint i = 0; i != N; ++i) {
- CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed");
- CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %04x after unsuccessful user event",i,s);
+ // Test unsuccessful user event case.
+ // ///////////////////////////////////////////////////////////////////
+ {
+ cl_event u2 = clCreateUserEvent(context, &err);
+ CL_EXIT_ERROR(err, "clCreateUserEvent failed");
+
+ cl_event e[4];
+ cl_uint N = sizeof e / sizeof(cl_event);
+
+ log_info("Enqueuing tasks\n");
+ for (cl_uint i = 0; i != N; ++i)
+ CL_EXIT_ERROR(clEnqueueTask(queue, k0, 1, &u2, &e[i]),
+ "clEnqueueTaskFailed");
+
+ log_info("Checking task status before setting user event status\n");
+ for (cl_uint i = 0; i != N; ++i)
+ {
+ CL_EXIT_ERROR(clGetEventInfo(e[i],
+ CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof s, &s, 0),
+ "clGetEventInfo failed");
+ CL_EXIT_ERROR(
+ (s == CL_QUEUED || s == CL_SUBMITTED) ? CL_SUCCESS : -1,
+ "clGetEventInfo %u returned wrong status %d before user event",
+ i, (int)s);
+ }
+
+ log_info("Setting user event status to unsuccessful result\n");
+ CL_EXIT_ERROR(clSetUserEventStatus(u2, -1),
+ "clSetUserEventStatus failed");
+
+ log_info("Waiting for tasks to finish executing\n");
+ CL_EXIT_ERROR((clWaitForEvents(N, &e[0]) != CL_SUCCESS) ? CL_SUCCESS
+ : -1,
+ "clWaitForEvent succeeded when it should have failed");
+
+ log_info("Checking task status after setting user event status\n");
+ for (cl_uint i = 0; i != N; ++i)
+ {
+ CL_EXIT_ERROR(clGetEventInfo(e[i],
+ CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof s, &s, 0),
+ "clGetEventInfo failed");
+ CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1,
+ "clGetEventInfo %u returned wrong status %04x after "
+ "unsuccessful user event",
+ i, s);
+ }
+
+ CL_EXIT_ERROR(clReleaseEvent(u2), "clReleaseEvent failed");
+
+ for (cl_uint i = 0; i != N; ++i)
+ CL_EXIT_ERROR(clReleaseEvent(e[i]), "clReleaseEvent failed");
+
+ log_info("Unsuccessful user event case passed.\n");
}
- CL_EXIT_ERROR(clReleaseEvent(u2),"clReleaseEvent failed");
-
- for (cl_uint i = 0; i != N; ++i)
- CL_EXIT_ERROR(clReleaseEvent(e[i]),"clReleaseEvent failed");
-
- log_info("Unsuccessful user event case passed.\n");
- }
-
- clReleaseKernel(k0);
- clReleaseProgram(program);
- clReleaseMemObject(output);
-
- return 0;
+ clReleaseKernel(k0);
+ clReleaseProgram(program);
+ clReleaseMemObject(output);
+ return 0;
}
-
diff --git a/test_conformance/events/test_userevents_multithreaded.cpp b/test_conformance/events/test_userevents_multithreaded.cpp
index 51ef2226..a7845bf1 100644
--- a/test_conformance/events/test_userevents_multithreaded.cpp
+++ b/test_conformance/events/test_userevents_multithreaded.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -19,8 +19,8 @@
#include <thread>
-#if !defined (_MSC_VER)
- #include <unistd.h>
+#if !defined(_MSC_VER)
+#include <unistd.h>
#endif // !_MSC_VER
void trigger_user_event(cl_event *event)
@@ -30,44 +30,44 @@ void trigger_user_event(cl_event *event)
clSetUserEventStatus(*event, CL_COMPLETE);
}
-int test_userevents_multithreaded( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_userevents_multithreaded(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_int error;
// Set up a user event to act as a gate
- clEventWrapper gateEvent = clCreateUserEvent( context, &error );
- test_error( error, "Unable to create user gate event" );
+ clEventWrapper gateEvent = clCreateUserEvent(context, &error);
+ test_error(error, "Unable to create user gate event");
// Set up a few actions gated on the user event
NDRangeKernelAction action1;
ReadBufferAction action2;
WriteBufferAction action3;
- clEventWrapper actionEvents[ 3 ];
- Action * actions[] = { &action1, &action2, &action3, NULL };
+ clEventWrapper actionEvents[3];
+ Action *actions[] = { &action1, &action2, &action3, NULL };
- for( int i = 0; actions[ i ] != NULL; i++ )
+ for (int i = 0; actions[i] != NULL; i++)
{
- error = actions[ i ]->Setup( deviceID, context, queue );
- test_error( error, "Unable to set up test action" );
+ error = actions[i]->Setup(deviceID, context, queue);
+ test_error(error, "Unable to set up test action");
- error = actions[ i ]->Execute( queue, 1, &gateEvent, &actionEvents[ i ] );
- test_error( error, "Unable to execute test action" );
+ error = actions[i]->Execute(queue, 1, &gateEvent, &actionEvents[i]);
+ test_error(error, "Unable to execute test action");
}
// Now, instead of releasing the gate, we spawn a separate thread to do so
- log_info( "\tStarting trigger thread...\n" );
+ log_info("\tStarting trigger thread...\n");
std::thread thread(trigger_user_event, &gateEvent);
- log_info( "\tWaiting for actions...\n" );
- error = clWaitForEvents( 3, &actionEvents[ 0 ] );
- test_error( error, "Unable to wait for action events" );
+ log_info("\tWaiting for actions...\n");
+ error = clWaitForEvents(3, &actionEvents[0]);
+ test_error(error, "Unable to wait for action events");
thread.join();
- log_info( "\tActions completed.\n" );
+ log_info("\tActions completed.\n");
// If we got here without error, we're good
return 0;
}
-
diff --git a/test_conformance/events/test_waitlists.cpp b/test_conformance/events/test_waitlists.cpp
index e23cacf4..6036451f 100644
--- a/test_conformance/events/test_waitlists.cpp
+++ b/test_conformance/events/test_waitlists.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -17,306 +17,374 @@
#include "action_classes.h"
-extern const char *IGetStatusString( cl_int status );
+extern const char *IGetStatusString(cl_int status);
#define PRINT_OPS 0
-int test_waitlist( cl_device_id device, cl_context context, cl_command_queue queue, Action *actionToTest, bool multiple )
+int test_waitlist(cl_device_id device, cl_context context,
+ cl_command_queue queue, Action *actionToTest, bool multiple)
{
- NDRangeKernelAction actions[ 2 ];
- clEventWrapper events[ 3 ];
- cl_int status[ 3 ];
+ NDRangeKernelAction actions[2];
+ clEventWrapper events[3];
+ cl_int status[3];
cl_int error;
- if (multiple)
- log_info("\tExecuting reference event 0, then reference event 1 with reference event 0 in its waitlist, then test event 2 with reference events 0 and 1 in its waitlist.\n");
- else
- log_info("\tExecuting reference event 0, then test event 2 with reference event 0 in its waitlist.\n");
+ if (multiple)
+ log_info("\tExecuting reference event 0, then reference event 1 with "
+ "reference event 0 in its waitlist, then test event 2 with "
+ "reference events 0 and 1 in its waitlist.\n");
+ else
+ log_info("\tExecuting reference event 0, then test event 2 with "
+ "reference event 0 in its waitlist.\n");
// Set up the first base action to wait against
- error = actions[ 0 ].Setup( device, context, queue );
- test_error( error, "Unable to setup base event to wait against" );
+ error = actions[0].Setup(device, context, queue);
+ test_error(error, "Unable to setup base event to wait against");
- if( multiple )
+ if (multiple)
{
// Set up a second event to wait against
- error = actions[ 1 ].Setup( device, context, queue );
- test_error( error, "Unable to setup second base event to wait against" );
+ error = actions[1].Setup(device, context, queue);
+ test_error(error, "Unable to setup second base event to wait against");
}
// Now set up the actual action to test
- error = actionToTest->Setup( device, context, queue );
- test_error( error, "Unable to set up test event" );
+ error = actionToTest->Setup(device, context, queue);
+ test_error(error, "Unable to set up test event");
// Execute all events now
- if (PRINT_OPS) log_info("\tExecuting action 0...\n");
- error = actions[ 0 ].Execute( queue, 0, NULL, &events[ 0 ] );
- test_error( error, "Unable to execute first event" );
+ if (PRINT_OPS) log_info("\tExecuting action 0...\n");
+ error = actions[0].Execute(queue, 0, NULL, &events[0]);
+ test_error(error, "Unable to execute first event");
- if( multiple )
+ if (multiple)
{
- if (PRINT_OPS) log_info("\tExecuting action 1...\n");
- error = actions[ 1 ].Execute( queue, 1, &events[0], &events[ 1 ] );
- test_error( error, "Unable to execute second event" );
+ if (PRINT_OPS) log_info("\tExecuting action 1...\n");
+ error = actions[1].Execute(queue, 1, &events[0], &events[1]);
+ test_error(error, "Unable to execute second event");
}
// Sanity check
- if( multiple ) {
- if (PRINT_OPS) log_info("\tChecking status of action 1...\n");
- error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL );
- test_error( error, "Unable to get event status" );
- }
- if (PRINT_OPS) log_info("\tChecking status of action 0...\n");
- error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL );
- test_error( error, "Unable to get event status" );
-
- log_info("\t\tEvent status after starting reference events: reference event 0: %s, reference event 1: %s, test event 2: %s.\n",
- IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), "N/A");
-
- if( ( status[ 0 ] == CL_COMPLETE ) || ( multiple && status[ 1 ] == CL_COMPLETE ) )
+ if (multiple)
+ {
+ if (PRINT_OPS) log_info("\tChecking status of action 1...\n");
+ error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[1]), &status[1], NULL);
+ test_error(error, "Unable to get event status");
+ }
+ if (PRINT_OPS) log_info("\tChecking status of action 0...\n");
+ error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[0]), &status[0], NULL);
+ test_error(error, "Unable to get event status");
+
+ log_info("\t\tEvent status after starting reference events: reference "
+ "event 0: %s, reference event 1: %s, test event 2: %s.\n",
+ IGetStatusString(status[0]),
+ (multiple ? IGetStatusString(status[1]) : "N/A"), "N/A");
+
+ if ((status[0] == CL_COMPLETE) || (multiple && status[1] == CL_COMPLETE))
{
- log_info( "WARNING: Reference event(s) already completed before we could execute test event! Possible that the reference event blocked (implicitly passing)\n" );
+ log_info("WARNING: Reference event(s) already completed before we "
+ "could execute test event! Possible that the reference event "
+ "blocked (implicitly passing)\n");
return 0;
}
- if (PRINT_OPS) log_info("\tExecuting action to test...\n");
- error = actionToTest->Execute( queue, ( multiple ) ? 2 : 1, &events[ 0 ], &events[ 2 ] );
- test_error( error, "Unable to execute test event" );
+ if (PRINT_OPS) log_info("\tExecuting action to test...\n");
+ error = actionToTest->Execute(queue, (multiple) ? 2 : 1, &events[0],
+ &events[2]);
+ test_error(error, "Unable to execute test event");
// Hopefully, the first event is still running
- if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n");
- error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL );
- test_error( error, "Unable to get event status" );
- if( multiple ) {
- if (PRINT_OPS) log_info("\tChecking status of action 1...\n");
- error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL );
- test_error( error, "Unable to get event status" );
- }
- if (PRINT_OPS) log_info("\tChecking status of action 0...\n");
- error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL );
- test_error( error, "Unable to get event status" );
-
- log_info("\t\tEvent status after starting test event: reference event 0: %s, reference event 1: %s, test event 2: %s.\n",
- IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] ));
-
- if( multiple )
+ if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n");
+ error = clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[2]), &status[2], NULL);
+ test_error(error, "Unable to get event status");
+ if (multiple)
+ {
+ if (PRINT_OPS) log_info("\tChecking status of action 1...\n");
+ error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[1]), &status[1], NULL);
+ test_error(error, "Unable to get event status");
+ }
+ if (PRINT_OPS) log_info("\tChecking status of action 0...\n");
+ error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[0]), &status[0], NULL);
+ test_error(error, "Unable to get event status");
+
+ log_info("\t\tEvent status after starting test event: reference event 0: "
+ "%s, reference event 1: %s, test event 2: %s.\n",
+ IGetStatusString(status[0]),
+ (multiple ? IGetStatusString(status[1]) : "N/A"),
+ IGetStatusString(status[2]));
+
+ if (multiple)
{
- if( status[ 0 ] == CL_COMPLETE && status[ 1 ] == CL_COMPLETE )
+ if (status[0] == CL_COMPLETE && status[1] == CL_COMPLETE)
{
- log_info( "WARNING: Both events completed, so unable to test further (implicitly passing).\n" );
- clFinish( queue );
+ log_info("WARNING: Both events completed, so unable to test "
+ "further (implicitly passing).\n");
+ clFinish(queue);
return 0;
}
- if(status[1] == CL_COMPLETE && status[0] != CL_COMPLETE)
- {
- log_error("ERROR: Test failed because the second wait event is complete and the first is not.(status: 0: %s and 1: %s)\n", IGetStatusString( status[ 0 ] ), IGetStatusString( status[ 1 ] ) );
- clFinish( queue );
+ if (status[1] == CL_COMPLETE && status[0] != CL_COMPLETE)
+ {
+ log_error(
+ "ERROR: Test failed because the second wait event is complete "
+ "and the first is not.(status: 0: %s and 1: %s)\n",
+ IGetStatusString(status[0]), IGetStatusString(status[1]));
+ clFinish(queue);
return -1;
- }
+ }
}
else
{
- if( status[ 0 ] == CL_COMPLETE )
+ if (status[0] == CL_COMPLETE)
{
- log_info( "WARNING: Reference event completed, so unable to test further (implicitly passing).\n" );
- clFinish( queue );
+ log_info("WARNING: Reference event completed, so unable to test "
+ "further (implicitly passing).\n");
+ clFinish(queue);
return 0;
}
- if( status[ 0 ] != CL_RUNNING && status[ 0 ] != CL_QUEUED && status[ 0 ] != CL_SUBMITTED )
+ if (status[0] != CL_RUNNING && status[0] != CL_QUEUED
+ && status[0] != CL_SUBMITTED)
{
- log_error( "ERROR: Test failed because first wait event is not currently running, queued, or submitted! (status: 0: %s)\n", IGetStatusString( status[ 0 ] ) );
- clFinish( queue );
+ log_error(
+ "ERROR: Test failed because first wait event is not currently "
+ "running, queued, or submitted! (status: 0: %s)\n",
+ IGetStatusString(status[0]));
+ clFinish(queue);
return -1;
}
}
- if( status[ 2 ] != CL_QUEUED && status[ 2 ] != CL_SUBMITTED )
+ if (status[2] != CL_QUEUED && status[2] != CL_SUBMITTED)
{
- log_error( "ERROR: Test event is not waiting to run! (status: 2: %s)\n", IGetStatusString( status[ 2 ] ) );
- clFinish( queue );
+ log_error("ERROR: Test event is not waiting to run! (status: 2: %s)\n",
+ IGetStatusString(status[2]));
+ clFinish(queue);
return -1;
}
// Now wait for the first reference event
- if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n");
- error = clWaitForEvents( 1, &events[ 0 ] );
- test_error( error, "Unable to wait for reference event" );
+ if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n");
+ error = clWaitForEvents(1, &events[0]);
+ test_error(error, "Unable to wait for reference event");
// Grab statuses again
- if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n");
- error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL );
- test_error( error, "Unable to get event status" );
- if( multiple ) {
- if (PRINT_OPS) log_info("\tChecking status of action 1...\n");
- error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL );
- test_error( error, "Unable to get event status" );
- }
- if (PRINT_OPS) log_info("\tChecking status of action 0...\n");
- error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL );
- test_error( error, "Unable to get event status" );
-
- log_info("\t\tEvent status after waiting for reference event 0: reference event 0: %s, reference event 1: %s, test event 2: %s.\n",
- IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] ));
+ if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n");
+ error = clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[2]), &status[2], NULL);
+ test_error(error, "Unable to get event status");
+ if (multiple)
+ {
+ if (PRINT_OPS) log_info("\tChecking status of action 1...\n");
+ error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[1]), &status[1], NULL);
+ test_error(error, "Unable to get event status");
+ }
+ if (PRINT_OPS) log_info("\tChecking status of action 0...\n");
+ error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[0]), &status[0], NULL);
+ test_error(error, "Unable to get event status");
+
+ log_info("\t\tEvent status after waiting for reference event 0: reference "
+ "event 0: %s, reference event 1: %s, test event 2: %s.\n",
+ IGetStatusString(status[0]),
+ (multiple ? IGetStatusString(status[1]) : "N/A"),
+ IGetStatusString(status[2]));
// Sanity
- if( status[ 0 ] != CL_COMPLETE )
+ if (status[0] != CL_COMPLETE)
{
- log_error( "ERROR: Waited for first event but it's not complete (status: 0: %s)\n", IGetStatusString( status[ 0 ] ) );
- clFinish( queue );
+ log_error("ERROR: Waited for first event but it's not complete "
+ "(status: 0: %s)\n",
+ IGetStatusString(status[0]));
+ clFinish(queue);
return -1;
}
- // If we're multiple, and the second event isn't complete, then our test event should still be queued
- if( multiple && status[ 1 ] != CL_COMPLETE )
+ // If we're multiple, and the second event isn't complete, then our test
+ // event should still be queued
+ if (multiple && status[1] != CL_COMPLETE)
{
- if( status[ 1 ] == CL_RUNNING && status[ 2 ] == CL_RUNNING ) {
- log_error("ERROR: Test event and second event are both running.\n");
- clFinish( queue );
- return -1;
- }
- if( status[ 2 ] != CL_QUEUED && status[ 2 ] != CL_SUBMITTED )
+ if (status[1] == CL_RUNNING && status[2] == CL_RUNNING)
+ {
+ log_error("ERROR: Test event and second event are both running.\n");
+ clFinish(queue);
+ return -1;
+ }
+ if (status[2] != CL_QUEUED && status[2] != CL_SUBMITTED)
{
- log_error( "ERROR: Test event did not wait for second event before starting! (status of ref: 1: %s, of test: 2: %s)\n", IGetStatusString( status[ 1 ] ), IGetStatusString( status[ 2 ] ) );
- clFinish( queue );
+ log_error("ERROR: Test event did not wait for second event before "
+ "starting! (status of ref: 1: %s, of test: 2: %s)\n",
+ IGetStatusString(status[1]), IGetStatusString(status[2]));
+ clFinish(queue);
return -1;
}
// Now wait for second event to complete, too
- if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n");
- error = clWaitForEvents( 1, &events[ 1 ] );
- test_error( error, "Unable to wait for second reference event" );
+ if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n");
+ error = clWaitForEvents(1, &events[1]);
+ test_error(error, "Unable to wait for second reference event");
// Grab statuses again
- if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n");
- error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL );
- test_error( error, "Unable to get event status" );
- if( multiple ) {
- if (PRINT_OPS) log_info("\tChecking status of action 1...\n");
- error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL );
- test_error( error, "Unable to get event status" );
- }
- if (PRINT_OPS) log_info("\tChecking status of action 0...\n");
- error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL );
- test_error( error, "Unable to get event status" );
-
- log_info("\t\tEvent status after waiting for reference event 1: reference event 0: %s, reference event 1: %s, test event 2: %s.\n",
- IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] ));
+ if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n");
+ error = clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[2]), &status[2], NULL);
+ test_error(error, "Unable to get event status");
+ if (multiple)
+ {
+ if (PRINT_OPS) log_info("\tChecking status of action 1...\n");
+ error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[1]), &status[1], NULL);
+ test_error(error, "Unable to get event status");
+ }
+ if (PRINT_OPS) log_info("\tChecking status of action 0...\n");
+ error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[0]), &status[0], NULL);
+ test_error(error, "Unable to get event status");
+
+ log_info(
+ "\t\tEvent status after waiting for reference event 1: reference "
+ "event 0: %s, reference event 1: %s, test event 2: %s.\n",
+ IGetStatusString(status[0]),
+ (multiple ? IGetStatusString(status[1]) : "N/A"),
+ IGetStatusString(status[2]));
// Sanity
- if( status[ 1 ] != CL_COMPLETE )
+ if (status[1] != CL_COMPLETE)
{
- log_error( "ERROR: Waited for second reference event but it didn't complete (status: 1: %s)\n", IGetStatusString( status[ 1 ] ) );
- clFinish( queue );
+ log_error("ERROR: Waited for second reference event but it didn't "
+ "complete (status: 1: %s)\n",
+ IGetStatusString(status[1]));
+ clFinish(queue);
return -1;
}
}
- // At this point, the test event SHOULD be running, but if it completed, we consider it a pass
- if( status[ 2 ] == CL_COMPLETE )
+ // At this point, the test event SHOULD be running, but if it completed, we
+ // consider it a pass
+ if (status[2] == CL_COMPLETE)
{
- log_info( "WARNING: Test event already completed. Assumed valid.\n" );
- clFinish( queue );
+ log_info("WARNING: Test event already completed. Assumed valid.\n");
+ clFinish(queue);
return 0;
}
- if( status[ 2 ] != CL_RUNNING && status[ 2 ] != CL_SUBMITTED && status[ 2 ] != CL_QUEUED)
+ if (status[2] != CL_RUNNING && status[2] != CL_SUBMITTED
+ && status[2] != CL_QUEUED)
{
- log_error( "ERROR: Second event did not start running after reference event(s) completed! (status: 2: %s)\n", IGetStatusString( status[ 2 ] ) );
- clFinish( queue );
+ log_error("ERROR: Second event did not start running after reference "
+ "event(s) completed! (status: 2: %s)\n",
+ IGetStatusString(status[2]));
+ clFinish(queue);
return -1;
}
// Wait for the test event, then return
- if (PRINT_OPS) log_info("\tWaiting for action 2 to test to finish...\n");
- error = clWaitForEvents( 1, &events[ 2 ] );
- test_error( error, "Unable to wait for test event" );
+ if (PRINT_OPS) log_info("\tWaiting for action 2 to test to finish...\n");
+ error = clWaitForEvents(1, &events[2]);
+ test_error(error, "Unable to wait for test event");
- error |= clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL );
- test_error( error, "Unable to get event status" );
+ error |= clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS,
+ sizeof(status[2]), &status[2], NULL);
+ test_error(error, "Unable to get event status");
- log_info("\t\tEvent status after waiting for test event: reference event 0: %s, reference event 1: %s, test event 2: %s.\n",
- IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] ));
+ log_info("\t\tEvent status after waiting for test event: reference event "
+ "0: %s, reference event 1: %s, test event 2: %s.\n",
+ IGetStatusString(status[0]),
+ (multiple ? IGetStatusString(status[1]) : "N/A"),
+ IGetStatusString(status[2]));
- // Sanity
- if( status[ 2 ] != CL_COMPLETE )
- {
- log_error( "ERROR: Test event didn't complete (status: 2: %s)\n", IGetStatusString( status[ 2 ] ) );
- clFinish( queue );
- return -1;
- }
+ // Sanity
+ if (status[2] != CL_COMPLETE)
+ {
+ log_error("ERROR: Test event didn't complete (status: 2: %s)\n",
+ IGetStatusString(status[2]));
+ clFinish(queue);
+ return -1;
+ }
- clFinish(queue);
+ clFinish(queue);
return 0;
}
-#define TEST_ACTION( name ) \
- { \
- name##Action action; \
- log_info( "-- Testing " #name " (waiting on 1 event)...\n" ); \
- if( ( error = test_waitlist( deviceID, context, queue, &action, false ) ) != CL_SUCCESS ) \
- retVal++; \
- clFinish( queue ); \
- } \
- if( error == CL_SUCCESS ) /* Only run multiples test if single test passed */ \
- { \
- name##Action action; \
- log_info( "-- Testing " #name " (waiting on 2 events)...\n" ); \
- if( ( error = test_waitlist( deviceID, context, queue, &action, true ) ) != CL_SUCCESS ) \
- retVal++; \
- clFinish( queue ); \
+#define TEST_ACTION(name) \
+ { \
+ name##Action action; \
+ log_info("-- Testing " #name " (waiting on 1 event)...\n"); \
+ if ((error = test_waitlist(deviceID, context, queue, &action, false)) \
+ != CL_SUCCESS) \
+ retVal++; \
+ clFinish(queue); \
+ } \
+ if (error \
+ == CL_SUCCESS) /* Only run multiples test if single test passed */ \
+ { \
+ name##Action action; \
+ log_info("-- Testing " #name " (waiting on 2 events)...\n"); \
+ if ((error = test_waitlist(deviceID, context, queue, &action, true)) \
+ != CL_SUCCESS) \
+ retVal++; \
+ clFinish(queue); \
}
-int test_waitlists( cl_device_id deviceID, cl_context context, cl_command_queue oldQueue, int num_elements )
+int test_waitlists(cl_device_id deviceID, cl_context context,
+ cl_command_queue oldQueue, int num_elements)
{
cl_int error;
int retVal = 0;
cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
- if( !checkDeviceForQueueSupport( deviceID, props ) )
+ if (!checkDeviceForQueueSupport(deviceID, props))
{
- log_info( "WARNING: Device does not support out-of-order exec mode; skipping test.\n" );
+ log_info("WARNING: Device does not support out-of-order exec mode; "
+ "skipping test.\n");
return 0;
}
- clCommandQueueWrapper queue = clCreateCommandQueue( context, deviceID, props, &error );
+ clCommandQueueWrapper queue =
+ clCreateCommandQueue(context, deviceID, props, &error);
test_error(error, "Unable to create out-of-order queue");
- log_info( "\n" );
+ log_info("\n");
- TEST_ACTION( NDRangeKernel )
+ TEST_ACTION(NDRangeKernel)
- TEST_ACTION( ReadBuffer )
- TEST_ACTION( WriteBuffer )
- TEST_ACTION( MapBuffer )
- TEST_ACTION( UnmapBuffer )
+ TEST_ACTION(ReadBuffer)
+ TEST_ACTION(WriteBuffer)
+ TEST_ACTION(MapBuffer)
+ TEST_ACTION(UnmapBuffer)
- if( checkForImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED )
+ if (checkForImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
{
- log_info( "\nNote: device does not support images. Skipping remainder of waitlist tests...\n" );
+ log_info("\nNote: device does not support images. Skipping remainder "
+ "of waitlist tests...\n");
}
else
{
- TEST_ACTION( ReadImage2D )
- TEST_ACTION( WriteImage2D )
- TEST_ACTION( CopyImage2Dto2D )
- TEST_ACTION( Copy2DImageToBuffer )
- TEST_ACTION( CopyBufferTo2DImage )
- TEST_ACTION( MapImage )
-
- if( checkFor3DImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED )
- log_info("Device does not support 3D images. Skipping remainder of waitlist tests...\n");
+ TEST_ACTION(ReadImage2D)
+ TEST_ACTION(WriteImage2D)
+ TEST_ACTION(CopyImage2Dto2D)
+ TEST_ACTION(Copy2DImageToBuffer)
+ TEST_ACTION(CopyBufferTo2DImage)
+ TEST_ACTION(MapImage)
+
+ if (checkFor3DImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
+ log_info("Device does not support 3D images. Skipping remainder of "
+ "waitlist tests...\n");
else
{
- TEST_ACTION( ReadImage3D )
- TEST_ACTION( WriteImage3D )
- TEST_ACTION( CopyImage2Dto3D )
- TEST_ACTION( CopyImage3Dto2D )
- TEST_ACTION( CopyImage3Dto3D )
- TEST_ACTION( Copy3DImageToBuffer )
- TEST_ACTION( CopyBufferTo3DImage )
+ TEST_ACTION(ReadImage3D)
+ TEST_ACTION(WriteImage3D)
+ TEST_ACTION(CopyImage2Dto3D)
+ TEST_ACTION(CopyImage3Dto2D)
+ TEST_ACTION(CopyImage3Dto3D)
+ TEST_ACTION(Copy3DImageToBuffer)
+ TEST_ACTION(CopyBufferTo3DImage)
}
}
return retVal;
}
-
diff --git a/test_conformance/extensions/CMakeLists.txt b/test_conformance/extensions/CMakeLists.txt
index 53d77ee5..d95d29aa 100644
--- a/test_conformance/extensions/CMakeLists.txt
+++ b/test_conformance/extensions/CMakeLists.txt
@@ -1,2 +1,3 @@
add_subdirectory( cl_ext_cxx_for_opencl )
+add_subdirectory( cl_khr_command_buffer )
add_subdirectory( cl_khr_dx9_media_sharing )
diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
new file mode 100644
index 00000000..ac259f6d
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(MODULE_NAME CL_KHR_COMMAND_BUFFER)
+
+set(${MODULE_NAME}_SOURCES
+ main.cpp
+ basic_command_buffer.cpp
+)
+
+include(../../CMakeCommon.txt)
diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
new file mode 100644
index 00000000..62a02d83
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
@@ -0,0 +1,588 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "command_buffer_test_base.h"
+#include "procs.h"
+#include "harness/typeWrappers.h"
+
+#include <algorithm>
+#include <cstring>
+#include <vector>
+
+#define CHECK_VERIFICATION_ERROR(reference, result, index) \
+ { \
+ if (reference != result) \
+ { \
+ log_error("Expected %d was %d at index %u\n", reference, result, \
+ index); \
+ return TEST_FAIL; \
+ } \
+ }
+
+namespace {
+
+// Helper test fixture for constructing OpenCL objects used in testing
+// a variety of simple command-buffer enqueue scenarios.
+struct BasicCommandBufferTest : CommandBufferTestBase
+{
+
+ BasicCommandBufferTest(cl_device_id device, cl_context context,
+ cl_command_queue queue)
+ : CommandBufferTestBase(device), context(context), queue(queue),
+ command_buffer(this), simultaneous_use(false),
+ out_of_order_support(false), num_elements(0)
+ {}
+
+ virtual bool Skip()
+ {
+ cl_command_queue_properties required_properties;
+ cl_int error = clGetDeviceInfo(
+ device, CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR,
+ sizeof(required_properties), &required_properties, NULL);
+ test_error(error,
+ "Unable to query "
+ "CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR");
+
+ cl_command_queue_properties queue_properties;
+
+ error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
+ sizeof(queue_properties),
+ &queue_properties, NULL);
+ test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
+
+ // Skip if queue properties don't contain those required
+ return required_properties != (required_properties & queue_properties);
+ }
+
+ virtual cl_int SetUp(int elements)
+ {
+ cl_int error = init_extension_functions();
+ if (error != CL_SUCCESS)
+ {
+ return error;
+ }
+
+ // Query if device supports simultaneous use
+ cl_device_command_buffer_capabilities_khr capabilities;
+ error =
+ clGetDeviceInfo(device, CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR,
+ sizeof(capabilities), &capabilities, NULL);
+ test_error(error,
+ "Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
+ simultaneous_use =
+ capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR;
+ out_of_order_support =
+ capabilities & CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR;
+
+ if (elements <= 0)
+ {
+ return CL_INVALID_VALUE;
+ }
+ num_elements = static_cast<size_t>(elements);
+
+ // Kernel performs a parallel copy from an input buffer to output buffer
+ // is created.
+ const char *kernel_str =
+ R"(
+ __kernel void copy(__global int* in, __global int* out) {
+ size_t id = get_global_id(0);
+ out[id] = in[id];
+ })";
+
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &kernel_str);
+ test_error(error, "Failed to create program with source");
+
+ error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
+ test_error(error, "Failed to build program");
+
+ in_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
+ sizeof(cl_int) * num_elements, nullptr, &error);
+ test_error(error, "clCreateBuffer failed");
+
+ out_mem =
+ clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+ sizeof(cl_int) * num_elements, nullptr, &error);
+ test_error(error, "clCreateBuffer failed");
+
+ kernel = clCreateKernel(program, "copy", &error);
+ test_error(error, "Failed to create copy kernel");
+
+ error = clSetKernelArg(kernel, 0, sizeof(in_mem), &in_mem);
+ test_error(error, "clSetKernelArg failed");
+
+ error = clSetKernelArg(kernel, 1, sizeof(out_mem), &out_mem);
+ test_error(error, "clSetKernelArg failed");
+
+ if (simultaneous_use)
+ {
+ cl_command_buffer_properties_khr properties[3] = {
+ CL_COMMAND_BUFFER_FLAGS_KHR,
+ CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR, 0
+ };
+ command_buffer =
+ clCreateCommandBufferKHR(1, &queue, properties, &error);
+ }
+ else
+ {
+ command_buffer =
+ clCreateCommandBufferKHR(1, &queue, nullptr, &error);
+ }
+ test_error(error, "clCreateCommandBufferKHR failed");
+
+ return CL_SUCCESS;
+ }
+
+ // Test body returning an OpenCL error code
+ virtual cl_int Run() = 0;
+
+
+protected:
+ size_t data_size() const { return num_elements * sizeof(cl_int); }
+
+ cl_context context;
+ cl_command_queue queue;
+ clCommandBufferWrapper command_buffer;
+ clProgramWrapper program;
+ clKernelWrapper kernel;
+ clMemWrapper in_mem, out_mem;
+ size_t num_elements;
+
+ // Device support query results
+ bool simultaneous_use;
+ bool out_of_order_support;
+};
+
+// Test enqueuing a command-buffer containing a single NDRange command once
+struct BasicEnqueueTest : public BasicCommandBufferTest
+{
+ using BasicCommandBufferTest::BasicCommandBufferTest;
+
+ cl_int Run() override
+ {
+ cl_int error = clCommandNDRangeKernelKHR(
+ command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+ nullptr, 0, nullptr, nullptr, nullptr);
+ test_error(error, "clCommandNDRangeKernelKHR failed");
+
+ error = clFinalizeCommandBufferKHR(command_buffer);
+ test_error(error, "clFinalizeCommandBufferKHR failed");
+
+ const cl_int pattern = 42;
+ error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
+ data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBuffer failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_int> output_data(num_elements);
+ error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
+ output_data.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+ }
+
+ return CL_SUCCESS;
+ }
+};
+
+// Test enqueuing a command-buffer containing multiple command, including
+// operations other than NDRange kernel execution.
+struct MixedCommandsTest : public BasicCommandBufferTest
+{
+ using BasicCommandBufferTest::BasicCommandBufferTest;
+
+ cl_int Run() override
+ {
+ cl_int error;
+ const size_t iterations = 4;
+ clMemWrapper result_mem =
+ clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(cl_int) * iterations, nullptr, &error);
+ test_error(error, "clCreateBuffer failed");
+
+ const cl_int pattern_base = 42;
+ for (size_t i = 0; i < iterations; i++)
+ {
+ const cl_int pattern = pattern_base + i;
+ cl_int error = clCommandFillBufferKHR(
+ command_buffer, nullptr, in_mem, &pattern, sizeof(cl_int), 0,
+ data_size(), 0, nullptr, nullptr, nullptr);
+ test_error(error, "clCommandFillBufferKHR failed");
+
+ error = clCommandNDRangeKernelKHR(
+ command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+ &num_elements, nullptr, 0, nullptr, nullptr, nullptr);
+ test_error(error, "clCommandNDRangeKernelKHR failed");
+
+ const size_t result_offset = i * sizeof(cl_int);
+ error = clCommandCopyBufferKHR(
+ command_buffer, nullptr, out_mem, result_mem, 0, result_offset,
+ sizeof(cl_int), 0, nullptr, nullptr, nullptr);
+ test_error(error, "clCommandCopyBufferKHR failed");
+ }
+
+ error = clFinalizeCommandBufferKHR(command_buffer);
+ test_error(error, "clFinalizeCommandBufferKHR failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_int> result_data(num_elements);
+ error = clEnqueueReadBuffer(queue, result_mem, CL_TRUE, 0,
+ iterations * sizeof(cl_int),
+ result_data.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < iterations; i++)
+ {
+ const cl_int ref = pattern_base + i;
+ CHECK_VERIFICATION_ERROR(ref, result_data[i], i);
+ }
+
+ return CL_SUCCESS;
+ }
+};
+
+// Test enqueueing a command-buffer blocked on a user-event
+struct UserEventTest : public BasicCommandBufferTest
+{
+ using BasicCommandBufferTest::BasicCommandBufferTest;
+
+ cl_int Run() override
+ {
+ cl_int error = clCommandNDRangeKernelKHR(
+ command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+ nullptr, 0, nullptr, nullptr, nullptr);
+ test_error(error, "clCommandNDRangeKernelKHR failed");
+
+ error = clFinalizeCommandBufferKHR(command_buffer);
+ test_error(error, "clFinalizeCommandBufferKHR failed");
+
+ clEventWrapper user_event = clCreateUserEvent(context, &error);
+ test_error(error, "clCreateUserEvent failed");
+
+ const cl_int pattern = 42;
+ error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
+ data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBuffer failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
+ &user_event, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_int> output_data(num_elements);
+ error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+ output_data.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ error = clSetUserEventStatus(user_event, CL_COMPLETE);
+ test_error(error, "clSetUserEventStatus failed");
+
+ error = clFinish(queue);
+ test_error(error, "clFinish failed");
+
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+ }
+
+ return CL_SUCCESS;
+ }
+};
+
+// Test flushing the command-queue between command-buffer enqueues
+struct ExplicitFlushTest : public BasicCommandBufferTest
+{
+ using BasicCommandBufferTest::BasicCommandBufferTest;
+
+ cl_int Run() override
+ {
+ cl_int error = clCommandNDRangeKernelKHR(
+ command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+ nullptr, 0, nullptr, nullptr, nullptr);
+ test_error(error, "clCommandNDRangeKernelKHR failed");
+
+ error = clFinalizeCommandBufferKHR(command_buffer);
+ test_error(error, "clFinalizeCommandBufferKHR failed");
+
+ const cl_int pattern_A = 42;
+ error = clEnqueueFillBuffer(queue, in_mem, &pattern_A, sizeof(cl_int),
+ 0, data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBuffer failed");
+
+ error = clFlush(queue);
+ test_error(error, "clFlush failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_int> output_data_A(num_elements);
+ error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+ output_data_A.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ const cl_int pattern_B = 0xA;
+ error = clEnqueueFillBuffer(queue, in_mem, &pattern_B, sizeof(cl_int),
+ 0, data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBuffer failed");
+
+ error = clFlush(queue);
+ test_error(error, "clFlush failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ error = clFlush(queue);
+ test_error(error, "clFlush failed");
+
+ std::vector<cl_int> output_data_B(num_elements);
+ error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+ output_data_B.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ error = clFinish(queue);
+ test_error(error, "clFinish failed");
+
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_A, output_data_A[i], i);
+
+ CHECK_VERIFICATION_ERROR(pattern_B, output_data_B[i], i);
+ }
+ return CL_SUCCESS;
+ }
+
+ bool Skip() override
+ {
+ return !simultaneous_use || BasicCommandBufferTest::Skip();
+ }
+};
+
+// Test enqueueing a command-buffer twice separated by another enqueue operation
+struct InterleavedEnqueueTest : public BasicCommandBufferTest
+{
+ using BasicCommandBufferTest::BasicCommandBufferTest;
+
+ cl_int Run() override
+ {
+ cl_int error = clCommandNDRangeKernelKHR(
+ command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+ nullptr, 0, nullptr, nullptr, nullptr);
+ test_error(error, "clCommandNDRangeKernelKHR failed");
+
+ error = clFinalizeCommandBufferKHR(command_buffer);
+ test_error(error, "clFinalizeCommandBufferKHR failed");
+
+ cl_int pattern = 42;
+ error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
+ data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBuffer failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ pattern = 0xABCD;
+ error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
+ data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBuffer failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ error = clEnqueueCopyBuffer(queue, in_mem, out_mem, 0, 0, data_size(),
+ 0, nullptr, nullptr);
+ test_error(error, "clEnqueueCopyBuffer failed");
+
+ std::vector<cl_int> output_data(num_elements);
+ error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
+ output_data.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+ }
+
+ return CL_SUCCESS;
+ }
+
+ bool Skip() override
+ {
+ return !simultaneous_use || BasicCommandBufferTest::Skip();
+ }
+};
+
+// Test sync-points with an out-of-order command-buffer
+struct OutOfOrderTest : public BasicCommandBufferTest
+{
+ using BasicCommandBufferTest::BasicCommandBufferTest;
+ OutOfOrderTest(cl_device_id device, cl_context context,
+ cl_command_queue queue)
+ : BasicCommandBufferTest(device, context, queue),
+ out_of_order_command_buffer(this), out_of_order_queue(nullptr),
+ event(nullptr)
+ {}
+
+ cl_int Run() override
+ {
+ cl_sync_point_khr sync_points[2];
+
+ const cl_int pattern = 42;
+ cl_int error =
+ clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem,
+ &pattern, sizeof(cl_int), 0, data_size(), 0,
+ nullptr, &sync_points[0], nullptr);
+ test_error(error, "clCommandFillBufferKHR failed");
+
+ const cl_int overwritten_pattern = 0xACDC;
+ error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr,
+ out_mem, &overwritten_pattern,
+ sizeof(cl_int), 0, data_size(), 0,
+ nullptr, &sync_points[1], nullptr);
+ test_error(error, "clCommandFillBufferKHR failed");
+
+ error = clCommandNDRangeKernelKHR(
+ out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+ &num_elements, nullptr, 2, sync_points, nullptr, nullptr);
+ test_error(error, "clCommandNDRangeKernelKHR failed");
+
+ error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
+ test_error(error, "clFinalizeCommandBufferKHR failed");
+
+ error = clEnqueueCommandBufferKHR(
+ 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_int> output_data(num_elements);
+ error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
+ data_size(), output_data.data(), 1, &event,
+ nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+ }
+
+ return CL_SUCCESS;
+ }
+
+ cl_int SetUp(int elements) override
+ {
+ cl_int error = BasicCommandBufferTest::SetUp(elements);
+ test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+ if (!out_of_order_support)
+ {
+ // Test will skip as device doesn't support out-of-order
+ // command-buffers
+ return CL_SUCCESS;
+ }
+
+ out_of_order_queue = clCreateCommandQueue(
+ context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
+ test_error(error, "Unable to create command queue to test with");
+
+ out_of_order_command_buffer =
+ clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
+ test_error(error, "clCreateCommandBufferKHR failed");
+
+ return CL_SUCCESS;
+ }
+
+ bool Skip() override
+ {
+ return !out_of_order_support || BasicCommandBufferTest::Skip();
+ }
+
+ clCommandQueueWrapper out_of_order_queue;
+ clCommandBufferWrapper out_of_order_command_buffer;
+ clEventWrapper event;
+};
+
+#undef CHECK_VERIFICATION_ERROR
+
+template <class T>
+int MakeAndRunTest(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ CHECK_COMMAND_BUFFER_EXTENSION_AVAILABLE(device);
+
+ auto test_fixture = T(device, context, queue);
+ cl_int error = test_fixture.SetUp(num_elements);
+ test_error_ret(error, "Error in test initialization", TEST_FAIL);
+
+ if (test_fixture.Skip())
+ {
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ error = test_fixture.Run();
+ test_error_ret(error, "Test Failed", TEST_FAIL);
+
+ return TEST_PASS;
+}
+} // anonymous namespace
+
+int test_single_ndrange(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return MakeAndRunTest<BasicEnqueueTest>(device, context, queue,
+ num_elements);
+}
+
+int test_interleaved_enqueue(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return MakeAndRunTest<InterleavedEnqueueTest>(device, context, queue,
+ num_elements);
+}
+
+int test_mixed_commands(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return MakeAndRunTest<MixedCommandsTest>(device, context, queue,
+ num_elements);
+}
+
+int test_explicit_flush(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return MakeAndRunTest<ExplicitFlushTest>(device, context, queue,
+ num_elements);
+}
+
+int test_user_events(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return MakeAndRunTest<UserEventTest>(device, context, queue, num_elements);
+}
+
+int test_out_of_order(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return MakeAndRunTest<OutOfOrderTest>(device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h
new file mode 100644
index 00000000..0fd2e4ec
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h
@@ -0,0 +1,177 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _CL_KHR_COMMAND_BUFFER_TEST_BASE_H
+#define _CL_KHR_COMMAND_BUFFER_TEST_BASE_H
+
+#include <CL/cl_ext.h>
+#include "harness/deviceInfo.h"
+#include "harness/testHarness.h"
+
+
+// Base class for setting function pointers to new extension entry points
+struct CommandBufferTestBase
+{
+ CommandBufferTestBase(cl_device_id device): device(device) {}
+
+ cl_int init_extension_functions()
+ {
+ cl_platform_id platform;
+ cl_int error =
+ clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id),
+ &platform, nullptr);
+ test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
+
+ // If it is supported get the addresses of all the APIs here.
+#define GET_EXTENSION_ADDRESS(FUNC) \
+ FUNC = reinterpret_cast<FUNC##_fn>( \
+ clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \
+ if (FUNC == nullptr) \
+ { \
+ log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \
+ " with " #FUNC "\n"); \
+ return TEST_FAIL; \
+ }
+
+ GET_EXTENSION_ADDRESS(clCreateCommandBufferKHR);
+ GET_EXTENSION_ADDRESS(clReleaseCommandBufferKHR);
+ GET_EXTENSION_ADDRESS(clRetainCommandBufferKHR);
+ GET_EXTENSION_ADDRESS(clFinalizeCommandBufferKHR);
+ GET_EXTENSION_ADDRESS(clEnqueueCommandBufferKHR);
+ GET_EXTENSION_ADDRESS(clCommandBarrierWithWaitListKHR);
+ GET_EXTENSION_ADDRESS(clCommandCopyBufferKHR);
+ GET_EXTENSION_ADDRESS(clCommandCopyBufferRectKHR);
+ GET_EXTENSION_ADDRESS(clCommandCopyBufferToImageKHR);
+ GET_EXTENSION_ADDRESS(clCommandCopyImageKHR);
+ GET_EXTENSION_ADDRESS(clCommandCopyImageToBufferKHR);
+ GET_EXTENSION_ADDRESS(clCommandFillBufferKHR);
+ GET_EXTENSION_ADDRESS(clCommandFillImageKHR);
+ GET_EXTENSION_ADDRESS(clCommandNDRangeKernelKHR);
+ GET_EXTENSION_ADDRESS(clGetCommandBufferInfoKHR);
+#undef GET_EXTENSION_ADDRESS
+ return CL_SUCCESS;
+ }
+
+ clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr;
+ clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr;
+ clRetainCommandBufferKHR_fn clRetainCommandBufferKHR = nullptr;
+ clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr;
+ clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr;
+ clCommandBarrierWithWaitListKHR_fn clCommandBarrierWithWaitListKHR =
+ nullptr;
+ clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr;
+ clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr;
+ clCommandCopyBufferToImageKHR_fn clCommandCopyBufferToImageKHR = nullptr;
+ clCommandCopyImageKHR_fn clCommandCopyImageKHR = nullptr;
+ clCommandCopyImageToBufferKHR_fn clCommandCopyImageToBufferKHR = nullptr;
+ clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr;
+ clCommandFillImageKHR_fn clCommandFillImageKHR = nullptr;
+ clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr;
+ clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr;
+
+ cl_device_id device = nullptr;
+};
+
+// Wrapper class based off generic typeWrappers.h wrappers. However, because
+// the release/retain functions are queried at runtime from the platform,
+// rather than known at compile time we cannot link the instantiated template.
+// Instead, pass an instance of `CommandBufferTestBase` on wrapper construction
+// to access the release/retain functions.
+class clCommandBufferWrapper {
+ cl_command_buffer_khr object = nullptr;
+
+ void retain()
+ {
+ if (!object) return;
+
+ auto err = base->clRetainCommandBufferKHR(object);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err, "clRetainCommandBufferKHR() failed");
+ std::abort();
+ }
+ }
+
+ void release()
+ {
+ if (!object) return;
+
+ auto err = base->clReleaseCommandBufferKHR(object);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err, "clReleaseCommandBufferKHR() failed");
+ std::abort();
+ }
+ }
+
+ // Used to access release/retain functions
+ CommandBufferTestBase *base;
+
+public:
+ // We always want to have base available to dereference
+ clCommandBufferWrapper() = delete;
+
+ clCommandBufferWrapper(CommandBufferTestBase *base): base(base) {}
+
+ // On assignment, assume the object has a refcount of one.
+ clCommandBufferWrapper &operator=(cl_command_buffer_khr rhs)
+ {
+ reset(rhs);
+ return *this;
+ }
+
+ // Copy semantics, increase retain count.
+ clCommandBufferWrapper(clCommandBufferWrapper const &w) { *this = w; }
+ clCommandBufferWrapper &operator=(clCommandBufferWrapper const &w)
+ {
+ reset(w.object);
+ retain();
+ return *this;
+ }
+
+ // Move semantics, directly take ownership.
+ clCommandBufferWrapper(clCommandBufferWrapper &&w) { *this = std::move(w); }
+ clCommandBufferWrapper &operator=(clCommandBufferWrapper &&w)
+ {
+ reset(w.object);
+ w.object = nullptr;
+ return *this;
+ }
+
+ ~clCommandBufferWrapper() { reset(); }
+
+ // Release the existing object, if any, and own the new one, if any.
+ void reset(cl_command_buffer_khr new_object = nullptr)
+ {
+ release();
+ object = new_object;
+ }
+
+ operator cl_command_buffer_khr() const { return object; }
+};
+
+#define CHECK_COMMAND_BUFFER_EXTENSION_AVAILABLE(device) \
+ { \
+ if (!is_extension_available(device, "cl_khr_command_buffer")) \
+ { \
+ log_info( \
+ "Device does not support 'cl_khr_command_buffer'. Skipping " \
+ "the test.\n"); \
+ return TEST_SKIPPED_ITSELF; \
+ } \
+ }
+
+
+#endif // _CL_KHR_COMMAND_BUFFER_TEST_BASE_H
diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
new file mode 100644
index 00000000..4dece455
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
@@ -0,0 +1,35 @@
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "procs.h"
+#include "harness/testHarness.h"
+
+test_definition test_list[] = {
+ ADD_TEST(single_ndrange), ADD_TEST(interleaved_enqueue),
+ ADD_TEST(mixed_commands), ADD_TEST(explicit_flush),
+ ADD_TEST(user_events), ADD_TEST(out_of_order)
+};
+
+
+int main(int argc, const char *argv[])
+{
+ // A device may report the required properties of a queue that
+ // is compatible with command-buffers via the query
+ // CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR. We account
+ // for this in the tests themselves, rather than here, where we have a
+ // device to query.
+ const cl_command_queue_properties queue_properties = 0;
+ return runTestHarnessWithCheck(argc, argv, ARRAY_SIZE(test_list), test_list,
+ false, queue_properties, nullptr);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h
new file mode 100644
index 00000000..58fd228f
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h
@@ -0,0 +1,35 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _CL_KHR_COMMAND_BUFFER_PROCS_H
+#define _CL_KHR_COMMAND_BUFFER_PROCS_H
+
+#include <CL/cl.h>
+
+// Basic command-buffer tests
+extern int test_single_ndrange(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_interleaved_enqueue(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_mixed_commands(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_explicit_flush(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_user_events(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_out_of_order(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
+
+#endif /*_CL_KHR_COMMAND_BUFFER_PROCS_H*/
diff --git a/test_conformance/gl/common.h b/test_conformance/gl/common.h
index 36221da1..d8587cf0 100644
--- a/test_conformance/gl/common.h
+++ b/test_conformance/gl/common.h
@@ -32,8 +32,8 @@ struct format {
};
// These are the typically tested formats.
-
-static struct format common_formats[] = {
+// clang-format off
+static const format common_formats[] = {
#ifdef __APPLE__
{ GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, kUChar },
{ GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar },
@@ -53,25 +53,30 @@ static struct format common_formats[] = {
};
#ifdef GL_VERSION_3_2
-static struct format depth_formats[] = {
+static const format depth_formats[] = {
{ GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, kUShort },
{ GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, kFloat },
{ GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, kUInt },
{ GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, kFloat },
};
#endif
+// clang-format on
int test_images_write_common(cl_device_id device, cl_context context,
- cl_command_queue queue, struct format* formats, size_t nformats,
- GLenum *targets, size_t ntargets, sizevec_t* sizes, size_t nsizes );
+ cl_command_queue queue, const format *formats,
+ size_t nformats, GLenum *targets, size_t ntargets,
+ sizevec_t *sizes, size_t nsizes);
-int test_images_read_common( cl_device_id device, cl_context context,
- cl_command_queue queue, struct format* formats, size_t nformats,
- GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes );
+int test_images_read_common(cl_device_id device, cl_context context,
+ cl_command_queue queue, const format *formats,
+ size_t nformats, GLenum *targets, size_t ntargets,
+ sizevec_t *sizes, size_t nsizes);
-int test_images_get_info_common( cl_device_id device, cl_context context,
- cl_command_queue queue, struct format* formats, size_t nformats,
- GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes );
+int test_images_get_info_common(cl_device_id device, cl_context context,
+ cl_command_queue queue, const format *formats,
+ size_t nformats, GLenum *targets,
+ size_t ntargets, sizevec_t *sizes,
+ size_t nsizes);
int is_rgb_101010_supported( cl_context context, GLenum gl_target );
diff --git a/test_conformance/gl/test_buffers.cpp b/test_conformance/gl/test_buffers.cpp
index 35f01ee6..c61610d0 100644
--- a/test_conformance/gl/test_buffers.cpp
+++ b/test_conformance/gl/test_buffers.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -17,126 +17,126 @@
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
-#if !defined (__APPLE__)
- #include <CL/cl_gl.h>
+#if !defined(__APPLE__)
+#include <CL/cl_gl.h>
#endif
static const char *bufferKernelPattern =
-"__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, __global %s%s *glDest )\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-" clDest[ tid ] = source[ tid ] + (%s%s)(1);\n"
-" glDest[ tid ] = source[ tid ] + (%s%s)(2);\n"
-"}\n";
-
-#define TYPE_CASE( enum, type, range, offset ) \
- case enum: \
- { \
- cl_##type *ptr = (cl_##type *)outData; \
- for( i = 0; i < count; i++ ) \
- ptr[ i ] = (cl_##type)( ( genrand_int32(d) & range ) - offset ); \
- break; \
+ "__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, "
+ "__global %s%s *glDest )\n"
+ "{\n"
+ " int tid = get_global_id(0);\n"
+ " clDest[ tid ] = source[ tid ] + (%s%s)(1);\n"
+ " glDest[ tid ] = source[ tid ] + (%s%s)(2);\n"
+ "}\n";
+
+#define TYPE_CASE(enum, type, range, offset) \
+ case enum: { \
+ cl_##type *ptr = (cl_##type *)outData; \
+ for (i = 0; i < count; i++) \
+ ptr[i] = (cl_##type)((genrand_int32(d) & range) - offset); \
+ break; \
}
-void gen_input_data( ExplicitType type, size_t count, MTdata d, void *outData )
+void gen_input_data(ExplicitType type, size_t count, MTdata d, void *outData)
{
size_t i;
- switch( type )
+ switch (type)
{
- case kBool:
- {
+ case kBool: {
bool *boolPtr = (bool *)outData;
- for( i = 0; i < count; i++ )
+ for (i = 0; i < count; i++)
{
- boolPtr[i] = ( genrand_int32(d) & 1 ) ? true : false;
+ boolPtr[i] = (genrand_int32(d) & 1) ? true : false;
}
break;
}
- TYPE_CASE( kChar, char, 250, 127 )
- TYPE_CASE( kUChar, uchar, 250, 0 )
- TYPE_CASE( kShort, short, 65530, 32767 )
- TYPE_CASE( kUShort, ushort, 65530, 0 )
- TYPE_CASE( kInt, int, 0x0fffffff, 0x70000000 )
- TYPE_CASE( kUInt, uint, 0x0fffffff, 0 )
+ TYPE_CASE(kChar, char, 250, 127)
+ TYPE_CASE(kUChar, uchar, 250, 0)
+ TYPE_CASE(kShort, short, 65530, 32767)
+ TYPE_CASE(kUShort, ushort, 65530, 0)
+ TYPE_CASE(kInt, int, 0x0fffffff, 0x70000000)
+ TYPE_CASE(kUInt, uint, 0x0fffffff, 0)
- case kLong:
- {
+ case kLong: {
cl_long *longPtr = (cl_long *)outData;
- for( i = 0; i < count; i++ )
+ for (i = 0; i < count; i++)
{
- longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 );
+ longPtr[i] = (cl_long)genrand_int32(d)
+ | ((cl_ulong)genrand_int32(d) << 32);
}
break;
}
- case kULong:
- {
+ case kULong: {
cl_ulong *ulongPtr = (cl_ulong *)outData;
- for( i = 0; i < count; i++ )
+ for (i = 0; i < count; i++)
{
- ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 );
+ ulongPtr[i] = (cl_ulong)genrand_int32(d)
+ | ((cl_ulong)genrand_int32(d) << 32);
}
break;
}
- case kFloat:
- {
+ case kFloat: {
cl_float *floatPtr = (float *)outData;
- for( i = 0; i < count; i++ )
- floatPtr[i] = get_random_float( -100000.f, 100000.f, d );
+ for (i = 0; i < count; i++)
+ floatPtr[i] = get_random_float(-100000.f, 100000.f, d);
break;
}
default:
- log_error( "ERROR: Invalid type passed in to generate_random_data!\n" );
+ log_error(
+ "ERROR: Invalid type passed in to generate_random_data!\n");
break;
}
}
-#define INC_CASE( enum, type ) \
- case enum: \
- { \
- cl_##type *src = (cl_##type *)inData; \
- cl_##type *dst = (cl_##type *)outData; \
- *dst = *src + 1; \
- break; \
+#define INC_CASE(enum, type) \
+ case enum: { \
+ cl_##type *src = (cl_##type *)inData; \
+ cl_##type *dst = (cl_##type *)outData; \
+ *dst = *src + 1; \
+ break; \
}
-void get_incremented_value( void *inData, void *outData, ExplicitType type )
+void get_incremented_value(void *inData, void *outData, ExplicitType type)
{
- switch( type )
+ switch (type)
{
- INC_CASE( kChar, char )
- INC_CASE( kUChar, uchar )
- INC_CASE( kShort, short )
- INC_CASE( kUShort, ushort )
- INC_CASE( kInt, int )
- INC_CASE( kUInt, uint )
- INC_CASE( kLong, long )
- INC_CASE( kULong, ulong )
- INC_CASE( kFloat, float )
- default:
- break;
+ INC_CASE(kChar, char)
+ INC_CASE(kUChar, uchar)
+ INC_CASE(kShort, short)
+ INC_CASE(kUShort, ushort)
+ INC_CASE(kInt, int)
+ INC_CASE(kUInt, uint)
+ INC_CASE(kLong, long)
+ INC_CASE(kULong, ulong)
+ INC_CASE(kFloat, float)
+ default: break;
}
}
-int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType vecType, size_t vecSize, int numElements, int validate_only, MTdata d)
+int test_buffer_kernel(cl_context context, cl_command_queue queue,
+ ExplicitType vecType, size_t vecSize, int numElements,
+ int validate_only, MTdata d)
{
clProgramWrapper program;
clKernelWrapper kernel;
- clMemWrapper streams[ 3 ];
+ clMemWrapper streams[3];
size_t dataSize = numElements * 16 * sizeof(cl_long);
#if !(defined(_WIN32) && defined(_MSC_VER))
- cl_long inData[numElements * 16], outDataCL[numElements * 16], outDataGL[ numElements * 16 ];
+ cl_long inData[numElements * 16], outDataCL[numElements * 16],
+ outDataGL[numElements * 16];
#else
- cl_long* inData = (cl_long*)_malloca(dataSize);
- cl_long* outDataCL = (cl_long*)_malloca(dataSize);
- cl_long* outDataGL = (cl_long*)_malloca(dataSize);
+ cl_long *inData = (cl_long *)_malloca(dataSize);
+ cl_long *outDataCL = (cl_long *)_malloca(dataSize);
+ cl_long *outDataGL = (cl_long *)_malloca(dataSize);
#endif
glBufferWrapper inGLBuffer, outGLBuffer;
- int i;
+ int i;
size_t bufferSize;
int error;
@@ -146,210 +146,259 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType
char sizeName[4];
/* Create the source */
- if( vecSize == 1 )
- sizeName[ 0 ] = 0;
+ if (vecSize == 1)
+ sizeName[0] = 0;
else
- sprintf( sizeName, "%d", (int)vecSize );
+ sprintf(sizeName, "%d", (int)vecSize);
- sprintf( kernelSource, bufferKernelPattern, get_explicit_type_name( vecType ), sizeName,
- get_explicit_type_name( vecType ), sizeName,
- get_explicit_type_name( vecType ), sizeName,
- get_explicit_type_name( vecType ), sizeName,
- get_explicit_type_name( vecType ), sizeName );
+ sprintf(kernelSource, bufferKernelPattern, get_explicit_type_name(vecType),
+ sizeName, get_explicit_type_name(vecType), sizeName,
+ get_explicit_type_name(vecType), sizeName,
+ get_explicit_type_name(vecType), sizeName,
+ get_explicit_type_name(vecType), sizeName);
/* Create kernels */
programPtr = kernelSource;
- if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
+ if (create_single_kernel_helper(context, &program, &kernel, 1,
+ (const char **)&programPtr, "sample_test"))
{
return -1;
}
- bufferSize = numElements * vecSize * get_explicit_type_size( vecType );
+ bufferSize = numElements * vecSize * get_explicit_type_size(vecType);
/* Generate some almost-random input data */
- gen_input_data( vecType, vecSize * numElements, d, inData );
- memset( outDataCL, 0, dataSize );
- memset( outDataGL, 0, dataSize );
+ gen_input_data(vecType, vecSize * numElements, d, inData);
+ memset(outDataCL, 0, dataSize);
+ memset(outDataGL, 0, dataSize);
/* Generate some GL buffers to go against */
- glGenBuffers( 1, &inGLBuffer );
- glGenBuffers( 1, &outGLBuffer );
+ glGenBuffers(1, &inGLBuffer);
+ glGenBuffers(1, &outGLBuffer);
- glBindBuffer( GL_ARRAY_BUFFER, inGLBuffer );
- glBufferData( GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW );
+ glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer);
+ glBufferData(GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW);
- // Note: we need to bind the output buffer, even though we don't care about its values yet,
- // because CL needs it to get the buffer size
- glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer );
- glBufferData( GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW );
+ // Note: we need to bind the output buffer, even though we don't care about
+ // its values yet, because CL needs it to get the buffer size
+ glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
+ glBufferData(GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW);
- glBindBuffer( GL_ARRAY_BUFFER, 0 );
+ glBindBuffer(GL_ARRAY_BUFFER, 0);
glFinish();
- /* Generate some streams. The first and last ones are GL, middle one just vanilla CL */
- streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_ONLY, inGLBuffer, &error );
- test_error( error, "Unable to create input GL buffer" );
+ /* Generate some streams. The first and last ones are GL, middle one just
+ * vanilla CL */
+ streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_ONLY,
+ inGLBuffer, &error);
+ test_error(error, "Unable to create input GL buffer");
- streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, bufferSize, NULL, &error );
- test_error( error, "Unable to create output CL buffer" );
+ streams[1] =
+ clCreateBuffer(context, CL_MEM_READ_WRITE, bufferSize, NULL, &error);
+ test_error(error, "Unable to create output CL buffer");
- streams[ 2 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_WRITE_ONLY, outGLBuffer, &error );
- test_error( error, "Unable to create output GL buffer" );
+ streams[2] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_WRITE_ONLY,
+ outGLBuffer, &error);
+ test_error(error, "Unable to create output GL buffer");
- /* Validate the info */
- if (validate_only) {
- int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) |
- CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) );
- for(i=0;i<3;i++)
+ /* Validate the info */
+ if (validate_only)
{
- clReleaseMemObject(streams[i]);
- streams[i] = NULL;
- }
+ int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER,
+ (GLuint)inGLBuffer, (GLenum)0, 0)
+ | CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER,
+ (GLuint)outGLBuffer, (GLenum)0, 0));
+ for (i = 0; i < 3; i++)
+ {
+ streams[i].reset();
+ }
- glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0;
- glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0;
+ glDeleteBuffers(1, &inGLBuffer);
+ inGLBuffer = 0;
+ glDeleteBuffers(1, &outGLBuffer);
+ outGLBuffer = 0;
- return result;
- }
+ return result;
+ }
/* Assign streams and execute */
- for( int i = 0; i < 3; i++ )
+ for (int i = 0; i < 3; i++)
{
- error = clSetKernelArg( kernel, i, sizeof( streams[ i ] ), &streams[ i ] );
- test_error( error, "Unable to set kernel arguments" );
+ error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]);
+ test_error(error, "Unable to set kernel arguments");
}
- error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL);
- test_error( error, "Unable to acquire GL obejcts");
- error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL);
- test_error( error, "Unable to acquire GL obejcts");
+ error =
+ (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL);
+ test_error(error, "Unable to acquire GL obejcts");
+ error =
+ (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL);
+ test_error(error, "Unable to acquire GL obejcts");
/* Run the kernel */
threads[0] = numElements;
- error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
- test_error( error, "Unable to get work group size to use" );
-
- error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
- test_error( error, "Unable to execute test kernel" );
-
- error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL );
- test_error(error, "clEnqueueReleaseGLObjects failed");
- error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL );
- test_error(error, "clEnqueueReleaseGLObjects failed");
-
- // Get the results from both CL and GL and make sure everything looks correct
- error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, bufferSize, outDataCL, 0, NULL, NULL );
- test_error( error, "Unable to read output CL array!" );
-
- glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer );
- void *glMem = glMapBuffer( GL_ARRAY_BUFFER, GL_READ_ONLY );
- memcpy( outDataGL, glMem, bufferSize );
- glUnmapBuffer( GL_ARRAY_BUFFER );
-
- char *inP = (char *)inData, *glP = (char *)outDataGL, *clP = (char *)outDataCL;
+ error = get_max_common_work_group_size(context, kernel, threads[0],
+ &localThreads[0]);
+ test_error(error, "Unable to get work group size to use");
+
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+ localThreads, 0, NULL, NULL);
+ test_error(error, "Unable to execute test kernel");
+
+ error =
+ (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL);
+ test_error(error, "clEnqueueReleaseGLObjects failed");
+ error =
+ (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL);
+ test_error(error, "clEnqueueReleaseGLObjects failed");
+
+ // Get the results from both CL and GL and make sure everything looks
+ // correct
+ error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, bufferSize,
+ outDataCL, 0, NULL, NULL);
+ test_error(error, "Unable to read output CL array!");
+
+ glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
+ void *glMem = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);
+ memcpy(outDataGL, glMem, bufferSize);
+ glUnmapBuffer(GL_ARRAY_BUFFER);
+
+ char *inP = (char *)inData, *glP = (char *)outDataGL,
+ *clP = (char *)outDataCL;
error = 0;
- for( size_t i = 0; i < numElements * vecSize; i++ )
+ for (size_t i = 0; i < numElements * vecSize; i++)
{
cl_long expectedCLValue, expectedGLValue;
- get_incremented_value( inP, &expectedCLValue, vecType );
- get_incremented_value( &expectedCLValue, &expectedGLValue, vecType );
+ get_incremented_value(inP, &expectedCLValue, vecType);
+ get_incremented_value(&expectedCLValue, &expectedGLValue, vecType);
- if( memcmp( clP, &expectedCLValue, get_explicit_type_size( vecType ) ) != 0 )
+ if (memcmp(clP, &expectedCLValue, get_explicit_type_size(vecType)) != 0)
{
- char scratch[ 64 ];
- log_error( "ERROR: Data sample %d from the CL output did not validate!\n", (int)i );
- log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) );
- log_error( "\tExpected: %s\n", GetDataVectorString( &expectedCLValue, get_explicit_type_size( vecType ), 1, scratch ) );
- log_error( "\t Actual: %s\n", GetDataVectorString( clP, get_explicit_type_size( vecType ), 1, scratch ) );
+ char scratch[64];
+ log_error(
+ "ERROR: Data sample %d from the CL output did not validate!\n",
+ (int)i);
+ log_error("\t Input: %s\n",
+ GetDataVectorString(inP, get_explicit_type_size(vecType),
+ 1, scratch));
+ log_error("\tExpected: %s\n",
+ GetDataVectorString(&expectedCLValue,
+ get_explicit_type_size(vecType), 1,
+ scratch));
+ log_error("\t Actual: %s\n",
+ GetDataVectorString(clP, get_explicit_type_size(vecType),
+ 1, scratch));
error = -1;
}
- if( memcmp( glP, &expectedGLValue, get_explicit_type_size( vecType ) ) != 0 )
+ if (memcmp(glP, &expectedGLValue, get_explicit_type_size(vecType)) != 0)
{
- char scratch[ 64 ];
- log_error( "ERROR: Data sample %d from the GL output did not validate!\n", (int)i );
- log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) );
- log_error( "\tExpected: %s\n", GetDataVectorString( &expectedGLValue, get_explicit_type_size( vecType ), 1, scratch ) );
- log_error( "\t Actual: %s\n", GetDataVectorString( glP, get_explicit_type_size( vecType ), 1, scratch ) );
+ char scratch[64];
+ log_error(
+ "ERROR: Data sample %d from the GL output did not validate!\n",
+ (int)i);
+ log_error("\t Input: %s\n",
+ GetDataVectorString(inP, get_explicit_type_size(vecType),
+ 1, scratch));
+ log_error("\tExpected: %s\n",
+ GetDataVectorString(&expectedGLValue,
+ get_explicit_type_size(vecType), 1,
+ scratch));
+ log_error("\t Actual: %s\n",
+ GetDataVectorString(glP, get_explicit_type_size(vecType),
+ 1, scratch));
error = -1;
}
- if( error )
- return error;
+ if (error) return error;
- inP += get_explicit_type_size( vecType );
- glP += get_explicit_type_size( vecType );
- clP += get_explicit_type_size( vecType );
+ inP += get_explicit_type_size(vecType);
+ glP += get_explicit_type_size(vecType);
+ clP += get_explicit_type_size(vecType);
}
- for(i=0;i<3;i++)
+ for (i = 0; i < 3; i++)
{
- clReleaseMemObject(streams[i]);
- streams[i] = NULL;
+ streams[i].reset();
}
- glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0;
- glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0;
+ glDeleteBuffers(1, &inGLBuffer);
+ inGLBuffer = 0;
+ glDeleteBuffers(1, &outGLBuffer);
+ outGLBuffer = 0;
return 0;
}
-int test_buffers( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
+int test_buffers(cl_device_id device, cl_context context,
+ cl_command_queue queue, int numElements)
{
- ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes };
+ ExplicitType vecType[] = {
+ kChar, kUChar, kShort, kUShort, kInt,
+ kUInt, kLong, kULong, kFloat, kNumExplicitTypes
+ };
unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
unsigned int index, typeIndex;
int retVal = 0;
RandomSeed seed(gRandomSeed);
- for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
+ for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
{
- for( index = 0; vecSizes[ index ] != 0; index++ )
+ for (index = 0; vecSizes[index] != 0; index++)
{
// Test!
- if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 0, seed) != 0 )
+ if (test_buffer_kernel(context, queue, vecType[typeIndex],
+ vecSizes[index], numElements, 0, seed)
+ != 0)
{
- char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
- log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] );
+ char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8",
+ "", "", "", "", "", "", "", "16" };
+ log_error(" Buffer test %s%s FAILED\n",
+ get_explicit_type_name(vecType[typeIndex]),
+ sizeNames[vecSizes[index]]);
retVal++;
}
}
}
return retVal;
-
}
-int test_buffers_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
+int test_buffers_getinfo(cl_device_id device, cl_context context,
+ cl_command_queue queue, int numElements)
{
- ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes };
+ ExplicitType vecType[] = {
+ kChar, kUChar, kShort, kUShort, kInt,
+ kUInt, kLong, kULong, kFloat, kNumExplicitTypes
+ };
unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
unsigned int index, typeIndex;
int retVal = 0;
- RandomSeed seed( gRandomSeed );
+ RandomSeed seed(gRandomSeed);
- for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
+ for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
{
- for( index = 0; vecSizes[ index ] != 0; index++ )
+ for (index = 0; vecSizes[index] != 0; index++)
{
// Test!
- if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 1, seed ) != 0 )
+ if (test_buffer_kernel(context, queue, vecType[typeIndex],
+ vecSizes[index], numElements, 1, seed)
+ != 0)
{
- char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
- log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] );
+ char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8",
+ "", "", "", "", "", "", "", "16" };
+ log_error(" Buffer test %s%s FAILED\n",
+ get_explicit_type_name(vecType[typeIndex]),
+ sizeNames[vecSizes[index]]);
retVal++;
}
}
}
return retVal;
-
}
-
-
-
diff --git a/test_conformance/gl/test_fence_sync.cpp b/test_conformance/gl/test_fence_sync.cpp
index 00bf2cc9..35cc62de 100644
--- a/test_conformance/gl/test_fence_sync.cpp
+++ b/test_conformance/gl/test_fence_sync.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -17,7 +17,7 @@
#include "gl/setup.h"
#include "harness/genericThread.h"
-#if defined( __APPLE__ )
+#if defined(__APPLE__)
#include <OpenGL/glu.h>
#else
#include <GL/glu.h>
@@ -40,112 +40,121 @@ typedef struct __GLsync *GLsync;
#define APIENTRY
#endif
-typedef GLsync (APIENTRY *glFenceSyncPtr)(GLenum condition,GLbitfield flags);
+typedef GLsync(APIENTRY *glFenceSyncPtr)(GLenum condition, GLbitfield flags);
glFenceSyncPtr glFenceSyncFunc;
-typedef bool (APIENTRY *glIsSyncPtr)(GLsync sync);
+typedef bool(APIENTRY *glIsSyncPtr)(GLsync sync);
glIsSyncPtr glIsSyncFunc;
-typedef void (APIENTRY *glDeleteSyncPtr)(GLsync sync);
+typedef void(APIENTRY *glDeleteSyncPtr)(GLsync sync);
glDeleteSyncPtr glDeleteSyncFunc;
-typedef GLenum (APIENTRY *glClientWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout);
+typedef GLenum(APIENTRY *glClientWaitSyncPtr)(GLsync sync, GLbitfield flags,
+ GLuint64 timeout);
glClientWaitSyncPtr glClientWaitSyncFunc;
-typedef void (APIENTRY *glWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout);
+typedef void(APIENTRY *glWaitSyncPtr)(GLsync sync, GLbitfield flags,
+ GLuint64 timeout);
glWaitSyncPtr glWaitSyncFunc;
-typedef void (APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params);
+typedef void(APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params);
glGetInteger64vPtr glGetInteger64vFunc;
-typedef void (APIENTRY *glGetSyncivPtr)(GLsync sync,GLenum pname,GLsizei bufSize,GLsizei *length,
- GLint *values);
+typedef void(APIENTRY *glGetSyncivPtr)(GLsync sync, GLenum pname,
+ GLsizei bufSize, GLsizei *length,
+ GLint *values);
glGetSyncivPtr glGetSyncivFunc;
#define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError()))
-static void InitSyncFns( void )
+static void InitSyncFns(void)
{
- glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress( "glFenceSync" );
- glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress( "glIsSync" );
- glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress( "glDeleteSync" );
- glClientWaitSyncFunc = (glClientWaitSyncPtr)glutGetProcAddress( "glClientWaitSync" );
- glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress( "glWaitSync" );
- glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" );
- glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" );
+ glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress("glFenceSync");
+ glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress("glIsSync");
+ glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress("glDeleteSync");
+ glClientWaitSyncFunc =
+ (glClientWaitSyncPtr)glutGetProcAddress("glClientWaitSync");
+ glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress("glWaitSync");
+ glGetInteger64vFunc =
+ (glGetInteger64vPtr)glutGetProcAddress("glGetInteger64v");
+ glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress("glGetSynciv");
}
#ifndef GL_ARB_sync
-#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111
+#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111
-#define GL_OBJECT_TYPE 0x9112
-#define GL_SYNC_CONDITION 0x9113
-#define GL_SYNC_STATUS 0x9114
-#define GL_SYNC_FLAGS 0x9115
+#define GL_OBJECT_TYPE 0x9112
+#define GL_SYNC_CONDITION 0x9113
+#define GL_SYNC_STATUS 0x9114
+#define GL_SYNC_FLAGS 0x9115
-#define GL_SYNC_FENCE 0x9116
+#define GL_SYNC_FENCE 0x9116
-#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
+#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
-#define GL_UNSIGNALED 0x9118
-#define GL_SIGNALED 0x9119
+#define GL_UNSIGNALED 0x9118
+#define GL_SIGNALED 0x9119
-#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001
+#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001
-#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull
+#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull
-#define GL_ALREADY_SIGNALED 0x911A
-#define GL_TIMEOUT_EXPIRED 0x911B
-#define GL_CONDITION_SATISFIED 0x911C
-#define GL_WAIT_FAILED 0x911D
+#define GL_ALREADY_SIGNALED 0x911A
+#define GL_TIMEOUT_EXPIRED 0x911B
+#define GL_CONDITION_SATISFIED 0x911C
+#define GL_WAIT_FAILED 0x911D
#endif
#define USING_ARB_sync 1
#endif
-typedef cl_event (CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( cl_context context, GLsync sync, cl_int *errCode_ret) ;
+typedef cl_event(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)(
+ cl_context context, GLsync sync, cl_int *errCode_ret);
clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr;
static const char *updateBuffersKernel[] = {
- "__kernel void update( __global float4 * vertices, __global float4 *colors, int horizWrap, int rowIdx )\n"
+ "__kernel void update( __global float4 * vertices, __global float4 "
+ "*colors, int horizWrap, int rowIdx )\n"
"{\n"
" size_t tid = get_global_id(0);\n"
"\n"
" size_t xVal = ( tid & ( horizWrap - 1 ) );\n"
" vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n"
- " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, 1.f );\n"
+ " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, "
+ "1.f );\n"
"\n"
" int rowV = rowIdx + 1;\n"
- " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 ) >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n"
- " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, 1.0f, 1.0f, 1.0f );\n"
+ " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 "
+ ") >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n"
+ " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, "
+ "1.0f, 1.0f, 1.0f );\n"
" colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n"
- "}\n" };
-
-//Passthrough VertexShader
-static const char *vertexshader =
-"#version 150\n"
-"uniform mat4 projMatrix;\n"
-"in vec4 inPosition;\n"
-"in vec4 inColor;\n"
-"out vec4 vertColor;\n"
-"void main (void) {\n"
-" gl_Position = projMatrix*inPosition;\n"
-" vertColor = inColor;\n"
-"}\n";
-
-//Passthrough FragmentShader
-static const char *fragmentshader =
-"#version 150\n"
-"in vec4 vertColor;\n"
-"out vec4 outColor;\n"
-"void main (void) {\n"
-" outColor = vertColor;\n"
-"}\n";
+ "}\n"
+};
+
+// Passthrough VertexShader
+static const char *vertexshader = "#version 150\n"
+ "uniform mat4 projMatrix;\n"
+ "in vec4 inPosition;\n"
+ "in vec4 inColor;\n"
+ "out vec4 vertColor;\n"
+ "void main (void) {\n"
+ " gl_Position = projMatrix*inPosition;\n"
+ " vertColor = inColor;\n"
+ "}\n";
+
+// Passthrough FragmentShader
+static const char *fragmentshader = "#version 150\n"
+ "in vec4 vertColor;\n"
+ "out vec4 outColor;\n"
+ "void main (void) {\n"
+ " outColor = vertColor;\n"
+ "}\n";
GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
{
- GLint logLength, status;
+ GLint logLength, status;
GLuint program = glCreateProgram();
GLuint vpShader;
@@ -153,8 +162,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
glShaderSource(vpShader, 1, (const GLchar **)&vertexshader, NULL);
glCompileShader(vpShader);
glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength);
- if (logLength > 0) {
- GLchar *log = (GLchar*) malloc(logLength);
+ if (logLength > 0)
+ {
+ GLchar *log = (GLchar *)malloc(logLength);
glGetShaderInfoLog(vpShader, logLength, &logLength, log);
log_info("Vtx Shader compile log:\n%s", log);
free(log);
@@ -175,8 +185,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
glCompileShader(fpShader);
glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength);
- if (logLength > 0) {
- GLchar *log = (GLchar*)malloc(logLength);
+ if (logLength > 0)
+ {
+ GLchar *log = (GLchar *)malloc(logLength);
glGetShaderInfoLog(fpShader, logLength, &logLength, log);
log_info("Frag Shader compile log:\n%s", log);
free(log);
@@ -192,8 +203,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
glLinkProgram(program);
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
- if (logLength > 0) {
- GLchar *log = (GLchar*)malloc(logLength);
+ if (logLength > 0)
+ {
+ GLchar *log = (GLchar *)malloc(logLength);
glGetProgramInfoLog(program, logLength, &logLength, log);
log_info("Program link log:\n%s", log);
free(log);
@@ -219,7 +231,7 @@ void destroyShaderProgram(GLuint program)
glUseProgram(0);
glGetAttachedShaders(program, 2, &count, shaders);
int i;
- for(i = 0; i < count; i++)
+ for (i = 0; i < count; i++)
{
glDetachShader(program, shaders[i]);
glDeleteShader(shaders[i]);
@@ -227,44 +239,49 @@ void destroyShaderProgram(GLuint program)
glDeleteProgram(program);
}
-// This function queues up and runs the above CL kernel that writes the vertex data
-cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1,
- cl_int rowIdx, cl_event fenceEvent, size_t numThreads )
+// This function queues up and runs the above CL kernel that writes the vertex
+// data
+cl_int run_cl_kernel(cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
+ cl_mem stream1, cl_int rowIdx, cl_event fenceEvent,
+ size_t numThreads)
{
- cl_int error = clSetKernelArg( kernel, 3, sizeof( rowIdx ), &rowIdx );
- test_error( error, "Unable to set kernel arguments" );
+ cl_int error = clSetKernelArg(kernel, 3, sizeof(rowIdx), &rowIdx);
+ test_error(error, "Unable to set kernel arguments");
clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2;
- int numEvents = ( fenceEvent != NULL ) ? 1 : 0;
- cl_event *fence_evt = ( fenceEvent != NULL ) ? &fenceEvent : NULL;
+ int numEvents = (fenceEvent != NULL) ? 1 : 0;
+ cl_event *fence_evt = (fenceEvent != NULL) ? &fenceEvent : NULL;
- error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream0, numEvents, fence_evt, &acqEvent1 );
- test_error( error, "Unable to acquire GL obejcts");
- error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream1, numEvents, fence_evt, &acqEvent2 );
- test_error( error, "Unable to acquire GL obejcts");
+ error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream0, numEvents,
+ fence_evt, &acqEvent1);
+ test_error(error, "Unable to acquire GL obejcts");
+ error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream1, numEvents,
+ fence_evt, &acqEvent2);
+ test_error(error, "Unable to acquire GL obejcts");
- cl_event evts[ 2 ] = { acqEvent1, acqEvent2 };
+ cl_event evts[2] = { acqEvent1, acqEvent2 };
- error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numThreads, NULL, 2, evts, &kernEvent );
- test_error( error, "Unable to execute test kernel" );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &numThreads, NULL, 2,
+ evts, &kernEvent);
+ test_error(error, "Unable to execute test kernel");
- error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream0, 1, &kernEvent, &relEvent1 );
+ error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream0, 1, &kernEvent,
+ &relEvent1);
test_error(error, "clEnqueueReleaseGLObjects failed");
- error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream1, 1, &kernEvent, &relEvent2 );
+ error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream1, 1, &kernEvent,
+ &relEvent2);
test_error(error, "clEnqueueReleaseGLObjects failed");
- evts[ 0 ] = relEvent1;
- evts[ 1 ] = relEvent2;
- error = clWaitForEvents( 2, evts );
- test_error( error, "Unable to wait for release events" );
+ evts[0] = relEvent1;
+ evts[1] = relEvent2;
+ error = clWaitForEvents(2, evts);
+ test_error(error, "Unable to wait for release events");
return 0;
}
-class RunThread : public genericThread
-{
+class RunThread : public genericThread {
public:
-
cl_kernel mKernel;
cl_command_queue mQueue;
cl_mem mStream0, mStream1;
@@ -272,34 +289,40 @@ public:
cl_event mFenceEvent;
size_t mNumThreads;
- RunThread( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, size_t numThreads )
- : mKernel( kernel ), mQueue( queue ), mStream0( stream0 ), mStream1( stream1 ), mNumThreads( numThreads )
- {
- }
+ RunThread(cl_kernel kernel, cl_command_queue queue, cl_mem stream0,
+ cl_mem stream1, size_t numThreads)
+ : mKernel(kernel), mQueue(queue), mStream0(stream0), mStream1(stream1),
+ mNumThreads(numThreads)
+ {}
- void SetRunData( cl_int rowIdx, cl_event fenceEvent )
+ void SetRunData(cl_int rowIdx, cl_event fenceEvent)
{
mRowIdx = rowIdx;
mFenceEvent = fenceEvent;
}
- virtual void * IRun( void )
+ virtual void *IRun(void)
{
- cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads );
+ cl_int error = run_cl_kernel(mKernel, mQueue, mStream0, mStream1,
+ mRowIdx, mFenceEvent, mNumThreads);
return (void *)(uintptr_t)error;
}
};
-int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_queue queue, bool separateThreads, GLint rend_vs, GLint read_vs, cl_device_id rend_device )
+int test_fence_sync_single(cl_device_id device, cl_context context,
+ cl_command_queue queue, bool separateThreads,
+ GLint rend_vs, GLint read_vs,
+ cl_device_id rend_device)
{
int error;
const int framebufferSize = 512;
- if( !is_extension_available( device, "cl_khr_gl_event" ) )
+ if (!is_extension_available(device, "cl_khr_gl_event"))
{
- log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" );
+ log_info("NOTE: cl_khr_gl_event extension not present on this device; "
+ "skipping fence sync test\n");
return 0;
}
@@ -312,10 +335,11 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
clGetPlatformIDs(0, NULL, &nplatforms);
clGetPlatformIDs(1, &platform, NULL);
- if (nplatforms > 1) {
+ if (nplatforms > 1)
+ {
log_info("clGetPlatformIDs returned multiple values. This is not "
- "an error, but might result in obtaining incorrect function "
- "pointers if you do not want the first returned platform.\n");
+ "an error, but might result in obtaining incorrect function "
+ "pointers if you do not want the first returned platform.\n");
// Show them the platform name, in case it is a problem.
@@ -323,28 +347,35 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
char *name;
clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size);
- name = (char*)malloc(size);
+ name = (char *)malloc(size);
clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL);
log_info("Using platform with name: %s \n", name);
free(name);
}
- clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncKHR");
- if( clCreateEventFromGLsyncKHR_ptr == NULL )
+ clCreateEventFromGLsyncKHR_ptr =
+ (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(
+ platform, "clCreateEventFromGLsyncKHR");
+ if (clCreateEventFromGLsyncKHR_ptr == NULL)
{
- log_error( "ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR function not discovered!)\n" );
- clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncAPPLE");
+ log_error("ERROR: Unable to run fence_sync test "
+ "(clCreateEventFromGLsyncKHR function not discovered!)\n");
+ clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)
+ clGetExtensionFunctionAddressForPlatform(
+ platform, "clCreateEventFromGLsyncAPPLE");
return -1;
}
#ifdef USING_ARB_sync
- char *gl_version_str = (char*)glGetString( GL_VERSION );
+ char *gl_version_str = (char *)glGetString(GL_VERSION);
float glCoreVersion;
sscanf(gl_version_str, "%f", &glCoreVersion);
- if( glCoreVersion < 3.0f )
+ if (glCoreVersion < 3.0f)
{
- log_info( "OpenGL version %f does not support fence/sync! Skipping test.\n", glCoreVersion );
+ log_info(
+ "OpenGL version %f does not support fence/sync! Skipping test.\n",
+ glCoreVersion);
return 0;
}
@@ -354,10 +385,13 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
GLint val, screen;
CGLGetVirtualScreen(currCtx, &screen);
CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val);
- if(val != kCGLOGLPVersion_3_2_Core)
+ if (val != kCGLOGLPVersion_3_2_Core)
{
- log_error( "OpenGL context was not created with OpenGL version >= 3.0 profile even though platform supports it"
- "OpenGL profile %f does not support fence/sync! Skipping test.\n", glCoreVersion );
+ log_error(
+ "OpenGL context was not created with OpenGL version >= 3.0 profile "
+ "even though platform supports it"
+ "OpenGL profile %f does not support fence/sync! Skipping test.\n",
+ glCoreVersion);
return -1;
}
#else
@@ -365,7 +399,7 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
HDC hdc = wglGetCurrentDC();
HGLRC hglrc = wglGetCurrentContext();
#else
- Display* dpy = glXGetCurrentDisplay();
+ Display *dpy = glXGetCurrentDisplay();
GLXDrawable drawable = glXGetCurrentDrawable();
GLXContext ctx = glXGetCurrentContext();
#endif
@@ -386,51 +420,66 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
GLint posLoc, colLoc;
GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc);
- if(!shaderprogram)
+ if (!shaderprogram)
{
log_error("Failed to create shader program\n");
return -1;
}
- float l = 0.0f; float r = framebufferSize;
- float b = 0.0f; float t = framebufferSize;
-
- float projMatrix[16] = { 2.0f/(r-l), 0.0f, 0.0f, 0.0f,
- 0.0f, 2.0f/(t-b), 0.0f, 0.0f,
- 0.0f, 0.0f, -1.0f, 0.0f,
- -(r+l)/(r-l), -(t+b)/(t-b), 0.0f, 1.0f
- };
+ float l = 0.0f;
+ float r = framebufferSize;
+ float b = 0.0f;
+ float t = framebufferSize;
+
+ float projMatrix[16] = { 2.0f / (r - l),
+ 0.0f,
+ 0.0f,
+ 0.0f,
+ 0.0f,
+ 2.0f / (t - b),
+ 0.0f,
+ 0.0f,
+ 0.0f,
+ 0.0f,
+ -1.0f,
+ 0.0f,
+ -(r + l) / (r - l),
+ -(t + b) / (t - b),
+ 0.0f,
+ 1.0f };
glUseProgram(shaderprogram);
GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix");
glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix);
glUseProgram(0);
- // Note: the framebuffer is just the target to verify our results against, so we don't
- // really care to go through all the possible formats in this case
+ // Note: the framebuffer is just the target to verify our results against,
+ // so we don't really care to go through all the possible formats in this
+ // case
glFramebufferWrapper glFramebuffer;
glRenderbufferWrapper glRenderbuffer;
- error = CreateGLRenderbufferRaw( framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT,
- GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
- &glFramebuffer, &glRenderbuffer );
- if( error != 0 )
- return error;
+ error = CreateGLRenderbufferRaw(
+ framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA, GL_RGBA,
+ GL_UNSIGNED_INT_8_8_8_8_REV, &glFramebuffer, &glRenderbuffer);
+ if (error != 0) return error;
GLuint vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glBufferWrapper vtxBuffer, colorBuffer;
- glGenBuffers( 1, &vtxBuffer );
- glGenBuffers( 1, &colorBuffer );
+ glGenBuffers(1, &vtxBuffer);
+ glGenBuffers(1, &colorBuffer);
- const int numHorizVertices = ( framebufferSize * 64 ) + 1;
+ const int numHorizVertices = (framebufferSize * 64) + 1;
- glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer );
- glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW );
+ glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer);
+ glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4,
+ NULL, GL_STATIC_DRAW);
- glBindBuffer( GL_ARRAY_BUFFER, colorBuffer );
- glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW );
+ glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
+ glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4,
+ NULL, GL_STATIC_DRAW);
// Now that the requisite objects are bound, we can attempt program
// validation:
@@ -439,8 +488,9 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
GLint logLength, status;
glGetProgramiv(shaderprogram, GL_INFO_LOG_LENGTH, &logLength);
- if (logLength > 0) {
- GLchar *log = (GLchar*)malloc(logLength);
+ if (logLength > 0)
+ {
+ GLchar *log = (GLchar *)malloc(logLength);
glGetProgramInfoLog(shaderprogram, logLength, &logLength, log);
log_info("Program validate log:\n%s", log);
free(log);
@@ -455,125 +505,131 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
clProgramWrapper program;
clKernelWrapper kernel;
- clMemWrapper streams[ 2 ];
+ clMemWrapper streams[2];
- if( create_single_kernel_helper( context, &program, &kernel, 1, updateBuffersKernel, "update" ) )
+ if (create_single_kernel_helper(context, &program, &kernel, 1,
+ updateBuffersKernel, "update"))
return -1;
- streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, vtxBuffer, &error );
- test_error( error, "Unable to create CL buffer from GL vertex buffer" );
+ streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE,
+ vtxBuffer, &error);
+ test_error(error, "Unable to create CL buffer from GL vertex buffer");
- streams[ 1 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, colorBuffer, &error );
- test_error( error, "Unable to create CL buffer from GL color buffer" );
+ streams[1] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE,
+ colorBuffer, &error);
+ test_error(error, "Unable to create CL buffer from GL color buffer");
- error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
- test_error( error, "Unable to set kernel arguments" );
+ error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+ test_error(error, "Unable to set kernel arguments");
- error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
- test_error( error, "Unable to set kernel arguments" );
+ error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+ test_error(error, "Unable to set kernel arguments");
cl_int horizWrap = (cl_int)framebufferSize;
- error = clSetKernelArg( kernel, 2, sizeof( horizWrap ), &horizWrap );
- test_error( error, "Unable to set kernel arguments" );
+ error = clSetKernelArg(kernel, 2, sizeof(horizWrap), &horizWrap);
+ test_error(error, "Unable to set kernel arguments");
- glViewport( 0, 0, framebufferSize, framebufferSize );
- glClearColor( 0, 0, 0, 0 );
- glClear( GL_COLOR_BUFFER_BIT );
- glClear( GL_DEPTH_BUFFER_BIT );
- glDisable( GL_DEPTH_TEST );
- glEnable( GL_BLEND );
- glBlendFunc( GL_ONE, GL_ONE );
+ glViewport(0, 0, framebufferSize, framebufferSize);
+ glClearColor(0, 0, 0, 0);
+ glClear(GL_COLOR_BUFFER_BIT);
+ glClear(GL_DEPTH_BUFFER_BIT);
+ glDisable(GL_DEPTH_TEST);
+ glEnable(GL_BLEND);
+ glBlendFunc(GL_ONE, GL_ONE);
clEventWrapper fenceEvent;
GLsync glFence = 0;
// Do a loop through 8 different horizontal stripes against the framebuffer
- RunThread thread( kernel, queue, streams[ 0 ], streams[ 1 ], (size_t)numHorizVertices );
+ RunThread thread(kernel, queue, streams[0], streams[1],
+ (size_t)numHorizVertices);
- for( int i = 0; i < 8; i++ )
+ for (int i = 0; i < 8; i++)
{
// if current rendering device is not the compute device and
// separateThreads == false which means compute is going on same
// thread and we are using implicit synchronization (no GLSync obj used)
- // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we need
- // to wait for rendering to finish on other device before CL can start
- // writing to CL/GL shared mem objects. When separateThreads is true i.e.
- // we are using GLSync obj to synchronize then we dont need to call glFinish
- // here since CL should wait for rendering on other device before this
- // GLSync object to finish before it starts writing to shared mem object.
- // Also rend_device == compute_device no need to call glFinish
- if(rend_device != device && !separateThreads)
- glFinish();
-
- if( separateThreads )
+ // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we
+ // need to wait for rendering to finish on other device before CL can
+ // start writing to CL/GL shared mem objects. When separateThreads is
+ // true i.e. we are using GLSync obj to synchronize then we dont need to
+ // call glFinish here since CL should wait for rendering on other device
+ // before this GLSync object to finish before it starts writing to
+ // shared mem object. Also rend_device == compute_device no need to call
+ // glFinish
+ if (rend_device != device && !separateThreads) glFinish();
+
+ if (separateThreads)
{
- if (fenceEvent != NULL)
- {
- clReleaseEvent(fenceEvent);
- glDeleteSyncFunc(glFence);
- }
+ glDeleteSyncFunc(glFence);
glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
- fenceEvent = clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
+ fenceEvent =
+ clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
test_error(error, "Unable to create CL event from GL fence");
- // in case of explicit synchronization, we just wait for the sync object to complete
- // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility
- // to flush on the context on which glSync is created
+ // in case of explicit synchronization, we just wait for the sync
+ // object to complete in clEnqueueAcquireGLObject but we dont flush.
+ // Its application's responsibility to flush on the context on which
+ // glSync is created
glFlush();
- thread.SetRunData( (cl_int)i, fenceEvent );
+ thread.SetRunData((cl_int)i, fenceEvent);
thread.Start();
error = (cl_int)(size_t)thread.Join();
}
else
{
- error = run_cl_kernel( kernel, queue, streams[ 0 ], streams[ 1 ], (cl_int)i, fenceEvent, (size_t)numHorizVertices );
+ error =
+ run_cl_kernel(kernel, queue, streams[0], streams[1], (cl_int)i,
+ fenceEvent, (size_t)numHorizVertices);
}
- test_error( error, "Unable to run CL kernel" );
+ test_error(error, "Unable to run CL kernel");
glUseProgram(shaderprogram);
glEnableVertexAttribArray(posLoc);
glEnableVertexAttribArray(colLoc);
- glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer );
- glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0);
- glBindBuffer( GL_ARRAY_BUFFER, colorBuffer );
- glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0);
- glBindBuffer( GL_ARRAY_BUFFER, 0 );
+ glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer);
+ glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE,
+ 4 * sizeof(GLfloat), 0);
+ glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
+ glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE,
+ 4 * sizeof(GLfloat), 0);
+ glBindBuffer(GL_ARRAY_BUFFER, 0);
- glDrawArrays( GL_TRIANGLE_STRIP, 0, numHorizVertices * 2 );
+ glDrawArrays(GL_TRIANGLE_STRIP, 0, numHorizVertices * 2);
glDisableVertexAttribArray(posLoc);
glDisableVertexAttribArray(colLoc);
glUseProgram(0);
- if( separateThreads )
+ if (separateThreads)
{
- // If we're on the same thread, then we're testing implicit syncing, so we
- // don't need the actual fence code
- if( fenceEvent != NULL )
- {
- clReleaseEvent( fenceEvent );
- glDeleteSyncFunc( glFence );
- }
+ // If we're on the same thread, then we're testing implicit syncing,
+ // so we don't need the actual fence code
+ glDeleteSyncFunc(glFence);
+
- glFence = glFenceSyncFunc( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 );
- fenceEvent = clCreateEventFromGLsyncKHR_ptr( context, glFence, &error );
- test_error( error, "Unable to create CL event from GL fence" );
+ glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ fenceEvent =
+ clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
+ test_error(error, "Unable to create CL event from GL fence");
- // in case of explicit synchronization, we just wait for the sync object to complete
- // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility
- // to flush on the context on which glSync is created
+ // in case of explicit synchronization, we just wait for the sync
+ // object to complete in clEnqueueAcquireGLObject but we dont flush.
+ // Its application's responsibility to flush on the context on which
+ // glSync is created
glFlush();
}
else
glFinish();
}
- if( glFence != 0 )
- // Don't need the final release for fenceEvent, because the wrapper will take care of that
- glDeleteSyncFunc( glFence );
+ if (glFence != 0)
+ // Don't need the final release for fenceEvent, because the wrapper will
+ // take care of that
+ glDeleteSyncFunc(glFence);
#ifdef __APPLE__
CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs);
@@ -585,54 +641,62 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
#endif
#endif
// Grab the contents of the final framebuffer
- BufferOwningPtr<char> resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer,
- GL_COLOR_ATTACHMENT0_EXT,
- GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar,
- framebufferSize, 128 ) );
-
- // Check the contents now. We should end up with solid color bands 32 pixels high and the
- // full width of the framebuffer, at values (128,128,128) due to the additive blending
- for( int i = 0; i < 8; i++ )
+ BufferOwningPtr<char> resultData(ReadGLRenderbuffer(
+ glFramebuffer, glRenderbuffer, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA,
+ GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, framebufferSize, 128));
+
+ // Check the contents now. We should end up with solid color bands 32 pixels
+ // high and the full width of the framebuffer, at values (128,128,128) due
+ // to the additive blending
+ for (int i = 0; i < 8; i++)
{
- for( int y = 0; y < 4; y++ )
+ for (int y = 0; y < 4; y++)
{
- // Note: coverage will be double because the 63-0 triangle overwrites again at the end of the pass
- cl_uchar valA = ( ( ( i + 1 ) & 1 ) ) * numHorizVertices * 2 / framebufferSize;
- cl_uchar valB = ( ( ( i + 1 ) & 2 ) >> 1 ) * numHorizVertices * 2 / framebufferSize;
- cl_uchar valC = ( ( ( i + 1 ) & 4 ) >> 2 ) * numHorizVertices * 2 / framebufferSize;
-
- cl_uchar *row = (cl_uchar *)&resultData[ ( i * 16 + y ) * framebufferSize * 4 ];
- for( int x = 0; x < ( framebufferSize - 1 ) - 1; x++ )
+ // Note: coverage will be double because the 63-0 triangle
+ // overwrites again at the end of the pass
+ cl_uchar valA =
+ (((i + 1) & 1)) * numHorizVertices * 2 / framebufferSize;
+ cl_uchar valB =
+ (((i + 1) & 2) >> 1) * numHorizVertices * 2 / framebufferSize;
+ cl_uchar valC =
+ (((i + 1) & 4) >> 2) * numHorizVertices * 2 / framebufferSize;
+
+ cl_uchar *row =
+ (cl_uchar *)&resultData[(i * 16 + y) * framebufferSize * 4];
+ for (int x = 0; x < (framebufferSize - 1) - 1; x++)
{
- if( ( row[ x * 4 ] != valA ) || ( row[ x * 4 + 1 ] != valB ) ||
- ( row[ x * 4 + 2 ] != valC ) )
+ if ((row[x * 4] != valA) || (row[x * 4 + 1] != valB)
+ || (row[x * 4 + 2] != valC))
{
- log_error( "ERROR: Output framebuffer did not validate!\n" );
- DumpGLBuffer( GL_UNSIGNED_BYTE, framebufferSize, 128, resultData );
- log_error( "RUNS:\n" );
+ log_error("ERROR: Output framebuffer did not validate!\n");
+ DumpGLBuffer(GL_UNSIGNED_BYTE, framebufferSize, 128,
+ resultData);
+ log_error("RUNS:\n");
uint32_t *p = (uint32_t *)(char *)resultData;
size_t a = 0;
- for( size_t t = 1; t < framebufferSize * framebufferSize; t++ )
+ for (size_t t = 1; t < framebufferSize * framebufferSize;
+ t++)
{
- if( p[ a ] != 0 )
+ if (p[a] != 0)
{
- if( p[ t ] == 0 )
+ if (p[t] == 0)
{
- log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1,
- (int)( a % framebufferSize ), (int)( a / framebufferSize ),
- (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ),
- p[ a ] );
+ log_error(
+ "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n",
+ a, t - 1, (int)(a % framebufferSize),
+ (int)(a / framebufferSize),
+ (int)((t - 1) % framebufferSize),
+ (int)((t - 1) / framebufferSize), p[a]);
a = t;
}
}
else
{
- if( p[ t ] != 0 )
+ if (p[t] != 0)
{
a = t;
}
}
-
}
return -1;
}
@@ -645,46 +709,56 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
return 0;
}
-int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
+int test_fence_sync(cl_device_id device, cl_context context,
+ cl_command_queue queue, int numElements)
{
GLint vs_count = 0;
cl_device_id *device_list = NULL;
- if( !is_extension_available( device, "cl_khr_gl_event" ) )
+ if (!is_extension_available(device, "cl_khr_gl_event"))
{
- log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" );
+ log_info("NOTE: cl_khr_gl_event extension not present on this device; "
+ "skipping fence sync test\n");
return 0;
}
#ifdef __APPLE__
CGLContextObj ctx = CGLGetCurrentContext();
CGLPixelFormatObj pix = CGLGetPixelFormat(ctx);
- CGLError err = CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count);
+ CGLError err =
+ CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count);
- device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*vs_count);
- clGetGLContextInfoAPPLE(context, ctx, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, sizeof(cl_device_id)*vs_count, device_list, NULL);
+ device_list = (cl_device_id *)malloc(sizeof(cl_device_id) * vs_count);
+ clGetGLContextInfoAPPLE(context, ctx,
+ CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE,
+ sizeof(cl_device_id) * vs_count, device_list, NULL);
#else
- // Need platform specific way of getting devices from CL context to which OpenGL can render
- // If not available it can be replaced with clGetContextInfo with CL_CONTEXT_DEVICES
+ // Need platform specific way of getting devices from CL context to which
+ // OpenGL can render If not available it can be replaced with
+ // clGetContextInfo with CL_CONTEXT_DEVICES
size_t device_cb;
- cl_int err = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb);
- if( err != CL_SUCCESS )
+ cl_int err =
+ clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb);
+ if (err != CL_SUCCESS)
{
- print_error( err, "Unable to get device count from context" );
- return -1;
+ print_error(err, "Unable to get device count from context");
+ return -1;
}
vs_count = (GLint)device_cb / sizeof(cl_device_id);
- if (vs_count < 1) {
- log_error("No devices found.\n");
- return -1;
+ if (vs_count < 1)
+ {
+ log_error("No devices found.\n");
+ return -1;
}
- device_list = (cl_device_id *) malloc(device_cb);
- err = clGetContextInfo( context, CL_CONTEXT_DEVICES, device_cb, device_list, NULL);
- if( err != CL_SUCCESS ) {
- free(device_list);
- print_error( err, "Unable to get device list from context" );
- return -1;
+ device_list = (cl_device_id *)malloc(device_cb);
+ err = clGetContextInfo(context, CL_CONTEXT_DEVICES, device_cb, device_list,
+ NULL);
+ if (err != CL_SUCCESS)
+ {
+ free(device_list);
+ print_error(err, "Unable to get device list from context");
+ return -1;
}
#endif
@@ -695,30 +769,38 @@ int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue q
// Loop through all the devices capable to OpenGL rendering
// and set them as current rendering target
- for(rend_vs = 0; rend_vs < vs_count; rend_vs++)
+ for (rend_vs = 0; rend_vs < vs_count; rend_vs++)
{
// Loop through all the devices and set them as current
// compute target
- for(read_vs = 0; read_vs < vs_count; read_vs++)
+ for (read_vs = 0; read_vs < vs_count; read_vs++)
{
- cl_device_id rend_device = device_list[rend_vs], read_device = device_list[read_vs];
+ cl_device_id rend_device = device_list[rend_vs],
+ read_device = device_list[read_vs];
char rend_name[200], read_name[200];
- clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), rend_name, NULL);
- clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), read_name, NULL);
+ clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name),
+ rend_name, NULL);
+ clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name),
+ read_name, NULL);
- log_info("Rendering on: %s, read back on: %s\n", rend_name, read_name);
- error = test_fence_sync_single( device, context, queue, false, rend_vs, read_vs, rend_device );
+ log_info("Rendering on: %s, read back on: %s\n", rend_name,
+ read_name);
+ error = test_fence_sync_single(device, context, queue, false,
+ rend_vs, read_vs, rend_device);
any_failed |= error;
- if( error != 0 )
- log_error( "ERROR: Implicit syncing with GL sync events failed!\n\n" );
+ if (error != 0)
+ log_error(
+ "ERROR: Implicit syncing with GL sync events failed!\n\n");
else
log_info("Implicit syncing Passed\n");
- error = test_fence_sync_single( device, context, queue, true, rend_vs, read_vs, rend_device );
+ error = test_fence_sync_single(device, context, queue, true,
+ rend_vs, read_vs, rend_device);
any_failed |= error;
- if( error != 0 )
- log_error( "ERROR: Explicit syncing with GL sync events failed!\n\n" );
+ if (error != 0)
+ log_error(
+ "ERROR: Explicit syncing with GL sync events failed!\n\n");
else
log_info("Explicit syncing Passed\n");
}
diff --git a/test_conformance/gl/test_image_methods.cpp b/test_conformance/gl/test_image_methods.cpp
index 07f5b65e..7d055fb2 100644
--- a/test_conformance/gl/test_image_methods.cpp
+++ b/test_conformance/gl/test_image_methods.cpp
@@ -337,7 +337,6 @@ int test_image_methods_depth( cl_device_id device, cl_context context, cl_comman
return 0;
}
- size_t pixelSize;
int result = 0;
GLenum depth_targets[] = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
size_t ntargets = sizeof(depth_targets) / sizeof(depth_targets[0]);
@@ -378,7 +377,6 @@ int test_image_methods_multisample( cl_device_id device, cl_context context, cl_
return 0;
}
- size_t pixelSize;
int result = 0;
GLenum targets[] = {GL_TEXTURE_2D_MULTISAMPLE, GL_TEXTURE_2D_MULTISAMPLE_ARRAY};
size_t ntargets = sizeof(targets) / sizeof(targets[0]);
diff --git a/test_conformance/gl/test_images_getinfo_common.cpp b/test_conformance/gl/test_images_getinfo_common.cpp
index 345b5950..2322c269 100644
--- a/test_conformance/gl/test_images_getinfo_common.cpp
+++ b/test_conformance/gl/test_images_getinfo_common.cpp
@@ -86,10 +86,11 @@ static int test_image_info( cl_context context, cl_command_queue queue,
return CheckGLObjectInfo(streams[0], object_type, glTexture, glTarget, 0);
}
-static int test_image_format_get_info(
- cl_context context, cl_command_queue queue,
- size_t width, size_t height, size_t depth,
- GLenum target, struct format* fmt, MTdata data)
+static int test_image_format_get_info(cl_context context,
+ cl_command_queue queue, size_t width,
+ size_t height, size_t depth,
+ GLenum target, const format *fmt,
+ MTdata data)
{
int error = 0;
@@ -197,9 +198,11 @@ static int test_image_format_get_info(
&actualType, (void **)&outBuffer );
}
-int test_images_get_info_common( cl_device_id device, cl_context context,
- cl_command_queue queue, struct format* formats, size_t nformats,
- GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes )
+int test_images_get_info_common(cl_device_id device, cl_context context,
+ cl_command_queue queue, const format *formats,
+ size_t nformats, GLenum *targets,
+ size_t ntargets, sizevec_t *sizes,
+ size_t nsizes)
{
int error = 0;
RandomSeed seed(gRandomSeed);
diff --git a/test_conformance/gl/test_images_read_common.cpp b/test_conformance/gl/test_images_read_common.cpp
index 112c7891..fe2a529b 100644
--- a/test_conformance/gl/test_images_read_common.cpp
+++ b/test_conformance/gl/test_images_read_common.cpp
@@ -386,10 +386,9 @@ static int test_image_read( cl_context context, cl_command_queue queue,
width, height, depth, sampleNum, outFormat, outType, outResultBuffer );
}
-static int test_image_format_read(
- cl_context context, cl_command_queue queue,
- size_t width, size_t height, size_t depth,
- GLenum target, struct format* fmt, MTdata data)
+static int test_image_format_read(cl_context context, cl_command_queue queue,
+ size_t width, size_t height, size_t depth,
+ GLenum target, const format *fmt, MTdata data)
{
int error = 0;
@@ -645,9 +644,10 @@ static int test_image_format_read(
}
}
-int test_images_read_common( cl_device_id device, cl_context context,
- cl_command_queue queue, struct format* formats, size_t nformats,
- GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes )
+int test_images_read_common(cl_device_id device, cl_context context,
+ cl_command_queue queue, const format *formats,
+ size_t nformats, GLenum *targets, size_t ntargets,
+ sizevec_t *sizes, size_t nsizes)
{
int error = 0;
RandomSeed seed(gRandomSeed);
diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp
index 9bbb257b..0dba83bb 100644
--- a/test_conformance/gl/test_images_write_common.cpp
+++ b/test_conformance/gl/test_images_write_common.cpp
@@ -427,7 +427,6 @@ static int test_image_write( cl_context context, cl_command_queue queue,
int supportsHalf(cl_context context, bool* supports_half)
{
int error;
- size_t size;
cl_uint numDev;
error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL);
@@ -446,7 +445,6 @@ int supportsHalf(cl_context context, bool* supports_half)
int supportsMsaa(cl_context context, bool* supports_msaa)
{
int error;
- size_t size;
cl_uint numDev;
error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL);
@@ -465,7 +463,6 @@ int supportsMsaa(cl_context context, bool* supports_msaa)
int supportsDepth(cl_context context, bool* supports_depth)
{
int error;
- size_t size;
cl_uint numDev;
error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL);
@@ -486,7 +483,6 @@ static int test_image_format_write( cl_context context, cl_command_queue queue,
GLenum internalFormat, GLenum glType, ExplicitType type, MTdata d )
{
int error;
- int samples = 8;
// If we're testing a half float format, then we need to determine the
// rounding mode of this machine. Punt if we fail to do so.
@@ -664,8 +660,9 @@ static int test_image_format_write( cl_context context, cl_command_queue queue,
// combination.
int test_images_write_common(cl_device_id device, cl_context context,
- cl_command_queue queue, struct format* formats, size_t nformats,
- GLenum *targets, size_t ntargets, sizevec_t* sizes, size_t nsizes )
+ cl_command_queue queue, const format *formats,
+ size_t nformats, GLenum *targets, size_t ntargets,
+ sizevec_t *sizes, size_t nsizes)
{
int err = 0;
int error = 0;
diff --git a/test_conformance/gles/CMakeLists.txt b/test_conformance/gles/CMakeLists.txt
index c76fe512..4f4ba532 100644
--- a/test_conformance/gles/CMakeLists.txt
+++ b/test_conformance/gles/CMakeLists.txt
@@ -18,3 +18,11 @@ set (${MODULE_NAME}_SOURCES
list(APPEND CLConform_LIBRARIES EGL GLESv2)
include(../CMakeCommon.txt)
+
+if(DEFINED USE_GLES3)
+ target_compile_definitions(${${MODULE_NAME}_OUT} PRIVATE GLES3)
+endif()
+if(MSVC)
+ # Don't warn about using the portable "strdup" function.
+ target_compile_definitions(${${MODULE_NAME}_OUT} PRIVATE _CRT_NONSTDC_NO_DEPRECATE)
+endif() \ No newline at end of file
diff --git a/test_conformance/gles/main.cpp b/test_conformance/gles/main.cpp
index 644fa63c..60e020d8 100644
--- a/test_conformance/gles/main.cpp
+++ b/test_conformance/gles/main.cpp
@@ -320,8 +320,10 @@ int main(int argc, const char *argv[])
goto cleanup;
}
+#ifdef GLES3
int argc_ = (first_32_testname) ? 1 + (argc - first_32_testname) : argc;
const char** argv_ = (first_32_testname) ? &argv[first_32_testname-1] : argv;
+#endif
// Execute the tests.
for( size_t i = 0; i < numDevices; i++ ) {
diff --git a/test_conformance/gles/setup_egl.cpp b/test_conformance/gles/setup_egl.cpp
index fe0f8ca3..95a12a66 100644
--- a/test_conformance/gles/setup_egl.cpp
+++ b/test_conformance/gles/setup_egl.cpp
@@ -117,7 +117,8 @@ public:
_platform, "clGetGLContextInfoKHR");
if (GetGLContextInfo == NULL)
{
- print_error(status, "clGetGLContextInfoKHR failed");
+ log_error("ERROR: clGetGLContextInfoKHR failed! (%s:%d)\n",
+ __FILE__, __LINE__);
return NULL;
}
@@ -128,7 +129,7 @@ public:
return NULL;
}
dev_size /= sizeof(cl_device_id);
- log_info("GL _context supports %d compute devices\n", dev_size);
+ log_info("GL _context supports %zu compute devices\n", dev_size);
status =
GetGLContextInfo(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
diff --git a/test_conformance/gles/test_buffers.cpp b/test_conformance/gles/test_buffers.cpp
index a2d67322..73711261 100644
--- a/test_conformance/gles/test_buffers.cpp
+++ b/test_conformance/gles/test_buffers.cpp
@@ -205,10 +205,10 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType
if (validate_only) {
int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) |
CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) );
- for(i=0;i<3;i++)
+
+ for (i = 0; i < 3; i++)
{
- clReleaseMemObject(streams[i]);
- streams[i] = NULL;
+ streams[i].reset();
}
glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0;
@@ -285,10 +285,9 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType
clP += get_explicit_type_size( vecType );
}
- for(i=0;i<3;i++)
+ for (i = 0; i < 3; i++)
{
- clReleaseMemObject(streams[i]);
- streams[i] = NULL;
+ streams[i].reset();
}
glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0;
diff --git a/test_conformance/gles/test_fence_sync.cpp b/test_conformance/gles/test_fence_sync.cpp
index 0af91a46..968d9695 100644
--- a/test_conformance/gles/test_fence_sync.cpp
+++ b/test_conformance/gles/test_fence_sync.cpp
@@ -570,10 +570,12 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_
{
if( p[ t ] == 0 )
{
- log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1,
- (int)( a % framebufferSize ), (int)( a / framebufferSize ),
- (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ),
- p[ a ] );
+ log_error(
+ "RUN: %zu to %zu (%d,%d to %d,%d) 0x%08x\n",
+ a, t - 1, (int)(a % framebufferSize),
+ (int)(a / framebufferSize),
+ (int)((t - 1) % framebufferSize),
+ (int)((t - 1) / framebufferSize), p[a]);
a = t;
}
}
diff --git a/test_conformance/gles/test_images_2D.cpp b/test_conformance/gles/test_images_2D.cpp
index c1a17fc8..f6554023 100644
--- a/test_conformance/gles/test_images_2D.cpp
+++ b/test_conformance/gles/test_images_2D.cpp
@@ -369,7 +369,9 @@ int test_images_read_cube( cl_device_id device, cl_context context, cl_command_q
}
+#ifdef __APPLE__
#pragma mark -------------------- Write tests -------------------------
+#endif
int test_cl_image_write( cl_context context, cl_command_queue queue, cl_mem clImage,
diff --git a/test_conformance/gles/test_renderbuffer.cpp b/test_conformance/gles/test_renderbuffer.cpp
index 20127aca..0f6d289b 100644
--- a/test_conformance/gles/test_renderbuffer.cpp
+++ b/test_conformance/gles/test_renderbuffer.cpp
@@ -197,7 +197,9 @@ int test_renderbuffer_read( cl_device_id device, cl_context context, cl_command_
}
+#ifdef __APPLE__
#pragma mark -------------------- Write tests -------------------------
+#endif
int test_attach_renderbuffer_write_to_image( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glRenderbuffer,
size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, MTdata d, void **outSourceBuffer )
diff --git a/test_conformance/half/Test_roundTrip.cpp b/test_conformance/half/Test_roundTrip.cpp
index 69fc7e41..1ab40937 100644
--- a/test_conformance/half/Test_roundTrip.cpp
+++ b/test_conformance/half/Test_roundTrip.cpp
@@ -14,6 +14,9 @@
// limitations under the License.
//
#include <string.h>
+
+#include <algorithm>
+
#include "cl_utils.h"
#include "tests.h"
#include "harness/testHarness.h"
@@ -156,7 +159,7 @@ int test_roundTrip( cl_device_id device, cl_context context, cl_command_queue qu
}
// Figure out how many elements are in a work block
- size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float));
+ size_t elementSize = std::max(sizeof(cl_half), sizeof(cl_float));
size_t blockCount = (size_t)getBufferSize(device) / elementSize; //elementSize is a power of two
uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half
size_t stride = blockCount;
@@ -168,7 +171,7 @@ int test_roundTrip( cl_device_id device, cl_context context, cl_command_queue qu
for( i = 0; i < (uint64_t)lastCase; i += stride )
{
- count = (uint32_t) MIN( blockCount, lastCase - i );
+ count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i);
//Init the input stream
uint16_t *p = (uint16_t *)gIn_half;
diff --git a/test_conformance/half/Test_vLoadHalf.cpp b/test_conformance/half/Test_vLoadHalf.cpp
index 52867c25..e9354019 100644
--- a/test_conformance/half/Test_vLoadHalf.cpp
+++ b/test_conformance/half/Test_vLoadHalf.cpp
@@ -17,6 +17,9 @@
#include "harness/testHarness.h"
#include <string.h>
+
+#include <algorithm>
+
#include "cl_utils.h"
#include "tests.h"
@@ -37,14 +40,12 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned )
const char *vector_size_names[] = {"1", "2", "4", "8", "16", "3"};
int minVectorSize = kMinVectorSize;
- // There is no aligned scalar vloada_half in CL 1.1
-#if ! defined( CL_VERSION_1_1 ) && ! defined(__APPLE__)
- vlog("Note: testing vloada_half.\n");
- if (aligned && minVectorSize == 0)
- minVectorSize = 1;
-#endif
- for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
+ // There is no aligned scalar vloada_half
+ if (aligned && minVectorSize == 0) minVectorSize = 1;
+
+ for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest;
+ vectorSize++)
{
int effectiveVectorSize = g_arrVecSizes[vectorSize];
@@ -81,7 +82,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned )
"{\n"
" size_t i = get_global_id(0);\n"
" f[i] = vloada_half3( i, p );\n"
- " ((__global float *)f)[4*i+3] = vloada_half(4*i+3,p);\n"
+ " ((__global float *)f)[4*i+3] = vload_half(4*i+3,p);\n"
"}\n"
};
@@ -431,7 +432,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned )
}
// Figure out how many elements are in a work block
- size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float));
+ size_t elementSize = std::max(sizeof(cl_half), sizeof(cl_float));
size_t blockCount = getBufferSize(device) / elementSize; // elementSize is power of 2
uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half
@@ -449,7 +450,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned )
for( i = 0; i < (uint64_t)lastCase; i += blockCount )
{
- count = (uint32_t) MIN( blockCount, lastCase - i );
+ count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i);
//Init the input stream
uint16_t *p = (uint16_t *)gIn_half;
diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp
index c3a328ad..591470f0 100644
--- a/test_conformance/half/Test_vStoreHalf.cpp
+++ b/test_conformance/half/Test_vStoreHalf.cpp
@@ -18,6 +18,9 @@
#include "harness/testHarness.h"
#include <string.h>
+
+#include <algorithm>
+
#include "cl_utils.h"
#include "tests.h"
@@ -78,7 +81,7 @@ ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
cl_ushort *r = cri->r + off;
f2h f = cri->f;
cl_ulong i = cri->i + off;
- cl_uint j, rr;
+ cl_uint j;
if (off + count > lim)
count = lim - off;
@@ -114,8 +117,7 @@ CheckF(cl_uint jid, cl_uint tid, void *userInfo)
return 0;
for (j = 0; j < count; j++) {
- if (s[j] == r[j])
- continue;
+ if (s[j] == r[j]) continue;
// Pass any NaNs
if ((s[j] & 0x7fff) > 0x7c00 && (r[j] & 0x7fff) > 0x7c00 )
@@ -186,8 +188,7 @@ CheckD(cl_uint jid, cl_uint tid, void *userInfo)
return 0;
for (j = 0; j < count; j++) {
- if (s[j] == r[j])
- continue;
+ if (s[j] == r[j]) continue;
// Pass any NaNs
if ((s[j] & 0x7fff) > 0x7c00 && (r[j] & 0x7fff) > 0x7c00)
@@ -419,7 +420,9 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
"__kernel void test( __global float *p, __global half *f,\n"
" uint extra_last_thread )\n"
"{\n"
- " __local ushort data[3*(", local_buf_size, "+1)];\n"
+ " __local ushort data[3*(",
+ local_buf_size,
+ "+1)];\n"
" size_t i = get_global_id(0);\n"
" size_t lid = get_local_id(0);\n"
" size_t last_i = get_global_size(0)-1;\n"
@@ -429,9 +432,18 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
" if(last_i == i && extra_last_thread != 0) {\n"
" adjust = 3-extra_last_thread;\n"
" } "
- " vstore_half3",roundName,"( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n"
+ " vstore_half3",
+ roundName,
+ "( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n"
" barrier( CLK_LOCAL_MEM_FENCE ); \n"
- " async_event = async_work_group_copy((__global ushort *)(f+3*(i-lid)), (__local ushort *)(&data[adjust]), lsize*3-adjust, 0);\n" // investigate later
+ " if (get_group_id(0) == (get_num_groups(0) - 1) &&\n"
+ " extra_last_thread != 0) {\n"
+ " adjust = 3-extra_last_thread;\n"
+ " }\n"
+ " async_event = async_work_group_copy(\n"
+ " (__global ushort*)(f+3*(i-lid)),\n"
+ " (__local ushort *)(&data[adjust]),\n"
+ " lsize*3-adjust, 0);\n" // investigate later
" wait_group_events(1, &async_event);\n"
"}\n"
};
@@ -521,7 +533,9 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
"__kernel void test( __global double *p, __global half *f,\n"
" uint extra_last_thread )\n"
"{\n"
- " __local ushort data[3*(", local_buf_size, "+1)];\n"
+ " __local ushort data[3*(",
+ local_buf_size,
+ "+1)];\n"
" size_t i = get_global_id(0);\n"
" size_t lid = get_local_id(0);\n"
" size_t last_i = get_global_size(0)-1;\n"
@@ -531,15 +545,23 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
" if(last_i == i && extra_last_thread != 0) {\n"
" adjust = 3-extra_last_thread;\n"
" }\n "
- " vstore_half3",roundName,"( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n"
+ " vstore_half3",
+ roundName,
+ "( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n"
" barrier( CLK_LOCAL_MEM_FENCE ); \n"
- " async_event = async_work_group_copy((__global ushort *)(f+3*(i-lid)), (__local ushort *)(&data[adjust]), lsize*3-adjust, 0);\n" // investigate later
+ " if (get_group_id(0) == (get_num_groups(0) - 1) &&\n"
+ " extra_last_thread != 0) {\n"
+ " adjust = 3-extra_last_thread;\n"
+ " }\n"
+ " async_event = async_work_group_copy(\n"
+ " (__global ushort *)(f+3*(i-lid)),\n"
+ " (__local ushort *)(&data[adjust]),\n"
+ " lsize*3-adjust, 0);\n" // investigate later
" wait_group_events(1, &async_event);\n"
"}\n"
};
-
if(g_arrVecSizes[vectorSize] == 3) {
programs[vectorSize][0] = MakeProgram( device, source_v3, sizeof(source_v3) / sizeof( source_v3[0]) );
} else {
@@ -674,7 +696,7 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
} // end for vector size
// Figure out how many elements are in a work block
- size_t elementSize = MAX( sizeof(cl_ushort), sizeof(float));
+ size_t elementSize = std::max(sizeof(cl_ushort), sizeof(float));
size_t blockCount = BUFFER_SIZE / elementSize; // elementSize is power of 2
uint64_t lastCase = 1ULL << (8*sizeof(float)); // number of floats.
size_t stride = blockCount;
@@ -726,7 +748,7 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
for( i = 0; i < lastCase; i += stride )
{
- count = (cl_uint) MIN( blockCount, lastCase - i );
+ count = (cl_uint)std::min((uint64_t)blockCount, lastCase - i);
fref.i = i;
dref.i = i;
@@ -1272,7 +1294,7 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double
}
// Figure out how many elements are in a work block
- size_t elementSize = MAX( sizeof(cl_ushort), sizeof(float));
+ size_t elementSize = std::max(sizeof(cl_ushort), sizeof(float));
size_t blockCount = BUFFER_SIZE / elementSize;
uint64_t lastCase = 1ULL << (8*sizeof(float));
size_t stride = blockCount;
@@ -1323,7 +1345,7 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double
for( i = 0; i < (uint64_t)lastCase; i += stride )
{
- count = (cl_uint) MIN( blockCount, lastCase - i );
+ count = (cl_uint)std::min((uint64_t)blockCount, lastCase - i);
fref.i = i;
dref.i = i;
diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp
index 6600cc58..6bc7db95 100644
--- a/test_conformance/half/main.cpp
+++ b/test_conformance/half/main.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -131,8 +131,7 @@ exit:
static int ParseArgs( int argc, const char **argv )
{
int i;
- argList = (const char **)calloc( argc - 1, sizeof( char*) );
-
+ argList = (const char **)calloc(argc, sizeof(char *));
if( NULL == argList )
{
vlog_error( "Failed to allocate memory for argList.\n" );
@@ -222,7 +221,6 @@ static int ParseArgs( int argc, const char **argv )
gWimpyMode = 1;
}
- vlog( "Test binary built %s %s\n", __DATE__, __TIME__ );
PrintArch();
if( gWimpyMode )
{
@@ -248,4 +246,3 @@ static void PrintUsage( void )
vlog("\t\t%s\n", test_list[i].name );
}
}
-
diff --git a/test_conformance/images/clCopyImage/test_copy_1D.cpp b/test_conformance/images/clCopyImage/test_copy_1D.cpp
index 2c996c72..0f6f3ce4 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D.cpp
@@ -113,6 +113,7 @@ int test_copy_image_set_1D( cl_device_id device, cl_context context, cl_command_
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
index 0b616934..f0b610bb 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
@@ -118,6 +118,7 @@ int test_copy_image_set_1D_array( cl_device_id device, cl_context context, cl_co
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D.cpp b/test_conformance/images/clCopyImage/test_copy_2D.cpp
index 1a69a1fe..448b47f0 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D.cpp
@@ -125,6 +125,7 @@ int test_copy_image_set_2D( cl_device_id device, cl_context context, cl_command_
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
index eb6dd552..1819d87c 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
@@ -224,6 +224,7 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_context context, cl
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
index 8a56c95f..4ab6b42a 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
@@ -230,6 +230,7 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_context context, cl_comma
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
index 6327ba58..3376bf9a 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
@@ -71,6 +71,7 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_context context, cl_co
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_3D.cpp b/test_conformance/images/clCopyImage/test_copy_3D.cpp
index da6731d7..cdfdccec 100644
--- a/test_conformance/images/clCopyImage/test_copy_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D.cpp
@@ -57,6 +57,7 @@ int test_copy_image_set_3D( cl_device_id device, cl_context context, cl_command_
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
index c098f645..1da1e477 100644
--- a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
@@ -251,6 +251,7 @@ int test_copy_image_set_3D_2D_array(cl_device_id device, cl_context context, cl_
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp
index 026916e8..3bd1b6ef 100644
--- a/test_conformance/images/clCopyImage/test_copy_generic.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp
@@ -228,6 +228,11 @@ cl_mem create_image( cl_context context, cl_command_queue queue, BufferOwningPtr
}
size_t mappedSlicePad = mappedSlice - (mappedRow * height);
+ // For 1Darray, the height variable actually contains the arraysize,
+ // so it can't be used for calculating the slice padding.
+ if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ mappedSlicePad = mappedSlice - (mappedRow * 1);
+
// Copy the image.
size_t scanlineSize = row_pitch_lod;
size_t sliceSize = slice_pitch_lod - scanlineSize * height;
@@ -547,18 +552,19 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d
{
if( memcmp( sourcePtr, destPtr, scanlineSize ) != 0 )
{
- // Find the first missing pixel
+ // Find the first differing pixel
size_t pixel_size = get_pixel_size( dstImageInfo->format );
- size_t where = 0;
- for( where = 0; where < dstImageInfo->width; where++ )
- if( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) )
- break;
-
- print_first_pixel_difference_error(
- where, sourcePtr + pixel_size * where,
- destPtr + pixel_size * where, dstImageInfo, y,
- dstImageInfo->depth);
- return -1;
+ size_t where =
+ compare_scanlines(dstImageInfo, sourcePtr, destPtr);
+
+ if (where < dstImageInfo->width)
+ {
+ print_first_pixel_difference_error(
+ where, sourcePtr + pixel_size * where,
+ destPtr + pixel_size * where, dstImageInfo, y,
+ dstImageInfo->depth);
+ return -1;
+ }
}
sourcePtr += rowPitch;
if((dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY || dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D))
diff --git a/test_conformance/images/clFillImage/test_fill_1D.cpp b/test_conformance/images/clFillImage/test_fill_1D.cpp
index c3f23185..b1550bf3 100644
--- a/test_conformance/images/clFillImage/test_fill_1D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D.cpp
@@ -80,6 +80,7 @@ int test_fill_image_set_1D( cl_device_id device, cl_context context, cl_command_
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_1D_array.cpp b/test_conformance/images/clFillImage/test_fill_1D_array.cpp
index b4347a47..be32ec6a 100644
--- a/test_conformance/images/clFillImage/test_fill_1D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D_array.cpp
@@ -83,6 +83,7 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_context context, cl_co
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_2D.cpp b/test_conformance/images/clFillImage/test_fill_2D.cpp
index bb66fc27..e941abcf 100644
--- a/test_conformance/images/clFillImage/test_fill_2D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D.cpp
@@ -83,6 +83,7 @@ int test_fill_image_set_2D( cl_device_id device, cl_context context, cl_command_
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_2D_array.cpp b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
index 3265aab0..38196cfc 100644
--- a/test_conformance/images/clFillImage/test_fill_2D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
@@ -87,6 +87,7 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_context context, cl_co
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_3D.cpp b/test_conformance/images/clFillImage/test_fill_3D.cpp
index 9db0ac7c..0b8e4e58 100644
--- a/test_conformance/images/clFillImage/test_fill_3D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_3D.cpp
@@ -87,6 +87,7 @@ int test_fill_image_set_3D( cl_device_id device, cl_context context, cl_command_
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_generic.cpp b/test_conformance/images/clFillImage/test_fill_generic.cpp
index 59bf24ad..6cd6beb0 100644
--- a/test_conformance/images/clFillImage/test_fill_generic.cpp
+++ b/test_conformance/images/clFillImage/test_fill_generic.cpp
@@ -468,27 +468,19 @@ int test_fill_image_generic( cl_context context, cl_command_queue queue, image_d
{
for ( size_t y = 0; y < secondDim; y++ )
{
- // If the data type is 101010 ignore bits 31 and 32 when comparing the row
- if (imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010) {
- for (size_t w=0;w!=scanlineSize/4;++w) {
- ((cl_uint*)sourcePtr)[w] &= 0x3FFFFFFF;
- ((cl_uint*)destPtr)[w] &= 0x3FFFFFFF;
- }
- }
-
if (memcmp( sourcePtr, destPtr, scanlineSize ) != 0)
{
- // Find the first missing pixel
+ // Find the first differing pixel
size_t pixel_size = get_pixel_size( imageInfo->format );
- size_t where = 0;
- for ( where = 0; where < imageInfo->width; where++ )
- if ( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) )
- break;
-
- print_first_pixel_difference_error(
- where, sourcePtr + pixel_size * where,
- destPtr + pixel_size * where, imageInfo, y, thirdDim);
- return -1;
+ size_t where = compare_scanlines(imageInfo, sourcePtr, destPtr);
+
+ if (where < imageInfo->width)
+ {
+ print_first_pixel_difference_error(
+ where, sourcePtr + pixel_size * where,
+ destPtr + pixel_size * where, imageInfo, y, thirdDim);
+ return -1;
+ }
}
total_matched += scanlineSize;
diff --git a/test_conformance/images/clGetInfo/test_1D.cpp b/test_conformance/images/clGetInfo/test_1D.cpp
index 0d704b82..7e044856 100644
--- a/test_conformance/images/clGetInfo/test_1D.cpp
+++ b/test_conformance/images/clGetInfo/test_1D.cpp
@@ -46,6 +46,7 @@ int test_get_image_info_1D( cl_device_id device, cl_context context, cl_image_fo
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
index 447fc7c2..c35bf22b 100644
--- a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
+++ b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
@@ -44,6 +44,7 @@ int test_get_image_info_1D_array( cl_device_id device, cl_context context, cl_im
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
@@ -168,6 +169,7 @@ int test_get_image_info_2D_array( cl_device_id device, cl_context context, cl_im
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clGetInfo/test_2D.cpp b/test_conformance/images/clGetInfo/test_2D.cpp
index 74a60123..764b186d 100644
--- a/test_conformance/images/clGetInfo/test_2D.cpp
+++ b/test_conformance/images/clGetInfo/test_2D.cpp
@@ -285,6 +285,7 @@ int test_get_image_info_2D( cl_device_id device, cl_context context, cl_image_fo
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clGetInfo/test_3D.cpp b/test_conformance/images/clGetInfo/test_3D.cpp
index af5062e3..e1261863 100644
--- a/test_conformance/images/clGetInfo/test_3D.cpp
+++ b/test_conformance/images/clGetInfo/test_3D.cpp
@@ -47,6 +47,7 @@ int test_get_image_info_3D( cl_device_id device, cl_context context, cl_image_fo
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp
index eef5bf4e..2d94dc82 100644
--- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp
@@ -81,7 +81,6 @@ int test_read_image_1D(cl_context context, cl_command_queue queue,
for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
{
- float lod_float = (float) lod;
origin[1] = lod;
size_t width_lod, row_pitch_lod;
@@ -90,14 +89,17 @@ int test_read_image_1D(cl_context context, cl_command_queue queue,
region[0] = width_lod;
- if ( gDebugTrace )
- if ( gTestMipmaps) {
- log_info(" - Working at mipLevel :%llu\n", (unsigned long long)lod);
- }
- error = clEnqueueWriteImage(queue, image, CL_FALSE,
- origin, region, ( gEnablePitch ? row_pitch_lod : 0 ), 0,
- (char*)imageValues + imgValMipLevelOffset, 0, NULL, NULL);
- if (error != CL_SUCCESS) {
+ if (gDebugTrace)
+ if (gTestMipmaps)
+ {
+ log_info(" - Working at mipLevel :%llu\n", (unsigned long long)lod);
+ }
+ error = clEnqueueWriteImage(queue, image, CL_FALSE, origin, region,
+ (gEnablePitch ? row_pitch_lod : 0), 0,
+ (char *)imageValues + imgValMipLevelOffset, 0,
+ NULL, NULL);
+ if (error != CL_SUCCESS)
+ {
log_error( "ERROR: Unable to write to 1D image of size %d \n", (int)width_lod );
return -1;
}
@@ -185,6 +187,7 @@ int test_read_image_set_1D(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
index 5d5c2883..cc902042 100644
--- a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
@@ -82,7 +82,6 @@ int test_read_image_1D_array(cl_context context, cl_command_queue queue,
for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
{
- float lod_float = (float) lod;
size_t width_lod, row_pitch_lod, slice_pitch_lod;
if( gTestMipmaps )
origin[2] = lod;
@@ -192,6 +191,7 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_2D.cpp b/test_conformance/images/clReadWriteImage/test_read_2D.cpp
index fb2e7948..b6102874 100644
--- a/test_conformance/images/clReadWriteImage/test_read_2D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp
@@ -81,7 +81,6 @@ int test_read_image_2D(cl_context context, cl_command_queue queue,
for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
{
- float lod_float = (float) lod;
origin[2] = lod;
size_t width_lod, height_lod, row_pitch_lod;
@@ -195,6 +194,7 @@ int test_read_image_set_2D(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
index d0113bb7..401b0e4d 100644
--- a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
@@ -83,9 +83,8 @@ int test_read_image_2D_array(cl_context context, cl_command_queue queue,
for(size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
{
- float lod_float = (float) lod;
origin[3] = lod;
- size_t width_lod, height_lod, depth_lod, row_pitch_lod, slice_pitch_lod;
+ size_t width_lod, height_lod, row_pitch_lod, slice_pitch_lod;
width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
height_lod = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
@@ -170,6 +169,7 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_3D.cpp b/test_conformance/images/clReadWriteImage/test_read_3D.cpp
index 2dcd2433..ced04abf 100644
--- a/test_conformance/images/clReadWriteImage/test_read_3D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp
@@ -83,7 +83,6 @@ int test_read_image_3D(cl_context context, cl_command_queue queue,
for(size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
{
- float lod_float = (float) lod;
origin[3] = lod;
size_t width_lod, height_lod, depth_lod, row_pitch_lod, slice_pitch_lod;
@@ -175,6 +174,7 @@ int test_read_image_set_3D(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_image_methods/test_1D.cpp b/test_conformance/images/kernel_image_methods/test_1D.cpp
index 0059d4c2..934e78ba 100644
--- a/test_conformance/images/kernel_image_methods/test_1D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_1D.cpp
@@ -171,6 +171,7 @@ int test_get_image_info_1D(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_image_methods/test_1D_array.cpp b/test_conformance/images/kernel_image_methods/test_1D_array.cpp
index 797161c4..a824f088 100644
--- a/test_conformance/images/kernel_image_methods/test_1D_array.cpp
+++ b/test_conformance/images/kernel_image_methods/test_1D_array.cpp
@@ -181,6 +181,7 @@ int test_get_image_info_1D_array(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_image_methods/test_2D.cpp b/test_conformance/images/kernel_image_methods/test_2D.cpp
index b0d4a708..07f8d929 100644
--- a/test_conformance/images/kernel_image_methods/test_2D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_2D.cpp
@@ -232,6 +232,7 @@ int test_get_image_info_2D(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt
index 595f024a..ccd678c1 100644
--- a/test_conformance/images/kernel_read_write/CMakeLists.txt
+++ b/test_conformance/images/kernel_read_write/CMakeLists.txt
@@ -14,8 +14,14 @@ set(${MODULE_NAME}_SOURCES
test_write_1D_array.cpp
test_write_2D_array.cpp
test_write_3D.cpp
+ test_cl_ext_image_requirements_info.cpp
+ test_cl_ext_image_from_buffer.cpp
../common.cpp
)
+# Make unused variables not fatal in this module; see
+# https://github.com/KhronosGroup/OpenCL-CTS/issues/1484
+set_gnulike_module_compile_flags("-Wno-error=unused-variable")
+
include(../../CMakeCommon.txt)
diff --git a/test_conformance/images/kernel_read_write/main.cpp b/test_conformance/images/kernel_read_write/main.cpp
index 31dceb33..0a93a974 100644
--- a/test_conformance/images/kernel_read_write/main.cpp
+++ b/test_conformance/images/kernel_read_write/main.cpp
@@ -53,6 +53,43 @@ static void printUsage( const char *execName );
extern int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, test_format_set_fn formatTestFn, cl_mem_object_type imageType );
+extern int cl_image_requirements_size_ext_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue);
+extern int cl_image_requirements_size_ext_consistency(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue);
+extern int clGetImageRequirementsInfoEXT_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue);
+extern int cl_image_requirements_max_val_ext_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue);
+extern int cl_image_requirements_max_val_ext_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue);
+
+extern int image2d_from_buffer_positive(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+extern int memInfo_image_from_buffer_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue);
+extern int imageInfo_image_from_buffer_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue);
+extern int image_from_buffer_alignment_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue);
+extern int image_from_small_buffer_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue);
+extern int image_from_buffer_fill_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue);
+extern int image_from_buffer_read_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue);
+
/** read_write images only support sampler-less read buildt-ins which require special settings
* for some global parameters. This pair of functions temporarily overwrite those global parameters
* and then recover them after completing a read_write test.
@@ -246,12 +283,108 @@ int test_2Darray(cl_device_id device, cl_context context, cl_command_queue queue
return doTest( device, context, queue, CL_MEM_OBJECT_IMAGE2D_ARRAY );
}
+int test_cl_image_requirements_size_ext_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return cl_image_requirements_size_ext_negative(device, context, queue);
+}
+int test_cl_image_requirements_size_ext_consistency(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return cl_image_requirements_size_ext_consistency(device, context, queue);
+}
+int test_clGetImageRequirementsInfoEXT_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return clGetImageRequirementsInfoEXT_negative(device, context, queue);
+}
+int test_cl_image_requirements_max_val_ext_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return cl_image_requirements_max_val_ext_negative(device, context, queue);
+}
+int test_cl_image_requirements_max_val_ext_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return cl_image_requirements_max_val_ext_positive(device, context, queue);
+}
+
+int test_image2d_from_buffer_positive(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return image2d_from_buffer_positive(device, context, queue);
+}
+int test_memInfo_image_from_buffer_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return memInfo_image_from_buffer_positive(device, context, queue);
+}
+int test_imageInfo_image_from_buffer_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return imageInfo_image_from_buffer_positive(device, context, queue);
+}
+int test_image_from_buffer_alignment_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return image_from_buffer_alignment_negative(device, context, queue);
+}
+int test_image_from_small_buffer_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return image_from_small_buffer_negative(device, context, queue);
+}
+int test_image_from_buffer_fill_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return image_from_buffer_fill_positive(device, context, queue);
+}
+int test_image_from_buffer_read_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
+{
+ return image_from_buffer_read_positive(device, context, queue);
+}
+
test_definition test_list[] = {
- ADD_TEST( 1D ),
- ADD_TEST( 2D ),
- ADD_TEST( 3D ),
- ADD_TEST( 1Darray ),
- ADD_TEST( 2Darray ),
+ ADD_TEST(1D),
+ ADD_TEST(2D),
+ ADD_TEST(3D),
+ ADD_TEST(1Darray),
+ ADD_TEST(2Darray),
+ ADD_TEST_VERSION(cl_image_requirements_size_ext_negative, Version(3, 0)),
+ ADD_TEST_VERSION(cl_image_requirements_size_ext_consistency, Version(3, 0)),
+ ADD_TEST_VERSION(clGetImageRequirementsInfoEXT_negative, Version(3, 0)),
+ ADD_TEST_VERSION(cl_image_requirements_max_val_ext_negative, Version(3, 0)),
+ ADD_TEST_VERSION(cl_image_requirements_max_val_ext_positive, Version(3, 0)),
+ ADD_TEST_VERSION(image2d_from_buffer_positive, Version(3, 0)),
+ ADD_TEST_VERSION(memInfo_image_from_buffer_positive, Version(3, 0)),
+ ADD_TEST_VERSION(imageInfo_image_from_buffer_positive, Version(3, 0)),
+ ADD_TEST_VERSION(image_from_buffer_alignment_negative, Version(3, 0)),
+ ADD_TEST_VERSION(image_from_small_buffer_negative, Version(3, 0)),
+ ADD_TEST_VERSION(image_from_buffer_fill_positive, Version(3, 0)),
+ ADD_TEST_VERSION(image_from_buffer_read_positive, Version(3, 0)),
};
const int test_num = ARRAY_SIZE( test_list );
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
new file mode 100644
index 00000000..c6646330
--- /dev/null
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
@@ -0,0 +1,124 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef _TEST_CL_EXT_IMAGE_BUFFER
+#define _TEST_CL_EXT_IMAGE_BUFFER
+
+#define TEST_IMAGE_SIZE 20
+
+#define GET_EXTENSION_FUNC(platform, function_name) \
+ function_name##_fn function_name = reinterpret_cast<function_name##_fn>( \
+ clGetExtensionFunctionAddressForPlatform(platform, #function_name)); \
+ if (function_name == nullptr) \
+ { \
+ return TEST_FAIL; \
+ } \
+ do \
+ { \
+ } while (false)
+
+static inline size_t aligned_size(size_t size, size_t alignment)
+{
+ return (size + alignment - 1) & ~(alignment - 1);
+}
+
+static inline void* aligned_ptr(void* ptr, size_t alignment)
+{
+ return (void*)(((uintptr_t)ptr + alignment - 1) & ~(alignment - 1));
+}
+
+static inline size_t get_format_size(cl_context context,
+ cl_image_format* format,
+ cl_mem_object_type imageType,
+ cl_mem_flags flags)
+{
+ cl_image_desc image_desc = { 0 };
+ image_desc.image_type = imageType;
+
+ /* Size 1 only to query element size */
+ image_desc.image_width = 1;
+ if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType
+ && CL_MEM_OBJECT_IMAGE1D != imageType)
+ {
+ image_desc.image_height = 1;
+ }
+ if (CL_MEM_OBJECT_IMAGE3D == imageType
+ || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType)
+ {
+ image_desc.image_depth = 1;
+ }
+ if (CL_MEM_OBJECT_IMAGE1D_ARRAY == imageType
+ || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType)
+ {
+ image_desc.image_array_size = 1;
+ }
+
+ cl_int error = 0;
+ cl_mem buffer;
+ if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER)
+ {
+ buffer = clCreateBuffer(context, flags,
+ get_pixel_size(format) * image_desc.image_width,
+ NULL, &error);
+ test_error(error, "Unable to create buffer");
+
+ image_desc.buffer = buffer;
+ }
+
+ cl_mem image =
+ clCreateImage(context, flags, format, &image_desc, nullptr, &error);
+ test_error(error, "Unable to create image");
+
+ size_t element_size = 0;
+ error = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(element_size),
+ &element_size, nullptr);
+ test_error(error, "Error clGetImageInfo");
+
+ error = clReleaseMemObject(image);
+ test_error(error, "Unable to release image");
+
+ if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER)
+ {
+ error = clReleaseMemObject(buffer);
+ test_error(error, "Unable to release buffer");
+ }
+
+ return element_size;
+}
+
+static inline void image_desc_init(cl_image_desc* desc,
+ cl_mem_object_type imageType)
+{
+ desc->image_type = imageType;
+ desc->image_width = TEST_IMAGE_SIZE;
+ if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType
+ && CL_MEM_OBJECT_IMAGE1D != imageType)
+ {
+ desc->image_height = TEST_IMAGE_SIZE;
+ }
+ if (CL_MEM_OBJECT_IMAGE3D == imageType
+ || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType)
+ {
+ desc->image_depth = TEST_IMAGE_SIZE;
+ }
+ if (CL_MEM_OBJECT_IMAGE1D_ARRAY == imageType
+ || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType)
+ {
+ desc->image_array_size = TEST_IMAGE_SIZE;
+ }
+}
+
+#endif /* _TEST_CL_EXT_IMAGE_BUFFER */ \ No newline at end of file
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
new file mode 100644
index 00000000..2ce33a17
--- /dev/null
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
@@ -0,0 +1,1013 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "../testBase.h"
+#include "../common.h"
+#include "test_cl_ext_image_buffer.hpp"
+
+static int get_image_requirement_alignment(
+ cl_device_id device, cl_context context, cl_mem_flags flags,
+ const cl_image_format* image_format, const cl_image_desc* image_desc,
+ size_t* row_pitch_alignment, size_t* slice_pitch_alignment,
+ size_t* base_address_alignment)
+{
+ cl_platform_id platform = getPlatformFromDevice(device);
+ GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT);
+
+ cl_int err = CL_SUCCESS;
+ if (nullptr != row_pitch_alignment)
+ {
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, flags, image_format, image_desc,
+ CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT,
+ sizeof(*row_pitch_alignment), row_pitch_alignment, nullptr);
+ test_error(err, "Error getting alignment");
+ }
+
+ if (nullptr != slice_pitch_alignment && CL_SUCCESS == err)
+ {
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, flags, image_format, image_desc,
+ CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT,
+ sizeof(*slice_pitch_alignment), slice_pitch_alignment, nullptr);
+ test_error(err, "Error getting alignment");
+ }
+
+ if (nullptr != base_address_alignment && CL_SUCCESS == err)
+ {
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, flags, image_format, image_desc,
+ CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT,
+ sizeof(*base_address_alignment), base_address_alignment, nullptr);
+ test_error(err, "Error getting alignment");
+ }
+
+ return TEST_PASS;
+}
+
+/**
+ * Consistency with alignment requirements as returned by
+ * cl_khr_image2d_from_buffer Check that the returned values for
+ * CL_DEVICE_IMAGE_PITCH_ALIGNMENT and CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
+ * are correct.
+ */
+int image2d_from_buffer_positive(cl_device_id device, cl_context context,
+ cl_command_queue queue)
+{
+ if (!is_extension_available(device, "cl_khr_image2d_from_buffer"))
+ {
+ printf("Extension cl_khr_image2d_from_buffer not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+ {
+ printf("Extension cl_ext_image_requirements_info not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ std::vector<cl_mem_object_type> imageTypes{
+ CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D,
+ CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER,
+ CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY
+ };
+
+ std::vector<cl_mem_flags> flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY,
+ CL_MEM_READ_WRITE,
+ CL_MEM_KERNEL_READ_AND_WRITE };
+
+ for (auto flag : flagTypes)
+ {
+ for (auto imageType : imageTypes)
+ {
+ /* Get the list of supported image formats */
+ std::vector<cl_image_format> formatList;
+ if (TEST_PASS
+ != get_format_list(context, imageType, formatList, flag)
+ || formatList.size() == 0)
+ {
+ test_fail("Failure to get supported formats list");
+ }
+
+ cl_uint row_pitch_alignment_2d = 0;
+ cl_int err =
+ clGetDeviceInfo(device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT,
+ sizeof(row_pitch_alignment_2d),
+ &row_pitch_alignment_2d, nullptr);
+ test_error(err, "Error clGetDeviceInfo");
+
+ cl_uint base_address_alignment_2d = 0;
+ err =
+ clGetDeviceInfo(device, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT,
+ sizeof(base_address_alignment_2d),
+ &base_address_alignment_2d, nullptr);
+ test_error(err, "Error clGetDeviceInfo");
+
+ for (auto format : formatList)
+ {
+ cl_image_desc image_desc = { 0 };
+ image_desc_init(&image_desc, imageType);
+
+ flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+ ? CL_MEM_READ_WRITE
+ : flag;
+
+ size_t row_pitch_alignment = 0;
+ size_t base_address_alignment = 0;
+
+ int get_error = get_image_requirement_alignment(
+ device, context, 0, &format, &image_desc,
+ &row_pitch_alignment, nullptr, &base_address_alignment);
+ if (TEST_PASS != get_error)
+ {
+ return get_error;
+ }
+
+ const size_t element_size =
+ get_format_size(context, &format, imageType, flag);
+
+ /* Alignements in pixels vs bytes */
+ if (base_address_alignment
+ > base_address_alignment_2d * element_size)
+ {
+ test_fail("Unexpected base_address_alignment");
+ }
+
+ if (row_pitch_alignment > row_pitch_alignment_2d * element_size)
+ {
+ test_fail("Unexpected row_pitch_alignment");
+ }
+ }
+ }
+ }
+
+ return TEST_PASS;
+}
+
+/**
+ * Test clGetMemObjectInfo
+ * Check that CL_MEM_ASSOCIATED_MEMOBJECT correctly returns the buffer that was
+ * used.
+ */
+int memInfo_image_from_buffer_positive(cl_device_id device, cl_context context,
+ cl_command_queue queue)
+{
+ if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+ {
+ printf("Extension cl_ext_image_requirements_info not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ if (!is_extension_available(device, "cl_ext_image_from_buffer"))
+ {
+ printf("Extension cl_ext_image_from_buffer not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ std::vector<cl_mem_object_type> imageTypes{
+ CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D,
+ CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER,
+ CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY
+ };
+
+ std::vector<cl_mem_flags> flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY,
+ CL_MEM_READ_WRITE,
+ CL_MEM_KERNEL_READ_AND_WRITE };
+
+ for (auto flag : flagTypes)
+ {
+ for (auto imageType : imageTypes)
+ {
+ /* Get the list of supported image formats */
+ std::vector<cl_image_format> formatList;
+ if (TEST_PASS
+ != get_format_list(context, imageType, formatList, flag)
+ || formatList.size() == 0)
+ {
+ test_fail("Failure to get supported formats list");
+ }
+
+ for (auto format : formatList)
+ {
+ cl_image_desc image_desc = { 0 };
+ image_desc_init(&image_desc, imageType);
+
+ flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+ ? CL_MEM_READ_WRITE
+ : flag;
+
+ size_t row_pitch_alignment = 0;
+ size_t slice_pitch_alignment = 0;
+
+ int get_error = get_image_requirement_alignment(
+ device, context, 0, &format, &image_desc,
+ &row_pitch_alignment, &slice_pitch_alignment, nullptr);
+ if (TEST_PASS != get_error)
+ {
+ return get_error;
+ }
+
+ const size_t element_size =
+ get_format_size(context, &format, imageType, flag);
+
+ const size_t row_pitch = aligned_size(
+ TEST_IMAGE_SIZE * element_size, row_pitch_alignment);
+ const size_t slice_pitch = aligned_size(
+ row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment);
+
+ const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE;
+
+ cl_int err = CL_SUCCESS;
+ cl_mem buffer =
+ clCreateBuffer(context, flag, buffer_size, nullptr, &err);
+ test_error(err, "Unable to create buffer");
+
+ image_desc.buffer = buffer;
+
+ cl_mem image_buffer = clCreateImage(context, flag, &format,
+ &image_desc, nullptr, &err);
+ test_error(err, "Unable to create image");
+
+ cl_mem returned_buffer;
+ err = clGetMemObjectInfo(
+ image_buffer, CL_MEM_ASSOCIATED_MEMOBJECT,
+ sizeof(returned_buffer), &returned_buffer, nullptr);
+ test_error(err, "Error clGetMemObjectInfo");
+
+ if (returned_buffer != buffer)
+ {
+ test_fail("Unexpected CL_MEM_ASSOCIATED_MEMOBJECT buffer");
+ }
+
+ err = clReleaseMemObject(buffer);
+ test_error(err, "Unable to release buffer");
+
+ err = clReleaseMemObject(image_buffer);
+ test_error(err, "Unable to release image");
+ }
+ }
+ }
+
+ return TEST_PASS;
+}
+
+/**
+ * Test clGetImageInfo
+ * Check that the returned values for CL_IMAGE_ROW_PITCH and
+ * CL_IMAGE_SLICE_PITCH are correct.
+ */
+int imageInfo_image_from_buffer_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue)
+{
+ if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+ {
+ printf("Extension cl_ext_image_requirements_info not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ if (!is_extension_available(device, "cl_ext_image_from_buffer"))
+ {
+ printf("Extension cl_ext_image_from_buffer not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ std::vector<cl_mem_object_type> imageTypes{
+ CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D,
+ CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER,
+ CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY
+ };
+
+ std::vector<cl_mem_flags> flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY,
+ CL_MEM_READ_WRITE,
+ CL_MEM_KERNEL_READ_AND_WRITE };
+
+ for (auto flag : flagTypes)
+ {
+ for (auto imageType : imageTypes)
+ {
+ /* Get the list of supported image formats */
+ std::vector<cl_image_format> formatList;
+ if (TEST_PASS
+ != get_format_list(context, imageType, formatList, flag)
+ || formatList.size() == 0)
+ {
+ test_fail("Failure to get supported formats list");
+ }
+
+ for (auto format : formatList)
+ {
+ cl_image_desc image_desc = { 0 };
+ image_desc_init(&image_desc, imageType);
+
+ flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+ ? CL_MEM_READ_WRITE
+ : flag;
+
+ size_t row_pitch_alignment = 0;
+ size_t slice_pitch_alignment = 0;
+
+ int get_error = get_image_requirement_alignment(
+ device, context, 0, &format, &image_desc,
+ &row_pitch_alignment, &slice_pitch_alignment, nullptr);
+ if (TEST_PASS != get_error)
+ {
+ return get_error;
+ }
+
+ const size_t element_size =
+ get_format_size(context, &format, imageType, flag);
+
+ const size_t row_pitch = aligned_size(
+ TEST_IMAGE_SIZE * element_size, row_pitch_alignment);
+ const size_t slice_pitch = aligned_size(
+ row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment);
+
+ const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE;
+
+ cl_int err = CL_SUCCESS;
+ cl_mem buffer =
+ clCreateBuffer(context, flag, buffer_size, nullptr, &err);
+ test_error(err, "Unable to create buffer");
+
+ image_desc.buffer = buffer;
+
+ if (imageType == CL_MEM_OBJECT_IMAGE2D
+ || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ image_desc.image_row_pitch = row_pitch;
+ }
+ else if (imageType == CL_MEM_OBJECT_IMAGE3D
+ || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ image_desc.image_row_pitch = row_pitch;
+ image_desc.image_slice_pitch = slice_pitch;
+ }
+
+ cl_mem image_buffer = clCreateImage(context, flag, &format,
+ &image_desc, nullptr, &err);
+ test_error(err, "Unable to create image");
+
+ if (imageType == CL_MEM_OBJECT_IMAGE3D
+ || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY
+ || imageType == CL_MEM_OBJECT_IMAGE2D
+ || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ size_t returned_row_pitch = 0;
+ err = clGetImageInfo(image_buffer, CL_IMAGE_ROW_PITCH,
+ sizeof(returned_row_pitch),
+ &returned_row_pitch, nullptr);
+ test_error(err, "Error clGetImageInfo");
+
+ if (returned_row_pitch != row_pitch)
+ {
+ test_fail(
+ "Unexpected row pitch "
+ "CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT");
+ }
+ }
+
+ if (imageType == CL_MEM_OBJECT_IMAGE3D
+ || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ size_t returned_slice_pitch = 0;
+ err = clGetImageInfo(image_buffer, CL_IMAGE_SLICE_PITCH,
+ sizeof(returned_slice_pitch),
+ &returned_slice_pitch, nullptr);
+ test_error(err, "Error clGetImageInfo");
+
+ if (returned_slice_pitch != slice_pitch)
+ {
+ test_fail(
+ "Unexpected row pitch "
+ "CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT");
+ }
+ }
+
+ err = clReleaseMemObject(buffer);
+ test_error(err, "Unable to release buffer");
+
+ err = clReleaseMemObject(image_buffer);
+ test_error(err, "Unable to release image");
+ }
+ }
+ }
+
+ return TEST_PASS;
+}
+
+/**
+ * Negative testing for clCreateImage and wrong alignment
+ * - Create an image from a buffer with invalid row pitch (not a multiple of
+ * required alignment) and check that CL_INVALID_IMAGE_DESCRIPTOR is returned.
+ * - Create an image from a buffer with invalid slice pitch (not a multiple of
+ * required alignment) and check that CL_INVALID_IMAGE_DESCRIPTOR is returned.
+ * - Create an image from a buffer with invalid base address alignment (not a
+ * multiple of required alignment) and check that CL_INVALID_IMAGE_DESCRIPTOR is
+ * returned
+ */
+int image_from_buffer_alignment_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue)
+{
+ /* Skip unless both required extensions are reported by the device. */
+ if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+ {
+ printf("Extension cl_ext_image_requirements_info not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ if (!is_extension_available(device, "cl_ext_image_from_buffer"))
+ {
+ printf("Extension cl_ext_image_from_buffer not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ std::vector<cl_mem_object_type> imageTypes{
+ CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D,
+ CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER,
+ CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY
+ };
+
+ std::vector<cl_mem_flags> flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY,
+ CL_MEM_READ_WRITE,
+ CL_MEM_KERNEL_READ_AND_WRITE };
+
+ for (auto flag : flagTypes)
+ {
+ for (auto imageType : imageTypes)
+ {
+ /* Get the list of supported image formats */
+ std::vector<cl_image_format> formatList;
+ if (TEST_PASS
+ != get_format_list(context, imageType, formatList, flag)
+ || formatList.size() == 0)
+ {
+ test_fail("Failure to get supported formats list");
+ }
+
+ for (auto format : formatList)
+ {
+ cl_image_desc image_desc = { 0 };
+ image_desc_init(&image_desc, imageType);
+
+ /* CL_MEM_KERNEL_READ_AND_WRITE is only valid for the format
+ * query above, not for buffer/image creation.  Note this
+ * mutates the outer loop variable for the remainder of the
+ * current flag iteration. */
+ flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+ ? CL_MEM_READ_WRITE
+ : flag;
+
+ size_t row_pitch_alignment = 0;
+ size_t slice_pitch_alignment = 0;
+ size_t base_address_alignment = 0;
+
+ int get_error = get_image_requirement_alignment(
+ device, context, 0, &format, &image_desc,
+ &row_pitch_alignment, &slice_pitch_alignment,
+ &base_address_alignment);
+ if (TEST_PASS != get_error)
+ {
+ return get_error;
+ }
+
+ const size_t element_size =
+ get_format_size(context, &format, imageType, flag);
+
+ /* Smallest row/slice pitches that satisfy the reported
+ * alignment requirements for a TEST_IMAGE_SIZE image. */
+ const size_t row_pitch = aligned_size(
+ TEST_IMAGE_SIZE * element_size, row_pitch_alignment);
+ const size_t slice_pitch = aligned_size(
+ row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment);
+
+ const size_t buffer_size = (slice_pitch + 1)
+ * TEST_IMAGE_SIZE; /* For bigger row/slice pitch */
+
+ cl_int err = CL_SUCCESS;
+ cl_mem buffer =
+ clCreateBuffer(context, flag, buffer_size, nullptr, &err);
+ test_error(err, "Unable to create buffer");
+
+ /* Test Row pitch images */
+ if (imageType == CL_MEM_OBJECT_IMAGE2D
+ || imageType == CL_MEM_OBJECT_IMAGE3D
+ || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY
+ || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ image_desc.buffer = buffer;
+ image_desc.image_row_pitch =
+ row_pitch + 1; /* wrong row pitch */
+
+ /* NOTE(review): the header comment above this function
+ * expects CL_INVALID_IMAGE_DESCRIPTOR, but the code checks
+ * CL_INVALID_IMAGE_FORMAT_DESCRIPTOR — verify against the
+ * cl_ext_image_from_buffer specification. */
+ clCreateImage(context, flag, &format, &image_desc, nullptr,
+ &err);
+ test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
+ "Unexpected clCreateImage return");
+ }
+
+ /* Test Slice pitch images */
+ if (imageType == CL_MEM_OBJECT_IMAGE3D
+ || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ image_desc.buffer = buffer;
+ image_desc.image_row_pitch = row_pitch;
+ image_desc.image_slice_pitch =
+ slice_pitch + 1; /* wrong slice pitch */
+
+ clCreateImage(context, flag, &format, &image_desc, nullptr,
+ &err);
+ test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
+ "Unexpected clCreateImage return");
+ }
+
+ /* Test buffer from host ptr to test base address alignment */
+ const size_t aligned_buffer_size =
+ aligned_size(buffer_size, base_address_alignment);
+ /* Create buffer with host ptr and additional size for the wrong
+ * alignment */
+ /* NOTE(review): the malloc result is not checked for nullptr
+ * before being passed to aligned_ptr / clCreateBuffer. */
+ void* const host_ptr =
+ malloc(aligned_buffer_size + base_address_alignment);
+ void* non_aligned_host_ptr =
+ (void*)((char*)(aligned_ptr(host_ptr,
+ base_address_alignment))
+ + 1); /* wrong alignment */
+
+ cl_mem buffer_host = clCreateBuffer(
+ context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+ buffer_size, non_aligned_host_ptr, &err);
+ test_error(err, "Unable to create buffer");
+
+ /* NOTE(review): image_desc may still carry the deliberately
+ * invalid row/slice pitch assigned by the sub-tests above, so
+ * for 2D/1D-array images this call could be rejected for the
+ * pitch rather than the base address — consider resetting the
+ * pitches to valid values here; confirm intent. */
+ image_desc.buffer = buffer_host;
+
+ clCreateImage(context, flag, &format, &image_desc, nullptr,
+ &err);
+ test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
+ "Unexpected clCreateImage return");
+
+ free(host_ptr);
+
+ err = clReleaseMemObject(buffer);
+ test_error(err, "Unable to release buffer");
+
+ err = clReleaseMemObject(buffer_host);
+ test_error(err, "Unable to release buffer");
+ }
+ }
+ }
+
+ return TEST_PASS;
+}
+
+/**
+ * Negative testing for clCreateImage (buffer size).
+ * Create a buffer too small and check that image creation from that buffer is
+ * rejected
+ */
+int image_from_small_buffer_negative(cl_device_id device, cl_context context,
+                                     cl_command_queue queue)
+{
+    /* Creating an image from a backing buffer that is too small to hold it
+     * must be rejected with CL_INVALID_MEM_OBJECT. */
+    if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+    {
+        printf("Extension cl_ext_image_requirements_info not available");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (!is_extension_available(device, "cl_ext_image_from_buffer"))
+    {
+        printf("Extension cl_ext_image_from_buffer not available");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    const std::vector<cl_mem_object_type> imageTypes{
+        CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D,
+        CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE3D,
+        CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY
+    };
+
+    const std::vector<cl_mem_flags> flagTypes{
+        CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE,
+        CL_MEM_KERNEL_READ_AND_WRITE
+    };
+
+    for (cl_mem_flags flag : flagTypes)
+    {
+        for (const cl_mem_object_type imageType : imageTypes)
+        {
+            /* Query the formats supported for this image type / flag pair. */
+            std::vector<cl_image_format> formatList;
+            const int list_result =
+                get_format_list(context, imageType, formatList, flag);
+            if (list_result != TEST_PASS || formatList.empty())
+            {
+                test_fail("Failure to get supported formats list");
+            }
+
+            for (auto format : formatList)
+            {
+                cl_image_desc image_desc = { 0 };
+                image_desc_init(&image_desc, imageType);
+
+                /* CL_MEM_KERNEL_READ_AND_WRITE is only valid for the format
+                 * query; creation calls fall back to CL_MEM_READ_WRITE. */
+                if (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+                {
+                    flag = CL_MEM_READ_WRITE;
+                }
+
+                /* Deliberately undersized backing buffer. */
+                cl_int err = CL_SUCCESS;
+                cl_mem small_buffer = clCreateBuffer(
+                    context, flag, TEST_IMAGE_SIZE / 2, nullptr, &err);
+                test_error(err, "Unable to create buffer");
+
+                image_desc.buffer = small_buffer;
+
+                clCreateImage(context, flag, &format, &image_desc, nullptr,
+                              &err);
+                test_failure_error(err, CL_INVALID_MEM_OBJECT,
+                                   "Unexpected clCreateImage return");
+
+                err = clReleaseMemObject(small_buffer);
+                test_error(err, "Unable to release buffer");
+            }
+        }
+    }
+
+    return TEST_PASS;
+}
+
+static int image_from_buffer_fill_check(cl_command_queue queue, cl_mem image,
+                                        size_t* region, size_t element_size,
+                                        char pattern)
+{
+    /* Read the image back into a tightly packed host buffer (row and slice
+     * pitch of 0 in clEnqueueReadImage means densely packed) and verify that
+     * every byte of every pixel matches the expected fill pattern.
+     *
+     * region: {width, height, depth} in pixels; element_size: bytes/pixel. */
+    const size_t image_size = region[0] * region[1] * region[2] * element_size;
+    size_t origin[3] = { 0, 0, 0 };
+    std::vector<char> read_buffer(image_size);
+
+    cl_int error =
+        clEnqueueReadImage(queue, image, CL_BLOCKING, origin, region, 0, 0,
+                           read_buffer.data(), 0, nullptr, nullptr);
+    test_error(error, "Error clEnqueueReadImage");
+
+    for (size_t x = 0; x < region[0]; x++)
+    {
+        for (size_t y = 0; y < region[1]; y++)
+        {
+            for (size_t z = 0; z < region[2]; z++)
+            {
+                for (size_t elmt = 0; elmt < element_size; elmt++)
+                {
+                    /* Linear offset of byte `elmt` of pixel (x, y, z) in the
+                     * packed buffer.  The previous code multiplied the four
+                     * loop indices together (`line * row * depth * elmt`),
+                     * which yields offset 0 whenever any index is 0 and never
+                     * visits most of the buffer, so the check was ineffective
+                     * and indexed the wrong bytes. */
+                    size_t index =
+                        ((z * region[1] + y) * region[0] + x) * element_size
+                        + elmt;
+
+                    if (read_buffer[index] != pattern)
+                    {
+                        test_fail("Image pattern check failed");
+                    }
+                }
+            }
+        }
+    }
+
+    return TEST_PASS;
+}
+
+/**
+ * Use fill buffer to fill the image from buffer
+ */
+int image_from_buffer_fill_positive(cl_device_id device, cl_context context,
+ cl_command_queue queue)
+{
+ /* Skip unless both required extensions are reported by the device. */
+ if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+ {
+ printf("Extension cl_ext_image_requirements_info not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ if (!is_extension_available(device, "cl_ext_image_from_buffer"))
+ {
+ printf("Extension cl_ext_image_from_buffer not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ std::vector<cl_mem_object_type> imageTypes{
+ CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D,
+ CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER,
+ CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY
+ };
+
+ std::vector<cl_mem_flags> flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY,
+ CL_MEM_READ_WRITE,
+ CL_MEM_KERNEL_READ_AND_WRITE };
+
+ for (auto flag : flagTypes)
+ {
+ for (auto imageType : imageTypes)
+ {
+ /* Get the list of supported image formats */
+ std::vector<cl_image_format> formatList;
+ if (TEST_PASS
+ != get_format_list(context, imageType, formatList, flag)
+ || formatList.size() == 0)
+ {
+ test_fail("Failure to get supported formats list");
+ }
+
+ for (auto format : formatList)
+ {
+ cl_image_desc image_desc = { 0 };
+ image_desc_init(&image_desc, imageType);
+
+ /* CL_MEM_KERNEL_READ_AND_WRITE is only valid for the format
+ * query above; creation calls use CL_MEM_READ_WRITE.  Note
+ * this mutates the outer loop variable for the remainder of
+ * the current flag iteration. */
+ flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+ ? CL_MEM_READ_WRITE
+ : flag;
+
+ size_t row_pitch_alignment = 0;
+ size_t slice_pitch_alignment = 0;
+
+ int get_error = get_image_requirement_alignment(
+ device, context, 0, &format, &image_desc,
+ &row_pitch_alignment, &slice_pitch_alignment, nullptr);
+ if (TEST_PASS != get_error)
+ {
+ return get_error;
+ }
+
+ const size_t element_size =
+ get_format_size(context, &format, imageType, flag);
+
+ /* Smallest pitches satisfying the reported alignments. */
+ const size_t row_pitch = aligned_size(
+ TEST_IMAGE_SIZE * element_size, row_pitch_alignment);
+ const size_t slice_pitch = aligned_size(
+ row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment);
+
+ const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE;
+
+ cl_int err = CL_SUCCESS;
+ cl_mem buffer =
+ clCreateBuffer(context, flag, buffer_size, nullptr, &err);
+ test_error(err, "Unable to create buffer");
+
+ /* fill the buffer with a pattern */
+ const char pattern = 0x55;
+ err = clEnqueueFillBuffer(queue, buffer, &pattern,
+ sizeof(pattern), 0, buffer_size, 0,
+ nullptr, nullptr);
+ test_error(err, "Error clEnqueueFillBuffer");
+
+ err = clFinish(queue);
+ test_error(err, "Error clFinish");
+
+ /* NOTE(review): image1d_buffer is only assigned and released
+ * under the CL_MEM_OBJECT_IMAGE1D_BUFFER branches, so the
+ * uninitialized value is never used, but initializing it to
+ * nullptr would be safer and silence static analysis. */
+ cl_mem image1d_buffer;
+ if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER)
+ {
+ /* 1D-buffer images always require a backing buffer; this
+ * destination image gets its own (unfilled) one. */
+ image1d_buffer = clCreateBuffer(context, flag, buffer_size,
+ nullptr, &err);
+ test_error(err, "Unable to create buffer");
+
+ image_desc.buffer = image1d_buffer;
+ }
+
+ /* Destination image, not backed by the filled buffer. */
+ cl_mem image = clCreateImage(context, flag, &format,
+ &image_desc, nullptr, &err);
+ test_error(err, "Unable to create image");
+
+ /* Check the image from buffer */
+ image_desc.buffer = buffer;
+
+ if (imageType == CL_MEM_OBJECT_IMAGE2D
+ || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ image_desc.image_row_pitch = row_pitch;
+ }
+ else if (imageType == CL_MEM_OBJECT_IMAGE3D
+ || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ image_desc.image_row_pitch = row_pitch;
+ image_desc.image_slice_pitch = slice_pitch;
+ }
+
+ /* Source image aliasing the pattern-filled buffer. */
+ cl_mem image_from_buffer = clCreateImage(
+ context, flag, &format, &image_desc, nullptr, &err);
+ test_error(err, "Unable to create image");
+
+ /* Full-image region for this image type. */
+ size_t origin[3] = { 0, 0, 0 };
+ size_t region[3] = { 1, 1, 1 };
+
+ region[0] = TEST_IMAGE_SIZE;
+ if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType
+ && CL_MEM_OBJECT_IMAGE1D != imageType)
+ {
+ region[1] = TEST_IMAGE_SIZE;
+ }
+ if (CL_MEM_OBJECT_IMAGE3D == imageType
+ || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType)
+ {
+ region[2] = TEST_IMAGE_SIZE;
+ }
+
+ /* Check the copy of the image from buffer */
+ err =
+ clEnqueueCopyImage(queue, image_from_buffer, image, origin,
+ origin, region, 0, nullptr, nullptr);
+ test_error(err, "Error clEnqueueCopyImage");
+
+ err = clFinish(queue);
+ test_error(err, "Error clFinish");
+
+ /* Both the aliasing image and the copied-to image must show
+ * the fill pattern. */
+ int fill_error = image_from_buffer_fill_check(
+ queue, image_from_buffer, region, element_size, pattern);
+ if (TEST_PASS != fill_error)
+ {
+ return fill_error;
+ }
+
+ fill_error = image_from_buffer_fill_check(
+ queue, image, region, element_size, pattern);
+ if (TEST_PASS != fill_error)
+ {
+ return fill_error;
+ }
+
+ err = clReleaseMemObject(buffer);
+ test_error(err, "Unable to release buffer");
+
+ err = clReleaseMemObject(image);
+ test_error(err, "Unable to release image");
+
+ err = clReleaseMemObject(image_from_buffer);
+ test_error(err, "Unable to release image");
+
+ if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER)
+ {
+ err = clReleaseMemObject(image1d_buffer);
+ test_error(err, "Unable to release image");
+ }
+ }
+ }
+ }
+
+ return TEST_PASS;
+}
+
+static int image_from_buffer_read_check(cl_command_queue queue, cl_mem buffer,
+                                        const size_t buffer_size,
+                                        size_t* region, size_t element_size,
+                                        char pattern, size_t row_pitch,
+                                        size_t slice_pitch)
+{
+    /* Read the whole backing buffer and verify that every byte belonging to
+     * the image (rows spaced row_pitch apart within a slice, slices spaced
+     * slice_pitch apart) carries the expected pattern.  Padding bytes between
+     * rows/slices are not checked. */
+    std::vector<char> host_buffer(buffer_size);
+
+    cl_int error =
+        clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0, buffer_size,
+                            host_buffer.data(), 0, nullptr, nullptr);
+    test_error(error, "Error clEnqueueReadBuffer");
+
+    const size_t row_bytes = region[0] * element_size;
+    for (size_t slice = 0; slice < region[2]; slice++)
+    {
+        for (size_t row = 0; row < region[1]; row++)
+        {
+            const char* row_ptr =
+                host_buffer.data() + slice * slice_pitch + row * row_pitch;
+            for (size_t byte = 0; byte < row_bytes; byte++)
+            {
+                if (row_ptr[byte] != pattern)
+                {
+                    test_fail("Image pattern check failed");
+                }
+            }
+        }
+    }
+
+    return TEST_PASS;
+}
+
+/**
+ * Use fill image to fill the buffer that was used to create the image
+ */
+int image_from_buffer_read_positive(cl_device_id device, cl_context context,
+ cl_command_queue queue)
+{
+ /* Skip unless both required extensions are reported by the device. */
+ if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+ {
+ printf("Extension cl_ext_image_requirements_info not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ if (!is_extension_available(device, "cl_ext_image_from_buffer"))
+ {
+ printf("Extension cl_ext_image_from_buffer not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ std::vector<cl_mem_object_type> imageTypes{
+ CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D,
+ CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER,
+ CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY
+ };
+
+ for (auto imageType : imageTypes)
+ {
+ cl_image_desc image_desc = { 0 };
+ image_desc_init(&image_desc, imageType);
+
+ /* Non normalized format so we can read it back directly from
+ * clEnqueueFillImage */
+ cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT8 };
+ const char pattern = 0x55;
+
+ const size_t element_size =
+ get_format_size(context, &format, imageType, CL_MEM_READ_WRITE);
+
+ size_t row_pitch_alignment = 0;
+ size_t slice_pitch_alignment = 0;
+
+ int get_error = get_image_requirement_alignment(
+ device, context, CL_MEM_READ_WRITE, &format, &image_desc,
+ &row_pitch_alignment, &slice_pitch_alignment, nullptr);
+ if (TEST_PASS != get_error)
+ {
+ return get_error;
+ }
+
+ /* Smallest pitches satisfying the reported alignments. */
+ const size_t row_pitch =
+ aligned_size(TEST_IMAGE_SIZE * element_size, row_pitch_alignment);
+ const size_t slice_pitch =
+ aligned_size(row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment);
+
+ const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE;
+
+ cl_int err = CL_SUCCESS;
+ cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size,
+ nullptr, &err);
+ test_error(err, "Unable to create buffer");
+
+ /* Check the image from buffer */
+ image_desc.buffer = buffer;
+
+ if (imageType == CL_MEM_OBJECT_IMAGE2D
+ || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ image_desc.image_row_pitch = row_pitch;
+ }
+ else if (imageType == CL_MEM_OBJECT_IMAGE3D
+ || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ image_desc.image_row_pitch = row_pitch;
+ image_desc.image_slice_pitch = slice_pitch;
+ }
+
+ cl_mem image = clCreateImage(context, CL_MEM_READ_WRITE, &format,
+ &image_desc, nullptr, &err);
+ test_error(err, "Unable to create image");
+
+ /* Full-image region for this image type. */
+ size_t origin[3] = { 0, 0, 0 };
+ size_t region[3] = { 1, 1, 1 };
+
+ region[0] = TEST_IMAGE_SIZE;
+ if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType
+ && CL_MEM_OBJECT_IMAGE1D != imageType)
+ {
+ region[1] = TEST_IMAGE_SIZE;
+ }
+ if (CL_MEM_OBJECT_IMAGE3D == imageType
+ || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType)
+ {
+ region[2] = TEST_IMAGE_SIZE;
+ }
+
+ /* fill the image with a pattern */
+ cl_uint fill_color[4] = { pattern, pattern, pattern, pattern };
+ err = clEnqueueFillImage(queue, image, fill_color, origin, region, 0,
+ nullptr, nullptr);
+ test_error(err, "Error clEnqueueFillImage");
+
+ err = clFinish(queue);
+ test_error(err, "Error clFinish");
+
+ /* NOTE(review): for 1D arrays the per-row step passed to the check is
+ * slice_pitch, but image_desc only set image_row_pitch at creation —
+ * this assumes the implementation spaces 1D-array elements slice_pitch
+ * apart in the buffer; confirm against the spec's defaulting rules for
+ * image_slice_pitch. */
+ int read_error = image_from_buffer_read_check(
+ queue, buffer, buffer_size, region, element_size, pattern,
+ (imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? slice_pitch
+ : row_pitch,
+ slice_pitch);
+ if (TEST_PASS != read_error)
+ {
+ return read_error;
+ }
+
+ err = clReleaseMemObject(buffer);
+ test_error(err, "Unable to release buffer");
+
+ err = clReleaseMemObject(image);
+ test_error(err, "Unable to release image");
+ }
+
+ return TEST_PASS;
+} \ No newline at end of file
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp
new file mode 100644
index 00000000..9212fcbc
--- /dev/null
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp
@@ -0,0 +1,482 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+#include "../common.h"
+#include "test_cl_ext_image_buffer.hpp"
+
+/**
+ * Negative tests for {CL_IMAGE_REQUIREMENTS_SIZE_EXT}
+ * Check that attempting to perform the {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query
+ * without specifying the _image_format_ results in {CL_INVALID_VALUE} being
+ * returned. Check that attempting to perform the
+ * {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query without specifying the _image_desc_
+ * results in {CL_INVALID_VALUE} being returned.
+ */
+int cl_image_requirements_size_ext_negative(cl_device_id device,
+                                            cl_context context,
+                                            cl_command_queue queue)
+{
+    /* The CL_IMAGE_REQUIREMENTS_SIZE_EXT query requires both an image format
+     * and an image descriptor; omitting either must yield CL_INVALID_VALUE. */
+    if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+    {
+        printf("Extension cl_ext_image_requirements_info not available");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    cl_platform_id platform = getPlatformFromDevice(device);
+    GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT);
+
+    cl_image_desc image_desc = { 0 };
+    image_desc_init(&image_desc, CL_MEM_OBJECT_IMAGE2D);
+
+    cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT16 };
+
+    size_t max_size = 0;
+    size_t param_val_size = 0;
+
+    /* Omitting the image format must be rejected with CL_INVALID_VALUE. */
+    cl_int err = clGetImageRequirementsInfoEXT(
+        context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc,
+        CL_IMAGE_REQUIREMENTS_SIZE_EXT, sizeof(max_size), &max_size,
+        &param_val_size);
+    test_failure_error(err, CL_INVALID_VALUE,
+                       "Unexpected clGetImageRequirementsInfoEXT return");
+
+    /* Omitting the image descriptor must be rejected with CL_INVALID_VALUE. */
+    err = clGetImageRequirementsInfoEXT(
+        context, nullptr, CL_MEM_READ_WRITE, &format, nullptr,
+        CL_IMAGE_REQUIREMENTS_SIZE_EXT, sizeof(max_size), &max_size,
+        &param_val_size);
+    test_failure_error(err, CL_INVALID_VALUE,
+                       "Unexpected clGetImageRequirementsInfoEXT return");
+
+    return TEST_PASS;
+}
+
+/**
+ * Consistency checks for CL_IMAGE_REQUIREMENTS_SIZE_EXT
+ * When creating 2D images from a buffer is supported
+ * Check that the CL_IMAGE_REQUIREMENTS_SIZE_EXT query can be performed
+ * successfully. Create a buffer with the size returned and check that an image
+ * can successfully be created from the buffer. Check that the value returned
+ * for CL_MEM_SIZE for the image is the same as the value returned for
+ * CL_IMAGE_REQUIREMENTS_SIZE_EXT.
+ */
+int cl_image_requirements_size_ext_consistency(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue)
+{
+ /* Skip unless both required extensions are reported by the device. */
+ if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+ {
+ printf("Extension cl_ext_image_requirements_info not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ if (!is_extension_available(device, "cl_ext_image_from_buffer"))
+ {
+ printf("Extension cl_ext_image_from_buffer not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ cl_platform_id platform = getPlatformFromDevice(device);
+ GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT);
+
+ size_t max_size = 0;
+ size_t param_val_size = 0;
+
+ std::vector<cl_mem_object_type> imageTypes{
+ CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D,
+ CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER,
+ CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY
+ };
+
+ std::vector<cl_mem_flags> flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY,
+ CL_MEM_READ_WRITE,
+ CL_MEM_KERNEL_READ_AND_WRITE };
+
+ for (auto flag : flagTypes)
+ {
+ for (auto imageType : imageTypes)
+ {
+ /* Get the list of supported image formats */
+ std::vector<cl_image_format> formatList;
+ if (TEST_PASS
+ != get_format_list(context, imageType, formatList, flag)
+ || formatList.size() == 0)
+ {
+ test_fail("Failure to get supported formats list");
+ }
+
+ for (auto format : formatList)
+ {
+ cl_image_desc image_desc = { 0 };
+ image_desc_init(&image_desc, imageType);
+
+ /* CL_MEM_KERNEL_READ_AND_WRITE is only valid for the format
+ * query above; creation calls use CL_MEM_READ_WRITE.  Note
+ * this mutates the outer loop variable for the remainder of
+ * the current flag iteration. */
+ flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+ ? CL_MEM_READ_WRITE
+ : flag;
+
+ /* Query the minimum buffer size required for this image. */
+ cl_int err = clGetImageRequirementsInfoEXT(
+ context, nullptr, flag, &format, &image_desc,
+ CL_IMAGE_REQUIREMENTS_SIZE_EXT, sizeof(max_size), &max_size,
+ &param_val_size);
+ test_error(err, "Error clGetImageRequirementsInfoEXT");
+
+ /* Create buffer */
+ cl_mem buffer =
+ clCreateBuffer(context, flag, max_size, nullptr, &err);
+ test_error(err, "Unable to create buffer");
+
+ image_desc.buffer = buffer;
+
+ /* Image created from the buffer (all tested image types, not
+ * just 2D). */
+ cl_mem image_buffer = clCreateImage(context, flag, &format,
+ &image_desc, nullptr, &err);
+ test_error(err, "Unable to create image");
+
+ /* CL_MEM_SIZE of the image must equal the queried
+ * CL_IMAGE_REQUIREMENTS_SIZE_EXT. */
+ size_t size = 0;
+ err = clGetMemObjectInfo(image_buffer, CL_MEM_SIZE,
+ sizeof(size_t), &size, NULL);
+ test_error(err, "Error clGetMemObjectInfo");
+
+ if (max_size != size)
+ {
+ test_fail("CL_IMAGE_REQUIREMENTS_SIZE_EXT different from "
+ "CL_MEM_SIZE");
+ }
+
+ err = clReleaseMemObject(image_buffer);
+ test_error(err, "Error clReleaseMemObject");
+
+ err = clReleaseMemObject(buffer);
+ test_error(err, "Error clReleaseMemObject");
+ }
+ }
+ }
+
+ return TEST_PASS;
+}
+
+/**
+ * Negative testing for all testable error codes returned by
+ * clGetImageRequirementsInfoEXT
+ */
+int clGetImageRequirementsInfoEXT_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue)
+{
+ /* Exercise each testable failure mode of clGetImageRequirementsInfoEXT. */
+ if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+ {
+ printf("Extension cl_ext_image_requirements_info not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ cl_platform_id platform = getPlatformFromDevice(device);
+ GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT);
+
+ cl_image_desc image_desc = { 0 };
+ image_desc_init(&image_desc, CL_MEM_OBJECT_IMAGE3D);
+
+ cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT16 };
+
+ /* Check that CL_INVALID_CONTEXT is returned when passing nullptr as context
+ */
+ size_t row_pitch_alignment = 0;
+ cl_int err = clGetImageRequirementsInfoEXT(
+ nullptr, nullptr, CL_MEM_READ_WRITE, &format, &image_desc,
+ CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT,
+ sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr);
+ test_failure_error(err, CL_INVALID_CONTEXT,
+ "Unexpected clGetImageRequirementsInfoEXT return");
+
+ /* Check that CL_INVALID_IMAGE_DESCRIPTOR is returned when passing an
+ * invalid image_type (CL_MEM_OBJECT_BUFFER is not an image type) */
+ cl_image_desc invalid_desc = { CL_MEM_OBJECT_BUFFER, TEST_IMAGE_SIZE };
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, &format, &invalid_desc,
+ CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT,
+ sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr);
+ test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR,
+ "Unexpected clGetImageRequirementsInfoEXT return");
+
+ /* Check that CL_INVALID_VALUE is returned when passing invalid flags */
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, -1, &format, &image_desc,
+ CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT,
+ sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr);
+ test_failure_error(err, CL_INVALID_VALUE,
+ "Unexpected clGetImageRequirementsInfoEXT return");
+
+ /* Check that CL_INVALID_IMAGE_FORMAT_DESCRIPTOR is returned when passing an
+ * unsupported channel order / data type combination */
+ cl_image_format invalid_format = { CL_INTENSITY, CL_UNORM_SHORT_555 };
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, &invalid_format, &image_desc,
+ CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT,
+ sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr);
+ test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
+ "Unexpected clGetImageRequirementsInfoEXT return");
+
+ /* Check that CL_INVALID_IMAGE_DESCRIPTOR is returned when passing an
+ * image_desc with invalid values */
+ cl_image_desc invalid_desc_size = { CL_MEM_OBJECT_IMAGE1D, 0 };
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, &format, &invalid_desc_size,
+ CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT,
+ sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr);
+ test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR,
+ "Unexpected clGetImageRequirementsInfoEXT return");
+
+ /* Check that CL_INVALID_VALUE is returned when passing an invalid
+ * param_name */
+ cl_image_requirements_info_ext invalid_info = CL_IMAGE_FORMAT;
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, invalid_info,
+ sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr);
+ test_failure_error(err, CL_INVALID_VALUE,
+ "Unexpected clGetImageRequirementsInfoEXT return");
+
+ /* Check that CL_INVALID_VALUE is returned when passing a param_value_size
+ * value smaller than the size of the return type */
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc,
+ CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT,
+ sizeof(row_pitch_alignment) - 1, &row_pitch_alignment, nullptr);
+ test_failure_error(err, CL_INVALID_VALUE,
+ "Unexpected clGetImageRequirementsInfoEXT return");
+
+ /* Check that CL_INVALID_VALUE is returned when passing a param_value_size
+ * value smaller than the size of the return type */
+ uint32_t max_height = 0;
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc,
+ CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, sizeof(max_height) - 1,
+ &max_height, nullptr);
+ test_failure_error(err, CL_INVALID_VALUE,
+ "Unexpected clGetImageRequirementsInfoEXT return");
+
+ return TEST_PASS;
+}
+
+/**
+ * Negative tests for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT}
+ * Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query on all
+ * image types for which it is not valid Check that
+ * {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases.
+ *
+ * Negative testing for {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT}
+ * Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} query on all
+ * image types for which it is not valid Check that
+ * {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases.
+ *
+ * Negative testing for {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT}
+ * Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} query on
+ * all image types for which it is not valid Check that
+ * {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases.
+ */
+int cl_image_requirements_max_val_ext_negative(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue)
+{
+ /* Each MAX_* query must be rejected with CL_INVALID_IMAGE_DESCRIPTOR for
+ * every image type that lacks the corresponding dimension. */
+ if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+ {
+ printf("Extension cl_ext_image_requirements_info not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ cl_platform_id platform = getPlatformFromDevice(device);
+ GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT);
+
+ size_t value = 0;
+
+ /* Image types that have no height dimension. */
+ std::vector<cl_mem_object_type> imageTypes_height{
+ CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER,
+ CL_MEM_OBJECT_IMAGE1D
+ };
+
+ cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT16 };
+
+ for (auto imageType : imageTypes_height)
+ {
+ cl_image_desc image_desc = { 0 };
+ image_desc_init(&image_desc, imageType);
+
+ /* MAX_HEIGHT query on a height-less image type must be rejected */
+ cl_int err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc,
+ CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, sizeof(value), &value,
+ nullptr);
+ test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR,
+ "Unexpected clGetImageRequirementsInfoEXT return");
+ }
+
+ /* Image types that have no depth dimension. */
+ std::vector<cl_mem_object_type> imageTypes_depth{
+ CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY,
+ CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER,
+ CL_MEM_OBJECT_IMAGE1D
+ };
+
+ for (auto imageType : imageTypes_depth)
+ {
+ cl_image_desc image_desc = { 0 };
+ image_desc_init(&image_desc, imageType);
+
+ /* MAX_DEPTH query on a depth-less image type must be rejected */
+ cl_int err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc,
+ CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT, sizeof(value), &value,
+ nullptr);
+ test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR,
+ "Unexpected clGetImageRequirementsInfoEXT return");
+ }
+
+ /* Image types that are not arrays. */
+ std::vector<cl_mem_object_type> imageTypes_array_size{
+ CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE2D,
+ CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE1D
+ };
+
+ for (auto imageType : imageTypes_array_size)
+ {
+ cl_image_desc image_desc = { 0 };
+ image_desc_init(&image_desc, imageType);
+
+ /* MAX_ARRAY_SIZE query on a non-array image type must be rejected */
+ cl_int err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc,
+ CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT, sizeof(value), &value,
+ nullptr);
+ test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR,
+ "Unexpected clGetImageRequirementsInfoEXT return");
+ }
+
+ return TEST_PASS;
+}
+
+/**
+ * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT}
+ ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} query can be performed
+ *successfully
+ *
+ * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT}
+ ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query can be performed
+ *successfully
+ *
+ * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT}
+ ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} query can be performed
+ *successfully
+ *
+ * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT}
+ ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} query can be
+ *performed successfully
+ */
+int cl_image_requirements_max_val_ext_positive(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue)
+{
+ if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+ {
+ printf("Extension cl_ext_image_requirements_info not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ cl_platform_id platform = getPlatformFromDevice(device);
+ GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT);
+
+ /* CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT */
+ cl_image_desc image_desc_1d = { 0 };
+ image_desc_init(&image_desc_1d, CL_MEM_OBJECT_IMAGE1D);
+
+ uint32_t max_width = 0;
+ cl_int err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc_1d,
+ CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT, sizeof(max_width), &max_width,
+ nullptr);
+ test_error(err, "Error clGetImageRequirementsInfoEXT");
+
+ size_t width_1d = 0;
+ err = clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE,
+ sizeof(width_1d), &width_1d, NULL);
+ test_error(err, "Error clGetDeviceInfo");
+
+ if (!(max_width <= width_1d && max_width > 0))
+ {
+ test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT value");
+ }
+
+ /* CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT */
+ cl_image_desc image_desc_2d = { 0 };
+ image_desc_init(&image_desc_2d, CL_MEM_OBJECT_IMAGE2D);
+
+ uint32_t max_height = 0;
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc_2d,
+ CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, sizeof(max_height), &max_height,
+ nullptr);
+ test_error(err, "Error clGetImageRequirementsInfoEXT");
+
+ size_t height_2d = 0;
+ err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
+ sizeof(height_2d), &height_2d, NULL);
+ test_error(err, "Error clGetDeviceInfo");
+
+ if (!(max_height <= height_2d && max_height > 0))
+ {
+ test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT value");
+ }
+
+ /* CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT */
+ cl_image_desc image_desc_3d = { 0 };
+ image_desc_init(&image_desc_3d, CL_MEM_OBJECT_IMAGE3D);
+
+ uint32_t max_depth = 0;
+ err = clGetImageRequirementsInfoEXT(context, nullptr, CL_MEM_READ_WRITE,
+ nullptr, &image_desc_3d,
+ CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT,
+ sizeof(max_depth), &max_depth, nullptr);
+ test_error(err, "Error clGetImageRequirementsInfoEXT");
+
+ size_t depth_3d = 0;
+ err = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth_3d),
+ &depth_3d, NULL);
+ test_error(err, "Error clGetDeviceInfo");
+
+ if (!(max_depth <= depth_3d && max_depth > 0))
+ {
+ test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT value");
+ }
+
+ /* CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT */
+ cl_image_desc image_desc_array = { 0 };
+ image_desc_init(&image_desc_array, CL_MEM_OBJECT_IMAGE2D_ARRAY);
+
+ uint32_t max_array_size = 0;
+ err = clGetImageRequirementsInfoEXT(
+ context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc_array,
+ CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT, sizeof(max_array_size),
+ &max_array_size, nullptr);
+ test_error(err, "Error clGetImageRequirementsInfoEXT");
+
+ size_t array_size = 0;
+ err = clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE,
+ sizeof(array_size), &array_size, NULL);
+ test_error(err, "Error clGetDeviceInfo");
+
+ if (!(max_array_size <= array_size && max_array_size > 0))
+ {
+ test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT value");
+ }
+
+ return TEST_PASS;
+} \ No newline at end of file
diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp
index e76710b5..a22db195 100644
--- a/test_conformance/images/kernel_read_write/test_common.cpp
+++ b/test_conformance/images/kernel_read_write/test_common.cpp
@@ -16,6 +16,7 @@
#include "test_common.h"
+#include <algorithm>
cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) {
cl_sampler sampler = nullptr;
@@ -33,122 +34,210 @@ cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool te
return sampler;
}
-void InitFloatCoordsCommon(image_descriptor *imageInfo,
- image_sampler_data *imageSampler, float *xOffsets,
- float *yOffsets, float *zOffsets, float xfract,
- float yfract, float zfract, int normalized_coords,
- MTdata d, int lod)
+bool get_image_dimensions(image_descriptor *imageInfo, size_t &width,
+ size_t &height, size_t &depth)
+{
+ width = imageInfo->width;
+ height = 1;
+ depth = 1;
+ switch (imageInfo->type)
+ {
+ case CL_MEM_OBJECT_IMAGE1D: break;
+ case CL_MEM_OBJECT_IMAGE1D_ARRAY: height = imageInfo->arraySize; break;
+ case CL_MEM_OBJECT_IMAGE2D: height = imageInfo->height; break;
+ case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+ height = imageInfo->height;
+ depth = imageInfo->arraySize;
+ break;
+ case CL_MEM_OBJECT_IMAGE3D:
+ height = imageInfo->height;
+ depth = imageInfo->depth;
+ break;
+ default:
+ log_error("ERROR: Test does not support image type");
+ return TEST_FAIL;
+ }
+ return 0;
+}
+
+static bool InitFloatCoordsCommon(image_descriptor *imageInfo,
+ image_sampler_data *imageSampler,
+ float *xOffsets, float *yOffsets,
+ float *zOffsets, float xfract, float yfract,
+ float zfract, int normalized_coords, MTdata d,
+ int lod)
{
size_t i = 0;
- if (gDisableOffsets)
+ size_t width_loop, height_loop, depth_loop;
+ bool error =
+ get_image_dimensions(imageInfo, width_loop, height_loop, depth_loop);
+ if (!error)
{
- for (size_t z = 0; z < imageInfo->depth; z++)
+ if (gDisableOffsets)
{
- for (size_t y = 0; y < imageInfo->height; y++)
+ for (size_t z = 0; z < depth_loop; z++)
{
- for (size_t x = 0; x < imageInfo->width; x++, i++)
+ for (size_t y = 0; y < height_loop; y++)
{
- xOffsets[i] = (float)(xfract + (double)x);
- yOffsets[i] = (float)(yfract + (double)y);
- zOffsets[i] = (float)(zfract + (double)z);
+ for (size_t x = 0; x < width_loop; x++, i++)
+ {
+ xOffsets[i] = (float)(xfract + (double)x);
+ yOffsets[i] = (float)(yfract + (double)y);
+ zOffsets[i] = (float)(zfract + (double)z);
+ }
}
}
}
- }
- else
- {
- for (size_t z = 0; z < imageInfo->depth; z++)
+ else
{
- for (size_t y = 0; y < imageInfo->height; y++)
+ for (size_t z = 0; z < depth_loop; z++)
{
- for (size_t x = 0; x < imageInfo->width; x++, i++)
+ for (size_t y = 0; y < height_loop; y++)
{
- xOffsets[i] =
- (float)(xfract
- + (double)((int)x
- + random_in_range(-10, 10, d)));
- yOffsets[i] =
- (float)(yfract
- + (double)((int)y
- + random_in_range(-10, 10, d)));
- zOffsets[i] =
- (float)(zfract
- + (double)((int)z
- + random_in_range(-10, 10, d)));
+ for (size_t x = 0; x < width_loop; x++, i++)
+ {
+ xOffsets[i] =
+ (float)(xfract
+ + (double)((int)x
+ + random_in_range(-10, 10, d)));
+ yOffsets[i] =
+ (float)(yfract
+ + (double)((int)y
+ + random_in_range(-10, 10, d)));
+ zOffsets[i] =
+ (float)(zfract
+ + (double)((int)z
+ + random_in_range(-10, 10, d)));
+ }
}
}
}
- }
- if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
- {
- i = 0;
- for (size_t z = 0; z < imageInfo->depth; z++)
+ if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
{
- for (size_t y = 0; y < imageInfo->height; y++)
+ i = 0;
+ for (size_t z = 0; z < depth_loop; z++)
{
- for (size_t x = 0; x < imageInfo->width; x++, i++)
+ for (size_t y = 0; y < height_loop; y++)
{
- xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
- (double)imageInfo->width - 1.0);
- yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
- (double)imageInfo->height - 1.0);
- zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
- (double)imageInfo->depth - 1.0);
+ for (size_t x = 0; x < width_loop; x++, i++)
+ {
+ xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
+ (double)width_loop - 1.0);
+ yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
+ (double)height_loop - 1.0);
+ zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
+ (double)depth_loop - 1.0);
+ }
}
}
}
- }
- if (normalized_coords || gTestMipmaps)
- {
- i = 0;
- if (lod == 0)
+ if (normalized_coords || gTestMipmaps)
{
- for (size_t z = 0; z < imageInfo->depth; z++)
+ i = 0;
+ if (lod == 0)
{
- for (size_t y = 0; y < imageInfo->height; y++)
+ for (size_t z = 0; z < depth_loop; z++)
{
- for (size_t x = 0; x < imageInfo->width; x++, i++)
+ for (size_t y = 0; y < height_loop; y++)
{
- xOffsets[i] = (float)((double)xOffsets[i]
- / (double)imageInfo->width);
- yOffsets[i] = (float)((double)yOffsets[i]
- / (double)imageInfo->height);
- zOffsets[i] = (float)((double)zOffsets[i]
- / (double)imageInfo->depth);
+ for (size_t x = 0; x < width_loop; x++, i++)
+ {
+ xOffsets[i] = (float)((double)xOffsets[i]
+ / (double)width_loop);
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ yOffsets[i] = (float)((double)yOffsets[i]
+ / (double)height_loop);
+ }
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ zOffsets[i] = (float)((double)zOffsets[i]
+ / (double)depth_loop);
+ }
+ }
}
}
}
- }
- else if (gTestMipmaps)
- {
- size_t width_lod, height_lod, depth_lod;
-
- width_lod =
- (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
- height_lod =
- (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
- depth_lod =
- (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-
- for (size_t z = 0; z < depth_lod; z++)
+ else if (gTestMipmaps)
{
- for (size_t y = 0; y < height_lod; y++)
+ size_t width_lod =
+ (width_loop >> lod) ? (width_loop >> lod) : 1;
+ size_t height_lod = height_loop;
+ size_t depth_lod = depth_loop;
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ height_lod =
+ (height_loop >> lod) ? (height_loop >> lod) : 1;
+ }
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
{
- for (size_t x = 0; x < width_lod; x++, i++)
+ depth_lod = (depth_loop >> lod) ? (depth_loop >> lod) : 1;
+ }
+
+ for (size_t z = 0; z < depth_lod; z++)
+ {
+ for (size_t y = 0; y < height_lod; y++)
{
- xOffsets[i] =
- (float)((double)xOffsets[i] / (double)width_lod);
- yOffsets[i] =
- (float)((double)yOffsets[i] / (double)height_lod);
- zOffsets[i] =
- (float)((double)zOffsets[i] / (double)depth_lod);
+ for (size_t x = 0; x < width_lod; x++, i++)
+ {
+ xOffsets[i] = (float)((double)xOffsets[i]
+ / (double)width_lod);
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ yOffsets[i] = (float)((double)yOffsets[i]
+ / (double)height_lod);
+ }
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ zOffsets[i] = (float)((double)zOffsets[i]
+ / (double)depth_lod);
+ }
+ }
}
}
}
}
}
+ return error;
+}
+
+cl_mem create_image_of_type(cl_context context, cl_mem_flags mem_flags,
+ image_descriptor *imageInfo, size_t row_pitch,
+ size_t slice_pitch, void *host_ptr, cl_int *error)
+{
+ cl_mem image;
+ switch (imageInfo->type)
+ {
+ case CL_MEM_OBJECT_IMAGE3D:
+ image = create_image_3d(context, mem_flags, imageInfo->format,
+ imageInfo->width, imageInfo->height,
+ imageInfo->depth, row_pitch, slice_pitch,
+ host_ptr, error);
+ break;
+ default:
+ log_error("Implementation is incomplete, only 3D images are "
+ "supported so far");
+ return nullptr;
+ }
+ return image;
+}
+
+static size_t get_image_num_pixels(image_descriptor *imageInfo, size_t width,
+ size_t height, size_t depth,
+ size_t array_size)
+{
+ size_t image_size;
+ switch (imageInfo->type)
+ {
+ case CL_MEM_OBJECT_IMAGE3D: image_size = width * height * depth; break;
+ default:
+ log_error("Implementation is incomplete, only 3D images are "
+ "supported so far");
+ return 0;
+ }
+ return image_size;
}
int test_read_image(cl_context context, cl_command_queue queue,
@@ -160,6 +249,17 @@ int test_read_image(cl_context context, cl_command_queue queue,
size_t threads[3];
static int initHalf = 0;
+ size_t image_size =
+ get_image_num_pixels(imageInfo, imageInfo->width, imageInfo->height,
+ imageInfo->depth, imageInfo->arraySize);
+ test_assert_error(0 != image_size, "Invalid image size");
+ size_t width_size, height_size, depth_size;
+ if (get_image_dimensions(imageInfo, width_size, height_size, depth_size))
+ {
+ log_error("ERROR: invalid image dimensions");
+ return CL_INVALID_VALUE;
+ }
+
cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
clMemWrapper xOffsets, yOffsets, zOffsets, results;
@@ -168,14 +268,11 @@ int test_read_image(cl_context context, cl_command_queue queue,
// Create offset data
BufferOwningPtr<cl_float> xOffsetValues(
- malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
- * imageInfo->depth));
+ malloc(sizeof(cl_float) * image_size));
BufferOwningPtr<cl_float> yOffsetValues(
- malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
- * imageInfo->depth));
+ malloc(sizeof(cl_float) * image_size));
BufferOwningPtr<cl_float> zOffsetValues(
- malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
- * imageInfo->depth));
+ malloc(sizeof(cl_float) * image_size));
if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
if (DetectFloatToHalfRoundingMode(queue)) return 1;
@@ -206,26 +303,27 @@ int test_read_image(cl_context context, cl_command_queue queue,
{
generate_random_image_data(imageInfo,
maxImageUseHostPtrBackingStore, d);
- unprotImage = create_image_3d(
+ unprotImage = create_image_of_type(
context, image_read_write_flags | CL_MEM_USE_HOST_PTR,
- imageInfo->format, imageInfo->width, imageInfo->height,
- imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
+ imageInfo, (gEnablePitch ? imageInfo->rowPitch : 0),
(gEnablePitch ? imageInfo->slicePitch : 0),
maxImageUseHostPtrBackingStore, &error);
}
else
{
- error = protImage.Create(context, image_read_write_flags,
- imageInfo->format, imageInfo->width,
- imageInfo->height, imageInfo->depth);
+ error = protImage.Create(context, imageInfo->type,
+ image_read_write_flags, imageInfo->format,
+ imageInfo->width, imageInfo->height,
+ imageInfo->depth, imageInfo->arraySize);
}
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
+ log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
"(pitch %d, %d ) (%s)",
(int)imageInfo->width, (int)imageInfo->height,
- (int)imageInfo->depth, (int)imageInfo->rowPitch,
- (int)imageInfo->slicePitch, IGetErrorString(error));
+ (int)imageInfo->depth, (int)imageInfo->arraySize,
+ (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
+ IGetErrorString(error));
return error;
}
if (gTestMaxImages)
@@ -237,18 +335,18 @@ int test_read_image(cl_context context, cl_command_queue queue,
{
// Don't use clEnqueueWriteImage; just use copy host ptr to get the data
// in
- unprotImage = create_image_3d(
- context, image_read_write_flags | CL_MEM_COPY_HOST_PTR,
- imageInfo->format, imageInfo->width, imageInfo->height,
- imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
+ unprotImage = create_image_of_type(
+ context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, imageInfo,
+ (gEnablePitch ? imageInfo->rowPitch : 0),
(gEnablePitch ? imageInfo->slicePitch : 0), imageValues, &error);
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
+ log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
"(pitch %d, %d ) (%s)",
(int)imageInfo->width, (int)imageInfo->height,
- (int)imageInfo->depth, (int)imageInfo->rowPitch,
- (int)imageInfo->slicePitch, IGetErrorString(error));
+ (int)imageInfo->depth, (int)imageInfo->arraySize,
+ (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
+ IGetErrorString(error));
return error;
}
image = unprotImage;
@@ -260,19 +358,19 @@ int test_read_image(cl_context context, cl_command_queue queue,
// specified, so we just do the same thing either way
if (!gTestMipmaps)
{
- unprotImage = create_image_3d(
- context, image_read_write_flags | gMemFlagsToUse,
- imageInfo->format, imageInfo->width, imageInfo->height,
- imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
+ unprotImage = create_image_of_type(
+ context, image_read_write_flags | gMemFlagsToUse, imageInfo,
+ (gEnablePitch ? imageInfo->rowPitch : 0),
(gEnablePitch ? imageInfo->slicePitch : 0), imageValues,
&error);
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to create 3D image of size %d x %d x "
- "%d (pitch %d, %d ) (%s)",
+ log_error("ERROR: Unable to create image of size %d x %d x "
+ "%d x %d (pitch %d, %d ) (%s)",
(int)imageInfo->width, (int)imageInfo->height,
- (int)imageInfo->depth, (int)imageInfo->rowPitch,
- (int)imageInfo->slicePitch, IGetErrorString(error));
+ (int)imageInfo->depth, (int)imageInfo->arraySize,
+ (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
+ IGetErrorString(error));
return error;
}
image = unprotImage;
@@ -280,10 +378,11 @@ int test_read_image(cl_context context, cl_command_queue queue,
else
{
cl_image_desc image_desc = { 0 };
- image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+ image_desc.image_type = imageInfo->type;
image_desc.image_width = imageInfo->width;
image_desc.image_height = imageInfo->height;
image_desc.image_depth = imageInfo->depth;
+ image_desc.image_array_size = imageInfo->arraySize;
image_desc.num_mip_levels = imageInfo->num_mip_levels;
@@ -292,23 +391,24 @@ int test_read_image(cl_context context, cl_command_queue queue,
imageInfo->format, &image_desc, NULL, &error);
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to create %d level mipmapped 3D image "
- "of size %d x %d x %d (pitch %d, %d ) (%s)",
+ log_error("ERROR: Unable to create %d level mipmapped image "
+ "of size %d x %d x %d x %d (pitch %d, %d ) (%s)",
(int)imageInfo->num_mip_levels, (int)imageInfo->width,
(int)imageInfo->height, (int)imageInfo->depth,
- (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
- IGetErrorString(error));
+ (int)imageInfo->arraySize, (int)imageInfo->rowPitch,
+ (int)imageInfo->slicePitch, IGetErrorString(error));
return error;
}
image = unprotImage;
}
}
+ test_assert_error(nullptr != image, "Image creation failed");
+
if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR)
{
size_t origin[4] = { 0, 0, 0, 0 };
- size_t region[3] = { imageInfo->width, imageInfo->height,
- imageInfo->depth };
+ size_t region[3] = { width_size, height_size, depth_size };
if (gDebugTrace) log_info(" - Writing image...\n");
@@ -323,10 +423,10 @@ int test_read_image(cl_context context, cl_command_queue queue,
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to write to 3D image of size %d x %d "
- "x %d \n",
+ log_error("ERROR: Unable to write to image of size %d x %d "
+ "x %d x %d\n",
(int)imageInfo->width, (int)imageInfo->height,
- (int)imageInfo->depth);
+ (int)imageInfo->depth, (int)imageInfo->arraySize);
return error;
}
}
@@ -338,17 +438,15 @@ int test_read_image(cl_context context, cl_command_queue queue,
{
origin[3] = i;
error = clEnqueueWriteImage(
- queue, image, CL_TRUE, origin, region,
- /*gEnablePitch ? imageInfo->rowPitch :*/ 0,
- /*gEnablePitch ? imageInfo->slicePitch :*/ 0,
+ queue, image, CL_TRUE, origin, region, 0, 0,
((char *)imageValues + nextLevelOffset), 0, NULL, NULL);
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to write to %d level mipmapped 3D "
- "image of size %d x %d x %d\n",
+ log_error("ERROR: Unable to write to %d level mipmapped "
+ "image of size %d x %d x %d x %d\n",
(int)imageInfo->num_mip_levels,
(int)imageInfo->width, (int)imageInfo->height,
- (int)imageInfo->depth);
+                          (int)imageInfo->depth, (int)imageInfo->arraySize);
return error;
}
nextLevelOffset += region[0] * region[1] * region[2]
@@ -361,26 +459,21 @@ int test_read_image(cl_context context, cl_command_queue queue,
}
}
- xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
- sizeof(cl_float) * imageInfo->width
- * imageInfo->height * imageInfo->depth,
- xOffsetValues, &error);
+ xOffsets =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ sizeof(cl_float) * image_size, xOffsetValues, &error);
test_error(error, "Unable to create x offset buffer");
- yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
- sizeof(cl_float) * imageInfo->width
- * imageInfo->height * imageInfo->depth,
- yOffsetValues, &error);
+ yOffsets =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ sizeof(cl_float) * image_size, yOffsetValues, &error);
test_error(error, "Unable to create y offset buffer");
- zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
- sizeof(cl_float) * imageInfo->width
- * imageInfo->height * imageInfo->depth,
- zOffsetValues, &error);
+ zOffsets =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ sizeof(cl_float) * image_size, zOffsetValues, &error);
test_error(error, "Unable to create y offset buffer");
- results =
- clCreateBuffer(context, CL_MEM_READ_WRITE,
- get_explicit_type_size(outputType) * 4 * imageInfo->width
- * imageInfo->height * imageInfo->depth,
- NULL, &error);
+ results = clCreateBuffer(
+ context, CL_MEM_READ_WRITE,
+ get_explicit_type_size(outputType) * 4 * image_size, NULL, &error);
test_error(error, "Unable to create result buffer");
// Create sampler to use
@@ -443,16 +536,19 @@ int test_read_image(cl_context context, cl_command_queue queue,
}
int nextLevelOffset = 0;
- size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
- depth_lod = imageInfo->depth;
+ size_t width_lod = width_size, height_lod = height_size,
+ depth_lod = depth_size;
// Loop over all mipmap levels, if we are testing mipmapped images.
for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels)
|| (!gTestMipmaps && lod < 1);
lod++)
{
- size_t resultValuesSize = width_lod * height_lod * depth_lod
- * get_explicit_type_size(outputType) * 4;
+ size_t image_lod_size = get_image_num_pixels(
+ imageInfo, width_lod, height_lod, depth_lod, imageInfo->arraySize);
+ test_assert_error(0 != image_lod_size, "Invalid image size");
+ size_t resultValuesSize =
+ image_lod_size * get_explicit_type_size(outputType) * 4;
BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
float lod_float = (float)lod;
if (gTestMipmaps)
@@ -468,30 +564,25 @@ int test_read_image(cl_context context, cl_command_queue queue,
float offset = float_offsets[q % float_offset_count];
// Init the coordinates
- InitFloatCoordsCommon(imageInfo, imageSampler, xOffsetValues,
- yOffsetValues, zOffsetValues,
- q >= float_offset_count ? -offset : offset,
- q >= float_offset_count ? offset : -offset,
- q >= float_offset_count ? -offset : offset,
- imageSampler->normalized_coords, d, lod);
-
- error =
- clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
- sizeof(cl_float) * imageInfo->height
- * imageInfo->width * imageInfo->depth,
- xOffsetValues, 0, NULL, NULL);
+ error = InitFloatCoordsCommon(
+ imageInfo, imageSampler, xOffsetValues, yOffsetValues,
+ zOffsetValues, q >= float_offset_count ? -offset : offset,
+ q >= float_offset_count ? offset : -offset,
+ q >= float_offset_count ? -offset : offset,
+ imageSampler->normalized_coords, d, lod);
+ test_error(error, "Unable to initialise coordinates");
+
+ error = clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
+ sizeof(cl_float) * image_size,
+ xOffsetValues, 0, NULL, NULL);
test_error(error, "Unable to write x offsets");
- error =
- clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
- sizeof(cl_float) * imageInfo->height
- * imageInfo->width * imageInfo->depth,
- yOffsetValues, 0, NULL, NULL);
+ error = clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
+ sizeof(cl_float) * image_size,
+ yOffsetValues, 0, NULL, NULL);
test_error(error, "Unable to write y offsets");
- error =
- clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
- sizeof(cl_float) * imageInfo->height
- * imageInfo->width * imageInfo->depth,
- zOffsetValues, 0, NULL, NULL);
+ error = clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
+ sizeof(cl_float) * image_size,
+ zOffsetValues, 0, NULL, NULL);
test_error(error, "Unable to write z offsets");
@@ -510,11 +601,10 @@ int test_read_image(cl_context context, cl_command_queue queue,
test_error(error, "Unable to run kernel");
// Get results
- error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
- width_lod * height_lod * depth_lod
- * get_explicit_type_size(outputType)
- * 4,
- resultValues, 0, NULL, NULL);
+ error = clEnqueueReadBuffer(
+ queue, results, CL_TRUE, 0,
+ image_lod_size * get_explicit_type_size(outputType) * 4,
+ resultValues, 0, NULL, NULL);
test_error(error, "Unable to read results from kernel");
if (gDebugTrace) log_info(" results read\n");
@@ -556,7 +646,7 @@ int test_read_image(cl_context context, cl_command_queue queue,
// Apple requires its CPU implementation to do
// correctly rounded address arithmetic in all
// modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
)
offset = 0.0f; // Loop only once
@@ -874,7 +964,7 @@ int test_read_image(cl_context context, cl_command_queue queue,
// Apple requires its CPU implementation to do
// correctly rounded address arithmetic in all
// modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
)
offset = 0.0f; // Loop only once
@@ -934,13 +1024,13 @@ int test_read_image(cl_context context, cl_command_queue queue,
{
err4 = 0.0f;
}
- float maxErr1 = MAX(
+ float maxErr1 = std::max(
maxErr * maxPixel.p[0], FLT_MIN);
- float maxErr2 = MAX(
+ float maxErr2 = std::max(
maxErr * maxPixel.p[1], FLT_MIN);
- float maxErr3 = MAX(
+ float maxErr3 = std::max(
maxErr * maxPixel.p[2], FLT_MIN);
- float maxErr4 = MAX(
+ float maxErr4 = std::max(
maxErr * maxPixel.p[3], FLT_MIN);
if (!(err1 <= maxErr1)
@@ -1039,17 +1129,17 @@ int test_read_image(cl_context context, cl_command_queue queue,
float err4 = ABS_ERROR(resultPtr[3],
expected[3]);
float maxErr1 =
- MAX(maxErr * maxPixel.p[0],
- FLT_MIN);
+ std::max(maxErr * maxPixel.p[0],
+ FLT_MIN);
float maxErr2 =
- MAX(maxErr * maxPixel.p[1],
- FLT_MIN);
+ std::max(maxErr * maxPixel.p[1],
+ FLT_MIN);
float maxErr3 =
- MAX(maxErr * maxPixel.p[2],
- FLT_MIN);
+ std::max(maxErr * maxPixel.p[2],
+ FLT_MIN);
float maxErr4 =
- MAX(maxErr * maxPixel.p[3],
- FLT_MIN);
+ std::max(maxErr * maxPixel.p[3],
+ FLT_MIN);
if (!(err1 <= maxErr1)
@@ -1213,7 +1303,8 @@ int test_read_image(cl_context context, cl_command_queue queue,
// offsets (0.0, 0.0) E.g., test one
// pixel.
if (!imageSampler->normalized_coords
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ || !(gDeviceType
+ & CL_DEVICE_TYPE_GPU)
|| NORM_OFFSET == 0)
{
norm_offset_x = 0.0f;
@@ -1395,7 +1486,8 @@ int test_read_image(cl_context context, cl_command_queue queue,
// offsets (0.0, 0.0) E.g., test one
// pixel.
if (!imageSampler->normalized_coords
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ || !(gDeviceType
+ & CL_DEVICE_TYPE_GPU)
|| NORM_OFFSET == 0)
{
norm_offset_x = 0.0f;
@@ -1537,10 +1629,51 @@ int test_read_image(cl_context context, cl_command_queue queue,
nextLevelOffset += width_lod * height_lod * depth_lod
* get_pixel_size(imageInfo->format);
width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
- height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
- depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
+ }
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
+ }
}
}
return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
-} \ No newline at end of file
+}
+
+void filter_undefined_bits(image_descriptor *imageInfo, char *resultPtr)
+{
+ // mask off the top bit (bit 15) if the image format is (CL_UNORM_SHORT_555,
+ // CL_RGB). (Note: OpenCL says: the top bit is undefined meaning it can be
+ // either 0 or 1.)
+ if (imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555)
+ {
+ cl_ushort *temp = (cl_ushort *)resultPtr;
+ temp[0] &= 0x7fff;
+ }
+}
+
+int filter_rounding_errors(int forceCorrectlyRoundedWrites,
+ image_descriptor *imageInfo, float *errors)
+{
+ // We are allowed 0.6 absolute error vs. infinitely precise for some
+ // normalized formats
+ if (0 == forceCorrectlyRoundedWrites
+ && (imageInfo->format->image_channel_data_type == CL_UNORM_INT8
+ || imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010
+ || imageInfo->format->image_channel_data_type == CL_UNORM_INT16
+ || imageInfo->format->image_channel_data_type == CL_SNORM_INT8
+ || imageInfo->format->image_channel_data_type == CL_SNORM_INT16
+ || imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555
+ || imageInfo->format->image_channel_data_type
+ == CL_UNORM_SHORT_565))
+ {
+ if (!(fabsf(errors[0]) > 0.6f) && !(fabsf(errors[1]) > 0.6f)
+ && !(fabsf(errors[2]) > 0.6f) && !(fabsf(errors[3]) > 0.6f))
+ return 0;
+ }
+
+ return 1;
+}
diff --git a/test_conformance/images/kernel_read_write/test_common.h b/test_conformance/images/kernel_read_write/test_common.h
index e7ecbe0b..fc95bee2 100644
--- a/test_conformance/images/kernel_read_write/test_common.h
+++ b/test_conformance/images/kernel_read_write/test_common.h
@@ -42,12 +42,8 @@ extern int test_read_image(cl_context context, cl_command_queue queue,
bool useFloatCoords, ExplicitType outputType,
MTdata d);
-extern void InitFloatCoordsCommon(image_descriptor *imageInfo,
- image_sampler_data *imageSampler,
- float *xOffsets, float *yOffsets,
- float *zOffsets, float xfract, float yfract,
- float zfract, int normalized_coords, MTdata d,
- int lod);
+extern bool get_image_dimensions(image_descriptor *imageInfo, size_t &width,
+ size_t &height, size_t &depth);
template <class T>
int determine_validation_error_offset(
@@ -63,8 +59,12 @@ int determine_validation_error_offset(
bool clampingErr = false, clamped = false, otherClampingBug = false;
int clampedX, clampedY, clampedZ;
- size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height,
- imageDepth = imageInfo->depth;
+ size_t imageWidth, imageHeight, imageDepth;
+ if (get_image_dimensions(imageInfo, imageWidth, imageHeight, imageDepth))
+ {
+ log_error("ERROR: invalid image dimensions");
+ return TEST_FAIL;
+ }
clamped = get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset,
zAddressOffset, imageWidth, imageHeight,
@@ -147,85 +147,75 @@ int determine_validation_error_offset(
}
if (!clampingErr)
{
- /* if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 ||
- (int)y + (int)yOffsetValues[ j ] < 0 ) )
- {
- log_error( "NEGATIVE COORDINATE ERROR\n" );
- return -1;
- }
- */
- if (true) // gExtraValidateInfo )
+ if (printAsFloat)
{
- if (printAsFloat)
- {
- log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
- "validate!\n\tExpected (%g,%g,%g,%g),\n\t got "
- "(%g,%g,%g,%g), error of %g\n",
- j, x, x, y, y, z, z, (float)expected[0],
- (float)expected[1], (float)expected[2],
- (float)expected[3], (float)resultPtr[0],
- (float)resultPtr[1], (float)resultPtr[2],
- (float)resultPtr[3], error);
- }
- else
- {
- log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
- "validate!\n\tExpected (%x,%x,%x,%x),\n\t got "
- "(%x,%x,%x,%x)\n",
- j, x, x, y, y, z, z, (int)expected[0],
- (int)expected[1], (int)expected[2], (int)expected[3],
- (int)resultPtr[0], (int)resultPtr[1],
- (int)resultPtr[2], (int)resultPtr[3]);
- }
- log_error(
- "Integer coords resolve to %d,%d,%d with img size %d,%d,%d\n",
- clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight,
- (int)imageDepth);
+ log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
+ "validate!\n\tExpected (%g,%g,%g,%g),\n\t got "
+ "(%g,%g,%g,%g), error of %g\n",
+ j, x, x, y, y, z, z, (float)expected[0],
+ (float)expected[1], (float)expected[2],
+ (float)expected[3], (float)resultPtr[0],
+ (float)resultPtr[1], (float)resultPtr[2],
+ (float)resultPtr[3], error);
+ }
+ else
+ {
+ log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
+ "validate!\n\tExpected (%x,%x,%x,%x),\n\t got "
+ "(%x,%x,%x,%x)\n",
+ j, x, x, y, y, z, z, (int)expected[0], (int)expected[1],
+ (int)expected[2], (int)expected[3], (int)resultPtr[0],
+ (int)resultPtr[1], (int)resultPtr[2], (int)resultPtr[3]);
+ }
+ log_error(
+ "Integer coords resolve to %d,%d,%d with img size %d,%d,%d\n",
+ clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight,
+ (int)imageDepth);
- if (printAsFloat && gExtraValidateInfo)
+ if (printAsFloat && gExtraValidateInfo)
+ {
+ log_error("\nNearby values:\n");
+ for (int zOff = -1; zOff <= 1; zOff++)
{
- log_error("\nNearby values:\n");
- for (int zOff = -1; zOff <= 1; zOff++)
+ for (int yOff = -1; yOff <= 1; yOff++)
{
- for (int yOff = -1; yOff <= 1; yOff++)
- {
- float top[4], real[4], bot[4];
- read_image_pixel_float(imagePtr, imageInfo,
- clampedX - 1, clampedY + yOff,
- clampedZ + zOff, top);
- read_image_pixel_float(imagePtr, imageInfo, clampedX,
- clampedY + yOff, clampedZ + zOff,
- real);
- read_image_pixel_float(imagePtr, imageInfo,
- clampedX + 1, clampedY + yOff,
- clampedZ + zOff, bot);
- log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2],
- top[3]);
- log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2],
- real[3]);
- log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2],
- bot[3]);
- }
+ float top[4], real[4], bot[4];
+ read_image_pixel_float(imagePtr, imageInfo, clampedX - 1,
+ clampedY + yOff, clampedZ + zOff,
+ top);
+ read_image_pixel_float(imagePtr, imageInfo, clampedX,
+ clampedY + yOff, clampedZ + zOff,
+ real);
+ read_image_pixel_float(imagePtr, imageInfo, clampedX + 1,
+ clampedY + yOff, clampedZ + zOff,
+ bot);
+ log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2],
+ top[3]);
+ log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2],
+ real[3]);
+ log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2],
+ bot[3]);
}
}
- // }
- // else
- // log_error( "\n" );
- if (imageSampler->filter_mode != CL_FILTER_LINEAR)
- {
- if (found)
- log_error(
- "\tValue really found in image at %d,%d,%d (%s)\n",
- actualX, actualY, actualZ,
- (found > 1) ? "NOT unique!!" : "unique");
- else
- log_error("\tValue not actually found in image\n");
- }
- log_error("\n");
}
+ if (imageSampler->filter_mode != CL_FILTER_LINEAR)
+ {
+ if (found)
+ log_error("\tValue really found in image at %d,%d,%d (%s)\n",
+ actualX, actualY, actualZ,
+ (found > 1) ? "NOT unique!!" : "unique");
+ else
+ log_error("\tValue not actually found in image\n");
+ }
+ log_error("\n");
numClamped = -1; // We force the clamped counter to never work
if ((--numTries) == 0) return -1;
}
return 0;
}
+
+
+extern int filter_rounding_errors(int forceCorrectlyRoundedWrites,
+ image_descriptor *imageInfo, float *errors);
+extern void filter_undefined_bits(image_descriptor *imageInfo, char *resultPtr);
diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp
index 03ca9595..05aed02c 100644
--- a/test_conformance/images/kernel_read_write/test_iterations.cpp
+++ b/test_conformance/images/kernel_read_write/test_iterations.cpp
@@ -16,6 +16,8 @@
#include "test_common.h"
#include <float.h>
+#include <algorithm>
+
#if defined( __APPLE__ )
#include <signal.h>
#include <sys/signal.h>
@@ -37,24 +39,28 @@ static size_t reduceImageSizeRange(size_t maxDimSize) {
}
const char *read2DKernelSourcePattern =
-"__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
-"{\n"
-"%s"
-" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
-"%s"
-"%s"
-" results[offset] = read_image%s( input, imageSampler, coords %s);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( read_only %s input,%s __global float "
+ "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
+ "{\n"
+ "%s"
+ " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+ "%s"
+ "%s"
+ " results[offset] = read_image%s( input, imageSampler, coords %s);\n"
+ "}";
const char *read_write2DKernelSourcePattern =
-"__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
-"{\n"
-"%s"
-" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
-"%s"
-"%s"
-" results[offset] = read_image%s( input, coords %s);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( read_write %s input,%s __global float "
+ "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
+ "{\n"
+ "%s"
+ " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+ "%s"
+ "%s"
+ " results[offset] = read_image%s( input, coords %s);\n"
+ "}";
const char *intCoordKernelSource =
" int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n";
@@ -413,12 +419,15 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl
int checkOnlyOnePixel = 0;
int found_pixel = 0;
float offset = NORM_OFFSET;
- if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+ if (!imageSampler->normalized_coords
+ || imageSampler->filter_mode != CL_FILTER_NEAREST
+ || NORM_OFFSET == 0
#if defined( __APPLE__ )
- // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ // Apple requires its CPU implementation to do correctly
+ // rounded address arithmetic in all modes
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
- )
+ )
offset = 0.0f; // Loop only once
for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
@@ -434,7 +443,8 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl
float err1 = ABS_ERROR(resultPtr[0], expected[0]);
// Clamp to the minimum absolute error for the format
if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+ float maxErr1 =
+ std::max(maxErr * maxPixel.p[0], FLT_MIN);
// Check if the result matches.
if( ! (err1 <= maxErr1) )
@@ -471,7 +481,10 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -484,7 +497,8 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl
imageSampler, expected, 0, &containsDenormals );
float err1 = ABS_ERROR(resultPtr[0], expected[0]);
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+ float maxErr1 =
+ std::max(maxErr * maxPixel.p[0], FLT_MIN);
if( ! (err1 <= maxErr1) )
@@ -565,12 +579,15 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form
int checkOnlyOnePixel = 0;
int found_pixel = 0;
float offset = NORM_OFFSET;
- if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+ if (!imageSampler->normalized_coords
+ || imageSampler->filter_mode != CL_FILTER_NEAREST
+ || NORM_OFFSET == 0
#if defined( __APPLE__ )
- // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ // Apple requires its CPU implementation to do correctly
+ // rounded address arithmetic in all modes
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
- )
+ )
offset = 0.0f; // Loop only once
for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
@@ -598,10 +615,14 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form
if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
- float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
- float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
- float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+ float maxErr1 =
+ std::max(maxErr * maxPixel.p[0], FLT_MIN);
+ float maxErr2 =
+ std::max(maxErr * maxPixel.p[1], FLT_MIN);
+ float maxErr3 =
+ std::max(maxErr * maxPixel.p[2], FLT_MIN);
+ float maxErr4 =
+ std::max(maxErr * maxPixel.p[3], FLT_MIN);
// Check if the result matches.
if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
@@ -650,7 +671,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -671,10 +695,14 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form
float err2 = ABS_ERROR(resultPtr[1], expected[1]);
float err3 = ABS_ERROR(resultPtr[2], expected[2]);
float err4 = ABS_ERROR(resultPtr[3], expected[3]);
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
- float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
- float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
- float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+ float maxErr1 =
+ std::max(maxErr * maxPixel.p[0], FLT_MIN);
+ float maxErr2 =
+ std::max(maxErr * maxPixel.p[1], FLT_MIN);
+ float maxErr3 =
+ std::max(maxErr * maxPixel.p[2], FLT_MIN);
+ float maxErr4 =
+ std::max(maxErr * maxPixel.p[3], FLT_MIN);
if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
@@ -766,7 +794,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -801,7 +832,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -862,7 +896,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -897,7 +934,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -963,12 +1003,15 @@ int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double
int checkOnlyOnePixel = 0;
int found_pixel = 0;
float offset = NORM_OFFSET;
- if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+ if (!imageSampler->normalized_coords
+ || imageSampler->filter_mode != CL_FILTER_NEAREST
+ || NORM_OFFSET == 0
#if defined( __APPLE__ )
- // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ // Apple requires its CPU implementation to do correctly
+ // rounded address arithmetic in all modes
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
- )
+ )
offset = 0.0f; // Loop only once
for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
@@ -1042,7 +1085,10 @@ int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -1649,16 +1695,18 @@ int test_read_image_set_2D(cl_device_id device, cl_context context,
}
- sprintf( programSrc, KernelSourcePattern,
- (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t",
- samplerArg, get_explicit_type_name( outputType ),
+ sprintf(programSrc, KernelSourcePattern,
+ gTestMipmaps
+ ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable"
+ : "",
+ (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t"
+ : "image2d_t",
+ samplerArg, get_explicit_type_name(outputType),
(format->image_channel_order == CL_DEPTH) ? "" : "4",
- gTestMipmaps?", float lod":" ",
- samplerVar,
- gTestMipmaps? lodOffsetSource : offsetSource,
- floatCoords ? floatKernelSource : intCoordKernelSource,
- readFormat,
- gTestMipmaps?", lod":" ");
+ gTestMipmaps ? ", float lod" : " ", samplerVar,
+ gTestMipmaps ? lodOffsetSource : offsetSource,
+ floatCoords ? floatKernelSource : intCoordKernelSource, readFormat,
+ gTestMipmaps ? ", lod" : " ");
ptr = programSrc;
error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
diff --git a/test_conformance/images/kernel_read_write/test_loops.cpp b/test_conformance/images/kernel_read_write/test_loops.cpp
index 795a9eda..ea1e1c7c 100644
--- a/test_conformance/images/kernel_read_write/test_loops.cpp
+++ b/test_conformance/images/kernel_read_write/test_loops.cpp
@@ -84,7 +84,7 @@ int test_read_image_type(cl_device_id device, cl_context context,
// of operations for linear filtering on the GPU. We do not test linear
// filtering for the CL_RGB CL_UNORM_INT_101010 image format; however, we
// test it internally for a set of other image formats.
- if ((gDeviceType == CL_DEVICE_TYPE_GPU)
+ if ((gDeviceType & CL_DEVICE_TYPE_GPU)
&& (imageSampler->filter_mode == CL_FILTER_LINEAR)
&& (format->image_channel_order == CL_RGB)
&& (format->image_channel_data_type == CL_UNORM_INT_101010))
diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp
index c9ba4e84..2a722088 100644
--- a/test_conformance/images/kernel_read_write/test_read_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp
@@ -17,6 +17,8 @@
#include "test_common.h"
#include <float.h>
+#include <algorithm>
+
#if defined( __APPLE__ )
#include <signal.h>
#include <sys/signal.h>
@@ -24,24 +26,28 @@
#endif
const char *read1DKernelSourcePattern =
-"__kernel void sample_kernel( read_only image1d_t input,%s __global float *xOffsets, __global %s4 *results %s)\n"
-"{\n"
-"%s"
-" int tidX = get_global_id(0);\n"
-" int offset = tidX;\n"
-"%s"
-" results[offset] = read_image%s( input, imageSampler, coord %s);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( read_only image1d_t input,%s __global float "
+ "*xOffsets, __global %s4 *results %s)\n"
+ "{\n"
+ "%s"
+ " int tidX = get_global_id(0);\n"
+ " int offset = tidX;\n"
+ "%s"
+ " results[offset] = read_image%s( input, imageSampler, coord %s);\n"
+ "}";
const char *read_write1DKernelSourcePattern =
-"__kernel void sample_kernel( read_write image1d_t input,%s __global float *xOffsets, __global %s4 *results %s)\n"
-"{\n"
-"%s"
-" int tidX = get_global_id(0);\n"
-" int offset = tidX;\n"
-"%s"
-" results[offset] = read_image%s( input, coord %s);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( read_write image1d_t input,%s __global float "
+ "*xOffsets, __global %s4 *results %s)\n"
+ "{\n"
+ "%s"
+ " int tidX = get_global_id(0);\n"
+ " int offset = tidX;\n"
+ "%s"
+ " results[offset] = read_image%s( input, coord %s);\n"
+ "}";
const char *int1DCoordKernelSource =
" int coord = xOffsets[offset];\n";
@@ -485,10 +491,13 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke
int checkOnlyOnePixel = 0;
int found_pixel = 0;
float offset = NORM_OFFSET;
- if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+ if (!imageSampler->normalized_coords
+ || imageSampler->filter_mode != CL_FILTER_NEAREST
+ || NORM_OFFSET == 0
#if defined( __APPLE__ )
- // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ // Apple requires its CPU implementation to do correctly
+ // rounded address arithmetic in all modes
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
)
offset = 0.0f; // Loop only once
@@ -551,7 +560,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
checkOnlyOnePixel = 1;
}
@@ -644,10 +656,13 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke
int checkOnlyOnePixel = 0;
int found_pixel = 0;
float offset = NORM_OFFSET;
- if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+ if (!imageSampler->normalized_coords
+ || imageSampler->filter_mode != CL_FILTER_NEAREST
+ || NORM_OFFSET == 0
#if defined( __APPLE__ )
- // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ // Apple requires its CPU implementation to do correctly
+ // rounded address arithmetic in all modes
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
)
offset = 0.0f; // Loop only once
@@ -669,10 +684,14 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke
if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
- float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
- float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
- float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+ float maxErr1 =
+ std::max(maxErr * maxPixel.p[0], FLT_MIN);
+ float maxErr2 =
+ std::max(maxErr * maxPixel.p[1], FLT_MIN);
+ float maxErr3 =
+ std::max(maxErr * maxPixel.p[2], FLT_MIN);
+ float maxErr4 =
+ std::max(maxErr * maxPixel.p[3], FLT_MIN);
// Check if the result matches.
if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
@@ -714,7 +733,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
checkOnlyOnePixel = 1;
}
@@ -732,10 +754,14 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke
ABS_ERROR(resultPtr[2], expected[2]);
float err4 =
ABS_ERROR(resultPtr[3], expected[3]);
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
- float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
- float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
- float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+ float maxErr1 =
+ std::max(maxErr * maxPixel.p[0], FLT_MIN);
+ float maxErr2 =
+ std::max(maxErr * maxPixel.p[1], FLT_MIN);
+ float maxErr3 =
+ std::max(maxErr * maxPixel.p[2], FLT_MIN);
+ float maxErr4 =
+ std::max(maxErr * maxPixel.p[3], FLT_MIN);
if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
@@ -816,7 +842,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
checkOnlyOnePixel = 1;
}
@@ -847,7 +876,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
checkOnlyOnePixel = 1;
}
@@ -903,7 +935,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
checkOnlyOnePixel = 1;
}
@@ -934,7 +969,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
checkOnlyOnePixel = 1;
}
@@ -1041,14 +1079,14 @@ int test_read_image_set_1D(cl_device_id device, cl_context context,
{
KernelSourcePattern = read1DKernelSourcePattern;
}
- sprintf( programSrc,
- KernelSourcePattern,
- samplerArg, get_explicit_type_name( outputType ),
- gTestMipmaps ? ", float lod" : "",
- samplerVar,
+ sprintf(programSrc, KernelSourcePattern,
+ gTestMipmaps
+ ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable"
+ : "",
+ samplerArg, get_explicit_type_name(outputType),
+ gTestMipmaps ? ", float lod" : "", samplerVar,
floatCoords ? float1DKernelSource : int1DCoordKernelSource,
- readFormat,
- gTestMipmaps ? ", lod" : "" );
+ readFormat, gTestMipmaps ? ", lod" : "");
ptr = programSrc;
diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
index b3287ded..a8009420 100644
--- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
@@ -16,32 +16,37 @@
#include "test_common.h"
#include <float.h>
+#include <algorithm>
+
#if defined( __APPLE__ )
#include <signal.h>
#include <sys/signal.h>
#include <setjmp.h>
#endif
-
const char *read1DArrayKernelSourcePattern =
-"__kernel void sample_kernel( read_only image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n"
-"{\n"
-"%s"
-" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
-"%s"
-"%s"
-" results[offset] = read_image%s( input, imageSampler, coords %s);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( read_only image1d_array_t input,%s __global "
+ "float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n"
+ "{\n"
+ "%s"
+ " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+ "%s"
+ "%s"
+ " results[offset] = read_image%s( input, imageSampler, coords %s);\n"
+ "}";
const char *read_write1DArrayKernelSourcePattern =
-"__kernel void sample_kernel( read_write image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s )\n"
-"{\n"
-"%s"
-" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
-"%s"
-"%s"
-" results[offset] = read_image%s( input, coords %s);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( read_write image1d_array_t input,%s __global "
+ "float *xOffsets, __global float *yOffsets, __global %s4 *results %s )\n"
+ "{\n"
+ "%s"
+ " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+ "%s"
+ "%s"
+ " results[offset] = read_image%s( input, coords %s);\n"
+ "}";
const char *offset1DArrayKernelSource =
" int offset = tidY*get_image_width(input) + tidX;\n";
@@ -577,12 +582,15 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker
int checkOnlyOnePixel = 0;
int found_pixel = 0;
float offset = NORM_OFFSET;
- if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+ if (!imageSampler->normalized_coords
+ || imageSampler->filter_mode != CL_FILTER_NEAREST
+ || NORM_OFFSET == 0
#if defined( __APPLE__ )
- // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ // Apple requires its CPU implementation to do correctly
+ // rounded address arithmetic in all modes
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
- )
+ )
offset = 0.0f; // Loop only once
for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
@@ -646,7 +654,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -745,12 +756,15 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker
int checkOnlyOnePixel = 0;
int found_pixel = 0;
float offset = NORM_OFFSET;
- if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+ if (!imageSampler->normalized_coords
+ || imageSampler->filter_mode != CL_FILTER_NEAREST
+ || NORM_OFFSET == 0
#if defined( __APPLE__ )
- // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ // Apple requires its CPU implementation to do correctly
+ // rounded address arithmetic in all modes
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
- )
+ )
offset = 0.0f; // Loop only once
for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
@@ -772,10 +786,14 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker
if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
- float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
- float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
- float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+ float maxErr1 =
+ std::max(maxErr * maxPixel.p[0], FLT_MIN);
+ float maxErr2 =
+ std::max(maxErr * maxPixel.p[1], FLT_MIN);
+ float maxErr3 =
+ std::max(maxErr * maxPixel.p[2], FLT_MIN);
+ float maxErr4 =
+ std::max(maxErr * maxPixel.p[3], FLT_MIN);
// Check if the result matches.
if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
@@ -819,7 +837,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -838,10 +859,14 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker
ABS_ERROR(resultPtr[2], expected[2]);
float err4 =
ABS_ERROR(resultPtr[3], expected[3]);
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
- float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
- float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
- float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+ float maxErr1 =
+ std::max(maxErr * maxPixel.p[0], FLT_MIN);
+ float maxErr2 =
+ std::max(maxErr * maxPixel.p[1], FLT_MIN);
+ float maxErr3 =
+ std::max(maxErr * maxPixel.p[2], FLT_MIN);
+ float maxErr4 =
+ std::max(maxErr * maxPixel.p[3], FLT_MIN);
if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
@@ -926,7 +951,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -956,7 +984,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -1012,7 +1043,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -1042,7 +1076,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
checkOnlyOnePixel = 1;
@@ -1147,15 +1184,15 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context,
KernelSourcePattern = read_write1DArrayKernelSourcePattern;
}
- sprintf( programSrc,
- KernelSourcePattern,
- samplerArg, get_explicit_type_name( outputType ),
- gTestMipmaps ? ", float lod" : "",
- samplerVar,
- gTestMipmaps ? offset1DArrayLodKernelSource : offset1DArrayKernelSource,
- floatCoords ? floatKernelSource1DArray : intCoordKernelSource1DArray,
- readFormat,
- gTestMipmaps ? ", lod" : "" );
+ sprintf(
+ programSrc, KernelSourcePattern,
+ gTestMipmaps ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable"
+ : "",
+ samplerArg, get_explicit_type_name(outputType),
+ gTestMipmaps ? ", float lod" : "", samplerVar,
+ gTestMipmaps ? offset1DArrayLodKernelSource : offset1DArrayKernelSource,
+ floatCoords ? floatKernelSource1DArray : intCoordKernelSource1DArray,
+ readFormat, gTestMipmaps ? ", lod" : "");
ptr = programSrc;
error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
index 7cb334b2..533a0fe8 100644
--- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
@@ -16,6 +16,8 @@
#include "test_common.h"
#include <float.h>
+#include <algorithm>
+
// Utility function to clamp down image sizes for certain tests to avoid
// using too much memory.
static size_t reduceImageSizeRange(size_t maxDimSize) {
@@ -39,24 +41,32 @@ static size_t reduceImageDepth(size_t maxDepth) {
}
const char *read2DArrayKernelSourcePattern =
-"__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s%s *results %s )\n"
-"{\n"
-"%s"
-" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
-"%s"
-"%s"
-" results[offset] = read_image%s( input, imageSampler, coords %s);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( read_only %s input,%s __global float "
+ "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global "
+ "%s%s *results %s )\n"
+ "{\n"
+ "%s"
+ " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
+ "get_global_id(2);\n"
+ "%s"
+ "%s"
+ " results[offset] = read_image%s( input, imageSampler, coords %s);\n"
+ "}";
const char *read_write2DArrayKernelSourcePattern =
-"__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s%s *results %s)\n"
-"{\n"
-"%s"
-" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
-"%s"
-"%s"
-" results[offset] = read_image%s( input, coords %s);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( read_write %s input,%s __global float "
+ "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global "
+ "%s%s *results %s)\n"
+ "{\n"
+ "%s"
+ " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
+ "get_global_id(2);\n"
+ "%s"
+ "%s"
+ " results[offset] = read_image%s( input, coords %s);\n"
+ "}";
const char* offset2DarraySource =" int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n";
const char* offset2DarraySourceLod =
@@ -595,12 +605,15 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
int checkOnlyOnePixel = 0;
int found_pixel = 0;
float offset = NORM_OFFSET;
- if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+ if (!imageSampler->normalized_coords
+ || imageSampler->filter_mode != CL_FILTER_NEAREST
+ || NORM_OFFSET == 0
#if defined( __APPLE__ )
- // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ // Apple requires its CPU implementation to do
+ // correctly rounded address arithmetic in all modes
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
- )
+ )
offset = 0.0f; // Loop only once
for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
@@ -617,7 +630,8 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
ABS_ERROR(resultPtr[0], expected[0]);
// Clamp to the minimum absolute error for the format
if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+ float maxErr1 = std::max(
+ maxErr * maxPixel.p[0], FLT_MIN);
if( ! (err1 <= maxErr1) )
{
@@ -661,7 +675,8 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
float err1 = ABS_ERROR(resultPtr[0],
expected[0]);
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+ float maxErr1 = std::max(
+ maxErr * maxPixel.p[0], FLT_MIN);
if( ! (err1 <= maxErr1) )
@@ -734,12 +749,15 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
int checkOnlyOnePixel = 0;
int found_pixel = 0;
float offset = NORM_OFFSET;
- if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+ if (!imageSampler->normalized_coords
+ || imageSampler->filter_mode != CL_FILTER_NEAREST
+ || NORM_OFFSET == 0
#if defined( __APPLE__ )
- // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ // Apple requires its CPU implementation to do
+ // correctly rounded address arithmetic in all modes
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
- )
+ )
offset = 0.0f; // Loop only once
for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
@@ -911,12 +929,15 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
int checkOnlyOnePixel = 0;
int found_pixel = 0;
float offset = NORM_OFFSET;
- if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+ if (!imageSampler->normalized_coords
+ || imageSampler->filter_mode != CL_FILTER_NEAREST
+ || NORM_OFFSET == 0
#if defined( __APPLE__ )
- // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
- || gDeviceType != CL_DEVICE_TYPE_GPU
+ // Apple requires its CPU implementation to do
+ // correctly rounded address arithmetic in all modes
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
#endif
- )
+ )
offset = 0.0f; // Loop only once
for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
@@ -942,10 +963,14 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
- float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
- float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
- float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+ float maxErr1 = std::max(
+ maxErr * maxPixel.p[0], FLT_MIN);
+ float maxErr2 = std::max(
+ maxErr * maxPixel.p[1], FLT_MIN);
+ float maxErr3 = std::max(
+ maxErr * maxPixel.p[2], FLT_MIN);
+ float maxErr4 = std::max(
+ maxErr * maxPixel.p[3], FLT_MIN);
if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
{
@@ -1004,10 +1029,14 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
expected[2]);
float err4 = ABS_ERROR(resultPtr[3],
expected[3]);
- float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
- float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
- float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
- float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+ float maxErr1 = std::max(
+ maxErr * maxPixel.p[0], FLT_MIN);
+ float maxErr2 = std::max(
+ maxErr * maxPixel.p[1], FLT_MIN);
+ float maxErr3 = std::max(
+ maxErr * maxPixel.p[2], FLT_MIN);
+ float maxErr4 = std::max(
+ maxErr * maxPixel.p[3], FLT_MIN);
if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
@@ -1096,7 +1125,10 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
norm_offset_z = 0.0f;
@@ -1135,7 +1167,11 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType
+ & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
norm_offset_z = 0.0f;
@@ -1204,7 +1240,10 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
norm_offset_z = 0.0f;
@@ -1243,7 +1282,11 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
// If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
// E.g., test one pixel.
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0 || NORM_OFFSET == 0 || NORM_OFFSET == 0) {
+ if (!imageSampler->normalized_coords
+ || !(gDeviceType
+ & CL_DEVICE_TYPE_GPU)
+ || NORM_OFFSET == 0)
+ {
norm_offset_x = 0.0f;
norm_offset_y = 0.0f;
norm_offset_z = 0.0f;
@@ -1377,17 +1420,16 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context,
}
// Construct the source
- sprintf( programSrc,
- KernelSourcePattern,
- imageType,
- samplerArg, get_explicit_type_name( outputType ),
- imageElement,
- gTestMipmaps ? ", float lod" : " ",
- samplerVar,
+ sprintf(programSrc, KernelSourcePattern,
+ gTestMipmaps
+ ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable"
+ : "",
+ imageType, samplerArg, get_explicit_type_name(outputType),
+ imageElement, gTestMipmaps ? ", float lod" : " ", samplerVar,
gTestMipmaps ? offset2DarraySourceLod : offset2DarraySource,
- floatCoords ? float2DArrayUnnormalizedCoordKernelSource : int2DArrayCoordKernelSource,
- readFormat,
- gTestMipmaps ? ", lod" : " " );
+ floatCoords ? float2DArrayUnnormalizedCoordKernelSource
+ : int2DArrayCoordKernelSource,
+ readFormat, gTestMipmaps ? ", lod" : " ");
ptr = programSrc;
error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
diff --git a/test_conformance/images/kernel_read_write/test_read_3D.cpp b/test_conformance/images/kernel_read_write/test_read_3D.cpp
index 860114fb..cec77bf0 100644
--- a/test_conformance/images/kernel_read_write/test_read_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_3D.cpp
@@ -36,24 +36,32 @@ static size_t reduceImageDepth(size_t maxDimSize, RandomSeed& seed) {
const char *read3DKernelSourcePattern =
-"__kernel void sample_kernel( read_only image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results %s)\n"
-"{\n"
-"%s"
-" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
-"%s"
-"%s"
-" results[offset] = read_image%s( input, imageSampler, coords %s);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( read_only image3d_t input,%s __global float "
+ "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global "
+ "%s4 *results %s)\n"
+ "{\n"
+ "%s"
+ " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
+ "get_global_id(2);\n"
+ "%s"
+ "%s"
+ " results[offset] = read_image%s( input, imageSampler, coords %s);\n"
+ "}";
const char *read_write3DKernelSourcePattern =
-"__kernel void sample_kernel( read_write image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results %s)\n"
-"{\n"
-"%s"
-" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
-"%s"
-"%s"
-" results[offset] = read_image%s( input, coords %s);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( read_write image3d_t input,%s __global float "
+ "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global "
+ "%s4 *results %s)\n"
+ "{\n"
+ "%s"
+ " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
+ "get_global_id(2);\n"
+ "%s"
+ "%s"
+ " results[offset] = read_image%s( input, coords %s);\n"
+ "}";
const char *offset3DKernelSource =
" int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n";
@@ -137,15 +145,16 @@ int test_read_image_set_3D(cl_device_id device, cl_context context,
KernelSourcePattern = read_write3DKernelSourcePattern;
}
- sprintf( programSrc,
- KernelSourcePattern,
- samplerArg, get_explicit_type_name( outputType ),
- gTestMipmaps? ", float lod": " ",
- samplerVar,
- gTestMipmaps? offset3DLodKernelSource: offset3DKernelSource,
- floatCoords ? float3DUnnormalizedCoordKernelSource : int3DCoordKernelSource,
- readFormat,
- gTestMipmaps? ",lod":" ");
+ sprintf(programSrc, KernelSourcePattern,
+ gTestMipmaps
+ ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable"
+ : "",
+ samplerArg, get_explicit_type_name(outputType),
+ gTestMipmaps ? ", float lod" : " ", samplerVar,
+ gTestMipmaps ? offset3DLodKernelSource : offset3DKernelSource,
+ floatCoords ? float3DUnnormalizedCoordKernelSource
+ : int3DCoordKernelSource,
+ readFormat, gTestMipmaps ? ",lod" : " ");
ptr = programSrc;
error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
diff --git a/test_conformance/images/kernel_read_write/test_write_1D.cpp b/test_conformance/images/kernel_read_write/test_write_1D.cpp
index 41983edf..5f726796 100644
--- a/test_conformance/images/kernel_read_write/test_write_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "../testBase.h"
+#include "test_common.h"
#if !defined(_WIN32)
#include <sys/mman.h>
@@ -26,20 +27,24 @@ extern bool validate_float_write_results( float *expected, float *actual, image_
extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor* imageInfo );
const char *readwrite1DKernelSourcePattern =
-"__kernel void sample_kernel( __global %s4 *input, read_write image1d_t output %s)\n"
-"{\n"
-" int tidX = get_global_id(0);\n"
-" int offset = tidX;\n"
-" write_image%s( output, tidX %s, input[ offset ]);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( __global %s4 *input, read_write image1d_t "
+ "output %s)\n"
+ "{\n"
+ " int tidX = get_global_id(0);\n"
+ " int offset = tidX;\n"
+ " write_image%s( output, tidX %s, input[ offset ]);\n"
+ "}";
const char *write1DKernelSourcePattern =
-"__kernel void sample_kernel( __global %s4 *input, write_only image1d_t output %s)\n"
-"{\n"
-" int tidX = get_global_id(0);\n"
-" int offset = tidX;\n"
-" write_image%s( output, tidX %s, input[ offset ]);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( __global %s4 *input, write_only image1d_t "
+ "output %s)\n"
+ "{\n"
+ " int tidX = get_global_id(0);\n"
+ " int offset = tidX;\n"
+ " write_image%s( output, tidX %s, input[ offset ]);\n"
+ "}";
int test_write_image_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
@@ -395,6 +400,8 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que
}
else
{
+ filter_undefined_bits(imageInfo, resultPtr);
+
// Exact result passes every time
if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
{
@@ -403,21 +410,8 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que
float errors[4] = {NAN, NAN, NAN, NAN};
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
- // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
- if( 0 == forceCorrectlyRoundedWrites &&
- (
- imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
- imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
- imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
- imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
- imageInfo->format->image_channel_data_type == CL_SNORM_INT16
- ))
- {
- if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
- ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
- failure = 0;
- }
-
+ failure = filter_rounding_errors(
+ forceCorrectlyRoundedWrites, imageInfo, errors);
if( failure )
{
@@ -458,6 +452,56 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que
log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
+ case CL_UNORM_SHORT_565: {
+ cl_uint *ref_value =
+ (cl_uint *)resultBuffer;
+ cl_uint *test_value =
+ (cl_uint *)resultPtr;
+
+ log_error(" Expected: 0x%2.2x Actual: "
+ "0x%2.2x \n",
+ ref_value[0], test_value[0]);
+
+ log_error(" Expected: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ ref_value[0] & 0x1F,
+ (ref_value[0] >> 5) & 0x3F,
+ (ref_value[0] >> 11) & 0x1F);
+ log_error(" Actual: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ test_value[0] & 0x1F,
+ (test_value[0] >> 5) & 0x3F,
+ (test_value[0] >> 11) & 0x1F);
+ log_error(" Error: %f %f %f %f\n",
+ errors[0], errors[1],
+ errors[2]);
+ break;
+ }
+ case CL_UNORM_SHORT_555: {
+ cl_uint *ref_value =
+ (cl_uint *)resultBuffer;
+ cl_uint *test_value =
+ (cl_uint *)resultPtr;
+
+ log_error(" Expected: 0x%2.2x Actual: "
+ "0x%2.2x \n",
+ ref_value[0], test_value[0]);
+
+ log_error(" Expected: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ ref_value[0] & 0x1F,
+ (ref_value[0] >> 5) & 0x1F,
+ (ref_value[0] >> 10) & 0x1F);
+ log_error(" Actual: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ test_value[0] & 0x1F,
+ (test_value[0] >> 5) & 0x1F,
+ (test_value[0] >> 10) & 0x1F);
+ log_error(" Error: %f %f %f %f\n",
+ errors[0], errors[1],
+ errors[2]);
+ break;
+ }
case CL_UNORM_INT16:
case CL_SNORM_INT16:
case CL_UNSIGNED_INT16:
@@ -574,12 +618,14 @@ int test_write_image_1D_set(cl_device_id device, cl_context context,
KernelSourcePattern = readwrite1DKernelSourcePattern;
}
- sprintf( programSrc,
- KernelSourcePattern,
- get_explicit_type_name( inputType ),
- gTestMipmaps ? ", int lod" : "",
- readFormat,
- gTestMipmaps ? ", lod" :"" );
+ sprintf(
+ programSrc, KernelSourcePattern,
+ gTestMipmaps
+ ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma "
+ "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable"
+ : "",
+ get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "",
+ readFormat, gTestMipmaps ? ", lod" : "");
ptr = programSrc;
error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
diff --git a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
index c771704c..f9024405 100644
--- a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "../testBase.h"
+#include "test_common.h"
#if !defined(_WIN32)
#include <sys/mman.h>
@@ -26,20 +27,24 @@ extern bool validate_float_write_results( float *expected, float *actual, image_
extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
const char *readwrite1DArrayKernelSourcePattern =
-"__kernel void sample_kernel( __global %s4 *input, read_write image1d_array_t output %s)\n"
-"{\n"
-" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
-"%s"
-" write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ]);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( __global %s4 *input, read_write "
+ "image1d_array_t output %s)\n"
+ "{\n"
+ " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+ "%s"
+ " write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ]);\n"
+ "}";
const char *write1DArrayKernelSourcePattern =
-"__kernel void sample_kernel( __global %s4 *input, write_only image1d_array_t output %s)\n"
-"{\n"
-" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
-"%s"
-" write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( __global %s4 *input, write_only "
+ "image1d_array_t output %s)\n"
+ "{\n"
+ " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+ "%s"
+ " write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n"
+ "}";
const char *offset1DArraySource =
" int offset = tidY*get_image_width(output) + tidX;\n";
@@ -415,6 +420,9 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma
}
else
{
+
+ filter_undefined_bits(imageInfo, resultPtr);
+
// Exact result passes every time
if( memcmp( resultBuffer, resultPtr, pixelSize ) != 0 )
{
@@ -423,21 +431,8 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma
float errors[4] = {NAN, NAN, NAN, NAN};
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
- // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
- if( 0 == forceCorrectlyRoundedWrites &&
- (
- imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
- imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
- imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
- imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
- imageInfo->format->image_channel_data_type == CL_SNORM_INT16
- ))
- {
- if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
- ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
- failure = 0;
- }
-
+ failure = filter_rounding_errors(
+ forceCorrectlyRoundedWrites, imageInfo, errors);
if( failure )
{
@@ -478,6 +473,56 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma
log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
+ case CL_UNORM_SHORT_565: {
+ cl_uint *ref_value =
+ (cl_uint *)resultBuffer;
+ cl_uint *test_value =
+ (cl_uint *)resultPtr;
+
+ log_error(" Expected: 0x%2.2x Actual: "
+ "0x%2.2x \n",
+ ref_value[0], test_value[0]);
+
+ log_error(" Expected: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ ref_value[0] & 0x1F,
+ (ref_value[0] >> 5) & 0x3F,
+ (ref_value[0] >> 11) & 0x1F);
+ log_error(" Actual: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ test_value[0] & 0x1F,
+ (test_value[0] >> 5) & 0x3F,
+ (test_value[0] >> 11) & 0x1F);
+ log_error(" Error: %f %f %f %f\n",
+ errors[0], errors[1],
+ errors[2]);
+ break;
+ }
+ case CL_UNORM_SHORT_555: {
+ cl_uint *ref_value =
+ (cl_uint *)resultBuffer;
+ cl_uint *test_value =
+ (cl_uint *)resultPtr;
+
+ log_error(" Expected: 0x%2.2x Actual: "
+ "0x%2.2x \n",
+ ref_value[0], test_value[0]);
+
+ log_error(" Expected: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ ref_value[0] & 0x1F,
+ (ref_value[0] >> 5) & 0x1F,
+ (ref_value[0] >> 10) & 0x1F);
+ log_error(" Actual: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ test_value[0] & 0x1F,
+ (test_value[0] >> 5) & 0x1F,
+ (test_value[0] >> 10) & 0x1F);
+ log_error(" Error: %f %f %f %f\n",
+ errors[0], errors[1],
+ errors[2]);
+ break;
+ }
case CL_UNORM_INT16:
case CL_SNORM_INT16:
case CL_UNSIGNED_INT16:
@@ -596,13 +641,15 @@ int test_write_image_1D_array_set(cl_device_id device, cl_context context,
}
// Construct the source
// Construct the source
- sprintf( programSrc,
- KernelSourcePattern,
- get_explicit_type_name( inputType ),
- gTestMipmaps ? ", int lod" : "",
- gTestMipmaps ? offset1DArrayLodSource : offset1DArraySource,
- readFormat,
- gTestMipmaps ? ", lod" :"" );
+ sprintf(
+ programSrc, KernelSourcePattern,
+ gTestMipmaps
+ ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma "
+ "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable"
+ : "",
+ get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "",
+ gTestMipmaps ? offset1DArrayLodSource : offset1DArraySource, readFormat,
+ gTestMipmaps ? ", lod" : "");
ptr = programSrc;
error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
diff --git a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
index 08a7a803..c1c56994 100644
--- a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "../testBase.h"
+#include "test_common.h"
#if !defined(_WIN32)
#include <sys/mman.h>
@@ -48,20 +49,28 @@ static size_t reduceImageDepth(size_t maxDepth) {
}
const char *write2DArrayKernelSourcePattern =
-"__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n"
-"{\n"
-" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
-"%s"
-" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( __global %s%s *input, write_only %s output "
+ "%s)\n"
+ "{\n"
+ " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
+ "get_global_id(2);\n"
+ "%s"
+ " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset "
+ "]);\n"
+ "}";
const char *readwrite2DArrayKernelSourcePattern =
-"__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n"
-"{\n"
-" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
-"%s"
-" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ] );\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( __global %s%s *input, read_write %s output "
+ "%s)\n"
+ "{\n"
+ " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
+ "get_global_id(2);\n"
+ "%s"
+ " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset "
+ "] );\n"
+ "}";
const char *offset2DArrayKernelSource =
" int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n";
@@ -438,6 +447,9 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma
}
else
{
+
+ filter_undefined_bits(imageInfo, resultPtr);
+
// Exact result passes every time
if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
{
@@ -446,21 +458,9 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma
float errors[4] = {NAN, NAN, NAN, NAN};
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
- // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
- if( 0 == forceCorrectlyRoundedWrites &&
- (
- imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
- imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
- imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
- imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
- imageInfo->format->image_channel_data_type == CL_SNORM_INT16
- ))
- {
- if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
- ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
- failure = 0;
- }
-
+ failure = filter_rounding_errors(
+ forceCorrectlyRoundedWrites, imageInfo,
+ errors);
if( failure )
{
@@ -501,6 +501,64 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma
log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
+ case CL_UNORM_SHORT_565: {
+ cl_uint *ref_value =
+ (cl_uint *)resultBuffer;
+ cl_uint *test_value =
+ (cl_uint *)resultPtr;
+
+ log_error(" Expected: 0x%2.2x "
+ "Actual: 0x%2.2x \n",
+ ref_value[0],
+ test_value[0]);
+
+ log_error(
+ " Expected: 0x%2.2x 0x%2.2x "
+ "0x%2.2x \n",
+ ref_value[0] & 0x1F,
+ (ref_value[0] >> 5) & 0x3F,
+ (ref_value[0] >> 11) & 0x1F);
+ log_error(
+ " Actual: 0x%2.2x 0x%2.2x "
+ "0x%2.2x \n",
+ test_value[0] & 0x1F,
+ (test_value[0] >> 5) & 0x3F,
+ (test_value[0] >> 11) & 0x1F);
+ log_error(
+ " Error: %f %f %f %f\n",
+ errors[0], errors[1],
+ errors[2]);
+ break;
+ }
+ case CL_UNORM_SHORT_555: {
+ cl_uint *ref_value =
+ (cl_uint *)resultBuffer;
+ cl_uint *test_value =
+ (cl_uint *)resultPtr;
+
+ log_error(" Expected: 0x%2.2x "
+ "Actual: 0x%2.2x \n",
+ ref_value[0],
+ test_value[0]);
+
+ log_error(
+ " Expected: 0x%2.2x 0x%2.2x "
+ "0x%2.2x \n",
+ ref_value[0] & 0x1F,
+ (ref_value[0] >> 5) & 0x1F,
+ (ref_value[0] >> 10) & 0x1F);
+ log_error(
+ " Actual: 0x%2.2x 0x%2.2x "
+ "0x%2.2x \n",
+ test_value[0] & 0x1F,
+ (test_value[0] >> 5) & 0x1F,
+ (test_value[0] >> 10) & 0x1F);
+ log_error(
+ " Error: %f %f %f %f\n",
+ errors[0], errors[1],
+ errors[2]);
+ break;
+ }
case CL_UNORM_INT16:
case CL_SNORM_INT16:
case CL_UNSIGNED_INT16:
@@ -621,15 +679,19 @@ int test_write_image_2D_array_set(cl_device_id device, cl_context context,
}
// Construct the source
// Construct the source
- sprintf( programSrc,
- KernelSourcePattern,
- get_explicit_type_name( inputType ),
- (format->image_channel_order == CL_DEPTH) ? "" : "4",
- (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t",
- gTestMipmaps ? " , int lod" : "",
- gTestMipmaps ? offset2DArrayLodKernelSource : offset2DArrayKernelSource,
- readFormat,
- gTestMipmaps ? ", lod" : "" );
+ sprintf(
+ programSrc, KernelSourcePattern,
+ gTestMipmaps
+ ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma "
+ "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable"
+ : "",
+ get_explicit_type_name(inputType),
+ (format->image_channel_order == CL_DEPTH) ? "" : "4",
+ (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t"
+ : "image2d_array_t",
+ gTestMipmaps ? " , int lod" : "",
+ gTestMipmaps ? offset2DArrayLodKernelSource : offset2DArrayKernelSource,
+ readFormat, gTestMipmaps ? ", lod" : "");
ptr = programSrc;
error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
diff --git a/test_conformance/images/kernel_read_write/test_write_3D.cpp b/test_conformance/images/kernel_read_write/test_write_3D.cpp
index 5cc96bb4..9da93695 100644
--- a/test_conformance/images/kernel_read_write/test_write_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "../testBase.h"
+#include "test_common.h"
#if !defined(_WIN32)
#include <sys/mman.h>
@@ -45,22 +46,30 @@ static size_t reduceImageDepth(size_t maxDimSize, MTdata& seed) {
const char *write3DKernelSourcePattern =
-"%s"
-"__kernel void sample_kernel( __global %s4 *input, write_only image3d_t output %s )\n"
-"{\n"
-" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
-"%s"
-" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n"
-"}";
+ "%s"
+ "%s\n"
+ "__kernel void sample_kernel( __global %s4 *input, write_only image3d_t "
+ "output %s )\n"
+ "{\n"
+ " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
+ "get_global_id(2);\n"
+ "%s"
+ " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset "
+ "]);\n"
+ "}";
const char *readwrite3DKernelSourcePattern =
-"%s"
-"__kernel void sample_kernel( __global %s4 *input, read_write image3d_t output %s )\n"
-"{\n"
-" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
-"%s"
-" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n"
-"}";
+ "%s"
+ "%s\n"
+ "__kernel void sample_kernel( __global %s4 *input, read_write image3d_t "
+ "output %s )\n"
+ "{\n"
+ " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
+ "get_global_id(2);\n"
+ "%s"
+ " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset "
+ "]);\n"
+ "}";
const char *khr3DWritesPragma =
"#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";
@@ -445,6 +454,9 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que
}
else
{
+
+ filter_undefined_bits(imageInfo, resultPtr);
+
// Exact result passes every time
if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
{
@@ -453,21 +465,9 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que
float errors[4] = {NAN, NAN, NAN, NAN};
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
- // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
- if( 0 == forceCorrectlyRoundedWrites &&
- (
- imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
- imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
- imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
- imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
- imageInfo->format->image_channel_data_type == CL_SNORM_INT16
- ))
- {
- if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
- ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
- failure = 0;
- }
-
+ failure = filter_rounding_errors(
+ forceCorrectlyRoundedWrites, imageInfo,
+ errors);
if( failure )
{
@@ -508,6 +508,64 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que
log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
+ case CL_UNORM_SHORT_565: {
+ cl_uint *ref_value =
+ (cl_uint *)resultBuffer;
+ cl_uint *test_value =
+ (cl_uint *)resultPtr;
+
+ log_error(" Expected: 0x%2.2x "
+ "Actual: 0x%2.2x \n",
+ ref_value[0],
+ test_value[0]);
+
+ log_error(
+ " Expected: 0x%2.2x 0x%2.2x "
+ "0x%2.2x \n",
+ ref_value[0] & 0x1F,
+ (ref_value[0] >> 5) & 0x3F,
+ (ref_value[0] >> 11) & 0x1F);
+ log_error(
+ " Actual: 0x%2.2x 0x%2.2x "
+ "0x%2.2x \n",
+ test_value[0] & 0x1F,
+ (test_value[0] >> 5) & 0x3F,
+ (test_value[0] >> 11) & 0x1F);
+ log_error(
+ " Error: %f %f %f %f\n",
+ errors[0], errors[1],
+ errors[2]);
+ break;
+ }
+ case CL_UNORM_SHORT_555: {
+ cl_uint *ref_value =
+ (cl_uint *)resultBuffer;
+ cl_uint *test_value =
+ (cl_uint *)resultPtr;
+
+ log_error(" Expected: 0x%2.2x "
+ "Actual: 0x%2.2x \n",
+ ref_value[0],
+ test_value[0]);
+
+ log_error(
+ " Expected: 0x%2.2x 0x%2.2x "
+ "0x%2.2x \n",
+ ref_value[0] & 0x1F,
+ (ref_value[0] >> 5) & 0x1F,
+ (ref_value[0] >> 10) & 0x1F);
+ log_error(
+ " Actual: 0x%2.2x 0x%2.2x "
+ "0x%2.2x \n",
+ test_value[0] & 0x1F,
+ (test_value[0] >> 5) & 0x1F,
+ (test_value[0] >> 10) & 0x1F);
+ log_error(
+ " Error: %f %f %f %f\n",
+ errors[0], errors[1],
+ errors[2]);
+ break;
+ }
case CL_UNORM_INT16:
case CL_SNORM_INT16:
case CL_UNSIGNED_INT16:
@@ -628,14 +686,15 @@ int test_write_image_3D_set(cl_device_id device, cl_context context,
}
// Construct the source
- sprintf( programSrc,
- KernelSourcePattern,
- gTestMipmaps ? "" : khr3DWritesPragma,
- get_explicit_type_name( inputType ),
- gTestMipmaps ? ", int lod" : "",
- gTestMipmaps ? offset3DLodSource : offset3DSource,
- readFormat,
- gTestMipmaps ? ", lod" : "" );
+ sprintf(
+ programSrc, KernelSourcePattern, khr3DWritesPragma,
+ gTestMipmaps
+ ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma "
+ "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable"
+ : "",
+ get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "",
+ gTestMipmaps ? offset3DLodSource : offset3DSource, readFormat,
+ gTestMipmaps ? ", lod" : "");
ptr = programSrc;
error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp
index e40e80d6..29626971 100644
--- a/test_conformance/images/kernel_read_write/test_write_image.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_image.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "../testBase.h"
+#include "test_common.h"
#if !defined(_WIN32)
#include <sys/mman.h>
@@ -46,20 +47,24 @@ extern bool validate_float_write_results( float *expected, float *actual, image_
extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
const char *writeKernelSourcePattern =
-"__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n"
-"{\n"
-" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
-"%s"
-" write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( __global %s%s *input, write_only %s output "
+ "%s)\n"
+ "{\n"
+ " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+ "%s"
+ " write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n"
+ "}";
const char *read_writeKernelSourcePattern =
-"__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n"
-"{\n"
-" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
-"%s"
-" write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ] );\n"
-"}";
+ "%s\n"
+ "__kernel void sample_kernel( __global %s%s *input, read_write %s output "
+ "%s)\n"
+ "{\n"
+ " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
+ "%s"
+ " write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ] );\n"
+ "}";
const char *offset2DKernelSource =
" int offset = tidY*get_image_width(output) + tidX;\n";
@@ -477,6 +482,9 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue
}
else
{
+
+ filter_undefined_bits(imageInfo, resultPtr);
+
// Exact result passes every time
if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
{
@@ -485,21 +493,8 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue
float errors[4] = {NAN, NAN, NAN, NAN};
pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
- // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
- if( 0 == forceCorrectlyRoundedWrites &&
- (
- imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
- imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
- imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
- imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
- imageInfo->format->image_channel_data_type == CL_SNORM_INT16
- ))
- {
- if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
- ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
- failure = 0;
- }
-
+ failure = filter_rounding_errors(
+ forceCorrectlyRoundedWrites, imageInfo, errors);
if( failure )
{
@@ -577,6 +572,57 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue
log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
break;
+ case CL_UNORM_SHORT_565: {
+ cl_uint *ref_value =
+ (cl_uint *)resultBuffer;
+ cl_uint *test_value =
+ (cl_uint *)resultPtr;
+
+ log_error(" Expected: 0x%2.2x Actual: "
+ "0x%2.2x \n",
+ ref_value[0], test_value[0]);
+
+ log_error(" Expected: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ ref_value[0] & 0x1F,
+ (ref_value[0] >> 5) & 0x3F,
+ (ref_value[0] >> 11) & 0x1F);
+ log_error(" Actual: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ test_value[0] & 0x1F,
+ (test_value[0] >> 5) & 0x3F,
+ (test_value[0] >> 11) & 0x1F);
+ log_error(" Error: %f %f %f %f\n",
+ errors[0], errors[1],
+ errors[2]);
+ break;
+ }
+
+ case CL_UNORM_SHORT_555: {
+ cl_uint *ref_value =
+ (cl_uint *)resultBuffer;
+ cl_uint *test_value =
+ (cl_uint *)resultPtr;
+
+ log_error(" Expected: 0x%2.2x Actual: "
+ "0x%2.2x \n",
+ ref_value[0], test_value[0]);
+
+ log_error(" Expected: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ ref_value[0] & 0x1F,
+ (ref_value[0] >> 5) & 0x1F,
+ (ref_value[0] >> 10) & 0x1F);
+ log_error(" Actual: 0x%2.2x "
+ "0x%2.2x 0x%2.2x \n",
+ test_value[0] & 0x1F,
+ (test_value[0] >> 5) & 0x1F,
+ (test_value[0] >> 10) & 0x1F);
+ log_error(" Error: %f %f %f %f\n",
+ errors[0], errors[1],
+ errors[2]);
+ break;
+ }
}
float *v = (float *)(char *)imagePtr;
@@ -686,15 +732,19 @@ int test_write_image_set(cl_device_id device, cl_context context,
}
// Construct the source
- sprintf( programSrc,
- KernelSourcePattern,
- get_explicit_type_name( inputType ),
- (format->image_channel_order == CL_DEPTH) ? "" : "4",
- (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t",
- gTestMipmaps ? ", int lod" : "",
- gTestMipmaps ? offset2DLodKernelSource : offset2DKernelSource,
- readFormat,
- gTestMipmaps ? ", lod" : "" );
+ sprintf(
+ programSrc, KernelSourcePattern,
+ gTestMipmaps
+ ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma "
+ "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable"
+ : "",
+ get_explicit_type_name(inputType),
+ (format->image_channel_order == CL_DEPTH) ? "" : "4",
+ (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t"
+ : "image2d_t",
+ gTestMipmaps ? ", int lod" : "",
+ gTestMipmaps ? offset2DLodKernelSource : offset2DKernelSource,
+ readFormat, gTestMipmaps ? ", lod" : "");
ptr = programSrc;
error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
diff --git a/test_conformance/images/samplerlessReads/test_iterations.cpp b/test_conformance/images/samplerlessReads/test_iterations.cpp
index 55eaaf48..e2f89aad 100644
--- a/test_conformance/images/samplerlessReads/test_iterations.cpp
+++ b/test_conformance/images/samplerlessReads/test_iterations.cpp
@@ -215,6 +215,7 @@ int test_read_image_set_2D(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_1D.cpp b/test_conformance/images/samplerlessReads/test_read_1D.cpp
index aa261b7e..6ed9910a 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D.cpp
@@ -215,6 +215,7 @@ int test_read_image_set_1D(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
index fb0c2632..677eb9f1 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
@@ -214,6 +214,7 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
index 7a3084d3..c3a991a7 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
@@ -219,6 +219,7 @@ int test_read_image_set_1D_buffer(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// note: image_buffer test uses image1D for results validation.
diff --git a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
index 99f24266..8273f538 100644
--- a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
@@ -202,6 +202,7 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_3D.cpp b/test_conformance/images/samplerlessReads/test_read_3D.cpp
index cf411407..0df46c86 100644
--- a/test_conformance/images/samplerlessReads/test_read_3D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_3D.cpp
@@ -206,6 +206,7 @@ int test_read_image_set_3D(cl_device_id device, cl_context context,
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// Determine types
diff --git a/test_conformance/integer_ops/CMakeLists.txt b/test_conformance/integer_ops/CMakeLists.txt
index a045ef81..5344eabc 100644
--- a/test_conformance/integer_ops/CMakeLists.txt
+++ b/test_conformance/integer_ops/CMakeLists.txt
@@ -11,6 +11,7 @@ set(${MODULE_NAME}_SOURCES
test_unary_ops.cpp
verification_and_generation_functions.cpp
test_popcount.cpp
+ test_integer_dot_product.cpp
)
include(../CMakeCommon.txt)
diff --git a/test_conformance/integer_ops/main.cpp b/test_conformance/integer_ops/main.cpp
index 00e91661..e57cffd9 100644
--- a/test_conformance/integer_ops/main.cpp
+++ b/test_conformance/integer_ops/main.cpp
@@ -25,127 +25,129 @@
#endif
test_definition test_list[] = {
- ADD_TEST( integer_clz ),
- ADD_TEST_VERSION( integer_ctz, Version(2, 0)),
- ADD_TEST( integer_hadd ),
- ADD_TEST( integer_rhadd ),
- ADD_TEST( integer_mul_hi ),
- ADD_TEST( integer_rotate ),
- ADD_TEST( integer_clamp ),
- ADD_TEST( integer_mad_sat ),
- ADD_TEST( integer_mad_hi ),
- ADD_TEST( integer_min ),
- ADD_TEST( integer_max ),
- ADD_TEST( integer_upsample ),
-
- ADD_TEST( integer_abs ),
- ADD_TEST( integer_abs_diff ),
- ADD_TEST( integer_add_sat ),
- ADD_TEST( integer_sub_sat ),
-
- ADD_TEST( integer_addAssign ),
- ADD_TEST( integer_subtractAssign ),
- ADD_TEST( integer_multiplyAssign ),
- ADD_TEST( integer_divideAssign ),
- ADD_TEST( integer_moduloAssign ),
- ADD_TEST( integer_andAssign ),
- ADD_TEST( integer_orAssign ),
- ADD_TEST( integer_exclusiveOrAssign ),
-
- ADD_TEST( unary_ops_increment ),
- ADD_TEST( unary_ops_decrement ),
- ADD_TEST( unary_ops_full ),
-
- ADD_TEST( integer_mul24 ),
- ADD_TEST( integer_mad24 ),
-
- ADD_TEST( long_math ),
- ADD_TEST( long_logic ),
- ADD_TEST( long_shift ),
- ADD_TEST( long_compare ),
-
- ADD_TEST( ulong_math ),
- ADD_TEST( ulong_logic ),
- ADD_TEST( ulong_shift ),
- ADD_TEST( ulong_compare ),
-
- ADD_TEST( int_math ),
- ADD_TEST( int_logic ),
- ADD_TEST( int_shift ),
- ADD_TEST( int_compare ),
-
- ADD_TEST( uint_math ),
- ADD_TEST( uint_logic ),
- ADD_TEST( uint_shift ),
- ADD_TEST( uint_compare ),
-
- ADD_TEST( short_math ),
- ADD_TEST( short_logic ),
- ADD_TEST( short_shift ),
- ADD_TEST( short_compare ),
-
- ADD_TEST( ushort_math ),
- ADD_TEST( ushort_logic ),
- ADD_TEST( ushort_shift ),
- ADD_TEST( ushort_compare ),
-
- ADD_TEST( char_math ),
- ADD_TEST( char_logic ),
- ADD_TEST( char_shift ),
- ADD_TEST( char_compare ),
-
- ADD_TEST( uchar_math ),
- ADD_TEST( uchar_logic ),
- ADD_TEST( uchar_shift ),
- ADD_TEST( uchar_compare ),
-
- ADD_TEST( popcount ),
+ ADD_TEST(integer_clz),
+ ADD_TEST_VERSION(integer_ctz, Version(2, 0)),
+ ADD_TEST(integer_hadd),
+ ADD_TEST(integer_rhadd),
+ ADD_TEST(integer_mul_hi),
+ ADD_TEST(integer_rotate),
+ ADD_TEST(integer_clamp),
+ ADD_TEST(integer_mad_sat),
+ ADD_TEST(integer_mad_hi),
+ ADD_TEST(integer_min),
+ ADD_TEST(integer_max),
+ ADD_TEST(integer_upsample),
+
+ ADD_TEST(integer_abs),
+ ADD_TEST(integer_abs_diff),
+ ADD_TEST(integer_add_sat),
+ ADD_TEST(integer_sub_sat),
+
+ ADD_TEST(integer_addAssign),
+ ADD_TEST(integer_subtractAssign),
+ ADD_TEST(integer_multiplyAssign),
+ ADD_TEST(integer_divideAssign),
+ ADD_TEST(integer_moduloAssign),
+ ADD_TEST(integer_andAssign),
+ ADD_TEST(integer_orAssign),
+ ADD_TEST(integer_exclusiveOrAssign),
+
+ ADD_TEST(unary_ops_increment),
+ ADD_TEST(unary_ops_decrement),
+ ADD_TEST(unary_ops_full),
+
+ ADD_TEST(integer_mul24),
+ ADD_TEST(integer_mad24),
+
+ ADD_TEST(long_math),
+ ADD_TEST(long_logic),
+ ADD_TEST(long_shift),
+ ADD_TEST(long_compare),
+
+ ADD_TEST(ulong_math),
+ ADD_TEST(ulong_logic),
+ ADD_TEST(ulong_shift),
+ ADD_TEST(ulong_compare),
+
+ ADD_TEST(int_math),
+ ADD_TEST(int_logic),
+ ADD_TEST(int_shift),
+ ADD_TEST(int_compare),
+
+ ADD_TEST(uint_math),
+ ADD_TEST(uint_logic),
+ ADD_TEST(uint_shift),
+ ADD_TEST(uint_compare),
+
+ ADD_TEST(short_math),
+ ADD_TEST(short_logic),
+ ADD_TEST(short_shift),
+ ADD_TEST(short_compare),
+
+ ADD_TEST(ushort_math),
+ ADD_TEST(ushort_logic),
+ ADD_TEST(ushort_shift),
+ ADD_TEST(ushort_compare),
+
+ ADD_TEST(char_math),
+ ADD_TEST(char_logic),
+ ADD_TEST(char_shift),
+ ADD_TEST(char_compare),
+
+ ADD_TEST(uchar_math),
+ ADD_TEST(uchar_logic),
+ ADD_TEST(uchar_shift),
+ ADD_TEST(uchar_compare),
+
+ ADD_TEST(popcount),
// Quick
- ADD_TEST( quick_long_math ),
- ADD_TEST( quick_long_logic ),
- ADD_TEST( quick_long_shift ),
- ADD_TEST( quick_long_compare ),
-
- ADD_TEST( quick_ulong_math ),
- ADD_TEST( quick_ulong_logic ),
- ADD_TEST( quick_ulong_shift ),
- ADD_TEST( quick_ulong_compare ),
-
- ADD_TEST( quick_int_math ),
- ADD_TEST( quick_int_logic ),
- ADD_TEST( quick_int_shift ),
- ADD_TEST( quick_int_compare ),
-
- ADD_TEST( quick_uint_math ),
- ADD_TEST( quick_uint_logic ),
- ADD_TEST( quick_uint_shift ),
- ADD_TEST( quick_uint_compare ),
-
- ADD_TEST( quick_short_math ),
- ADD_TEST( quick_short_logic ),
- ADD_TEST( quick_short_shift ),
- ADD_TEST( quick_short_compare ),
-
- ADD_TEST( quick_ushort_math ),
- ADD_TEST( quick_ushort_logic ),
- ADD_TEST( quick_ushort_shift ),
- ADD_TEST( quick_ushort_compare ),
-
- ADD_TEST( quick_char_math ),
- ADD_TEST( quick_char_logic ),
- ADD_TEST( quick_char_shift ),
- ADD_TEST( quick_char_compare ),
-
- ADD_TEST( quick_uchar_math ),
- ADD_TEST( quick_uchar_logic ),
- ADD_TEST( quick_uchar_shift ),
- ADD_TEST( quick_uchar_compare ),
-
- ADD_TEST( vector_scalar ),
+ ADD_TEST(quick_long_math),
+ ADD_TEST(quick_long_logic),
+ ADD_TEST(quick_long_shift),
+ ADD_TEST(quick_long_compare),
+
+ ADD_TEST(quick_ulong_math),
+ ADD_TEST(quick_ulong_logic),
+ ADD_TEST(quick_ulong_shift),
+ ADD_TEST(quick_ulong_compare),
+
+ ADD_TEST(quick_int_math),
+ ADD_TEST(quick_int_logic),
+ ADD_TEST(quick_int_shift),
+ ADD_TEST(quick_int_compare),
+
+ ADD_TEST(quick_uint_math),
+ ADD_TEST(quick_uint_logic),
+ ADD_TEST(quick_uint_shift),
+ ADD_TEST(quick_uint_compare),
+
+ ADD_TEST(quick_short_math),
+ ADD_TEST(quick_short_logic),
+ ADD_TEST(quick_short_shift),
+ ADD_TEST(quick_short_compare),
+
+ ADD_TEST(quick_ushort_math),
+ ADD_TEST(quick_ushort_logic),
+ ADD_TEST(quick_ushort_shift),
+ ADD_TEST(quick_ushort_compare),
+
+ ADD_TEST(quick_char_math),
+ ADD_TEST(quick_char_logic),
+ ADD_TEST(quick_char_shift),
+ ADD_TEST(quick_char_compare),
+
+ ADD_TEST(quick_uchar_math),
+ ADD_TEST(quick_uchar_logic),
+ ADD_TEST(quick_uchar_shift),
+ ADD_TEST(quick_uchar_compare),
+
+ ADD_TEST(vector_scalar),
+
+ ADD_TEST(integer_dot_product),
};
-const int test_num = ARRAY_SIZE( test_list );
+const int test_num = ARRAY_SIZE(test_list);
void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d )
{
diff --git a/test_conformance/integer_ops/procs.h b/test_conformance/integer_ops/procs.h
index d5b77e70..82311fb9 100644
--- a/test_conformance/integer_ops/procs.h
+++ b/test_conformance/integer_ops/procs.h
@@ -141,3 +141,5 @@ extern int test_unary_ops_decrement(cl_device_id deviceID, cl_context context, c
extern int test_vector_scalar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_integer_dot_product(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
diff --git a/test_conformance/integer_ops/test_add_sat.cpp b/test_conformance/integer_ops/test_add_sat.cpp
index c0e45d11..e33f5c67 100644
--- a/test_conformance/integer_ops/test_add_sat.cpp
+++ b/test_conformance/integer_ops/test_add_sat.cpp
@@ -21,18 +21,9 @@
#include <sys/types.h>
#include <sys/stat.h>
-#include "procs.h"
-
-#define UCHAR_MIN 0
-#define USHRT_MIN 0
-#define UINT_MIN 0
+#include <algorithm>
-#ifndef MAX
-#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) )
-#endif
-#ifndef MIN
-#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) )
-#endif
+#include "procs.h"
static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize )
{
@@ -40,8 +31,8 @@ static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_
for( i = 0; i < n; i++ )
{
cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
- r = MAX( r, CL_CHAR_MIN );
- r = MIN( r, CL_CHAR_MAX );
+ r = std::max(r, CL_CHAR_MIN);
+ r = std::min(r, CL_CHAR_MAX);
if( r != outptr[i] )
{ log_info( "\n%d) Failure for add_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
@@ -55,9 +46,9 @@ static int verify_addsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const
for( i = 0; i < n; i++ )
{
cl_int r = (int) inA[i] + (int) inB[i];
- r = MAX( r, 0 );
- r = MIN( r, CL_UCHAR_MAX );
- if( r != outptr[i] )
+ r = std::max(r, 0);
+ r = std::min(r, CL_UCHAR_MAX);
+ if (r != outptr[i])
{ log_info( "\n%d) Failure for add_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
}
return 0;
@@ -69,8 +60,8 @@ static int verify_addsat_short( const cl_short *inA, const cl_short *inB, const
for( i = 0; i < n; i++ )
{
cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
- r = MAX( r, CL_SHRT_MIN );
- r = MIN( r, CL_SHRT_MAX );
+ r = std::max(r, CL_SHRT_MIN);
+ r = std::min(r, CL_SHRT_MAX);
if( r != outptr[i] )
{ log_info( "\n%d) Failure for add_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
@@ -84,8 +75,8 @@ static int verify_addsat_ushort( const cl_ushort *inA, const cl_ushort *inB, con
for( i = 0; i < n; i++ )
{
cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
- r = MAX( r, 0 );
- r = MIN( r, CL_USHRT_MAX );
+ r = std::max(r, 0);
+ r = std::min(r, CL_USHRT_MAX);
if( r != outptr[i] )
{ log_info( "\n%d) Failure for add_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
diff --git a/test_conformance/integer_ops/test_integer_dot_product.cpp b/test_conformance/integer_ops/test_integer_dot_product.cpp
new file mode 100644
index 00000000..602d59b6
--- /dev/null
+++ b/test_conformance/integer_ops/test_integer_dot_product.cpp
@@ -0,0 +1,442 @@
+//
+// Copyright (c) 2021 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <algorithm>
+#include <limits>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "procs.h"
+#include "harness/integer_ops_test_info.h"
+#include "harness/testHarness.h"
+
+template <size_t N, typename DstType, typename SrcTypeA, typename SrcTypeB>
+static void
+calculate_reference(std::vector<DstType>& ref, const std::vector<SrcTypeA>& a,
+ const std::vector<SrcTypeB>& b, const bool AccSat = false,
+ const std::vector<DstType>& acc = {})
+{
+ assert(a.size() == b.size());
+ assert(AccSat == false || acc.size() == a.size() / N);
+
+ ref.resize(a.size() / N);
+ for (size_t r = 0; r < ref.size(); r++)
+ {
+ cl_long result = AccSat ? acc[r] : 0;
+ for (size_t c = 0; c < N; c++)
+ {
+ // OK to assume no overflow?
+ result += a[r * N + c] * b[r * N + c];
+ }
+ if (AccSat && result > std::numeric_limits<DstType>::max())
+ {
+ result = std::numeric_limits<DstType>::max();
+ }
+ ref[r] = static_cast<DstType>(result);
+ }
+}
+
+template <typename SrcTypeA, typename SrcTypeB>
+void generate_inputs_with_special_values(std::vector<SrcTypeA>& a,
+ std::vector<SrcTypeB>& b)
+{
+ const std::vector<SrcTypeA> specialValuesA(
+ { static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::min()),
+ static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::min() + 1),
+ static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::min() / 2), 0,
+ static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::max() / 2),
+ static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::max() - 1),
+ static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::max()) });
+ const std::vector<SrcTypeB> specialValuesB(
+ { static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::min()),
+ static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::min() + 1),
+ static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::min() / 2), 0,
+ static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::max() / 2),
+ static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::max() - 1),
+ static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::max()) });
+
+ size_t count = 0;
+ for (auto svA : specialValuesA)
+ {
+ for (auto svB : specialValuesB)
+ {
+ a[count] = svA;
+ b[count] = svB;
+ ++count;
+ }
+ }
+
+ // Generate random data for the rest of the inputs:
+ MTdataHolder d(gRandomSeed);
+ generate_random_data(TestInfo<SrcTypeA>::explicitType, a.size() - count, d,
+ a.data() + count);
+ generate_random_data(TestInfo<SrcTypeB>::explicitType, b.size() - count, d,
+ b.data() + count);
+}
+
+template <typename SrcType>
+void generate_acc_sat_inputs(std::vector<SrcType>& acc)
+{
+ // First generate random data:
+ fill_vector_with_random_data(acc);
+
+ // Now go through the generated data, and make every other element large.
+ // This ensures we have some elements that need saturation.
+ for (size_t i = 0; i < acc.size(); i += 2)
+ {
+ acc[i] = std::numeric_limits<SrcType>::max() - acc[i];
+ }
+}
+
+template <typename T> struct PackedTestInfo
+{
+ static constexpr const char* deviceTypeName = "UNSUPPORTED";
+};
+template <> struct PackedTestInfo<cl_char>
+{
+ static constexpr const char* deviceTypeName = "int";
+};
+template <> struct PackedTestInfo<cl_uchar>
+{
+ static constexpr const char* deviceTypeName = "uint";
+};
+
+static constexpr const char* kernel_source_dot = R"CLC(
+__kernel void test_dot(__global DSTTYPE* dst, __global SRCTYPEA* a, __global SRCTYPEB* b)
+{
+ int index = get_global_id(0);
+ dst[index] = DOT(a[index], b[index]);
+}
+)CLC";
+
+static constexpr const char* kernel_source_dot_acc_sat = R"CLC(
+__kernel void test_dot_acc_sat(
+ __global DSTTYPE* dst,
+ __global SRCTYPEA* a, __global SRCTYPEB* b, __global DSTTYPE* acc)
+{
+ int index = get_global_id(0);
+ dst[index] = DOT_ACC_SAT(a[index], b[index], acc[index]);
+}
+)CLC";
+
+template <typename DstType, typename SrcTypeA, typename SrcTypeB, size_t N>
+static int test_case_dot(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements, bool packed,
+ bool sat)
+{
+ log_info(" testing %s = dot%s%s(%s, %s)\n",
+ std::numeric_limits<DstType>::is_signed ? "signed" : "unsigned",
+ sat ? "_acc_sat" : "", packed ? "_packed" : "",
+ std::numeric_limits<SrcTypeA>::is_signed ? "signed" : "unsigned",
+ std::numeric_limits<SrcTypeB>::is_signed ? "signed" : "unsigned");
+
+ cl_int error = CL_SUCCESS;
+
+ clProgramWrapper program;
+ clKernelWrapper kernel;
+
+ std::string buildOptions;
+ buildOptions += " -DDSTTYPE=";
+ buildOptions += TestInfo<DstType>::deviceTypeName;
+ buildOptions += " -DSRCTYPEA=";
+ buildOptions += packed
+ ? PackedTestInfo<SrcTypeA>::deviceTypeName
+ : TestInfo<SrcTypeA>::deviceTypeName + std::to_string(N);
+ buildOptions += " -DSRCTYPEB=";
+ buildOptions += packed
+ ? PackedTestInfo<SrcTypeB>::deviceTypeName
+ : TestInfo<SrcTypeB>::deviceTypeName + std::to_string(N);
+ std::string packedSuffix;
+ packedSuffix += std::numeric_limits<SrcTypeA>::is_signed ? "s" : "u";
+ packedSuffix += std::numeric_limits<SrcTypeB>::is_signed ? "s" : "u";
+ packedSuffix += std::numeric_limits<DstType>::is_signed ? "_int" : "_uint";
+ if (sat)
+ {
+ buildOptions += packed
+ ? " -DDOT_ACC_SAT=dot_acc_sat_4x8packed_" + packedSuffix
+ : " -DDOT_ACC_SAT=dot_acc_sat";
+ }
+ else
+ {
+ buildOptions +=
+ packed ? " -DDOT=dot_4x8packed_" + packedSuffix : " -DDOT=dot";
+ }
+
+ std::vector<SrcTypeA> a(N * num_elements);
+ std::vector<SrcTypeB> b(N * num_elements);
+ generate_inputs_with_special_values(a, b);
+
+ std::vector<DstType> acc;
+ if (sat)
+ {
+ acc.resize(num_elements);
+ generate_acc_sat_inputs(acc);
+ }
+
+ std::vector<DstType> reference(num_elements);
+ calculate_reference<N>(reference, a, b, sat, acc);
+
+ const char* source = sat ? kernel_source_dot_acc_sat : kernel_source_dot;
+ const char* name = sat ? "test_dot_acc_sat" : "test_dot";
+ error = create_single_kernel_helper(context, &program, &kernel, 1, &source,
+ name, buildOptions.c_str());
+ test_error(error, "Unable to create test kernel");
+
+ clMemWrapper dst = clCreateBuffer(
+ context, 0, reference.size() * sizeof(DstType), NULL, &error);
+ test_error(error, "Unable to create output buffer");
+
+ clMemWrapper srcA =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ a.size() * sizeof(SrcTypeA), a.data(), &error);
+ test_error(error, "Unable to create srcA buffer");
+
+ clMemWrapper srcB =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ b.size() * sizeof(SrcTypeB), b.data(), &error);
+ test_error(error, "Unable to create srcB buffer");
+
+ clMemWrapper srcAcc;
+ if (sat)
+ {
+ srcAcc =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ acc.size() * sizeof(DstType), acc.data(), &error);
+ test_error(error, "Unable to create acc buffer");
+ }
+
+ error = clSetKernelArg(kernel, 0, sizeof(dst), &dst);
+ test_error(error, "Unable to set output buffer kernel arg");
+
+ error = clSetKernelArg(kernel, 1, sizeof(srcA), &srcA);
+ test_error(error, "Unable to set srcA buffer kernel arg");
+
+ error = clSetKernelArg(kernel, 2, sizeof(srcB), &srcB);
+ test_error(error, "Unable to set srcB buffer kernel arg");
+
+ if (sat)
+ {
+ error = clSetKernelArg(kernel, 3, sizeof(srcAcc), &srcAcc);
+ test_error(error, "Unable to set acc buffer kernel arg");
+ }
+
+ size_t global_work_size[] = { reference.size() };
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
+ NULL, 0, NULL, NULL);
+ test_error(error, "Unable to enqueue test kernel");
+
+ error = clFinish(queue);
+ test_error(error, "clFinish failed after test kernel");
+
+ std::vector<DstType> results(reference.size(), 99);
+ error = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0,
+ results.size() * sizeof(DstType),
+ results.data(), 0, NULL, NULL);
+ test_error(error, "Unable to read data after test kernel");
+
+ if (results != reference)
+ {
+ log_error("Result buffer did not match reference buffer!\n");
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+template <typename SrcType, typename DstType, size_t N>
+static int test_vectype(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ int result = TEST_PASS;
+
+ typedef typename std::make_signed<SrcType>::type SSrcType;
+ typedef typename std::make_signed<DstType>::type SDstType;
+
+ typedef typename std::make_unsigned<SrcType>::type USrcType;
+ typedef typename std::make_unsigned<DstType>::type UDstType;
+
+ // dot testing:
+ result |= test_case_dot<UDstType, USrcType, USrcType, N>(
+ deviceID, context, queue, num_elements, false, false);
+ result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
+ deviceID, context, queue, num_elements, false, false);
+ result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
+ deviceID, context, queue, num_elements, false, false);
+ result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
+ deviceID, context, queue, num_elements, false, false);
+
+ // dot_acc_sat testing:
+ result |= test_case_dot<UDstType, USrcType, USrcType, N>(
+ deviceID, context, queue, num_elements, false, true);
+ result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
+ deviceID, context, queue, num_elements, false, true);
+ result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
+ deviceID, context, queue, num_elements, false, true);
+ result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
+ deviceID, context, queue, num_elements, false, true);
+
+ return result;
+}
+
+template <typename SrcType, typename DstType, size_t N>
+static int test_vectype_packed(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ int result = TEST_PASS;
+
+ typedef typename std::make_signed<SrcType>::type SSrcType;
+ typedef typename std::make_signed<DstType>::type SDstType;
+
+ typedef typename std::make_unsigned<SrcType>::type USrcType;
+ typedef typename std::make_unsigned<DstType>::type UDstType;
+
+ // packed dot testing:
+ result |= test_case_dot<UDstType, USrcType, USrcType, N>(
+ deviceID, context, queue, num_elements, true, false);
+ result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
+ deviceID, context, queue, num_elements, true, false);
+ result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
+ deviceID, context, queue, num_elements, true, false);
+ result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
+ deviceID, context, queue, num_elements, true, false);
+
+ // packed dot_acc_sat testing:
+ result |= test_case_dot<UDstType, USrcType, USrcType, N>(
+ deviceID, context, queue, num_elements, true, true);
+ result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
+ deviceID, context, queue, num_elements, true, true);
+ result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
+ deviceID, context, queue, num_elements, true, true);
+ result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
+ deviceID, context, queue, num_elements, true, true);
+
+ return result;
+}
+
+int test_integer_dot_product(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ if (!is_extension_available(deviceID, "cl_khr_integer_dot_product"))
+ {
+ log_info("cl_khr_integer_dot_product is not supported\n");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ Version deviceVersion = get_device_cl_version(deviceID);
+ cl_version extensionVersion;
+
+ if ((deviceVersion >= Version(3, 0))
+ || is_extension_available(deviceID, "cl_khr_extended_versioning"))
+ {
+ extensionVersion =
+ get_extension_version(deviceID, "cl_khr_integer_dot_product");
+ }
+ else
+ {
+ // Assume 1.0.0 is supported if the version can't be queried
+ extensionVersion = CL_MAKE_VERSION(1, 0, 0);
+ }
+
+ cl_int error = CL_SUCCESS;
+ int result = TEST_PASS;
+
+ cl_device_integer_dot_product_capabilities_khr dotCaps = 0;
+ error = clGetDeviceInfo(deviceID,
+ CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR,
+ sizeof(dotCaps), &dotCaps, NULL);
+ test_error(
+ error,
+ "Unable to query CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR");
+
+ // Check that the required capabilities are reported
+ test_assert_error(
+ dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR,
+ "When cl_khr_integer_dot_product is supported "
+ "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR must be "
+ "supported");
+
+ if (extensionVersion >= CL_MAKE_VERSION(2, 0, 0))
+ {
+ test_assert_error(
+ dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR,
+ "When cl_khr_integer_dot_product is supported with version >= 2.0.0 "
+ "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR must be "
+ "supported");
+ }
+
+ // Check that acceleration properties can be queried
+ if (extensionVersion >= CL_MAKE_VERSION(2, 0, 0))
+ {
+ size_t size_ret;
+ error = clGetDeviceInfo(
+ deviceID,
+ CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, 0,
+ nullptr, &size_ret);
+ test_error(
+ error,
+ "Unable to query size of data returned by "
+ "CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR");
+
+ cl_device_integer_dot_product_acceleration_properties_khr
+ accelerationProperties;
+ error = clGetDeviceInfo(
+ deviceID,
+ CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR,
+ sizeof(accelerationProperties), &accelerationProperties, nullptr);
+ test_error(error, "Unable to query 8-bit acceleration properties");
+
+ error = clGetDeviceInfo(
+ deviceID,
+ CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR,
+ 0, nullptr, &size_ret);
+ test_error(
+ error,
+ "Unable to query size of data returned by "
+ "CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_"
+ "PACKED_KHR");
+
+ error = clGetDeviceInfo(
+ deviceID,
+ CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR,
+ sizeof(accelerationProperties), &accelerationProperties, nullptr);
+ test_error(error,
+ "Unable to query 4x8-bit packed acceleration properties");
+ }
+
+ // Report when unknown capabilities are found
+ if (dotCaps
+ & ~(CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR
+ | CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR))
+ {
+ log_info("NOTE: found an unknown / untested capability!\n");
+ }
+
+ // Test built-in functions
+ if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR)
+ {
+ result |= test_vectype<cl_uchar, cl_uint, 4>(deviceID, context, queue,
+ num_elements);
+ }
+
+ if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR)
+ {
+ result |= test_vectype_packed<cl_uchar, cl_uint, 4>(
+ deviceID, context, queue, num_elements);
+ }
+
+ return result;
+}
diff --git a/test_conformance/integer_ops/test_integers.cpp b/test_conformance/integer_ops/test_integers.cpp
index 8d77b24b..6fa18e1e 100644
--- a/test_conformance/integer_ops/test_integers.cpp
+++ b/test_conformance/integer_ops/test_integers.cpp
@@ -16,14 +16,9 @@
#include "testBase.h"
#include "harness/conversions.h"
-#define TEST_SIZE 512
+#include <algorithm>
-#ifndef MIN
- #define MIN( _a, _b ) ((_a) < (_b) ? (_a) : (_b))
-#endif
-#ifndef MAX
- #define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b))
-#endif
+#define TEST_SIZE 512
const char *singleParamIntegerKernelSourcePattern =
"__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n"
@@ -1512,19 +1507,20 @@ bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *de
switch( vecAType )
{
case kULong:
- ((cl_ulong*) destination)[0] = MAX(MIN(valueA, valueC), valueB);
+ ((cl_ulong *)destination)[0] =
+ std::max(std::min(valueA, valueC), valueB);
break;
case kUInt:
- ((cl_uint*) destination)[0] = (cl_uint)
- (MAX(MIN(valueA, valueC), valueB));
+ ((cl_uint *)destination)[0] =
+ (cl_uint)(std::max(std::min(valueA, valueC), valueB));
break;
case kUShort:
- ((cl_ushort*) destination)[0] = (cl_ushort)
- (MAX(MIN(valueA, valueC), valueB));
+ ((cl_ushort *)destination)[0] =
+ (cl_ushort)(std::max(std::min(valueA, valueC), valueB));
break;
case kUChar:
- ((cl_uchar*) destination)[0] = (cl_uchar)
- (MAX(MIN(valueA, valueC), valueB));
+ ((cl_uchar *)destination)[0] =
+ (cl_uchar)(std::max(std::min(valueA, valueC), valueB));
break;
default:
//error -- should never get here
@@ -1576,19 +1572,20 @@ bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *de
switch( vecAType )
{
case kLong:
- ((cl_long*) destination)[0] = MAX(MIN(valueA, valueC), valueB);
+ ((cl_long *)destination)[0] =
+ std::max(std::min(valueA, valueC), valueB);
break;
case kInt:
- ((cl_int*) destination)[0] = (cl_int)
- (MAX(MIN(valueA, valueC), valueB));
+ ((cl_int *)destination)[0] =
+ (cl_int)(std::max(std::min(valueA, valueC), valueB));
break;
case kShort:
- ((cl_short*) destination)[0] = (cl_short)
- (MAX(MIN(valueA, valueC), valueB));
+ ((cl_short *)destination)[0] =
+ (cl_short)(std::max(std::min(valueA, valueC), valueB));
break;
case kChar:
- ((cl_char*) destination)[0] = (cl_char)
- (MAX(MIN(valueA, valueC), valueB));
+ ((cl_char *)destination)[0] =
+ (cl_char)(std::max(std::min(valueA, valueC), valueB));
break;
default:
//error -- should never get here
@@ -1654,13 +1651,16 @@ bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void *
((cl_ulong*) destination)[0] = multLo;
break;
case kUInt:
- ((cl_uint*) destination)[0] = (cl_uint) MIN( multLo, (cl_ulong) CL_UINT_MAX );
+ ((cl_uint *)destination)[0] =
+ (cl_uint)std::min(multLo, (cl_ulong)CL_UINT_MAX);
break;
case kUShort:
- ((cl_ushort*) destination)[0] = (cl_ushort) MIN( multLo, (cl_ulong) CL_USHRT_MAX );
+ ((cl_ushort *)destination)[0] =
+ (cl_ushort)std::min(multLo, (cl_ulong)CL_USHRT_MAX);
break;
case kUChar:
- ((cl_uchar*) destination)[0] = (cl_uchar) MIN( multLo, (cl_ulong) CL_UCHAR_MAX );
+ ((cl_uchar *)destination)[0] =
+ (cl_uchar)std::min(multLo, (cl_ulong)CL_UCHAR_MAX);
break;
default:
//error -- should never get here
@@ -1744,18 +1744,18 @@ bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void *
((cl_long*) destination)[0] = result;
break;
case kInt:
- result = MIN( result, (cl_long) CL_INT_MAX );
- result = MAX( result, (cl_long) CL_INT_MIN );
+ result = std::min(result, (cl_long)CL_INT_MAX);
+ result = std::max(result, (cl_long)CL_INT_MIN);
((cl_int*) destination)[0] = (cl_int) result;
break;
case kShort:
- result = MIN( result, (cl_long) CL_SHRT_MAX );
- result = MAX( result, (cl_long) CL_SHRT_MIN );
+ result = std::min(result, (cl_long)CL_SHRT_MAX);
+ result = std::max(result, (cl_long)CL_SHRT_MIN);
((cl_short*) destination)[0] = (cl_short) result;
break;
case kChar:
- result = MIN( result, (cl_long) CL_CHAR_MAX );
- result = MAX( result, (cl_long) CL_CHAR_MIN );
+ result = std::min(result, (cl_long)CL_CHAR_MAX);
+ result = std::max(result, (cl_long)CL_CHAR_MIN);
((cl_char*) destination)[0] = (cl_char) result;
break;
default:
diff --git a/test_conformance/integer_ops/test_sub_sat.cpp b/test_conformance/integer_ops/test_sub_sat.cpp
index 845d1064..2a88ee0d 100644
--- a/test_conformance/integer_ops/test_sub_sat.cpp
+++ b/test_conformance/integer_ops/test_sub_sat.cpp
@@ -21,19 +21,9 @@
#include <sys/types.h>
#include <sys/stat.h>
-#include "procs.h"
-
-#define UCHAR_MIN 0
-#define USHRT_MIN 0
-#define UINT_MIN 0
-
-#ifndef MAX
-#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) )
-#endif
-#ifndef MIN
-#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) )
-#endif
+#include <algorithm>
+#include "procs.h"
static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize )
{
@@ -41,8 +31,8 @@ static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_
for( i = 0; i < n; i++ )
{
cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
- r = MAX( r, CL_CHAR_MIN );
- r = MIN( r, CL_CHAR_MAX );
+ r = std::max(r, CL_CHAR_MIN);
+ r = std::min(r, CL_CHAR_MAX);
if( r != outptr[i] )
{ log_info( "\n%d) Failure for sub_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
@@ -56,9 +46,9 @@ static int verify_subsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const
for( i = 0; i < n; i++ )
{
cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
- r = MAX( r, 0 );
- r = MIN( r, CL_UCHAR_MAX );
- if( r != outptr[i] )
+ r = std::max(r, 0);
+ r = std::min(r, CL_UCHAR_MAX);
+ if (r != outptr[i])
{ log_info( "\n%d) Failure for sub_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
}
return 0;
@@ -70,8 +60,8 @@ static int verify_subsat_short( const cl_short *inA, const cl_short *inB, const
for( i = 0; i < n; i++ )
{
cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
- r = MAX( r, CL_SHRT_MIN );
- r = MIN( r, CL_SHRT_MAX );
+ r = std::max(r, CL_SHRT_MIN);
+ r = std::min(r, CL_SHRT_MAX);
if( r != outptr[i] )
{ log_info( "\n%d) Failure for sub_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
@@ -85,8 +75,8 @@ static int verify_subsat_ushort( const cl_ushort *inA, const cl_ushort *inB, con
for( i = 0; i < n; i++ )
{
cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
- r = MAX( r, 0 );
- r = MIN( r, CL_USHRT_MAX );
+ r = std::max(r, 0);
+ r = std::min(r, CL_USHRT_MAX);
if( r != outptr[i] )
{ log_info( "\n%d) Failure for sub_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
diff --git a/test_conformance/integer_ops/test_unary_ops.cpp b/test_conformance/integer_ops/test_unary_ops.cpp
index 72940eaa..c91c85ae 100644
--- a/test_conformance/integer_ops/test_unary_ops.cpp
+++ b/test_conformance/integer_ops/test_unary_ops.cpp
@@ -107,7 +107,7 @@ int test_unary_op( cl_command_queue queue, cl_context context, OpKonstants which
// For sub ops, the min control value is 2. Otherwise, it's 0
controlData[ i ] |= 0x02;
else if( whichOp == kIncrement )
- // For addition ops, the MAX control value is 1. Otherwise, it's 3
+ // For addition ops, the max control value is 1. Otherwise, it's 3
controlData[ i ] &= ~0x02;
}
streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt
index d8dfc403..32814026 100644
--- a/test_conformance/math_brute_force/CMakeLists.txt
+++ b/test_conformance/math_brute_force/CMakeLists.txt
@@ -9,7 +9,10 @@ set(${MODULE_NAME}_SOURCES
binary_operator_float.cpp
binary_two_results_i_double.cpp
binary_two_results_i_float.cpp
+ common.cpp
+ common.h
function_list.cpp
+ function_list.h
i_unary_double.cpp
i_unary_float.cpp
macro_binary_double.cpp
@@ -20,9 +23,12 @@ set(${MODULE_NAME}_SOURCES
mad_float.cpp
main.cpp
reference_math.cpp
+ reference_math.h
sleep.cpp
+ sleep.h
ternary_double.cpp
ternary_float.cpp
+ test_functions.h
unary_double.cpp
unary_float.cpp
unary_two_results_double.cpp
@@ -32,6 +38,11 @@ set(${MODULE_NAME}_SOURCES
unary_u_double.cpp
unary_u_float.cpp
utility.cpp
+ utility.h
)
+# math_brute_force compiles cleanly with -Wall (except for a few remaining
+# warnings), but other tests do not (yet); so enable -Wall locally.
+set_gnulike_module_compile_flags("-Wall -Wno-strict-aliasing -Wno-unknown-pragmas")
+
include(../CMakeCommon.txt)
diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
index 4baa4991..f18d0b97 100644
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -14,16 +14,19 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
#include <cstring>
+namespace {
+
const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -109,49 +112,49 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem inBuf2; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ clMemWrapper inBuf2;
+ Buffers outBuf;
+
float maxError; // max error value. Init to 0.
double
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
+ MTdataHolder d;
+
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
-typedef struct TestInfo
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
@@ -164,10 +167,10 @@ typedef struct TestInfo
int isNextafter;
bool relaxedMode; // True if test is running in relaxed mode, false
// otherwise.
-} TestInfo;
+};
// A table of more difficult cases to get right
-static const double specialValues[] = {
+const double specialValues[] = {
-NAN,
-INFINITY,
-DBL_MAX,
@@ -277,204 +280,20 @@ static const double specialValues[] = {
+0.0,
};
-static size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
-{
- TestInfo test_info;
- cl_int error;
- float maxError = 0.0f;
- double maxErrorVal = 0.0;
- double maxErrorVal2 = 0.0;
-
- logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_double));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ulps = f->double_ulps;
- test_info.ftz = f->ftz || gForceFTZ;
-
- test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
- test_info.skipNanInf = 0;
- test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_double),
- test_info.subBufferSize * sizeof(cl_double)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
- test_info.tinfo[i].inBuf2 =
- clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf2)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
-
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- // Accumulate the arithmetic errors
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- if (test_info.tinfo[i].maxError > maxError)
- {
- maxError = test_info.tinfo[i].maxError;
- maxErrorVal = test_info.tinfo[i].maxErrorValue;
- maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
- }
- }
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
-
- vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- free_mtdata(test_info.tinfo[i].d);
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- clReleaseMemObject(test_info.tinfo[i].inBuf2);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
float ulps = job->ulps;
dptr func = job->f->dfunc;
int ftz = job->ftz;
+ bool relaxedMode = job->relaxedMode;
MTdata d = tinfo->d;
cl_int error;
const char *name = job->f->name;
@@ -577,7 +396,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
goto exit;
}
@@ -659,7 +479,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
float err = Bruteforce_Ulp_Error_Double(test, correct);
int fail = !(fabsf(err) <= ulps);
- if (fail && ftz)
+ if (fail && (ftz || relaxedMode))
{
// retry per section 6.5.3.2
if (IsDoubleResultSubnormal(correct, ulps))
@@ -810,7 +630,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+ vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
"ThreadCount:%2u\n",
base, job->step, job->scale, buffer_elements, job->ulps,
job->threadCount);
@@ -825,3 +645,152 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
exit:
return error;
}
+
+} // anonymous namespace
+
+int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+ float maxError = 0.0f;
+ double maxErrorVal = 0.0;
+ double maxErrorVal2 = 0.0;
+
+ logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_double));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ulps = f->double_ulps;
+ test_info.ftz = f->ftz || gForceFTZ;
+ test_info.relaxedMode = relaxedMode;
+
+ test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
+ test_info.skipNanInf = 0;
+ test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_double),
+ test_info.subBufferSize * sizeof(cl_double)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+ test_info.tinfo[i].inBuf2 =
+ clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf2)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ // Accumulate the arithmetic errors
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ if (test_info.tinfo[i].maxError > maxError)
+ {
+ maxError = test_info.tinfo[i].maxError;
+ maxErrorVal = test_info.tinfo[i].maxErrorValue;
+ maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+ }
+ }
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+
+ vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
+ return error;
+}
diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp
index 32caafa3..fe1491d7 100644
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ b/test_conformance/math_brute_force/binary_float.cpp
@@ -14,16 +14,19 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
#include <cstring>
+namespace {
+
const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -107,49 +110,49 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem inBuf2; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ clMemWrapper inBuf2;
+ Buffers outBuf;
+
float maxError; // max error value. Init to 0.
double
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
+ MTdataHolder d;
+
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
-typedef struct TestInfo
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
@@ -162,10 +165,10 @@ typedef struct TestInfo
int isNextafter;
bool relaxedMode; // True if test is running in relaxed mode, false
// otherwise.
-} TestInfo;
+};
// A table of more difficult cases to get right
-static const float specialValues[] = {
+const float specialValues[] = {
-NAN,
-INFINITY,
-FLT_MAX,
@@ -267,209 +270,23 @@ static const float specialValues[] = {
+0.0f,
};
-static const size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-{
- TestInfo test_info;
- cl_int error;
- float maxError = 0.0f;
- double maxErrorVal = 0.0;
- double maxErrorVal2 = 0.0;
-
- logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_float));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
- test_info.ftz =
- f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
- test_info.relaxedMode = relaxedMode;
- test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
- test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
- test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_float),
- test_info.subBufferSize * sizeof(cl_float)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
- test_info.tinfo[i].inBuf2 =
- clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf2)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
-
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- // Accumulate the arithmetic errors
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- if (test_info.tinfo[i].maxError > maxError)
- {
- maxError = test_info.tinfo[i].maxError;
- maxErrorVal = test_info.tinfo[i].maxErrorValue;
- maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
- }
- }
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
-
- vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- free_mtdata(test_info.tinfo[i].d);
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- clReleaseMemObject(test_info.tinfo[i].inBuf2);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func;
int ftz = job->ftz;
bool relaxedMode = job->relaxedMode;
float ulps = getAllowedUlpError(job->f, relaxedMode);
MTdata d = tinfo->d;
cl_int error;
- cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
+ std::vector<bool> overflow(buffer_elements, false);
const char *name = job->f->name;
int isFDim = job->isFDim;
int skipNanInf = job->skipNanInf;
@@ -583,7 +400,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
goto exit;
}
@@ -631,7 +449,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
vlog_error("Error: clFinish failed! err: %d\n", error);
goto exit;
}
- free(overflow);
return CL_SUCCESS;
}
@@ -641,7 +458,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
// Calculate the correctly rounded reference result
memset(&oldMode, 0, sizeof(oldMode));
- if (ftz) ForceFTZ(&oldMode);
+ if (ftz || relaxedMode) ForceFTZ(&oldMode);
// Set the rounding mode to match the device
if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
@@ -726,7 +543,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
float err = Ulp_Error(test, correct);
int fail = !(fabsf(err) <= ulps);
- if (fail && ftz)
+ if (fail && (ftz || relaxedMode))
{
// retry per section 6.5.3.2
if (IsFloatResultSubnormal(correct, ulps))
@@ -938,7 +755,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
vlog_error(
"\nERROR: %s%s: %f ulp error at {%a (0x%x), %a "
- "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n",
+ "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %zu\n",
name, sizeNames[k], err, s[j], ((cl_uint *)s)[j],
s2[j], ((cl_uint *)s2)[j], r[j], test,
((cl_uint *)&test)[0], j);
@@ -970,7 +787,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+ vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
"ThreadCount:%2u\n",
base, job->step, job->scale, buffer_elements, job->ulps,
job->threadCount);
@@ -983,6 +800,154 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
}
exit:
- if (overflow) free(overflow);
+ return error;
+}
+
+} // anonymous namespace
+
+// Brute-force test driver for binary float functions (float, float) -> float.
+// Builds one kernel per (vector size, worker thread), carves the global
+// input/output buffers into per-thread sub-buffers, runs Test() over the
+// whole input space via the thread pool, and reports the worst ULP error.
+// Returns CL_SUCCESS on pass, a CL error code (or other non-zero) on failure.
+int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
+{
+    TestInfo test_info{};
+    cl_int error;
+    float maxError = 0.0f;
+    double maxErrorVal = 0.0;
+    double maxErrorVal2 = 0.0;
+
+    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
+
+    // Init test_info
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE
+        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = getTestScale(sizeof(cl_float));
+
+    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+    test_info.ftz =
+        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    test_info.relaxedMode = relaxedMode;
+    test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
+    test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
+    test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for
+    // every thread
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    {
+        test_info.k[i].resize(test_info.threadCount, nullptr);
+    }
+
+    test_info.tinfo.resize(test_info.threadCount);
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
+    {
+        cl_buffer_region region = {
+            i * test_info.subBufferSize * sizeof(cl_float),
+            test_info.subBufferSize * sizeof(cl_float)
+        };
+        test_info.tinfo[i].inBuf =
+            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf)
+        {
+            // region.origin and region.size are size_t: print with %zu
+            // (unsigned), not %zd (CERT FIO47-C).
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+                       "region {%zu, %zu}\n",
+                       region.origin, region.size);
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 =
+            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+                       "region {%zu, %zu}\n",
+                       region.origin, region.size);
+            goto exit;
+        }
+
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+                &region, &error);
+            if (error || NULL == test_info.tinfo[i].outBuf[j])
+            {
+                vlog_error("Error: Unable to create sub-buffer of "
+                           "gOutBuffer[%d] for region {%zu, %zu}\n",
+                           (int)j, region.origin, region.size);
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue =
+            clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if (NULL == test_info.tinfo[i].tQueue || error)
+        {
+            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+            goto exit;
+        }
+
+        // Per-thread RNG seeded from the caller's generator so a run is
+        // reproducible for a given top-level seed.
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                    test_info.programs, f->nameInCode,
+                                    relaxedMode };
+        if ((error = ThreadPool_Do(BuildKernelFn,
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                   &build_info)))
+            goto exit;
+    }
+
+    // Run the kernels
+    if (!gSkipCorrectnessTesting)
+    {
+        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+        // Accumulate the arithmetic errors
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
+        {
+            if (test_info.tinfo[i].maxError > maxError)
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+
+        if (error) goto exit;
+
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
+    }
+
+    vlog("\n");
+
+exit:
+    // Release the raw cl_kernel handles explicitly; the clMemWrapper /
+    // clCommandQueueWrapper members clean up buffers and queues when
+    // test_info is destroyed.
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    {
+        for (auto &kernel : test_info.k[i])
+        {
+            clReleaseKernel(kernel);
+        }
+    }
+
return error;
}
diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp
index 69e620aa..f8786e68 100644
--- a/test_conformance/math_brute_force/binary_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_i_double.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
@@ -21,8 +22,10 @@
#include <climits>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -108,61 +111,63 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem inBuf2; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ clMemWrapper inBuf2;
+ Buffers outBuf;
+
float maxError; // max error value. Init to 0.
double
maxErrorValue; // position of the max error value (param 1). Init to 0.
cl_int maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
+ MTdataHolder d;
+
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
-typedef struct TestInfo
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values
float ulps; // max_allowed ulps
int ftz; // non-zero if running in flush to zero mode
+ bool relaxedMode; // True if test is running in relaxed mode, false
+ // otherwise.
// no special values
-} TestInfo;
+};
// A table of more difficult cases to get right
-static const double specialValues[] = {
+const double specialValues[] = {
-NAN,
-INFINITY,
-DBL_MAX,
@@ -272,210 +277,28 @@ static const double specialValues[] = {
+0.0,
};
-static size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
-static const int specialValuesInt[] = {
+const int specialValuesInt[] = {
0, 1, 2, 3, 1022, 1023, 1024, INT_MIN,
INT_MAX, -1, -2, -3, -1022, -1023, -11024, -INT_MAX,
};
-static constexpr size_t specialValuesIntCount =
- sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
-{
- TestInfo test_info;
- cl_int error;
- float maxError = 0.0f;
- double maxErrorVal = 0.0;
- cl_int maxErrorVal2 = 0;
-
- logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_double));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ulps = f->double_ulps;
- test_info.ftz = f->ftz || gForceFTZ;
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_double),
- test_info.subBufferSize * sizeof(cl_double)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
- cl_buffer_region region2 = { i * test_info.subBufferSize
- * sizeof(cl_int),
- test_info.subBufferSize * sizeof(cl_int) };
- test_info.tinfo[i].inBuf2 =
- clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
- if (error || NULL == test_info.tinfo[i].inBuf2)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- // Accumulate the arithmetic errors
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- if (test_info.tinfo[i].maxError > maxError)
- {
- maxError = test_info.tinfo[i].maxError;
- maxErrorVal = test_info.tinfo[i].maxErrorValue;
- maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
- }
- }
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
-
- vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- free_mtdata(test_info.tinfo[i].d);
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- clReleaseMemObject(test_info.tinfo[i].inBuf2);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
+constexpr size_t specialValuesIntCount =
+ sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
float ulps = job->ulps;
dptr func = job->f->dfunc;
int ftz = job->ftz;
+ bool relaxedMode = job->relaxedMode;
MTdata d = tinfo->d;
cl_int error;
const char *name = job->f->name;
@@ -576,7 +399,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
goto exit;
}
@@ -658,7 +482,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
float err = Bruteforce_Ulp_Error_Double(test, correct);
int fail = !(fabsf(err) <= ulps);
- if (fail && ftz)
+ if (fail && (ftz || relaxedMode))
{
// retry per section 6.5.3.2
if (IsDoubleResultSubnormal(correct, ulps))
@@ -744,3 +568,151 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
exit:
return error;
}
+
+} // anonymous namespace
+
+// Brute-force test driver for binary functions (double, int) -> double.
+// Builds one kernel per (vector size, worker thread), carves the global
+// input/output buffers into per-thread sub-buffers (double-sized for input 1
+// and the outputs, int-sized for input 2), runs Test() via the thread pool,
+// and reports the worst ULP error.
+// Returns CL_SUCCESS on pass, a CL error code (or other non-zero) on failure.
+int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
+{
+    TestInfo test_info{};
+    cl_int error;
+    float maxError = 0.0f;
+    double maxErrorVal = 0.0;
+    cl_int maxErrorVal2 = 0;
+
+    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+
+    // Init test_info
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE
+        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = getTestScale(sizeof(cl_double));
+
+    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = f->double_ulps;
+    test_info.ftz = f->ftz || gForceFTZ;
+    test_info.relaxedMode = relaxedMode;
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for
+    // every thread
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    {
+        test_info.k[i].resize(test_info.threadCount, nullptr);
+    }
+
+    test_info.tinfo.resize(test_info.threadCount);
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
+    {
+        cl_buffer_region region = {
+            i * test_info.subBufferSize * sizeof(cl_double),
+            test_info.subBufferSize * sizeof(cl_double)
+        };
+        test_info.tinfo[i].inBuf =
+            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf)
+        {
+            // region.origin and region.size are size_t: print with %zu
+            // (unsigned), not %zd (CERT FIO47-C).
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+                       "region {%zu, %zu}\n",
+                       region.origin, region.size);
+            goto exit;
+        }
+        cl_buffer_region region2 = { i * test_info.subBufferSize
+                                         * sizeof(cl_int),
+                                     test_info.subBufferSize * sizeof(cl_int) };
+        test_info.tinfo[i].inBuf2 =
+            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            // Report the region actually used for inBuf2 (region2, the
+            // cl_int-sized region), not the cl_double-sized region above.
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+                       "region {%zu, %zu}\n",
+                       region2.origin, region2.size);
+            goto exit;
+        }
+
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+                &region, &error);
+            if (error || NULL == test_info.tinfo[i].outBuf[j])
+            {
+                vlog_error("Error: Unable to create sub-buffer of "
+                           "gOutBuffer[%d] for region {%zu, %zu}\n",
+                           (int)j, region.origin, region.size);
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue =
+            clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if (NULL == test_info.tinfo[i].tQueue || error)
+        {
+            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+            goto exit;
+        }
+
+        // Per-thread RNG seeded from the caller's generator so a run is
+        // reproducible for a given top-level seed.
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                    test_info.programs, f->nameInCode,
+                                    relaxedMode };
+        if ((error = ThreadPool_Do(BuildKernelFn,
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                   &build_info)))
+            goto exit;
+    }
+
+    // Run the kernels
+    if (!gSkipCorrectnessTesting)
+    {
+        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+        // Accumulate the arithmetic errors
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
+        {
+            if (test_info.tinfo[i].maxError > maxError)
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+
+        if (error) goto exit;
+
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
+    }
+
+    vlog("\n");
+
+exit:
+    // Release the raw cl_kernel handles explicitly; the clMemWrapper /
+    // clCommandQueueWrapper members clean up buffers and queues when
+    // test_info is destroyed.
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    {
+        for (auto &kernel : test_info.k[i])
+        {
+            clReleaseKernel(kernel);
+        }
+    }
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp
index e65a9aaf..d855f447 100644
--- a/test_conformance/math_brute_force/binary_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_i_float.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
@@ -21,8 +22,10 @@
#include <climits>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -106,61 +109,62 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem inBuf2; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ clMemWrapper inBuf2;
+ Buffers outBuf;
+
float maxError; // max error value. Init to 0.
double
maxErrorValue; // position of the max error value (param 1). Init to 0.
cl_int maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
+ MTdataHolder d;
+
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
-typedef struct TestInfo
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values
float ulps; // max_allowed ulps
int ftz; // non-zero if running in flush to zero mode
-
+ bool relaxedMode; // True if test is running in relaxed mode, false
+ // otherwise.
// no special values
-} TestInfo;
+};
// A table of more difficult cases to get right
-static const float specialValues[] = {
+const float specialValues[] = {
-NAN,
-INFINITY,
-FLT_MAX,
@@ -262,212 +266,29 @@ static const float specialValues[] = {
+0.0f,
};
-static const size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
-static const int specialValuesInt[] = {
+const int specialValuesInt[] = {
0, 1, 2, 3, 126, 127,
128, 0x02000001, 0x04000001, 1465264071, 1488522147, -1,
-2, -3, -126, -127, -128, -0x02000001,
-0x04000001, -1465264071, -1488522147,
};
-static size_t specialValuesIntCount =
- sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
-{
- TestInfo test_info;
- cl_int error;
- float maxError = 0.0f;
- double maxErrorVal = 0.0;
- cl_int maxErrorVal2 = 0;
-
- logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_float));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
- test_info.ftz =
- f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_float),
- test_info.subBufferSize * sizeof(cl_float)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
- cl_buffer_region region2 = { i * test_info.subBufferSize
- * sizeof(cl_int),
- test_info.subBufferSize * sizeof(cl_int) };
- test_info.tinfo[i].inBuf2 =
- clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
- if (error || NULL == test_info.tinfo[i].inBuf2)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
-
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- // Accumulate the arithmetic errors
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- if (test_info.tinfo[i].maxError > maxError)
- {
- maxError = test_info.tinfo[i].maxError;
- maxErrorVal = test_info.tinfo[i].maxErrorValue;
- maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
- }
- }
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
-
- vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- free_mtdata(test_info.tinfo[i].d);
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- clReleaseMemObject(test_info.tinfo[i].inBuf2);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
+constexpr size_t specialValuesIntCount =
+ sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func;
int ftz = job->ftz;
+ bool relaxedMode = job->relaxedMode;
float ulps = job->ulps;
MTdata d = tinfo->d;
cl_int error;
@@ -568,7 +389,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
goto exit;
}
@@ -650,7 +472,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
float err = Ulp_Error(test, correct);
int fail = !(fabsf(err) <= ulps);
- if (fail && ftz)
+ if (fail && (ftz || relaxedMode))
{
// retry per section 6.5.3.2
if (IsFloatResultSubnormal(correct, ulps))
@@ -694,7 +516,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
vlog_error(
"\nERROR: %s%s: %f ulp error at {%a (0x%8.8x), %d}: "
- "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %d\n",
+ "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %zu\n",
name, sizeNames[k], err, s[j], ((uint32_t *)s)[j],
s2[j], r[j], ((uint32_t *)r)[j], test,
((cl_uint *)&test)[0], j);
@@ -723,7 +545,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+ vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
"ThreadCount:%2u\n",
base, job->step, job->scale, buffer_elements, job->ulps,
job->threadCount);
@@ -738,3 +560,152 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
exit:
return error;
}
+
+} // anonymous namespace
+
+int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+ float maxError = 0.0f;
+ double maxErrorVal = 0.0;
+ cl_int maxErrorVal2 = 0;
+
+ logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
+
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_float));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+ test_info.ftz =
+ f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+ test_info.relaxedMode = relaxedMode;
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_float),
+ test_info.subBufferSize * sizeof(cl_float)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+ cl_buffer_region region2 = { i * test_info.subBufferSize
+ * sizeof(cl_int),
+ test_info.subBufferSize * sizeof(cl_int) };
+ test_info.tinfo[i].inBuf2 =
+ clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf2)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+ "region {%zd, %zd}\n",
+                       region2.origin, region2.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ // Accumulate the arithmetic errors
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ if (test_info.tinfo[i].maxError > maxError)
+ {
+ maxError = test_info.tinfo[i].maxError;
+ maxErrorVal = test_info.tinfo[i].maxErrorValue;
+ maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+ }
+ }
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+
+ vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
+ return error;
+}
diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
index 21e76c85..bbe5c438 100644
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ b/test_conformance/math_brute_force/binary_operator_double.cpp
@@ -14,15 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
#include <cstring>
-static int BuildKernel(const char *operator_symbol, int vectorSize,
- cl_uint kernel_count, cl_kernel *k, cl_program *p,
- bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *operator_symbol, int vectorSize,
+ cl_uint kernel_count, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -108,49 +111,49 @@ static int BuildKernel(const char *operator_symbol, int vectorSize,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *operator_symbol;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->operator_symbol, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem inBuf2; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ clMemWrapper inBuf2;
+ Buffers outBuf;
+
float maxError; // max error value. Init to 0.
double
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
+ MTdataHolder d;
+
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
-typedef struct TestInfo
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
@@ -161,10 +164,10 @@ typedef struct TestInfo
// otherwise.
// no special fields
-} TestInfo;
+};
// A table of more difficult cases to get right
-static const double specialValues[] = {
+const double specialValues[] = {
-NAN,
-INFINITY,
-DBL_MAX,
@@ -274,201 +277,20 @@ static const double specialValues[] = {
+0.0,
};
-static const size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
- bool relaxedMode)
-{
- TestInfo test_info;
- cl_int error;
- float maxError = 0.0f;
- double maxErrorVal = 0.0;
- double maxErrorVal2 = 0.0;
-
- logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_double));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ulps = f->double_ulps;
- test_info.ftz = f->ftz || gForceFTZ;
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_double),
- test_info.subBufferSize * sizeof(cl_double)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
- test_info.tinfo[i].inBuf2 =
- clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf2)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
-
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- // Accumulate the arithmetic errors
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- if (test_info.tinfo[i].maxError > maxError)
- {
- maxError = test_info.tinfo[i].maxError;
- maxErrorVal = test_info.tinfo[i].maxErrorValue;
- maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
- }
- }
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
-
- vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- free_mtdata(test_info.tinfo[i].d);
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- clReleaseMemObject(test_info.tinfo[i].inBuf2);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
float ulps = job->ulps;
dptr func = job->f->dfunc;
int ftz = job->ftz;
+ bool relaxedMode = job->relaxedMode;
MTdata d = tinfo->d;
cl_int error;
const char *name = job->f->name;
@@ -569,7 +391,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
goto exit;
}
@@ -651,7 +474,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
float err = Bruteforce_Ulp_Error_Double(test, correct);
int fail = !(fabsf(err) <= ulps);
- if (fail && ftz)
+ if (fail && (ftz || relaxedMode))
{
// retry per section 6.5.3.2
if (IsDoubleResultSubnormal(correct, ulps))
@@ -778,7 +601,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+ vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
"ThreadCount:%2u\n",
base, job->step, job->scale, buffer_elements, job->ulps,
job->threadCount);
@@ -793,3 +616,148 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
exit:
return error;
}
+
+} // anonymous namespace
+
+int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
+ bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+ float maxError = 0.0f;
+ double maxErrorVal = 0.0;
+ double maxErrorVal2 = 0.0;
+
+ logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_double));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ulps = f->double_ulps;
+ test_info.ftz = f->ftz || gForceFTZ;
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_double),
+ test_info.subBufferSize * sizeof(cl_double)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+ test_info.tinfo[i].inBuf2 =
+ clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf2)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ // Accumulate the arithmetic errors
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ if (test_info.tinfo[i].maxError > maxError)
+ {
+ maxError = test_info.tinfo[i].maxError;
+ maxErrorVal = test_info.tinfo[i].maxErrorValue;
+ maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+ }
+ }
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+
+ vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
+ return error;
+}
diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index ccaef604..1a28d8d8 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -14,15 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
#include <cstring>
-static int BuildKernel(const char *operator_symbol, int vectorSize,
- cl_uint kernel_count, cl_kernel *k, cl_program *p,
- bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *operator_symbol, int vectorSize,
+ cl_uint kernel_count, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -106,49 +109,49 @@ static int BuildKernel(const char *operator_symbol, int vectorSize,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *operator_symbol;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->operator_symbol, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem inBuf2; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ clMemWrapper inBuf2;
+ Buffers outBuf;
+
float maxError; // max error value. Init to 0.
double
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
+ MTdataHolder d;
+
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
-typedef struct TestInfo
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
@@ -159,10 +162,10 @@ typedef struct TestInfo
// otherwise.
// no special fields
-} TestInfo;
+};
// A table of more difficult cases to get right
-static const float specialValues[] = {
+const float specialValues[] = {
-NAN,
-INFINITY,
-FLT_MAX,
@@ -264,207 +267,23 @@ static const float specialValues[] = {
+0.0f,
};
-static const size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
- bool relaxedMode)
-{
- TestInfo test_info;
- cl_int error;
- float maxError = 0.0f;
- double maxErrorVal = 0.0;
- double maxErrorVal2 = 0.0;
-
- logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_float));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
- test_info.ftz =
- f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
- test_info.relaxedMode = relaxedMode;
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_float),
- test_info.subBufferSize * sizeof(cl_float)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
- test_info.tinfo[i].inBuf2 =
- clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf2)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
-
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- // Accumulate the arithmetic errors
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- if (test_info.tinfo[i].maxError > maxError)
- {
- maxError = test_info.tinfo[i].maxError;
- maxErrorVal = test_info.tinfo[i].maxErrorValue;
- maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
- }
- }
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
-
- vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- free_mtdata(test_info.tinfo[i].d);
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- clReleaseMemObject(test_info.tinfo[i].inBuf2);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func;
int ftz = job->ftz;
bool relaxedMode = job->relaxedMode;
float ulps = getAllowedUlpError(job->f, relaxedMode);
MTdata d = tinfo->d;
cl_int error;
- cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
+ std::vector<bool> overflow(buffer_elements, false);
const char *name = job->f->name;
cl_uint *t = 0;
cl_float *r = 0;
@@ -584,7 +403,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
goto exit;
}
@@ -627,14 +447,13 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (gSkipCorrectnessTesting)
{
- free(overflow);
return CL_SUCCESS;
}
// Calculate the correctly rounded reference result
FPU_mode_type oldMode;
memset(&oldMode, 0, sizeof(oldMode));
- if (ftz) ForceFTZ(&oldMode);
+ if (ftz || relaxedMode) ForceFTZ(&oldMode);
// Set the rounding mode to match the device
oldRoundMode = kRoundToNearestEven;
@@ -662,7 +481,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
- if (ftz) RestoreFPState(&oldMode);
+ if (ftz || relaxedMode) RestoreFPState(&oldMode);
// Read the data back -- no need to wait for the first N-1 buffers but wait
// for the last buffer. This is an in order queue.
@@ -719,7 +538,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps)));
if (fabsf(errB) < fabsf(err)) err = errB;
- if (fail && ftz)
+ if (fail && (ftz || relaxedMode))
{
// retry per section 6.5.3.2
if (IsFloatResultSubnormal(correct, ulps))
@@ -879,7 +698,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (fail)
{
vlog_error("\nERROR: %s%s: %f ulp error at {%a, %a}: *%a "
- "vs. %a (0x%8.8x) at index: %d\n",
+ "vs. %a (0x%8.8x) at index: %zu\n",
name, sizeNames[k], err, s[j], s2[j], r[j], test,
((cl_uint *)&test)[0], j);
error = -1;
@@ -907,7 +726,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+ vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
"ThreadCount:%2u\n",
base, job->step, job->scale, buffer_elements, job->ulps,
job->threadCount);
@@ -920,6 +739,152 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
}
exit:
- if (overflow) free(overflow);
+ return error;
+}
+
+} // anonymous namespace
+
+int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
+ bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+ float maxError = 0.0f;
+ double maxErrorVal = 0.0;
+ double maxErrorVal2 = 0.0;
+
+ logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
+
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_float));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+ test_info.ftz =
+ f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+ test_info.relaxedMode = relaxedMode;
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_float),
+ test_info.subBufferSize * sizeof(cl_float)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+ test_info.tinfo[i].inBuf2 =
+ clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf2)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ // Accumulate the arithmetic errors
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ if (test_info.tinfo[i].maxError > maxError)
+ {
+ maxError = test_info.tinfo[i].maxError;
+ maxErrorVal = test_info.tinfo[i].maxErrorValue;
+ maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+ }
+ }
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+
+ vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
return error;
}
diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
index 14f41092..bbfd707b 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
@@ -14,15 +14,19 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <climits>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -115,24 +119,23 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
-typedef struct ComputeReferenceInfoD_
+struct ComputeReferenceInfoD
{
const double *x;
const double *y;
@@ -141,9 +144,9 @@ typedef struct ComputeReferenceInfoD_
long double (*f_ffpI)(long double, long double, int *);
cl_uint lim;
cl_uint count;
-} ComputeReferenceInfoD;
+};
-static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
+cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
{
ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo;
cl_uint lim = cri->lim;
@@ -165,10 +168,12 @@ static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
return CL_SUCCESS;
}
+} // anonymous namespace
+
int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
{
int error;
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f;
int64_t maxError2 = 0;
@@ -187,8 +192,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -375,7 +380,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
if (iptrUndefined) iErr = 0;
int fail = !(fabsf(err) <= f->double_ulps && iErr == 0);
- if (ftz && fail)
+ if ((ftz || relaxedMode) && fail)
{
// retry per section 6.5.3.2
if (IsDoubleResultSubnormal(correct, f->double_ulps))
@@ -523,17 +528,20 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
if (fail)
{
- vlog_error(
- "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, "
- "%.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, "
- "%d} ({ 0x%16.16llx, 0x%8.8x}) vs. {%.13la, %d} ({ "
- "0x%16.16llx, 0x%8.8x})\n",
- f->name, sizeNames[k], err, iErr, ((double *)gIn)[j],
- ((double *)gIn2)[j], ((cl_ulong *)gIn)[j],
- ((cl_ulong *)gIn2)[j], ((double *)gOut_Ref)[j],
- ((int *)gOut_Ref2)[j], ((cl_ulong *)gOut_Ref)[j],
- ((cl_uint *)gOut_Ref2)[j], test, q2[j],
- ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]);
+ vlog_error("\nERROR: %sD%s: {%f, %" PRId64
+ "} ulp error at {%.13la, "
+ "%.13la} ({ 0x%16.16" PRIx64 ", 0x%16.16" PRIx64
+ "}): *{%.13la, "
+ "%d} ({ 0x%16.16" PRIx64
+ ", 0x%8.8x}) vs. {%.13la, %d} ({ "
+ "0x%16.16" PRIx64 ", 0x%8.8x})\n",
+ f->name, sizeNames[k], err, iErr,
+ ((double *)gIn)[j], ((double *)gIn2)[j],
+ ((cl_ulong *)gIn)[j], ((cl_ulong *)gIn2)[j],
+ ((double *)gOut_Ref)[j], ((int *)gOut_Ref2)[j],
+ ((cl_ulong *)gOut_Ref)[j],
+ ((cl_uint *)gOut_Ref2)[j], test, q2[j],
+ ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]);
error = -1;
goto exit;
}
@@ -544,8 +552,9 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -562,8 +571,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
else
vlog("passed");
- vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
- maxErrorVal2);
+ vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+ maxErrorVal, maxErrorVal2);
}
vlog("\n");
@@ -573,7 +582,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
index 5ef44b6e..07473376 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
@@ -14,15 +14,19 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <climits>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -113,24 +117,23 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
-typedef struct ComputeReferenceInfoF_
+struct ComputeReferenceInfoF
{
const float *x;
const float *y;
@@ -139,9 +142,9 @@ typedef struct ComputeReferenceInfoF_
double (*f_ffpI)(double, double, int *);
cl_uint lim;
cl_uint count;
-} ComputeReferenceInfoF;
+};
-static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
+cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
{
ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
cl_uint lim = cri->lim;
@@ -161,13 +164,15 @@ static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
return CL_SUCCESS;
}
+} // anonymous namespace
+
int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{
int error;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f;
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -188,8 +193,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -375,7 +380,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
if (iptrUndefined) iErr = 0;
int fail = !(fabsf(err) <= float_ulps && iErr == 0);
- if (ftz && fail)
+ if ((ftz || relaxedMode) && fail)
{
// retry per section 6.5.3.2
if (IsFloatResultSubnormal(correct, float_ulps))
@@ -509,16 +514,17 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
if (fail)
{
- vlog_error(
- "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} "
- "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, "
- "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n",
- f->name, sizeNames[k], err, iErr, ((float *)gIn)[j],
- ((float *)gIn2)[j], ((cl_uint *)gIn)[j],
- ((cl_uint *)gIn2)[j], ((float *)gOut_Ref)[j],
- ((int *)gOut_Ref2)[j], ((cl_uint *)gOut_Ref)[j],
- ((cl_uint *)gOut_Ref2)[j], test, q2[j],
- ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]);
+ vlog_error("\nERROR: %s%s: {%f, %" PRId64
+ "} ulp error at {%a, %a} "
+ "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, "
+ "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n",
+ f->name, sizeNames[k], err, iErr,
+ ((float *)gIn)[j], ((float *)gIn2)[j],
+ ((cl_uint *)gIn)[j], ((cl_uint *)gIn2)[j],
+ ((float *)gOut_Ref)[j], ((int *)gOut_Ref2)[j],
+ ((cl_uint *)gOut_Ref)[j],
+ ((cl_uint *)gOut_Ref2)[j], test, q2[j],
+ ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]);
error = -1;
goto exit;
}
@@ -529,8 +535,9 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -547,8 +554,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
else
vlog("passed");
- vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
- maxErrorVal2);
+ vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+ maxErrorVal, maxErrorVal2);
}
vlog("\n");
@@ -558,7 +565,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp
new file mode 100644
index 00000000..f5e9f993
--- /dev/null
+++ b/test_conformance/math_brute_force/common.cpp
@@ -0,0 +1,170 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+
+#include "utility.h" // for sizeNames and sizeValues.
+
+#include <sstream>
+#include <string>
+
+namespace {
+
+const char *GetTypeName(ParameterType type)
+{
+ switch (type)
+ {
+ case ParameterType::Float: return "float";
+ case ParameterType::Double: return "double";
+ }
+ return nullptr;
+}
+
+const char *GetUndefValue(ParameterType type)
+{
+ switch (type)
+ {
+ case ParameterType::Float:
+ case ParameterType::Double: return "NAN";
+ }
+ return nullptr;
+}
+
+void EmitDefineType(std::ostringstream &kernel, const char *name,
+ ParameterType type, int vector_size_index)
+{
+ kernel << "#define " << name << " " << GetTypeName(type)
+ << sizeNames[vector_size_index] << '\n';
+ kernel << "#define " << name << "_SCALAR " << GetTypeName(type) << '\n';
+}
+
+void EmitDefineUndef(std::ostringstream &kernel, const char *name,
+ ParameterType type)
+{
+ kernel << "#define " << name << " " << GetUndefValue(type) << '\n';
+}
+
+void EmitEnableExtension(std::ostringstream &kernel, ParameterType type)
+{
+ switch (type)
+ {
+ case ParameterType::Double:
+ kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+ break;
+
+ case ParameterType::Float:
+ // No extension required.
+ break;
+ }
+}
+
+} // anonymous namespace
+
+std::string GetKernelName(int vector_size_index)
+{
+ return std::string("math_kernel") + sizeNames[vector_size_index];
+}
+
+std::string GetTernaryKernel(const std::string &kernel_name,
+ const char *builtin, ParameterType retType,
+ ParameterType type1, ParameterType type2,
+ ParameterType type3, int vector_size_index)
+{
+ // To keep the kernel code readable, use macros for types and undef values.
+ std::ostringstream kernel;
+ EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
+ EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
+ EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
+ EmitDefineType(kernel, "TYPE3", type3, vector_size_index);
+ EmitDefineUndef(kernel, "UNDEF1", type1);
+ EmitDefineUndef(kernel, "UNDEF2", type2);
+ EmitDefineUndef(kernel, "UNDEF3", type3);
+ EmitEnableExtension(kernel, type1);
+
+ // clang-format off
+ const char *kernel_nonvec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
+ __global TYPE1* in1,
+ __global TYPE2* in2,
+ __global TYPE3* in3)
+{
+ size_t i = get_global_id(0);
+ out[i] = )", builtin, R"((in1[i], in2[i], in3[i]);
+}
+)" };
+
+ const char *kernel_vec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
+ __global TYPE1_SCALAR* in1,
+ __global TYPE2_SCALAR* in2,
+ __global TYPE3_SCALAR* in3)
+{
+ size_t i = get_global_id(0);
+
+ if (i + 1 < get_global_size(0))
+ {
+ TYPE1 a = vload3(0, in1 + 3 * i);
+ TYPE2 b = vload3(0, in2 + 3 * i);
+ TYPE3 c = vload3(0, in3 + 3 * i);
+ RETTYPE res = )", builtin, R"((a, b, c);
+ vstore3(res, 0, out + 3 * i);
+ }
+ else
+ {
+ // Figure out how many elements are left over after
+ // BUFFER_SIZE % (3 * sizeof(type)).
+ // Assume power of two buffer size.
+ size_t parity = i & 1;
+ TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
+ TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
+ TYPE3 c = (TYPE3)(UNDEF3, UNDEF3, UNDEF3);
+ switch (parity)
+ {
+ case 0:
+ a.y = in1[3 * i + 1];
+ b.y = in2[3 * i + 1];
+ c.y = in3[3 * i + 1];
+ // fall through
+ case 1:
+ a.x = in1[3 * i];
+ b.x = in2[3 * i];
+ c.x = in3[3 * i];
+ break;
+ }
+
+ RETTYPE res = )", builtin, R"((a, b, c);
+
+ switch (parity)
+ {
+ case 0:
+ out[3 * i + 1] = res.y;
+ // fall through
+ case 1:
+ out[3 * i] = res.x;
+ break;
+ }
+ }
+}
+)" };
+ // clang-format on
+
+ if (sizeValues[vector_size_index] != 3)
+ for (const auto &chunk : kernel_nonvec3) kernel << chunk;
+ else
+ for (const auto &chunk : kernel_vec3) kernel << chunk;
+
+ return kernel.str();
+}
diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h
new file mode 100644
index 00000000..143814ca
--- /dev/null
+++ b/test_conformance/math_brute_force/common.h
@@ -0,0 +1,68 @@
+//
+// Copyright (c) 2021 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef COMMON_H
+#define COMMON_H
+
+#include "harness/typeWrappers.h"
+#include "utility.h"
+
+#include <array>
+#include <string>
+#include <vector>
+
+// Array of thread-specific kernels for each vector size.
+using KernelMatrix = std::array<std::vector<cl_kernel>, VECTOR_SIZE_COUNT>;
+
+// Array of programs for each vector size.
+using Programs = std::array<clProgramWrapper, VECTOR_SIZE_COUNT>;
+
+// Array of buffers for each vector size.
+using Buffers = std::array<clMemWrapper, VECTOR_SIZE_COUNT>;
+
+// Types supported for kernel code generation.
+enum class ParameterType
+{
+ Float,
+ Double,
+};
+
+// Return kernel name suffixed with vector size.
+std::string GetKernelName(int vector_size_index);
+
+// Generate kernel code for the given builtin function/operator.
+std::string GetTernaryKernel(const std::string &kernel_name,
+ const char *builtin, ParameterType retType,
+ ParameterType type1, ParameterType type2,
+ ParameterType type3, int vector_size_index);
+
+// Information to generate OpenCL kernels.
+struct BuildKernelInfo
+{
+ // Number of kernels to build, one for each thread to avoid data races.
+ cl_uint threadCount;
+
+ KernelMatrix &kernels;
+
+ Programs &programs;
+
+ // Function, macro or symbol tested by the kernel.
+ const char *nameInCode;
+
+ // Whether to build with -cl-fast-relaxed-math.
+ bool relaxedMode;
+};
+
+#endif /* COMMON_H */
diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
index 3edbb485..91736285 100644
--- a/test_conformance/math_brute_force/function_list.cpp
+++ b/test_conformance/math_brute_force/function_list.cpp
@@ -53,6 +53,7 @@
STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
}
+
#define unaryF NULL
#define i_unaryF NULL
#define unaryF_u NULL
diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h
index 38f739ce..95a29459 100644
--- a/test_conformance/math_brute_force/function_list.h
+++ b/test_conformance/math_brute_force/function_list.h
@@ -30,7 +30,7 @@
#include "harness/mt19937.h"
-typedef union fptr {
+union fptr {
void *p;
double (*f_f)(double);
double (*f_u)(cl_uint);
@@ -45,9 +45,9 @@ typedef union fptr {
double (*f_ffpI)(double, double, int *);
double (*f_fff)(double, double, double);
float (*f_fma)(float, float, float, int);
-} fptr;
+};
-typedef union dptr {
+union dptr {
void *p;
long double (*f_f)(long double);
long double (*f_u)(cl_ulong);
@@ -59,20 +59,20 @@ typedef union dptr {
long double (*f_fpI)(long double, int *);
long double (*f_ffpI)(long double, long double, int *);
long double (*f_fff)(long double, long double, long double);
-} dptr;
+};
struct Func;
-typedef struct vtbl
+struct vtbl
{
const char *type_name;
int (*TestFunc)(const struct Func *, MTdata, bool);
int (*DoubleTestFunc)(
const struct Func *, MTdata,
bool); // may be NULL if function is single precision only
-} vtbl;
+};
-typedef struct Func
+struct Func
{
const char *name; // common name, to be used as an argument in the shell
const char *nameInCode; // name as it appears in the __kernel, usually the
@@ -88,7 +88,7 @@ typedef struct Func
int ftz;
int relaxed;
const vtbl *vtbl_ptr;
-} Func;
+};
extern const Func functionList[];
diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
index 4383fa8b..0cbcf86e 100644
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ b/test_conformance/math_brute_force/i_unary_double.cpp
@@ -14,14 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -100,27 +104,28 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
+} // anonymous namespace
+
int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
{
int error;
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
int ftz = f->ftz || gForceFTZ;
uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
@@ -138,8 +143,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -244,7 +249,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
// If we aren't getting the correctly rounded result
if (t[j] != q[j])
{
- if (ftz && IsDoubleSubnormal(s[j]))
+ if ((ftz || relaxedMode) && IsDoubleSubnormal(s[j]))
{
unsigned int correct0 = f->dfunc.i_f(0.0);
unsigned int correct1 = f->dfunc.i_f(-0.0);
@@ -267,8 +272,9 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -295,7 +301,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
index c803aa32..90bb1e16 100644
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -14,14 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -98,27 +102,28 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
+} // anonymous namespace
+
int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
{
int error;
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
@@ -135,8 +140,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -241,7 +246,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
// If we aren't getting the correctly rounded result
if (t[j] != q[j])
{
- if (ftz && IsFloatSubnormal(s[j]))
+ if ((ftz || relaxedMode) && IsFloatSubnormal(s[j]))
{
unsigned int correct0 = f->func.i_f(0.0);
unsigned int correct1 = f->func.i_f(-0.0);
@@ -264,8 +269,9 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -291,7 +297,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
index d09915f6..412f210b 100644
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp
@@ -14,14 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -107,54 +111,55 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem inBuf2; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
- MTdata d;
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ clMemWrapper inBuf2;
+ Buffers outBuf;
+
+ MTdataHolder d;
+
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
+
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values
int ftz; // non-zero if running in flush to zero mode
-
-} TestInfo;
+ bool relaxedMode; // True if test is running in relaxed mode, false
+ // otherwise.
+};
// A table of more difficult cases to get right
-static const double specialValues[] = {
+const double specialValues[] = {
-NAN,
-INFINITY,
-DBL_MAX,
@@ -264,182 +269,19 @@ static const double specialValues[] = {
+0.0,
};
-static const size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- TestInfo test_info;
- cl_int error;
-
- logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_double));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ftz = f->ftz || gForceFTZ;
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (size_t i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_double),
- test_info.subBufferSize * sizeof(cl_double)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
- test_info.tinfo[i].inBuf2 =
- clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf2)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
-
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- free_mtdata(test_info.tinfo[i].d);
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- clReleaseMemObject(test_info.tinfo[i].inBuf2);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
dptr dfunc = job->f->dfunc;
int ftz = job->ftz;
+ bool relaxedMode = job->relaxedMode;
MTdata d = tinfo->d;
cl_int error;
const char *name = job->f->name;
@@ -538,7 +380,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
goto exit;
}
@@ -613,7 +456,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (gMinVectorSizeIndex == 0 && t[j] != q[j])
{
// If we aren't getting the correctly rounded result
- if (ftz)
+ if (ftz || relaxedMode)
{
if (IsDoubleSubnormal(s[j]))
{
@@ -645,8 +488,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_ulong err = t[j] - q[j];
if (q[j] > t[j]) err = q[j] - t[j];
- vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld "
- "vs. %lld (index: %d)\n",
+ vlog_error("\nERROR: %s: %" PRId64
+ " ulp error at {%.13la, %.13la}: *%" PRId64 " "
+ "vs. %" PRId64 " (index: %zu)\n",
name, err, ((double *)s)[j], ((double *)s2)[j], t[j],
q[j], j);
error = -1;
@@ -654,13 +498,14 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
}
- for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
+ for (auto k = std::max(1U, gMinVectorSizeIndex);
+ k < gMaxVectorSizeIndex; k++)
{
q = (cl_long *)out[k];
// If we aren't getting the correctly rounded result
if (-t[j] != q[j])
{
- if (ftz)
+ if (ftz || relaxedMode)
{
if (IsDoubleSubnormal(s[j]))
{
@@ -692,8 +537,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_ulong err = -t[j] - q[j];
if (q[j] > -t[j]) err = q[j] + t[j];
- vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, "
- "%.13la}: *%lld vs. %lld (index: %d)\n",
+ vlog_error("\nERROR: %sD%s: %" PRId64 " ulp error at {%.13la, "
+ "%.13la}: *%" PRId64 " vs. %" PRId64
+ " (index: %zu)\n",
name, sizeNames[k], err, ((double *)s)[j],
((double *)s2)[j], -t[j], q[j], j);
error = -1;
@@ -735,3 +581,131 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
exit:
return error;
}
+
+} // anonymous namespace
+
+int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+
+ logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_double));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ftz = f->ftz || gForceFTZ;
+ test_info.relaxedMode = relaxedMode;
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_double),
+ test_info.subBufferSize * sizeof(cl_double)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+ test_info.tinfo[i].inBuf2 =
+ clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf2)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
+ return error;
+}
diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp
index c530cdaf..cb915fc7 100644
--- a/test_conformance/math_brute_force/macro_binary_float.cpp
+++ b/test_conformance/math_brute_force/macro_binary_float.cpp
@@ -14,14 +14,17 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -105,54 +108,55 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem inBuf2; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
- MTdata d;
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ clMemWrapper inBuf2;
+ Buffers outBuf;
+
+ MTdataHolder d;
+
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
+
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values
int ftz; // non-zero if running in flush to zero mode
-
-} TestInfo;
+ bool relaxedMode; // True if test is running in relaxed mode, false
+ // otherwise.
+};
// A table of more difficult cases to get right
-static const float specialValues[] = {
+const float specialValues[] = {
-NAN,
-INFINITY,
-FLT_MAX,
@@ -254,183 +258,19 @@ static const float specialValues[] = {
+0.0f,
};
-static const size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- TestInfo test_info;
- cl_int error;
-
- logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_float));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ftz =
- f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_float),
- test_info.subBufferSize * sizeof(cl_float)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
- test_info.tinfo[i].inBuf2 =
- clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf2)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
-
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- free_mtdata(test_info.tinfo[i].d);
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- clReleaseMemObject(test_info.tinfo[i].inBuf2);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func;
int ftz = job->ftz;
+ bool relaxedMode = job->relaxedMode;
MTdata d = tinfo->d;
cl_int error;
const char *name = job->f->name;
@@ -531,7 +371,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
goto exit;
}
@@ -604,7 +445,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (gMinVectorSizeIndex == 0 && t[j] != q[j])
{
- if (ftz)
+ if (ftz || relaxedMode)
{
if (IsFloatSubnormal(s[j]))
{
@@ -637,20 +478,21 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
uint32_t err = t[j] - q[j];
if (q[j] > t[j]) err = q[j] - t[j];
vlog_error("\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. "
- "0x%8.8x (index: %d)\n",
+ "0x%8.8x (index: %zu)\n",
name, err, ((float *)s)[j], ((float *)s2)[j], t[j], q[j],
j);
error = -1;
goto exit;
}
- for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
+ for (auto k = std::max(1U, gMinVectorSizeIndex);
+ k < gMaxVectorSizeIndex; k++)
{
q = out[k];
// If we aren't getting the correctly rounded result
if (-t[j] != q[j])
{
- if (ftz)
+ if (ftz || relaxedMode)
{
if (IsFloatSubnormal(s[j]))
{
@@ -682,7 +524,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_uint err = -t[j] - q[j];
if (q[j] > -t[j]) err = q[j] + t[j];
vlog_error("\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x "
- "vs. 0x%8.8x (index: %d)\n",
+ "vs. 0x%8.8x (index: %zu)\n",
name, sizeNames[k], err, ((float *)s)[j],
((float *)s2)[j], -t[j], q[j], j);
error = -1;
@@ -724,3 +566,132 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
exit:
return error;
}
+
+} // anonymous namespace
+
+int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+
+ logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
+
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_float));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ftz =
+ f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+ test_info.relaxedMode = relaxedMode;
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_float),
+ test_info.subBufferSize * sizeof(cl_float)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+ test_info.tinfo[i].inBuf2 =
+ clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf2)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
+ return error;
+}
diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
index 00e65a2c..c2e7cdcc 100644
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ b/test_conformance/math_brute_force/macro_unary_double.cpp
@@ -14,14 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -101,210 +105,61 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ Buffers outBuf;
-typedef struct TestInfo
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
+
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values
int ftz; // non-zero if running in flush to zero mode
+ bool relaxedMode; // True if test is running in relaxed mode, false
+ // otherwise.
+};
-} TestInfo;
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
-{
- TestInfo test_info;
- cl_int error;
-
- logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_double));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ftz = f->ftz || gForceFTZ;
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_double),
- test_info.subBufferSize * sizeof(cl_double)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint scale = job->scale;
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
dptr dfunc = job->f->dfunc;
int ftz = job->ftz;
+ bool relaxedMode = job->relaxedMode;
cl_int error;
const char *name = job->f->name;
@@ -362,7 +217,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
return error;
}
@@ -430,7 +286,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (gMinVectorSizeIndex == 0 && t[j] != q[j])
{
// If we aren't getting the correctly rounded result
- if (ftz)
+ if (ftz || relaxedMode)
{
if (IsDoubleSubnormal(s[j]))
{
@@ -442,19 +298,21 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_ulong err = t[j] - q[j];
if (q[j] > t[j]) err = q[j] - t[j];
- vlog_error("\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n",
+ vlog_error("\nERROR: %sD: %" PRId64
+ " ulp error at %.13la: *%" PRId64 " vs. %" PRId64 "\n",
name, err, ((double *)gIn)[j], t[j], q[j]);
return -1;
}
- for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
+ for (auto k = std::max(1U, gMinVectorSizeIndex);
+ k < gMaxVectorSizeIndex; k++)
{
q = out[k];
// If we aren't getting the correctly rounded result
if (-t[j] != q[j])
{
- if (ftz)
+ if (ftz || relaxedMode)
{
if (IsDoubleSubnormal(s[j]))
{
@@ -467,7 +325,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_ulong err = -t[j] - q[j];
if (q[j] > -t[j]) err = q[j] + t[j];
vlog_error(
- "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n",
+ "\nERROR: %sD%s: %" PRId64 " ulp error at %.13la: *%" PRId64
+ " vs. %" PRId64 "\n",
name, sizeNames[k], err, ((double *)gIn)[j], -t[j], q[j]);
return -1;
}
@@ -506,3 +365,119 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
return CL_SUCCESS;
}
+
+} // anonymous namespace
+
+int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+
+ logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_double));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ftz = f->ftz || gForceFTZ;
+ test_info.relaxedMode = relaxedMode;
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_double),
+ test_info.subBufferSize * sizeof(cl_double)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
+ return error;
+}
diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp
index 3c1717ac..6a1b9b9a 100644
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ b/test_conformance/math_brute_force/macro_unary_float.cpp
@@ -14,14 +14,17 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -100,211 +103,61 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ Buffers outBuf;
-typedef struct TestInfo
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
+
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values
int ftz; // non-zero if running in flush to zero mode
+ bool relaxedMode; // True if test is running in relaxed mode, false
+ // otherwise.
+};
-} TestInfo;
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
-{
- TestInfo test_info;
- cl_int error;
-
- logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_float));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ftz =
- f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_float),
- test_info.subBufferSize * sizeof(cl_float)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint scale = job->scale;
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func;
int ftz = job->ftz;
+ bool relaxedMode = job->relaxedMode;
cl_int error = CL_SUCCESS;
cl_int ret = CL_SUCCESS;
const char *name = job->f->name;
@@ -365,7 +218,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
return error;
}
@@ -435,7 +289,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (gMinVectorSizeIndex == 0 && t[j] != q[j])
{
// If we aren't getting the correctly rounded result
- if (ftz)
+ if (ftz || relaxedMode)
{
if (IsFloatSubnormal(s[j]))
{
@@ -454,14 +308,14 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
}
- for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
- k++)
+ for (auto k = std::max(1U, gMinVectorSizeIndex);
+ k < gMaxVectorSizeIndex; k++)
{
q = out[k];
// If we aren't getting the correctly rounded result
if (-t[j] != q[j])
{
- if (ftz)
+ if (ftz || relaxedMode)
{
if (IsFloatSubnormal(s[j]))
{
@@ -521,3 +375,120 @@ exit:
return ret;
}
+
+} // anonymous namespace
+
+int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+
+ logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
+
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_float));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ftz =
+ f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+ test_info.relaxedMode = relaxedMode;
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_float),
+ test_info.subBufferSize * sizeof(cl_float)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
+ return error;
+}
diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp
index a32cd5a8..8d8fec52 100644
--- a/test_conformance/math_brute_force/mad_double.cpp
+++ b/test_conformance/math_brute_force/mad_double.cpp
@@ -14,126 +14,49 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
-{
- const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global double",
- sizeNames[vectorSize],
- "* out, __global double",
- sizeNames[vectorSize],
- "* in1, __global double",
- sizeNames[vectorSize],
- "* in2, __global double",
- sizeNames[vectorSize],
- "* in3 )\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " out[i] = ",
- name,
- "( in1[i], in2[i], in3[i] );\n"
- "}\n" };
-
- const char *c3[] = {
- "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global double* out, __global double* in, __global double* in2, "
- "__global double* in3)\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " if( i + 1 < get_global_size(0) )\n"
- " {\n"
- " double3 d0 = vload3( 0, in + 3 * i );\n"
- " double3 d1 = vload3( 0, in2 + 3 * i );\n"
- " double3 d2 = vload3( 0, in3 + 3 * i );\n"
- " d0 = ",
- name,
- "( d0, d1, d2 );\n"
- " vstore3( d0, 0, out + 3*i );\n"
- " }\n"
- " else\n"
- " {\n"
- " size_t parity = i & 1; // Figure out how many elements are "
- "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
- "buffer size \n"
- " double3 d0;\n"
- " double3 d1;\n"
- " double3 d2;\n"
- " switch( parity )\n"
- " {\n"
- " case 1:\n"
- " d0 = (double3)( in[3*i], NAN, NAN ); \n"
- " d1 = (double3)( in2[3*i], NAN, NAN ); \n"
- " d2 = (double3)( in3[3*i], NAN, NAN ); \n"
- " break;\n"
- " case 0:\n"
- " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
- " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
- " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
- " break;\n"
- " }\n"
- " d0 = ",
- name,
- "( d0, d1, d2 );\n"
- " switch( parity )\n"
- " {\n"
- " case 0:\n"
- " out[3*i+1] = d0.y; \n"
- " // fall through\n"
- " case 1:\n"
- " out[3*i] = d0.x; \n"
- " break;\n"
- " }\n"
- " }\n"
- "}\n"
- };
-
- const char **kern = c;
- size_t kernSize = sizeof(c) / sizeof(c[0]);
+namespace {
- if (sizeValues[vectorSize] == 3)
- {
- kern = c3;
- kernSize = sizeof(c3) / sizeof(c3[0]);
- }
-
- char testName[32];
- snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
- sizeNames[vectorSize]);
-
- return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
+{
+ auto kernel_name = GetKernelName(vectorSize);
+ auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
+ ParameterType::Double, ParameterType::Double,
+ ParameterType::Double, vectorSize);
+ std::array<const char *, 1> sources{ source.c_str() };
+ return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
+ relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
+} // anonymous namespace
+
int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
{
int error;
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f;
double maxErrorVal = 0.0f;
@@ -145,8 +68,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -294,7 +217,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp
index 095a22ff..04ac5aa6 100644
--- a/test_conformance/math_brute_force/mad_float.cpp
+++ b/test_conformance/math_brute_force/mad_float.cpp
@@ -14,127 +14,52 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
-{
- const char *c[] = { "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global float",
- sizeNames[vectorSize],
- "* out, __global float",
- sizeNames[vectorSize],
- "* in1, __global float",
- sizeNames[vectorSize],
- "* in2, __global float",
- sizeNames[vectorSize],
- "* in3 )\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " out[i] = ",
- name,
- "( in1[i], in2[i], in3[i] );\n"
- "}\n" };
-
- const char *c3[] = {
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global float* out, __global float* in, __global float* in2, "
- "__global float* in3)\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " if( i + 1 < get_global_size(0) )\n"
- " {\n"
- " float3 f0 = vload3( 0, in + 3 * i );\n"
- " float3 f1 = vload3( 0, in2 + 3 * i );\n"
- " float3 f2 = vload3( 0, in3 + 3 * i );\n"
- " f0 = ",
- name,
- "( f0, f1, f2 );\n"
- " vstore3( f0, 0, out + 3*i );\n"
- " }\n"
- " else\n"
- " {\n"
- " size_t parity = i & 1; // Figure out how many elements are "
- "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
- "buffer size \n"
- " float3 f0;\n"
- " float3 f1;\n"
- " float3 f2;\n"
- " switch( parity )\n"
- " {\n"
- " case 1:\n"
- " f0 = (float3)( in[3*i], NAN, NAN ); \n"
- " f1 = (float3)( in2[3*i], NAN, NAN ); \n"
- " f2 = (float3)( in3[3*i], NAN, NAN ); \n"
- " break;\n"
- " case 0:\n"
- " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
- " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
- " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
- " break;\n"
- " }\n"
- " f0 = ",
- name,
- "( f0, f1, f2 );\n"
- " switch( parity )\n"
- " {\n"
- " case 0:\n"
- " out[3*i+1] = f0.y; \n"
- " // fall through\n"
- " case 1:\n"
- " out[3*i] = f0.x; \n"
- " break;\n"
- " }\n"
- " }\n"
- "}\n"
- };
-
- const char **kern = c;
- size_t kernSize = sizeof(c) / sizeof(c[0]);
+namespace {
- if (sizeValues[vectorSize] == 3)
- {
- kern = c3;
- kernSize = sizeof(c3) / sizeof(c3[0]);
- }
-
- char testName[32];
- snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
- sizeNames[vectorSize]);
-
- return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
+{
+ auto kernel_name = GetKernelName(vectorSize);
+ auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
+ ParameterType::Float, ParameterType::Float,
+ ParameterType::Float, vectorSize);
+ std::array<const char *, 1> sources{ source.c_str() };
+ return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
+ relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
+} // anonymous namespace
+
int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
{
int error;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f;
float maxErrorVal = 0.0f;
@@ -144,8 +69,8 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -293,7 +218,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index d6c2f11f..64491bd4 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -18,6 +18,7 @@
#include "sleep.h"
#include "utility.h"
+#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <ctime>
@@ -57,8 +58,8 @@ static char appName[MAXPATHLEN] = "";
cl_device_id gDevice = NULL;
cl_context gContext = NULL;
cl_command_queue gQueue = NULL;
-static int32_t gStartTestNumber = -1;
-static int32_t gEndTestNumber = -1;
+static size_t gStartTestNumber = ~0u;
+static size_t gEndTestNumber = ~0u;
int gSkipCorrectnessTesting = 0;
static int gStopOnError = 0;
static bool gSkipRestOfTests;
@@ -97,7 +98,7 @@ cl_mem gInBuffer2 = NULL;
cl_mem gInBuffer3 = NULL;
cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
-static MTdata gMTdata;
+static MTdataHolder gMTdata;
cl_device_fp_config gFloatCapabilities = 0;
int gWimpyReductionFactor = 32;
int gVerboseBruteForce = 0;
@@ -128,10 +129,10 @@ static int doTest(const char *name)
const Func *const temp_func = functionList + i;
if (strcmp(temp_func->name, name) == 0)
{
- if ((gStartTestNumber != -1 && i < gStartTestNumber)
+ if ((gStartTestNumber != ~0u && i < gStartTestNumber)
|| i > gEndTestNumber)
{
- vlog("Skipping function #%d\n", i);
+ vlog("Skipping function #%zu\n", i);
return 0;
}
@@ -167,7 +168,6 @@ static int doTest(const char *name)
}
{
- extern int my_ilogb(double);
if (0 == strcmp("ilogb", func_data->name))
{
InitILogbConstants();
@@ -326,7 +326,7 @@ int main(int argc, const char *argv[])
vlog("\n-------------------------------------------------------------------"
"----------------------------------------\n");
- gMTdata = init_genrand(gRandomSeed);
+ gMTdata = MTdataHolder(gRandomSeed);
FPU_mode_type oldMode;
DisableFTZ(&oldMode);
@@ -336,8 +336,6 @@ int main(int argc, const char *argv[])
RestoreFPState(&oldMode);
- free_mtdata(gMTdata);
-
if (gQueue)
{
int error_code = clFinish(gQueue);
@@ -360,16 +358,18 @@ static int ParseArgs(int argc, const char **argv)
int singleThreaded = 0;
{ // Extract the app name
- strncpy(appName, argv[0], MAXPATHLEN);
+ strncpy(appName, argv[0], MAXPATHLEN - 1);
+ appName[MAXPATHLEN - 1] = '\0';
#if defined(__APPLE__)
char baseName[MAXPATHLEN];
char *base = NULL;
- strncpy(baseName, argv[0], MAXPATHLEN);
+ strncpy(baseName, argv[0], MAXPATHLEN - 1);
+ baseName[MAXPATHLEN - 1] = '\0';
base = basename(baseName);
if (NULL != base)
{
- strncpy(appName, base, sizeof(appName));
+ strncpy(appName, base, sizeof(appName) - 1);
appName[sizeof(appName) - 1] = '\0';
}
#endif
@@ -467,7 +467,7 @@ static int ParseArgs(int argc, const char **argv)
long number = strtol(arg, &t, 0);
if (t != arg)
{
- if (-1 == gStartTestNumber)
+ if (~0u == gStartTestNumber)
gStartTestNumber = (int32_t)number;
else
gEndTestNumber = gStartTestNumber + (int32_t)number;
@@ -502,8 +502,6 @@ static int ParseArgs(int argc, const char **argv)
gWimpyMode = 1;
}
- vlog("\nTest binary built %s %s\n", __DATE__, __TIME__);
-
PrintArch();
if (gWimpyMode)
@@ -524,7 +522,7 @@ static int ParseArgs(int argc, const char **argv)
static void PrintFunctions(void)
{
vlog("\nMath function names:\n");
- for (int i = 0; i < functionListCount; i++)
+ for (size_t i = 0; i < functionListCount; i++)
{
vlog("\t%s\n", functionList[i].name);
}
@@ -1056,8 +1054,6 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
cl_uint kernel_count, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
- int error = 0;
- cl_uint i;
char options[200] = "";
if (gForceFTZ)
@@ -1075,7 +1071,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
strcat(options, " -cl-fast-relaxed-math");
}
- error =
+ int error =
create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
if (error != CL_SUCCESS)
{
@@ -1083,9 +1079,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
return error;
}
-
- memset(k, 0, kernel_count * sizeof(*k));
- for (i = 0; i < kernel_count; i++)
+ for (cl_uint i = 0; i < kernel_count; i++)
{
k[i] = clCreateKernel(*p, name, &error);
if (NULL == k[i] || error)
@@ -1096,7 +1090,6 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG,
sizeof(buffer), buffer, NULL);
vlog_error("Log: %s\n", buffer);
- clReleaseProgram(*p);
return error;
}
}
@@ -1244,7 +1237,7 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference)
// The unbiased exponent of the ulp unit place
int ulp_exp =
- DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1);
+ DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1);
// Scale the exponent of the error
float result = (float)scalbnl(testVal - reference, ulp_exp);
@@ -1260,7 +1253,7 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference)
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
int ulp_exp =
- DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
+ DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
// allow correctly rounded results to pass through unmolested. (We might add
// error to it below.) There is something of a performance optimization here
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index 3a6516ba..afa072f8 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
@@ -41,10 +41,10 @@
#pragma STDC FP_CONTRACT OFF
static void __log2_ep(double *hi, double *lo, double x);
-typedef union {
+union uint64d_t {
uint64_t i;
double d;
-} uint64d_t;
+};
static const uint64d_t _CL_NAN = { 0x7ff8000000000000ULL };
@@ -1949,7 +1949,8 @@ double reference_lgamma(double x)
w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
static const double zero = 0.00000000000000000000e+00;
- double t, y, z, nadj, p, p1, p2, p3, q, r, w;
+ double nadj = zero;
+ double t, y, z, p, p1, p2, p3, q, r, w;
cl_int i, hx, lx, ix;
union {
@@ -2259,10 +2260,10 @@ long double reference_dividel(long double x, long double y)
return dx / dy;
}
-typedef struct
+struct double_double
{
double hi, lo;
-} double_double;
+};
// Split doubles_double into a series of consecutive 26-bit precise doubles and
// a remainder. Note for later -- for multiplication, it might be better to
@@ -2321,7 +2322,7 @@ static inline double_double accum_d(double_double a, double b)
static inline double_double add_dd(double_double a, double_double b)
{
- double_double r = { -0.0 - 0.0 };
+ double_double r = { -0.0, -0.0 };
if (isinf(a.hi) || isinf(b.hi) || isnan(a.hi) || isnan(b.hi) || 0.0 == a.hi
|| 0.0 == b.hi)
@@ -3767,10 +3768,10 @@ static uint32_t two_over_pi[] = {
static uint32_t pi_over_two[] = { 0x1, 0x2487ed51, 0x42d1846,
0x26263314, 0x1701b839, 0x28948127 };
-typedef union {
+union d_ui64_t {
uint64_t u;
double d;
-} d_ui64_t;
+};
// radix or base of representation
#define RADIX (30)
@@ -3786,13 +3787,13 @@ d_ui64_t two_pow_two_mradix = { (uint64_t)(1023 - 2 * RADIX) << 52 };
// extended fixed point representation of double precision
// floating point number.
// x = sign * [ sum_{i = 0 to 2} ( X[i] * 2^(index - i)*RADIX ) ]
-typedef struct
+struct eprep_t
{
uint32_t X[3]; // three 32 bit integers are sufficient to represnt double in
// base_30
int index; // exponent bias
int sign; // sign of double
-} eprep_t;
+};
static eprep_t double_to_eprep(double x)
{
@@ -4549,8 +4550,8 @@ long double reference_powl(long double x, long double y)
if (x != x || y != y) return x + y;
// do the work required to sort out edge cases
- double fabsy = reference_fabs(y);
- double fabsx = reference_fabs(x);
+ double fabsy = (double)reference_fabsl(y);
+ double fabsx = (double)reference_fabsl(x);
double iy = reference_rint(
fabsy); // we do round to nearest here so that |fy| <= 0.5
if (iy > fabsy) // convert nearbyint to floor
@@ -4637,13 +4638,13 @@ long double reference_powl(long double x, long double y)
// compute product of y*log2(x)
// scale to avoid overflow in double-double multiplication
- if (reference_fabs(y) > HEX_DBL(+, 1, 0, +, 970))
+ if (fabsy > HEX_DBL(+, 1, 0, +, 970))
{
y_hi = reference_ldexp(y_hi, -53);
y_lo = reference_ldexp(y_lo, -53);
}
MulDD(&ylog2x_hi, &ylog2x_lo, log2x_hi, log2x_lo, y_hi, y_lo);
- if (fabs(y) > HEX_DBL(+, 1, 0, +, 970))
+ if (fabsy > HEX_DBL(+, 1, 0, +, 970))
{
ylog2x_hi = reference_ldexp(ylog2x_hi, 53);
ylog2x_lo = reference_ldexp(ylog2x_lo, 53);
@@ -5357,10 +5358,10 @@ long double reference_acosl(long double x)
0x3243F6A8885A308DULL, 0x313198A2E0370734ULL
}; // first 126 bits of pi
// http://www.super-computing.org/pi-hexa_current.html
- long double head, tail, temp;
+ long double head, tail;
#if __LDBL_MANT_DIG__ >= 64
// long double has 64-bits of precision or greater
- temp = (long double)pi_bits[0] * 0x1.0p64L;
+ long double temp = (long double)pi_bits[0] * 0x1.0p64L;
head = temp + (long double)pi_bits[1];
temp -= head; // rounding err rounding pi_bits[1] into head
tail = (long double)pi_bits[1] + temp;
diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp
index 606fdc5a..b5f1ab09 100644
--- a/test_conformance/math_brute_force/ternary_double.cpp
+++ b/test_conformance/math_brute_force/ternary_double.cpp
@@ -14,127 +14,49 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
#define CORRECTLY_ROUNDED 0
#define FLUSHED 1
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
-{
- const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global double",
- sizeNames[vectorSize],
- "* out, __global double",
- sizeNames[vectorSize],
- "* in1, __global double",
- sizeNames[vectorSize],
- "* in2, __global double",
- sizeNames[vectorSize],
- "* in3 )\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " out[i] = ",
- name,
- "( in1[i], in2[i], in3[i] );\n"
- "}\n" };
-
- const char *c3[] = {
- "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global double* out, __global double* in, __global double* in2, "
- "__global double* in3)\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " if( i + 1 < get_global_size(0) )\n"
- " {\n"
- " double3 d0 = vload3( 0, in + 3 * i );\n"
- " double3 d1 = vload3( 0, in2 + 3 * i );\n"
- " double3 d2 = vload3( 0, in3 + 3 * i );\n"
- " d0 = ",
- name,
- "( d0, d1, d2 );\n"
- " vstore3( d0, 0, out + 3*i );\n"
- " }\n"
- " else\n"
- " {\n"
- " size_t parity = i & 1; // Figure out how many elements are "
- "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
- "buffer size \n"
- " double3 d0;\n"
- " double3 d1;\n"
- " double3 d2;\n"
- " switch( parity )\n"
- " {\n"
- " case 1:\n"
- " d0 = (double3)( in[3*i], NAN, NAN ); \n"
- " d1 = (double3)( in2[3*i], NAN, NAN ); \n"
- " d2 = (double3)( in3[3*i], NAN, NAN ); \n"
- " break;\n"
- " case 0:\n"
- " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
- " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
- " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
- " break;\n"
- " }\n"
- " d0 = ",
- name,
- "( d0, d1, d2 );\n"
- " switch( parity )\n"
- " {\n"
- " case 0:\n"
- " out[3*i+1] = d0.y; \n"
- " // fall through\n"
- " case 1:\n"
- " out[3*i] = d0.x; \n"
- " break;\n"
- " }\n"
- " }\n"
- "}\n"
- };
-
- const char **kern = c;
- size_t kernSize = sizeof(c) / sizeof(c[0]);
-
- if (sizeValues[vectorSize] == 3)
- {
- kern = c3;
- kernSize = sizeof(c3) / sizeof(c3[0]);
- }
-
- char testName[32];
- snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
- sizeNames[vectorSize]);
+namespace {
- return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
+{
+ auto kernel_name = GetKernelName(vectorSize);
+ auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
+ ParameterType::Double, ParameterType::Double,
+ ParameterType::Double, vectorSize);
+ std::array<const char *, 1> sources{ source.c_str() };
+ return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
+ relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// A table of more difficult cases to get right
-static const double specialValues[] = {
+const double specialValues[] = {
-NAN,
-INFINITY,
-DBL_MAX,
@@ -202,14 +124,16 @@ static const double specialValues[] = {
+0.0,
};
-static const size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
+} // anonymous namespace
+
int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
bool relaxedMode)
{
int error;
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f;
int ftz = f->ftz || gForceFTZ;
@@ -224,8 +148,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -387,7 +311,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
float err = Bruteforce_Ulp_Error_Double(test, correct);
int fail = !(fabsf(err) <= f->double_ulps);
- if (fail && ftz)
+ if (fail && (ftz || relaxedMode))
{
// retry per section 6.5.3.2
if (IsDoubleSubnormal(correct))
@@ -704,8 +628,9 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -733,7 +658,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp
index e52c0a0f..cf361841 100644
--- a/test_conformance/math_brute_force/ternary_float.cpp
+++ b/test_conformance/math_brute_force/ternary_float.cpp
@@ -14,125 +14,49 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
#define CORRECTLY_ROUNDED 0
#define FLUSHED 1
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
-{
- const char *c[] = { "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global float",
- sizeNames[vectorSize],
- "* out, __global float",
- sizeNames[vectorSize],
- "* in1, __global float",
- sizeNames[vectorSize],
- "* in2, __global float",
- sizeNames[vectorSize],
- "* in3 )\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " out[i] = ",
- name,
- "( in1[i], in2[i], in3[i] );\n"
- "}\n" };
-
- const char *c3[] = {
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global float* out, __global float* in, __global float* in2, "
- "__global float* in3)\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " if( i + 1 < get_global_size(0) )\n"
- " {\n"
- " float3 f0 = vload3( 0, in + 3 * i );\n"
- " float3 f1 = vload3( 0, in2 + 3 * i );\n"
- " float3 f2 = vload3( 0, in3 + 3 * i );\n"
- " f0 = ",
- name,
- "( f0, f1, f2 );\n"
- " vstore3( f0, 0, out + 3*i );\n"
- " }\n"
- " else\n"
- " {\n"
- " size_t parity = i & 1; // Figure out how many elements are "
- "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
- "buffer size \n"
- " float3 f0;\n"
- " float3 f1;\n"
- " float3 f2;\n"
- " switch( parity )\n"
- " {\n"
- " case 1:\n"
- " f0 = (float3)( in[3*i], NAN, NAN ); \n"
- " f1 = (float3)( in2[3*i], NAN, NAN ); \n"
- " f2 = (float3)( in3[3*i], NAN, NAN ); \n"
- " break;\n"
- " case 0:\n"
- " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
- " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
- " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
- " break;\n"
- " }\n"
- " f0 = ",
- name,
- "( f0, f1, f2 );\n"
- " switch( parity )\n"
- " {\n"
- " case 0:\n"
- " out[3*i+1] = f0.y; \n"
- " // fall through\n"
- " case 1:\n"
- " out[3*i] = f0.x; \n"
- " break;\n"
- " }\n"
- " }\n"
- "}\n"
- };
-
- const char **kern = c;
- size_t kernSize = sizeof(c) / sizeof(c[0]);
-
- if (sizeValues[vectorSize] == 3)
- {
- kern = c3;
- kernSize = sizeof(c3) / sizeof(c3[0]);
- }
-
- char testName[32];
- snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
- sizeNames[vectorSize]);
+namespace {
- return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
+{
+ auto kernel_name = GetKernelName(vectorSize);
+ auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
+ ParameterType::Float, ParameterType::Float,
+ ParameterType::Float, vectorSize);
+ std::array<const char *, 1> sources{ source.c_str() };
+ return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
+ relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// A table of more difficult cases to get right
-static const float specialValues[] = {
+const float specialValues[] = {
-NAN,
-INFINITY,
-FLT_MAX,
@@ -210,16 +134,18 @@ static const float specialValues[] = {
+0.0f,
};
-static const size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
+} // anonymous namespace
+
int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{
int error;
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f;
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -240,8 +166,8 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -439,7 +365,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
err = Ulp_Error(test, correct);
fail = !(fabsf(err) <= float_ulps);
- if (fail && ftz)
+ if (fail && (ftz || relaxedMode))
{
float correct2, err2;
@@ -839,8 +765,8 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64 " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -868,7 +794,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
index f6fa3264..177cfe5b 100644
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -14,14 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -101,44 +105,44 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ Buffers outBuf;
+
float maxError; // max error value. Init to 0.
double maxErrorValue; // position of the max error value. Init to 0.
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
+
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
@@ -151,185 +155,21 @@ typedef struct TestInfo
float half_sin_cos_tan_limit;
bool relaxedMode; // True if test is running in relaxed mode, false
// otherwise.
-} TestInfo;
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
+};
-int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- TestInfo test_info;
- cl_int error;
- float maxError = 0.0f;
- double maxErrorVal = 0.0;
-
- logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_double));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ulps = f->double_ulps;
- test_info.ftz = f->ftz || gForceFTZ;
- test_info.relaxedMode = relaxedMode;
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_double),
- test_info.subBufferSize * sizeof(cl_double)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- // Accumulate the arithmetic errors
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- if (test_info.tinfo[i].maxError > maxError)
- {
- maxError = test_info.tinfo[i].maxError;
- maxErrorVal = test_info.tinfo[i].maxErrorValue;
- }
- }
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
-
- vlog("\t%8.2f @ %a", maxError, maxErrorVal);
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint scale = job->scale;
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
float ulps = job->ulps;
dptr func = job->f->dfunc;
cl_int error;
int ftz = job->ftz;
+ bool relaxedMode = job->relaxedMode;
Force64BitFPUPrecision();
@@ -385,7 +225,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
return error;
}
@@ -463,7 +304,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (fail)
{
- if (ftz)
+ if (ftz || relaxedMode)
{
// retry per section 6.5.3.2
if (IsDoubleResultSubnormal(correct, ulps))
@@ -505,7 +346,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (fail)
{
vlog_error("\nERROR: %s%s: %f ulp error at %.13la "
- "(0x%16.16llx): *%.13la vs. %.13la\n",
+ "(0x%16.16" PRIx64 "): *%.13la vs. %.13la\n",
job->f->name, sizeNames[k], err,
((cl_double *)gIn)[j], ((cl_ulong *)gIn)[j],
((cl_double *)gOut_Ref)[j], test);
@@ -547,3 +388,133 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
return CL_SUCCESS;
}
+
+} // anonymous namespace
+
+int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+ float maxError = 0.0f;
+ double maxErrorVal = 0.0;
+
+ logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_double));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ulps = f->double_ulps;
+ test_info.ftz = f->ftz || gForceFTZ;
+ test_info.relaxedMode = relaxedMode;
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_double),
+ test_info.subBufferSize * sizeof(cl_double)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ // Accumulate the arithmetic errors
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ if (test_info.tinfo[i].maxError > maxError)
+ {
+ maxError = test_info.tinfo[i].maxError;
+ maxErrorVal = test_info.tinfo[i].maxErrorValue;
+ }
+ }
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+
+ vlog("\t%8.2f @ %a", maxError, maxErrorVal);
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
+ return error;
+}
diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
index 17edc58d..4c1f1a1d 100644
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -14,14 +14,17 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -99,44 +102,44 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ Buffers outBuf;
+
float maxError; // max error value. Init to 0.
double maxErrorValue; // position of the max error value. Init to 0.
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
+
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
@@ -149,207 +152,16 @@ typedef struct TestInfo
float half_sin_cos_tan_limit;
bool relaxedMode; // True if test is running in relaxed mode, false
// otherwise.
-} TestInfo;
+};
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- TestInfo test_info;
- cl_int error;
- float maxError = 0.0f;
- double maxErrorVal = 0.0;
- int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
-
- logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_float));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
- test_info.ftz =
- f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
- test_info.relaxedMode = relaxedMode;
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_float),
- test_info.subBufferSize * sizeof(cl_float)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
- }
-
- // Check for special cases for unary float
- test_info.isRangeLimited = 0;
- test_info.half_sin_cos_tan_limit = 0;
- if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
- {
- test_info.isRangeLimited = 1;
- test_info.half_sin_cos_tan_limit = 1.0f
- + test_info.ulps
- * (FLT_EPSILON / 2.0f); // out of range results from finite
- // inputs must be in [-1,1]
- }
- else if (0 == strcmp(f->name, "half_tan"))
- {
- test_info.isRangeLimited = 1;
- test_info.half_sin_cos_tan_limit =
- INFINITY; // out of range resut from finite inputs must be numeric
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting || skipTestingRelaxed)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- // Accumulate the arithmetic errors
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- if (test_info.tinfo[i].maxError > maxError)
- {
- maxError = test_info.tinfo[i].maxError;
- maxErrorVal = test_info.tinfo[i].maxErrorValue;
- }
- }
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
-
- if (skipTestingRelaxed)
- {
- vlog(" (rlx skip correctness testing)\n");
- goto exit;
- }
-
- vlog("\t%8.2f @ %a", maxError, maxErrorVal);
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint scale = job->scale;
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func;
const char *fname = job->f->name;
bool relaxedMode = job->relaxedMode;
@@ -440,7 +252,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
return error;
}
@@ -619,7 +432,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (fail)
{
- if (ftz)
+ if (ftz || relaxedMode)
{
typedef int (*CheckForSubnormal)(
double, float); // If we are in fast relaxed math,
@@ -725,3 +538,159 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
return CL_SUCCESS;
}
+
+} // anonymous namespace
+
+int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+ float maxError = 0.0f;
+ double maxErrorVal = 0.0;
+ int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
+
+ logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
+
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_float));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+ test_info.ftz =
+ f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+ test_info.relaxedMode = relaxedMode;
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_float),
+ test_info.subBufferSize * sizeof(cl_float)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+ }
+
+ // Check for special cases for unary float
+ test_info.isRangeLimited = 0;
+ test_info.half_sin_cos_tan_limit = 0;
+ if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
+ {
+ test_info.isRangeLimited = 1;
+ test_info.half_sin_cos_tan_limit = 1.0f
+ + test_info.ulps
+ * (FLT_EPSILON / 2.0f); // out of range results from finite
+ // inputs must be in [-1,1]
+ }
+ else if (0 == strcmp(f->name, "half_tan"))
+ {
+ test_info.isRangeLimited = 1;
+ test_info.half_sin_cos_tan_limit =
+ INFINITY; // out of range result from finite inputs must be numeric
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting || skipTestingRelaxed)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ // Accumulate the arithmetic errors
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ if (test_info.tinfo[i].maxError > maxError)
+ {
+ maxError = test_info.tinfo[i].maxError;
+ maxErrorVal = test_info.tinfo[i].maxErrorValue;
+ }
+ }
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+
+ if (skipTestingRelaxed)
+ {
+ vlog(" (rlx skip correctness testing)\n");
+ goto exit;
+ }
+
+ vlog("\t%8.2f @ %a", maxError, maxErrorVal);
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
+ return error;
+}
diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp
index 71dd4f44..6d7c61d6 100644
--- a/test_conformance/math_brute_force/unary_two_results_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_double.cpp
@@ -14,14 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -107,27 +111,28 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
+} // anonymous namespace
+
int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
{
int error;
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError0 = 0.0f;
float maxError1 = 0.0f;
@@ -144,8 +149,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -287,7 +292,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
float err2 = Bruteforce_Ulp_Error_Double(test2, correct2);
int fail = !(fabsf(err) <= f->double_ulps
&& fabsf(err2) <= f->double_ulps);
- if (ftz)
+ if (ftz || relaxedMode)
{
// retry per section 6.5.3.2
if (IsDoubleResultSubnormal(correct, f->double_ulps))
@@ -410,8 +415,9 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -439,7 +445,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp
index 4a375ce3..42e858c4 100644
--- a/test_conformance/math_brute_force/unary_two_results_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_float.cpp
@@ -14,14 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -105,27 +109,28 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
+} // anonymous namespace
+
int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
{
int error;
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError0 = 0.0f;
float maxError1 = 0.0f;
@@ -143,8 +148,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
float float_ulps = getAllowedUlpError(f, relaxedMode);
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -254,7 +259,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
{
// Calculate the correctly rounded reference result
memset(&oldMode, 0, sizeof(oldMode));
- if (ftz) ForceFTZ(&oldMode);
+ if (ftz || relaxedMode) ForceFTZ(&oldMode);
// Set the rounding mode to match the device
if (gIsInRTZMode)
@@ -381,7 +386,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
int fail = !(fabsf(err) <= float_ulps
&& fabsf(err2) <= float_ulps);
- if (ftz)
+ if (ftz || relaxedMode)
{
// retry per section 6.5.3.2
if ((*isFloatResultSubnormalPtr)(correct, float_ulps))
@@ -542,8 +547,9 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -571,7 +577,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
index 14d1fb99..8b751944 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
@@ -14,15 +14,19 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <climits>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -108,33 +112,34 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
-static cl_ulong abs_cl_long(cl_long i)
+cl_ulong abs_cl_long(cl_long i)
{
cl_long mask = i >> 63;
return (i ^ mask) - mask;
}
+} // anonymous namespace
+
int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
{
int error;
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f;
int64_t maxError2 = 0;
@@ -152,8 +157,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -290,7 +295,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
cl_long iErr = (long long)q2[j] - (long long)correct2;
int fail = !(fabsf(err) <= f->double_ulps
&& abs_cl_long(iErr) <= maxiError);
- if (ftz)
+ if (ftz || relaxedMode)
{
// retry per section 6.5.3.2
if (IsDoubleResultSubnormal(correct, f->double_ulps))
@@ -382,8 +387,9 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -400,8 +406,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
else
vlog("passed");
- vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
- maxErrorVal2);
+ vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+ maxErrorVal, maxErrorVal2);
}
vlog("\n");
@@ -411,7 +417,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
index 23b0d707..54843a29 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
@@ -14,15 +14,19 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <climits>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -106,33 +110,34 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
-static cl_ulong abs_cl_long(cl_long i)
+cl_ulong abs_cl_long(cl_long i)
{
cl_long mask = i >> 63;
return (i ^ mask) - mask;
}
+} // anonymous namespace
+
int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
{
int error;
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f;
int64_t maxError2 = 0;
@@ -155,8 +160,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -293,7 +298,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
cl_long iErr = (int64_t)q2[j] - (int64_t)correct2;
int fail = !(fabsf(err) <= float_ulps
&& abs_cl_long(iErr) <= maxiError);
- if (ftz)
+ if (ftz || relaxedMode)
{
// retry per section 6.5.3.2
if (IsFloatResultSubnormal(correct, float_ulps))
@@ -380,8 +385,9 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -398,8 +404,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
else
vlog("passed");
- vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
- maxErrorVal2);
+ vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+ maxErrorVal, maxErrorVal2);
}
vlog("\n");
@@ -409,7 +415,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp
index 3c5f99da..9b60904a 100644
--- a/test_conformance/math_brute_force/unary_u_double.cpp
+++ b/test_conformance/math_brute_force/unary_u_double.cpp
@@ -14,14 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -102,32 +106,33 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
-static cl_ulong random64(MTdata d)
+cl_ulong random64(MTdata d)
{
return (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32);
}
+} // anonymous namespace
+
int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
{
int error;
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f;
int ftz = f->ftz || gForceFTZ;
@@ -140,8 +145,8 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -245,7 +250,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
if (fail)
{
- if (ftz)
+ if (ftz || relaxedMode)
{
// retry per section 6.5.3.2
if (IsDoubleResultSubnormal(correct,
@@ -263,11 +268,11 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
}
if (fail)
{
- vlog_error("\n%s%sD: %f ulp error at 0x%16.16llx: "
- "*%.13la vs. %.13la\n",
- f->name, sizeNames[k], err,
- ((uint64_t *)gIn)[j],
- ((double *)gOut_Ref)[j], test);
+ vlog_error(
+ "\n%s%sD: %f ulp error at 0x%16.16" PRIx64 ": "
+ "*%.13la vs. %.13la\n",
+ f->name, sizeNames[k], err, ((uint64_t *)gIn)[j],
+ ((double *)gOut_Ref)[j], test);
error = -1;
goto exit;
}
@@ -279,8 +284,9 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -307,7 +313,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp
index 44c5af47..b67a9bda 100644
--- a/test_conformance/math_brute_force/unary_u_float.cpp
+++ b/test_conformance/math_brute_force/unary_u_float.cpp
@@ -14,14 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
- cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
+ bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -99,27 +103,28 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
}
-typedef struct BuildKernelInfo
+struct BuildKernelInfo2
{
- cl_uint offset; // the first vector size to build
cl_kernel *kernels;
- cl_program *programs;
+ Programs &programs;
const char *nameInCode;
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
+};
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
- BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernels + i,
- info->programs + i, info->relaxedMode);
+ BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
+ &(info->programs[vectorSize]), info->relaxedMode);
}
+} // anonymous namespace
+
int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
{
int error;
- cl_program programs[VECTOR_SIZE_COUNT];
+ Programs programs;
cl_kernel kernels[VECTOR_SIZE_COUNT];
float maxError = 0.0f;
int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -137,8 +142,8 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
// Init the kernels
{
- BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
- f->nameInCode, relaxedMode };
+ BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
+ relaxedMode };
if ((error = ThreadPool_Do(BuildKernelFn,
gMaxVectorSizeIndex - gMinVectorSizeIndex,
&build_info)))
@@ -249,7 +254,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
if (fail)
{
- if (ftz)
+ if (ftz || relaxedMode)
{
// retry per section 6.5.3.2
if (IsFloatResultSubnormal(correct, float_ulps))
@@ -281,8 +286,9 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -309,7 +315,6 @@ exit:
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
{
clReleaseKernel(kernels[k]);
- clReleaseProgram(programs[k]);
}
return error;
diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h
index ac4db9c8..b4a59edb 100644
--- a/test_conformance/math_brute_force/utility.h
+++ b/test_conformance/math_brute_force/utility.h
@@ -90,8 +90,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
bool relaxedMode);
// used to convert a bucket of bits into a search pattern through double
-static inline double DoubleFromUInt32(uint32_t bits);
-static inline double DoubleFromUInt32(uint32_t bits)
+inline double DoubleFromUInt32(uint32_t bits)
{
union {
uint64_t u;
@@ -117,25 +116,25 @@ void _LogBuildError(cl_program p, int line, const char *file);
// premature flushing to zero.
// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN +
// ulp_limit to be flushed to zero.
-static inline int IsFloatResultSubnormal(double x, float ulps)
+inline int IsFloatResultSubnormal(double x, float ulps)
{
x = fabs(x) - MAKE_HEX_DOUBLE(0x1.0p-149, 0x1, -149) * (double)ulps;
return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
}
-static inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
+inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
{
x = x - abs_err;
return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
}
-static inline int IsDoubleResultSubnormal(long double x, float ulps)
+inline int IsDoubleResultSubnormal(long double x, float ulps)
{
x = fabsl(x) - MAKE_HEX_LONG(0x1.0p-1074, 0x1, -1074) * (long double)ulps;
return x < MAKE_HEX_LONG(0x1.0p-1022, 0x1, -1022);
}
-static inline int IsFloatInfinity(double x)
+inline int IsFloatInfinity(double x)
{
union {
cl_float d;
@@ -145,7 +144,7 @@ static inline int IsFloatInfinity(double x)
return ((u.u & 0x7fffffffU) == 0x7F800000U);
}
-static inline int IsFloatMaxFloat(double x)
+inline int IsFloatMaxFloat(double x)
{
union {
cl_float d;
@@ -155,7 +154,7 @@ static inline int IsFloatMaxFloat(double x)
return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
}
-static inline int IsFloatNaN(double x)
+inline int IsFloatNaN(double x)
{
union {
cl_float d;
@@ -165,13 +164,13 @@ static inline int IsFloatNaN(double x)
return ((u.u & 0x7fffffffU) > 0x7F800000U);
}
-extern cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
+cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
// (that's x87 default state). This causes problems with the tests that
// convert long and ulong to float and double or otherwise deal with values
// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
-static inline void Force64BitFPUPrecision(void)
+inline void Force64BitFPUPrecision(void)
{
#if __MINGW32__
// The usual method is to use _controlfp as follows:
@@ -202,17 +201,17 @@ static inline void Force64BitFPUPrecision(void)
#endif
}
-extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
+void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
-typedef union {
+union int32f_t {
int32_t i;
float f;
-} int32f_t;
+};
-typedef union {
+union int64d_t {
int64_t l;
double d;
-} int64d_t;
+};
void MulD(double *rhi, double *rlo, double u, double v);
void AddD(double *rhi, double *rlo, double a, double b);
@@ -229,7 +228,7 @@ void logFunctionInfo(const char *fname, unsigned int float_size,
float getAllowedUlpError(const Func *f, const bool relaxed);
-static inline cl_uint getTestScale(size_t typeSize)
+inline cl_uint getTestScale(size_t typeSize)
{
if (gWimpyMode)
{
@@ -245,7 +244,7 @@ static inline cl_uint getTestScale(size_t typeSize)
}
}
-static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize)
+inline uint64_t getTestStep(size_t typeSize, size_t bufferSize)
{
if (gWimpyMode)
{
diff --git a/test_conformance/multiple_device_context/test_multiple_devices.cpp b/test_conformance/multiple_device_context/test_multiple_devices.cpp
index 59543ade..4f187b9c 100644
--- a/test_conformance/multiple_device_context/test_multiple_devices.cpp
+++ b/test_conformance/multiple_device_context/test_multiple_devices.cpp
@@ -175,9 +175,8 @@ int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices
}
/* All done now! */
- if (errors)
- return -1;
- return 0;
+ if (errors) return -1;
+ return 0;
}
int test_two_devices(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
diff --git a/test_conformance/pipes/test_pipe_info.cpp b/test_conformance/pipes/test_pipe_info.cpp
index 7543c6cd..e7b486db 100644
--- a/test_conformance/pipes/test_pipe_info.cpp
+++ b/test_conformance/pipes/test_pipe_info.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "procs.h"
+#include "harness/parseParameters.h"
const char* pipe_kernel_code = {
"__kernel void pipe_kernel(__write_only pipe int out_pipe)\n"
@@ -39,8 +40,7 @@ int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue
if (pipe_width != returnVal)
{
- log_error("Error in clGetPipeInfo() check of pipe packet size\n");
- return -1;
+ test_fail("Error in clGetPipeInfo() check of pipe packet size\n");
}
else
{
@@ -52,29 +52,37 @@ int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue
if(pipe_depth != returnVal)
{
- log_error( "Error in clGetPipeInfo() check of pipe max packets\n" );
- return -1;
+ test_fail("Error in clGetPipeInfo() check of pipe max packets\n");
}
else
{
log_info( " CL_PIPE_MAX_PACKETS passed.\n" );
}
- err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, (const char**)&pipe_kernel_code, "pipe_kernel", "-cl-std=CL2.0 -cl-kernel-arg-info");
- test_error_ret(err, " Error creating program", -1);
+ err = create_single_kernel_helper_with_build_options(
+ context, &program, &kernel, 1, &pipe_kernel_code, "pipe_kernel",
+ "-cl-std=CL2.0 -cl-kernel-arg-info");
+ test_error_fail(err, "Error creating program");
cl_kernel_arg_type_qualifier arg_type_qualifier = 0;
- cl_kernel_arg_type_qualifier expected_type_qualifier = CL_KERNEL_ARG_TYPE_PIPE;
- err = clGetKernelArgInfo( kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(arg_type_qualifier), &arg_type_qualifier, NULL );
- test_error_ret(err, " clSetKernelArgInfo failed", -1);
- err = (arg_type_qualifier != expected_type_qualifier);
-
- if(err)
+ err = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER,
+ sizeof(arg_type_qualifier), &arg_type_qualifier,
+ NULL);
+ if (gCompilationMode == kOnline)
{
- print_error(err, "ERROR: Bad type qualifier\n");
- return -1;
+ test_error_fail(err, "clGetKernelArgInfo failed");
+ if (arg_type_qualifier != CL_KERNEL_ARG_TYPE_PIPE)
+ {
+ test_fail("ERROR: Incorrect type qualifier: %i\n",
+ arg_type_qualifier);
+ }
+ }
+ else
+ {
+ test_failure_error_ret(err, CL_KERNEL_ARG_INFO_NOT_AVAILABLE,
+ "clGetKernelArgInfo error not as expected",
+ TEST_FAIL);
}
- return err;
-
+ return TEST_PASS;
}
diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp
index 169ab80c..e1048f5f 100644
--- a/test_conformance/pipes/test_pipe_limits.cpp
+++ b/test_conformance/pipes/test_pipe_limits.cpp
@@ -69,7 +69,7 @@ void createKernelSourceCode(std::stringstream &stream, int num_pipes)
}
}
)";
- // clang-format om
+ // clang-format on
}
stream << R"(
}
@@ -163,7 +163,7 @@ int test_pipe_max_args(cl_device_id deviceID, cl_context context, cl_command_que
cl_int err;
cl_int size;
int num_pipe_elements = 1024;
- int i, j;
+ int i;
int max_pipe_args;
std::stringstream source;
clEventWrapper producer_sync_event = NULL;
@@ -648,4 +648,4 @@ int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context,
}
return 0;
-} \ No newline at end of file
+}
diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp
index dd0d1216..425c7aee 100644
--- a/test_conformance/pipes/test_pipe_read_write.cpp
+++ b/test_conformance/pipes/test_pipe_read_write.cpp
@@ -414,9 +414,9 @@ static int verify_readwrite_ulong(void *ptr1, void *ptr2, int n)
static int verify_readwrite_double(void *ptr1, void *ptr2, int n)
{
int i;
- long long int sum_input = 0, sum_output = 0;
- long long int *inptr = (long long int *)ptr1;
- long long int *outptr = (long long int *)ptr2;
+ cl_long sum_input = 0, sum_output = 0;
+ cl_long *inptr = (cl_long *)ptr1;
+ cl_long *outptr = (cl_long *)ptr2;
for(i = 0; i < n; i++)
{
@@ -626,7 +626,6 @@ int test_pipe_readwrite_struct_generic( cl_device_id deviceID, cl_context contex
size_t size = sizeof(TestStruct);
size_t global_work_size[3];
cl_int err;
- int total_errors = 0;
int i;
MTdataHolder d(gRandomSeed);
clEventWrapper producer_sync_event = NULL;
@@ -1076,7 +1075,8 @@ int test_pipe_readwrite_half( cl_device_id deviceID, cl_context context, cl_comm
if(!is_extension_available(deviceID, "cl_khr_fp16"))
{
- log_info("cl_khr_fp16 is not supported on this platoform. Skipping test.\n");
+ log_info(
+ "cl_khr_fp16 is not supported on this platform. Skipping test.\n");
return CL_SUCCESS;
}
ptrSizes[0] = sizeof(cl_float) / 2;
@@ -1246,7 +1246,7 @@ int test_pipe_readwrite_double( cl_device_id deviceID, cl_context context, cl_co
size_t min_alignment = get_min_alignment(context);
- foo = verify_readwrite_long;
+ foo = verify_readwrite_double;
ptrSizes[0] = sizeof(cl_double);
ptrSizes[1] = ptrSizes[0] << 1;
@@ -1257,7 +1257,8 @@ int test_pipe_readwrite_double( cl_device_id deviceID, cl_context context, cl_co
//skip devices that don't support double
if(!is_extension_available(deviceID, "cl_khr_fp64"))
{
- log_info("cl_khr_fp64 is not supported on this platoform. Skipping test.\n");
+ log_info(
+ "cl_khr_fp64 is not supported on this platform. Skipping test.\n");
return CL_SUCCESS;
}
@@ -1404,7 +1405,8 @@ int test_pipe_subgroup_readwrite_int( cl_device_id deviceID, cl_context context,
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_int(deviceID, context, queue, num_elements);
@@ -1418,7 +1420,8 @@ int test_pipe_subgroup_readwrite_uint( cl_device_id deviceID, cl_context context
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_uint(deviceID, context, queue, num_elements);
@@ -1432,7 +1435,8 @@ int test_pipe_subgroup_readwrite_short( cl_device_id deviceID, cl_context contex
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_short(deviceID, context, queue, num_elements);
@@ -1446,7 +1450,8 @@ int test_pipe_subgroup_readwrite_ushort( cl_device_id deviceID, cl_context conte
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_ushort(deviceID, context, queue, num_elements);
@@ -1460,7 +1465,8 @@ int test_pipe_subgroup_readwrite_char( cl_device_id deviceID, cl_context context
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_char(deviceID, context, queue, num_elements);
@@ -1474,7 +1480,8 @@ int test_pipe_subgroup_readwrite_uchar( cl_device_id deviceID, cl_context contex
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_uchar(deviceID, context, queue, num_elements);
@@ -1489,7 +1496,8 @@ int test_pipe_subgroup_readwrite_float( cl_device_id deviceID, cl_context contex
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_float(deviceID, context, queue, num_elements);
@@ -1503,7 +1511,8 @@ int test_pipe_subgroup_readwrite_half( cl_device_id deviceID, cl_context context
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_half(deviceID, context, queue, num_elements);
@@ -1517,7 +1526,8 @@ int test_pipe_subgroup_readwrite_long( cl_device_id deviceID, cl_context context
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_long(deviceID, context, queue, num_elements);
@@ -1531,7 +1541,8 @@ int test_pipe_subgroup_readwrite_ulong( cl_device_id deviceID, cl_context contex
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_ulong(deviceID, context, queue, num_elements);
@@ -1545,7 +1556,8 @@ int test_pipe_subgroup_readwrite_double( cl_device_id deviceID, cl_context conte
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_double(deviceID, context, queue, num_elements);
@@ -1555,7 +1567,8 @@ int test_pipe_subgroup_readwrite_struct( cl_device_id deviceID, cl_context conte
{
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
const char *kernelNames[] = {"test_pipe_subgroup_write_struct","test_pipe_subgroup_read_struct"};
diff --git a/test_conformance/pipes/test_pipe_subgroups.cpp b/test_conformance/pipes/test_pipe_subgroups.cpp
index b3e17183..8e2f6e57 100644
--- a/test_conformance/pipes/test_pipe_subgroups.cpp
+++ b/test_conformance/pipes/test_pipe_subgroups.cpp
@@ -114,9 +114,8 @@ int test_pipe_subgroups_divergence(cl_device_id deviceID, cl_context context, cl
if (!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info(
- "cl_khr_subgroups is not supported on this platoform. Skipping "
- "test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index 2b804e40..d638cd46 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -232,10 +232,8 @@ int waitForEvent(cl_event* event)
//-----------------------------------------
static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context context,const unsigned int testId,const unsigned int testNum,bool isLongSupport,bool is64bAddrSpace)
{
- int err,i;
+ int err;
cl_program program;
- cl_device_id devID;
- char buildLog[ 1024 * 128 ];
char testname[256] = {0};
char addrSpaceArgument[256] = {0};
char addrSpacePAddArgument[256] = {0};
@@ -825,73 +823,75 @@ int test_address_space_4(cl_device_id deviceID, cl_context context, cl_command_q
return doTest(gQueue, gContext, TYPE_ADDRESS_SPACE, 4, deviceID);
}
+int test_buffer_size(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ size_t printf_buff_size = 0;
+ const size_t printf_buff_size_req = !gIsEmbedded ? (1024 * 1024UL) : 1024UL;
+ const size_t config_size = sizeof(printf_buff_size);
+ cl_int err = CL_SUCCESS;
+
+ err = clGetDeviceInfo(deviceID, CL_DEVICE_PRINTF_BUFFER_SIZE, config_size,
+ &printf_buff_size, NULL);
+ if (err != CL_SUCCESS)
+ {
+ log_error("Unable to query CL_DEVICE_PRINTF_BUFFER_SIZE");
+ return TEST_FAIL;
+ }
+
+ if (printf_buff_size < printf_buff_size_req)
+ {
+ log_error("CL_DEVICE_PRINTF_BUFFER_SIZE does not meet requirements");
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
test_definition test_list[] = {
- ADD_TEST( int_0 ),
- ADD_TEST( int_1 ),
- ADD_TEST( int_2 ),
- ADD_TEST( int_3 ),
- ADD_TEST( int_4 ),
- ADD_TEST( int_5 ),
- ADD_TEST( int_6 ),
- ADD_TEST( int_7 ),
- ADD_TEST( int_8 ),
-
- ADD_TEST( float_0 ),
- ADD_TEST( float_1 ),
- ADD_TEST( float_2 ),
- ADD_TEST( float_3 ),
- ADD_TEST( float_4 ),
- ADD_TEST( float_5 ),
- ADD_TEST( float_6 ),
- ADD_TEST( float_7 ),
- ADD_TEST( float_8 ),
- ADD_TEST( float_9 ),
- ADD_TEST( float_10 ),
- ADD_TEST( float_11 ),
- ADD_TEST( float_12 ),
- ADD_TEST( float_13 ),
- ADD_TEST( float_14 ),
- ADD_TEST( float_15 ),
- ADD_TEST( float_16 ),
- ADD_TEST( float_17 ),
-
- ADD_TEST( float_limits_0 ),
- ADD_TEST( float_limits_1 ),
- ADD_TEST( float_limits_2 ),
-
- ADD_TEST( octal_0 ),
- ADD_TEST( octal_1 ),
- ADD_TEST( octal_2 ),
- ADD_TEST( octal_3 ),
-
- ADD_TEST( unsigned_0 ),
- ADD_TEST( unsigned_1 ),
-
- ADD_TEST( hexadecimal_0 ),
- ADD_TEST( hexadecimal_1 ),
- ADD_TEST( hexadecimal_2 ),
- ADD_TEST( hexadecimal_3 ),
- ADD_TEST( hexadecimal_4 ),
-
- ADD_TEST( char_0 ),
- ADD_TEST( char_1 ),
- ADD_TEST( char_2 ),
-
- ADD_TEST( string_0 ),
- ADD_TEST( string_1 ),
- ADD_TEST( string_2 ),
-
- ADD_TEST( vector_0 ),
- ADD_TEST( vector_1 ),
- ADD_TEST( vector_2 ),
- ADD_TEST( vector_3 ),
- ADD_TEST( vector_4 ),
-
- ADD_TEST( address_space_0 ),
- ADD_TEST( address_space_1 ),
- ADD_TEST( address_space_2 ),
- ADD_TEST( address_space_3 ),
- ADD_TEST( address_space_4 ),
+ ADD_TEST(int_0), ADD_TEST(int_1),
+ ADD_TEST(int_2), ADD_TEST(int_3),
+ ADD_TEST(int_4), ADD_TEST(int_5),
+ ADD_TEST(int_6), ADD_TEST(int_7),
+ ADD_TEST(int_8),
+
+ ADD_TEST(float_0), ADD_TEST(float_1),
+ ADD_TEST(float_2), ADD_TEST(float_3),
+ ADD_TEST(float_4), ADD_TEST(float_5),
+ ADD_TEST(float_6), ADD_TEST(float_7),
+ ADD_TEST(float_8), ADD_TEST(float_9),
+ ADD_TEST(float_10), ADD_TEST(float_11),
+ ADD_TEST(float_12), ADD_TEST(float_13),
+ ADD_TEST(float_14), ADD_TEST(float_15),
+ ADD_TEST(float_16), ADD_TEST(float_17),
+
+ ADD_TEST(float_limits_0), ADD_TEST(float_limits_1),
+ ADD_TEST(float_limits_2),
+
+ ADD_TEST(octal_0), ADD_TEST(octal_1),
+ ADD_TEST(octal_2), ADD_TEST(octal_3),
+
+ ADD_TEST(unsigned_0), ADD_TEST(unsigned_1),
+
+ ADD_TEST(hexadecimal_0), ADD_TEST(hexadecimal_1),
+ ADD_TEST(hexadecimal_2), ADD_TEST(hexadecimal_3),
+ ADD_TEST(hexadecimal_4),
+
+ ADD_TEST(char_0), ADD_TEST(char_1),
+ ADD_TEST(char_2),
+
+ ADD_TEST(string_0), ADD_TEST(string_1),
+ ADD_TEST(string_2),
+
+ ADD_TEST(vector_0), ADD_TEST(vector_1),
+ ADD_TEST(vector_2), ADD_TEST(vector_3),
+ ADD_TEST(vector_4),
+
+ ADD_TEST(address_space_0), ADD_TEST(address_space_1),
+ ADD_TEST(address_space_2), ADD_TEST(address_space_3),
+ ADD_TEST(address_space_4),
+
+ ADD_TEST(buffer_size),
};
const int test_num = ARRAY_SIZE( test_list );
@@ -1030,8 +1030,6 @@ test_status InitCL( cl_device_id device )
return TEST_SKIP;
}
- log_info( "Test binary built %s %s\n", __DATE__, __TIME__ );
-
gFd = acquireOutputStream(&err);
if (err != 0)
{
diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp
index 3546c5f5..d45e1d43 100644
--- a/test_conformance/printf/util_printf.cpp
+++ b/test_conformance/printf/util_printf.cpp
@@ -842,8 +842,6 @@ static void hexRefBuilder(printDataGenParameters& params, char* refResult, const
*/
void generateRef(const cl_device_id device)
{
- int fd = -1;
- char _refBuffer[ANALYSIS_BUFFER_SIZE];
const cl_device_fp_config fpConfig = get_default_rounding_mode(device);
const RoundingMode hostRound = get_round();
RoundingMode deviceRound;
diff --git a/test_conformance/profiling/execute.cpp b/test_conformance/profiling/execute.cpp
index edfc043c..44b1bcd4 100644
--- a/test_conformance/profiling/execute.cpp
+++ b/test_conformance/profiling/execute.cpp
@@ -21,6 +21,8 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <algorithm>
+
#include "procs.h"
#include "harness/testHarness.h"
#include "harness/errorHelpers.h"
@@ -29,12 +31,6 @@
typedef unsigned char uchar;
#endif
-#undef MIN
-#define MIN(x,y) ( (x) < (y) ? (x) : (y) )
-
-#undef MAX
-#define MAX(x,y) ( (x) > (y) ? (x) : (y) )
-
//#define CREATE_OUTPUT 1
extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize );
@@ -73,8 +69,8 @@ static const char *image_filter_src =
static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb )
{
// clamp the coords
- int x0 = MIN( MAX( x, 0 ), w - 1 );
- int y0 = MIN( MAX( y, 0 ), h - 1 );
+ int x0 = std::min(std::max(x, 0), w - 1);
+ int y0 = std::min(std::max(y, 0), h - 1);
// get tine index
int indx = ( y0 * w + x0 ) * nChannels;
@@ -339,8 +335,8 @@ static int kernelFilter( cl_device_id device, cl_context context, cl_command_que
clReleaseMemObject( memobjs[1] );
clReleaseMemObject( memobjs[0] );
- if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
- err = -1;
+ if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
+ err = -1;
return err;
diff --git a/test_conformance/profiling/writeImage.cpp b/test_conformance/profiling/writeImage.cpp
index fbc8fbcd..ec2fbdaf 100644
--- a/test_conformance/profiling/writeImage.cpp
+++ b/test_conformance/profiling/writeImage.cpp
@@ -628,8 +628,8 @@ int write_image( cl_device_id device, cl_context context, cl_command_queue queue
free( dst );
free( inptr );
- if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
- err = -1;
+ if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
+ err = -1;
return err;
diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py
index ea7f6775..974491e1 100755
--- a/test_conformance/run_conformance.py
+++ b/test_conformance/run_conformance.py
@@ -8,295 +8,304 @@
#//
#******************************************************************/
-import os, re, sys, subprocess, time, commands, tempfile, math, string
+from __future__ import print_function
+
+import os
+import re
+import sys
+import subprocess
+import time
+import tempfile
DEBUG = 0
-log_file_name = "opencl_conformance_results_" + time.strftime("%Y-%m-%d_%H-%M", time.localtime())+ ".log"
+log_file_name = "opencl_conformance_results_" + time.strftime("%Y-%m-%d_%H-%M", time.localtime()) + ".log"
process_pid = 0
# The amount of time between printing a "." (if no output from test) or ":" (if output)
# to the screen while the tests are running.
-seconds_between_status_updates = 60*60*24*7 # effectively never
+seconds_between_status_updates = 60 * 60 * 24 * 7 # effectively never
# Help info
-def write_help_info() :
- print("run_conformance.py test_list [CL_DEVICE_TYPE(s) to test] [partial-test-names, ...] [log=path/to/log/file/]")
- print(" test_list - the .csv file containing the test names and commands to run the tests.")
- print(" [partial-test-names, ...] - optional partial strings to select a subset of the tests to run.")
- print(" [CL_DEVICE_TYPE(s) to test] - list of CL device types to test, default is CL_DEVICE_TYPE_DEFAULT.")
- print(" [log=path/to/log/file/] - provide a path for the test log file, default is in the current directory.")
- print(" (Note: spaces are not allowed in the log file path.")
+def write_help_info():
+ print("run_conformance.py test_list [CL_DEVICE_TYPE(s) to test] [partial-test-names, ...] [log=path/to/log/file/]")
+ print(" test_list - the .csv file containing the test names and commands to run the tests.")
+ print(" [partial-test-names, ...] - optional partial strings to select a subset of the tests to run.")
+ print(" [CL_DEVICE_TYPE(s) to test] - list of CL device types to test, default is CL_DEVICE_TYPE_DEFAULT.")
+ print(" [log=path/to/log/file/] - provide a path for the test log file, default is in the current directory.")
+ print(" (Note: spaces are not allowed in the log file path.")
# Get the time formatted nicely
-def get_time() :
- return time.strftime("%d-%b %H:%M:%S", time.localtime())
+def get_time():
+ return time.strftime("%d-%b %H:%M:%S", time.localtime())
+
# Write text to the screen and the log file
-def write_screen_log(text) :
- global log_file
- print(text)
- log_file.write(text+"\n")
+def write_screen_log(text):
+ global log_file
+ print(text)
+ log_file.write(text + "\n")
+
# Load the tests from a csv formated file of the form name,command
def get_tests(filename, devices_to_test):
- tests = []
- if (os.path.exists(filename) == False):
- print("FAILED: test_list \"" + filename + "\" does not exist.")
- print("")
- write_help_info()
- sys.exit(-1)
- file = open(filename, 'r')
- for line in file.readlines():
- comment = re.search("^#.*", line)
- if (comment):
- continue
- device_specific_match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*$", line)
- if (device_specific_match):
- if (device_specific_match.group(1) in devices_to_test):
- test_path = string.replace(device_specific_match.group(3), '/', os.sep)
- test_name = string.replace(device_specific_match.group(2), '/', os.sep)
- tests.append((test_name, test_path))
- else:
- print("Skipping " + device_specific_match.group(2) + " because " + device_specific_match.group(1) + " is not in the list of devices to test.")
- continue
- match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*$", line)
- if (match):
- test_path = string.replace(match.group(2), '/', os.sep)
- test_name = string.replace(match.group(1), '/', os.sep)
- tests.append((test_name, test_path))
- return tests
+ tests = []
+ if os.path.exists(filename) == False:
+ print("FAILED: test_list \"" + filename + "\" does not exist.")
+ print("")
+ write_help_info()
+ sys.exit(-1)
+ file = open(filename, 'r')
+ for line in file.readlines():
+ comment = re.search("^#.*", line)
+ if comment:
+ continue
+ device_specific_match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*$", line)
+ if device_specific_match:
+ if device_specific_match.group(1) in devices_to_test:
+ test_path = str.replace(device_specific_match.group(3), '/', os.sep)
+ test_name = str.replace(device_specific_match.group(2), '/', os.sep)
+ tests.append((test_name, test_path))
+ else:
+ print("Skipping " + device_specific_match.group(2) + " because " + device_specific_match.group(1) + " is not in the list of devices to test.")
+ continue
+ match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*$", line)
+ if match:
+ test_path = str.replace(match.group(2), '/', os.sep)
+ test_name = str.replace(match.group(1), '/', os.sep)
+ tests.append((test_name, test_path))
+ return tests
def run_test_checking_output(current_directory, test_dir, log_file):
- global process_pid, seconds_between_status_updates
- failures_this_run = 0
- start_time = time.time()
- # Create a temporary file for capturing the output from the test
- (output_fd, output_name) = tempfile.mkstemp()
- if ( not os.path.exists(output_name)) :
- write_screen_log("\n ==> ERROR: could not create temporary file %s ." % output_name)
- os.close(output_fd)
- return -1
- # Execute the test
- program_to_run = test_dir_without_args = test_dir.split(None, 1)[0]
- if ( os.sep == '\\' ) : program_to_run += ".exe"
- if (os.path.exists(current_directory + os.sep + program_to_run)) :
- os.chdir(os.path.dirname(current_directory+os.sep+test_dir_without_args) )
- try:
- if (DEBUG): p = subprocess.Popen("", stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
- else : p = subprocess.Popen(current_directory + os.sep + test_dir, stderr=output_fd, stdout=output_fd, shell=True)
- except OSError:
- write_screen_log("\n ==> ERROR: failed to execute test. Failing test. : " + str(OSError))
- os.close(output_fd)
- return -1
- else:
- write_screen_log("\n ==> ERROR: test file (" + current_directory + os.sep + program_to_run +") does not exist. Failing test.")
- os.close(output_fd)
- return -1
- # Set the global pid so we can kill it if this is aborted
- process_pid = p.pid
- # Read one character at a time from the temporary output file while the process is running.
- # When we get an end-of-line, look for errors and write the results to the log file.
- # This allows us to process the file as it is being produced.
- # Keep track of the state for reading
- # Whether we are done, if we have more to read, and where in the file we last read
- done = False
- more_to_read = True
- pointer = 0
- pointer_at_last_user_update = 0
- output_this_run = False
- try:
- read_output = open(output_name, 'r')
- except IOError:
- write_screen_log("\n ==> ERROR: could not open output file from test.")
- os.close(output_fd)
- return -1
- line = ""
- while (not done or more_to_read):
- os.fsync(output_fd)
- # Determine if we should display some output
- elapsed_time = (time.time() - start_time)
- if (elapsed_time > seconds_between_status_updates):
- start_time = time.time()
- # If we've received output from the test since the last update, display a #
- if (pointer != pointer_at_last_user_update):
- sys.stdout.write(":")
- else:
- sys.stdout.write(".")
- pointer_at_last_user_update = pointer
- sys.stdout.flush()
- # Check if we're done
- p.poll()
- if (not done and p.returncode != None):
- if (p.returncode < 0):
- if (not output_this_run):
- print ""
- output_this_run = True
- write_screen_log(" ==> ERROR: test killed/crashed: " + str(p.returncode)+ ".")
- done = True
- # Try reading
+ global process_pid, seconds_between_status_updates
+ failures_this_run = 0
+ start_time = time.time()
+ # Create a temporary file for capturing the output from the test
+ (output_fd, output_name) = tempfile.mkstemp()
+ if not os.path.exists(output_name):
+ write_screen_log("\n ==> ERROR: could not create temporary file %s ." % output_name)
+ os.close(output_fd)
+ return -1
+ # Execute the test
+ program_to_run = test_dir_without_args = test_dir.split(None, 1)[0]
+ if os.sep == '\\':
+ program_to_run += ".exe"
+ if os.path.exists(current_directory + os.sep + program_to_run):
+ os.chdir(os.path.dirname(current_directory + os.sep + test_dir_without_args))
+ try:
+ if DEBUG: p = subprocess.Popen("", stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
+ else: p = subprocess.Popen(current_directory + os.sep + test_dir, stderr=output_fd, stdout=output_fd, shell=True)
+ except OSError:
+ write_screen_log("\n ==> ERROR: failed to execute test. Failing test. : " + str(OSError))
+ os.close(output_fd)
+ return -1
+ else:
+ write_screen_log("\n ==> ERROR: test file (" + current_directory + os.sep + program_to_run + ") does not exist. Failing test.")
+ os.close(output_fd)
+ return -1
+ # Set the global pid so we can kill it if this is aborted
+ process_pid = p.pid
+ # Read one character at a time from the temporary output file while the process is running.
+ # When we get an end-of-line, look for errors and write the results to the log file.
+ # This allows us to process the file as it is being produced.
+ # Keep track of the state for reading
+ # Whether we are done, if we have more to read, and where in the file we last read
+ done = False
+ more_to_read = True
+ pointer = 0
+ pointer_at_last_user_update = 0
+ output_this_run = False
try:
- read_output.seek(pointer)
- char_read = read_output.read(1)
- except IOError:
- time.sleep(1)
- continue
- # If we got a full line then process it
- if (char_read == "\n"):
- # Look for failures and report them as such
- match = re.search(".*(FAILED|ERROR).*", line)
- if (match):
- if (not output_this_run):
- print ""
- output_this_run = True
- print(" ==> " + line.replace('\n',''))
- match = re.search(".*FAILED.*", line)
- if (match):
- failures_this_run = failures_this_run + 1
- match = re.search(".*(PASSED).*", line)
- if (match):
- if (not output_this_run):
- print ""
- output_this_run = True
- print(" " + line.replace('\n',''))
- # Write it to the log
- log_file.write(" " + line +"\n")
- log_file.flush()
- line = ""
- pointer = pointer + 1
- # If we are at the end of the file, then re-open it to get new data
- elif (char_read == ""):
- more_to_read = False
- read_output.close()
- time.sleep(1)
- try:
- os.fsync(output_fd)
read_output = open(output_name, 'r')
- # See if there is more to read. This happens if the process ends and we have data left.
- read_output.seek(pointer)
- if (read_output.read(1) != ""):
- more_to_read = True
- except IOError:
- write_screen_log("\n ==> ERROR: could not reopen output file from test.")
+ except IOError:
+ write_screen_log("\n ==> ERROR: could not open output file from test.")
+ os.close(output_fd)
return -1
- done = True
- else:
- line = line + char_read
- pointer = pointer + 1
- # Now we are done, so write out any remaining data in the file:
- # This should only happen if the process exited with an error.
- os.fsync(output_fd)
- while (read_output.read(1) != ""):
- log_file.write(read_output.read(1))
- # Return the total number of failures
- if (p.returncode == 0 and failures_this_run > 0):
- write_screen_log("\n ==> ERROR: Test returned 0, but number of FAILED lines reported is " + str(failures_this_run) +".")
- return failures_this_run
- return p.returncode
-
-
-def run_tests(tests) :
- global curent_directory
- global process_pid
- # Run the tests
- failures = 0
- previous_test = None
- test_number = 1
- for test in tests:
- # Print the name of the test we're running and the time
- (test_name, test_dir) = test
- if (test_dir != previous_test):
- print("========== " + test_dir)
- log_file.write("========================================================================================\n")
- log_file.write("========================================================================================\n")
- log_file.write("(" + get_time() + ") Running Tests: " + test_dir +"\n")
- log_file.write("========================================================================================\n")
- log_file.write("========================================================================================\n")
- previous_test = test_dir
- print("("+get_time()+") BEGIN " + test_name.ljust(40) +": "),
- log_file.write(" ----------------------------------------------------------------------------------------\n")
- log_file.write(" (" + get_time() + ") Running Sub Test: " + test_name + "\n")
- log_file.write(" ----------------------------------------------------------------------------------------\n")
- log_file.flush()
- sys.stdout.flush()
-
- # Run the test
- result = 0
- start_time = time.time()
- try:
- process_pid = 0
- result = run_test_checking_output(current_directory, test_dir, log_file)
- except KeyboardInterrupt:
- # Catch an interrupt from the user
- write_screen_log("\nFAILED: Execution interrupted. Killing test process, but not aborting full test run.")
- os.kill(process_pid, 9)
- answer = raw_input("Abort all tests? (y/n)")
- if (answer.find("y") != -1):
- write_screen_log("\nUser chose to abort all tests.")
- log_file.close()
- sys.exit(-1)
- else:
- write_screen_log("\nUser chose to continue with other tests. Reporting this test as failed.")
- result = 1
- run_time = (time.time() - start_time)
-
- # Move print the finish status
- if (result == 0):
- print("("+get_time()+") PASSED " + test_name.ljust(40) +": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) +")"),
- else:
- print("("+get_time()+") FAILED " + test_name.ljust(40) +": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) +")"),
-
- test_number = test_number + 1
- log_file.write(" ----------------------------------------------------------------------------------------\n")
- log_file.flush()
-
- print("")
- if (result != 0):
- log_file.write(" *******************************************************************************************\n")
- log_file.write(" * ("+get_time()+") Test " + test_name + " ==> FAILED: " + str(result)+"\n")
- log_file.write(" *******************************************************************************************\n")
- failures = failures + 1
- else:
- log_file.write(" ("+get_time()+") Test " + test_name +" passed in " + str(run_time) + "s\n")
-
- log_file.write(" ----------------------------------------------------------------------------------------\n")
- log_file.write("\n")
- return failures
-
-
-
+ line = ""
+ while not done or more_to_read:
+ os.fsync(output_fd)
+ # Determine if we should display some output
+ elapsed_time = (time.time() - start_time)
+ if elapsed_time > seconds_between_status_updates:
+ start_time = time.time()
+ # If we've received output from the test since the last update, display a #
+ if pointer != pointer_at_last_user_update:
+ sys.stdout.write(":")
+ else:
+ sys.stdout.write(".")
+ pointer_at_last_user_update = pointer
+ sys.stdout.flush()
+ # Check if we're done
+ p.poll()
+ if not done and p.returncode != None:
+ if p.returncode < 0:
+ if not output_this_run:
+ print("")
+ output_this_run = True
+ write_screen_log(" ==> ERROR: test killed/crashed: " + str(p.returncode) + ".")
+ done = True
+ # Try reading
+ try:
+ read_output.seek(pointer)
+ char_read = read_output.read(1)
+ except IOError:
+ time.sleep(1)
+ continue
+ # If we got a full line then process it
+ if char_read == "\n":
+ # Look for failures and report them as such
+ match = re.search(".*(FAILED|ERROR).*", line)
+ if match:
+ if not output_this_run:
+ print("")
+ output_this_run = True
+ print(" ==> " + line.replace('\n', ''))
+ match = re.search(".*FAILED.*", line)
+ if match:
+ failures_this_run = failures_this_run + 1
+ match = re.search(".*(PASSED).*", line)
+ if match:
+ if not output_this_run:
+ print("")
+ output_this_run = True
+ print(" " + line.replace('\n', ''))
+ # Write it to the log
+ log_file.write(" " + line + "\n")
+ log_file.flush()
+ line = ""
+ pointer = pointer + 1
+ # If we are at the end of the file, then re-open it to get new data
+ elif char_read == "":
+ more_to_read = False
+ read_output.close()
+ time.sleep(1)
+ try:
+ os.fsync(output_fd)
+ read_output = open(output_name, 'r')
+ # See if there is more to read. This happens if the process ends and we have data left.
+ read_output.seek(pointer)
+ if read_output.read(1) != "":
+ more_to_read = True
+ except IOError:
+ write_screen_log("\n ==> ERROR: could not reopen output file from test.")
+ return -1
+ else:
+ line = line + char_read
+ pointer = pointer + 1
+ # Now we are done, so write out any remaining data in the file:
+ # This should only happen if the process exited with an error.
+ os.fsync(output_fd)
+ while read_output.read(1) != "":
+ log_file.write(read_output.read(1))
+ # Return the total number of failures
+ if (p.returncode == 0 and failures_this_run > 0):
+ write_screen_log("\n ==> ERROR: Test returned 0, but number of FAILED lines reported is " + str(failures_this_run) + ".")
+ return failures_this_run
+ return p.returncode
+
+
+def run_tests(tests):
+ global curent_directory
+ global process_pid
+ # Run the tests
+ failures = 0
+ previous_test = None
+ test_number = 1
+ for test in tests:
+ # Print the name of the test we're running and the time
+ (test_name, test_dir) = test
+ if test_dir != previous_test:
+ print("========== " + test_dir)
+ log_file.write("========================================================================================\n")
+ log_file.write("========================================================================================\n")
+ log_file.write("(" + get_time() + ") Running Tests: " + test_dir + "\n")
+ log_file.write("========================================================================================\n")
+ log_file.write("========================================================================================\n")
+ previous_test = test_dir
+ print("(" + get_time() + ") BEGIN " + test_name.ljust(40) + ": ", end='')
+ log_file.write(" ----------------------------------------------------------------------------------------\n")
+ log_file.write(" (" + get_time() + ") Running Sub Test: " + test_name + "\n")
+ log_file.write(" ----------------------------------------------------------------------------------------\n")
+ log_file.flush()
+ sys.stdout.flush()
+
+ # Run the test
+ result = 0
+ start_time = time.time()
+ try:
+ process_pid = 0
+ result = run_test_checking_output(current_directory, test_dir, log_file)
+ except KeyboardInterrupt:
+ # Catch an interrupt from the user
+ write_screen_log("\nFAILED: Execution interrupted. Killing test process, but not aborting full test run.")
+ os.kill(process_pid, 9)
+ if sys.version_info[0] < 3:
+ answer = raw_input("Abort all tests? (y/n)")
+ else:
+ answer = input("Abort all tests? (y/n)")
+ if answer.find("y") != -1:
+ write_screen_log("\nUser chose to abort all tests.")
+ log_file.close()
+ sys.exit(-1)
+ else:
+ write_screen_log("\nUser chose to continue with other tests. Reporting this test as failed.")
+ result = 1
+ run_time = (time.time() - start_time)
+
+ # Move print the finish status
+ if result == 0:
+ print("(" + get_time() + ") PASSED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")", end='')
+ else:
+ print("(" + get_time() + ") FAILED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")", end='')
+
+ test_number = test_number + 1
+ log_file.write(" ----------------------------------------------------------------------------------------\n")
+ log_file.flush()
+
+ print("")
+ if result != 0:
+ log_file.write(" *******************************************************************************************\n")
+ log_file.write(" * (" + get_time() + ") Test " + test_name + " ==> FAILED: " + str(result) + "\n")
+ log_file.write(" *******************************************************************************************\n")
+ failures = failures + 1
+ else:
+ log_file.write(" (" + get_time() + ") Test " + test_name + " passed in " + str(run_time) + "s\n")
+
+ log_file.write(" ----------------------------------------------------------------------------------------\n")
+ log_file.write("\n")
+ return failures
# ########################
# Begin OpenCL conformance run script
# ########################
-if (len(sys.argv) < 2):
- write_help_info()
- sys.exit(-1)
-
+if len(sys.argv) < 2:
+ write_help_info()
+ sys.exit(-1)
current_directory = os.getcwd()
# Open the log file
for arg in sys.argv:
- match = re.search("log=(\S+)", arg)
- if (match):
- log_file_name = match.group(1).rstrip('/') + os.sep + log_file_name
+ match = re.search("log=(\S+)", arg)
+ if match:
+ log_file_name = match.group(1).rstrip('/') + os.sep + log_file_name
try:
- log_file = open(log_file_name, "w")
+ log_file = open(log_file_name, "w")
except IOError:
- print "Could not open log file " + log_file_name
+ print("Could not open log file " + log_file_name)
+ sys.exit(-1)
# Determine which devices to test
device_types = ["CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU", "CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_ALL"]
devices_to_test = []
for device in device_types:
- if device in sys.argv[2:]:
- devices_to_test.append(device)
-if (len(devices_to_test) == 0):
- devices_to_test = ["CL_DEVICE_TYPE_DEFAULT"]
+ if device in sys.argv[2:]:
+ devices_to_test.append(device)
+if len(devices_to_test) == 0:
+ devices_to_test = ["CL_DEVICE_TYPE_DEFAULT"]
write_screen_log("Testing on: " + str(devices_to_test))
# Get the tests
@@ -306,52 +315,52 @@ tests = get_tests(sys.argv[1], devices_to_test)
tests_to_use = []
num_of_patterns_to_match = 0
for arg in sys.argv[2:]:
- if arg in device_types:
- continue
- if re.search("log=(\S+)", arg):
- continue
- num_of_patterns_to_match = num_of_patterns_to_match + 1
- found_it = False
- for test in tests:
- (test_name, test_dir) = test
- if (test_name.find(arg) != -1 or test_dir.find(arg) != -1):
- found_it = True
- if (test not in tests_to_use):
- tests_to_use.append(test)
- if (found_it == False):
- print("Failed to find a test matching " + arg)
-if (len(tests_to_use) == 0):
- if (num_of_patterns_to_match > 0):
- print("FAILED: Failed to find any tests matching the given command-line options.")
- print("")
- write_help_info()
- sys.exit(-1)
+ if arg in device_types:
+ continue
+ if re.search("log=(\S+)", arg):
+ continue
+ num_of_patterns_to_match = num_of_patterns_to_match + 1
+ found_it = False
+ for test in tests:
+ (test_name, test_dir) = test
+ if (test_name.find(arg) != -1 or test_dir.find(arg) != -1):
+ found_it = True
+ if test not in tests_to_use:
+ tests_to_use.append(test)
+ if found_it == False:
+ print("Failed to find a test matching " + arg)
+if len(tests_to_use) == 0:
+ if num_of_patterns_to_match > 0:
+ print("FAILED: Failed to find any tests matching the given command-line options.")
+ print("")
+ write_help_info()
+ sys.exit(-1)
else:
- tests = tests_to_use[:]
+ tests = tests_to_use[:]
write_screen_log("Test execution arguments: " + str(sys.argv))
-write_screen_log("Logging to file " + log_file_name +".")
+write_screen_log("Logging to file " + log_file_name + ".")
write_screen_log("Loaded tests from " + sys.argv[1] + ", total of " + str(len(tests)) + " tests selected to run:")
for (test_name, test_command) in tests:
- write_screen_log(test_name.ljust(50) + " (" + test_command +")")
+ write_screen_log(test_name.ljust(50) + " (" + test_command + ")")
# Run the tests
total_failures = 0
for device_to_test in devices_to_test:
- os.environ['CL_DEVICE_TYPE'] = device_to_test
- write_screen_log("========================================================================================")
- write_screen_log("========================================================================================")
- write_screen_log(("Setting CL_DEVICE_TYPE to " + device_to_test).center(90))
- write_screen_log("========================================================================================")
- write_screen_log("========================================================================================")
- failures = run_tests(tests)
- write_screen_log("========================================================================================")
- if (failures == 0):
- write_screen_log(">> TEST on " + device_to_test + " PASSED")
- else:
- write_screen_log(">> TEST on " + device_to_test + " FAILED (" + str(failures) + " FAILURES)")
- write_screen_log("========================================================================================")
- total_failures = total_failures + failures
-
-write_screen_log("("+get_time()+") Testing complete. " + str(total_failures) + " failures for " + str(len(tests)) + " tests.")
+ os.environ['CL_DEVICE_TYPE'] = device_to_test
+ write_screen_log("========================================================================================")
+ write_screen_log("========================================================================================")
+ write_screen_log(("Setting CL_DEVICE_TYPE to " + device_to_test).center(90))
+ write_screen_log("========================================================================================")
+ write_screen_log("========================================================================================")
+ failures = run_tests(tests)
+ write_screen_log("========================================================================================")
+ if failures == 0:
+ write_screen_log(">> TEST on " + device_to_test + " PASSED")
+ else:
+ write_screen_log(">> TEST on " + device_to_test + " FAILED (" + str(failures) + " FAILURES)")
+ write_screen_log("========================================================================================")
+ total_failures = total_failures + failures
+
+write_screen_log("(" + get_time() + ") Testing complete. " + str(total_failures) + " failures for " + str(len(tests)) + " tests.")
log_file.close()
diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp
index 35f154ac..972a53c6 100644
--- a/test_conformance/select/test_select.cpp
+++ b/test_conformance/select/test_select.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -79,7 +79,6 @@ static int s_wimpy_reduction_factor = 256;
// sub tests which is for each individual test. The following
// tracks the subtests
int s_test_cnt = 0;
-int s_test_fail = 0;
//-----------------------------------------
// Static helper functions
@@ -174,8 +173,6 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont
char extension[128] = "";
int err = 0;
- int i; // generic, re-usable loop variable
-
const char *source[] = {
extension,
"__kernel void ", testname,
@@ -297,6 +294,7 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont
static int doTest(cl_command_queue queue, cl_context context, Type stype, Type cmptype, cl_device_id device)
{
int err = CL_SUCCESS;
+ int s_test_fail = 0;
MTdata d;
const size_t element_count[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
cl_mem src1 = NULL;
@@ -468,6 +466,11 @@ exit:
clReleaseProgram(programs[vecsize]);
}
++s_test_cnt;
+ if (s_test_fail)
+ {
+ err = TEST_FAIL;
+ gFailCount++;
+ }
return err;
}
@@ -636,7 +639,6 @@ int main(int argc, const char* argv[])
s_wimpy_mode = true;
}
- log_info( "Test binary built %s %s\n", __DATE__, __TIME__ );
if (s_wimpy_mode) {
log_info("\n");
log_info("*** WARNING: Testing in Wimpy mode! ***\n");
@@ -665,4 +667,3 @@ static void printUsage( void )
log_info( "\t%s\n", test_list[i].name );
}
}
-
diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp
index 3a18988c..06caf33b 100644
--- a/test_conformance/spir/main.cpp
+++ b/test_conformance/spir/main.cpp
@@ -6615,40 +6615,45 @@ struct sub_suite
};
static const sub_suite spir_suites[] = {
- {"api", "api", test_api},
- {"api_double", "api", test_api_double},
- {"atomics", "atomics", test_atomics},
- {"basic", "basic", test_basic},
- {"basic_double", "basic", test_basic_double},
- {"commonfns", "commonfns", test_commonfns},
- {"commonfns_double", "commonfns", test_commonfns_double},
- {"conversions", "conversions", test_conversions},
- {"conversions_double", "conversions", test_conversions_double},
- {"geometrics", "geometrics", test_geometrics},
- {"geometrics_double", "geometrics", test_geometrics_double},
- {"half", "half", test_half},
- {"half_double", "half", test_half_double},
- {"kernel_image_methods", "kernel_image_methods", test_kernel_image_methods},
- {"images_kernel_read_write", "images_kernel_read_write", test_images_kernel_read_write},
- {"images_samplerlessRead", "images_samplerlessRead", test_images_samplerless_read},
- {"integer_ops", "integer_ops", test_integer_ops},
- {"math_brute_force", "math_brute_force", test_math_brute_force},
- {"math_brute_force_double", "math_brute_force", test_math_brute_force_double},
- {"printf", "printf", test_printf},
- {"profiling", "profiling", test_profiling},
- {"relationals", "relationals", test_relationals},
- {"relationals_double", "relationals", test_relationals_double},
- {"select", "select", test_select},
- {"select_double", "select", test_select_double},
- {"vec_align", "vec_align", test_vec_align},
- {"vec_align_double", "vec_align", test_vec_align_double},
- {"vec_step", "vec_step", test_vec_step},
- {"vec_step_double", "vec_step", test_vec_step_double},
- {"compile_and_link", "compile_and_link", test_compile_and_link},
- {"sampler_enumeration", "sampler_enumeration", test_sampler_enumeration},
- {"enum_values", "enum_values", test_enum_values},
- {"kernel_attributes", "kernel_attributes", test_kernel_attributes},
- {"binary_type", "binary_type", test_binary_type},
+ { "api", "api", test_api },
+ { "api_double", "api", test_api_double },
+ { "atomics", "atomics", test_atomics },
+ { "basic", "basic", test_basic },
+ { "basic_double", "basic", test_basic_double },
+ { "commonfns", "commonfns", test_commonfns },
+ { "commonfns_double", "commonfns", test_commonfns_double },
+ { "conversions", "conversions", test_conversions },
+ { "conversions_double", "conversions", test_conversions_double },
+ { "geometrics", "geometrics", test_geometrics },
+ { "geometrics_double", "geometrics", test_geometrics_double },
+ { "half", "half", test_half },
+ { "half_double", "half", test_half_double },
+ { "kernel_image_methods", "kernel_image_methods",
+ test_kernel_image_methods },
+ { "images_kernel_read_write", "images_kernel_read_write",
+ test_images_kernel_read_write },
+ { "images_samplerlessRead", "images_samplerlessRead",
+ test_images_samplerless_read },
+ { "integer_ops", "integer_ops", test_integer_ops },
+ { "math_brute_force", "math_brute_force", test_math_brute_force },
+ { "math_brute_force_double", "math_brute_force",
+ test_math_brute_force_double },
+ { "printf", "printf", test_printf },
+ { "profiling", "profiling", test_profiling },
+ { "relationals", "relationals", test_relationals },
+ { "relationals_double", "relationals", test_relationals_double },
+ { "select", "select", test_select },
+ { "select_double", "select", test_select_double },
+ { "vec_align", "vec_align", test_vec_align },
+ { "vec_align_double", "vec_align", test_vec_align_double },
+ { "vec_step", "vec_step", test_vec_step },
+ { "vec_step_double", "vec_step", test_vec_step_double },
+ { "compile_and_link", "compile_and_link", test_compile_and_link },
+ { "sampler_enumeration", "sampler_enumeration", test_sampler_enumeration },
+ { "enum_values", "enum_values", test_enum_values },
+ // {"kernel_attributes", "kernel_attributes",
+ // test_kernel_attributes}, // disabling temporarily, see GitHub #1284
+ { "binary_type", "binary_type", test_binary_type },
};
diff --git a/test_conformance/spir/run_services.cpp b/test_conformance/spir/run_services.cpp
index 06fc418d..6e06d53c 100644
--- a/test_conformance/spir/run_services.cpp
+++ b/test_conformance/spir/run_services.cpp
@@ -213,7 +213,6 @@ cl_kernel create_kernel_helper( cl_program program, const std::string& kernel_na
{
int error = CL_SUCCESS;
cl_kernel kernel = NULL;
- cl_device_id device = get_program_device(program);
/* And create a kernel from it */
kernel = clCreateKernel( program, kernel_name.c_str(), &error );
if( kernel == NULL || error != CL_SUCCESS)
@@ -389,6 +388,7 @@ OclExtensions OclExtensions::getDeviceCapabilities(cl_device_id devId)
{
ret = ret | OclExtensions::fromString(*it);
}
+
return ret;
}
@@ -399,75 +399,80 @@ OclExtensions OclExtensions::empty()
OclExtensions OclExtensions::fromString(const std::string& e)
{
- std::string s = "OclExtensions::" + e;
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_int64_base_atomics);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_int64_extended_atomics);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_3d_image_writes);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_fp16);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_sharing);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_event);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_d3d10_sharing);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_dx9_media_sharing);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_d3d11_sharing);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_depth_images);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_depth_images);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_msaa_sharing);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_image2d_from_buffer);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_initialize_memory);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_spir);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_fp64);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_global_int32_base_atomics);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_global_int32_extended_atomics);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_local_int32_base_atomics);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_local_int32_extended_atomics);
- RETURN_IF_ENUM(s, OclExtensions::cl_khr_byte_addressable_store);
- RETURN_IF_ENUM(s, OclExtensions::cles_khr_int64);
- RETURN_IF_ENUM(s, OclExtensions::cles_khr_2d_image_array_writes);
+ std::string s = "OclExtensions::has_" + e;
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_int64_base_atomics);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_int64_extended_atomics);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_3d_image_writes);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_fp16);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_sharing);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_event);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_d3d10_sharing);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_dx9_media_sharing);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_d3d11_sharing);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_depth_images);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_depth_images);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_msaa_sharing);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_image2d_from_buffer);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_initialize_memory);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_spir);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_fp64);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_global_int32_base_atomics);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_global_int32_extended_atomics);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_local_int32_base_atomics);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_local_int32_extended_atomics);
+ RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_byte_addressable_store);
+ RETURN_IF_ENUM(s, OclExtensions::has_cles_khr_int64);
+ RETURN_IF_ENUM(s, OclExtensions::has_cles_khr_2d_image_array_writes);
// Unknown KHR string.
return OclExtensions::empty();
}
std::string OclExtensions::toString()
{
-
- #define APPEND_STR_IF_SUPPORTS( STR, E) \
- if ( this->supports(E) ) \
- { \
- std::string ext_str( #E ); \
- std::string prefix = "OclExtensions::"; \
- size_t pos = ext_str.find( prefix ); \
- if ( pos != std::string::npos ) \
- { \
- ext_str.replace( pos, prefix.length(), ""); \
- } \
- STR += ext_str; \
- }
+#define APPEND_STR_IF_SUPPORTS(STR, E) \
+ if (this->supports(E)) \
+ { \
+ std::string ext_str(#E); \
+ std::string prefix = "OclExtensions::has_"; \
+ size_t pos = ext_str.find(prefix); \
+ if (pos != std::string::npos) \
+ { \
+ ext_str.replace(pos, prefix.length(), ""); \
+ } \
+ STR += ext_str; \
+ STR += " "; \
+ }
std::string s = "";
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_int64_base_atomics );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_int64_extended_atomics );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_3d_image_writes );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_fp16 );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_sharing );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_event );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_d3d10_sharing );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_dx9_media_sharing );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_d3d11_sharing );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_depth_images );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_depth_images );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_msaa_sharing );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_image2d_from_buffer );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_initialize_memory );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_spir );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_fp64 );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_global_int32_base_atomics );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_global_int32_extended_atomics );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_local_int32_base_atomics );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_local_int32_extended_atomics );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_byte_addressable_store );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cles_khr_int64 );
- APPEND_STR_IF_SUPPORTS( s, OclExtensions::cles_khr_2d_image_array_writes );
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_int64_base_atomics);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_int64_extended_atomics);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_3d_image_writes);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_fp16);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_sharing);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_event);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_d3d10_sharing);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_dx9_media_sharing);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_d3d11_sharing);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_depth_images);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_depth_images);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_msaa_sharing);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_image2d_from_buffer);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_initialize_memory);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_spir);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_fp64);
+ APPEND_STR_IF_SUPPORTS(s,
+ OclExtensions::has_cl_khr_global_int32_base_atomics);
+ APPEND_STR_IF_SUPPORTS(
+ s, OclExtensions::has_cl_khr_global_int32_extended_atomics);
+ APPEND_STR_IF_SUPPORTS(s,
+ OclExtensions::has_cl_khr_local_int32_base_atomics);
+ APPEND_STR_IF_SUPPORTS(
+ s, OclExtensions::has_cl_khr_local_int32_extended_atomics);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_byte_addressable_store);
+ APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cles_khr_int64);
+ APPEND_STR_IF_SUPPORTS(s,
+ OclExtensions::has_cles_khr_2d_image_array_writes);
return s;
}
diff --git a/test_conformance/spir/run_services.h b/test_conformance/spir/run_services.h
index 6bac4c91..10f0d05e 100644
--- a/test_conformance/spir/run_services.h
+++ b/test_conformance/spir/run_services.h
@@ -113,42 +113,33 @@ private:
OclExtensions(size_t ext) : m_extVector(ext) {}
-// Fix a compilation error, since cl_khr_gl_sharing is defined as a macro.
-#ifdef cl_khr_gl_sharing
-#undef cl_khr_gl_sharing
-#endif//cl_khr_gl_sharing
-
-#ifdef cl_khr_icd
-#undef cl_khr_icd
-#endif//cl_khr_icd
-
enum ClKhrs
{
- no_extensions = KhrValue<0>::Mask,
- cl_khr_int64_base_atomics = KhrValue<1>::Mask,
- cl_khr_int64_extended_atomics = KhrValue<2>::Mask,
- cl_khr_3d_image_writes = KhrValue<3>::Mask,
- cl_khr_fp16 = KhrValue<4>::Mask,
- cl_khr_gl_sharing = KhrValue<5>::Mask,
- cl_khr_gl_event = KhrValue<6>::Mask,
- cl_khr_d3d10_sharing = KhrValue<7>::Mask,
- cl_khr_dx9_media_sharing = KhrValue<8>::Mask,
- cl_khr_d3d11_sharing = KhrValue<9>::Mask,
- cl_khr_depth_images = KhrValue<10>::Mask,
- cl_khr_gl_depth_images = KhrValue<11>::Mask,
- cl_khr_gl_msaa_sharing = KhrValue<12>::Mask,
- cl_khr_image2d_from_buffer = KhrValue<13>::Mask,
- cl_khr_initialize_memory = KhrValue<14>::Mask,
- cl_khr_context_abort = KhrValue<15>::Mask,
- cl_khr_spir = KhrValue<16>::Mask,
- cl_khr_fp64 = KhrValue<17>::Mask,
- cl_khr_global_int32_base_atomics = KhrValue<18>::Mask,
- cl_khr_global_int32_extended_atomics = KhrValue<19>::Mask,
- cl_khr_local_int32_base_atomics = KhrValue<20>::Mask,
- cl_khr_local_int32_extended_atomics = KhrValue<21>::Mask,
- cl_khr_byte_addressable_store = KhrValue<22>::Mask,
- cles_khr_int64 = KhrValue<23>::Mask,
- cles_khr_2d_image_array_writes = KhrValue<24>::Mask,
+ no_extensions = KhrValue<0>::Mask,
+ has_cl_khr_int64_base_atomics = KhrValue<1>::Mask,
+ has_cl_khr_int64_extended_atomics = KhrValue<2>::Mask,
+ has_cl_khr_3d_image_writes = KhrValue<3>::Mask,
+ has_cl_khr_fp16 = KhrValue<4>::Mask,
+ has_cl_khr_gl_sharing = KhrValue<5>::Mask,
+ has_cl_khr_gl_event = KhrValue<6>::Mask,
+ has_cl_khr_d3d10_sharing = KhrValue<7>::Mask,
+ has_cl_khr_dx9_media_sharing = KhrValue<8>::Mask,
+ has_cl_khr_d3d11_sharing = KhrValue<9>::Mask,
+ has_cl_khr_depth_images = KhrValue<10>::Mask,
+ has_cl_khr_gl_depth_images = KhrValue<11>::Mask,
+ has_cl_khr_gl_msaa_sharing = KhrValue<12>::Mask,
+ has_cl_khr_image2d_from_buffer = KhrValue<13>::Mask,
+ has_cl_khr_initialize_memory = KhrValue<14>::Mask,
+ has_cl_khr_context_abort = KhrValue<15>::Mask,
+ has_cl_khr_spir = KhrValue<16>::Mask,
+ has_cl_khr_fp64 = KhrValue<17>::Mask,
+ has_cl_khr_global_int32_base_atomics = KhrValue<18>::Mask,
+ has_cl_khr_global_int32_extended_atomics = KhrValue<19>::Mask,
+ has_cl_khr_local_int32_base_atomics = KhrValue<20>::Mask,
+ has_cl_khr_local_int32_extended_atomics = KhrValue<21>::Mask,
+ has_cl_khr_byte_addressable_store = KhrValue<22>::Mask,
+ has_cles_khr_int64 = KhrValue<23>::Mask,
+ has_cles_khr_2d_image_array_writes = KhrValue<24>::Mask,
};
size_t m_extVector;
diff --git a/test_conformance/spir/sampler_enumeration.zip b/test_conformance/spir/sampler_enumeration.zip
index 5f8a7a06..ab9c9a56 100644
--- a/test_conformance/spir/sampler_enumeration.zip
+++ b/test_conformance/spir/sampler_enumeration.zip
Binary files differ
diff --git a/test_conformance/spirv_new/main.cpp b/test_conformance/spirv_new/main.cpp
index 5a8664b6..41566837 100644
--- a/test_conformance/spirv_new/main.cpp
+++ b/test_conformance/spirv_new/main.cpp
@@ -203,7 +203,6 @@ int get_program_with_il(clProgramWrapper &prog, const cl_device_id deviceID,
test_status InitCL(cl_device_id id)
{
test_status spirv_status;
- bool force = true;
spirv_status = check_spirv_compilation_readiness(id);
if (spirv_status != TEST_PASS)
{
diff --git a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp
index 9e1789c2..0728ea03 100644
--- a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp
+++ b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp
@@ -1,219 +1,218 @@
-/******************************************************************
-Copyright (c) 2018 The Khronos Group Inc. All Rights Reserved.
-
-This code is protected by copyright laws and contains material proprietary to the Khronos Group, Inc.
-This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not be disclosed in whole or in part to
-third parties, and may not be reproduced, republished, distributed, transmitted, displayed,
-broadcast or otherwise exploited in any manner without the express prior written permission
-of Khronos Group. The receipt or possession of this code does not convey any rights to reproduce,
-disclose, or distribute its contents, or to manufacture, use, or sell anything that it may describe,
-in whole or in part other than under the terms of the Khronos Adopters Agreement
-or Khronos Conformance Test Source License Agreement as executed between Khronos and the recipient.
-******************************************************************/
-
-#include "testBase.h"
-#include "types.hpp"
-
-#include <sstream>
-#include <string>
-#include <type_traits>
-
-
-template<typename T>
-int test_ext_cl_khr_spirv_no_integer_wrap_decoration(cl_device_id deviceID,
- cl_context context,
- cl_command_queue queue,
- const char *spvName,
- const char *funcName,
- const char *Tname)
-{
-
- cl_int err = CL_SUCCESS;
- const int num = 10;
- std::vector<T> h_lhs(num);
- std::vector<T> h_rhs(num);
- std::vector<T> expected_results(num);
- std::vector<T> h_ref(num);
- if (!is_extension_available(deviceID, "cl_khr_spirv_no_integer_wrap_decoration")) {
- log_info("Extension cl_khr_spirv_no_integer_wrap_decoration not supported; skipping tests.\n");
- return 0;
- }
-
- /*Test with some values that do not cause overflow*/
- if (std::is_signed<T>::value == true) {
- h_lhs.push_back((T)-25000);
- h_lhs.push_back((T)-3333);
- h_lhs.push_back((T)-7);
- h_lhs.push_back((T)-1);
- h_lhs.push_back(0);
- h_lhs.push_back(1);
- h_lhs.push_back(1024);
- h_lhs.push_back(2048);
- h_lhs.push_back(4094);
- h_lhs.push_back(10000);
- } else {
- h_lhs.push_back(0);
- h_lhs.push_back(1);
- h_lhs.push_back(3);
- h_lhs.push_back(5);
- h_lhs.push_back(10);
- h_lhs.push_back(100);
- h_lhs.push_back(1024);
- h_lhs.push_back(2048);
- h_lhs.push_back(4094);
- h_lhs.push_back(52888);
- }
-
- h_rhs.push_back(0);
- h_rhs.push_back(1);
- h_rhs.push_back(2);
- h_rhs.push_back(3);
- h_rhs.push_back(4);
- h_rhs.push_back(5);
- h_rhs.push_back(6);
- h_rhs.push_back(7);
- h_rhs.push_back(8);
- h_rhs.push_back(9);
- size_t bytes = num * sizeof(T);
-
- clMemWrapper lhs = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err);
- SPIRV_CHECK_ERROR(err, "Failed to create lhs buffer");
-
- err = clEnqueueWriteBuffer(queue, lhs, CL_TRUE, 0, bytes, &h_lhs[0], 0, NULL, NULL);
- SPIRV_CHECK_ERROR(err, "Failed to copy to lhs buffer");
-
- clMemWrapper rhs = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err);
- SPIRV_CHECK_ERROR(err, "Failed to create rhs buffer");
-
- err = clEnqueueWriteBuffer(queue, rhs, CL_TRUE, 0, bytes, &h_rhs[0], 0, NULL, NULL);
- SPIRV_CHECK_ERROR(err, "Failed to copy to rhs buffer");
-
- std::string kernelStr;
-
- {
- std::stringstream kernelStream;
- kernelStream << "#define spirv_fadd(a, b) (a) + (b) \n";
- kernelStream << "#define spirv_fsub(a, b) (a) - (b) \n";
- kernelStream << "#define spirv_fmul(a, b) (a) * (b) \n";
- kernelStream << "#define spirv_fshiftleft(a, b) (a) << (b) \n";
- kernelStream << "#define spirv_fnegate(a, b) (-a) \n";
-
- kernelStream << "#define T " << Tname << "\n";
- kernelStream << "#define FUNC spirv_" << funcName << "\n";
- kernelStream << "__kernel void fmath_cl(__global T *out, \n";
- kernelStream << "const __global T *lhs, const __global T *rhs) \n";
- kernelStream << "{ \n";
- kernelStream << " int id = get_global_id(0); \n";
- kernelStream << " out[id] = FUNC(lhs[id], rhs[id]); \n";
- kernelStream << "} \n";
- kernelStr = kernelStream.str();
- }
-
- size_t kernelLen = kernelStr.size();
- const char *kernelBuf = kernelStr.c_str();
-
- for (int i = 0; i < num; i++) {
- if (std::string(funcName) == std::string("fadd")) {
- expected_results[i] = h_lhs[i] + h_rhs[i];
- } else if (std::string(funcName) == std::string("fsub")) {
- expected_results[i] = h_lhs[i] - h_rhs[i];
- } else if (std::string(funcName) == std::string("fmul")) {
- expected_results[i] = h_lhs[i] * h_rhs[i];
- } else if (std::string(funcName) == std::string("fshiftleft")) {
- expected_results[i] = h_lhs[i] << h_rhs[i];
- } else if (std::string(funcName) == std::string("fnegate")) {
- expected_results[i] = 0 - h_lhs[i];
- }
- }
-
- {
- // Run the cl kernel for reference results
- clProgramWrapper prog;
- clKernelWrapper kernel;
- err = create_single_kernel_helper(context, &prog, &kernel, 1,
- &kernelBuf, "fmath_cl");
- SPIRV_CHECK_ERROR(err, "Failed to create cl kernel");
-
- clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);
- SPIRV_CHECK_ERROR(err, "Failed to create ref buffer");
-
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &ref);
- SPIRV_CHECK_ERROR(err, "Failed to set arg 0");
-
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs);
- SPIRV_CHECK_ERROR(err, "Failed to set arg 1");
-
- err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs);
- SPIRV_CHECK_ERROR(err, "Failed to set arg 2");
-
- size_t global = num;
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
- SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel");
-
- err = clEnqueueReadBuffer(queue, ref, CL_TRUE, 0, bytes, &h_ref[0], 0, NULL, NULL);
- SPIRV_CHECK_ERROR(err, "Failed to read from ref");
- }
-
- for (int i = 0; i < num; i++) {
- if (expected_results[i] != h_ref[i]) {
- log_error("Values do not match at index %d expected = %d got = %d\n", i, expected_results[i], h_ref[i]);
- return -1;
- }
- }
-
- clProgramWrapper prog;
- err = get_program_with_il(prog, deviceID, context, spvName);
- SPIRV_CHECK_ERROR(err, "Failed to build program");
-
- clKernelWrapper kernel = clCreateKernel(prog, "fmath_cl", &err);
- SPIRV_CHECK_ERROR(err, "Failed to create spv kernel");
-
- clMemWrapper res = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);
- SPIRV_CHECK_ERROR(err, "Failed to create res buffer");
-
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &res);
- SPIRV_CHECK_ERROR(err, "Failed to set arg 0");
-
- err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs);
- SPIRV_CHECK_ERROR(err, "Failed to set arg 1");
-
- err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs);
- SPIRV_CHECK_ERROR(err, "Failed to set arg 2");
-
- size_t global = num;
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
- SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel");
-
- std::vector<T> h_res(num);
- err = clEnqueueReadBuffer(queue, res, CL_TRUE, 0, bytes, &h_res[0], 0, NULL, NULL);
- SPIRV_CHECK_ERROR(err, "Failed to read from ref");
-
- for (int i = 0; i < num; i++) {
- if (expected_results[i] != h_res[i]) {
- log_error("Values do not match at location %d expected = %d got = %d\n", i, expected_results[i], h_res[i]);
- return -1;
- }
- }
-
- return 0;
-}
-
-#define TEST_FMATH_FUNC(TYPE, FUNC) \
- TEST_SPIRV_FUNC(ext_cl_khr_spirv_no_integer_wrap_decoration_##FUNC##_##TYPE) \
- { \
- return test_ext_cl_khr_spirv_no_integer_wrap_decoration<cl_##TYPE>(deviceID, context, queue, \
- "ext_cl_khr_spirv_no_integer_wrap_decoration_"#FUNC"_"#TYPE, \
- #FUNC, \
- #TYPE \
- ); \
- }
-
-TEST_FMATH_FUNC(int, fadd)
-TEST_FMATH_FUNC(int, fsub)
-TEST_FMATH_FUNC(int, fmul)
-TEST_FMATH_FUNC(int, fshiftleft)
-TEST_FMATH_FUNC(int, fnegate)
-TEST_FMATH_FUNC(uint, fadd)
-TEST_FMATH_FUNC(uint, fsub)
-TEST_FMATH_FUNC(uint, fmul)
-TEST_FMATH_FUNC(uint, fshiftleft) \ No newline at end of file
+/******************************************************************
+Copyright (c) 2018 The Khronos Group Inc. All Rights Reserved.
+
+This code is protected by copyright laws and contains material proprietary to the Khronos Group, Inc.
+This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not be disclosed in whole or in part to
+third parties, and may not be reproduced, republished, distributed, transmitted, displayed,
+broadcast or otherwise exploited in any manner without the express prior written permission
+of Khronos Group. The receipt or possession of this code does not convey any rights to reproduce,
+disclose, or distribute its contents, or to manufacture, use, or sell anything that it may describe,
+in whole or in part other than under the terms of the Khronos Adopters Agreement
+or Khronos Conformance Test Source License Agreement as executed between Khronos and the recipient.
+******************************************************************/
+
+#include "testBase.h"
+#include "types.hpp"
+
+#include <sstream>
+#include <string>
+#include <type_traits>
+
+
+template<typename T>
+int test_ext_cl_khr_spirv_no_integer_wrap_decoration(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ const char *spvName,
+ const char *funcName,
+ const char *Tname)
+{
+
+ cl_int err = CL_SUCCESS;
+ const int num = 10;
+ std::vector<T> h_lhs(num);
+ std::vector<T> h_rhs(num);
+ std::vector<T> expected_results(num);
+ std::vector<T> h_ref(num);
+ if (!is_extension_available(deviceID, "cl_khr_spirv_no_integer_wrap_decoration")) {
+ log_info("Extension cl_khr_spirv_no_integer_wrap_decoration not supported; skipping tests.\n");
+ return 0;
+ }
+
+ /*Test with some values that do not cause overflow*/
+ if (std::is_signed<T>::value == true) {
+ h_lhs.push_back((T)-25000);
+ h_lhs.push_back((T)-3333);
+ h_lhs.push_back((T)-7);
+ h_lhs.push_back((T)-1);
+ h_lhs.push_back(0);
+ h_lhs.push_back(1);
+ h_lhs.push_back(1024);
+ h_lhs.push_back(2048);
+ h_lhs.push_back(4094);
+ h_lhs.push_back(10000);
+ } else {
+ h_lhs.push_back(0);
+ h_lhs.push_back(1);
+ h_lhs.push_back(3);
+ h_lhs.push_back(5);
+ h_lhs.push_back(10);
+ h_lhs.push_back(100);
+ h_lhs.push_back(1024);
+ h_lhs.push_back(2048);
+ h_lhs.push_back(4094);
+ h_lhs.push_back(52888);
+ }
+
+ h_rhs.push_back(0);
+ h_rhs.push_back(1);
+ h_rhs.push_back(2);
+ h_rhs.push_back(3);
+ h_rhs.push_back(4);
+ h_rhs.push_back(5);
+ h_rhs.push_back(6);
+ h_rhs.push_back(7);
+ h_rhs.push_back(8);
+ h_rhs.push_back(9);
+ size_t bytes = num * sizeof(T);
+
+ clMemWrapper lhs = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err);
+ SPIRV_CHECK_ERROR(err, "Failed to create lhs buffer");
+
+ err = clEnqueueWriteBuffer(queue, lhs, CL_TRUE, 0, bytes, &h_lhs[0], 0, NULL, NULL);
+ SPIRV_CHECK_ERROR(err, "Failed to copy to lhs buffer");
+
+ clMemWrapper rhs = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err);
+ SPIRV_CHECK_ERROR(err, "Failed to create rhs buffer");
+
+ err = clEnqueueWriteBuffer(queue, rhs, CL_TRUE, 0, bytes, &h_rhs[0], 0, NULL, NULL);
+ SPIRV_CHECK_ERROR(err, "Failed to copy to rhs buffer");
+
+ std::string kernelStr;
+
+ {
+ std::stringstream kernelStream;
+ kernelStream << "#define spirv_fadd(a, b) (a) + (b) \n";
+ kernelStream << "#define spirv_fsub(a, b) (a) - (b) \n";
+ kernelStream << "#define spirv_fmul(a, b) (a) * (b) \n";
+ kernelStream << "#define spirv_fshiftleft(a, b) (a) << (b) \n";
+ kernelStream << "#define spirv_fnegate(a, b) (-a) \n";
+
+ kernelStream << "#define T " << Tname << "\n";
+ kernelStream << "#define FUNC spirv_" << funcName << "\n";
+ kernelStream << "__kernel void fmath_cl(__global T *out, \n";
+ kernelStream << "const __global T *lhs, const __global T *rhs) \n";
+ kernelStream << "{ \n";
+ kernelStream << " int id = get_global_id(0); \n";
+ kernelStream << " out[id] = FUNC(lhs[id], rhs[id]); \n";
+ kernelStream << "} \n";
+ kernelStr = kernelStream.str();
+ }
+
+ const char *kernelBuf = kernelStr.c_str();
+
+ for (int i = 0; i < num; i++) {
+ if (std::string(funcName) == std::string("fadd")) {
+ expected_results[i] = h_lhs[i] + h_rhs[i];
+ } else if (std::string(funcName) == std::string("fsub")) {
+ expected_results[i] = h_lhs[i] - h_rhs[i];
+ } else if (std::string(funcName) == std::string("fmul")) {
+ expected_results[i] = h_lhs[i] * h_rhs[i];
+ } else if (std::string(funcName) == std::string("fshiftleft")) {
+ expected_results[i] = h_lhs[i] << h_rhs[i];
+ } else if (std::string(funcName) == std::string("fnegate")) {
+ expected_results[i] = 0 - h_lhs[i];
+ }
+ }
+
+ {
+ // Run the cl kernel for reference results
+ clProgramWrapper prog;
+ clKernelWrapper kernel;
+ err = create_single_kernel_helper(context, &prog, &kernel, 1,
+ &kernelBuf, "fmath_cl");
+ SPIRV_CHECK_ERROR(err, "Failed to create cl kernel");
+
+ clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);
+ SPIRV_CHECK_ERROR(err, "Failed to create ref buffer");
+
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &ref);
+ SPIRV_CHECK_ERROR(err, "Failed to set arg 0");
+
+ err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs);
+ SPIRV_CHECK_ERROR(err, "Failed to set arg 1");
+
+ err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs);
+ SPIRV_CHECK_ERROR(err, "Failed to set arg 2");
+
+ size_t global = num;
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
+ SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel");
+
+ err = clEnqueueReadBuffer(queue, ref, CL_TRUE, 0, bytes, &h_ref[0], 0, NULL, NULL);
+ SPIRV_CHECK_ERROR(err, "Failed to read from ref");
+ }
+
+ for (int i = 0; i < num; i++) {
+ if (expected_results[i] != h_ref[i]) {
+ log_error("Values do not match at index %d expected = %d got = %d\n", i, expected_results[i], h_ref[i]);
+ return -1;
+ }
+ }
+
+ clProgramWrapper prog;
+ err = get_program_with_il(prog, deviceID, context, spvName);
+ SPIRV_CHECK_ERROR(err, "Failed to build program");
+
+ clKernelWrapper kernel = clCreateKernel(prog, "fmath_cl", &err);
+ SPIRV_CHECK_ERROR(err, "Failed to create spv kernel");
+
+ clMemWrapper res = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);
+ SPIRV_CHECK_ERROR(err, "Failed to create res buffer");
+
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &res);
+ SPIRV_CHECK_ERROR(err, "Failed to set arg 0");
+
+ err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs);
+ SPIRV_CHECK_ERROR(err, "Failed to set arg 1");
+
+ err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs);
+ SPIRV_CHECK_ERROR(err, "Failed to set arg 2");
+
+ size_t global = num;
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
+ SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel");
+
+ std::vector<T> h_res(num);
+ err = clEnqueueReadBuffer(queue, res, CL_TRUE, 0, bytes, &h_res[0], 0, NULL, NULL);
+ SPIRV_CHECK_ERROR(err, "Failed to read from ref");
+
+ for (int i = 0; i < num; i++) {
+ if (expected_results[i] != h_res[i]) {
+ log_error("Values do not match at location %d expected = %d got = %d\n", i, expected_results[i], h_res[i]);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+#define TEST_FMATH_FUNC(TYPE, FUNC) \
+ TEST_SPIRV_FUNC(ext_cl_khr_spirv_no_integer_wrap_decoration_##FUNC##_##TYPE) \
+ { \
+ return test_ext_cl_khr_spirv_no_integer_wrap_decoration<cl_##TYPE>(deviceID, context, queue, \
+ "ext_cl_khr_spirv_no_integer_wrap_decoration_"#FUNC"_"#TYPE, \
+ #FUNC, \
+ #TYPE \
+ ); \
+ }
+
+TEST_FMATH_FUNC(int, fadd)
+TEST_FMATH_FUNC(int, fsub)
+TEST_FMATH_FUNC(int, fmul)
+TEST_FMATH_FUNC(int, fshiftleft)
+TEST_FMATH_FUNC(int, fnegate)
+TEST_FMATH_FUNC(uint, fadd)
+TEST_FMATH_FUNC(uint, fsub)
+TEST_FMATH_FUNC(uint, fmul)
+TEST_FMATH_FUNC(uint, fshiftleft)
diff --git a/test_conformance/spirv_new/test_op_fmath.cpp b/test_conformance/spirv_new/test_op_fmath.cpp
index bec0667c..61e2864d 100644
--- a/test_conformance/spirv_new/test_op_fmath.cpp
+++ b/test_conformance/spirv_new/test_op_fmath.cpp
@@ -79,11 +79,8 @@ int test_fmath(cl_device_id deviceID,
kernelStr = kernelStream.str();
}
- size_t kernelLen = kernelStr.size();
const char *kernelBuf = kernelStr.c_str();
- const char *options = fast_math ? "-cl-fast-relaxed-math" : NULL;
-
std::vector<T> h_ref(num);
{
diff --git a/test_conformance/spirv_new/test_op_function.cpp b/test_conformance/spirv_new/test_op_function.cpp
index caa3e0d3..16183e80 100644
--- a/test_conformance/spirv_new/test_op_function.cpp
+++ b/test_conformance/spirv_new/test_op_function.cpp
@@ -33,7 +33,6 @@ int test_function(cl_device_id deviceID,
err = clEnqueueWriteBuffer(queue, in, CL_TRUE, 0, bytes, &h_in[0], 0, NULL, NULL);
SPIRV_CHECK_ERROR(err, "Failed to copy to in buffer");
- cl_uint bits = sizeof(void *) * 8;
std::string spvStr = std::string("op_function") + "_" + std::string(funcType);
const char *spvName = spvStr.c_str();
diff --git a/test_conformance/spirv_new/test_op_negate.cpp b/test_conformance/spirv_new/test_op_negate.cpp
index 1891c9bb..e3dc1f34 100644
--- a/test_conformance/spirv_new/test_op_negate.cpp
+++ b/test_conformance/spirv_new/test_op_negate.cpp
@@ -43,7 +43,6 @@ int test_negation(cl_device_id deviceID,
err = clEnqueueWriteBuffer(queue, in, CL_TRUE, 0, bytes, &h_in[0], 0, NULL, NULL);
SPIRV_CHECK_ERROR(err, "Failed to copy to in buffer");
- cl_uint bits = sizeof(void *) * 8;
std::string spvStr = std::string(funcName) + "_" + std::string(Tname);
const char *spvName = spvStr.c_str();
diff --git a/test_conformance/spirv_new/test_op_opaque.cpp b/test_conformance/spirv_new/test_op_opaque.cpp
index 067d9e4e..e6216061 100644
--- a/test_conformance/spirv_new/test_op_opaque.cpp
+++ b/test_conformance/spirv_new/test_op_opaque.cpp
@@ -17,7 +17,6 @@ or Khronos Conformance Test Source License Agreement as executed between Khronos
TEST_SPIRV_FUNC(op_type_opaque_simple)
{
const char *name = "opaque";
- int num = (int)(1 << 10);
cl_int err = CL_SUCCESS;
std::vector<unsigned char> buffer_vec = readSPIRV(name);
diff --git a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp
index 0a604bcf..0859668c 100644
--- a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp
+++ b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp
@@ -75,7 +75,6 @@ int test_vector_times_scalar(cl_device_id deviceID,
kernelStr = kernelStream.str();
}
- size_t kernelLen = kernelStr.size();
const char *kernelBuf = kernelStr.c_str();
std::vector<Tv> h_ref(num);
@@ -107,7 +106,6 @@ int test_vector_times_scalar(cl_device_id deviceID,
SPIRV_CHECK_ERROR(err, "Failed to read from ref");
}
- cl_uint bits = sizeof(void *) * 8;
std::string ref = "vector_times_scalar_";
ref += Tname;
const char *spvName = ref.c_str();
diff --git a/test_conformance/subgroups/CMakeLists.txt b/test_conformance/subgroups/CMakeLists.txt
index d48af9cc..1ff249cf 100644
--- a/test_conformance/subgroups/CMakeLists.txt
+++ b/test_conformance/subgroups/CMakeLists.txt
@@ -15,6 +15,7 @@ set(${MODULE_NAME}_SOURCES
test_subgroup_clustered_reduce.cpp
test_subgroup_shuffle.cpp
test_subgroup_shuffle_relative.cpp
+ test_subgroup_rotate.cpp
)
include(../CMakeCommon.txt)
diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp
index 44416dd7..a3ae910d 100644
--- a/test_conformance/subgroups/main.cpp
+++ b/test_conformance/subgroups/main.cpp
@@ -19,8 +19,10 @@
#include <string.h>
#include "procs.h"
#include "harness/testHarness.h"
+#include "CL/cl_half.h"
MTdata gMTdata;
+cl_half_rounding_mode g_rounding_mode;
test_definition test_list[] = {
ADD_TEST_VERSION(sub_group_info_ext, Version(2, 0)),
@@ -39,7 +41,8 @@ test_definition test_list[] = {
ADD_TEST(subgroup_functions_ballot),
ADD_TEST(subgroup_functions_clustered_reduce),
ADD_TEST(subgroup_functions_shuffle),
- ADD_TEST(subgroup_functions_shuffle_relative)
+ ADD_TEST(subgroup_functions_shuffle_relative),
+ ADD_TEST(subgroup_functions_rotate),
};
const int test_num = ARRAY_SIZE(test_list);
@@ -66,6 +69,22 @@ static test_status InitCL(cl_device_id device)
ret = TEST_SKIP;
}
}
+ // Determine the rounding mode to be used in float to half conversions in
+ // init and reference code
+ const cl_device_fp_config fpConfig = get_default_rounding_mode(device);
+
+ if (fpConfig == CL_FP_ROUND_TO_NEAREST)
+ {
+ g_rounding_mode = CL_HALF_RTE;
+ }
+ else if (fpConfig == CL_FP_ROUND_TO_ZERO && gIsEmbedded)
+ {
+ g_rounding_mode = CL_HALF_RTZ;
+ }
+ else
+ {
+ assert(false && "Unreachable");
+ }
return ret;
}
diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h
index d09e8242..d4f51bec 100644
--- a/test_conformance/subgroups/procs.h
+++ b/test_conformance/subgroups/procs.h
@@ -81,4 +81,8 @@ extern int test_subgroup_functions_shuffle_relative(cl_device_id device,
cl_context context,
cl_command_queue queue,
int num_elements);
+extern int test_subgroup_functions_rotate(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
#endif /*_procs_h*/
diff --git a/test_conformance/subgroups/subgroup_common_kernels.cpp b/test_conformance/subgroups/subgroup_common_kernels.cpp
index f8b24450..33a51637 100644
--- a/test_conformance/subgroups/subgroup_common_kernels.cpp
+++ b/test_conformance/subgroups/subgroup_common_kernels.cpp
@@ -15,92 +15,20 @@
//
#include "subgroup_common_kernels.h"
-const char* bcast_source =
- "__kernel void test_bcast(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " uint which_sub_group_local_id = xy[gid].z;\n"
- " out[gid] = sub_group_broadcast(x, which_sub_group_local_id);\n"
- "}\n";
-
-const char* redadd_source = "__kernel void test_redadd(const __global Type "
- "*in, __global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " out[gid] = sub_group_reduce_add(in[gid]);\n"
- "}\n";
-
-const char* redmax_source = "__kernel void test_redmax(const __global Type "
- "*in, __global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " out[gid] = sub_group_reduce_max(in[gid]);\n"
- "}\n";
-
-const char* redmin_source = "__kernel void test_redmin(const __global Type "
- "*in, __global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " out[gid] = sub_group_reduce_min(in[gid]);\n"
- "}\n";
-
-const char* scinadd_source =
- "__kernel void test_scinadd(const __global Type *in, __global int4 *xy, "
- "__global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " out[gid] = sub_group_scan_inclusive_add(in[gid]);\n"
- "}\n";
-
-const char* scinmax_source =
- "__kernel void test_scinmax(const __global Type *in, __global int4 *xy, "
- "__global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " out[gid] = sub_group_scan_inclusive_max(in[gid]);\n"
- "}\n";
-
-const char* scinmin_source =
- "__kernel void test_scinmin(const __global Type *in, __global int4 *xy, "
- "__global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " out[gid] = sub_group_scan_inclusive_min(in[gid]);\n"
- "}\n";
-
-const char* scexadd_source =
- "__kernel void test_scexadd(const __global Type *in, __global int4 *xy, "
- "__global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " out[gid] = sub_group_scan_exclusive_add(in[gid]);\n"
- "}\n";
-
-const char* scexmax_source =
- "__kernel void test_scexmax(const __global Type *in, __global int4 *xy, "
- "__global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " out[gid] = sub_group_scan_exclusive_max(in[gid]);\n"
- "}\n";
-
-const char* scexmin_source =
- "__kernel void test_scexmin(const __global Type *in, __global int4 *xy, "
- "__global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " out[gid] = sub_group_scan_exclusive_min(in[gid]);\n"
- "}\n";
+std::string sub_group_reduction_scan_source = R"(
+ __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) {
+ int gid = get_global_id(0);
+ XY(xy,gid);
+ out[gid] = %s(in[gid]);
+ }
+)";
+
+std::string sub_group_generic_source = R"(
+ __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) {
+ int gid = get_global_id(0);
+ XY(xy,gid);
+ Type x = in[gid];
+ out[gid] = %s(x, xy[gid].z);
+ }
+)"; \ No newline at end of file
diff --git a/test_conformance/subgroups/subgroup_common_kernels.h b/test_conformance/subgroups/subgroup_common_kernels.h
index 8ae97d9a..bf2210ef 100644
--- a/test_conformance/subgroups/subgroup_common_kernels.h
+++ b/test_conformance/subgroups/subgroup_common_kernels.h
@@ -18,15 +18,7 @@
#include "subhelpers.h"
-extern const char* bcast_source;
-extern const char* redadd_source;
-extern const char* redmax_source;
-extern const char* redmin_source;
-extern const char* scinadd_source;
-extern const char* scinmax_source;
-extern const char* scinmin_source;
-extern const char* scexadd_source;
-extern const char* scexmax_source;
-extern const char* scexmin_source;
+extern std::string sub_group_reduction_scan_source;
+extern std::string sub_group_generic_source;
#endif
diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h
index b30c416b..b2648c30 100644
--- a/test_conformance/subgroups/subgroup_common_templates.h
+++ b/test_conformance/subgroups/subgroup_common_templates.h
@@ -17,13 +17,12 @@
#define SUBGROUPCOMMONTEMPLATES_H
#include "typeWrappers.h"
-#include <bitset>
#include "CL/cl_half.h"
#include "subhelpers.h"
-
#include <set>
+#include <algorithm>
+#include <random>
-typedef std::bitset<128> bs128;
static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
const std::string &mask_type,
cl_uint max_sub_group_size)
@@ -66,6 +65,13 @@ static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
// only 4 work_items from subgroup enter the code (are active)
template <typename Ty, SubgroupsBroadcastOp operation> struct BC
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ log_info(" sub_group_%s(%s)...%s\n", operation_names(operation),
+ TypeManager<Ty>::name(), extra_text);
+ }
+
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
{
int i, ii, j, k, n;
@@ -79,11 +85,8 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
int last_subgroup_size = 0;
ii = 0;
- log_info(" sub_group_%s(%s)...\n", operation_names(operation),
- TypeManager<Ty>::name());
if (non_uniform_size)
{
- log_info(" non uniform work group size mode ON\n");
ng++;
}
for (k = 0; k < ng; ++k)
@@ -172,8 +175,8 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
}
}
- static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
int ii, i, j, k, l, n;
int ng = test_params.global_workgroup_size;
@@ -290,8 +293,6 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
y += nw;
m += 4 * nw;
}
- log_info(" sub_group_%s(%s)... passed\n", operation_names(operation),
- TypeManager<Ty>::name());
return TEST_PASS;
}
};
@@ -301,7 +302,7 @@ static float to_float(subgroups::cl_half x) { return cl_half_to_float(x.data); }
static subgroups::cl_half to_half(float x)
{
subgroups::cl_half value;
- value.data = cl_half_from_float(x, CL_HALF_RTE);
+ value.data = cl_half_from_float(x, g_rounding_mode);
return value;
}
@@ -320,7 +321,7 @@ template <typename Ty> inline Ty calculate(Ty a, Ty b, ArithmeticOp operation)
case ArithmeticOp::logical_and: return a && b;
case ArithmeticOp::logical_or: return a || b;
case ArithmeticOp::logical_xor: return !a ^ !b;
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return 0;
}
@@ -342,7 +343,7 @@ inline cl_double calculate(cl_double a, cl_double b, ArithmeticOp operation)
case ArithmeticOp::mul_: {
return a * b;
}
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return 0;
}
@@ -364,7 +365,7 @@ inline cl_float calculate(cl_float a, cl_float b, ArithmeticOp operation)
case ArithmeticOp::mul_: {
return a * b;
}
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return 0;
}
@@ -381,7 +382,7 @@ inline subgroups::cl_half calculate(subgroups::cl_half a, subgroups::cl_half b,
case ArithmeticOp::min_:
return to_float(a) < to_float(b) || is_half_nan(b.data) ? a : b;
case ArithmeticOp::mul_: return to_half(to_float(a) * to_float(b));
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return to_half(0);
}
@@ -392,11 +393,44 @@ template <typename Ty> bool is_floating_point()
|| std::is_same<Ty, subgroups::cl_half>::value;
}
+// limit possible input values to avoid arithmetic rounding/overflow issues.
+// for each subgroup values defined different values
+// for rest of workitems set 1
+// shuffle values
+static void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values,
+ int sb_size)
+{
+ // max product is 720, cl_half has enough precision for it
+ const std::vector<cl_ulong> non_one_values{ 2, 3, 4, 5, 6 };
+
+ if (sb_size <= non_one_values.size())
+ {
+ safe_values.assign(non_one_values.begin(),
+ non_one_values.begin() + sb_size);
+ }
+ else
+ {
+ safe_values.assign(sb_size, 1);
+ std::copy(non_one_values.begin(), non_one_values.end(),
+ safe_values.begin());
+ }
+
+ std::mt19937 mersenne_twister_engine(10000);
+ std::shuffle(safe_values.begin(), safe_values.end(),
+ mersenne_twister_engine);
+};
+
template <typename Ty, ArithmeticOp operation>
-void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
+void generate_inputs(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
{
int nj = (nw + ns - 1) / ns;
+ std::vector<cl_ulong> safe_values;
+ if (operation == ArithmeticOp::mul_ || operation == ArithmeticOp::add_)
+ {
+ fill_and_shuffle_safe_values(safe_values, ns);
+ }
+
for (int k = 0; k < ng; ++k)
{
for (int j = 0; j < nj; ++j)
@@ -407,13 +441,10 @@ void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
for (int i = 0; i < n; ++i)
{
cl_ulong out_value;
- double y;
if (operation == ArithmeticOp::mul_
|| operation == ArithmeticOp::add_)
{
- // work around to avoid overflow, do not use 0 for
- // multiplication
- out_value = (genrand_int32(gMTdata) % 4) + 1;
+ out_value = safe_values[i];
}
else
{
@@ -441,18 +472,23 @@ void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
template <typename Ty, ShuffleOp operation> struct SHF
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ log_info(" sub_group_%s(%s)...%s\n", operation_names(operation),
+ TypeManager<Ty>::name(), extra_text);
+ }
+
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
{
- int i, ii, j, k, l, n, delta;
+ int i, ii, j, k, n;
+ cl_uint l;
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
int nj = (nw + ns - 1) / ns;
- int d = ns > 100 ? 100 : ns;
ii = 0;
ng = ng / nw;
- log_info(" sub_group_%s(%s)...\n", operation_names(operation),
- TypeManager<Ty>::name());
for (k = 0; k < ng; ++k)
{ // for each work_group
for (j = 0; j < nj; ++j)
@@ -462,30 +498,31 @@ template <typename Ty, ShuffleOp operation> struct SHF
for (i = 0; i < n; ++i)
{
int midx = 4 * ii + 4 * i + 2;
- l = (int)(genrand_int32(gMTdata) & 0x7fffffff)
- % (d > n ? n : d);
+ l = (((cl_uint)(genrand_int32(gMTdata) & 0x7fffffff) + 1)
+ % (ns * 2 + 1))
+ - 1;
switch (operation)
{
case ShuffleOp::shuffle:
case ShuffleOp::shuffle_xor:
- // storing information about shuffle index
+ case ShuffleOp::shuffle_up:
+ case ShuffleOp::shuffle_down:
+ // storing information about shuffle index/delta
m[midx] = (cl_int)l;
break;
- case ShuffleOp::shuffle_up:
- delta = l; // calculate delta for shuffle up
- if (i - delta < 0)
+ case ShuffleOp::rotate:
+ case ShuffleOp::clustered_rotate:
+ // Storing information about rotate delta.
+ // The delta must be the same for each thread in
+ // the subgroup.
+ if (i == 0)
{
- delta = i;
+ m[midx] = (cl_int)l;
}
- m[midx] = (cl_int)delta;
- break;
- case ShuffleOp::shuffle_down:
- delta = l; // calculate delta for shuffle down
- if (i + delta >= n)
+ else
{
- delta = n - 1 - i;
+ m[midx] = m[midx - 4];
}
- m[midx] = (cl_int)delta;
break;
default: break;
}
@@ -503,10 +540,11 @@ template <typename Ty, ShuffleOp operation> struct SHF
}
}
- static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
- int ii, i, j, k, l, n;
+ int ii, i, j, k, n;
+ cl_uint l;
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
@@ -531,32 +569,51 @@ template <typename Ty, ShuffleOp operation> struct SHF
{ // inside the subgroup
// shuffle index storage
int midx = 4 * ii + 4 * i + 2;
- l = (int)m[midx];
+ l = m[midx];
rr = my[ii + i];
+ cl_uint tr_idx;
+ bool skip = false;
switch (operation)
{
// shuffle basic - treat l as index
- case ShuffleOp::shuffle: tr = mx[ii + l]; break;
- // shuffle up - treat l as delta
- case ShuffleOp::shuffle_up: tr = mx[ii + i - l]; break;
+ case ShuffleOp::shuffle: tr_idx = l; break;
+ // shuffle xor - treat l as mask
+ case ShuffleOp::shuffle_xor: tr_idx = i ^ l; break;
// shuffle up - treat l as delta
+ case ShuffleOp::shuffle_up:
+ if (l >= ns) skip = true;
+ tr_idx = i - l;
+ break;
+ // shuffle down - treat l as delta
case ShuffleOp::shuffle_down:
- tr = mx[ii + i + l];
+ if (l >= ns) skip = true;
+ tr_idx = i + l;
break;
- // shuffle xor - treat l as mask
- case ShuffleOp::shuffle_xor:
- tr = mx[ii + (i ^ l)];
+ // rotate - treat l as delta
+ case ShuffleOp::rotate:
+ tr_idx = (i + l) % test_params.subgroup_size;
+ break;
+ case ShuffleOp::clustered_rotate: {
+ tr_idx = ((i & ~(test_params.cluster_size - 1))
+ + ((i + l) % test_params.cluster_size));
break;
+ }
default: break;
}
- if (!compare(rr, tr))
+ if (!skip && tr_idx < n)
{
- log_error("ERROR: sub_group_%s(%s) mismatch for "
- "local id %d in sub group %d in group %d\n",
- operation_names(operation),
- TypeManager<Ty>::name(), i, j, k);
- return TEST_FAIL;
+ tr = mx[ii + tr_idx];
+
+ if (!compare(rr, tr))
+ {
+ log_error("ERROR: sub_group_%s(%s) mismatch for "
+ "local id %d in sub group %d in group "
+ "%d\n",
+ operation_names(operation),
+ TypeManager<Ty>::name(), i, j, k);
+ return TEST_FAIL;
+ }
}
}
}
@@ -564,51 +621,53 @@ template <typename Ty, ShuffleOp operation> struct SHF
y += nw;
m += 4 * nw;
}
- log_info(" sub_group_%s(%s)... passed\n", operation_names(operation),
- TypeManager<Ty>::name());
return TEST_PASS;
}
};
template <typename Ty, ArithmeticOp operation> struct SCEX_NU
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ std::string func_name = (test_params.all_work_item_masks.size() > 0
+ ? "sub_group_non_uniform_scan_exclusive"
+ : "sub_group_scan_exclusive");
+ log_info(" %s_%s(%s)...%s\n", func_name.c_str(),
+ operation_names(operation), TypeManager<Ty>::name(),
+ extra_text);
+ }
+
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
{
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
- uint32_t work_items_mask = test_params.work_items_mask;
ng = ng / nw;
- std::string func_name;
- work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive"
- : func_name = "sub_group_scan_exclusive";
- log_info(" %s_%s(%s)...\n", func_name.c_str(),
- operation_names(operation), TypeManager<Ty>::name());
- log_info(" test params: global size = %d local size = %d subgroups "
- "size = %d work item mask = 0x%x \n",
- test_params.global_workgroup_size, nw, ns, work_items_mask);
- genrand<Ty, operation>(x, t, m, ns, nw, ng);
+ generate_inputs<Ty, operation>(x, t, m, ns, nw, ng);
}
- static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
int ii, i, j, k, n;
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
- uint32_t work_items_mask = test_params.work_items_mask;
+ bs128 work_items_mask = test_params.work_items_mask;
int nj = (nw + ns - 1) / ns;
Ty tr, rr;
ng = ng / nw;
- std::string func_name;
- work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive"
- : func_name = "sub_group_scan_exclusive";
+ std::string func_name = (test_params.all_work_item_masks.size() > 0
+ ? "sub_group_non_uniform_scan_exclusive"
+ : "sub_group_scan_exclusive");
- uint32_t use_work_items_mask;
// for uniform case take into consideration all workitems
- use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask;
+ if (!work_items_mask.any())
+ {
+ work_items_mask.set();
+ }
for (k = 0; k < ng; ++k)
{ // for each work_group
// Map to array indexed to array indexed by local ID and sub group
@@ -624,35 +683,21 @@ template <typename Ty, ArithmeticOp operation> struct SCEX_NU
std::set<int> active_work_items;
for (i = 0; i < n; ++i)
{
- uint32_t check_work_item = 1 << (i % 32);
- if (use_work_items_mask & check_work_item)
+ if (work_items_mask.test(i))
{
active_work_items.insert(i);
}
}
if (active_work_items.empty())
{
- log_info(" No acitve workitems in workgroup id = %d "
- "subgroup id = %d - no calculation\n",
- k, j);
- continue;
- }
- else if (active_work_items.size() == 1)
- {
- log_info(" One active workitem in workgroup id = %d "
- "subgroup id = %d - no calculation\n",
- k, j);
continue;
}
else
{
tr = TypeManager<Ty>::identify_limits(operation);
- int idx = 0;
for (const int &active_work_item : active_work_items)
{
rr = my[ii + active_work_item];
- if (idx == 0) continue;
-
if (!compare_ordered(rr, tr))
{
log_error(
@@ -665,7 +710,6 @@ template <typename Ty, ArithmeticOp operation> struct SCEX_NU
}
tr = calculate<Ty>(tr, mx[ii + active_work_item],
operation);
- idx++;
}
}
}
@@ -674,8 +718,6 @@ template <typename Ty, ArithmeticOp operation> struct SCEX_NU
m += 4 * nw;
}
- log_info(" %s_%s(%s)... passed\n", func_name.c_str(),
- operation_names(operation), TypeManager<Ty>::name());
return TEST_PASS;
}
};
@@ -683,44 +725,48 @@ template <typename Ty, ArithmeticOp operation> struct SCEX_NU
// Test for scan inclusive non uniform functions
template <typename Ty, ArithmeticOp operation> struct SCIN_NU
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ std::string func_name = (test_params.all_work_item_masks.size() > 0
+ ? "sub_group_non_uniform_scan_inclusive"
+ : "sub_group_scan_inclusive");
+ log_info(" %s_%s(%s)...%s\n", func_name.c_str(),
+ operation_names(operation), TypeManager<Ty>::name(),
+ extra_text);
+ }
+
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
{
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
- uint32_t work_items_mask = test_params.work_items_mask;
ng = ng / nw;
- std::string func_name;
- work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive"
- : func_name = "sub_group_scan_inclusive";
-
- genrand<Ty, operation>(x, t, m, ns, nw, ng);
- log_info(" %s_%s(%s)...\n", func_name.c_str(),
- operation_names(operation), TypeManager<Ty>::name());
- log_info(" test params: global size = %d local size = %d subgroups "
- "size = %d work item mask = 0x%x \n",
- test_params.global_workgroup_size, nw, ns, work_items_mask);
+ generate_inputs<Ty, operation>(x, t, m, ns, nw, ng);
}
- static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
int ii, i, j, k, n;
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
- uint32_t work_items_mask = test_params.work_items_mask;
+ bs128 work_items_mask = test_params.work_items_mask;
+
int nj = (nw + ns - 1) / ns;
Ty tr, rr;
ng = ng / nw;
- std::string func_name;
- work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive"
- : func_name = "sub_group_scan_inclusive";
+ std::string func_name = (test_params.all_work_item_masks.size() > 0
+ ? "sub_group_non_uniform_scan_inclusive"
+ : "sub_group_scan_inclusive");
- uint32_t use_work_items_mask;
// for uniform case take into consideration all workitems
- use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask;
+ if (!work_items_mask.any())
+ {
+ work_items_mask.set();
+ }
// std::bitset<32> mask32(use_work_items_mask);
// for (int k) mask32.count();
for (k = 0; k < ng; ++k)
@@ -740,8 +786,7 @@ template <typename Ty, ArithmeticOp operation> struct SCIN_NU
for (i = 0; i < n; ++i)
{
- uint32_t check_work_item = 1 << (i % 32);
- if (use_work_items_mask & check_work_item)
+ if (work_items_mask.test(i))
{
if (catch_frist_active == -1)
{
@@ -752,9 +797,6 @@ template <typename Ty, ArithmeticOp operation> struct SCIN_NU
}
if (active_work_items.empty())
{
- log_info(" No acitve workitems in workgroup id = %d "
- "subgroup id = %d - no calculation\n",
- k, j);
continue;
}
else
@@ -792,8 +834,6 @@ template <typename Ty, ArithmeticOp operation> struct SCIN_NU
m += 4 * nw;
}
- log_info(" %s_%s(%s)... passed\n", func_name.c_str(),
- operation_names(operation), TypeManager<Ty>::name());
return TEST_PASS;
}
};
@@ -801,41 +841,41 @@ template <typename Ty, ArithmeticOp operation> struct SCIN_NU
// Test for reduce non uniform functions
template <typename Ty, ArithmeticOp operation> struct RED_NU
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ std::string func_name = (test_params.all_work_item_masks.size() > 0
+ ? "sub_group_non_uniform_reduce"
+ : "sub_group_reduce");
+ log_info(" %s_%s(%s)...%s\n", func_name.c_str(),
+ operation_names(operation), TypeManager<Ty>::name(),
+ extra_text);
+ }
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
{
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
- uint32_t work_items_mask = test_params.work_items_mask;
ng = ng / nw;
- std::string func_name;
-
- work_items_mask ? func_name = "sub_group_non_uniform_reduce"
- : func_name = "sub_group_reduce";
- log_info(" %s_%s(%s)...\n", func_name.c_str(),
- operation_names(operation), TypeManager<Ty>::name());
- log_info(" test params: global size = %d local size = %d subgroups "
- "size = %d work item mask = 0x%x \n",
- test_params.global_workgroup_size, nw, ns, work_items_mask);
- genrand<Ty, operation>(x, t, m, ns, nw, ng);
+ generate_inputs<Ty, operation>(x, t, m, ns, nw, ng);
}
- static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
int ii, i, j, k, n;
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
- uint32_t work_items_mask = test_params.work_items_mask;
+ bs128 work_items_mask = test_params.work_items_mask;
int nj = (nw + ns - 1) / ns;
ng = ng / nw;
Ty tr, rr;
- std::string func_name;
- work_items_mask ? func_name = "sub_group_non_uniform_reduce"
- : func_name = "sub_group_reduce";
+ std::string func_name = (test_params.all_work_item_masks.size() > 0
+ ? "sub_group_non_uniform_reduce"
+ : "sub_group_reduce");
for (k = 0; k < ng; ++k)
{
@@ -847,9 +887,10 @@ template <typename Ty, ArithmeticOp operation> struct RED_NU
my[j] = y[j];
}
- uint32_t use_work_items_mask;
- use_work_items_mask =
- !work_items_mask ? 0xFFFFFFFF : work_items_mask;
+ if (!work_items_mask.any())
+ {
+ work_items_mask.set();
+ }
for (j = 0; j < nj; ++j)
{
@@ -859,8 +900,7 @@ template <typename Ty, ArithmeticOp operation> struct RED_NU
int catch_frist_active = -1;
for (i = 0; i < n; ++i)
{
- uint32_t check_work_item = 1 << (i % 32);
- if (use_work_items_mask & check_work_item)
+ if (work_items_mask.test(i))
{
if (catch_frist_active == -1)
{
@@ -876,9 +916,6 @@ template <typename Ty, ArithmeticOp operation> struct RED_NU
if (active_work_items.empty())
{
- log_info(" No acitve workitems in workgroup id = %d "
- "subgroup id = %d - no calculation\n",
- k, j);
continue;
}
@@ -902,8 +939,6 @@ template <typename Ty, ArithmeticOp operation> struct RED_NU
m += 4 * nw;
}
- log_info(" %s_%s(%s)... passed\n", func_name.c_str(),
- operation_names(operation), TypeManager<Ty>::name());
return TEST_PASS;
}
};
diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h
index 93673b35..0a2c3903 100644
--- a/test_conformance/subgroups/subhelpers.h
+++ b/test_conformance/subgroups/subhelpers.h
@@ -24,32 +24,194 @@
#include <limits>
#include <vector>
#include <type_traits>
+#include <bitset>
+#include <regex>
+#include <map>
#define NR_OF_ACTIVE_WORK_ITEMS 4
extern MTdata gMTdata;
+typedef std::bitset<128> bs128;
+extern cl_half_rounding_mode g_rounding_mode;
+
+static bs128 cl_uint4_to_bs128(cl_uint4 v)
+{
+ return bs128(v.s0) | (bs128(v.s1) << 32) | (bs128(v.s2) << 64)
+ | (bs128(v.s3) << 96);
+}
+
+static cl_uint4 bs128_to_cl_uint4(bs128 v)
+{
+ bs128 bs128_ffffffff = 0xffffffffU;
+
+ cl_uint4 r;
+ r.s0 = ((v >> 0) & bs128_ffffffff).to_ulong();
+ r.s1 = ((v >> 32) & bs128_ffffffff).to_ulong();
+ r.s2 = ((v >> 64) & bs128_ffffffff).to_ulong();
+ r.s3 = ((v >> 96) & bs128_ffffffff).to_ulong();
+
+ return r;
+}
struct WorkGroupParams
{
- WorkGroupParams(size_t gws, size_t lws,
- const std::vector<std::string> &req_ext = {},
- const std::vector<uint32_t> &all_wim = {})
+
+ WorkGroupParams(size_t gws, size_t lws, int dm_arg = -1, int cs_arg = -1)
: global_workgroup_size(gws), local_workgroup_size(lws),
- required_extensions(req_ext), all_work_item_masks(all_wim)
+ divergence_mask_arg(dm_arg), cluster_size_arg(cs_arg)
{
subgroup_size = 0;
+ cluster_size = 0;
work_items_mask = 0;
use_core_subgroups = true;
dynsc = 0;
+ load_masks();
}
size_t global_workgroup_size;
size_t local_workgroup_size;
size_t subgroup_size;
- uint32_t work_items_mask;
- int dynsc;
+ cl_uint cluster_size;
+ bs128 work_items_mask;
+ size_t dynsc;
bool use_core_subgroups;
- std::vector<std::string> required_extensions;
- std::vector<uint32_t> all_work_item_masks;
+ std::vector<bs128> all_work_item_masks;
+ int divergence_mask_arg;
+ int cluster_size_arg;
+ void save_kernel_source(const std::string &source, std::string name = "")
+ {
+ if (name == "")
+ {
+ name = "default";
+ }
+ if (kernel_function_name.find(name) != kernel_function_name.end())
+ {
+ log_info("Kernel definition duplication. Source will be "
+ "overwritten for function name %s\n",
+ name.c_str());
+ }
+ kernel_function_name[name] = source;
+ };
+ // return specific defined kernel or default.
+ std::string get_kernel_source(std::string name)
+ {
+ if (kernel_function_name.find(name) == kernel_function_name.end())
+ {
+ return kernel_function_name["default"];
+ }
+ return kernel_function_name[name];
+ }
+
+
+private:
+ std::map<std::string, std::string> kernel_function_name;
+ void load_masks()
+ {
+ if (divergence_mask_arg != -1)
+ {
+ // 1 in string will be set 1, 0 will be set 0
+ bs128 mask_0xf0f0f0f0("11110000111100001111000011110000"
+ "11110000111100001111000011110000"
+ "11110000111100001111000011110000"
+ "11110000111100001111000011110000",
+ 128, '0', '1');
+ all_work_item_masks.push_back(mask_0xf0f0f0f0);
+ // 1 in string will be set 0, 0 will be set 1
+ bs128 mask_0x0f0f0f0f("11110000111100001111000011110000"
+ "11110000111100001111000011110000"
+ "11110000111100001111000011110000"
+ "11110000111100001111000011110000",
+ 128, '1', '0');
+ all_work_item_masks.push_back(mask_0x0f0f0f0f);
+ bs128 mask_0x5555aaaa("10101010101010101010101010101010"
+ "10101010101010101010101010101010"
+ "10101010101010101010101010101010"
+ "10101010101010101010101010101010",
+ 128, '0', '1');
+ all_work_item_masks.push_back(mask_0x5555aaaa);
+ bs128 mask_0xaaaa5555("10101010101010101010101010101010"
+ "10101010101010101010101010101010"
+ "10101010101010101010101010101010"
+ "10101010101010101010101010101010",
+ 128, '1', '0');
+ all_work_item_masks.push_back(mask_0xaaaa5555);
+ // 0x0f0ff0f0
+ bs128 mask_0x0f0ff0f0("00001111000011111111000011110000"
+ "00001111000011111111000011110000"
+ "00001111000011111111000011110000"
+ "00001111000011111111000011110000",
+ 128, '0', '1');
+ all_work_item_masks.push_back(mask_0x0f0ff0f0);
+ // 0xff0000ff
+ bs128 mask_0xff0000ff("11111111000000000000000011111111"
+ "11111111000000000000000011111111"
+ "11111111000000000000000011111111"
+ "11111111000000000000000011111111",
+ 128, '0', '1');
+ all_work_item_masks.push_back(mask_0xff0000ff);
+ // 0xff00ff00
+ bs128 mask_0xff00ff00("11111111000000001111111100000000"
+ "11111111000000001111111100000000"
+ "11111111000000001111111100000000"
+ "11111111000000001111111100000000",
+ 128, '0', '1');
+ all_work_item_masks.push_back(mask_0xff00ff00);
+ // 0x00ffff00
+ bs128 mask_0x00ffff00("00000000111111111111111100000000"
+ "00000000111111111111111100000000"
+ "00000000111111111111111100000000"
+ "00000000111111111111111100000000",
+ 128, '0', '1');
+ all_work_item_masks.push_back(mask_0x00ffff00);
+ // 0x80 1 workitem highest id for 8 subgroup size
+ bs128 mask_0x80808080("10000000100000001000000010000000"
+ "10000000100000001000000010000000"
+ "10000000100000001000000010000000"
+ "10000000100000001000000010000000",
+ 128, '0', '1');
+
+ all_work_item_masks.push_back(mask_0x80808080);
+ // 0x8000 1 workitem highest id for 16 subgroup size
+ bs128 mask_0x80008000("10000000000000001000000000000000"
+ "10000000000000001000000000000000"
+ "10000000000000001000000000000000"
+ "10000000000000001000000000000000",
+ 128, '0', '1');
+ all_work_item_masks.push_back(mask_0x80008000);
+ // 0x80000000 1 workitem highest id for 32 subgroup size
+ bs128 mask_0x80000000("10000000000000000000000000000000"
+ "10000000000000000000000000000000"
+ "10000000000000000000000000000000"
+ "10000000000000000000000000000000",
+ 128, '0', '1');
+ all_work_item_masks.push_back(mask_0x80000000);
+ // 0x80000000 00000000 1 workitem highest id for 64 subgroup size
+ // 0x80000000 1 workitem highest id for 32 subgroup size
+ bs128 mask_0x8000000000000000("10000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "10000000000000000000000000000000"
+ "00000000000000000000000000000000",
+ 128, '0', '1');
+
+ all_work_item_masks.push_back(mask_0x8000000000000000);
+ // 0x80000000 00000000 00000000 00000000 1 workitem highest id for
+ // 128 subgroup size
+ bs128 mask_0x80000000000000000000000000000000(
+ "10000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000",
+ 128, '0', '1');
+ all_work_item_masks.push_back(
+ mask_0x80000000000000000000000000000000);
+
+ bs128 mask_0xffffffff("11111111111111111111111111111111"
+ "11111111111111111111111111111111"
+ "11111111111111111111111111111111"
+ "11111111111111111111111111111111",
+ 128, '0', '1');
+ all_work_item_masks.push_back(mask_0xffffffff);
+ }
+ }
};
enum class SubgroupsBroadcastOp
@@ -89,7 +251,9 @@ enum class ShuffleOp
shuffle,
shuffle_up,
shuffle_down,
- shuffle_xor
+ shuffle_xor,
+ rotate,
+ clustered_rotate,
};
enum class ArithmeticOp
@@ -120,7 +284,7 @@ static const char *const operation_names(ArithmeticOp operation)
case ArithmeticOp::logical_and: return "logical_and";
case ArithmeticOp::logical_or: return "logical_or";
case ArithmeticOp::logical_xor: return "logical_xor";
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return "";
}
@@ -142,7 +306,7 @@ static const char *const operation_names(BallotOp operation)
case BallotOp::gt_mask: return "gt";
case BallotOp::le_mask: return "le";
case BallotOp::lt_mask: return "lt";
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return "";
}
@@ -155,7 +319,9 @@ static const char *const operation_names(ShuffleOp operation)
case ShuffleOp::shuffle_up: return "shuffle_up";
case ShuffleOp::shuffle_down: return "shuffle_down";
case ShuffleOp::shuffle_xor: return "shuffle_xor";
- default: log_error("Unknown operation request"); break;
+ case ShuffleOp::rotate: return "rotate";
+ case ShuffleOp::clustered_rotate: return "clustered_rotate";
+ default: log_error("Unknown operation request\n"); break;
}
return "";
}
@@ -168,7 +334,7 @@ static const char *const operation_names(NonUniformVoteOp operation)
case NonUniformVoteOp::all_equal: return "all_equal";
case NonUniformVoteOp::any: return "any";
case NonUniformVoteOp::elect: return "elect";
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return "";
}
@@ -181,7 +347,7 @@ static const char *const operation_names(SubgroupsBroadcastOp operation)
case SubgroupsBroadcastOp::broadcast_first: return "broadcast_first";
case SubgroupsBroadcastOp::non_uniform_broadcast:
return "non_uniform_broadcast";
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return "";
}
@@ -358,7 +524,7 @@ template <typename Ty> struct CommonTypeManager
case ArithmeticOp::and_: return (Ty)~0;
case ArithmeticOp::or_: return (Ty)0;
case ArithmeticOp::xor_: return (Ty)0;
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return 0;
}
@@ -386,7 +552,7 @@ template <> struct TypeManager<cl_int> : public CommonTypeManager<cl_int>
case ArithmeticOp::logical_and: return (cl_int)1;
case ArithmeticOp::logical_or: return (cl_int)0;
case ArithmeticOp::logical_xor: return (cl_int)0;
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return 0;
}
@@ -800,7 +966,7 @@ template <> struct TypeManager<cl_float> : public CommonTypeManager<cl_float>
case ArithmeticOp::min_:
return std::numeric_limits<float>::infinity();
case ArithmeticOp::mul_: return (cl_float)1;
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return 0;
}
@@ -859,7 +1025,7 @@ template <> struct TypeManager<cl_double> : public CommonTypeManager<cl_double>
case ArithmeticOp::min_:
return std::numeric_limits<double>::infinity();
case ArithmeticOp::mul_: return (cl_double)1;
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return 0;
}
@@ -946,7 +1112,7 @@ struct TypeManager<subgroups::cl_half>
case ArithmeticOp::max_: return { 0xfc00 };
case ArithmeticOp::min_: return { 0x7c00 };
case ArithmeticOp::mul_: return { 0x3c00 };
- default: log_error("Unknown operation request"); break;
+ default: log_error("Unknown operation request\n"); break;
}
return { 0 };
}
@@ -1080,7 +1246,7 @@ template <typename Ty>
typename std::enable_if<TypeManager<Ty>::is_sb_scalar_type::value>::type
set_value(Ty &lhs, const cl_ulong &rhs)
{
- lhs.data = rhs;
+ lhs.data = cl_half_from_float(static_cast<cl_float>(rhs), g_rounding_mode);
}
// compare for common vectors
@@ -1164,98 +1330,172 @@ inline bool compare_ordered(const subgroups::cl_half &lhs, const int &rhs)
return cl_half_to_float(lhs.data) == rhs;
}
-// Run a test kernel to compute the result of a built-in on an input
-static int run_kernel(cl_context context, cl_command_queue queue,
- cl_kernel kernel, size_t global, size_t local,
- void *idata, size_t isize, void *mdata, size_t msize,
- void *odata, size_t osize, size_t tsize = 0)
-{
- clMemWrapper in;
- clMemWrapper xy;
- clMemWrapper out;
- clMemWrapper tmp;
- int error;
+template <typename Ty, typename Fns> class KernelExecutor {
+public:
+ KernelExecutor(cl_context c, cl_command_queue q, cl_kernel k, size_t g,
+ size_t l, Ty *id, size_t is, Ty *mid, Ty *mod, cl_int *md,
+ size_t ms, Ty *od, size_t os, size_t ts = 0)
+ : context(c), queue(q), kernel(k), global(g), local(l), idata(id),
+ isize(is), mapin_data(mid), mapout_data(mod), mdata(md), msize(ms),
+ odata(od), osize(os), tsize(ts)
+ {
+ has_status = false;
+ run_failed = false;
+ }
+ cl_context context;
+ cl_command_queue queue;
+ cl_kernel kernel;
+ size_t global;
+ size_t local;
+ Ty *idata;
+ size_t isize;
+ Ty *mapin_data;
+ Ty *mapout_data;
+ cl_int *mdata;
+ size_t msize;
+ Ty *odata;
+ size_t osize;
+ size_t tsize;
+ bool run_failed;
- in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error);
- test_error(error, "clCreateBuffer failed");
+private:
+ bool has_status;
+ test_status status;
- xy = clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error);
- test_error(error, "clCreateBuffer failed");
+public:
+ // Run a test kernel to compute the result of a built-in on an input
+ int run()
+ {
+ clMemWrapper in;
+ clMemWrapper xy;
+ clMemWrapper out;
+ clMemWrapper tmp;
+ int error;
+
+ in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error);
+ test_error(error, "clCreateBuffer failed");
- out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error);
- test_error(error, "clCreateBuffer failed");
+ xy = clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error);
+ test_error(error, "clCreateBuffer failed");
- if (tsize)
- {
- tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS,
- tsize, NULL, &error);
+ out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error);
test_error(error, "clCreateBuffer failed");
- }
- error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in);
- test_error(error, "clSetKernelArg failed");
+ if (tsize)
+ {
+ tmp = clCreateBuffer(context,
+ CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS,
+ tsize, NULL, &error);
+ test_error(error, "clCreateBuffer failed");
+ }
- error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy);
- test_error(error, "clSetKernelArg failed");
+ error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in);
+ test_error(error, "clSetKernelArg failed");
- error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out);
- test_error(error, "clSetKernelArg failed");
+ error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy);
+ test_error(error, "clSetKernelArg failed");
- if (tsize)
- {
- error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp);
+ error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out);
test_error(error, "clSetKernelArg failed");
+
+ if (tsize)
+ {
+ error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp);
+ test_error(error, "clSetKernelArg failed");
+ }
+
+ error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0,
+ NULL, NULL);
+ test_error(error, "clEnqueueWriteBuffer failed");
+
+ error = clEnqueueWriteBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0,
+ NULL, NULL);
+ test_error(error, "clEnqueueWriteBuffer failed");
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local,
+ 0, NULL, NULL);
+ test_error(error, "clEnqueueNDRangeKernel failed");
+
+ error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0,
+ NULL, NULL);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0,
+ NULL, NULL);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ error = clFinish(queue);
+ test_error(error, "clFinish failed");
+
+ return error;
}
- error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL,
- NULL);
- test_error(error, "clEnqueueWriteBuffer failed");
+private:
+ test_status
+ run_and_check_with_cluster_size(const WorkGroupParams &test_params)
+ {
+ cl_int error = run();
+ if (error != CL_SUCCESS)
+ {
+ print_error(error, "Failed to run subgroup test kernel");
+ status = TEST_FAIL;
+ run_failed = true;
+ return status;
+ }
+
+ test_status tmp_status =
+ Fns::chk(idata, odata, mapin_data, mapout_data, mdata, test_params);
+
+ if (!has_status || tmp_status == TEST_FAIL
+ || (tmp_status == TEST_PASS && status != TEST_FAIL))
+ {
+ status = tmp_status;
+ has_status = true;
+ }
+
+ return status;
+ }
- error = clEnqueueWriteBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL,
- NULL);
- test_error(error, "clEnqueueWriteBuffer failed");
- error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
- NULL, NULL);
- test_error(error, "clEnqueueNDRangeKernel failed");
+public:
+ test_status run_and_check(WorkGroupParams &test_params)
+ {
+ test_status tmp_status = TEST_SKIPPED_ITSELF;
- error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL,
- NULL);
- test_error(error, "clEnqueueReadBuffer failed");
+ if (test_params.cluster_size_arg != -1)
+ {
+ for (cl_uint cluster_size = 1;
+ cluster_size <= test_params.subgroup_size; cluster_size *= 2)
+ {
+ test_params.cluster_size = cluster_size;
+ cl_int error =
+ clSetKernelArg(kernel, test_params.cluster_size_arg,
+ sizeof(cl_uint), &cluster_size);
+ test_error_fail(error, "Unable to set cluster size");
- error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL,
- NULL);
- test_error(error, "clEnqueueReadBuffer failed");
+ tmp_status = run_and_check_with_cluster_size(test_params);
- error = clFinish(queue);
- test_error(error, "clFinish failed");
+ if (tmp_status == TEST_FAIL) break;
+ }
+ }
+ else
+ {
+ tmp_status = run_and_check_with_cluster_size(test_params);
+ }
- return error;
-}
+ return tmp_status;
+ }
+};
// Driver for testing a single built in function
template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
{
- static int mrun(cl_device_id device, cl_context context,
- cl_command_queue queue, int num_elements, const char *kname,
- const char *src, WorkGroupParams test_params)
- {
- int error = TEST_PASS;
- for (auto &mask : test_params.all_work_item_masks)
- {
- test_params.work_items_mask = mask;
- error |= run(device, context, queue, num_elements, kname, src,
- test_params);
- }
- return error;
- };
- static int run(cl_device_id device, cl_context context,
- cl_command_queue queue, int num_elements, const char *kname,
- const char *src, WorkGroupParams test_params)
+ static test_status run(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements,
+ const char *kname, const char *src,
+ WorkGroupParams test_params)
{
size_t tmp;
- int error;
- int subgroup_size, num_subgroups;
- size_t realSize;
+ cl_int error;
+ size_t subgroup_size, num_subgroups;
size_t global = test_params.global_workgroup_size;
size_t local = test_params.local_workgroup_size;
clProgramWrapper program;
@@ -1268,13 +1508,8 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
std::vector<Ty> mapout;
mapout.resize(local);
std::stringstream kernel_sstr;
- if (test_params.work_items_mask != 0)
- {
- kernel_sstr << "#define WORK_ITEMS_MASK ";
- kernel_sstr << "0x" << std::hex << test_params.work_items_mask
- << "\n";
- }
+ Fns::log_test(test_params, "");
kernel_sstr << "#define NR_OF_ACTIVE_WORK_ITEMS ";
kernel_sstr << NR_OF_ACTIVE_WORK_ITEMS << "\n";
@@ -1282,36 +1517,21 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
if (!TypeManager<Ty>::type_supported(device))
{
log_info("Data type not supported : %s\n", TypeManager<Ty>::name());
- return 0;
+ return TEST_SKIPPED_ITSELF;
}
- else
+
+ if (strstr(TypeManager<Ty>::name(), "double"))
{
- if (strstr(TypeManager<Ty>::name(), "double"))
- {
- kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
- }
- else if (strstr(TypeManager<Ty>::name(), "half"))
- {
- kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp16: enable\n";
- }
+ kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
}
-
- for (std::string extension : test_params.required_extensions)
+ else if (strstr(TypeManager<Ty>::name(), "half"))
{
- if (!is_extension_available(device, extension.c_str()))
- {
- log_info("The extension %s not supported on this device. SKIP "
- "testing - kernel %s data type %s\n",
- extension.c_str(), kname, TypeManager<Ty>::name());
- return TEST_PASS;
- }
- kernel_sstr << "#pragma OPENCL EXTENSION " + extension
- + ": enable\n";
+ kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp16: enable\n";
}
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
(void *)&platform, NULL);
- test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
+ test_error_fail(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
if (test_params.use_core_subgroups)
{
kernel_sstr
@@ -1326,12 +1546,12 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
error = create_single_kernel_helper(context, &program, &kernel, 1,
&kernel_src, kname);
- if (error != 0) return error;
+ if (error != CL_SUCCESS) return TEST_FAIL;
// Determine some local dimensions to use for the test.
error = get_max_common_work_group_size(
context, kernel, test_params.global_workgroup_size, &local);
- test_error(error, "get_max_common_work_group_size failed");
+ test_error_fail(error, "get_max_common_work_group_size failed");
// Limit it a bit so we have muliple work groups
// Ideally this will still be large enough to give us multiple
@@ -1345,7 +1565,7 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
if (clGetKernelSubGroupInfo_ptr == NULL)
{
- log_error("ERROR: %s function not available",
+ log_error("ERROR: %s function not available\n",
subgroupsApiSet.clGetKernelSubGroupInfo_name);
return TEST_FAIL;
}
@@ -1355,12 +1575,12 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
if (error != CL_SUCCESS)
{
log_error("ERROR: %s function error for "
- "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE",
+ "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE\n",
subgroupsApiSet.clGetKernelSubGroupInfo_name);
return TEST_FAIL;
}
- subgroup_size = (int)tmp;
+ subgroup_size = tmp;
error = clGetKernelSubGroupInfo_ptr(
kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
@@ -1368,16 +1588,16 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
if (error != CL_SUCCESS)
{
log_error("ERROR: %s function error for "
- "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE",
+ "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE\n",
subgroupsApiSet.clGetKernelSubGroupInfo_name);
return TEST_FAIL;
}
- num_subgroups = (int)tmp;
+ num_subgroups = tmp;
// Make sure the number of sub groups is what we expect
if (num_subgroups != (local + subgroup_size - 1) / subgroup_size)
{
- log_error("ERROR: unexpected number of subgroups (%d) returned\n",
+ log_error("ERROR: unexpected number of subgroups (%zu) returned\n",
num_subgroups);
return TEST_FAIL;
}
@@ -1386,41 +1606,83 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
std::vector<Ty> odata;
size_t input_array_size = global;
size_t output_array_size = global;
- int dynscl = test_params.dynsc;
+ size_t dynscl = test_params.dynsc;
if (dynscl != 0)
{
- input_array_size =
- (int)global / (int)local * num_subgroups * dynscl;
- output_array_size = (int)global / (int)local * dynscl;
+ input_array_size = global / local * num_subgroups * dynscl;
+ output_array_size = global / local * dynscl;
}
idata.resize(input_array_size);
odata.resize(output_array_size);
+ if (test_params.divergence_mask_arg != -1)
+ {
+ cl_uint4 mask_vector;
+ mask_vector.x = 0xffffffffU;
+ mask_vector.y = 0xffffffffU;
+ mask_vector.z = 0xffffffffU;
+ mask_vector.w = 0xffffffffU;
+ error = clSetKernelArg(kernel, test_params.divergence_mask_arg,
+ sizeof(cl_uint4), &mask_vector);
+ test_error_fail(error, "Unable to set divergence mask argument");
+ }
+
+ if (test_params.cluster_size_arg != -1)
+ {
+ cl_uint dummy_cluster_size = 1;
+ error = clSetKernelArg(kernel, test_params.cluster_size_arg,
+ sizeof(cl_uint), &dummy_cluster_size);
+ test_error_fail(error, "Unable to set dummy cluster size");
+ }
+
+ KernelExecutor<Ty, Fns> executor(
+ context, queue, kernel, global, local, idata.data(),
+ input_array_size * sizeof(Ty), mapin.data(), mapout.data(),
+ sgmap.data(), global * sizeof(cl_int4), odata.data(),
+ output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
+
// Run the kernel once on zeroes to get the map
memset(idata.data(), 0, input_array_size * sizeof(Ty));
- error = run_kernel(context, queue, kernel, global, local, idata.data(),
- input_array_size * sizeof(Ty), sgmap.data(),
- global * sizeof(cl_int4), odata.data(),
- output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
- test_error(error, "Running kernel first time failed");
+ error = executor.run();
+ test_error_fail(error, "Running kernel first time failed");
// Generate the desired input for the kernel
-
test_params.subgroup_size = subgroup_size;
Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params);
- error = run_kernel(context, queue, kernel, global, local, idata.data(),
- input_array_size * sizeof(Ty), sgmap.data(),
- global * sizeof(cl_int4), odata.data(),
- output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
- test_error(error, "Running kernel second time failed");
-
- // Check the result
- error = Fns::chk(idata.data(), odata.data(), mapin.data(),
- mapout.data(), sgmap.data(), test_params);
- test_error(error, "Data verification failed");
- return TEST_PASS;
+
+ test_status status;
+
+ if (test_params.divergence_mask_arg != -1)
+ {
+ for (auto &mask : test_params.all_work_item_masks)
+ {
+ test_params.work_items_mask = mask;
+ cl_uint4 mask_vector = bs128_to_cl_uint4(mask);
+ clSetKernelArg(kernel, test_params.divergence_mask_arg,
+ sizeof(cl_uint4), &mask_vector);
+
+ status = executor.run_and_check(test_params);
+
+ if (status == TEST_FAIL) break;
+ }
+ }
+ else
+ {
+ status = executor.run_and_check(test_params);
+ }
+ // Detailed failure and skip messages should be logged by
+ // run_and_check.
+ if (status == TEST_PASS)
+ {
+ Fns::log_test(test_params, " passed");
+ }
+ else if (!executor.run_failed && status == TEST_FAIL)
+ {
+ test_fail("Data verification failed\n");
+ }
+ return status;
}
};
@@ -1466,21 +1728,21 @@ struct RunTestForType
num_elements_(num_elements), test_params_(test_params)
{}
template <typename T, typename U>
- int run_impl(const char *kernel_name, const char *source)
+ int run_impl(const std::string &function_name)
{
int error = TEST_PASS;
- if (test_params_.all_work_item_masks.size() > 0)
- {
- error = test<T, U>::mrun(device_, context_, queue_, num_elements_,
- kernel_name, source, test_params_);
- }
- else
- {
- error = test<T, U>::run(device_, context_, queue_, num_elements_,
- kernel_name, source, test_params_);
- }
-
- return error;
+ std::string source =
+ std::regex_replace(test_params_.get_kernel_source(function_name),
+ std::regex("\\%s"), function_name);
+ std::string kernel_name = "test_" + function_name;
+ error =
+ test<T, U>::run(device_, context_, queue_, num_elements_,
+ kernel_name.c_str(), source.c_str(), test_params_);
+
+ // If we return TEST_SKIPPED_ITSELF here, then an entire suite may be
+ // reported as having been skipped even if some tests within it
+ // passed, as the status codes are erroneously ORed together:
+ return error == TEST_FAIL ? TEST_FAIL : TEST_PASS;
}
private:
diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp
index 47e42f65..fb93ddb1 100644
--- a/test_conformance/subgroups/test_barrier.cpp
+++ b/test_conformance/subgroups/test_barrier.cpp
@@ -59,6 +59,17 @@ static const char *gbar_source =
// barrier test functions
template <int Which> struct BAR
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ if (Which == 0)
+ log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...%s\n",
+ extra_text);
+ else
+ log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...%s\n",
+ extra_text);
+ }
+
static void gen(cl_int *x, cl_int *t, cl_int *m,
const WorkGroupParams &test_params)
{
@@ -68,7 +79,6 @@ template <int Which> struct BAR
int ng = test_params.global_workgroup_size;
int nj = (nw + ns - 1) / ns;
ng = ng / nw;
- int e;
ii = 0;
for (k = 0; k < ng; ++k)
@@ -92,8 +102,8 @@ template <int Which> struct BAR
}
}
- static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my,
+ cl_int *m, const WorkGroupParams &test_params)
{
int ii, i, j, k, n;
int nw = test_params.local_workgroup_size;
@@ -103,11 +113,6 @@ template <int Which> struct BAR
ng = ng / nw;
cl_int tr, rr;
- if (Which == 0)
- log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...\n");
- else
- log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...\n");
-
for (k = 0; k < ng; ++k)
{
// Map to array indexed to array indexed by local ID and sub group
@@ -133,7 +138,7 @@ template <int Which> struct BAR
"id %d in sub group %d in group %d expected "
"%d got %d\n",
i, j, k, tr, rr);
- return -1;
+ return TEST_FAIL;
}
}
}
@@ -143,7 +148,7 @@ template <int Which> struct BAR
m += 2 * nw;
}
- return 0;
+ return TEST_PASS;
}
};
@@ -187,4 +192,4 @@ int test_barrier_functions_ext(cl_device_id device, cl_context context,
}
return test_barrier_functions(device, context, queue, num_elements, false);
-} \ No newline at end of file
+}
diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp
index 428f2cdc..f2bd5b92 100644
--- a/test_conformance/subgroups/test_ifp.cpp
+++ b/test_conformance/subgroups/test_ifp.cpp
@@ -225,6 +225,12 @@ void run_insts(cl_int *x, cl_int *p, int n)
struct IFP
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ log_info(" independent forward progress...%s\n", extra_text);
+ }
+
static void gen(cl_int *x, cl_int *t, cl_int *,
const WorkGroupParams &test_params)
{
@@ -245,8 +251,8 @@ struct IFP
}
}
- static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *,
- const WorkGroupParams &test_params)
+ static test_status chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *,
+ const WorkGroupParams &test_params)
{
int i, k;
int nw = test_params.local_workgroup_size;
@@ -255,10 +261,8 @@ struct IFP
int nj = (nw + ns - 1) / ns;
ng = ng / nw;
- // We need at least 2 sub groups per group for this tes
- if (nj == 1) return 0;
-
- log_info(" independent forward progress...\n");
+ // We need at least 2 sub groups per group for this test
+ if (nj == 1) return TEST_SKIPPED_ITSELF;
for (k = 0; k < ng; ++k)
{
@@ -270,14 +274,14 @@ struct IFP
log_error(
"ERROR: mismatch at element %d in work group %d\n", i,
k);
- return -1;
+ return TEST_FAIL;
}
}
x += nj * (NUM_LOC + 1);
y += NUM_LOC;
}
- return 0;
+ return TEST_PASS;
}
};
@@ -360,17 +364,21 @@ int test_ifp_ext(cl_device_id device, cl_context context,
}
// ifp only in subgroup functions tests:
test_status error;
- error = checkIFPSupport(device, ifpSupport);
- if (error != TEST_PASS)
- {
- return error;
- }
- if (ifpSupport == false)
+ auto device_cl_version = get_device_cl_version(device);
+ if (device_cl_version >= Version(2, 1))
{
- log_info(
- "Error reason: the extension cl_khr_subgroups requires that "
- "Independed forward progress has to be supported by device.\n");
- return TEST_FAIL;
+ error = checkIFPSupport(device, ifpSupport);
+ if (error != TEST_PASS)
+ {
+ return error;
+ }
+ if (ifpSupport == false)
+ {
+ log_info(
+ "Error reason: the extension cl_khr_subgroups requires that "
+ "Independed forward progress has to be supported by device.\n");
+ return TEST_FAIL;
+ }
}
return test_ifp(device, context, queue, num_elements, false);
-} \ No newline at end of file
+}
diff --git a/test_conformance/subgroups/test_queries.cpp b/test_conformance/subgroups/test_queries.cpp
index 761ca7a6..6b940935 100644
--- a/test_conformance/subgroups/test_queries.cpp
+++ b/test_conformance/subgroups/test_queries.cpp
@@ -100,7 +100,7 @@ int test_sub_group_info(cl_device_id device, cl_context context,
subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
if (clGetKernelSubGroupInfo_ptr == NULL)
{
- log_error("ERROR: %s function not available",
+ log_error("ERROR: %s function not available\n",
subgroupsApiSet.clGetKernelSubGroupInfo_name);
return TEST_FAIL;
}
@@ -112,7 +112,7 @@ int test_sub_group_info(cl_device_id device, cl_context context,
if (error != CL_SUCCESS)
{
log_error("ERROR: %s function error for "
- "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE",
+ "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE\n",
subgroupsApiSet.clGetKernelSubGroupInfo_name);
return TEST_FAIL;
}
@@ -133,7 +133,7 @@ int test_sub_group_info(cl_device_id device, cl_context context,
if (error != CL_SUCCESS)
{
log_error("ERROR: %s function error "
- "for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE",
+ "for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE\n",
subgroupsApiSet.clGetKernelSubGroupInfo_name);
return TEST_FAIL;
}
@@ -209,4 +209,4 @@ int test_sub_group_info_ext(cl_device_id device, cl_context context,
}
return test_sub_group_info(device, context, queue, num_elements, false);
-} \ No newline at end of file
+}
diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp
index c0e49524..75e9d4ae 100644
--- a/test_conformance/subgroups/test_subgroup.cpp
+++ b/test_conformance/subgroups/test_subgroup.cpp
@@ -24,6 +24,13 @@ namespace {
// Any/All test functions
template <NonUniformVoteOp operation> struct AA
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ log_info(" sub_group_%s...%s\n", operation_names(operation),
+ extra_text);
+ }
+
static void gen(cl_int *x, cl_int *t, cl_int *m,
const WorkGroupParams &test_params)
{
@@ -35,7 +42,6 @@ template <NonUniformVoteOp operation> struct AA
int e;
ng = ng / nw;
ii = 0;
- log_info(" sub_group_%s...\n", operation_names(operation));
for (k = 0; k < ng; ++k)
{
for (j = 0; j < nj; ++j)
@@ -68,8 +74,8 @@ template <NonUniformVoteOp operation> struct AA
}
}
- static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my,
+ cl_int *m, const WorkGroupParams &test_params)
{
int ii, i, j, k, n;
int ng = test_params.global_workgroup_size;
@@ -124,51 +130,33 @@ template <NonUniformVoteOp operation> struct AA
y += nw;
m += 4 * nw;
}
- log_info(" sub_group_%s... passed\n", operation_names(operation));
return TEST_PASS;
}
};
-static const char *any_source = "__kernel void test_any(const __global Type "
- "*in, __global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " out[gid] = sub_group_any(in[gid]);\n"
- "}\n";
-
-static const char *all_source = "__kernel void test_all(const __global Type "
- "*in, __global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " out[gid] = sub_group_all(in[gid]);\n"
- "}\n";
-
-
template <typename T>
int run_broadcast_scan_reduction_for_type(RunTestForType rft)
{
int error = rft.run_impl<T, BC<T, SubgroupsBroadcastOp::broadcast>>(
- "test_bcast", bcast_source);
- error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>("test_redadd",
- redadd_source);
- error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>("test_redmax",
- redmax_source);
- error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>("test_redmin",
- redmin_source);
- error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>("test_scinadd",
- scinadd_source);
- error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>("test_scinmax",
- scinmax_source);
- error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>("test_scinmin",
- scinmin_source);
- error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>("test_scexadd",
- scexadd_source);
- error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>("test_scexmax",
- scexmax_source);
- error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>("test_scexmin",
- scexmin_source);
+ "sub_group_broadcast");
+ error |=
+ rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>("sub_group_reduce_add");
+ error |=
+ rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>("sub_group_reduce_max");
+ error |=
+ rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>("sub_group_reduce_min");
+ error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>(
+ "sub_group_scan_inclusive_add");
+ error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>(
+ "sub_group_scan_inclusive_max");
+ error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>(
+ "sub_group_scan_inclusive_min");
+ error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>(
+ "sub_group_scan_exclusive_add");
+ error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>(
+ "sub_group_scan_exclusive_max");
+ error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>(
+ "sub_group_scan_exclusive_min");
return error;
}
@@ -181,11 +169,14 @@ int test_subgroup_functions(cl_device_id device, cl_context context,
constexpr size_t global_work_size = 2000;
constexpr size_t local_work_size = 200;
WorkGroupParams test_params(global_work_size, local_work_size);
+ test_params.save_kernel_source(sub_group_reduction_scan_source);
+ test_params.save_kernel_source(sub_group_generic_source,
+ "sub_group_broadcast");
+
RunTestForType rft(device, context, queue, num_elements, test_params);
int error =
- rft.run_impl<cl_int, AA<NonUniformVoteOp::any>>("test_any", any_source);
- error |=
- rft.run_impl<cl_int, AA<NonUniformVoteOp::all>>("test_all", all_source);
+ rft.run_impl<cl_int, AA<NonUniformVoteOp::any>>("sub_group_any");
+ error |= rft.run_impl<cl_int, AA<NonUniformVoteOp::all>>("sub_group_all");
error |= run_broadcast_scan_reduction_for_type<cl_int>(rft);
error |= run_broadcast_scan_reduction_for_type<cl_uint>(rft);
error |= run_broadcast_scan_reduction_for_type<cl_long>(rft);
diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp
index f2e4060b..3882311d 100644
--- a/test_conformance/subgroups/test_subgroup_ballot.cpp
+++ b/test_conformance/subgroups/test_subgroup_ballot.cpp
@@ -23,52 +23,101 @@ namespace {
// Test for ballot functions
template <typename Ty> struct BALLOT
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ log_info(" sub_group_ballot...%s\n", extra_text);
+ }
+
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
{
- // no work here
int gws = test_params.global_workgroup_size;
int lws = test_params.local_workgroup_size;
int sbs = test_params.subgroup_size;
+ int sb_number = (lws + sbs - 1) / sbs;
int non_uniform_size = gws % lws;
- log_info(" sub_group_ballot...\n");
- if (non_uniform_size)
- {
- log_info(" non uniform work group size mode ON\n");
+ int wg_number = gws / lws;
+ wg_number = non_uniform_size ? wg_number + 1 : wg_number;
+ int last_subgroup_size = 0;
+
+ for (int wg_id = 0; wg_id < wg_number; ++wg_id)
+ { // for each work_group
+ if (non_uniform_size && wg_id == wg_number - 1)
+ {
+ set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
+ last_subgroup_size);
+ }
+ for (int sb_id = 0; sb_id < sb_number; ++sb_id)
+ { // for each subgroup
+ int wg_offset = sb_id * sbs;
+ int current_sbs;
+ if (last_subgroup_size && sb_id == sb_number - 1)
+ {
+ current_sbs = last_subgroup_size;
+ }
+ else
+ {
+ current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
+ }
+
+ for (int wi_id = 0; wi_id < current_sbs; wi_id++)
+ {
+ cl_uint v;
+ if (genrand_bool(gMTdata))
+ {
+ v = genrand_bool(gMTdata);
+ }
+ else if (genrand_bool(gMTdata))
+ {
+ v = 1U << ((genrand_int32(gMTdata) % 31) + 1);
+ }
+ else
+ {
+ v = genrand_int32(gMTdata);
+ }
+ cl_uint4 v4 = { v, 0, 0, 0 };
+ t[wi_id + wg_offset] = v4;
+ }
+ }
+ // Now map into work group using map from device
+ for (int wi_id = 0; wi_id < lws; ++wi_id)
+ {
+ x[wi_id] = t[wi_id];
+ }
+ x += lws;
+ m += 4 * lws;
}
}
- static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
- int wi_id, wg_id, sb_id;
int gws = test_params.global_workgroup_size;
int lws = test_params.local_workgroup_size;
int sbs = test_params.subgroup_size;
int sb_number = (lws + sbs - 1) / sbs;
- int current_sbs = 0;
- cl_uint expected_result, device_result;
int non_uniform_size = gws % lws;
int wg_number = gws / lws;
wg_number = non_uniform_size ? wg_number + 1 : wg_number;
int last_subgroup_size = 0;
- for (wg_id = 0; wg_id < wg_number; ++wg_id)
+ for (int wg_id = 0; wg_id < wg_number; ++wg_id)
{ // for each work_group
if (non_uniform_size && wg_id == wg_number - 1)
{
set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
last_subgroup_size);
}
-
- for (wi_id = 0; wi_id < lws; ++wi_id)
+ for (int wi_id = 0; wi_id < lws; ++wi_id)
{ // inside the work_group
- // read device outputs for work_group
- my[wi_id] = y[wi_id];
+ mx[wi_id] = x[wi_id]; // read host inputs for work_group
+ my[wi_id] = y[wi_id]; // read device outputs for work_group
}
- for (sb_id = 0; sb_id < sb_number; ++sb_id)
+ for (int sb_id = 0; sb_id < sb_number; ++sb_id)
{ // for each subgroup
int wg_offset = sb_id * sbs;
+ int current_sbs;
if (last_subgroup_size && sb_id == sb_number - 1)
{
current_sbs = last_subgroup_size;
@@ -77,26 +126,54 @@ template <typename Ty> struct BALLOT
{
current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
}
- for (wi_id = 0; wi_id < current_sbs; ++wi_id)
+
+ bs128 expected_result_bs = 0;
+
+ std::set<int> active_work_items;
+ for (int wi_id = 0; wi_id < current_sbs; ++wi_id)
{
- device_result = my[wg_offset + wi_id];
- expected_result = 1;
- if (!compare(device_result, expected_result))
+ if (test_params.work_items_mask.test(wi_id))
+ {
+ bool predicate = (mx[wg_offset + wi_id].s0 != 0);
+ expected_result_bs |= (bs128(predicate) << wi_id);
+ active_work_items.insert(wi_id);
+ }
+ }
+ if (active_work_items.empty())
+ {
+ continue;
+ }
+
+ cl_uint4 expected_result =
+ bs128_to_cl_uint4(expected_result_bs);
+ for (const int &active_work_item : active_work_items)
+ {
+ int wi_id = active_work_item;
+
+ cl_uint4 device_result = my[wg_offset + wi_id];
+ bs128 device_result_bs = cl_uint4_to_bs128(device_result);
+
+ if (device_result_bs != expected_result_bs)
{
log_error(
"ERROR: sub_group_ballot mismatch for local id "
- "%d in sub group %d in group %d obtained {%d}, "
- "expected {%d} \n",
- wi_id, sb_id, wg_id, device_result,
- expected_result);
+ "%d in sub group %d in group %d obtained {%d, %d, "
+ "%d, %d}, expected {%d, %d, %d, %d}\n",
+ wi_id, sb_id, wg_id, device_result.s0,
+ device_result.s1, device_result.s2,
+ device_result.s3, expected_result.s0,
+ expected_result.s1, expected_result.s2,
+ expected_result.s3);
return TEST_FAIL;
}
}
}
+
+ x += lws;
y += lws;
m += 4 * lws;
}
- log_info(" sub_group_ballot... passed\n");
+
return TEST_PASS;
}
};
@@ -104,23 +181,22 @@ template <typename Ty> struct BALLOT
// Test for bit extract ballot functions
template <typename Ty, BallotOp operation> struct BALLOT_BIT_EXTRACT
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ log_info(" sub_group_ballot_%s(%s)...%s\n", operation_names(operation),
+ TypeManager<Ty>::name(), extra_text);
+ }
+
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
{
- int wi_id, sb_id, wg_id, l;
+ int wi_id, sb_id, wg_id;
int gws = test_params.global_workgroup_size;
int lws = test_params.local_workgroup_size;
int sbs = test_params.subgroup_size;
int sb_number = (lws + sbs - 1) / sbs;
int wg_number = gws / lws;
int limit_sbs = sbs > 100 ? 100 : sbs;
- int non_uniform_size = gws % lws;
- log_info(" sub_group_%s(%s)...\n", operation_names(operation),
- TypeManager<Ty>::name());
-
- if (non_uniform_size)
- {
- log_info(" non uniform work group size mode ON\n");
- }
for (wg_id = 0; wg_id < wg_number; ++wg_id)
{ // for each work_group
@@ -155,10 +231,10 @@ template <typename Ty, BallotOp operation> struct BALLOT_BIT_EXTRACT
}
}
- static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
- int wi_id, wg_id, l, sb_id;
+ int wi_id, wg_id, sb_id;
int gws = test_params.global_workgroup_size;
int lws = test_params.local_workgroup_size;
int sbs = test_params.subgroup_size;
@@ -260,30 +336,25 @@ template <typename Ty, BallotOp operation> struct BALLOT_BIT_EXTRACT
y += lws;
m += 4 * lws;
}
- log_info(" sub_group_%s(%s)... passed\n", operation_names(operation),
- TypeManager<Ty>::name());
return TEST_PASS;
}
};
template <typename Ty, BallotOp operation> struct BALLOT_INVERSE
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ log_info(" sub_group_inverse_ballot...%s\n", extra_text);
+ }
+
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
{
- int gws = test_params.global_workgroup_size;
- int lws = test_params.local_workgroup_size;
- int sbs = test_params.subgroup_size;
- int non_uniform_size = gws % lws;
- log_info(" sub_group_inverse_ballot...\n");
- if (non_uniform_size)
- {
- log_info(" non uniform work group size mode ON\n");
- }
// no work here
}
- static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
int wi_id, wg_id, sb_id;
int gws = test_params.global_workgroup_size;
@@ -322,9 +393,6 @@ template <typename Ty, BallotOp operation> struct BALLOT_INVERSE
{
current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
}
- // take index of array where info which work_item will
- // be broadcast its value is stored
- int midx = 4 * wg_offset + 2;
// take subgroup local id of this work_item
// Check result
for (wi_id = 0; wi_id < current_sbs; ++wi_id)
@@ -354,7 +422,6 @@ template <typename Ty, BallotOp operation> struct BALLOT_INVERSE
m += 4 * lws;
}
- log_info(" sub_group_inverse_ballot... passed\n");
return TEST_PASS;
}
};
@@ -363,6 +430,13 @@ template <typename Ty, BallotOp operation> struct BALLOT_INVERSE
// Test for bit count/inclusive and exclusive scan/ find lsb msb ballot function
template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ log_info(" sub_group_%s(%s)...%s\n", operation_names(operation),
+ TypeManager<Ty>::name(), extra_text);
+ }
+
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
{
int wi_id, wg_id, sb_id;
@@ -375,14 +449,10 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
int last_subgroup_size = 0;
int current_sbs = 0;
- log_info(" sub_group_%s(%s)...\n", operation_names(operation),
- TypeManager<Ty>::name());
if (non_uniform_size)
{
- log_info(" non uniform work group size mode ON\n");
wg_number++;
}
- int e;
for (wg_id = 0; wg_id < wg_number; ++wg_id)
{ // for each work_group
if (non_uniform_size && wg_id == wg_number - 1)
@@ -423,7 +493,7 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
}
else
{
- log_error("Unknown operation...");
+ log_error("Unknown operation...\n");
}
}
@@ -451,15 +521,15 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
else if (operation == BallotOp::ballot_inclusive_scan
|| operation == BallotOp::ballot_exclusive_scan)
{
- for (cl_uint i = 0; i <= sub_group_local_id; ++i) mask.set(i);
- if (operation == BallotOp::ballot_exclusive_scan)
- mask.reset(sub_group_local_id);
+ for (cl_uint i = 0; i < sub_group_local_id; ++i) mask.set(i);
+ if (operation == BallotOp::ballot_inclusive_scan)
+ mask.set(sub_group_local_id);
}
return mask;
}
- static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
int wi_id, wg_id, sb_id;
int gws = test_params.global_workgroup_size;
@@ -469,7 +539,7 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
int non_uniform_size = gws % lws;
int wg_number = gws / lws;
wg_number = non_uniform_size ? wg_number + 1 : wg_number;
- cl_uint4 expected_result, device_result;
+ cl_uint expected_result, device_result;
int last_subgroup_size = 0;
int current_sbs = 0;
@@ -501,7 +571,7 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
}
// Check result
- expected_result = { 0, 0, 0, 0 };
+ expected_result = 0;
for (wi_id = 0; wi_id < current_sbs; ++wi_id)
{ // for subgroup element
bs128 bs;
@@ -510,34 +580,37 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
| (bs128(mx[wg_offset + wi_id].s1) << 32)
| (bs128(mx[wg_offset + wi_id].s2) << 64)
| (bs128(mx[wg_offset + wi_id].s3) << 96);
- bs &= getImportantBits(wi_id, current_sbs);
- device_result = my[wg_offset + wi_id];
+ bs &= getImportantBits(wi_id, sbs);
+ device_result = my[wg_offset + wi_id].s0;
if (operation == BallotOp::ballot_inclusive_scan
|| operation == BallotOp::ballot_exclusive_scan
|| operation == BallotOp::ballot_bit_count)
{
- expected_result.s0 = bs.count();
+ expected_result = bs.count();
if (!compare(device_result, expected_result))
{
log_error("ERROR: sub_group_%s "
"mismatch for local id %d in sub group "
- "%d in group %d obtained {%d, %d, %d, "
- "%d}, expected {%d, %d, %d, %d}\n",
+ "%d in group %d obtained %d, "
+ "expected %d\n",
operation_names(operation), wi_id, sb_id,
- wg_id, device_result.s0, device_result.s1,
- device_result.s2, device_result.s3,
- expected_result.s0, expected_result.s1,
- expected_result.s2, expected_result.s3);
+ wg_id, device_result, expected_result);
return TEST_FAIL;
}
}
else if (operation == BallotOp::ballot_find_lsb)
{
- for (int id = 0; id < current_sbs; ++id)
+ if (bs.none())
+ {
+ // Return value is undefined when no bits are set,
+ // so skip validation:
+ continue;
+ }
+ for (int id = 0; id < sbs; ++id)
{
if (bs.test(id))
{
- expected_result.s0 = id;
+ expected_result = id;
break;
}
}
@@ -545,23 +618,26 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
{
log_error("ERROR: sub_group_ballot_find_lsb "
"mismatch for local id %d in sub group "
- "%d in group %d obtained {%d, %d, %d, "
- "%d}, expected {%d, %d, %d, %d}\n",
- wi_id, sb_id, wg_id, device_result.s0,
- device_result.s1, device_result.s2,
- device_result.s3, expected_result.s0,
- expected_result.s1, expected_result.s2,
- expected_result.s3);
+ "%d in group %d obtained %d, "
+ "expected %d\n",
+ wi_id, sb_id, wg_id, device_result,
+ expected_result);
return TEST_FAIL;
}
}
else if (operation == BallotOp::ballot_find_msb)
{
- for (int id = current_sbs - 1; id >= 0; --id)
+ if (bs.none())
+ {
+ // Return value is undefined when no bits are set,
+ // so skip validation:
+ continue;
+ }
+ for (int id = sbs - 1; id >= 0; --id)
{
if (bs.test(id))
{
- expected_result.s0 = id;
+ expected_result = id;
break;
}
}
@@ -569,13 +645,10 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
{
log_error("ERROR: sub_group_ballot_find_msb "
"mismatch for local id %d in sub group "
- "%d in group %d obtained {%d, %d, %d, "
- "%d}, expected {%d, %d, %d, %d}\n",
- wi_id, sb_id, wg_id, device_result.s0,
- device_result.s1, device_result.s2,
- device_result.s3, expected_result.s0,
- expected_result.s1, expected_result.s2,
- expected_result.s3);
+ "%d in group %d obtained %d, "
+ "expected %d\n",
+ wi_id, sb_id, wg_id, device_result,
+ expected_result);
return TEST_FAIL;
}
}
@@ -585,8 +658,6 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
y += lws;
m += 4 * lws;
}
- log_info(" sub_group_ballot_%s(%s)... passed\n",
- operation_names(operation), TypeManager<Ty>::name());
return TEST_PASS;
}
};
@@ -594,15 +665,21 @@ template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
// test mask functions
template <typename Ty, BallotOp operation> struct SMASK
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ log_info(" get_sub_group_%s_mask...%s\n", operation_names(operation),
+ extra_text);
+ }
+
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
{
- int wi_id, wg_id, l, sb_id;
+ int wi_id, wg_id, sb_id;
int gws = test_params.global_workgroup_size;
int lws = test_params.local_workgroup_size;
int sbs = test_params.subgroup_size;
int sb_number = (lws + sbs - 1) / sbs;
int wg_number = gws / lws;
- log_info(" get_sub_group_%s_mask...\n", operation_names(operation));
for (wg_id = 0; wg_id < wg_number; ++wg_id)
{ // for each work_group
for (sb_id = 0; sb_id < sb_number; ++sb_id)
@@ -631,8 +708,8 @@ template <typename Ty, BallotOp operation> struct SMASK
}
}
- static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
int wi_id, wg_id, sb_id;
int gws = test_params.global_workgroup_size;
@@ -678,245 +755,130 @@ template <typename Ty, BallotOp operation> struct SMASK
y += lws;
m += 4 * lws;
}
- log_info(" get_sub_group_%s_mask... passed\n",
- operation_names(operation));
return TEST_PASS;
}
};
-static const char *bcast_non_uniform_source =
- "__kernel void test_bcast_non_uniform(const __global Type *in, __global "
- "int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {\n"
- " out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z);\n"
- " } else {\n"
- " out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w);\n"
- " }\n"
- "}\n";
-
-static const char *bcast_first_source =
- "__kernel void test_bcast_first(const __global Type *in, __global int4 "
- "*xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {\n"
- " out[gid] = sub_group_broadcast_first(x);\n"
- " } else {\n"
- " out[gid] = sub_group_broadcast_first(x);\n"
- " }\n"
- "}\n";
-
-static const char *ballot_bit_count_source =
- "__kernel void test_sub_group_ballot_bit_count(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " uint4 value = (uint4)(0,0,0,0);\n"
- " value = (uint4)(sub_group_ballot_bit_count(x),0,0,0);\n"
- " out[gid] = value;\n"
- "}\n";
-
-static const char *ballot_inclusive_scan_source =
- "__kernel void test_sub_group_ballot_inclusive_scan(const __global Type "
- "*in, __global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " uint4 value = (uint4)(0,0,0,0);\n"
- " value = (uint4)(sub_group_ballot_inclusive_scan(x),0,0,0);\n"
- " out[gid] = value;\n"
- "}\n";
-
-static const char *ballot_exclusive_scan_source =
- "__kernel void test_sub_group_ballot_exclusive_scan(const __global Type "
- "*in, __global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " uint4 value = (uint4)(0,0,0,0);\n"
- " value = (uint4)(sub_group_ballot_exclusive_scan(x),0,0,0);\n"
- " out[gid] = value;\n"
- "}\n";
-
-static const char *ballot_find_lsb_source =
- "__kernel void test_sub_group_ballot_find_lsb(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " uint4 value = (uint4)(0,0,0,0);\n"
- " value = (uint4)(sub_group_ballot_find_lsb(x),0,0,0);\n"
- " out[gid] = value;\n"
- "}\n";
-
-static const char *ballot_find_msb_source =
- "__kernel void test_sub_group_ballot_find_msb(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " uint4 value = (uint4)(0,0,0,0);"
- " value = (uint4)(sub_group_ballot_find_msb(x),0,0,0);"
- " out[gid] = value ;"
- "}\n";
-
-static const char *get_subgroup_ge_mask_source =
- "__kernel void test_get_sub_group_ge_mask(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].z = get_max_sub_group_size();\n"
- " Type x = in[gid];\n"
- " uint4 mask = get_sub_group_ge_mask();"
- " out[gid] = mask;\n"
- "}\n";
-
-static const char *get_subgroup_gt_mask_source =
- "__kernel void test_get_sub_group_gt_mask(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].z = get_max_sub_group_size();\n"
- " Type x = in[gid];\n"
- " uint4 mask = get_sub_group_gt_mask();"
- " out[gid] = mask;\n"
- "}\n";
-
-static const char *get_subgroup_le_mask_source =
- "__kernel void test_get_sub_group_le_mask(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].z = get_max_sub_group_size();\n"
- " Type x = in[gid];\n"
- " uint4 mask = get_sub_group_le_mask();"
- " out[gid] = mask;\n"
- "}\n";
-
-static const char *get_subgroup_lt_mask_source =
- "__kernel void test_get_sub_group_lt_mask(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].z = get_max_sub_group_size();\n"
- " Type x = in[gid];\n"
- " uint4 mask = get_sub_group_lt_mask();"
- " out[gid] = mask;\n"
- "}\n";
-
-static const char *get_subgroup_eq_mask_source =
- "__kernel void test_get_sub_group_eq_mask(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].z = get_max_sub_group_size();\n"
- " Type x = in[gid];\n"
- " uint4 mask = get_sub_group_eq_mask();"
- " out[gid] = mask;\n"
- "}\n";
-
-static const char *ballot_source =
- "__kernel void test_sub_group_ballot(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- "uint4 full_ballot = sub_group_ballot(1);\n"
- "uint divergence_mask;\n"
- "uint4 partial_ballot;\n"
- "uint gid = get_global_id(0);"
- "XY(xy,gid);\n"
- "if (get_sub_group_local_id() & 1) {\n"
- " divergence_mask = 0xaaaaaaaa;\n"
- " partial_ballot = sub_group_ballot(1);\n"
- "} else {\n"
- " divergence_mask = 0x55555555;\n"
- " partial_ballot = sub_group_ballot(1);\n"
- "}\n"
- " size_t lws = get_local_size(0);\n"
- "uint4 masked_ballot = full_ballot;\n"
- "masked_ballot.x &= divergence_mask;\n"
- "masked_ballot.y &= divergence_mask;\n"
- "masked_ballot.z &= divergence_mask;\n"
- "masked_ballot.w &= divergence_mask;\n"
- "out[gid] = all(masked_ballot == partial_ballot);\n"
-
- "} \n";
-
-static const char *ballot_source_inverse =
- "__kernel void test_sub_group_ballot_inverse(const __global "
- "Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " uint4 value = (uint4)(10,0,0,0);\n"
- " if (get_sub_group_local_id() & 1) {"
- " uint4 partial_ballot_mask = "
- "(uint4)(0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA);"
- " if (sub_group_inverse_ballot(partial_ballot_mask)) {\n"
- " value = (uint4)(1,0,0,1);\n"
- " } else {\n"
- " value = (uint4)(0,0,0,1);\n"
- " }\n"
- " } else {\n"
- " uint4 partial_ballot_mask = "
- "(uint4)(0x55555555,0x55555555,0x55555555,0x55555555);"
- " if (sub_group_inverse_ballot(partial_ballot_mask)) {\n"
- " value = (uint4)(1,0,0,2);\n"
- " } else {\n"
- " value = (uint4)(0,0,0,2);\n"
- " }\n"
- " }\n"
- " out[gid] = value;\n"
- "}\n";
-
-static const char *ballot_bit_extract_source =
- "__kernel void test_sub_group_ballot_bit_extract(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " uint index = xy[gid].z;\n"
- " uint4 value = (uint4)(10,0,0,0);\n"
- " if (get_sub_group_local_id() & 1) {"
- " if (sub_group_ballot_bit_extract(x, xy[gid].z)) {\n"
- " value = (uint4)(1,0,0,1);\n"
- " } else {\n"
- " value = (uint4)(0,0,0,1);\n"
- " }\n"
- " } else {\n"
- " if (sub_group_ballot_bit_extract(x, xy[gid].w)) {\n"
- " value = (uint4)(1,0,0,2);\n"
- " } else {\n"
- " value = (uint4)(0,0,0,2);\n"
- " }\n"
- " }\n"
- " out[gid] = value;\n"
- "}\n";
+std::string sub_group_non_uniform_broadcast_source = R"(
+__kernel void test_sub_group_non_uniform_broadcast(const __global Type *in, __global int4 *xy, __global Type *out) {
+ int gid = get_global_id(0);
+ XY(xy,gid);
+ Type x = in[gid];
+ if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {
+ out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z);
+ } else {
+ out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w);
+ }
+}
+)";
+std::string sub_group_broadcast_first_source = R"(
+__kernel void test_sub_group_broadcast_first(const __global Type *in, __global int4 *xy, __global Type *out) {
+ int gid = get_global_id(0);
+ XY(xy,gid);
+ Type x = in[gid];
+ if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {
+    out[gid] = sub_group_broadcast_first(x);
+ } else {
+    out[gid] = sub_group_broadcast_first(x);
+ }
+}
+)";
+std::string sub_group_ballot_bit_scan_find_source = R"(
+__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) {
+ int gid = get_global_id(0);
+ XY(xy,gid);
+ Type x = in[gid];
+ uint4 value = (uint4)(0,0,0,0);
+ value = (uint4)(%s(x),0,0,0);
+ out[gid] = value;
+}
+)";
+std::string sub_group_ballot_mask_source = R"(
+__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) {
+ int gid = get_global_id(0);
+ XY(xy,gid);
+ xy[gid].z = get_max_sub_group_size();
+ Type x = in[gid];
+ uint4 mask = %s();
+ out[gid] = mask;
+}
+)";
+std::string sub_group_ballot_source = R"(
+__kernel void test_sub_group_ballot(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) {
+ uint gid = get_global_id(0);
+ XY(xy,gid);
+ uint subgroup_local_id = get_sub_group_local_id();
+ uint elect_work_item = 1 << (subgroup_local_id % 32);
+ uint work_item_mask;
+ if (subgroup_local_id < 32) {
+ work_item_mask = work_item_mask_vector.x;
+ } else if(subgroup_local_id < 64) {
+ work_item_mask = work_item_mask_vector.y;
+ } else if(subgroup_local_id < 96) {
+ work_item_mask = work_item_mask_vector.z;
+ } else if(subgroup_local_id < 128) {
+ work_item_mask = work_item_mask_vector.w;
+ }
+ uint4 value = (uint4)(0, 0, 0, 0);
+ if (elect_work_item & work_item_mask) {
+ value = sub_group_ballot(in[gid].s0);
+ }
+ out[gid] = value;
+}
+)";
+std::string sub_group_inverse_ballot_source = R"(
+__kernel void test_sub_group_inverse_ballot(const __global Type *in, __global int4 *xy, __global Type *out) {
+ int gid = get_global_id(0);
+ XY(xy,gid);
+ Type x = in[gid];
+ uint4 value = (uint4)(10,0,0,0);
+ if (get_sub_group_local_id() & 1) {
+ uint4 partial_ballot_mask = (uint4)(0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA);
+ if (sub_group_inverse_ballot(partial_ballot_mask)) {
+ value = (uint4)(1,0,0,1);
+ } else {
+ value = (uint4)(0,0,0,1);
+ }
+ } else {
+ uint4 partial_ballot_mask = (uint4)(0x55555555,0x55555555,0x55555555,0x55555555);
+ if (sub_group_inverse_ballot(partial_ballot_mask)) {
+ value = (uint4)(1,0,0,2);
+ } else {
+ value = (uint4)(0,0,0,2);
+ }
+ }
+ out[gid] = value;
+}
+)";
+std::string sub_group_ballot_bit_extract_source = R"(
+ __kernel void test_sub_group_ballot_bit_extract(const __global Type *in, __global int4 *xy, __global Type *out) {
+ int gid = get_global_id(0);
+ XY(xy,gid);
+ Type x = in[gid];
+ uint index = xy[gid].z;
+ uint4 value = (uint4)(10,0,0,0);
+ if (get_sub_group_local_id() & 1) {
+ if (sub_group_ballot_bit_extract(x, xy[gid].z)) {
+ value = (uint4)(1,0,0,1);
+ } else {
+ value = (uint4)(0,0,0,1);
+ }
+ } else {
+ if (sub_group_ballot_bit_extract(x, xy[gid].w)) {
+ value = (uint4)(1,0,0,2);
+ } else {
+ value = (uint4)(0,0,0,2);
+ }
+ }
+ out[gid] = value;
+}
+)";
template <typename T> int run_non_uniform_broadcast_for_type(RunTestForType rft)
{
int error =
rft.run_impl<T, BC<T, SubgroupsBroadcastOp::non_uniform_broadcast>>(
- "test_bcast_non_uniform", bcast_non_uniform_source);
+ "sub_group_non_uniform_broadcast");
return error;
}
@@ -926,11 +888,21 @@ template <typename T> int run_non_uniform_broadcast_for_type(RunTestForType rft)
int test_subgroup_functions_ballot(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
- std::vector<std::string> required_extensions = { "cl_khr_subgroup_ballot" };
+ if (!is_extension_available(device, "cl_khr_subgroup_ballot"))
+ {
+ log_info("cl_khr_subgroup_ballot is not supported on this device, "
+ "skipping test.\n");
+ return TEST_SKIPPED_ITSELF;
+ }
+
constexpr size_t global_work_size = 170;
constexpr size_t local_work_size = 64;
- WorkGroupParams test_params(global_work_size, local_work_size,
- required_extensions);
+ WorkGroupParams test_params(global_work_size, local_work_size);
+ test_params.save_kernel_source(sub_group_ballot_mask_source);
+ test_params.save_kernel_source(sub_group_non_uniform_broadcast_source,
+ "sub_group_non_uniform_broadcast");
+ test_params.save_kernel_source(sub_group_broadcast_first_source,
+ "sub_group_broadcast_first");
RunTestForType rft(device, context, queue, num_elements, test_params);
// non uniform broadcast functions
@@ -1014,76 +986,92 @@ int test_subgroup_functions_ballot(cl_device_id device, cl_context context,
// broadcast first functions
error |=
rft.run_impl<cl_int, BC<cl_int, SubgroupsBroadcastOp::broadcast_first>>(
- "test_bcast_first", bcast_first_source);
+ "sub_group_broadcast_first");
error |= rft.run_impl<cl_uint,
BC<cl_uint, SubgroupsBroadcastOp::broadcast_first>>(
- "test_bcast_first", bcast_first_source);
+ "sub_group_broadcast_first");
error |= rft.run_impl<cl_long,
BC<cl_long, SubgroupsBroadcastOp::broadcast_first>>(
- "test_bcast_first", bcast_first_source);
+ "sub_group_broadcast_first");
error |= rft.run_impl<cl_ulong,
BC<cl_ulong, SubgroupsBroadcastOp::broadcast_first>>(
- "test_bcast_first", bcast_first_source);
+ "sub_group_broadcast_first");
error |= rft.run_impl<cl_short,
BC<cl_short, SubgroupsBroadcastOp::broadcast_first>>(
- "test_bcast_first", bcast_first_source);
+ "sub_group_broadcast_first");
error |= rft.run_impl<cl_ushort,
BC<cl_ushort, SubgroupsBroadcastOp::broadcast_first>>(
- "test_bcast_first", bcast_first_source);
+ "sub_group_broadcast_first");
error |= rft.run_impl<cl_char,
BC<cl_char, SubgroupsBroadcastOp::broadcast_first>>(
- "test_bcast_first", bcast_first_source);
+ "sub_group_broadcast_first");
error |= rft.run_impl<cl_uchar,
BC<cl_uchar, SubgroupsBroadcastOp::broadcast_first>>(
- "test_bcast_first", bcast_first_source);
+ "sub_group_broadcast_first");
error |= rft.run_impl<cl_float,
BC<cl_float, SubgroupsBroadcastOp::broadcast_first>>(
- "test_bcast_first", bcast_first_source);
+ "sub_group_broadcast_first");
error |= rft.run_impl<cl_double,
BC<cl_double, SubgroupsBroadcastOp::broadcast_first>>(
- "test_bcast_first", bcast_first_source);
+ "sub_group_broadcast_first");
error |= rft.run_impl<
subgroups::cl_half,
BC<subgroups::cl_half, SubgroupsBroadcastOp::broadcast_first>>(
- "test_bcast_first", bcast_first_source);
+ "sub_group_broadcast_first");
// mask functions
error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::eq_mask>>(
- "test_get_sub_group_eq_mask", get_subgroup_eq_mask_source);
+ "get_sub_group_eq_mask");
error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::ge_mask>>(
- "test_get_sub_group_ge_mask", get_subgroup_ge_mask_source);
+ "get_sub_group_ge_mask");
error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::gt_mask>>(
- "test_get_sub_group_gt_mask", get_subgroup_gt_mask_source);
+ "get_sub_group_gt_mask");
error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::le_mask>>(
- "test_get_sub_group_le_mask", get_subgroup_le_mask_source);
+ "get_sub_group_le_mask");
error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::lt_mask>>(
- "test_get_sub_group_lt_mask", get_subgroup_lt_mask_source);
-
- // ballot functions
- error |= rft.run_impl<cl_uint, BALLOT<cl_uint>>("test_sub_group_ballot",
- ballot_source);
- error |= rft.run_impl<cl_uint4,
- BALLOT_INVERSE<cl_uint4, BallotOp::inverse_ballot>>(
- "test_sub_group_ballot_inverse", ballot_source_inverse);
- error |= rft.run_impl<
+ "get_sub_group_lt_mask");
+
+ // sub_group_ballot function
+ WorkGroupParams test_params_ballot(global_work_size, local_work_size, 3);
+ test_params_ballot.save_kernel_source(sub_group_ballot_source);
+ RunTestForType rft_ballot(device, context, queue, num_elements,
+ test_params_ballot);
+ error |=
+ rft_ballot.run_impl<cl_uint4, BALLOT<cl_uint4>>("sub_group_ballot");
+
+ // ballot arithmetic functions
+ WorkGroupParams test_params_arith(global_work_size, local_work_size);
+ test_params_arith.save_kernel_source(sub_group_ballot_bit_scan_find_source);
+ test_params_arith.save_kernel_source(sub_group_inverse_ballot_source,
+ "sub_group_inverse_ballot");
+ test_params_arith.save_kernel_source(sub_group_ballot_bit_extract_source,
+ "sub_group_ballot_bit_extract");
+ RunTestForType rft_arith(device, context, queue, num_elements,
+ test_params_arith);
+ error |=
+ rft_arith.run_impl<cl_uint4,
+ BALLOT_INVERSE<cl_uint4, BallotOp::inverse_ballot>>(
+ "sub_group_inverse_ballot");
+ error |= rft_arith.run_impl<
cl_uint4, BALLOT_BIT_EXTRACT<cl_uint4, BallotOp::ballot_bit_extract>>(
- "test_sub_group_ballot_bit_extract", ballot_bit_extract_source);
- error |= rft.run_impl<
+ "sub_group_ballot_bit_extract");
+ error |= rft_arith.run_impl<
cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_bit_count>>(
- "test_sub_group_ballot_bit_count", ballot_bit_count_source);
- error |= rft.run_impl<
+ "sub_group_ballot_bit_count");
+ error |= rft_arith.run_impl<
cl_uint4,
BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_inclusive_scan>>(
- "test_sub_group_ballot_inclusive_scan", ballot_inclusive_scan_source);
- error |= rft.run_impl<
+ "sub_group_ballot_inclusive_scan");
+ error |= rft_arith.run_impl<
cl_uint4,
BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_exclusive_scan>>(
- "test_sub_group_ballot_exclusive_scan", ballot_exclusive_scan_source);
- error |= rft.run_impl<
+ "sub_group_ballot_exclusive_scan");
+ error |= rft_arith.run_impl<
cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_find_lsb>>(
- "test_sub_group_ballot_find_lsb", ballot_find_lsb_source);
- error |= rft.run_impl<
+ "sub_group_ballot_find_lsb");
+ error |= rft_arith.run_impl<
cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_find_msb>>(
- "test_sub_group_ballot_find_msb", ballot_find_msb_source);
+ "sub_group_ballot_find_msb");
+
return error;
}
diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
index 588e9cee..38652d51 100644
--- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
+++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
@@ -18,172 +18,55 @@
#include "subgroup_common_templates.h"
#include "harness/typeWrappers.h"
-#define CLUSTER_SIZE 4
-#define CLUSTER_SIZE_STR "4"
-
namespace {
-static const char *redadd_clustered_source =
- "__kernel void test_redadd_clustered(const __global Type *in, __global "
- "int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].w = 0;\n"
- " if (sizeof(in[gid]) == "
- "sizeof(sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR ")))\n"
- " {xy[gid].w = sizeof(in[gid]);}\n"
- " out[gid] = sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR
- ");\n"
- "}\n";
-
-static const char *redmax_clustered_source =
- "__kernel void test_redmax_clustered(const __global Type *in, __global "
- "int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].w = 0;\n"
- " if (sizeof(in[gid]) == "
- "sizeof(sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR ")))\n"
- " {xy[gid].w = sizeof(in[gid]);}\n"
- " out[gid] = sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR
- ");\n"
- "}\n";
-
-static const char *redmin_clustered_source =
- "__kernel void test_redmin_clustered(const __global Type *in, __global "
- "int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].w = 0;\n"
- " if (sizeof(in[gid]) == "
- "sizeof(sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR ")))\n"
- " {xy[gid].w = sizeof(in[gid]);}\n"
- " out[gid] = sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR
- ");\n"
- "}\n";
-
-static const char *redmul_clustered_source =
- "__kernel void test_redmul_clustered(const __global Type *in, __global "
- "int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].w = 0;\n"
- " if (sizeof(in[gid]) == "
- "sizeof(sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR ")))\n"
- " {xy[gid].w = sizeof(in[gid]);}\n"
- " out[gid] = sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR
- ");\n"
- "}\n";
-
-static const char *redand_clustered_source =
- "__kernel void test_redand_clustered(const __global Type *in, __global "
- "int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].w = 0;\n"
- " if (sizeof(in[gid]) == "
- "sizeof(sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR ")))\n"
- " {xy[gid].w = sizeof(in[gid]);}\n"
- " out[gid] = sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR
- ");\n"
- "}\n";
-
-static const char *redor_clustered_source =
- "__kernel void test_redor_clustered(const __global Type *in, __global int4 "
- "*xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].w = 0;\n"
- " if (sizeof(in[gid]) == "
- "sizeof(sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR ")))\n"
- " {xy[gid].w = sizeof(in[gid]);}\n"
- " out[gid] = sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR
- ");\n"
- "}\n";
-
-static const char *redxor_clustered_source =
- "__kernel void test_redxor_clustered(const __global Type *in, __global "
- "int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].w = 0;\n"
- " if (sizeof(in[gid]) == "
- "sizeof(sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR ")))\n"
- " {xy[gid].w = sizeof(in[gid]);}\n"
- " out[gid] = sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR
- ");\n"
- "}\n";
-
-static const char *redand_clustered_logical_source =
- "__kernel void test_redand_clustered_logical(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].w = 0;\n"
- " if (sizeof(in[gid]) == "
- "sizeof(sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR
- ")))\n"
- " {xy[gid].w = sizeof(in[gid]);}\n"
- " out[gid] = "
- "sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR ");\n"
- "}\n";
-
-static const char *redor_clustered_logical_source =
- "__kernel void test_redor_clustered_logical(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].w = 0;\n"
- " if (sizeof(in[gid]) == "
- "sizeof(sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR
- ")))\n"
- " {xy[gid].w = sizeof(in[gid]);}\n"
- " out[gid] = "
- "sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR ");\n"
- "}\n";
-
-static const char *redxor_clustered_logical_source =
- "__kernel void test_redxor_clustered_logical(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " xy[gid].w = 0;\n"
- " if ( sizeof(in[gid]) == "
- "sizeof(sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR
- ")))\n"
- " {xy[gid].w = sizeof(in[gid]);}\n"
- " out[gid] = "
- "sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR ");\n"
- "}\n";
-
+std::string sub_group_clustered_reduce_source = R"(
+__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out,
+ uint cluster_size) {
+ Type r;
+ int gid = get_global_id(0);
+ XY(xy,gid);
+ xy[gid].w = 0;
+ Type v = in[gid];
+ if (sizeof(in[gid]) == sizeof(%s(v, 1))) {
+ xy[gid].w = sizeof(in[gid]);
+ }
+ switch (cluster_size) {
+ case 1: r = %s(v, 1); break;
+ case 2: r = %s(v, 2); break;
+ case 4: r = %s(v, 4); break;
+ case 8: r = %s(v, 8); break;
+ case 16: r = %s(v, 16); break;
+ case 32: r = %s(v, 32); break;
+ case 64: r = %s(v, 64); break;
+ case 128: r = %s(v, 128); break;
+ }
+ out[gid] = r;
+}
+)";
// DESCRIPTION:
// Test for reduce cluster functions
template <typename Ty, ArithmeticOp operation> struct RED_CLU
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ...%s\n",
+ operation_names(operation), TypeManager<Ty>::name(),
+ sizeof(Ty), extra_text);
+ }
+
static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
{
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
ng = ng / nw;
- log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ...\n",
- operation_names(operation), TypeManager<Ty>::name(),
- sizeof(Ty));
- genrand<Ty, operation>(x, t, m, ns, nw, ng);
+ generate_inputs<Ty, operation>(x, t, m, ns, nw, ng);
}
- static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
@@ -219,34 +102,34 @@ template <typename Ty, ArithmeticOp operation> struct RED_CLU
{
int ii = j * ns;
int n = ii + ns > nw ? nw - ii : ns;
- int midx = 4 * ii + 2;
std::vector<Ty> clusters_results;
- int clusters_counter = ns / CLUSTER_SIZE;
+ int clusters_counter = ns / test_params.cluster_size;
clusters_results.resize(clusters_counter);
// Compute target
Ty tr = mx[ii];
for (int i = 0; i < n; ++i)
{
- if (i % CLUSTER_SIZE == 0)
+ if (i % test_params.cluster_size == 0)
tr = mx[ii + i];
else
tr = calculate<Ty>(tr, mx[ii + i], operation);
- clusters_results[i / CLUSTER_SIZE] = tr;
+ clusters_results[i / test_params.cluster_size] = tr;
}
// Check result
for (int i = 0; i < n; ++i)
{
Ty rr = my[ii + i];
- tr = clusters_results[i / CLUSTER_SIZE];
+ tr = clusters_results[i / test_params.cluster_size];
if (!compare(rr, tr))
{
log_error(
- "ERROR: sub_group_clustered_reduce_%s(%s) mismatch "
- "for local id %d in sub group %d in group %d\n",
+ "ERROR: sub_group_clustered_reduce_%s(%s, %u) "
+ "mismatch for local id %d in sub group %d in group "
+ "%d\n",
operation_names(operation), TypeManager<Ty>::name(),
- i, j, k);
+ test_params.cluster_size, i, j, k);
return TEST_FAIL;
}
}
@@ -256,9 +139,6 @@ template <typename Ty, ArithmeticOp operation> struct RED_CLU
y += nw;
m += 4 * nw;
}
- log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ... passed\n",
- operation_names(operation), TypeManager<Ty>::name(),
- sizeof(Ty));
return TEST_PASS;
}
};
@@ -267,34 +147,34 @@ template <typename T>
int run_cluster_red_add_max_min_mul_for_type(RunTestForType rft)
{
int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::add_>>(
- "test_redadd_clustered", redadd_clustered_source);
+ "sub_group_clustered_reduce_add");
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::max_>>(
- "test_redmax_clustered", redmax_clustered_source);
+ "sub_group_clustered_reduce_max");
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::min_>>(
- "test_redmin_clustered", redmin_clustered_source);
+ "sub_group_clustered_reduce_min");
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::mul_>>(
- "test_redmul_clustered", redmul_clustered_source);
+ "sub_group_clustered_reduce_mul");
return error;
}
template <typename T> int run_cluster_and_or_xor_for_type(RunTestForType rft)
{
int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::and_>>(
- "test_redand_clustered", redand_clustered_source);
+ "sub_group_clustered_reduce_and");
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::or_>>(
- "test_redor_clustered", redor_clustered_source);
+ "sub_group_clustered_reduce_or");
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::xor_>>(
- "test_redxor_clustered", redxor_clustered_source);
+ "sub_group_clustered_reduce_xor");
return error;
}
template <typename T>
int run_cluster_logical_and_or_xor_for_type(RunTestForType rft)
{
int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_and>>(
- "test_redand_clustered_logical", redand_clustered_logical_source);
+ "sub_group_clustered_reduce_logical_and");
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_or>>(
- "test_redor_clustered_logical", redor_clustered_logical_source);
+ "sub_group_clustered_reduce_logical_or");
error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_xor>>(
- "test_redxor_clustered_logical", redxor_clustered_logical_source);
+ "sub_group_clustered_reduce_logical_xor");
return error;
}
@@ -305,13 +185,17 @@ int test_subgroup_functions_clustered_reduce(cl_device_id device,
cl_command_queue queue,
int num_elements)
{
- std::vector<std::string> required_extensions = {
- "cl_khr_subgroup_clustered_reduce"
- };
+ if (!is_extension_available(device, "cl_khr_subgroup_clustered_reduce"))
+ {
+ log_info("cl_khr_subgroup_clustered_reduce is not supported on this "
+ "device, skipping test.\n");
+ return TEST_SKIPPED_ITSELF;
+ }
+
constexpr size_t global_work_size = 2000;
constexpr size_t local_work_size = 200;
- WorkGroupParams test_params(global_work_size, local_work_size,
- required_extensions);
+ WorkGroupParams test_params(global_work_size, local_work_size, -1, 3);
+ test_params.save_kernel_source(sub_group_clustered_reduce_source);
RunTestForType rft(device, context, queue, num_elements, test_params);
int error = run_cluster_red_add_max_min_mul_for_type<cl_int>(rft);
diff --git a/test_conformance/subgroups/test_subgroup_extended_types.cpp b/test_conformance/subgroups/test_subgroup_extended_types.cpp
index 98401b8e..c9e6bb61 100644
--- a/test_conformance/subgroups/test_subgroup_extended_types.cpp
+++ b/test_conformance/subgroups/test_subgroup_extended_types.cpp
@@ -24,30 +24,30 @@ namespace {
template <typename T> int run_broadcast_for_extended_type(RunTestForType rft)
{
int error = rft.run_impl<T, BC<T, SubgroupsBroadcastOp::broadcast>>(
- "test_bcast", bcast_source);
+ "sub_group_broadcast");
return error;
}
template <typename T> int run_scan_reduction_for_type(RunTestForType rft)
{
- int error = rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>("test_redadd",
- redadd_source);
- error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>("test_redmax",
- redmax_source);
- error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>("test_redmin",
- redmin_source);
- error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>("test_scinadd",
- scinadd_source);
- error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>("test_scinmax",
- scinmax_source);
- error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>("test_scinmin",
- scinmin_source);
- error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>("test_scexadd",
- scexadd_source);
- error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>("test_scexmax",
- scexmax_source);
- error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>("test_scexmin",
- scexmin_source);
+ int error =
+ rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>("sub_group_reduce_add");
+ error |=
+ rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>("sub_group_reduce_max");
+ error |=
+ rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>("sub_group_reduce_min");
+ error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>(
+ "sub_group_scan_inclusive_add");
+ error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>(
+ "sub_group_scan_inclusive_max");
+ error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>(
+ "sub_group_scan_inclusive_min");
+ error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>(
+ "sub_group_scan_exclusive_add");
+ error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>(
+ "sub_group_scan_exclusive_max");
+ error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>(
+ "sub_group_scan_exclusive_min");
return error;
}
@@ -59,15 +59,21 @@ int test_subgroup_functions_extended_types(cl_device_id device,
cl_command_queue queue,
int num_elements)
{
- std::vector<std::string> required_extensions = {
- "cl_khr_subgroup_extended_types"
- };
+ if (!is_extension_available(device, "cl_khr_subgroup_extended_types"))
+ {
+ log_info("cl_khr_subgroup_extended_types is not supported on this "
+ "device, skipping test.\n");
+ return TEST_SKIPPED_ITSELF;
+ }
+
constexpr size_t global_work_size = 2000;
constexpr size_t local_work_size = 200;
- WorkGroupParams test_params(global_work_size, local_work_size,
- required_extensions);
- RunTestForType rft(device, context, queue, num_elements, test_params);
+ WorkGroupParams test_params(global_work_size, local_work_size);
+ test_params.save_kernel_source(sub_group_reduction_scan_source);
+ test_params.save_kernel_source(sub_group_generic_source,
+ "sub_group_broadcast");
+ RunTestForType rft(device, context, queue, num_elements, test_params);
int error = run_broadcast_for_extended_type<cl_uint2>(rft);
error |= run_broadcast_for_extended_type<subgroups::cl_uint3>(rft);
error |= run_broadcast_for_extended_type<cl_uint4>(rft);
@@ -102,22 +108,26 @@ int test_subgroup_functions_extended_types(cl_device_id device,
error |= run_broadcast_for_extended_type<cl_double8>(rft);
error |= run_broadcast_for_extended_type<cl_double16>(rft);
+ error |= run_broadcast_for_extended_type<cl_ushort>(rft);
error |= run_broadcast_for_extended_type<cl_ushort2>(rft);
error |= run_broadcast_for_extended_type<subgroups::cl_ushort3>(rft);
error |= run_broadcast_for_extended_type<cl_ushort4>(rft);
error |= run_broadcast_for_extended_type<cl_ushort8>(rft);
error |= run_broadcast_for_extended_type<cl_ushort16>(rft);
+ error |= run_broadcast_for_extended_type<cl_short>(rft);
error |= run_broadcast_for_extended_type<cl_short2>(rft);
error |= run_broadcast_for_extended_type<subgroups::cl_short3>(rft);
error |= run_broadcast_for_extended_type<cl_short4>(rft);
error |= run_broadcast_for_extended_type<cl_short8>(rft);
error |= run_broadcast_for_extended_type<cl_short16>(rft);
+ error |= run_broadcast_for_extended_type<cl_uchar>(rft);
error |= run_broadcast_for_extended_type<cl_uchar2>(rft);
error |= run_broadcast_for_extended_type<subgroups::cl_uchar3>(rft);
error |= run_broadcast_for_extended_type<cl_uchar4>(rft);
error |= run_broadcast_for_extended_type<cl_uchar8>(rft);
error |= run_broadcast_for_extended_type<cl_uchar16>(rft);
+ error |= run_broadcast_for_extended_type<cl_char>(rft);
error |= run_broadcast_for_extended_type<cl_char2>(rft);
error |= run_broadcast_for_extended_type<subgroups::cl_char3>(rft);
error |= run_broadcast_for_extended_type<cl_char4>(rft);
diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp
index eb46ff09..02fc507b 100644
--- a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp
+++ b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp
@@ -20,333 +20,25 @@
namespace {
-static const char *scinadd_non_uniform_source = R"(
- __kernel void test_scinadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
+std::string sub_group_non_uniform_arithmetic_source = R"(
+ __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) {
int gid = get_global_id(0);
XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_inclusive_add(in[gid]);
- }
- }
-)";
-
-static const char *scinmax_non_uniform_source = R"(
- __kernel void test_scinmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_inclusive_max(in[gid]);
- }
- }
-)";
-
-static const char *scinmin_non_uniform_source = R"(
- __kernel void test_scinmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_inclusive_min(in[gid]);
- }
- }
-)";
-
-static const char *scinmul_non_uniform_source = R"(
- __kernel void test_scinmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_inclusive_mul(in[gid]);
- }
- }
-)";
-
-static const char *scinand_non_uniform_source = R"(
- __kernel void test_scinand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_inclusive_and(in[gid]);
- }
- }
-)";
-
-static const char *scinor_non_uniform_source = R"(
- __kernel void test_scinor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_inclusive_or(in[gid]);
- }
- }
-)";
-
-static const char *scinxor_non_uniform_source = R"(
- __kernel void test_scinxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_inclusive_xor(in[gid]);
- }
- }
-)";
-
-static const char *scinand_non_uniform_logical_source = R"(
- __kernel void test_scinand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_inclusive_logical_and(in[gid]);
- }
- }
-)";
-
-static const char *scinor_non_uniform_logical_source = R"(
- __kernel void test_scinor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_inclusive_logical_or(in[gid]);
- }
- }
-)";
-
-static const char *scinxor_non_uniform_logical_source = R"(
- __kernel void test_scinxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_inclusive_logical_xor(in[gid]);
- }
- }
-)";
-
-static const char *scexadd_non_uniform_source = R"(
- __kernel void test_scexadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_exclusive_add(in[gid]);
- }
- }
-)";
-
-static const char *scexmax_non_uniform_source = R"(
- __kernel void test_scexmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_exclusive_max(in[gid]);
- }
- }
-)";
-
-static const char *scexmin_non_uniform_source = R"(
- __kernel void test_scexmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_exclusive_min(in[gid]);
- }
- }
-)";
-
-static const char *scexmul_non_uniform_source = R"(
- __kernel void test_scexmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_exclusive_mul(in[gid]);
- }
- }
-)";
-
-static const char *scexand_non_uniform_source = R"(
- __kernel void test_scexand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_exclusive_and(in[gid]);
- }
- }
-)";
-
-static const char *scexor_non_uniform_source = R"(
- __kernel void test_scexor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_exclusive_or(in[gid]);
- }
- }
-)";
-
-static const char *scexxor_non_uniform_source = R"(
- __kernel void test_scexxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_exclusive_xor(in[gid]);
- }
- }
-)";
-
-static const char *scexand_non_uniform_logical_source = R"(
- __kernel void test_scexand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_exclusive_logical_and(in[gid]);
- }
- }
-)";
-
-static const char *scexor_non_uniform_logical_source = R"(
- __kernel void test_scexor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_exclusive_logical_or(in[gid]);
- }
- }
-)";
-
-static const char *scexxor_non_uniform_logical_source = R"(
- __kernel void test_scexxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_scan_exclusive_logical_xor(in[gid]);
- }
- }
-)";
-
-static const char *redadd_non_uniform_source = R"(
- __kernel void test_redadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_reduce_add(in[gid]);
- }
- }
-)";
-
-static const char *redmax_non_uniform_source = R"(
- __kernel void test_redmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_reduce_max(in[gid]);
- }
- }
-)";
-
-static const char *redmin_non_uniform_source = R"(
- __kernel void test_redmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_reduce_min(in[gid]);
- }
- }
-)";
-
-static const char *redmul_non_uniform_source = R"(
- __kernel void test_redmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_reduce_mul(in[gid]);
- }
- }
-)";
-
-static const char *redand_non_uniform_source = R"(
- __kernel void test_redand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_reduce_and(in[gid]);
- }
- }
-)";
-
-static const char *redor_non_uniform_source = R"(
- __kernel void test_redor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_reduce_or(in[gid]);
- }
- }
-)";
-
-static const char *redxor_non_uniform_source = R"(
- __kernel void test_redxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_reduce_xor(in[gid]);
- }
- }
-)";
-
-static const char *redand_non_uniform_logical_source = R"(
- __kernel void test_redand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_reduce_logical_and(in[gid]);
- }
- }
-)";
-
-static const char *redor_non_uniform_logical_source = R"(
- __kernel void test_redor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_reduce_logical_or(in[gid]);
- }
- }
-)";
-
-static const char *redxor_non_uniform_logical_source = R"(
- __kernel void test_redxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- int elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_reduce_logical_xor(in[gid]);
- }
+ uint subgroup_local_id = get_sub_group_local_id();
+ uint elect_work_item = 1 << (subgroup_local_id % 32);
+ uint work_item_mask;
+ if(subgroup_local_id < 32) {
+ work_item_mask = work_item_mask_vector.x;
+ } else if(subgroup_local_id < 64) {
+ work_item_mask = work_item_mask_vector.y;
+ } else if(subgroup_local_id < 96) {
+ work_item_mask = work_item_mask_vector.z;
+ } else if(subgroup_local_id < 128) {
+ work_item_mask = work_item_mask_vector.w;
+ }
+ if (elect_work_item & work_item_mask){
+ out[gid] = %s(in[gid]);
+ }
}
)";
@@ -354,52 +46,52 @@ template <typename T>
int run_functions_add_mul_max_min_for_type(RunTestForType rft)
{
int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>(
- "test_scinadd_non_uniform", scinadd_non_uniform_source);
+ "sub_group_non_uniform_scan_inclusive_add");
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::mul_>>(
- "test_scinmul_non_uniform", scinmul_non_uniform_source);
+ "sub_group_non_uniform_scan_inclusive_mul");
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>(
- "test_scinmax_non_uniform", scinmax_non_uniform_source);
+ "sub_group_non_uniform_scan_inclusive_max");
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>(
- "test_scinmin_non_uniform", scinmin_non_uniform_source);
+ "sub_group_non_uniform_scan_inclusive_min");
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>(
- "test_scexadd_non_uniform", scexadd_non_uniform_source);
+ "sub_group_non_uniform_scan_exclusive_add");
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::mul_>>(
- "test_scexmul_non_uniform", scexmul_non_uniform_source);
+ "sub_group_non_uniform_scan_exclusive_mul");
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>(
- "test_scexmax_non_uniform", scexmax_non_uniform_source);
+ "sub_group_non_uniform_scan_exclusive_max");
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>(
- "test_scexmin_non_uniform", scexmin_non_uniform_source);
+ "sub_group_non_uniform_scan_exclusive_min");
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>(
- "test_redadd_non_uniform", redadd_non_uniform_source);
+ "sub_group_non_uniform_reduce_add");
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::mul_>>(
- "test_redmul_non_uniform", redmul_non_uniform_source);
+ "sub_group_non_uniform_reduce_mul");
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>(
- "test_redmax_non_uniform", redmax_non_uniform_source);
+ "sub_group_non_uniform_reduce_max");
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>(
- "test_redmin_non_uniform", redmin_non_uniform_source);
+ "sub_group_non_uniform_reduce_min");
return error;
}
template <typename T> int run_functions_and_or_xor_for_type(RunTestForType rft)
{
int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::and_>>(
- "test_scinand_non_uniform", scinand_non_uniform_source);
+ "sub_group_non_uniform_scan_inclusive_and");
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::or_>>(
- "test_scinor_non_uniform", scinor_non_uniform_source);
+ "sub_group_non_uniform_scan_inclusive_or");
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::xor_>>(
- "test_scinxor_non_uniform", scinxor_non_uniform_source);
+ "sub_group_non_uniform_scan_inclusive_xor");
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::and_>>(
- "test_scexand_non_uniform", scexand_non_uniform_source);
+ "sub_group_non_uniform_scan_exclusive_and");
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::or_>>(
- "test_scexor_non_uniform", scexor_non_uniform_source);
+ "sub_group_non_uniform_scan_exclusive_or");
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::xor_>>(
- "test_scexxor_non_uniform", scexxor_non_uniform_source);
+ "sub_group_non_uniform_scan_exclusive_xor");
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::and_>>(
- "test_redand_non_uniform", redand_non_uniform_source);
+ "sub_group_non_uniform_reduce_and");
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::or_>>(
- "test_redor_non_uniform", redor_non_uniform_source);
+ "sub_group_non_uniform_reduce_or");
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::xor_>>(
- "test_redxor_non_uniform", redxor_non_uniform_source);
+ "sub_group_non_uniform_reduce_xor");
return error;
}
@@ -407,23 +99,23 @@ template <typename T>
int run_functions_logical_and_or_xor_for_type(RunTestForType rft)
{
int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_and>>(
- "test_scinand_non_uniform_logical", scinand_non_uniform_logical_source);
+ "sub_group_non_uniform_scan_inclusive_logical_and");
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_or>>(
- "test_scinor_non_uniform_logical", scinor_non_uniform_logical_source);
+ "sub_group_non_uniform_scan_inclusive_logical_or");
error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_xor>>(
- "test_scinxor_non_uniform_logical", scinxor_non_uniform_logical_source);
+ "sub_group_non_uniform_scan_inclusive_logical_xor");
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_and>>(
- "test_scexand_non_uniform_logical", scexand_non_uniform_logical_source);
+ "sub_group_non_uniform_scan_exclusive_logical_and");
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_or>>(
- "test_scexor_non_uniform_logical", scexor_non_uniform_logical_source);
+ "sub_group_non_uniform_scan_exclusive_logical_or");
error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_xor>>(
- "test_scexxor_non_uniform_logical", scexxor_non_uniform_logical_source);
+ "sub_group_non_uniform_scan_exclusive_logical_xor");
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_and>>(
- "test_redand_non_uniform_logical", redand_non_uniform_logical_source);
+ "sub_group_non_uniform_reduce_logical_and");
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_or>>(
- "test_redor_non_uniform_logical", redor_non_uniform_logical_source);
+ "sub_group_non_uniform_reduce_logical_or");
error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_xor>>(
- "test_redxor_non_uniform_logical", redxor_non_uniform_logical_source);
+ "sub_group_non_uniform_reduce_logical_xor");
return error;
}
@@ -434,17 +126,18 @@ int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device,
cl_command_queue queue,
int num_elements)
{
- std::vector<std::string> required_extensions = {
- "cl_khr_subgroup_non_uniform_arithmetic"
- };
- std::vector<uint32_t> masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555,
- 0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00,
- 0x00ffff00, 0x80000000, 0xaaaaaaaa };
+ if (!is_extension_available(device,
+ "cl_khr_subgroup_non_uniform_arithmetic"))
+ {
+ log_info("cl_khr_subgroup_non_uniform_arithmetic is not supported on "
+ "this device, skipping test.\n");
+ return TEST_SKIPPED_ITSELF;
+ }
constexpr size_t global_work_size = 2000;
constexpr size_t local_work_size = 200;
- WorkGroupParams test_params(global_work_size, local_work_size,
- required_extensions, masks);
+ WorkGroupParams test_params(global_work_size, local_work_size, 3);
+ test_params.save_kernel_source(sub_group_non_uniform_arithmetic_source);
RunTestForType rft(device, context, queue, num_elements, test_params);
int error = run_functions_add_mul_max_min_for_type<cl_int>(rft);
@@ -470,4 +163,4 @@ int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device,
error |= run_functions_logical_and_or_xor_for_type<cl_int>(rft);
return error;
-} \ No newline at end of file
+}
diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp
index 2b00b4dd..3be1ba30 100644
--- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp
+++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp
@@ -22,31 +22,27 @@ namespace {
template <typename T, NonUniformVoteOp operation> struct VOTE
{
+ static void log_test(const WorkGroupParams &test_params,
+ const char *extra_text)
+ {
+ log_info(" sub_group_%s%s(%s)...%s\n",
+ (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_",
+ operation_names(operation), TypeManager<T>::name(),
+ extra_text);
+ }
+
static void gen(T *x, T *t, cl_int *m, const WorkGroupParams &test_params)
{
int i, ii, j, k, n;
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
- uint32_t work_items_mask = test_params.work_items_mask;
int nj = (nw + ns - 1) / ns;
int non_uniform_size = ng % nw;
ng = ng / nw;
int last_subgroup_size = 0;
ii = 0;
- log_info(" sub_group_%s%s... \n",
- (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_",
- operation_names(operation));
-
- log_info(" test params: global size = %d local size = %d subgroups "
- "size = %d work item mask = 0x%x data type (%s)\n",
- test_params.global_workgroup_size, nw, ns, work_items_mask,
- TypeManager<T>::name());
- if (non_uniform_size)
- {
- log_info(" non uniform work group size mode ON\n");
- }
if (operation == NonUniformVoteOp::elect) return;
for (k = 0; k < ng; ++k)
@@ -92,14 +88,13 @@ template <typename T, NonUniformVoteOp operation> struct VOTE
}
}
- static int chk(T *x, T *y, T *mx, T *my, cl_int *m,
- const WorkGroupParams &test_params)
+ static test_status chk(T *x, T *y, T *mx, T *my, cl_int *m,
+ const WorkGroupParams &test_params)
{
int ii, i, j, k, n;
int nw = test_params.local_workgroup_size;
int ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
- uint32_t work_items_mask = test_params.work_items_mask;
int nj = (nw + ns - 1) / ns;
cl_int tr, rr;
int non_uniform_size = ng % nw;
@@ -141,8 +136,7 @@ template <typename T, NonUniformVoteOp operation> struct VOTE
std::set<int> active_work_items;
for (i = 0; i < n; ++i)
{
- uint32_t check_work_item = 1 << (i % 32);
- if (work_items_mask & check_work_item)
+ if (test_params.work_items_mask.test(i))
{
active_work_items.insert(i);
switch (operation)
@@ -172,34 +166,28 @@ template <typename T, NonUniformVoteOp operation> struct VOTE
}
if (active_work_items.empty())
{
- log_info(" no one workitem acitve... in workgroup id = %d "
- "subgroup id = %d\n",
- k, j);
+ continue;
}
- else
+ auto lowest_active = active_work_items.begin();
+ for (const int &active_work_item : active_work_items)
{
- auto lowest_active = active_work_items.begin();
- for (const int &active_work_item : active_work_items)
+ i = active_work_item;
+ if (operation == NonUniformVoteOp::elect)
{
- i = active_work_item;
- if (operation == NonUniformVoteOp::elect)
- {
- i == *lowest_active ? tr = 1 : tr = 0;
- }
+ i == *lowest_active ? tr = 1 : tr = 0;
+ }
- // normalize device values on host, non zero set 1.
- rr = compare_ordered<T>(my[ii + i], 0) ? 0 : 1;
+ // normalize device values on host, non zero set 1.
+ rr = compare_ordered<T>(my[ii + i], 0) ? 0 : 1;
- if (rr != tr)
- {
- log_error("ERROR: sub_group_%s() \n",
- operation_names(operation));
- log_error(
- "mismatch for work item %d sub group %d in "
- "work group %d. Expected: %d Obtained: %d\n",
- i, j, k, tr, rr);
- return TEST_FAIL;
- }
+ if (rr != tr)
+ {
+ log_error("ERROR: sub_group_%s() \n",
+ operation_names(operation));
+ log_error("mismatch for work item %d sub group %d in "
+ "work group %d. Expected: %d Obtained: %d\n",
+ i, j, k, tr, rr);
+ return TEST_FAIL;
}
}
}
@@ -209,52 +197,50 @@ template <typename T, NonUniformVoteOp operation> struct VOTE
m += 4 * nw;
}
- log_info(" sub_group_%s%s... passed\n",
- (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_",
- operation_names(operation));
return TEST_PASS;
}
};
-static const char *elect_source = R"(
- __kernel void test_elect(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_elect();
- }
- }
-)";
-
-static const char *non_uniform_any_source = R"(
- __kernel void test_non_uniform_any(const __global Type *in, __global int4 *xy, __global Type *out) {
- int gid = get_global_id(0);
- XY(xy,gid);
- uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_any(in[gid]);
- }
- }
-)";
-static const char *non_uniform_all_source = R"(
- __kernel void test_non_uniform_all(const __global Type *in, __global int4 *xy, __global Type *out) {
+std::string sub_group_elect_source = R"(
+ __kernel void test_sub_group_elect(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) {
int gid = get_global_id(0);
XY(xy,gid);
- uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_all(in[gid]);
- }
+ uint subgroup_local_id = get_sub_group_local_id();
+ uint elect_work_item = 1 << (subgroup_local_id % 32);
+ uint work_item_mask;
+ if(subgroup_local_id < 32) {
+ work_item_mask = work_item_mask_vector.x;
+ } else if(subgroup_local_id < 64) {
+ work_item_mask = work_item_mask_vector.y;
+ } else if(subgroup_local_id < 96) {
+ work_item_mask = work_item_mask_vector.z;
+ } else if(subgroup_local_id < 128) {
+ work_item_mask = work_item_mask_vector.w;
+ }
+ if (elect_work_item & work_item_mask){
+ out[gid] = sub_group_elect();
+ }
}
)";
-static const char *non_uniform_all_equal_source = R"(
- __kernel void test_non_uniform_all_equal(const __global Type *in, __global int4 *xy, __global Type *out) {
+std::string sub_group_non_uniform_any_all_all_equal_source = R"(
+ __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) {
int gid = get_global_id(0);
XY(xy,gid);
- uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
- if (elect_work_item & WORK_ITEMS_MASK){
- out[gid] = sub_group_non_uniform_all_equal(in[gid]);
+ uint subgroup_local_id = get_sub_group_local_id();
+ uint elect_work_item = 1 << (subgroup_local_id % 32);
+ uint work_item_mask;
+ if(subgroup_local_id < 32) {
+ work_item_mask = work_item_mask_vector.x;
+ } else if(subgroup_local_id < 64) {
+ work_item_mask = work_item_mask_vector.y;
+ } else if(subgroup_local_id < 96) {
+ work_item_mask = work_item_mask_vector.z;
+ } else if(subgroup_local_id < 128) {
+ work_item_mask = work_item_mask_vector.w;
+ }
+ if (elect_work_item & work_item_mask){
+ out[gid] = %s(in[gid]);
}
}
)";
@@ -262,7 +248,7 @@ static const char *non_uniform_all_equal_source = R"(
template <typename T> int run_vote_all_equal_for_type(RunTestForType rft)
{
int error = rft.run_impl<T, VOTE<T, NonUniformVoteOp::all_equal>>(
- "test_non_uniform_all_equal", non_uniform_all_equal_source);
+ "sub_group_non_uniform_all_equal");
return error;
}
}
@@ -272,17 +258,19 @@ int test_subgroup_functions_non_uniform_vote(cl_device_id device,
cl_command_queue queue,
int num_elements)
{
- std::vector<std::string> required_extensions = {
- "cl_khr_subgroup_non_uniform_vote"
- };
+ if (!is_extension_available(device, "cl_khr_subgroup_non_uniform_vote"))
+ {
+ log_info("cl_khr_subgroup_non_uniform_vote is not supported on this "
+ "device, skipping test.\n");
+ return TEST_SKIPPED_ITSELF;
+ }
- std::vector<uint32_t> masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555,
- 0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00,
- 0x00ffff00, 0x80000000 };
constexpr size_t global_work_size = 170;
constexpr size_t local_work_size = 64;
- WorkGroupParams test_params(global_work_size, local_work_size,
- required_extensions, masks);
+ WorkGroupParams test_params(global_work_size, local_work_size, 3);
+ test_params.save_kernel_source(
+ sub_group_non_uniform_any_all_all_equal_source);
+ test_params.save_kernel_source(sub_group_elect_source, "sub_group_elect");
RunTestForType rft(device, context, queue, num_elements, test_params);
int error = run_vote_all_equal_for_type<cl_int>(rft);
@@ -294,10 +282,10 @@ int test_subgroup_functions_non_uniform_vote(cl_device_id device,
error |= run_vote_all_equal_for_type<subgroups::cl_half>(rft);
error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::all>>(
- "test_non_uniform_all", non_uniform_all_source);
+ "sub_group_non_uniform_all");
error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::elect>>(
- "test_elect", elect_source);
+ "sub_group_elect");
error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::any>>(
- "test_non_uniform_any", non_uniform_any_source);
+ "sub_group_non_uniform_any");
return error;
}
diff --git a/test_conformance/subgroups/test_subgroup_rotate.cpp b/test_conformance/subgroups/test_subgroup_rotate.cpp
new file mode 100644
index 00000000..db0f48eb
--- /dev/null
+++ b/test_conformance/subgroups/test_subgroup_rotate.cpp
@@ -0,0 +1,109 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "procs.h"
+#include "subhelpers.h"
+#include "subgroup_common_kernels.h"
+#include "subgroup_common_templates.h"
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+
+namespace {
+
+template <typename T> int run_rotate_for_type(RunTestForType rft)
+{
+ int error = rft.run_impl<T, SHF<T, ShuffleOp::rotate>>("sub_group_rotate");
+ return error;
+}
+
+std::string sub_group_clustered_rotate_source = R"(
+ __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out,
+ uint cluster_size) {
+ Type r;
+ int gid = get_global_id(0);
+ XY(xy,gid);
+ Type x = in[gid];
+ int delta = xy[gid].z;
+ switch (cluster_size) {
+ case 1: r = %s(x, delta, 1); break;
+ case 2: r = %s(x, delta, 2); break;
+ case 4: r = %s(x, delta, 4); break;
+ case 8: r = %s(x, delta, 8); break;
+ case 16: r = %s(x, delta, 16); break;
+ case 32: r = %s(x, delta, 32); break;
+ case 64: r = %s(x, delta, 64); break;
+ case 128: r = %s(x, delta, 128); break;
+ }
+ out[gid] = r;
+ }
+)";
+
+template <typename T> int run_clustered_rotate_for_type(RunTestForType rft)
+{
+ int error = rft.run_impl<T, SHF<T, ShuffleOp::clustered_rotate>>(
+ "sub_group_clustered_rotate");
+ return error;
+}
+
+}
+
+int test_subgroup_functions_rotate(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ if (!is_extension_available(device, "cl_khr_subgroup_rotate"))
+ {
+ log_info("cl_khr_subgroup_rotate is not supported on this device, "
+ "skipping test.\n");
+ return TEST_SKIPPED_ITSELF;
+ }
+
+ constexpr size_t global_work_size = 2000;
+ constexpr size_t local_work_size = 200;
+ WorkGroupParams test_params(global_work_size, local_work_size);
+ test_params.save_kernel_source(sub_group_generic_source);
+ RunTestForType rft(device, context, queue, num_elements, test_params);
+
+ int error = run_rotate_for_type<cl_int>(rft);
+ error |= run_rotate_for_type<cl_uint>(rft);
+ error |= run_rotate_for_type<cl_long>(rft);
+ error |= run_rotate_for_type<cl_ulong>(rft);
+ error |= run_rotate_for_type<cl_short>(rft);
+ error |= run_rotate_for_type<cl_ushort>(rft);
+ error |= run_rotate_for_type<cl_char>(rft);
+ error |= run_rotate_for_type<cl_uchar>(rft);
+ error |= run_rotate_for_type<cl_float>(rft);
+ error |= run_rotate_for_type<cl_double>(rft);
+ error |= run_rotate_for_type<subgroups::cl_half>(rft);
+
+ WorkGroupParams test_params_clustered(global_work_size, local_work_size, -1,
+ 3);
+ test_params_clustered.save_kernel_source(sub_group_clustered_rotate_source);
+ RunTestForType rft_clustered(device, context, queue, num_elements,
+ test_params_clustered);
+
+ error |= run_clustered_rotate_for_type<cl_int>(rft_clustered);
+ error |= run_clustered_rotate_for_type<cl_uint>(rft_clustered);
+ error |= run_clustered_rotate_for_type<cl_long>(rft_clustered);
+ error |= run_clustered_rotate_for_type<cl_ulong>(rft_clustered);
+ error |= run_clustered_rotate_for_type<cl_short>(rft_clustered);
+ error |= run_clustered_rotate_for_type<cl_ushort>(rft_clustered);
+ error |= run_clustered_rotate_for_type<cl_char>(rft_clustered);
+ error |= run_clustered_rotate_for_type<cl_uchar>(rft_clustered);
+ error |= run_clustered_rotate_for_type<cl_float>(rft_clustered);
+ error |= run_clustered_rotate_for_type<cl_double>(rft_clustered);
+ error |= run_clustered_rotate_for_type<subgroups::cl_half>(rft_clustered);
+
+ return error;
+}
diff --git a/test_conformance/subgroups/test_subgroup_shuffle.cpp b/test_conformance/subgroups/test_subgroup_shuffle.cpp
index 049f0982..56231cbf 100644
--- a/test_conformance/subgroups/test_subgroup_shuffle.cpp
+++ b/test_conformance/subgroups/test_subgroup_shuffle.cpp
@@ -15,38 +15,19 @@
//
#include "procs.h"
#include "subhelpers.h"
+#include "subgroup_common_kernels.h"
#include "subgroup_common_templates.h"
#include "harness/typeWrappers.h"
#include <bitset>
namespace {
-static const char* shuffle_xor_source =
- "__kernel void test_sub_group_shuffle_xor(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " out[gid] = sub_group_shuffle_xor(x, xy[gid].z);"
- "}\n";
-
-static const char* shuffle_source =
- "__kernel void test_sub_group_shuffle(const __global Type *in, __global "
- "int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " out[gid] = sub_group_shuffle(x, xy[gid].z);"
- "}\n";
-
template <typename T> int run_shuffle_for_type(RunTestForType rft)
{
- int error = rft.run_impl<T, SHF<T, ShuffleOp::shuffle>>(
- "test_sub_group_shuffle", shuffle_source);
+ int error =
+ rft.run_impl<T, SHF<T, ShuffleOp::shuffle>>("sub_group_shuffle");
error |= rft.run_impl<T, SHF<T, ShuffleOp::shuffle_xor>>(
- "test_sub_group_shuffle_xor", shuffle_xor_source);
+ "sub_group_shuffle_xor");
return error;
}
@@ -55,11 +36,17 @@ template <typename T> int run_shuffle_for_type(RunTestForType rft)
int test_subgroup_functions_shuffle(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
- std::vector<std::string> required_extensions{ "cl_khr_subgroup_shuffle" };
+ if (!is_extension_available(device, "cl_khr_subgroup_shuffle"))
+ {
+ log_info("cl_khr_subgroup_shuffle is not supported on this device, "
+ "skipping test.\n");
+ return TEST_SKIPPED_ITSELF;
+ }
+
constexpr size_t global_work_size = 2000;
constexpr size_t local_work_size = 200;
- WorkGroupParams test_params(global_work_size, local_work_size,
- required_extensions);
+ WorkGroupParams test_params(global_work_size, local_work_size);
+ test_params.save_kernel_source(sub_group_generic_source);
RunTestForType rft(device, context, queue, num_elements, test_params);
int error = run_shuffle_for_type<cl_int>(rft);
diff --git a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp
index 6000c970..caa1dccc 100644
--- a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp
+++ b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp
@@ -15,37 +15,19 @@
//
#include "procs.h"
#include "subhelpers.h"
+#include "subgroup_common_kernels.h"
#include "subgroup_common_templates.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
namespace {
-static const char* shuffle_down_source =
- "__kernel void test_sub_group_shuffle_down(const __global Type *in, "
- "__global int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " out[gid] = sub_group_shuffle_down(x, xy[gid].z);"
- "}\n";
-static const char* shuffle_up_source =
- "__kernel void test_sub_group_shuffle_up(const __global Type *in, __global "
- "int4 *xy, __global Type *out)\n"
- "{\n"
- " int gid = get_global_id(0);\n"
- " XY(xy,gid);\n"
- " Type x = in[gid];\n"
- " out[gid] = sub_group_shuffle_up(x, xy[gid].z);"
- "}\n";
-
template <typename T> int run_shuffle_relative_for_type(RunTestForType rft)
{
- int error = rft.run_impl<T, SHF<T, ShuffleOp::shuffle_up>>(
- "test_sub_group_shuffle_up", shuffle_up_source);
+ int error =
+ rft.run_impl<T, SHF<T, ShuffleOp::shuffle_up>>("sub_group_shuffle_up");
error |= rft.run_impl<T, SHF<T, ShuffleOp::shuffle_down>>(
- "test_sub_group_shuffle_down", shuffle_down_source);
+ "sub_group_shuffle_down");
return error;
}
@@ -56,13 +38,17 @@ int test_subgroup_functions_shuffle_relative(cl_device_id device,
cl_command_queue queue,
int num_elements)
{
- std::vector<std::string> required_extensions = {
- "cl_khr_subgroup_shuffle_relative"
- };
+ if (!is_extension_available(device, "cl_khr_subgroup_shuffle_relative"))
+ {
+ log_info("cl_khr_subgroup_shuffle_relative is not supported on this "
+ "device, skipping test.\n");
+ return TEST_SKIPPED_ITSELF;
+ }
+
constexpr size_t global_work_size = 2000;
constexpr size_t local_work_size = 200;
- WorkGroupParams test_params(global_work_size, local_work_size,
- required_extensions);
+ WorkGroupParams test_params(global_work_size, local_work_size);
+ test_params.save_kernel_source(sub_group_generic_source);
RunTestForType rft(device, context, queue, num_elements, test_params);
int error = run_shuffle_relative_for_type<cl_int>(rft);
diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp
index 7ffa6a7c..b69f3138 100644
--- a/test_conformance/subgroups/test_workitem.cpp
+++ b/test_conformance/subgroups/test_workitem.cpp
@@ -16,6 +16,7 @@
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
+#include <CL/cl.h>
struct get_test_data
{
@@ -251,8 +252,21 @@ int test_work_item_functions(cl_device_id device, cl_context context,
global = local * 5;
- // Make sure we have a flexible range
- global += 3 * local / 4;
+ // Non-uniform work-groups are an optional feature from 3.0 onward.
+ cl_bool device_supports_non_uniform_wg = CL_TRUE;
+ if (get_device_cl_version(device) >= Version(3, 0))
+ {
+ error = clGetDeviceInfo(
+ device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool),
+ &device_supports_non_uniform_wg, nullptr);
+ test_error(error, "clGetDeviceInfo failed");
+ }
+
+ if (device_supports_non_uniform_wg)
+ {
+ // Make sure we have a flexible range
+ global += 3 * local / 4;
+ }
// Collect the data
memset((void *)&result, 0xf0, sizeof(result));
@@ -327,4 +341,4 @@ int test_work_item_functions_ext(cl_device_id device, cl_context context,
return test_work_item_functions(device, context, queue, num_elements,
false);
-} \ No newline at end of file
+}
diff --git a/test_conformance/submission_details_template.txt b/test_conformance/submission_details_template.txt
index 9d276a62..ff624837 100644
--- a/test_conformance/submission_details_template.txt
+++ b/test_conformance/submission_details_template.txt
@@ -81,6 +81,12 @@ Platform Version:
#
Tests version:
+# Commit SHAs (7-digit) of any cherry-picked patches subsequent to tagged
+# version. Any patches included must apply without conflicts to the tagged
+# version in the order listed.
+#
+Patches:
+
# Implementations that support cl_khr_icd are required to use a loader to run
# the tests and document the loader that was used.
#
diff --git a/test_conformance/vectors/test_step.cpp b/test_conformance/vectors/test_step.cpp
index 2f6ad187..089bad2f 100644
--- a/test_conformance/vectors/test_step.cpp
+++ b/test_conformance/vectors/test_step.cpp
@@ -172,6 +172,8 @@ int test_step_internal(cl_device_id deviceID, cl_context context,
destroyClState(pClState);
return -1;
}
+
+ clStateDestroyProgramAndKernel(pClState);
}
}
diff --git a/test_conformance/vulkan/CMakeLists.txt b/test_conformance/vulkan/CMakeLists.txt
new file mode 100644
index 00000000..4f43172a
--- /dev/null
+++ b/test_conformance/vulkan/CMakeLists.txt
@@ -0,0 +1,50 @@
+set (MODULE_NAME VULKAN)
+
+if(WIN32)
+ list(APPEND CLConform_LIBRARIES vulkan-1)
+else(WIN32)
+ list(APPEND CLConform_LIBRARIES vulkan dl)
+endif(WIN32)
+set(CMAKE_CXX_FLAGS "-fpermissive")
+if(WIN32)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVK_USE_PLATFORM_WIN32_KHR")
+endif(WIN32)
+
+set (CLConform_VULKAN_LIBRARIES_DIR "${VULKAN_LIB_DIR}")
+
+link_directories(${CLConform_VULKAN_LIBRARIES_DIR})
+
+list(APPEND CLConform_INCLUDE_DIR ${VULKAN_INCLUDE_DIR})
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+include_directories (${CLConform_INCLUDE_DIR})
+
+set (${MODULE_NAME}_SOURCES
+ main.cpp
+ test_vulkan_interop_buffer.cpp
+ test_vulkan_interop_image.cpp
+ test_vulkan_api_consistency.cpp
+ test_vulkan_platform_device_info.cpp
+ vulkan_interop_common/vulkan_wrapper.cpp
+ vulkan_interop_common/vulkan_interop_common.cpp
+ vulkan_interop_common/opencl_vulkan_wrapper.cpp
+ vulkan_interop_common/vulkan_utility.cpp
+ vulkan_interop_common/vulkan_list_map.cpp
+ ../../test_common/harness/genericThread.cpp
+ ../../test_common/harness/errorHelpers.cpp
+ ../../test_common/harness/testHarness.cpp
+ ../../test_common/harness/kernelHelpers.cpp
+ ../../test_common/harness/mt19937.cpp
+ ../../test_common/harness/msvc9.c
+ ../../test_common/harness/parseParameters.cpp
+ ../../test_common/harness/deviceInfo.cpp
+ ../../test_common/harness/crc32.cpp
+ )
+
+set_source_files_properties(
+ ${${MODULE_NAME}_SOURCES}
+ PROPERTIES LANGUAGE CXX)
+include_directories("./vulkan_interop_common/")
+
+include(../CMakeCommon.txt)
diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp
new file mode 100644
index 00000000..2eeb0c36
--- /dev/null
+++ b/test_conformance/vulkan/main.cpp
@@ -0,0 +1,346 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <math.h>
+#include <string.h>
+
+#if !defined(__APPLE__)
+#include <CL/cl.h>
+#else
+#include <OpenCL/cl.h>
+#endif
+
+
+#include "procs.h"
+#include "harness/testHarness.h"
+#include "harness/parseParameters.h"
+#include "harness/deviceInfo.h"
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif
+#include <vulkan_interop_common.hpp>
+#include <vulkan_wrapper.hpp>
+
+#define BUFFERSIZE 3000
+
+static void params_reset()
+{
+ numCQ = 1;
+ multiImport = false;
+ multiCtx = false;
+}
+
+extern int test_buffer_common(cl_device_id device_, cl_context context_,
+ cl_command_queue queue_, int numElements_);
+extern int test_image_common(cl_device_id device_, cl_context context_,
+ cl_command_queue queue_, int numElements_);
+
+int test_buffer_single_queue(cl_device_id device_, cl_context context_,
+ cl_command_queue queue_, int numElements_)
+{
+ params_reset();
+ log_info("RUNNING TEST WITH ONE QUEUE...... \n\n");
+ return test_buffer_common(device_, context_, queue_, numElements_);
+}
+int test_buffer_multiple_queue(cl_device_id device_, cl_context context_,
+ cl_command_queue queue_, int numElements_)
+{
+ params_reset();
+ numCQ = 2;
+ log_info("RUNNING TEST WITH TWO QUEUE...... \n\n");
+ return test_buffer_common(device_, context_, queue_, numElements_);
+}
+int test_buffer_multiImport_sameCtx(cl_device_id device_, cl_context context_,
+ cl_command_queue queue_, int numElements_)
+{
+ params_reset();
+ multiImport = true;
+ log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT "
+ "IN SAME CONTEXT...... \n\n");
+ return test_buffer_common(device_, context_, queue_, numElements_);
+}
+int test_buffer_multiImport_diffCtx(cl_device_id device_, cl_context context_,
+ cl_command_queue queue_, int numElements_)
+{
+ params_reset();
+ multiImport = true;
+ multiCtx = true;
+ log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT "
+ "IN DIFFERENT CONTEXT...... \n\n");
+ return test_buffer_common(device_, context_, queue_, numElements_);
+}
+int test_image_single_queue(cl_device_id device_, cl_context context_,
+ cl_command_queue queue_, int numElements_)
+{
+ params_reset();
+ log_info("RUNNING TEST WITH ONE QUEUE...... \n\n");
+ return test_image_common(device_, context_, queue_, numElements_);
+}
+int test_image_multiple_queue(cl_device_id device_, cl_context context_,
+ cl_command_queue queue_, int numElements_)
+{
+ params_reset();
+ numCQ = 2;
+ log_info("RUNNING TEST WITH TWO QUEUE...... \n\n");
+ return test_image_common(device_, context_, queue_, numElements_);
+}
+
+test_definition test_list[] = { ADD_TEST(buffer_single_queue),
+ ADD_TEST(buffer_multiple_queue),
+ ADD_TEST(buffer_multiImport_sameCtx),
+ ADD_TEST(buffer_multiImport_diffCtx),
+ ADD_TEST(image_single_queue),
+ ADD_TEST(image_multiple_queue),
+ ADD_TEST(consistency_external_buffer),
+ ADD_TEST(consistency_external_image),
+ ADD_TEST(consistency_external_semaphore),
+ ADD_TEST(platform_info),
+ ADD_TEST(device_info) };
+
+const int test_num = ARRAY_SIZE(test_list);
+
+cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
+char *choosen_platform_name = NULL;
+cl_platform_id platform = NULL;
+cl_int choosen_platform_index = -1;
+char platform_name[1024] = "";
+cl_platform_id select_platform = NULL;
+char *extensions = NULL;
+size_t extensionSize = 0;
+cl_uint num_devices = 0;
+cl_uint device_no = 0;
+cl_device_id *devices;
+const size_t bufsize = BUFFERSIZE;
+char buf[BUFFERSIZE];
+cl_uchar uuid[CL_UUID_SIZE_KHR];
+unsigned int numCQ;
+bool multiImport;
+bool multiCtx;
+bool debug_trace = false;
+bool useSingleImageKernel = false;
+bool useDeviceLocal = false;
+bool disableNTHandleType = false;
+bool enableOffset = false;
+bool non_dedicated = false;
+
+static void printUsage(const char *execName)
+{
+ const char *p = strrchr(execName, '/');
+ if (p != NULL) execName = p + 1;
+
+ log_info("Usage: %s [test_names] [options]\n", execName);
+ log_info("Test names:\n");
+ for (int i = 0; i < test_num; i++)
+ {
+ log_info("\t%s\n", test_list[i].name);
+ }
+ log_info("\n");
+ log_info("Options:\n");
+ log_info("\t--debug_trace - Enables additional debug info logging\n");
+ log_info("\t--non_dedicated - Choose dedicated Vs. non_dedicated \n");
+}
+
+size_t parseParams(int argc, const char *argv[], const char **argList)
+{
+ size_t argCount = 1;
+ for (int i = 1; i < argc; i++)
+ {
+ if (argv[i] == NULL) break;
+ if (argv[i][0] == '-')
+ {
+ if (!strcmp(argv[i], "--debug_trace"))
+ {
+ debug_trace = true;
+ }
+ if (!strcmp(argv[i], "--useSingleImageKernel"))
+ {
+ useSingleImageKernel = true;
+ }
+ if (!strcmp(argv[i], "--useDeviceLocal"))
+ {
+ useDeviceLocal = true;
+ }
+ if (!strcmp(argv[i], "--disableNTHandleType"))
+ {
+ disableNTHandleType = true;
+ }
+ if (!strcmp(argv[i], "--enableOffset"))
+ {
+ enableOffset = true;
+ }
+ if (!strcmp(argv[i], "--non_dedicated"))
+ {
+ non_dedicated = true;
+ }
+ if (strcmp(argv[i], "-h") == 0)
+ {
+ printUsage(argv[0]);
+ argCount = 0; // Returning argCount=0 to assert error in main()
+ break;
+ }
+ }
+ else
+ {
+ argList[argCount] = argv[i];
+ argCount++;
+ }
+ }
+ return argCount;
+}
+
+int main(int argc, const char *argv[])
+{
+ int errNum = 0;
+
+ test_start();
+ params_reset();
+
+ if (!checkVkSupport())
+ {
+ log_info("Vulkan supported GPU not found \n");
+ log_info("TEST SKIPPED \n");
+ return 0;
+ }
+
+ VulkanDevice vkDevice;
+
+ cl_device_type requestedDeviceType = CL_DEVICE_TYPE_GPU;
+ char *force_cpu = getenv("CL_DEVICE_TYPE");
+ if (force_cpu != NULL)
+ {
+ if (strcmp(force_cpu, "gpu") == 0
+ || strcmp(force_cpu, "CL_DEVICE_TYPE_GPU") == 0)
+ requestedDeviceType = CL_DEVICE_TYPE_GPU;
+ else if (strcmp(force_cpu, "cpu") == 0
+ || strcmp(force_cpu, "CL_DEVICE_TYPE_CPU") == 0)
+ requestedDeviceType = CL_DEVICE_TYPE_CPU;
+ else if (strcmp(force_cpu, "accelerator") == 0
+ || strcmp(force_cpu, "CL_DEVICE_TYPE_ACCELERATOR") == 0)
+ requestedDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
+ else if (strcmp(force_cpu, "CL_DEVICE_TYPE_DEFAULT") == 0)
+ requestedDeviceType = CL_DEVICE_TYPE_DEFAULT;
+ }
+
+ if (requestedDeviceType != CL_DEVICE_TYPE_GPU)
+ {
+ log_info("Vulkan tests can only run on a GPU device.\n");
+ return 0;
+ }
+ gDeviceType = CL_DEVICE_TYPE_GPU;
+
+ const char **argList = (const char **)calloc(argc, sizeof(char *));
+ size_t argCount = parseParams(argc, argv, argList);
+ if (argCount == 0) return 0;
+ // get the platform ID
+ errNum = clGetPlatformIDs(1, &platform, NULL);
+ if (errNum != CL_SUCCESS)
+ {
+ print_error(errNum, "Error: Failed to get platform\n");
+ return errNum;
+ }
+
+ errNum =
+ clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
+ if (CL_SUCCESS != errNum)
+ {
+ print_error(errNum, "clGetDeviceIDs failed in returning of devices\n");
+ return errNum;
+ }
+ devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
+ if (NULL == devices)
+ {
+ print_error(errNum, "Unable to allocate memory for devices\n");
+ return CL_OUT_OF_HOST_MEMORY;
+ }
+ errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices,
+ NULL);
+ if (CL_SUCCESS != errNum)
+ {
+ print_error(errNum, "Failed to get deviceID.\n");
+ return errNum;
+ }
+ for (device_no = 0; device_no < num_devices; device_no++)
+ {
+ errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0,
+ NULL, &extensionSize);
+ if (CL_SUCCESS != errNum)
+ {
+ log_error("Error in clGetDeviceInfo for getting "
+ "device_extension size....\n");
+ return errNum;
+ }
+ extensions = (char *)malloc(extensionSize);
+ if (NULL == extensions)
+ {
+ log_error("Unable to allocate memory for extensions\n");
+ return CL_OUT_OF_HOST_MEMORY;
+ }
+ errNum =
+ clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS,
+ extensionSize, extensions, NULL /*&extensionSize*/);
+ if (CL_SUCCESS != errNum)
+ {
+ print_error(errNum,
+ "Error in clGetDeviceInfo for getting "
+ "device_extension\n");
+ return errNum;
+ }
+ errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR,
+ CL_UUID_SIZE_KHR, uuid, &extensionSize);
+ if (CL_SUCCESS != errNum)
+ {
+ print_error(errNum, "clGetDeviceInfo failed with error\n ");
+ return errNum;
+ }
+ errNum =
+ memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE);
+ if (errNum == 0)
+ {
+ break;
+ }
+ }
+ if (device_no >= num_devices)
+ {
+ fprintf(stderr,
+ "OpenCL error: "
+ "No Vulkan-OpenCL Interop capable GPU found.\n");
+ }
+ if (!(is_extension_available(devices[device_no], "cl_khr_external_memory")
+ && is_extension_available(devices[device_no],
+ "cl_khr_external_semaphore")))
+ {
+ log_info("Device does not support cl_khr_external_memory "
+ "or cl_khr_external_semaphore\n");
+ log_info(" TEST SKIPPED\n");
+ return CL_SUCCESS;
+ }
+ init_cl_vk_ext(platform);
+
+ // Execute tests.
+ // Note: don't use the entire harness, because we have a different way of
+ // obtaining the device (via the context)
+ errNum = parseAndCallCommandLineTests(argCount, argList, devices[device_no],
+ test_num, test_list, true, 0, 1024);
+ return errNum;
+}
diff --git a/test_conformance/vulkan/procs.h b/test_conformance/vulkan/procs.h
new file mode 100644
index 00000000..37bf7869
--- /dev/null
+++ b/test_conformance/vulkan/procs.h
@@ -0,0 +1,38 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "harness/mt19937.h"
+
+extern int test_vulkan_interop_buffer(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_vulkan_interop_image(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_consistency_external_buffer(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_consistency_external_image(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_consistency_external_semaphore(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_platform_info(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_device_info(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
diff --git a/test_conformance/vulkan/shaders/buffer.comp b/test_conformance/vulkan/shaders/buffer.comp
new file mode 100644
index 00000000..d8756f92
--- /dev/null
+++ b/test_conformance/vulkan/shaders/buffer.comp
@@ -0,0 +1,28 @@
+#version 450
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
+
+#define MAX_BUFFERS 5
+
+layout(binding = 0) buffer Params
+{
+ uint32_t numBuffers;
+ uint32_t bufferSize;
+ uint32_t interBufferOffset;
+};
+layout(binding = 1) buffer Buffer
+{
+ uint8_t ptr[];
+} bufferPtrList[MAX_BUFFERS];
+layout(local_size_x = 512) in;
+void main() {
+ for (uint32_t bufIdx = 0; bufIdx < numBuffers; bufIdx++) {
+ uint32_t ptrIdx = gl_GlobalInvocationID.x;
+ uint32_t limit = bufferSize;
+ while (ptrIdx < limit) {
+ bufferPtrList[bufIdx].ptr[ptrIdx]++;
+ ptrIdx += (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
+ }
+ }
+} \ No newline at end of file
diff --git a/test_conformance/vulkan/shaders/buffer.spv b/test_conformance/vulkan/shaders/buffer.spv
new file mode 100644
index 00000000..685523ba
--- /dev/null
+++ b/test_conformance/vulkan/shaders/buffer.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D.comp b/test_conformance/vulkan/shaders/image2D.comp
new file mode 100644
index 00000000..42fa2f73
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D.comp
@@ -0,0 +1,31 @@
+#version 450
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
+
+#define MAX_2D_IMAGES 5
+#define MAX_2D_IMAGE_MIP_LEVELS 11
+#define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES * MAX_2D_IMAGE_MIP_LEVELS
+
+layout(binding = 0) buffer Params
+{
+ uint32_t numImage2DDescriptors;
+};
+layout(binding = 1, rgba32f ) uniform image2D image2DList[ MAX_2D_IMAGE_DESCRIPTORS ];
+layout(local_size_x = 32, local_size_y = 32) in;
+void main() {
+ uvec3 numThreads = gl_NumWorkGroups * gl_WorkGroupSize;
+ for (uint32_t image2DIdx = 0; image2DIdx < numImage2DDescriptors; image2DIdx++) {
+ ivec2 imageDim = imageSize(image2DList[image2DIdx]);
+ uint32_t heightBy2 = imageDim.y / 2;
+ for (uint32_t row = gl_GlobalInvocationID.y; row < heightBy2; row += numThreads.y) {
+ for (uint32_t col = gl_GlobalInvocationID.x; col < imageDim.x; col += numThreads.x) {
+ ivec2 coordsA = ivec2(col, row);
+ ivec2 coordsB = ivec2(col, imageDim.y - row - 1);
+ vec4 dataA = imageLoad(image2DList[image2DIdx], coordsA);
+ vec4 dataB = imageLoad(image2DList[image2DIdx], coordsB);
+ imageStore(image2DList[image2DIdx], coordsA, dataB);
+ imageStore(image2DList[image2DIdx], coordsB, dataA);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/test_conformance/vulkan/shaders/image2D_r16i.spv b/test_conformance/vulkan/shaders/image2D_r16i.spv
new file mode 100644
index 00000000..00c5c283
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r16i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r16ui.spv b/test_conformance/vulkan/shaders/image2D_r16ui.spv
new file mode 100644
index 00000000..87514d9f
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r16ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r32f.spv b/test_conformance/vulkan/shaders/image2D_r32f.spv
new file mode 100644
index 00000000..e82c9c19
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r32f.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r32i.spv b/test_conformance/vulkan/shaders/image2D_r32i.spv
new file mode 100644
index 00000000..7ea8d26f
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r32i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r32ui.spv b/test_conformance/vulkan/shaders/image2D_r32ui.spv
new file mode 100644
index 00000000..dbcdbc5f
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r32ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r8i.spv b/test_conformance/vulkan/shaders/image2D_r8i.spv
new file mode 100644
index 00000000..1a641475
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r8i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r8ui.spv b/test_conformance/vulkan/shaders/image2D_r8ui.spv
new file mode 100644
index 00000000..a90ccf98
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r8ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg16i.spv b/test_conformance/vulkan/shaders/image2D_rg16i.spv
new file mode 100644
index 00000000..07996173
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg16i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg16ui.spv b/test_conformance/vulkan/shaders/image2D_rg16ui.spv
new file mode 100644
index 00000000..f73e096b
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg16ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg32f.spv b/test_conformance/vulkan/shaders/image2D_rg32f.spv
new file mode 100644
index 00000000..1489660e
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg32f.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg32i.spv b/test_conformance/vulkan/shaders/image2D_rg32i.spv
new file mode 100644
index 00000000..b7d302f4
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg32i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg32ui.spv b/test_conformance/vulkan/shaders/image2D_rg32ui.spv
new file mode 100644
index 00000000..6cf2f1b8
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg32ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg8i.spv b/test_conformance/vulkan/shaders/image2D_rg8i.spv
new file mode 100644
index 00000000..a71b9bf0
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg8i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg8ui.spv b/test_conformance/vulkan/shaders/image2D_rg8ui.spv
new file mode 100644
index 00000000..2aca9290
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg8ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba16i.spv b/test_conformance/vulkan/shaders/image2D_rgba16i.spv
new file mode 100644
index 00000000..0cb95dfd
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba16i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba16ui.spv b/test_conformance/vulkan/shaders/image2D_rgba16ui.spv
new file mode 100644
index 00000000..84c3d3db
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba16ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba32f.spv b/test_conformance/vulkan/shaders/image2D_rgba32f.spv
new file mode 100644
index 00000000..35136c58
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba32f.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba32i.spv b/test_conformance/vulkan/shaders/image2D_rgba32i.spv
new file mode 100644
index 00000000..4d1ae581
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba32i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba32ui.spv b/test_conformance/vulkan/shaders/image2D_rgba32ui.spv
new file mode 100644
index 00000000..bed86f0c
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba32ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba8i.spv b/test_conformance/vulkan/shaders/image2D_rgba8i.spv
new file mode 100644
index 00000000..edf8c58c
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba8i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba8ui.spv b/test_conformance/vulkan/shaders/image2D_rgba8ui.spv
new file mode 100644
index 00000000..bb9a770c
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba8ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
new file mode 100644
index 00000000..f22ac319
--- /dev/null
+++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
@@ -0,0 +1,568 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <vulkan_interop_common.hpp>
+#include <opencl_vulkan_wrapper.hpp>
+#include <vulkan_wrapper.hpp>
+#if !defined(__APPLE__)
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+#else
+#include <OpenCL/cl.h>
+#include <OpenCL/cl_ext.h>
+#endif
+
+#include <assert.h>
+#include <vector>
+#include <iostream>
+#include <string.h>
+#include "harness/testHarness.h"
+#include "harness/typeWrappers.h"
+#include "harness/deviceInfo.h"
+
+int test_consistency_external_buffer(cl_device_id deviceID, cl_context _context,
+ cl_command_queue _queue, int num_elements)
+{
+ cl_int errNum;
+ VulkanDevice vkDevice;
+ // Context and command queue creation
+ cl_platform_id platform = NULL;
+ cl_context context = NULL;
+ cl_command_queue cmd_queue = NULL;
+
+ cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
+ errNum = clGetPlatformIDs(1, &platform, NULL);
+ test_error(errNum, "Failed to get platform Id");
+
+ contextProperties[1] = (cl_context_properties)platform;
+
+ context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
+ NULL, NULL, &errNum);
+ test_error(errNum, "Unable to create context with properties");
+
+ cmd_queue = clCreateCommandQueue(context, deviceID, 0, &errNum);
+ test_error(errNum, "Unable to create command queue");
+
+ uint32_t bufferSize = 32;
+ cl_device_id devList[] = { deviceID, NULL };
+
+#ifdef _WIN32
+ if (!is_extension_available(devList[0], "cl_khr_external_memory_win32"))
+ {
+ throw std::runtime_error("Device does not support "
+ "cl_khr_external_memory_win32 extension \n");
+ }
+#else
+ if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd"))
+ {
+ throw std::runtime_error(
+ "Device does not support "
+ "cl_khr_external_memory_opaque_fd extension \n");
+ }
+#endif
+
+ VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
+ getSupportedVulkanExternalMemoryHandleTypeList()[0];
+
+ VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024, vkExternalMemoryHandleType);
+ const VulkanMemoryTypeList& memoryTypeList =
+ vkDummyBuffer.getMemoryTypeList();
+
+ VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(
+ vkDevice, bufferSize, memoryTypeList[0], vkExternalMemoryHandleType);
+ VulkanBufferList vkBufferList(1, vkDevice, bufferSize,
+ vkExternalMemoryHandleType);
+
+ vkDeviceMem->bindBuffer(vkBufferList[0], 0);
+
+ void* handle = NULL;
+ int fd;
+
+ std::vector<cl_mem_properties> extMemProperties{
+ (cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR,
+ (cl_mem_properties)devList[0],
+ (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR,
+ };
+ cl_external_memory_handle_type_khr type;
+ switch (vkExternalMemoryHandleType)
+ {
+#ifdef _WIN32
+ case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT:
+ handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType);
+ type = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR;
+ errNum = check_external_memory_handle_type(devList[0], type);
+ extMemProperties.push_back((cl_mem_properties)type);
+ extMemProperties.push_back((cl_mem_properties)handle);
+ break;
+ case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+ handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType);
+ type = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR;
+ errNum = check_external_memory_handle_type(devList[0], type);
+ extMemProperties.push_back((cl_mem_properties)type);
+ extMemProperties.push_back((cl_mem_properties)handle);
+ break;
+#else
+ case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD:
+ fd = (int)vkDeviceMem->getHandle(vkExternalMemoryHandleType);
+ type = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR;
+ errNum = check_external_memory_handle_type(devList[0], type);
+ extMemProperties.push_back((cl_mem_properties)type);
+ extMemProperties.push_back((cl_mem_properties)fd);
+ break;
+#endif
+ default:
+ errNum = TEST_FAIL;
+ log_error("Unsupported external memory handle type \n");
+ break;
+ }
+ if (errNum != CL_SUCCESS)
+ {
+ log_error("Checks failed for "
+ "CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR\n");
+ return TEST_FAIL;
+ }
+ extMemProperties.push_back(0);
+
+ clMemWrapper buffer;
+
+ // Passing NULL properties and a valid extMem_desc size
+ buffer = clCreateBufferWithProperties(context, NULL, 1, bufferSize, NULL,
+ &errNum);
+ test_error(errNum, "Unable to create buffer with NULL properties");
+
+ buffer.reset();
+
+ // Passing valid extMemProperties and buffersize
+ buffer = clCreateBufferWithProperties(context, extMemProperties.data(), 1,
+ bufferSize, NULL, &errNum);
+ test_error(errNum, "Unable to create buffer with Properties");
+
+ buffer.reset();
+
+ // Not passing external memory handle
+ std::vector<cl_mem_properties> extMemProperties2{
+#ifdef _WIN32
+ (cl_mem_properties)type,
+ NULL, // Passing NULL handle
+#else
+ (cl_mem_properties)type,
+ (cl_mem_properties)-64, // Passing random invalid fd
+#endif
+ (cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR,
+ (cl_mem_properties)devList[0],
+ (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR,
+ 0
+ };
+ buffer = clCreateBufferWithProperties(context, extMemProperties2.data(), 1,
+ bufferSize, NULL, &errNum);
+ test_failure_error(errNum, CL_INVALID_VALUE,
+ "Should return CL_INVALID_VALUE ");
+
+ buffer.reset();
+
+ // Passing extMem_desc size = 0 but valid memProperties, CL_INVALID_SIZE
+ // should be returned.
+ buffer = clCreateBufferWithProperties(context, extMemProperties.data(), 1,
+ 0, NULL, &errNum);
+ test_failure_error(errNum, CL_INVALID_BUFFER_SIZE,
+ "Should return CL_INVALID_BUFFER_SIZE");
+
+ return TEST_PASS;
+}
+
+int test_consistency_external_image(cl_device_id deviceID, cl_context _context,
+ cl_command_queue _queue, int num_elements)
+{
+ cl_int errNum;
+ VulkanDevice vkDevice;
+
+ // Context and command queue creation
+ cl_platform_id platform = NULL;
+ cl_context context = NULL;
+ cl_command_queue cmd_queue = NULL;
+
+ cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
+ errNum = clGetPlatformIDs(1, &platform, NULL);
+ test_error(errNum, "Failed to get platform id");
+
+ contextProperties[1] = (cl_context_properties)platform;
+
+ context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
+ NULL, NULL, &errNum);
+ test_error(errNum, "Unable to create context with properties");
+
+ cmd_queue = clCreateCommandQueue(context, deviceID, 0, &errNum);
+ test_error(errNum, "Unable to create command queue");
+
+ cl_device_id devList[] = { deviceID, NULL };
+
+#ifdef _WIN32
+ if (!is_extension_available(devList[0], "cl_khr_external_memory_win32"))
+ {
+ throw std::runtime_error("Device does not support"
+ "cl_khr_external_memory_win32 extension \n");
+ }
+#else
+ if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd"))
+ {
+ throw std::runtime_error(
+ "Device does not support cl_khr_external_memory_opaque_fd "
+ "extension \n");
+ }
+#endif
+ uint32_t width = 256;
+ uint32_t height = 16;
+ cl_image_desc image_desc;
+ memset(&image_desc, 0x0, sizeof(cl_image_desc));
+ cl_image_format img_format = { 0 };
+
+ VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
+ getSupportedVulkanExternalMemoryHandleTypeList()[0];
+ VulkanImage2D* vkImage2D =
+ new VulkanImage2D(vkDevice, VULKAN_FORMAT_R8G8B8A8_UNORM, width, height,
+ 1, vkExternalMemoryHandleType);
+
+ const VulkanMemoryTypeList& memoryTypeList = vkImage2D->getMemoryTypeList();
+ uint64_t totalImageMemSize = vkImage2D->getSize();
+
+ log_info("Memory type index: %lu\n", (uint32_t)memoryTypeList[0]);
+ log_info("Memory type property: %d\n",
+ memoryTypeList[0].getMemoryTypeProperty());
+ log_info("Image size : %d\n", totalImageMemSize);
+
+ VulkanDeviceMemory* vkDeviceMem =
+ new VulkanDeviceMemory(vkDevice, totalImageMemSize, memoryTypeList[0],
+ vkExternalMemoryHandleType);
+ vkDeviceMem->bindImage(*vkImage2D, 0);
+
+ void* handle = NULL;
+ int fd;
+ std::vector<cl_mem_properties> extMemProperties{
+ (cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR,
+ (cl_mem_properties)devList[0],
+ (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR,
+ };
+ switch (vkExternalMemoryHandleType)
+ {
+#ifdef _WIN32
+ case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT:
+ handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType);
+ errNum = check_external_memory_handle_type(
+ devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR);
+ extMemProperties.push_back(
+ (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR);
+ extMemProperties.push_back((cl_mem_properties)handle);
+ break;
+ case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+ handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType);
+ errNum = check_external_memory_handle_type(
+ devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR);
+ extMemProperties.push_back(
+ (cl_mem_properties)
+ CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR);
+ extMemProperties.push_back((cl_mem_properties)handle);
+ break;
+#else
+ case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD:
+ fd = (int)vkDeviceMem->getHandle(vkExternalMemoryHandleType);
+ errNum = check_external_memory_handle_type(
+ devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR);
+ extMemProperties.push_back(
+ (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR);
+ extMemProperties.push_back((cl_mem_properties)fd);
+ break;
+#endif
+ default:
+ errNum = TEST_FAIL;
+ log_error("Unsupported external memory handle type \n");
+ break;
+ }
+ if (errNum != CL_SUCCESS)
+ {
+ log_error("Checks failed for "
+ "CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR\n");
+ return TEST_FAIL;
+ }
+ extMemProperties.push_back(0);
+
+ const VkImageCreateInfo VulkanImageCreateInfo =
+ vkImage2D->getVkImageCreateInfo();
+
+ errNum = getCLImageInfoFromVkImageInfo(
+ &VulkanImageCreateInfo, totalImageMemSize, &img_format, &image_desc);
+ if (errNum != CL_SUCCESS)
+ {
+ log_error("getCLImageInfoFromVkImageInfo failed!!!");
+ return TEST_FAIL;
+ }
+
+ clMemWrapper image;
+
+ // Pass valid properties, image_desc and image_format
+ image = clCreateImageWithProperties(
+ context, extMemProperties.data(), CL_MEM_READ_WRITE, &img_format,
+ &image_desc, NULL /* host_ptr */, &errNum);
+ test_error(errNum, "Unable to create Image with Properties");
+ image.reset();
+
+ // Passing properties, image_desc and image_format all as NULL
+ image = clCreateImageWithProperties(context, NULL, CL_MEM_READ_WRITE, NULL,
+ NULL, NULL, &errNum);
+ test_failure_error(
+ errNum, CL_INVALID_IMAGE_DESCRIPTOR,
+ "Image creation must fail with CL_INVALID_IMAGE_DESCRIPTOR "
+ "when all are passed as NULL");
+
+ image.reset();
+
+ // Passing NULL properties and a valid image_format and image_desc
+ image =
+ clCreateImageWithProperties(context, NULL, CL_MEM_READ_WRITE,
+ &img_format, &image_desc, NULL, &errNum);
+ test_error(errNum,
+ "Unable to create image with NULL properties "
+ "with valid image format and image desc");
+
+ image.reset();
+
+ // Passing image_format as NULL
+ image = clCreateImageWithProperties(context, extMemProperties.data(),
+ CL_MEM_READ_WRITE, NULL, &image_desc,
+ NULL, &errNum);
+ test_failure_error(errNum, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
+ "Image creation must fail with "
+ "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"
+ "when image desc passed as NULL");
+
+ image.reset();
+
+ // Passing image_desc as NULL
+ image = clCreateImageWithProperties(context, extMemProperties.data(),
+ CL_MEM_READ_WRITE, &img_format, NULL,
+ NULL, &errNum);
+ test_failure_error(errNum, CL_INVALID_IMAGE_DESCRIPTOR,
+ "Image creation must fail with "
+ "CL_INVALID_IMAGE_DESCRIPTOR "
+ "when image desc passed as NULL");
+ image.reset();
+
+ return TEST_PASS;
+}
+
+// Consistency test for cl_khr_external_semaphore.
+// Creates its own context and command queue on the first platform, checks
+// that semaphore creation/wait/signal/release fail with the mandated error
+// codes on invalid arguments, then imports two Vulkan-exported semaphores
+// (opaque fd on POSIX, opaque WIN32/KMT handles on Windows) and exercises
+// back-to-back signal, back-to-back wait, and release.
+// Returns TEST_PASS on success; test_error/test_failure_error macros return
+// early on failure. Throws std::runtime_error when the required external-
+// semaphore extension or handle type is unavailable.
+// NOTE(review): the _context and _queue parameters are accepted but ignored;
+// the locally created context and cmd_queue are never released on any path.
+// NOTE(review): the default branch of the switch below does not set errNum,
+// so an unsupported handle type may slip past the errNum check that follows
+// (errNum still holds CL_SUCCESS from clCreateCommandQueue).
+int test_consistency_external_semaphore(cl_device_id deviceID,
+ cl_context _context,
+ cl_command_queue _queue,
+ int num_elements)
+{
+ cl_int errNum;
+ VulkanDevice vkDevice;
+ // Context and command queue creation
+ cl_platform_id platform = NULL;
+ cl_context context = NULL;
+ cl_command_queue cmd_queue = NULL;
+
+ errNum = clGetPlatformIDs(1, &platform, NULL);
+ test_error(errNum, "Failed to get platform Id");
+
+ cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
+
+ contextProperties[1] = (cl_context_properties)platform;
+
+ context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
+ NULL, NULL, &errNum);
+ test_error(errNum, "Unable to create context with properties");
+
+ cmd_queue = clCreateCommandQueue(context, deviceID, 0, &errNum);
+ test_error(errNum, "Unable to create command queue");
+
+ cl_device_id devList[] = { deviceID, NULL };
+
+ // The required external-semaphore extension differs per platform.
+#ifdef _WIN32
+ if (!is_extension_available(devList[0], "cl_khr_external_semaphore_win32"))
+ {
+ throw std::runtime_error(
+ "Device does not support cl_khr_external_semaphore_win32 "
+ "extension \n");
+ }
+#else
+ if (!is_extension_available(devList[0],
+ "cl_khr_external_semaphore_opaque_fd"))
+ {
+ throw std::runtime_error(
+ "Device does not support "
+ "cl_khr_external_semaphore_opaque_fd extension \n");
+ }
+#endif
+ // Export two Vulkan semaphores: one for Vk->CL signalling, one for CL->Vk.
+ VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+ getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+ VulkanSemaphore vkVk2Clsemaphore(vkDevice, vkExternalSemaphoreHandleType);
+ VulkanSemaphore vkCl2Vksemaphore(vkDevice, vkExternalSemaphoreHandleType);
+ cl_semaphore_khr clCl2Vksemaphore;
+ cl_semaphore_khr clVk2Clsemaphore;
+
+ void* handle1 = NULL;
+ void* handle2 = NULL;
+ int fd1, fd2;
+ // Both property lists start as binary semaphores; the handle-specific
+ // import properties are appended in the switch below.
+ std::vector<cl_semaphore_properties_khr> sema_props1{
+ (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR,
+ (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR,
+ };
+ std::vector<cl_semaphore_properties_khr> sema_props2{
+ (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR,
+ (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR,
+ };
+ switch (vkExternalSemaphoreHandleType)
+ {
+#ifdef _WIN32
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT:
+ log_info(" Opaque NT handles are only supported on Windows\n");
+ handle1 = vkVk2Clsemaphore.getHandle(vkExternalSemaphoreHandleType);
+ handle2 = vkCl2Vksemaphore.getHandle(vkExternalSemaphoreHandleType);
+ errNum = check_external_semaphore_handle_type(
+ devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR);
+ sema_props1.push_back((cl_semaphore_properties_khr)
+ CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR);
+ sema_props1.push_back((cl_semaphore_properties_khr)handle1);
+ sema_props2.push_back((cl_semaphore_properties_khr)
+ CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR);
+ sema_props2.push_back((cl_semaphore_properties_khr)handle2);
+ break;
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+ log_info(" Opaque D3DKMT handles are only supported on Windows\n");
+ handle1 = vkVk2Clsemaphore.getHandle(vkExternalSemaphoreHandleType);
+ handle2 = vkCl2Vksemaphore.getHandle(vkExternalSemaphoreHandleType);
+ errNum = check_external_semaphore_handle_type(
+ devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR);
+ sema_props1.push_back((cl_semaphore_properties_khr)
+ CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR);
+ sema_props1.push_back((cl_semaphore_properties_khr)handle1);
+ sema_props2.push_back((cl_semaphore_properties_khr)
+ CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR);
+ sema_props2.push_back((cl_semaphore_properties_khr)handle2);
+ break;
+#else
+ // NOTE(review): this log line's wording is misleading (this is the
+ // non-Windows branch); string left untouched as it is runtime output.
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD:
+ log_info(" Opaque file descriptors are not supported on Windows\n");
+ fd1 =
+ (int)vkVk2Clsemaphore.getHandle(vkExternalSemaphoreHandleType);
+ fd2 =
+ (int)vkCl2Vksemaphore.getHandle(vkExternalSemaphoreHandleType);
+ errNum = check_external_semaphore_handle_type(
+ devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR);
+ sema_props1.push_back(
+ (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR);
+ sema_props1.push_back((cl_semaphore_properties_khr)fd1);
+ sema_props2.push_back(
+ (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR);
+ sema_props2.push_back((cl_semaphore_properties_khr)fd2);
+ break;
+#endif
+ // NOTE(review): errNum is NOT set here -- see function header comment.
+ default: log_error("Unsupported external memory handle type\n"); break;
+ }
+ if (CL_SUCCESS != errNum)
+ {
+ throw std::runtime_error(
+ "Unsupported external sempahore handle type\n ");
+ }
+ // Terminate both property lists: device handle list, list end, then 0.
+ sema_props1.push_back(
+ (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_KHR);
+ sema_props1.push_back((cl_semaphore_properties_khr)devList[0]);
+ sema_props1.push_back(
+ (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_END_KHR);
+ sema_props2.push_back(
+ (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_KHR);
+ sema_props2.push_back((cl_semaphore_properties_khr)devList[0]);
+ sema_props2.push_back(
+ (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_END_KHR);
+ sema_props1.push_back(0);
+ sema_props2.push_back(0);
+
+ // Pass NULL properties
+ // NOTE(review): cl_ext_semaphore is never used or released afterwards;
+ // on a non-conformant implementation that returns a valid object here
+ // it would leak.
+ cl_semaphore_khr cl_ext_semaphore =
+ clCreateSemaphoreWithPropertiesKHRptr(context, NULL, &errNum);
+ test_failure_error(errNum, CL_INVALID_VALUE,
+ "Semaphore creation must fail with CL_INVALID_VALUE "
+ " when properties are passed as NULL");
+
+
+ // Pass invalid semaphore object to wait
+ errNum =
+ clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, NULL, NULL, 0, NULL, NULL);
+ test_failure_error(errNum, CL_INVALID_VALUE,
+ "clEnqueueWaitSemaphoresKHR fails with CL_INVALID_VALUE "
+ "when invalid semaphore object is passed");
+
+
+ // Pass invalid semaphore object to signal
+ errNum = clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, NULL, NULL, 0, NULL,
+ NULL);
+ test_failure_error(
+ errNum, CL_INVALID_VALUE,
+ "clEnqueueSignalSemaphoresKHR fails with CL_INVALID_VALUE"
+ "when invalid semaphore object is passed");
+
+
+ // Create two semaphore objects
+ clVk2Clsemaphore = clCreateSemaphoreWithPropertiesKHRptr(
+ context, sema_props1.data(), &errNum);
+ test_error(errNum,
+ "Unable to create semaphore with valid semaphore properties");
+
+ clCl2Vksemaphore = clCreateSemaphoreWithPropertiesKHRptr(
+ context, sema_props2.data(), &errNum);
+ test_error(errNum,
+ "Unable to create semaphore with valid semaphore properties");
+
+
+ // Call Signal twice consecutively
+ errNum = clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, &clVk2Clsemaphore,
+ NULL, 0, NULL, NULL);
+ test_error(errNum, "clEnqueueSignalSemaphoresKHRptr failed");
+
+ errNum = clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, &clCl2Vksemaphore,
+ NULL, 0, NULL, NULL);
+ test_error(errNum,
+ "clEnqueueSignalSemaphoresKHRptr failed for two "
+ "consecutive wait events");
+
+
+ // Call Wait twice consecutively
+ errNum = clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, &clVk2Clsemaphore,
+ NULL, 0, NULL, NULL);
+ test_error(errNum, "clEnqueueWaitSemaphoresKHRptr failed");
+
+ errNum = clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, &clCl2Vksemaphore,
+ NULL, 0, NULL, NULL);
+ test_error(errNum,
+ "clEnqueueWaitSemaphoresKHRptr failed for two "
+ " consecutive wait events");
+
+
+ // Pass invalid object to release call
+ errNum = clReleaseSemaphoreKHRptr(NULL);
+ test_failure_error(errNum, CL_INVALID_VALUE,
+ "clReleaseSemaphoreKHRptr fails with "
+ "CL_INVALID_VALUE when NULL semaphore object is passed");
+
+ // Release both semaphore objects
+ errNum = clReleaseSemaphoreKHRptr(clVk2Clsemaphore);
+ test_error(errNum, "clReleaseSemaphoreKHRptr failed");
+
+ errNum = clReleaseSemaphoreKHRptr(clCl2Vksemaphore);
+ test_error(errNum, "clReleaseSemaphoreKHRptr failed");
+
+ return TEST_PASS;
+}
diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
new file mode 100644
index 00000000..9b0bc9de
--- /dev/null
+++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
@@ -0,0 +1,1786 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <vulkan_interop_common.hpp>
+#include <vulkan_wrapper.hpp>
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+#include <assert.h>
+#include <vector>
+#include <iostream>
+#include <string.h>
+#include "harness/errorHelpers.h"
+
+#define MAX_BUFFERS 5
+#define MAX_IMPORTS 5
+#define BUFFERSIZE 3000
+static cl_uchar uuid[CL_UUID_SIZE_KHR];
+static cl_device_id deviceId = NULL;
+
+namespace {
+// Parameter block written to a host-visible Vulkan buffer and bound at
+// descriptor slot 0 of buffer.spv. Field order and types must stay in
+// sync with the shader's corresponding structure.
+// NOTE(review): assumed std430-compatible layout -- confirm against the
+// shader source for buffer.spv.
+struct Params
+{
+ uint32_t numBuffers; // number of storage buffers bound after this block
+ uint32_t bufferSize; // size in bytes of each buffer
+ uint32_t interBufferOffset; // byte stride between buffers when they are
+ // suballocated from one allocation; 0 means
+ // each buffer has its own allocation
+};
+}
+
+// OpenCL C source, built at runtime. Each clUpdateBuffer variant increments
+// every byte of the buffers it is given (1, 2, or 4 buffers); the verify
+// kernel checks that every byte equals the expected iteration count.
+// The string literals below are program source: do not edit their contents.
+
+// Variant taking a single buffer argument.
+const char *kernel_text_numbuffer_1 = " \
+__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a) { \n\
+ int gid = get_global_id(0); \n\
+ if (gid < bufferSize) { \n\
+ a[gid]++; \n\
+ } \n\
+}";
+
+// Variant taking two buffer arguments.
+const char *kernel_text_numbuffer_2 = " \
+__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b) { \n\
+ int gid = get_global_id(0); \n\
+ if (gid < bufferSize) { \n\
+ a[gid]++; \n\
+ b[gid]++;\n\
+ } \n\
+}";
+
+// Variant taking four buffer arguments.
+const char *kernel_text_numbuffer_4 = " \
+__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b, __global unsigned char *c, __global unsigned char *d) { \n\
+ int gid = get_global_id(0); \n\
+ if (gid < bufferSize) { \n\
+ a[gid]++;\n\
+ b[gid]++; \n\
+ c[gid]++; \n\
+ d[gid]++; \n\
+ } \n\
+}";
+
+
+// Verification kernel: sets *err to 1 on the first byte that does not match
+// expVal. Note it only checks while *err is still 0, so it reports a single
+// pass/fail flag, not a location.
+const char *kernel_text_verify = " \
+__kernel void checkKernel(__global unsigned char *ptr, int size, int expVal, __global unsigned char *err) \n\
+{ \n\
+ int idx = get_global_id(0); \n\
+ if ((idx < size) && (*err == 0)) { \n\
+ if (ptr[idx] != expVal){ \n\
+ *err = 1; \n\
+ } \n\
+ } \n\
+}";
+
+// Vulkan/OpenCL buffer-interop test using TWO OpenCL queues.
+// For every supported external memory handle type and memory type:
+// Vulkan increments each shared buffer once per iteration (signalling
+// vkVk2CLSemaphore), then an OpenCL kernel on cmd_queue1 increments buffers
+// [0..n-2] and a second kernel (kernel_cq, built here from
+// kernel_text_numbuffer_2) on cmd_queue2 increments buffer 0 and the last
+// buffer, chained via an event. Expected per-byte totals are therefore
+// maxIter*3 for buffer 0 and maxIter*2 for the rest, which the verify
+// kernel checks. Returns CL_SUCCESS or the first failing CL error.
+// NOTE(review): error_1 and error_2 are uninitialized until the per-type
+// loop body reaches them; an early `goto CLEANUP` (e.g. from a failed
+// clSetKernelArg) reads both in the cleanup block -- undefined behavior.
+// They should be initialized to NULL at declaration.
+// NOTE(review): update_buffer_kernel is only assigned when
+// vkBufferList.size() is 2, 3 or 5; any other numBuffers uses it
+// uninitialized.
+int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
+ cl_command_queue &cmd_queue2, cl_kernel *kernel,
+ cl_kernel &verify_kernel, VulkanDevice &vkDevice,
+ uint32_t numBuffers, uint32_t bufferSize)
+{
+ int err = CL_SUCCESS;
+ size_t global_work_size[1];
+ uint8_t *error_2;
+ cl_mem error_1;
+ cl_kernel update_buffer_kernel;
+ cl_kernel kernel_cq;
+ clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
+ clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
+ const char *program_source_const = kernel_text_numbuffer_2;
+ size_t program_source_length = strlen(program_source_const);
+ cl_program program = clCreateProgramWithSource(
+ context, 1, &program_source_const, &program_source_length, &err);
+ err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err, "Error: Failed to build program \n");
+ return err;
+ }
+ // create the kernel
+ kernel_cq = clCreateKernel(program, "clUpdateBuffer", &err);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err, "clCreateKernel failed \n");
+ return err;
+ }
+
+ // Vulkan side: semaphores for cross-API ordering and a compute pipeline
+ // running buffer.spv over MAX_BUFFERS+1 storage-buffer bindings
+ // (binding 0 is the Params block).
+ const std::vector<VulkanExternalMemoryHandleType>
+ vkExternalMemoryHandleTypeList =
+ getSupportedVulkanExternalMemoryHandleTypeList();
+ VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+ getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+ VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+ VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+ VulkanQueue &vkQueue = vkDevice.getQueue();
+
+ std::vector<char> vkBufferShader = readFile("buffer.spv");
+
+ VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
+ VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
+ MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+ VulkanDescriptorSetLayout vkDescriptorSetLayout(
+ vkDevice, vkDescriptorSetLayoutBindingList);
+ VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
+ VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
+ vkBufferShaderModule);
+
+ VulkanDescriptorPool vkDescriptorPool(vkDevice,
+ vkDescriptorSetLayoutBindingList);
+ VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
+ vkDescriptorSetLayout);
+
+ // Import the Vulkan semaphores into OpenCL (owned here, freed in CLEANUP).
+ clVk2CLExternalSemaphore = new clExternalSemaphore(
+ vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ clCl2VkExternalSemaphore = new clExternalSemaphore(
+ vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+
+ const uint32_t maxIter = innerIterations;
+ VulkanCommandPool vkCommandPool(vkDevice);
+ VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
+
+ VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
+ VulkanDeviceMemory vkParamsDeviceMemory(
+ vkDevice, vkParamsBuffer.getSize(),
+ getVulkanMemoryType(vkDevice,
+ VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
+ vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
+ std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
+ std::vector<clExternalMemory *> externalMemory;
+ for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
+ emhtIdx++)
+ {
+ VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
+ vkExternalMemoryHandleTypeList[emhtIdx];
+ log_info("External memory handle type: %d\n",
+ vkExternalMemoryHandleType);
+
+ // Dummy buffer used only to discover which memory types can back
+ // buffers with this external handle type.
+ VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
+ vkExternalMemoryHandleType);
+ const VulkanMemoryTypeList &memoryTypeList =
+ vkDummyBuffer.getMemoryTypeList();
+
+ for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
+ {
+ const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
+
+ log_info("Memory type index: %d\n", (uint32_t)memoryType);
+ log_info("Memory type property: %d\n",
+ memoryType.getMemoryTypeProperty());
+
+ VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize,
+ vkExternalMemoryHandleType);
+
+ // Allocate exportable device memory per buffer and import each
+ // allocation into OpenCL as a cl_mem.
+ for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
+ {
+ vkBufferListDeviceMemory.push_back(
+ new VulkanDeviceMemory(vkDevice, bufferSize, memoryType,
+ vkExternalMemoryHandleType));
+ externalMemory.push_back(new clExternalMemory(
+ vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType,
+ 0, bufferSize, context, deviceId));
+ }
+ cl_mem buffers[MAX_BUFFERS];
+ clFinish(cmd_queue1);
+ Params *params = (Params *)vkParamsDeviceMemory.map();
+ params->numBuffers = numBuffers;
+ params->bufferSize = bufferSize;
+ params->interBufferOffset = 0;
+ vkParamsDeviceMemory.unmap();
+ vkDescriptorSet.update(0, vkParamsBuffer);
+ for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
+ {
+ size_t buffer_size = vkBufferList[bIdx].getSize();
+ vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx],
+ 0);
+ buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer();
+ vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]);
+ }
+ vkCommandBuffer.begin();
+ vkCommandBuffer.bindPipeline(vkComputePipeline);
+ vkCommandBuffer.bindDescriptorSets(
+ vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
+ vkCommandBuffer.dispatch(512, 1, 1);
+ vkCommandBuffer.end();
+
+ // The CL update kernel takes one fewer buffer than vkBufferList
+ // holds (kernel_cq handles buffer 0 and the last one).
+ // NOTE(review): sizes other than 2/3/5 leave update_buffer_kernel
+ // uninitialized -- see function header comment.
+ if (vkBufferList.size() == 2)
+ {
+ update_buffer_kernel = kernel[0];
+ }
+ else if (vkBufferList.size() == 3)
+ {
+ update_buffer_kernel = kernel[1];
+ }
+ else if (vkBufferList.size() == 5)
+ {
+ update_buffer_kernel = kernel[2];
+ }
+ // global work size should be less than or equal to
+ // bufferSizeList[i]
+ global_work_size[0] = bufferSize;
+ for (uint32_t iter = 0; iter < maxIter; iter++)
+ {
+
+ // First iteration has no CL->Vk dependency yet; later
+ // iterations wait on the semaphore signalled by cmd_queue2.
+ if (iter == 0)
+ {
+ vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ }
+ else
+ {
+ vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
+ vkVk2CLSemaphore);
+ }
+ clVk2CLExternalSemaphore->wait(cmd_queue1);
+
+ err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
+ (void *)&bufferSize);
+ err |= clSetKernelArg(kernel_cq, 0, sizeof(uint32_t),
+ (void *)&bufferSize);
+ err |= clSetKernelArg(kernel_cq, 1, sizeof(cl_mem),
+ (void *)&(buffers[0]));
+
+ for (int i = 0; i < vkBufferList.size() - 1; i++)
+ {
+ err |=
+ clSetKernelArg(update_buffer_kernel, i + 1,
+ sizeof(cl_mem), (void *)&(buffers[i]));
+ }
+
+ err |=
+ clSetKernelArg(kernel_cq, 2, sizeof(cl_mem),
+ (void *)&(buffers[vkBufferList.size() - 1]));
+
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "Error: Failed to set arg values for kernel\n");
+ goto CLEANUP;
+ }
+ cl_event first_launch;
+
+ err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel,
+ 1, NULL, global_work_size, NULL, 0,
+ NULL, &first_launch);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "Error: Failed to launch update_buffer_kernel,"
+ "error\n");
+ goto CLEANUP;
+ }
+
+ // kernel_cq on the second queue is ordered after the first
+ // launch via the event dependency.
+ err = clEnqueueNDRangeKernel(cmd_queue2, kernel_cq, 1, NULL,
+ global_work_size, NULL, 1,
+ &first_launch, NULL);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "Error: Failed to launch update_buffer_kernel,"
+ "error\n");
+ goto CLEANUP;
+ }
+
+ if (iter != (maxIter - 1))
+ {
+ clCl2VkExternalSemaphore->signal(cmd_queue2);
+ }
+ }
+ error_2 = (uint8_t *)malloc(sizeof(uint8_t));
+ if (NULL == error_2)
+ {
+ log_error("Not able to allocate memory\n");
+ goto CLEANUP;
+ }
+ clFinish(cmd_queue2);
+ error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+ sizeof(uint8_t), NULL, &err);
+ if (CL_SUCCESS != err)
+ {
+ print_error(err, "Error: clCreateBuffer \n");
+ goto CLEANUP;
+ }
+ uint8_t val = 0;
+ err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
+ sizeof(uint8_t), &val, 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err, "Error: Failed read output, error\n");
+ goto CLEANUP;
+ }
+
+ // Buffer 0 is touched by Vulkan, update_buffer_kernel AND
+ // kernel_cq each iteration (3x); all others by two of the three.
+ int calc_max_iter;
+ for (int i = 0; i < vkBufferList.size(); i++)
+ {
+ if (i == 0)
+ calc_max_iter = (maxIter * 3);
+ else
+ calc_max_iter = (maxIter * 2);
+ err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
+ (void *)&(buffers[i]));
+ err |=
+ clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize);
+ err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
+ &calc_max_iter);
+ err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
+ (void *)&error_1);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "Error: Failed to set arg values for "
+ "verify_kernel \n");
+ goto CLEANUP;
+ }
+ err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL,
+ global_work_size, NULL, 0, NULL,
+ NULL);
+
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "Error: Failed to launch verify_kernel,"
+ "error \n");
+ goto CLEANUP;
+ }
+ err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
+ sizeof(uint8_t), error_2, 0, NULL,
+ NULL);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err, "Error: Failed read output, error \n ");
+ goto CLEANUP;
+ }
+ if (*error_2 == 1)
+ {
+ log_error("&&&& vulkan_opencl_buffer test FAILED\n");
+ goto CLEANUP;
+ }
+ }
+ // Release this memory type's allocations before the next round so
+ // the CLEANUP loop only sees anything left by an early exit.
+ for (size_t i = 0; i < vkBufferList.size(); i++)
+ {
+ delete vkBufferListDeviceMemory[i];
+ delete externalMemory[i];
+ }
+ vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
+ vkBufferListDeviceMemory.begin()
+ + numBuffers);
+ externalMemory.erase(externalMemory.begin(),
+ externalMemory.begin() + numBuffers);
+ }
+ }
+CLEANUP:
+ for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
+ {
+ if (vkBufferListDeviceMemory[i])
+ {
+ delete vkBufferListDeviceMemory[i];
+ }
+ if (externalMemory[i])
+ {
+ delete externalMemory[i];
+ }
+ }
+ if (program) clReleaseProgram(program);
+ if (kernel_cq) clReleaseKernel(kernel_cq);
+ if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
+ if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+ // NOTE(review): error_2/error_1 may be uninitialized here on early goto.
+ if (error_2) free(error_2);
+ if (error_1) clReleaseMemObject(error_1);
+
+ return err;
+}
+
+// Vulkan/OpenCL buffer-interop test using a SINGLE OpenCL queue.
+// Same ping-pong scheme as run_test_with_two_queue, but only one CL update
+// kernel runs per iteration, so each byte is incremented twice per
+// iteration (once by Vulkan, once by OpenCL) and the expected value is
+// maxIter * 2 for every buffer. Returns CL_SUCCESS or the first failing
+// CL error.
+// NOTE(review): error_1 and error_2 are uninitialized until reached in the
+// loop body; an early `goto CLEANUP` reads both in the cleanup block --
+// undefined behavior. Initialize to NULL at declaration.
+// NOTE(review): buffers[] has capacity 4 while kernel selection handles
+// sizes 1/2/4 -- assumes numBuffers <= 4 on this path; other sizes also
+// leave update_buffer_kernel uninitialized.
+int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
+ cl_kernel *kernel, cl_kernel &verify_kernel,
+ VulkanDevice &vkDevice, uint32_t numBuffers,
+ uint32_t bufferSize)
+{
+ log_info("RUNNING TEST WITH ONE QUEUE...... \n\n");
+ size_t global_work_size[1];
+ uint8_t *error_2;
+ cl_mem error_1;
+ cl_kernel update_buffer_kernel;
+ clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
+ clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
+ int err = CL_SUCCESS;
+
+ // Vulkan pipeline setup mirrors run_test_with_two_queue.
+ const std::vector<VulkanExternalMemoryHandleType>
+ vkExternalMemoryHandleTypeList =
+ getSupportedVulkanExternalMemoryHandleTypeList();
+ VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+ getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+ VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+ VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+ VulkanQueue &vkQueue = vkDevice.getQueue();
+
+ std::vector<char> vkBufferShader = readFile("buffer.spv");
+ VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
+ VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
+ MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+ VulkanDescriptorSetLayout vkDescriptorSetLayout(
+ vkDevice, vkDescriptorSetLayoutBindingList);
+ VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
+ VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
+ vkBufferShaderModule);
+
+ VulkanDescriptorPool vkDescriptorPool(vkDevice,
+ vkDescriptorSetLayoutBindingList);
+ VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
+ vkDescriptorSetLayout);
+
+ // Import the Vulkan semaphores into OpenCL (owned here, freed in CLEANUP).
+ clVk2CLExternalSemaphore = new clExternalSemaphore(
+ vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ clCl2VkExternalSemaphore = new clExternalSemaphore(
+ vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ const uint32_t maxIter = innerIterations;
+ VulkanCommandPool vkCommandPool(vkDevice);
+ VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
+
+ VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
+ VulkanDeviceMemory vkParamsDeviceMemory(
+ vkDevice, vkParamsBuffer.getSize(),
+ getVulkanMemoryType(vkDevice,
+ VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
+ vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
+ std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
+ std::vector<clExternalMemory *> externalMemory;
+
+ for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
+ emhtIdx++)
+ {
+ VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
+ vkExternalMemoryHandleTypeList[emhtIdx];
+ log_info("External memory handle type: %d\n",
+ vkExternalMemoryHandleType);
+
+ // Dummy buffer used only to enumerate compatible memory types.
+ VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
+ vkExternalMemoryHandleType);
+ const VulkanMemoryTypeList &memoryTypeList =
+ vkDummyBuffer.getMemoryTypeList();
+
+ for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
+ {
+ const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
+
+ log_info("Memory type index: %d\n", (uint32_t)memoryType);
+ log_info("Memory type property: %d\n",
+ memoryType.getMemoryTypeProperty());
+
+ VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize,
+ vkExternalMemoryHandleType);
+
+ // Allocate exportable device memory per buffer and import it
+ // into OpenCL.
+ for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
+ {
+ vkBufferListDeviceMemory.push_back(
+ new VulkanDeviceMemory(vkDevice, bufferSize, memoryType,
+ vkExternalMemoryHandleType));
+ externalMemory.push_back(new clExternalMemory(
+ vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType,
+ 0, bufferSize, context, deviceId));
+ }
+ cl_mem buffers[4];
+ clFinish(cmd_queue1);
+ Params *params = (Params *)vkParamsDeviceMemory.map();
+ params->numBuffers = numBuffers;
+ params->bufferSize = bufferSize;
+ params->interBufferOffset = 0;
+ vkParamsDeviceMemory.unmap();
+ vkDescriptorSet.update(0, vkParamsBuffer);
+ for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
+ {
+ size_t buffer_size = vkBufferList[bIdx].getSize();
+ vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx],
+ 0);
+ buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer();
+ vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]);
+ }
+ vkCommandBuffer.begin();
+ vkCommandBuffer.bindPipeline(vkComputePipeline);
+ vkCommandBuffer.bindDescriptorSets(
+ vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
+ vkCommandBuffer.dispatch(512, 1, 1);
+ vkCommandBuffer.end();
+
+ // Select the CL kernel variant matching the buffer count
+ // (1, 2 or 4 buffers).
+ if (vkBufferList.size() == 1)
+ {
+ update_buffer_kernel = kernel[0];
+ }
+ else if (vkBufferList.size() == 2)
+ {
+ update_buffer_kernel = kernel[1];
+ }
+ else if (vkBufferList.size() == 4)
+ {
+ update_buffer_kernel = kernel[2];
+ }
+
+ // global work size should be less than or equal to
+ // bufferSizeList[i]
+ global_work_size[0] = bufferSize;
+
+ for (uint32_t iter = 0; iter < maxIter; iter++)
+ {
+ // First iteration has no CL->Vk dependency yet; later
+ // iterations wait on the semaphore signalled below.
+ if (iter == 0)
+ {
+ vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ }
+ else
+ {
+ vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
+ vkVk2CLSemaphore);
+ }
+ clVk2CLExternalSemaphore->wait(cmd_queue1);
+
+ err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
+ (void *)&bufferSize);
+ for (int i = 0; i < vkBufferList.size(); i++)
+ {
+ err |=
+ clSetKernelArg(update_buffer_kernel, i + 1,
+ sizeof(cl_mem), (void *)&(buffers[i]));
+ }
+
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "Error: Failed to set arg values for kernel\n");
+ goto CLEANUP;
+ }
+ err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel,
+ 1, NULL, global_work_size, NULL, 0,
+ NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "Error: Failed to launch update_buffer_kernel,"
+ " error\n");
+ goto CLEANUP;
+ }
+ if (iter != (maxIter - 1))
+ {
+ clCl2VkExternalSemaphore->signal(cmd_queue1);
+ }
+ }
+ error_2 = (uint8_t *)malloc(sizeof(uint8_t));
+ if (NULL == error_2)
+ {
+ log_error("Not able to allocate memory\n");
+ goto CLEANUP;
+ }
+
+ error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+ sizeof(uint8_t), NULL, &err);
+ if (CL_SUCCESS != err)
+ {
+ print_error(err, "Error: clCreateBuffer \n");
+ goto CLEANUP;
+ }
+ uint8_t val = 0;
+ err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
+ sizeof(uint8_t), &val, 0, NULL, NULL);
+ if (CL_SUCCESS != err)
+ {
+ print_error(err, "Error: clEnqueueWriteBuffer \n");
+ goto CLEANUP;
+ }
+
+ // Each buffer is incremented once by Vulkan and once by OpenCL
+ // per iteration.
+ int calc_max_iter = (maxIter * 2);
+ for (int i = 0; i < vkBufferList.size(); i++)
+ {
+ err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
+ (void *)&(buffers[i]));
+ err |=
+ clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize);
+ err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
+ &calc_max_iter);
+ err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
+ (void *)&error_1);
+ if (err != CL_SUCCESS)
+ {
+ print_error(
+ err,
+ "Error: Failed to set arg values for verify_kernel \n");
+ goto CLEANUP;
+ }
+ err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL,
+ global_work_size, NULL, 0, NULL,
+ NULL);
+ if (err != CL_SUCCESS)
+ {
+ print_error(
+ err, "Error: Failed to launch verify_kernel, error\n");
+ goto CLEANUP;
+ }
+
+ err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
+ sizeof(uint8_t), error_2, 0, NULL,
+ NULL);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err, "Error: Failed read output, error \n");
+ goto CLEANUP;
+ }
+ if (*error_2 == 1)
+ {
+ log_error("&&&& vulkan_opencl_buffer test FAILED\n");
+ goto CLEANUP;
+ }
+ }
+ // Release this memory type's allocations before the next round.
+ for (size_t i = 0; i < vkBufferList.size(); i++)
+ {
+ delete vkBufferListDeviceMemory[i];
+ delete externalMemory[i];
+ }
+ vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
+ vkBufferListDeviceMemory.begin()
+ + numBuffers);
+ externalMemory.erase(externalMemory.begin(),
+ externalMemory.begin() + numBuffers);
+ }
+ }
+CLEANUP:
+ for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
+ {
+ if (vkBufferListDeviceMemory[i])
+ {
+ delete vkBufferListDeviceMemory[i];
+ }
+ if (externalMemory[i])
+ {
+ delete externalMemory[i];
+ }
+ }
+ if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
+ if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+ // NOTE(review): error_2/error_1 may be uninitialized here on early goto.
+ if (error_2) free(error_2);
+ if (error_1) clReleaseMemObject(error_1);
+ return err;
+}
+
+int run_test_with_multi_import_same_ctx(
+ cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel,
+ cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers,
+ uint32_t bufferSize, uint32_t bufferSizeForOffset)
+{
+ size_t global_work_size[1];
+ uint8_t *error_2;
+ cl_mem error_1;
+ int numImports = numBuffers;
+ cl_kernel update_buffer_kernel[MAX_IMPORTS];
+ clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
+ clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
+ int err = CL_SUCCESS;
+ int calc_max_iter;
+ bool withOffset;
+ uint32_t pBufferSize;
+
+ const std::vector<VulkanExternalMemoryHandleType>
+ vkExternalMemoryHandleTypeList =
+ getSupportedVulkanExternalMemoryHandleTypeList();
+ VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+ getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+ VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+ VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+ VulkanQueue &vkQueue = vkDevice.getQueue();
+
+ std::vector<char> vkBufferShader = readFile("buffer.spv");
+
+ VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
+ VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
+ MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+ VulkanDescriptorSetLayout vkDescriptorSetLayout(
+ vkDevice, vkDescriptorSetLayoutBindingList);
+ VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
+ VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
+ vkBufferShaderModule);
+
+ VulkanDescriptorPool vkDescriptorPool(vkDevice,
+ vkDescriptorSetLayoutBindingList);
+ VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
+ vkDescriptorSetLayout);
+
+ clVk2CLExternalSemaphore = new clExternalSemaphore(
+ vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ clCl2VkExternalSemaphore = new clExternalSemaphore(
+ vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ const uint32_t maxIter = innerIterations;
+ VulkanCommandPool vkCommandPool(vkDevice);
+ VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
+
+ VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
+ VulkanDeviceMemory vkParamsDeviceMemory(
+ vkDevice, vkParamsBuffer.getSize(),
+ getVulkanMemoryType(vkDevice,
+ VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
+ vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
+ std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
+ std::vector<std::vector<clExternalMemory *>> externalMemory;
+
+
+ for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
+ emhtIdx++)
+ {
+ VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
+ vkExternalMemoryHandleTypeList[emhtIdx];
+ log_info("External memory handle type: %d\n",
+ vkExternalMemoryHandleType);
+
+ VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
+ vkExternalMemoryHandleType);
+ const VulkanMemoryTypeList &memoryTypeList =
+ vkDummyBuffer.getMemoryTypeList();
+
+ for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
+ {
+ const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
+
+ log_info("Memory type index: %d\n", (uint32_t)memoryType);
+ log_info("Memory type property: %d\n",
+ memoryType.getMemoryTypeProperty());
+ for (unsigned int withOffset = 0;
+ withOffset <= (unsigned int)enableOffset; withOffset++)
+ {
+ log_info("Running withOffset case %d\n", (uint32_t)withOffset);
+ if (withOffset)
+ {
+ pBufferSize = bufferSizeForOffset;
+ }
+ else
+ {
+ pBufferSize = bufferSize;
+ }
+ cl_mem buffers[MAX_BUFFERS][MAX_IMPORTS];
+ VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize,
+ vkExternalMemoryHandleType);
+ uint32_t interBufferOffset =
+ (uint32_t)(vkBufferList[0].getSize());
+
+ for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
+ {
+ if (withOffset == 0)
+ {
+ vkBufferListDeviceMemory.push_back(
+ new VulkanDeviceMemory(vkDevice, pBufferSize,
+ memoryType,
+ vkExternalMemoryHandleType));
+ }
+ if (withOffset == 1)
+ {
+ uint32_t totalSize =
+ (uint32_t)(vkBufferList.size() * interBufferOffset);
+ vkBufferListDeviceMemory.push_back(
+ new VulkanDeviceMemory(vkDevice, totalSize,
+ memoryType,
+ vkExternalMemoryHandleType));
+ }
+ std::vector<clExternalMemory *> pExternalMemory;
+ for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
+ {
+ pExternalMemory.push_back(new clExternalMemory(
+ vkBufferListDeviceMemory[bIdx],
+ vkExternalMemoryHandleType,
+ withOffset * bIdx * interBufferOffset, pBufferSize,
+ context, deviceId));
+ }
+ externalMemory.push_back(pExternalMemory);
+ }
+
+ clFinish(cmd_queue1);
+ Params *params = (Params *)vkParamsDeviceMemory.map();
+ params->numBuffers = numBuffers;
+ params->bufferSize = pBufferSize;
+ params->interBufferOffset = interBufferOffset * withOffset;
+ vkParamsDeviceMemory.unmap();
+ vkDescriptorSet.update(0, vkParamsBuffer);
+ for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
+ {
+ size_t buffer_size = vkBufferList[bIdx].getSize();
+ vkBufferListDeviceMemory[bIdx]->bindBuffer(
+ vkBufferList[bIdx],
+ bIdx * interBufferOffset * withOffset);
+ for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
+ {
+ buffers[bIdx][cl_bIdx] =
+ externalMemory[bIdx][cl_bIdx]
+ ->getExternalMemoryBuffer();
+ }
+ vkDescriptorSet.update((uint32_t)bIdx + 1,
+ vkBufferList[bIdx]);
+ }
+ vkCommandBuffer.begin();
+ vkCommandBuffer.bindPipeline(vkComputePipeline);
+ vkCommandBuffer.bindDescriptorSets(
+ vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
+ vkCommandBuffer.dispatch(512, 1, 1);
+ vkCommandBuffer.end();
+ for (int i = 0; i < numImports; i++)
+ {
+ update_buffer_kernel[i] = (numBuffers == 1)
+ ? kernel[0]
+ : ((numBuffers == 2) ? kernel[1] : kernel[2]);
+ }
+ // global work size should be less than or equal to
+ // bufferSizeList[i]
+ global_work_size[0] = pBufferSize;
+
+ for (uint32_t iter = 0; iter < maxIter; iter++)
+ {
+ if (iter == 0)
+ {
+ vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ }
+ else
+ {
+ vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
+ vkVk2CLSemaphore);
+ }
+ clVk2CLExternalSemaphore->wait(cmd_queue1);
+ for (uint8_t launchIter = 0; launchIter < numImports;
+ launchIter++)
+ {
+ err = clSetKernelArg(update_buffer_kernel[launchIter],
+ 0, sizeof(uint32_t),
+ (void *)&pBufferSize);
+ for (int i = 0; i < numBuffers; i++)
+ {
+ err |= clSetKernelArg(
+ update_buffer_kernel[launchIter], i + 1,
+ sizeof(cl_mem),
+ (void *)&(buffers[i][launchIter]));
+ }
+
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "Error: Failed to set arg values for "
+ "kernel\n ");
+ goto CLEANUP;
+ }
+ err = clEnqueueNDRangeKernel(
+ cmd_queue1, update_buffer_kernel[launchIter], 1,
+ NULL, global_work_size, NULL, 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "Error: Failed to launch "
+ "update_buffer_kernel, error\n ");
+ goto CLEANUP;
+ }
+ }
+ if (iter != (maxIter - 1))
+ {
+ clCl2VkExternalSemaphore->signal(cmd_queue1);
+ }
+ }
+ error_2 = (uint8_t *)malloc(sizeof(uint8_t));
+ if (NULL == error_2)
+ {
+ log_error("Not able to allocate memory\n");
+ goto CLEANUP;
+ }
+
+ error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+ sizeof(uint8_t), NULL, &err);
+ if (CL_SUCCESS != err)
+ {
+ print_error(err, "Error: clCreateBuffer \n");
+ goto CLEANUP;
+ }
+ uint8_t val = 0;
+ err =
+ clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
+ sizeof(uint8_t), &val, 0, NULL, NULL);
+ if (CL_SUCCESS != err)
+ {
+ print_error(err, "Error: clEnqueueWriteBuffer \n");
+ goto CLEANUP;
+ }
+ calc_max_iter = maxIter * (numBuffers + 1);
+
+ for (int i = 0; i < vkBufferList.size(); i++)
+ {
+ err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
+ (void *)&(buffers[i][0]));
+ err |= clSetKernelArg(verify_kernel, 1, sizeof(int),
+ &pBufferSize);
+ err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
+ &calc_max_iter);
+ err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
+ (void *)&error_1);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "Error: Failed to set arg values for "
+ "verify_kernel \n");
+ goto CLEANUP;
+ }
+ err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1,
+ NULL, global_work_size, NULL,
+ 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ print_error(
+ err,
+ "Error: Failed to launch verify_kernel, error\n");
+ goto CLEANUP;
+ }
+
+ err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
+ sizeof(uint8_t), error_2, 0, NULL,
+ NULL);
+ if (err != CL_SUCCESS)
+ {
+ print_error(err, "Error: Failed read output, error \n");
+ goto CLEANUP;
+ }
+ if (*error_2 == 1)
+ {
+ log_error("&&&& vulkan_opencl_buffer test FAILED\n");
+ goto CLEANUP;
+ }
+ }
+ for (size_t i = 0; i < vkBufferList.size(); i++)
+ {
+ for (size_t j = 0; j < numImports; j++)
+ {
+ delete externalMemory[i][j];
+ }
+ }
+ for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
+ {
+ delete vkBufferListDeviceMemory[i];
+ }
+ vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
+ vkBufferListDeviceMemory.end());
+ for (size_t i = 0; i < externalMemory.size(); i++)
+ {
+ externalMemory[i].erase(externalMemory[i].begin(),
+ externalMemory[i].begin()
+ + numBuffers);
+ }
+ externalMemory.clear();
+ }
+ }
+ }
+CLEANUP:
+ for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
+ {
+ if (vkBufferListDeviceMemory[i])
+ {
+ delete vkBufferListDeviceMemory[i];
+ }
+ }
+ for (size_t i = 0; i < externalMemory.size(); i++)
+ {
+ for (size_t j = 0; j < externalMemory[i].size(); j++)
+ {
+ if (externalMemory[i][j])
+ {
+ delete externalMemory[i][j];
+ }
+ }
+ }
+ if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
+ if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+ if (error_2) free(error_2);
+ if (error_1) clReleaseMemObject(error_1);
+ return err;
+}
+
+// Multi-import / multi-context variant of the Vulkan-OpenCL buffer interop
+// test: every Vulkan device-memory allocation is imported numImports times
+// into TWO different OpenCL contexts. A Vulkan compute shader and the OpenCL
+// update kernels of each context take turns updating the shared buffers,
+// ordered through the exported Vk<->CL semaphores; a per-context verify
+// kernel then checks that every buffer saw the expected number of updates.
+// Returns CL_SUCCESS on success, an OpenCL error code otherwise.
+int run_test_with_multi_import_diff_ctx(
+    cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1,
+    cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2,
+    cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice,
+    uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset)
+{
+    size_t global_work_size[1];
+    // NULL-initialized so CLEANUP is safe to reach from any early goto
+    // (previously these handles were read uninitialized on failure paths).
+    uint8_t *error_3 = NULL;
+    cl_mem error_1 = NULL;
+    cl_mem error_2 = NULL;
+    int numImports = numBuffers;
+    cl_kernel update_buffer_kernel1[MAX_IMPORTS];
+    cl_kernel update_buffer_kernel2[MAX_IMPORTS];
+    clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
+    clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
+    clExternalSemaphore *clVk2CLExternalSemaphore2 = NULL;
+    clExternalSemaphore *clCl2VkExternalSemaphore2 = NULL;
+    int err = CL_SUCCESS;
+    int calc_max_iter;
+    // Effective buffer size of the current withOffset case; the offset
+    // variant uses the smaller bufferSizeForOffset.
+    uint32_t pBufferSize;
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+    VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    VulkanQueue &vkQueue = vkDevice.getQueue();
+
+    std::vector<char> vkBufferShader = readFile("buffer.spv");
+
+    VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
+    VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
+        MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+    VulkanDescriptorSetLayout vkDescriptorSetLayout(
+        vkDevice, vkDescriptorSetLayoutBindingList);
+    VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
+    VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
+                                            vkBufferShaderModule);
+
+    VulkanDescriptorPool vkDescriptorPool(vkDevice,
+                                          vkDescriptorSetLayoutBindingList);
+    VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
+                                        vkDescriptorSetLayout);
+
+    // Each Vulkan semaphore is imported once per OpenCL context.
+    clVk2CLExternalSemaphore = new clExternalSemaphore(
+        vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+    clCl2VkExternalSemaphore = new clExternalSemaphore(
+        vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+
+    clVk2CLExternalSemaphore2 = new clExternalSemaphore(
+        vkVk2CLSemaphore, context2, vkExternalSemaphoreHandleType, deviceId);
+    clCl2VkExternalSemaphore2 = new clExternalSemaphore(
+        vkCl2VkSemaphore, context2, vkExternalSemaphoreHandleType, deviceId);
+
+    const uint32_t maxIter = innerIterations;
+    VulkanCommandPool vkCommandPool(vkDevice);
+    VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
+
+    VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
+    VulkanDeviceMemory vkParamsDeviceMemory(
+        vkDevice, vkParamsBuffer.getSize(),
+        getVulkanMemoryType(vkDevice,
+                            VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
+    vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
+    std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
+    std::vector<std::vector<clExternalMemory *>> externalMemory1;
+    std::vector<std::vector<clExternalMemory *>> externalMemory2;
+
+    for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
+         emhtIdx++)
+    {
+        VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
+            vkExternalMemoryHandleTypeList[emhtIdx];
+        log_info("External memory handle type:%d\n",
+                 vkExternalMemoryHandleType);
+
+        // Dummy buffer only exists to query which memory types are
+        // compatible with this external handle type.
+        VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
+                                   vkExternalMemoryHandleType);
+        const VulkanMemoryTypeList &memoryTypeList =
+            vkDummyBuffer.getMemoryTypeList();
+
+        for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
+        {
+            const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
+
+            log_info("Memory type index: %d\n", (uint32_t)memoryType);
+            log_info("Memory type property: %d\n",
+                     memoryType.getMemoryTypeProperty());
+
+            for (unsigned int withOffset = 0;
+                 withOffset <= (unsigned int)enableOffset; withOffset++)
+            {
+                log_info("Running withOffset case %d\n", (uint32_t)withOffset);
+                cl_mem buffers1[MAX_BUFFERS][MAX_IMPORTS];
+                cl_mem buffers2[MAX_BUFFERS][MAX_IMPORTS];
+                if (withOffset)
+                {
+                    pBufferSize = bufferSizeForOffset;
+                }
+                else
+                {
+                    pBufferSize = bufferSize;
+                }
+                VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize,
+                                              vkExternalMemoryHandleType);
+                uint32_t interBufferOffset =
+                    (uint32_t)(vkBufferList[0].getSize());
+
+                for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
+                {
+                    // withOffset == 0: one allocation per buffer.
+                    // withOffset == 1: one large allocation; each buffer is
+                    // bound/imported at bIdx * interBufferOffset.
+                    if (withOffset == 0)
+                    {
+                        vkBufferListDeviceMemory.push_back(
+                            new VulkanDeviceMemory(vkDevice, pBufferSize,
+                                                   memoryType,
+                                                   vkExternalMemoryHandleType));
+                    }
+                    if (withOffset == 1)
+                    {
+                        uint32_t totalSize =
+                            (uint32_t)(vkBufferList.size() * interBufferOffset);
+                        vkBufferListDeviceMemory.push_back(
+                            new VulkanDeviceMemory(vkDevice, totalSize,
+                                                   memoryType,
+                                                   vkExternalMemoryHandleType));
+                    }
+                    std::vector<clExternalMemory *> pExternalMemory1;
+                    std::vector<clExternalMemory *> pExternalMemory2;
+                    for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
+                    {
+                        pExternalMemory1.push_back(new clExternalMemory(
+                            vkBufferListDeviceMemory[bIdx],
+                            vkExternalMemoryHandleType,
+                            withOffset * bIdx * interBufferOffset, pBufferSize,
+                            context, deviceId));
+                        pExternalMemory2.push_back(new clExternalMemory(
+                            vkBufferListDeviceMemory[bIdx],
+                            vkExternalMemoryHandleType,
+                            withOffset * bIdx * interBufferOffset, pBufferSize,
+                            context2, deviceId));
+                    }
+                    externalMemory1.push_back(pExternalMemory1);
+                    externalMemory2.push_back(pExternalMemory2);
+                }
+
+                clFinish(cmd_queue1);
+                Params *params = (Params *)vkParamsDeviceMemory.map();
+                params->numBuffers = numBuffers;
+                params->bufferSize = pBufferSize;
+                params->interBufferOffset = interBufferOffset * withOffset;
+                vkParamsDeviceMemory.unmap();
+                vkDescriptorSet.update(0, vkParamsBuffer);
+                for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
+                {
+                    vkBufferListDeviceMemory[bIdx]->bindBuffer(
+                        vkBufferList[bIdx],
+                        bIdx * interBufferOffset * withOffset);
+                    for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
+                    {
+                        buffers1[bIdx][cl_bIdx] =
+                            externalMemory1[bIdx][cl_bIdx]
+                                ->getExternalMemoryBuffer();
+                        buffers2[bIdx][cl_bIdx] =
+                            externalMemory2[bIdx][cl_bIdx]
+                                ->getExternalMemoryBuffer();
+                    }
+                    vkDescriptorSet.update((uint32_t)bIdx + 1,
+                                           vkBufferList[bIdx]);
+                }
+
+                vkCommandBuffer.begin();
+                vkCommandBuffer.bindPipeline(vkComputePipeline);
+                vkCommandBuffer.bindDescriptorSets(
+                    vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
+                vkCommandBuffer.dispatch(512, 1, 1);
+                vkCommandBuffer.end();
+
+                for (int i = 0; i < numImports; i++)
+                {
+                    update_buffer_kernel1[i] = (numBuffers == 1)
+                        ? kernel1[0]
+                        : ((numBuffers == 2) ? kernel1[1] : kernel1[2]);
+                    update_buffer_kernel2[i] = (numBuffers == 1)
+                        ? kernel2[0]
+                        : ((numBuffers == 2) ? kernel2[1] : kernel2[2]);
+                }
+
+                // global work size should be less than or equal
+                // to bufferSizeList[i]
+                global_work_size[0] = pBufferSize;
+
+                // Phase 1: alternate Vulkan dispatch and context-1 update
+                // kernels, synchronized through the external semaphores.
+                for (uint32_t iter = 0; iter < maxIter; iter++)
+                {
+                    if (iter == 0)
+                    {
+                        vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+                    }
+                    else
+                    {
+                        vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
+                                       vkVk2CLSemaphore);
+                    }
+                    clVk2CLExternalSemaphore->wait(cmd_queue1);
+
+                    for (uint8_t launchIter = 0; launchIter < numImports;
+                         launchIter++)
+                    {
+                        err = clSetKernelArg(update_buffer_kernel1[launchIter],
+                                             0, sizeof(uint32_t),
+                                             (void *)&pBufferSize);
+                        for (int i = 0; i < numBuffers; i++)
+                        {
+                            err |= clSetKernelArg(
+                                update_buffer_kernel1[launchIter], i + 1,
+                                sizeof(cl_mem),
+                                (void *)&(buffers1[i][launchIter]));
+                        }
+
+                        if (err != CL_SUCCESS)
+                        {
+                            print_error(err,
+                                        "Error: Failed to set arg values for "
+                                        "kernel\n ");
+                            goto CLEANUP;
+                        }
+                        err = clEnqueueNDRangeKernel(
+                            cmd_queue1, update_buffer_kernel1[launchIter], 1,
+                            NULL, global_work_size, NULL, 0, NULL, NULL);
+                        if (err != CL_SUCCESS)
+                        {
+                            print_error(err,
+                                        "Error: Failed to launch "
+                                        "update_buffer_kernel, error\n");
+                            goto CLEANUP;
+                        }
+                    }
+                    if (iter != (maxIter - 1))
+                    {
+                        clCl2VkExternalSemaphore->signal(cmd_queue1);
+                    }
+                }
+                clFinish(cmd_queue1);
+                // Phase 2: same dance with the second context's queue,
+                // kernels and semaphore imports.
+                for (uint32_t iter = 0; iter < maxIter; iter++)
+                {
+                    if (iter == 0)
+                    {
+                        vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+                    }
+                    else
+                    {
+                        vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
+                                       vkVk2CLSemaphore);
+                    }
+                    clVk2CLExternalSemaphore2->wait(cmd_queue2);
+
+                    for (uint8_t launchIter = 0; launchIter < numImports;
+                         launchIter++)
+                    {
+                        // Pass pBufferSize (not bufferSize): in the
+                        // withOffset case the two differ, and the phase-1
+                        // kernels and the verify kernels use pBufferSize.
+                        err = clSetKernelArg(update_buffer_kernel2[launchIter],
+                                             0, sizeof(uint32_t),
+                                             (void *)&pBufferSize);
+                        for (int i = 0; i < numBuffers; i++)
+                        {
+                            err |= clSetKernelArg(
+                                update_buffer_kernel2[launchIter], i + 1,
+                                sizeof(cl_mem),
+                                (void *)&(buffers2[i][launchIter]));
+                        }
+
+                        if (err != CL_SUCCESS)
+                        {
+                            print_error(err,
+                                        "Error: Failed to set arg values for "
+                                        "kernel\n ");
+                            goto CLEANUP;
+                        }
+                        err = clEnqueueNDRangeKernel(
+                            cmd_queue2, update_buffer_kernel2[launchIter], 1,
+                            NULL, global_work_size, NULL, 0, NULL, NULL);
+                        if (err != CL_SUCCESS)
+                        {
+                            print_error(err,
+                                        "Error: Failed to launch "
+                                        "update_buffer_kernel, error\n ");
+                            goto CLEANUP;
+                        }
+                    }
+                    if (iter != (maxIter - 1))
+                    {
+                        clCl2VkExternalSemaphore2->signal(cmd_queue2);
+                    }
+                }
+                clFinish(cmd_queue2);
+                // Allocate the host error flag and the per-context error
+                // buffers only once; they are reset below on every
+                // iteration (previously they were re-allocated each loop
+                // pass and leaked).
+                if (NULL == error_3)
+                {
+                    error_3 = (uint8_t *)malloc(sizeof(uint8_t));
+                    if (NULL == error_3)
+                    {
+                        log_error("Not able to allocate memory\n");
+                        goto CLEANUP;
+                    }
+                }
+
+                if (NULL == error_1)
+                {
+                    error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+                                             sizeof(uint8_t), NULL, &err);
+                    if (CL_SUCCESS != err)
+                    {
+                        print_error(err, "Error: clCreateBuffer \n");
+                        goto CLEANUP;
+                    }
+                }
+                if (NULL == error_2)
+                {
+                    error_2 = clCreateBuffer(context2, CL_MEM_WRITE_ONLY,
+                                             sizeof(uint8_t), NULL, &err);
+                    if (CL_SUCCESS != err)
+                    {
+                        print_error(err, "Error: clCreateBuffer \n");
+                        goto CLEANUP;
+                    }
+                }
+                uint8_t val = 0;
+                err =
+                    clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
+                                         sizeof(uint8_t), &val, 0, NULL, NULL);
+                if (err != CL_SUCCESS)
+                {
+                    print_error(err, "Error: clEnqueueWriteBuffer \n");
+                    goto CLEANUP;
+                }
+
+                err =
+                    clEnqueueWriteBuffer(cmd_queue2, error_2, CL_TRUE, 0,
+                                         sizeof(uint8_t), &val, 0, NULL, NULL);
+                if (err != CL_SUCCESS)
+                {
+                    print_error(err, "Error: clEnqueueWriteBuffer \n");
+                    goto CLEANUP;
+                }
+
+                // Both contexts updated every buffer maxIter times, plus the
+                // Vulkan shader's own updates.
+                calc_max_iter = maxIter * 2 * (numBuffers + 1);
+                for (int i = 0; i < numBuffers; i++)
+                {
+                    err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
+                                         (void *)&(buffers1[i][0]));
+                    err |= clSetKernelArg(verify_kernel, 1, sizeof(int),
+                                          &pBufferSize);
+                    err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
+                                          &calc_max_iter);
+                    err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
+                                          (void *)&error_1);
+                    if (err != CL_SUCCESS)
+                    {
+                        print_error(err,
+                                    "Error: Failed to set arg values for "
+                                    "verify_kernel \n");
+                        goto CLEANUP;
+                    }
+                    err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1,
+                                                 NULL, global_work_size, NULL,
+                                                 0, NULL, NULL);
+                    if (err != CL_SUCCESS)
+                    {
+                        print_error(err,
+                                    "Error: Failed to launch verify_kernel,"
+                                    "error\n");
+                        goto CLEANUP;
+                    }
+
+                    err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
+                                              sizeof(uint8_t), error_3, 0, NULL,
+                                              NULL);
+                    if (err != CL_SUCCESS)
+                    {
+                        print_error(err, "Error: Failed read output, error\n");
+                        goto CLEANUP;
+                    }
+                    if (*error_3 == 1)
+                    {
+                        log_error("&&&& vulkan_opencl_buffer test FAILED\n");
+                        goto CLEANUP;
+                    }
+                }
+                *error_3 = 0;
+                // Repeat the verification through the second context.
+                for (int i = 0; i < vkBufferList.size(); i++)
+                {
+                    err = clSetKernelArg(verify_kernel2, 0, sizeof(cl_mem),
+                                         (void *)&(buffers2[i][0]));
+                    err |= clSetKernelArg(verify_kernel2, 1, sizeof(int),
+                                          &pBufferSize);
+                    err |= clSetKernelArg(verify_kernel2, 2, sizeof(int),
+                                          &calc_max_iter);
+                    err |= clSetKernelArg(verify_kernel2, 3, sizeof(cl_mem),
+                                          (void *)&error_2);
+                    if (err != CL_SUCCESS)
+                    {
+                        print_error(err,
+                                    "Error: Failed to set arg values for "
+                                    "verify_kernel \n");
+                        goto CLEANUP;
+                    }
+                    err = clEnqueueNDRangeKernel(cmd_queue2, verify_kernel2, 1,
+                                                 NULL, global_work_size, NULL,
+                                                 0, NULL, NULL);
+                    if (err != CL_SUCCESS)
+                    {
+                        print_error(err,
+                                    "Error: Failed to launch verify_kernel,"
+                                    "error\n");
+                        goto CLEANUP;
+                    }
+
+                    err = clEnqueueReadBuffer(cmd_queue2, error_2, CL_TRUE, 0,
+                                              sizeof(uint8_t), error_3, 0, NULL,
+                                              NULL);
+                    if (err != CL_SUCCESS)
+                    {
+                        print_error(err, "Error: Failed read output, error\n");
+                        goto CLEANUP;
+                    }
+                    if (*error_3 == 1)
+                    {
+                        log_error("&&&& vulkan_opencl_buffer test FAILED\n");
+                        goto CLEANUP;
+                    }
+                }
+                // Per-iteration teardown of the imports and allocations.
+                for (size_t i = 0; i < vkBufferList.size(); i++)
+                {
+                    for (size_t j = 0; j < numImports; j++)
+                    {
+                        delete externalMemory1[i][j];
+                        delete externalMemory2[i][j];
+                    }
+                }
+                for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
+                {
+                    delete vkBufferListDeviceMemory[i];
+                }
+                vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
+                                               vkBufferListDeviceMemory.end());
+                for (size_t i = 0; i < externalMemory1.size(); i++)
+                {
+                    externalMemory1[i].erase(externalMemory1[i].begin(),
+                                             externalMemory1[i].begin()
+                                                 + numBuffers);
+                    externalMemory2[i].erase(externalMemory2[i].begin(),
+                                             externalMemory2[i].begin()
+                                                 + numBuffers);
+                }
+                externalMemory1.clear();
+                externalMemory2.clear();
+            }
+        }
+    }
+CLEANUP:
+    for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
+    {
+        if (vkBufferListDeviceMemory[i])
+        {
+            delete vkBufferListDeviceMemory[i];
+        }
+    }
+    for (size_t i = 0; i < externalMemory1.size(); i++)
+    {
+        for (size_t j = 0; j < externalMemory1[i].size(); j++)
+        {
+            if (externalMemory1[i][j])
+            {
+                delete externalMemory1[i][j];
+            }
+        }
+    }
+    for (size_t i = 0; i < externalMemory2.size(); i++)
+    {
+        for (size_t j = 0; j < externalMemory2[i].size(); j++)
+        {
+            if (externalMemory2[i][j])
+            {
+                delete externalMemory2[i][j];
+            }
+        }
+    }
+    if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
+    if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+    if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2;
+    if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2;
+    if (error_3) free(error_3);
+    if (error_1) clReleaseMemObject(error_1);
+    if (error_2) clReleaseMemObject(error_2);
+    return err;
+}
+
+// Entry point for the Vulkan-OpenCL buffer interop tests. Finds the GPU
+// whose CL_DEVICE_UUID_KHR matches the Vulkan physical device, builds the
+// update and verify kernels (for one or two OpenCL contexts, depending on
+// multiCtx), then runs the selected test variant over every combination of
+// buffer count and buffer size. Returns CL_SUCCESS on success.
+// Note: device_/context_/queue_/numElements_ come from the harness but are
+// intentionally unused — the test builds its own contexts and queues.
+int test_buffer_common(cl_device_id device_, cl_context context_,
+                       cl_command_queue queue_, int numElements_)
+{
+    cl_int errNum = CL_SUCCESS;
+    cl_platform_id platform = NULL;
+    size_t extensionSize = 0;
+    cl_uint num_devices = 0;
+    cl_uint device_no = 0;
+    // Everything released at CLEANUP is NULL-initialized so an early goto
+    // never reads an uninitialized handle (devices previously was).
+    cl_device_id *devices = NULL;
+    char *extensions = NULL;
+    cl_kernel verify_kernel = NULL;
+    cl_kernel verify_kernel2 = NULL;
+    cl_kernel kernel[3] = { NULL, NULL, NULL };
+    cl_kernel kernel2[3] = { NULL, NULL, NULL };
+    const char *program_source_const[3] = { kernel_text_numbuffer_1,
+                                            kernel_text_numbuffer_2,
+                                            kernel_text_numbuffer_4 };
+    const char *program_source_const_verify;
+    size_t program_source_length;
+    cl_command_queue cmd_queue1 = NULL;
+    cl_command_queue cmd_queue2 = NULL;
+    cl_command_queue cmd_queue3 = NULL;
+    cl_context context = NULL;
+    cl_program program[3] = { NULL, NULL, NULL };
+    // Separate program objects for the second context so the first
+    // context's programs are not overwritten (and leaked) in multiCtx mode.
+    cl_program program2[3] = { NULL, NULL, NULL };
+    cl_program program_verify = NULL;
+    cl_program program_verify2 = NULL;
+    cl_context context2 = NULL;
+
+    VulkanDevice vkDevice;
+    uint32_t numBuffersList[] = { 1, 2, 4 };
+    uint32_t bufferSizeList[] = { 4 * 1024, 64 * 1024, 2 * 1024 * 1024 };
+    uint32_t bufferSizeListforOffset[] = { 256, 512, 1024 };
+
+    cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
+    errNum = clGetPlatformIDs(1, &platform, NULL);
+    if (errNum != CL_SUCCESS)
+    {
+        print_error(errNum, "Error: Failed to get platform\n");
+        goto CLEANUP;
+    }
+
+    errNum =
+        clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
+    if (CL_SUCCESS != errNum)
+    {
+        print_error(errNum, "clGetDeviceIDs failed in returning of devices\n");
+        goto CLEANUP;
+    }
+    devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
+    if (NULL == devices)
+    {
+        errNum = CL_OUT_OF_HOST_MEMORY;
+        print_error(errNum, "Unable to allocate memory for devices\n");
+        goto CLEANUP;
+    }
+    errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices,
+                            NULL);
+    if (CL_SUCCESS != errNum)
+    {
+        print_error(errNum, "Failed to get deviceID.\n");
+        goto CLEANUP;
+    }
+    contextProperties[1] = (cl_context_properties)platform;
+    log_info("Assigned contextproperties for platform\n");
+    // Pick the GPU whose device UUID matches the Vulkan physical device.
+    for (device_no = 0; device_no < num_devices; device_no++)
+    {
+        errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0,
+                                 NULL, &extensionSize);
+        if (CL_SUCCESS != errNum)
+        {
+            print_error(errNum,
+                        "Error in clGetDeviceInfo for getting device_extension "
+                        "size....\n");
+            goto CLEANUP;
+        }
+        extensions = (char *)malloc(extensionSize);
+        if (NULL == extensions)
+        {
+            print_error(errNum, "Unable to allocate memory for extensions\n");
+            errNum = CL_OUT_OF_HOST_MEMORY;
+            goto CLEANUP;
+        }
+        errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS,
+                                 extensionSize, extensions, NULL);
+        if (CL_SUCCESS != errNum)
+        {
+            print_error(errNum,
+                        "Error in clGetDeviceInfo for device_extension\n");
+            goto CLEANUP;
+        }
+        // NOTE(review): the extension string is fetched but never inspected;
+        // presumably a cl_khr_external_* check was intended — confirm.
+        // Free it here so the per-device allocation does not leak across
+        // loop iterations.
+        free(extensions);
+        extensions = NULL;
+        errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR,
+                                 CL_UUID_SIZE_KHR, uuid, &extensionSize);
+        if (CL_SUCCESS != errNum)
+        {
+            print_error(errNum, "clGetDeviceInfo failed\n");
+            goto CLEANUP;
+        }
+        errNum =
+            memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE);
+        if (errNum == 0)
+        {
+            break;
+        }
+    }
+    if (device_no >= num_devices)
+    {
+        errNum = EXIT_FAILURE;
+        print_error(errNum,
+                    "OpenCL error: "
+                    "No Vulkan-OpenCL Interop capable GPU found.\n");
+        goto CLEANUP;
+    }
+    deviceId = devices[device_no];
+    context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
+                                      NULL, NULL, &errNum);
+    if (CL_SUCCESS != errNum)
+    {
+        print_error(errNum, "error creating context\n");
+        goto CLEANUP;
+    }
+    log_info("Successfully created context !!!\n");
+
+    cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &errNum);
+    if (CL_SUCCESS != errNum)
+    {
+        errNum = CL_INVALID_COMMAND_QUEUE;
+        print_error(errNum, "Error: Failed to create command queue!\n");
+        goto CLEANUP;
+    }
+    cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &errNum);
+    if (CL_SUCCESS != errNum)
+    {
+        errNum = CL_INVALID_COMMAND_QUEUE;
+        print_error(errNum, "Error: Failed to create command queue!\n");
+        goto CLEANUP;
+    }
+    log_info("clCreateCommandQueue successful\n");
+    // Build the three update programs/kernels for the first context.
+    // Failures go to CLEANUP (not return) so already-created resources are
+    // released.
+    for (int i = 0; i < 3; i++)
+    {
+        program_source_length = strlen(program_source_const[i]);
+        program[i] =
+            clCreateProgramWithSource(context, 1, &program_source_const[i],
+                                      &program_source_length, &errNum);
+        errNum = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
+        if (errNum != CL_SUCCESS)
+        {
+            print_error(errNum, "Error: Failed to build program \n");
+            goto CLEANUP;
+        }
+        // create the kernel
+        kernel[i] = clCreateKernel(program[i], "clUpdateBuffer", &errNum);
+        if (errNum != CL_SUCCESS)
+        {
+            print_error(errNum, "clCreateKernel failed \n");
+            goto CLEANUP;
+        }
+    }
+
+    program_source_const_verify = kernel_text_verify;
+    program_source_length = strlen(program_source_const_verify);
+    program_verify =
+        clCreateProgramWithSource(context, 1, &program_source_const_verify,
+                                  &program_source_length, &errNum);
+    errNum = clBuildProgram(program_verify, 0, NULL, NULL, NULL, NULL);
+    if (errNum != CL_SUCCESS)
+    {
+        log_error("Error: Failed to build program2\n");
+        goto CLEANUP;
+    }
+    verify_kernel = clCreateKernel(program_verify, "checkKernel", &errNum);
+    if (errNum != CL_SUCCESS)
+    {
+        print_error(errNum, "clCreateKernel failed \n");
+        goto CLEANUP;
+    }
+
+    if (multiCtx) // different context guard
+    {
+        context2 = clCreateContextFromType(
+            contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum);
+        if (CL_SUCCESS != errNum)
+        {
+            print_error(errNum, "error creating context\n");
+            goto CLEANUP;
+        }
+        cmd_queue3 =
+            clCreateCommandQueue(context2, devices[device_no], 0, &errNum);
+        if (CL_SUCCESS != errNum)
+        {
+            errNum = CL_INVALID_COMMAND_QUEUE;
+            print_error(errNum, "Error: Failed to create command queue!\n");
+            goto CLEANUP;
+        }
+        // Build the second context's copies into program2/kernel2 and
+        // program_verify2/verify_kernel2 (previously program[] and
+        // program_verify were overwritten, leaking the first context's
+        // objects).
+        for (int i = 0; i < 3; i++)
+        {
+            program_source_length = strlen(program_source_const[i]);
+            program2[i] =
+                clCreateProgramWithSource(context2, 1, &program_source_const[i],
+                                          &program_source_length, &errNum);
+            errNum = clBuildProgram(program2[i], 0, NULL, NULL, NULL, NULL);
+            if (errNum != CL_SUCCESS)
+            {
+                print_error(errNum, "Error: Failed to build program \n");
+                goto CLEANUP;
+            }
+            // create the kernel
+            kernel2[i] = clCreateKernel(program2[i], "clUpdateBuffer", &errNum);
+            if (errNum != CL_SUCCESS)
+            {
+                print_error(errNum, "clCreateKernel failed \n");
+                goto CLEANUP;
+            }
+        }
+        program_source_length = strlen(program_source_const_verify);
+        program_verify2 =
+            clCreateProgramWithSource(context2, 1, &program_source_const_verify,
+                                      &program_source_length, &errNum);
+        errNum = clBuildProgram(program_verify2, 0, NULL, NULL, NULL, NULL);
+        if (errNum != CL_SUCCESS)
+        {
+            log_error("Error: Failed to build program2\n");
+            goto CLEANUP;
+        }
+        verify_kernel2 =
+            clCreateKernel(program_verify2, "checkKernel", &errNum);
+        if (errNum != CL_SUCCESS)
+        {
+            print_error(errNum, "clCreateKernel failed \n");
+            goto CLEANUP;
+        }
+    }
+
+    for (size_t numBuffersIdx = 0; numBuffersIdx < ARRAY_SIZE(numBuffersList);
+         numBuffersIdx++)
+    {
+        uint32_t numBuffers = numBuffersList[numBuffersIdx];
+        log_info("Number of buffers: %d\n", numBuffers);
+        for (size_t sizeIdx = 0; sizeIdx < ARRAY_SIZE(bufferSizeList);
+             sizeIdx++)
+        {
+            uint32_t bufferSize = bufferSizeList[sizeIdx];
+            uint32_t bufferSizeForOffset = bufferSizeListforOffset[sizeIdx];
+            log_info("&&&& RUNNING vulkan_opencl_buffer test for Buffer size: "
+                     "%d\n",
+                     bufferSize);
+            if (multiImport && !multiCtx)
+            {
+                errNum = run_test_with_multi_import_same_ctx(
+                    context, cmd_queue1, kernel, verify_kernel, vkDevice,
+                    numBuffers, bufferSize, bufferSizeForOffset);
+            }
+            else if (multiImport && multiCtx)
+            {
+                errNum = run_test_with_multi_import_diff_ctx(
+                    context, context2, cmd_queue1, cmd_queue3, kernel, kernel2,
+                    verify_kernel, verify_kernel2, vkDevice, numBuffers,
+                    bufferSize, bufferSizeForOffset);
+            }
+            else if (numCQ == 2)
+            {
+                errNum = run_test_with_two_queue(
+                    context, cmd_queue1, cmd_queue2, kernel, verify_kernel,
+                    vkDevice, numBuffers + 1, bufferSize);
+            }
+            else
+            {
+                errNum = run_test_with_one_queue(context, cmd_queue1, kernel,
+                                                 verify_kernel, vkDevice,
+                                                 numBuffers, bufferSize);
+            }
+            if (errNum != CL_SUCCESS)
+            {
+                print_error(errNum, "func_name failed \n");
+                goto CLEANUP;
+            }
+        }
+    }
+
+CLEANUP:
+    for (int i = 0; i < 3; i++)
+    {
+        if (program[i]) clReleaseProgram(program[i]);
+        if (kernel[i]) clReleaseKernel(kernel[i]);
+        if (program2[i]) clReleaseProgram(program2[i]);
+        if (kernel2[i]) clReleaseKernel(kernel2[i]);
+    }
+    if (verify_kernel) clReleaseKernel(verify_kernel);
+    if (verify_kernel2) clReleaseKernel(verify_kernel2);
+    if (program_verify) clReleaseProgram(program_verify);
+    if (program_verify2) clReleaseProgram(program_verify2);
+    if (cmd_queue1) clReleaseCommandQueue(cmd_queue1);
+    if (cmd_queue2) clReleaseCommandQueue(cmd_queue2);
+    if (cmd_queue3) clReleaseCommandQueue(cmd_queue3);
+    if (context) clReleaseContext(context);
+    if (context2) clReleaseContext(context2);
+
+    if (devices) free(devices);
+    if (extensions) free(extensions);
+
+    return errNum;
+}
diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp
new file mode 100644
index 00000000..7577de09
--- /dev/null
+++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp
@@ -0,0 +1,1596 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#define NOMINMAX
+#include <vulkan_interop_common.hpp>
+#include <string>
+#include "harness/errorHelpers.h"
+
// Upper bounds for the 2D-image interop test sweep below.
#define MAX_2D_IMAGES 5
#define MAX_2D_IMAGE_WIDTH 1024
#define MAX_2D_IMAGE_HEIGHT 1024
#define MAX_2D_IMAGE_ELEMENT_SIZE 16
#define MAX_2D_IMAGE_MIP_LEVELS 11
// Parenthesized so the expansion is safe inside larger expressions;
// the previous unparenthesized `MAX_2D_IMAGES *MAX_2D_IMAGE_MIP_LEVELS`
// would mis-associate under division or mixed precedence.
#define MAX_2D_IMAGE_DESCRIPTORS (MAX_2D_IMAGES * MAX_2D_IMAGE_MIP_LEVELS)
#define NUM_THREADS_PER_GROUP_X 32
#define NUM_THREADS_PER_GROUP_Y 32
// Number of workgroups needed to cover `size` elements at `blockSize`
// threads per group (ROUND_UP is provided by the harness headers).
#define NUM_BLOCKS(size, blockSize)                                            \
    (ROUND_UP((size), (blockSize)) / (blockSize))
+
// Abort the test binary with a file/line diagnostic when `x` is false.
// Wrapped in do { } while (0) so the macro acts as a single statement and
// is safe in unbraced if/else bodies (the bare-`if` form broke
// `if (c) ASSERT(x); else ...` with a stray-semicolon syntax error).
#define ASSERT(x)                                                              \
    do                                                                         \
    {                                                                          \
        if (!(x))                                                              \
        {                                                                      \
            fprintf(stderr, "Assertion \"%s\" failed at %s:%d\n", #x,          \
                    __FILE__, __LINE__);                                       \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

// Assert that x <= y. Arguments are parenthesized so expressions with
// lower-precedence operators expand correctly.
#define ASSERT_LEQ(x, y)                                                       \
    do                                                                         \
    {                                                                          \
        if ((x) > (y))                                                         \
        {                                                                      \
            ASSERT(0);                                                         \
        }                                                                      \
    } while (0)
+
namespace {
// Parameter block shared with the compute shader through a storage buffer
// (descriptor binding 0): tells the shader how many 2D image descriptors
// are actually populated this iteration.
struct Params
{
    uint32_t numImage2DDescriptors;
};
}
// Device UUID used to pair the OpenCL device with the matching Vulkan
// device -- presumably filled in by test setup outside this chunk; verify.
static cl_uchar uuid[CL_UUID_SIZE_KHR];
static cl_device_id deviceId = NULL;
// Width/height caps for the size sweep; initialized to the compile-time
// maxima. NOTE(review): may be lowered elsewhere to the device's actual
// CL_DEVICE_IMAGE2D_MAX_WIDTH/HEIGHT -- not visible in this chunk, confirm.
size_t max_width = MAX_2D_IMAGE_WIDTH;
size_t max_height = MAX_2D_IMAGE_HEIGHT;
+
// OpenCL C source templates for 1, 2 and 4 image pairs. Each kernel reads
// two rows of the input image(s) and writes them vertically mirrored into
// the output image(s). The %s placeholders select the channel data type and
// read_image/write_image suffix (f/i/ui) -- presumably substituted via
// sprintf by the test setup before compilation; not visible in this chunk.
const char *kernel_text_numImage_1 = " \
__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
__kernel void image2DKernel(read_only image2d_t InputImage, write_only image2d_t OutImage, int num2DImages, int baseWidth, int baseHeight, int numMipLevels)\n\
{\n\
    int threadIdxX = get_global_id(0);\n\
    int threadIdxY = get_global_id(1);\n\
    int numThreadsX = get_global_size(0); \n\
    int numThreadsY = get_global_size(1);\n\
    if (threadIdxX >= baseWidth || threadIdxY >= baseHeight)\n\
    {\n\
        return;\n\
    }\n\
    %s dataA = read_image%s(InputImage, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataB = read_image%s(InputImage, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    write_image%s(OutImage, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
    write_image%s(OutImage, (int2)( threadIdxX, threadIdxY), dataB);\n\
\n\
}";

// Two-image-pair variant of the mirror kernel above.
const char *kernel_text_numImage_2 = " \
__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
__kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2,write_only image2d_t OutImage_2,int num2DImages, int baseWidth, int baseHeight, int numMipLevels) \n\
{\n\
    int threadIdxX = get_global_id(0);\n\
    int threadIdxY = get_global_id(1);\n\
    int numThreadsX = get_global_size(0);\n\
    int numThreadsY = get_global_size(1);\n\
    if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\
    {\n\
        return;\n\
    }\n\
    %s dataA = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataB = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataC = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataD = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
    write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\
\n\
}";

// Four-image-pair variant of the mirror kernel above.
const char *kernel_text_numImage_4 = " \
__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
__kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2, write_only image2d_t OutImage_2, read_only image2d_t InputImage_3, write_only image2d_t OutImage_3, read_only image2d_t InputImage_4, write_only image2d_t OutImage_4, int num2DImages, int baseWidth, int baseHeight, int numMipLevels) \n\
{\n\
    int threadIdxX = get_global_id(0);\n\
    int threadIdxY = get_global_id(1);\n\
    int numThreadsX = get_global_size(0);\n\
    int numThreadsY = get_global_size(1);\n\
    if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\
    {\n\
        return;\n\
    }\n\
    %s dataA = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataB = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataC = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataD = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataE = read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataF = read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataG = read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataH = read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
    write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\
    write_image%s(OutImage_3, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataE);\n\
    write_image%s(OutImage_3, (int2)(threadIdxX, threadIdxY), dataF);\n\
    write_image%s(OutImage_4, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataG);\n\
    write_image%s(OutImage_4, (int2)(threadIdxX, threadIdxY), dataH);\n\
\n\
}";

// Sweep parameters: image-pair counts and image dimensions exercised by the
// tests (widths/heights above the device limits are skipped at runtime).
const uint32_t num2DImagesList[] = { 1, 2, 4 };
const uint32_t widthList[] = { 4, 64, 183, 1024 };
const uint32_t heightList[] = { 4, 64, 365 };
+
+const cl_kernel getKernelType(VulkanFormat format, cl_kernel kernel_float,
+ cl_kernel kernel_signed,
+ cl_kernel kernel_unsigned)
+{
+ cl_kernel kernel;
+ switch (format)
+ {
+ case VULKAN_FORMAT_R32G32B32A32_SFLOAT: kernel = kernel_float; break;
+
+ case VULKAN_FORMAT_R32G32B32A32_UINT: kernel = kernel_unsigned; break;
+
+ case VULKAN_FORMAT_R32G32B32A32_SINT: kernel = kernel_signed; break;
+
+ case VULKAN_FORMAT_R16G16B16A16_UINT: kernel = kernel_unsigned; break;
+
+ case VULKAN_FORMAT_R16G16B16A16_SINT: kernel = kernel_signed; break;
+
+ case VULKAN_FORMAT_R8G8B8A8_UINT: kernel = kernel_unsigned; break;
+
+ case VULKAN_FORMAT_R8G8B8A8_SINT: kernel = kernel_signed; break;
+
+ case VULKAN_FORMAT_R32G32_SFLOAT: kernel = kernel_float; break;
+
+ case VULKAN_FORMAT_R32G32_UINT: kernel = kernel_unsigned; break;
+
+ case VULKAN_FORMAT_R32G32_SINT: kernel = kernel_signed; break;
+
+ case VULKAN_FORMAT_R16G16_UINT: kernel = kernel_unsigned; break;
+
+ case VULKAN_FORMAT_R16G16_SINT: kernel = kernel_signed; break;
+
+ case VULKAN_FORMAT_R8G8_UINT: kernel = kernel_unsigned; break;
+
+ case VULKAN_FORMAT_R8G8_SINT: kernel = kernel_signed; break;
+
+ case VULKAN_FORMAT_R32_SFLOAT: kernel = kernel_float; break;
+
+ case VULKAN_FORMAT_R32_UINT: kernel = kernel_unsigned; break;
+
+ case VULKAN_FORMAT_R32_SINT: kernel = kernel_signed; break;
+
+ case VULKAN_FORMAT_R16_UINT: kernel = kernel_unsigned; break;
+
+ case VULKAN_FORMAT_R16_SINT: kernel = kernel_signed; break;
+
+ case VULKAN_FORMAT_R8_UINT: kernel = kernel_unsigned; break;
+
+ case VULKAN_FORMAT_R8_SINT: kernel = kernel_signed; break;
+
+ default:
+ log_error(" Unsupported format");
+ ASSERT(0);
+ break;
+ }
+ return kernel;
+}
+
// Cross-command-queue Vulkan/OpenCL image interop test.
//
// Sweeps every supported Vulkan format x width x height x image count x
// external-memory-handle type x memory type. For each combination it fills
// a host-visible Vulkan staging buffer with a deterministic pattern, copies
// it into externally-shared Vulkan images, runs a Vulkan compute shader
// that mirrors the images vertically, then (synchronized via shared
// semaphores) runs two CL kernels on two different command queues that
// mirror the data again into a second set of shared images, and finally
// reads the result back and compares it against the original pattern.
//
// Parameters: `kernel_unsigned`/`kernel_signed`/`kernel_float` are arrays
// of prebuilt kernels (indices 0..2 for 1/2/4 image pairs on CQ1, index 3
// for the single-pair kernel used on CQ2). Returns CL_SUCCESS, a CL error
// code, or -1 on data mismatch.
int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
                            cl_command_queue &cmd_queue2,
                            cl_kernel *kernel_unsigned,
                            cl_kernel *kernel_signed, cl_kernel *kernel_float,
                            VulkanDevice &vkDevice)
{
    cl_int err = CL_SUCCESS;
    size_t origin[3] = { 0, 0, 0 };
    size_t region[3] = { 1, 1, 1 };

    cl_kernel updateKernelCQ1, updateKernelCQ2;
    std::vector<VulkanFormat> vkFormatList = getSupportedVulkanFormatList();
    const std::vector<VulkanExternalMemoryHandleType>
        vkExternalMemoryHandleTypeList =
            getSupportedVulkanExternalMemoryHandleTypeList();
    // Incremented per size combination so each run uses a fresh pattern.
    char magicValue = 0;

    // Host-visible buffer holding the shader's Params block.
    VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
    VulkanDeviceMemory vkParamsDeviceMemory(
        vkDevice, vkParamsBuffer.getSize(),
        getVulkanMemoryType(vkDevice,
                            VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
    vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);

    // Staging buffer sized for the largest image this test can create.
    uint64_t maxImage2DSize =
        max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
    VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
    VulkanDeviceMemory vkSrcBufferDeviceMemory(
        vkDevice, vkSrcBuffer.getSize(),
        getVulkanMemoryType(vkDevice,
                            VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
    vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer);

    // Host copies of the reference pattern and the read-back data.
    // NOTE(review): malloc results are not checked before use below.
    char *srcBufferPtr, *dstBufferPtr;
    srcBufferPtr = (char *)malloc(maxImage2DSize);
    dstBufferPtr = (char *)malloc(maxImage2DSize);

    // Descriptor layout: binding 0 = Params storage buffer, bindings 1..N =
    // storage image descriptors.
    VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
        VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
        VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
    VulkanDescriptorSetLayout vkDescriptorSetLayout(
        vkDevice, vkDescriptorSetLayoutBindingList);
    VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);

    VulkanDescriptorPool vkDescriptorPool(vkDevice,
                                          vkDescriptorSetLayoutBindingList);
    VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
                                        vkDescriptorSetLayout);

    VulkanCommandPool vkCommandPool(vkDevice);
    VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool);
    VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool);
    VulkanQueue &vkQueue = vkDevice.getQueue();

    // Shared semaphores: Vk2CL signals CL after the Vulkan shader runs,
    // Cl2Vk signals Vulkan after the CL kernels finish.
    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
    VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
    clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
    clExternalSemaphore *clCl2VkExternalSemaphore = NULL;

    clVk2CLExternalSemaphore = new clExternalSemaphore(
        vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
    clCl2VkExternalSemaphore = new clExternalSemaphore(
        vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);

    // Per-iteration allocations; entries are deleted and erased at the end
    // of each memory-type iteration.
    std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
    std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
    std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
    std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
    std::vector<char> vkImage2DShader;

    for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
    {
        VulkanFormat vkFormat = vkFormatList[fIdx];
        log_info("Format: %d\n", vkFormat);
        uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
        ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
        log_info("elementSize= %d\n", elementSize);

        // Load the precompiled SPIR-V mirror shader for this format.
        std::string fileName = "image2D_"
            + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
        log_info("Load %s file", fileName.c_str());
        vkImage2DShader = readFile(fileName);
        VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);

        VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
                                                vkImage2DShaderModule);

        for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++)
        {
            uint32_t width = widthList[wIdx];
            log_info("Width: %d\n", width);
            if (width > max_width) continue;
            region[0] = width;
            for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
            {
                uint32_t height = heightList[hIdx];
                log_info("Height: %d", height);
                if (height > max_height) continue;
                region[1] = height;

                uint32_t numMipLevels = 1;
                log_info("Number of mipmap levels: %d\n", numMipLevels);

                // Fill the staging buffer and the host reference copy with
                // the same deterministic byte pattern.
                magicValue++;
                char *vkSrcBufferDeviceMemoryPtr =
                    (char *)vkSrcBufferDeviceMemory.map();
                uint64_t srcBufSize = 0;
                memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize);
                memset(srcBufferPtr, 0, maxImage2DSize);
                uint32_t mipLevel = 0;
                for (uint32_t row = 0;
                     row < std::max(height >> mipLevel, uint32_t(1)); row++)
                {
                    for (uint32_t col = 0;
                         col < std::max(width >> mipLevel, uint32_t(1)); col++)
                    {
                        for (uint32_t elementByte = 0;
                             elementByte < elementSize; elementByte++)
                        {
                            vkSrcBufferDeviceMemoryPtr[srcBufSize] =
                                (char)(magicValue + mipLevel + row + col);
                            srcBufferPtr[srcBufSize] =
                                (char)(magicValue + mipLevel + row + col);
                            srcBufSize++;
                        }
                    }
                }
                srcBufSize = ROUND_UP(
                    srcBufSize,
                    std::max(
                        elementSize,
                        (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT));
                vkSrcBufferDeviceMemory.unmap();

                for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList);
                     niIdx++)
                {
                    uint32_t num2DImages = num2DImagesList[niIdx] + 1;
                    // added one image for cross-cq case for updateKernelCQ2
                    log_info("Number of images: %d\n", num2DImages);
                    ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES);
                    uint32_t num_2D_image;
                    if (useSingleImageKernel)
                    {
                        num_2D_image = 1;
                    }
                    else
                    {
                        num_2D_image = num2DImages;
                    }
                    // Publish the descriptor count to the shader.
                    Params *params = (Params *)vkParamsDeviceMemory.map();
                    params->numImage2DDescriptors = num_2D_image * numMipLevels;
                    vkParamsDeviceMemory.unmap();
                    vkDescriptorSet.update(0, vkParamsBuffer);
                    for (size_t emhtIdx = 0;
                         emhtIdx < vkExternalMemoryHandleTypeList.size();
                         emhtIdx++)
                    {
                        VulkanExternalMemoryHandleType
                            vkExternalMemoryHandleType =
                                vkExternalMemoryHandleTypeList[emhtIdx];
                        log_info("External memory handle type: %d \n",
                                 vkExternalMemoryHandleType);
                        if ((true == disableNTHandleType)
                            && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
                                == vkExternalMemoryHandleType))
                        {
                            // Skip running for WIN32 NT handle.
                            continue;
                        }
                        // Dummy image used only to query the memory types
                        // compatible with this handle type.
                        VulkanImage2D vkDummyImage2D(
                            vkDevice, vkFormatList[0], widthList[0],
                            heightList[0], 1, vkExternalMemoryHandleType);
                        const VulkanMemoryTypeList &memoryTypeList =
                            vkDummyImage2D.getMemoryTypeList();

                        for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
                             mtIdx++)
                        {
                            const VulkanMemoryType &memoryType =
                                memoryTypeList[mtIdx];
                            log_info("Memory type index: %d\n",
                                     (uint32_t)memoryType);
                            log_info("Memory type property: %d\n",
                                     memoryType.getMemoryTypeProperty());
                            if (!useDeviceLocal)
                            {
                                if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL
                                    == memoryType.getMemoryTypeProperty())
                                {
                                    continue;
                                }
                            }

                            // Probe one image to learn the aligned per-image
                            // allocation size (the probe image is destroyed
                            // at the end of this scope).
                            size_t totalImageMemSize = 0;
                            uint64_t interImageOffset = 0;
                            {
                                VulkanImage2D vkImage2D(
                                    vkDevice, vkFormat, width, height,
                                    numMipLevels, vkExternalMemoryHandleType);
                                ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
                                totalImageMemSize =
                                    ROUND_UP(vkImage2D.getSize(),
                                             vkImage2D.getAlignment());
                            }
                            // First image set: written by Vulkan, read by CL.
                            VulkanImage2DList vkNonDedicatedImage2DList(
                                num2DImages, vkDevice, vkFormat, width, height,
                                numMipLevels, vkExternalMemoryHandleType);
                            for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
                            {
                                if (non_dedicated)
                                {
                                    vkNonDedicatedImage2DListDeviceMemory1
                                        .push_back(new VulkanDeviceMemory(
                                            vkDevice, totalImageMemSize,
                                            memoryType,
                                            vkExternalMemoryHandleType));
                                }
                                else
                                {
                                    vkNonDedicatedImage2DListDeviceMemory1
                                        .push_back(new VulkanDeviceMemory(
                                            vkDevice,
                                            vkNonDedicatedImage2DList[bIdx],
                                            memoryType,
                                            vkExternalMemoryHandleType));
                                }
                                vkNonDedicatedImage2DListDeviceMemory1[bIdx]
                                    ->bindImage(vkNonDedicatedImage2DList[bIdx],
                                                0);
                                nonDedicatedExternalMemory1.push_back(
                                    new clExternalMemoryImage(
                                        *vkNonDedicatedImage2DListDeviceMemory1
                                            [bIdx],
                                        vkExternalMemoryHandleType, context,
                                        totalImageMemSize, width, height, 0,
                                        vkNonDedicatedImage2DList[bIdx],
                                        deviceId));
                            }
                            VulkanImageViewList vkNonDedicatedImage2DViewList(
                                vkDevice, vkNonDedicatedImage2DList);
                            // Second image set: written by CL, read back for
                            // verification.
                            VulkanImage2DList vkNonDedicatedImage2DList2(
                                num2DImages, vkDevice, vkFormat, width, height,
                                numMipLevels, vkExternalMemoryHandleType);
                            for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
                            {
                                if (non_dedicated)
                                {
                                    vkNonDedicatedImage2DListDeviceMemory2
                                        .push_back(new VulkanDeviceMemory(
                                            vkDevice, totalImageMemSize,
                                            memoryType,
                                            vkExternalMemoryHandleType));
                                }
                                else
                                {
                                    vkNonDedicatedImage2DListDeviceMemory2
                                        .push_back(new VulkanDeviceMemory(
                                            vkDevice,
                                            vkNonDedicatedImage2DList2[bIdx],
                                            memoryType,
                                            vkExternalMemoryHandleType));
                                }
                                vkNonDedicatedImage2DListDeviceMemory2[bIdx]
                                    ->bindImage(
                                        vkNonDedicatedImage2DList2[bIdx], 0);
                                nonDedicatedExternalMemory2.push_back(
                                    new clExternalMemoryImage(
                                        *vkNonDedicatedImage2DListDeviceMemory2
                                            [bIdx],
                                        vkExternalMemoryHandleType, context,
                                        totalImageMemSize, width, height, 0,
                                        vkNonDedicatedImage2DList2[bIdx],
                                        deviceId));
                            }
                            VulkanImageViewList vkDedicatedImage2DViewList(
                                vkDevice, vkNonDedicatedImage2DList2);

                            cl_mem external_mem_image1[5];
                            cl_mem external_mem_image2[5];
                            for (int i = 0; i < num2DImages; i++)
                            {
                                external_mem_image1[i] =
                                    nonDedicatedExternalMemory1[i]
                                        ->getExternalMemoryImage();
                                external_mem_image2[i] =
                                    nonDedicatedExternalMemory2[i]
                                        ->getExternalMemoryImage();
                            }
                            VulkanImage2DList &vkImage2DList =
                                vkNonDedicatedImage2DList;
                            VulkanImageViewList &vkImage2DViewList =
                                vkNonDedicatedImage2DViewList;

                            // Kick off the CL->Vk semaphore chain.
                            clCl2VkExternalSemaphore->signal(cmd_queue1);
                            if (!useSingleImageKernel)
                            {
                                // Multi-image shader: bind every image view
                                // and record the copy + dispatch once.
                                for (size_t i2DIdx = 0;
                                     i2DIdx < vkImage2DList.size(); i2DIdx++)
                                {
                                    for (uint32_t mipLevel = 0;
                                         mipLevel < numMipLevels; mipLevel++)
                                    {
                                        uint32_t i2DvIdx =
                                            (uint32_t)(i2DIdx * numMipLevels)
                                            + mipLevel;
                                        vkDescriptorSet.update(
                                            1 + i2DvIdx,
                                            vkImage2DViewList[i2DvIdx]);
                                    }
                                }
                                vkCopyCommandBuffer.begin();
                                vkCopyCommandBuffer.pipelineBarrier(
                                    vkImage2DList,
                                    VULKAN_IMAGE_LAYOUT_UNDEFINED,
                                    VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
                                for (size_t i2DIdx = 0;
                                     i2DIdx < vkImage2DList.size(); i2DIdx++)
                                {
                                    vkCopyCommandBuffer.copyBufferToImage(
                                        vkSrcBuffer, vkImage2DList[i2DIdx],
                                        VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
                                }
                                vkCopyCommandBuffer.pipelineBarrier(
                                    vkImage2DList,
                                    VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                    VULKAN_IMAGE_LAYOUT_GENERAL);
                                vkCopyCommandBuffer.end();
                                memset(dstBufferPtr, 0, srcBufSize);
                                vkQueue.submit(vkCopyCommandBuffer);
                                vkShaderCommandBuffer.begin();
                                vkShaderCommandBuffer.bindPipeline(
                                    vkComputePipeline);
                                vkShaderCommandBuffer.bindDescriptorSets(
                                    vkComputePipeline, vkPipelineLayout,
                                    vkDescriptorSet);
                                vkShaderCommandBuffer.dispatch(
                                    NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X),
                                    NUM_BLOCKS(height,
                                               NUM_THREADS_PER_GROUP_Y / 2),
                                    1);
                                vkShaderCommandBuffer.end();
                            }
                            for (uint32_t iter = 0; iter < innerIterations;
                                 iter++)
                            {
                                if (useSingleImageKernel)
                                {
                                    // Single-image shader: re-record and run
                                    // once per image, submitting all but the
                                    // last (the last goes with the semaphore
                                    // submit below).
                                    for (size_t i2DIdx = 0;
                                         i2DIdx < vkImage2DList.size();
                                         i2DIdx++)
                                    {
                                        vkDescriptorSet.update(
                                            1, vkImage2DViewList[i2DIdx]);
                                        vkCopyCommandBuffer.begin();
                                        vkCopyCommandBuffer.pipelineBarrier(
                                            vkImage2DList,
                                            VULKAN_IMAGE_LAYOUT_UNDEFINED,
                                            VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

                                        vkCopyCommandBuffer.copyBufferToImage(
                                            vkSrcBuffer, vkImage2DList[i2DIdx],
                                            VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
                                        vkCopyCommandBuffer.pipelineBarrier(
                                            vkImage2DList,
                                            VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                            VULKAN_IMAGE_LAYOUT_GENERAL);
                                        vkCopyCommandBuffer.end();
                                        memset(dstBufferPtr, 0, srcBufSize);
                                        vkQueue.submit(vkCopyCommandBuffer);
                                        vkShaderCommandBuffer.begin();
                                        vkShaderCommandBuffer.bindPipeline(
                                            vkComputePipeline);
                                        vkShaderCommandBuffer
                                            .bindDescriptorSets(
                                                vkComputePipeline,
                                                vkPipelineLayout,
                                                vkDescriptorSet);
                                        vkShaderCommandBuffer.dispatch(
                                            NUM_BLOCKS(width,
                                                       NUM_THREADS_PER_GROUP_X),
                                            NUM_BLOCKS(height,
                                                       NUM_THREADS_PER_GROUP_Y
                                                           / 2),
                                            1);
                                        vkShaderCommandBuffer.end();
                                        if (i2DIdx < vkImage2DList.size() - 1)
                                        {
                                            vkQueue.submit(
                                                vkShaderCommandBuffer);
                                        }
                                    }
                                }
                                // Final Vulkan submit waits on Cl2Vk and
                                // signals Vk2CL; CL side waits before
                                // launching its kernels.
                                vkQueue.submit(vkCl2VkSemaphore,
                                               vkShaderCommandBuffer,
                                               vkVk2CLSemaphore);
                                clVk2CLExternalSemaphore->wait(cmd_queue1);
                                // num2DImages here is list value + 1, so the
                                // cases are 2/3/5 -> 1/2/4-pair kernels.
                                switch (num2DImages)
                                {
                                    case 2:
                                        updateKernelCQ1 = getKernelType(
                                            vkFormat, kernel_float[0],
                                            kernel_signed[0],
                                            kernel_unsigned[0]);
                                        break;
                                    case 3:
                                        updateKernelCQ1 = getKernelType(
                                            vkFormat, kernel_float[1],
                                            kernel_signed[1],
                                            kernel_unsigned[1]);
                                        break;
                                    case 5:
                                        updateKernelCQ1 = getKernelType(
                                            vkFormat, kernel_float[2],
                                            kernel_signed[2],
                                            kernel_unsigned[2]);
                                        break;
                                }
                                updateKernelCQ2 = getKernelType(
                                    vkFormat, kernel_float[3], kernel_signed[3],
                                    kernel_unsigned[3]);
                                // similar kernel-type based on vkFormat
                                int j = 0;
                                // Setting arguments of updateKernelCQ2

                                err = clSetKernelArg(updateKernelCQ2, 0,
                                                     sizeof(cl_mem),
                                                     &external_mem_image1[0]);
                                err |= clSetKernelArg(updateKernelCQ2, 1,
                                                      sizeof(cl_mem),
                                                      &external_mem_image2[0]);
                                err |= clSetKernelArg(
                                    updateKernelCQ2, 2, sizeof(cl_mem),
                                    &external_mem_image1[num2DImages - 1]);
                                err |= clSetKernelArg(
                                    updateKernelCQ2, 3, sizeof(cl_mem),
                                    &external_mem_image2[num2DImages - 1]);
                                err |= clSetKernelArg(updateKernelCQ2, 4,
                                                      sizeof(unsigned int),
                                                      &num2DImages);
                                err |= clSetKernelArg(updateKernelCQ2, 5,
                                                      sizeof(unsigned int),
                                                      &width);
                                err |= clSetKernelArg(updateKernelCQ2, 6,
                                                      sizeof(unsigned int),
                                                      &height);
                                err |= clSetKernelArg(updateKernelCQ2, 7,
                                                      sizeof(unsigned int),
                                                      &numMipLevels);
                                // CQ1 kernel gets the first num2DImages-1
                                // image pairs; j advances two slots per pair.
                                for (int i = 0; i < num2DImages - 1; i++, ++j)
                                {
                                    err = clSetKernelArg(
                                        updateKernelCQ1, j, sizeof(cl_mem),
                                        &external_mem_image1[i]);
                                    err |= clSetKernelArg(
                                        updateKernelCQ1, ++j, sizeof(cl_mem),
                                        &external_mem_image2[i]);
                                }
                                err |= clSetKernelArg(updateKernelCQ1, j,
                                                      sizeof(unsigned int),
                                                      &num2DImages);
                                err |= clSetKernelArg(updateKernelCQ1, ++j,
                                                      sizeof(unsigned int),
                                                      &width);
                                err |= clSetKernelArg(updateKernelCQ1, ++j,
                                                      sizeof(unsigned int),
                                                      &height);
                                err |= clSetKernelArg(updateKernelCQ1, ++j,
                                                      sizeof(unsigned int),
                                                      &numMipLevels);

                                if (err != CL_SUCCESS)
                                {
                                    print_error(
                                        err,
                                        "Error: Failed to set arg values \n");
                                    goto CLEANUP;
                                }
                                // clVk2CLExternalSemaphore->wait(cmd_queue1);
                                size_t global_work_size[3] = { width, height,
                                                               1 };
                                // Launch on CQ1, then chain the CQ2 launch
                                // on its completion event (cross-queue dep).
                                cl_event first_launch;
                                err = clEnqueueNDRangeKernel(
                                    cmd_queue1, updateKernelCQ1, 2, NULL,
                                    global_work_size, NULL, 0, NULL,
                                    &first_launch);
                                if (err != CL_SUCCESS)
                                {
                                    goto CLEANUP;
                                }
                                err = clEnqueueNDRangeKernel(
                                    cmd_queue2, updateKernelCQ2, 2, NULL,
                                    global_work_size, NULL, 1, &first_launch,
                                    NULL);
                                if (err != CL_SUCCESS)
                                {
                                    goto CLEANUP;
                                }

                                clFinish(cmd_queue2);
                                clCl2VkExternalSemaphore->signal(cmd_queue2);
                            }

                            // Verification: the double mirror must reproduce
                            // the original pattern in every output image.
                            unsigned int flags = 0;
                            size_t mipmapLevelOffset = 0;
                            cl_event eventReadImage = NULL;
                            clFinish(cmd_queue2);
                            for (int i = 0; i < num2DImages; i++)
                            {
                                err = clEnqueueReadImage(
                                    cmd_queue1, external_mem_image2[i], CL_TRUE,
                                    origin, region, 0, 0, dstBufferPtr, 0, NULL,
                                    &eventReadImage);

                                // NOTE(review): a read failure is logged but
                                // not propagated; the stale dstBufferPtr is
                                // still compared below -- confirm intended.
                                if (err != CL_SUCCESS)
                                {
                                    print_error(err,
                                                "clEnqueueReadImage failed with"
                                                "error\n");
                                }

                                if (memcmp(srcBufferPtr, dstBufferPtr,
                                           srcBufSize))
                                {
                                    log_info("Source and destination buffers "
                                             "don't match\n");
                                    if (debug_trace)
                                    {
                                        log_info("Source buffer contents: \n");
                                        for (uint64_t sIdx = 0;
                                             sIdx < srcBufSize; sIdx++)
                                        {
                                            log_info(
                                                "%d ",
                                                (int)vkSrcBufferDeviceMemoryPtr
                                                    [sIdx]);
                                        }
                                        log_info("Destination buffer contents:"
                                                 "\n");
                                        for (uint64_t dIdx = 0;
                                             dIdx < srcBufSize; dIdx++)
                                        {
                                            log_info("%d ",
                                                     (int)dstBufferPtr[dIdx]);
                                        }
                                    }
                                    err = -1;
                                    break;
                                }
                            }
                            // Release this iteration's allocations and drop
                            // them from the tracking vectors.
                            for (int i = 0; i < num2DImages; i++)
                            {
                                delete vkNonDedicatedImage2DListDeviceMemory1
                                    [i];
                                delete vkNonDedicatedImage2DListDeviceMemory2
                                    [i];
                                delete nonDedicatedExternalMemory1[i];
                                delete nonDedicatedExternalMemory2[i];
                            }
                            vkNonDedicatedImage2DListDeviceMemory1.erase(
                                vkNonDedicatedImage2DListDeviceMemory1.begin(),
                                vkNonDedicatedImage2DListDeviceMemory1.begin()
                                    + num2DImages);
                            vkNonDedicatedImage2DListDeviceMemory2.erase(
                                vkNonDedicatedImage2DListDeviceMemory2.begin(),
                                vkNonDedicatedImage2DListDeviceMemory2.begin()
                                    + num2DImages);
                            nonDedicatedExternalMemory1.erase(
                                nonDedicatedExternalMemory1.begin(),
                                nonDedicatedExternalMemory1.begin()
                                    + num2DImages);
                            nonDedicatedExternalMemory2.erase(
                                nonDedicatedExternalMemory2.begin(),
                                nonDedicatedExternalMemory2.begin()
                                    + num2DImages);
                            if (CL_SUCCESS != err)
                            {
                                goto CLEANUP;
                            }
                        }
                    }
                }
            }
        }

        vkImage2DShader.clear();
    }
CLEANUP:
    if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
    if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;

    if (srcBufferPtr) free(srcBufferPtr);
    if (dstBufferPtr) free(dstBufferPtr);
    return err;
}
+
+int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
+ cl_kernel *kernel_unsigned,
+ cl_kernel *kernel_signed, cl_kernel *kernel_float,
+ VulkanDevice &vkDevice)
+{
+ cl_int err = CL_SUCCESS;
+ size_t origin[3] = { 0, 0, 0 };
+ size_t region[3] = { 1, 1, 1 };
+ cl_kernel updateKernelCQ1;
+ std::vector<VulkanFormat> vkFormatList = getSupportedVulkanFormatList();
+ const std::vector<VulkanExternalMemoryHandleType>
+ vkExternalMemoryHandleTypeList =
+ getSupportedVulkanExternalMemoryHandleTypeList();
+ char magicValue = 0;
+
+ VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
+ VulkanDeviceMemory vkParamsDeviceMemory(
+ vkDevice, vkParamsBuffer.getSize(),
+ getVulkanMemoryType(vkDevice,
+ VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
+ vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
+
+ uint64_t maxImage2DSize =
+ max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
+ VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
+ VulkanDeviceMemory vkSrcBufferDeviceMemory(
+ vkDevice, vkSrcBuffer.getSize(),
+ getVulkanMemoryType(vkDevice,
+ VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
+ vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer);
+
+ char *srcBufferPtr, *dstBufferPtr;
+ srcBufferPtr = (char *)malloc(maxImage2DSize);
+ dstBufferPtr = (char *)malloc(maxImage2DSize);
+
+ VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
+ VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
+ VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
+ VulkanDescriptorSetLayout vkDescriptorSetLayout(
+ vkDevice, vkDescriptorSetLayoutBindingList);
+ VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
+
+ VulkanDescriptorPool vkDescriptorPool(vkDevice,
+ vkDescriptorSetLayoutBindingList);
+ VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
+ vkDescriptorSetLayout);
+
+ VulkanCommandPool vkCommandPool(vkDevice);
+ VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool);
+ VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool);
+ VulkanQueue &vkQueue = vkDevice.getQueue();
+
+ VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+ getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+ VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+ VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+ clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
+ clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
+
+ clVk2CLExternalSemaphore = new clExternalSemaphore(
+ vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ clCl2VkExternalSemaphore = new clExternalSemaphore(
+ vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+
+ std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
+ std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
+ std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
+ std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
+ std::vector<char> vkImage2DShader;
+
+ for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
+ {
+ VulkanFormat vkFormat = vkFormatList[fIdx];
+ log_info("Format: %d\n", vkFormat);
+ uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
+ ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
+ log_info("elementSize= %d\n", elementSize);
+
+ std::string fileName = "image2D_"
+ + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
+ log_info("Load %s file", fileName.c_str());
+ vkImage2DShader = readFile(fileName);
+ VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);
+
+ VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
+ vkImage2DShaderModule);
+
+ for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++)
+ {
+ uint32_t width = widthList[wIdx];
+ log_info("Width: %d\n", width);
+ if (width > max_width) continue;
+ region[0] = width;
+ for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
+ {
+ uint32_t height = heightList[hIdx];
+ log_info("Height: %d\n", height);
+ if (height > max_height) continue;
+ region[1] = height;
+
+ uint32_t numMipLevels = 1;
+ log_info("Number of mipmap levels: %d\n", numMipLevels);
+
+ magicValue++;
+ char *vkSrcBufferDeviceMemoryPtr =
+ (char *)vkSrcBufferDeviceMemory.map();
+ uint64_t srcBufSize = 0;
+ memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize);
+ memset(srcBufferPtr, 0, maxImage2DSize);
+ uint32_t mipLevel = 0;
+ for (uint32_t row = 0;
+ row < std::max(height >> mipLevel, uint32_t(1)); row++)
+ {
+ for (uint32_t col = 0;
+ col < std::max(width >> mipLevel, uint32_t(1)); col++)
+ {
+ for (uint32_t elementByte = 0;
+ elementByte < elementSize; elementByte++)
+ {
+ vkSrcBufferDeviceMemoryPtr[srcBufSize] =
+ (char)(magicValue + mipLevel + row + col);
+ srcBufferPtr[srcBufSize] =
+ (char)(magicValue + mipLevel + row + col);
+ srcBufSize++;
+ }
+ }
+ }
+ srcBufSize = ROUND_UP(
+ srcBufSize,
+ std::max(
+ elementSize,
+ (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT));
+ vkSrcBufferDeviceMemory.unmap();
+
+ for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList);
+ niIdx++)
+ {
+ uint32_t num2DImages = num2DImagesList[niIdx];
+ log_info("Number of images: %d\n", num2DImages);
+ ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES);
+
+ Params *params = (Params *)vkParamsDeviceMemory.map();
+ uint32_t num_2D_image;
+ if (useSingleImageKernel)
+ {
+ num_2D_image = 1;
+ }
+ else
+ {
+ num_2D_image = num2DImages;
+ }
+ params->numImage2DDescriptors = num_2D_image * numMipLevels;
+ vkParamsDeviceMemory.unmap();
+ vkDescriptorSet.update(0, vkParamsBuffer);
+ for (size_t emhtIdx = 0;
+ emhtIdx < vkExternalMemoryHandleTypeList.size();
+ emhtIdx++)
+ {
+ VulkanExternalMemoryHandleType
+ vkExternalMemoryHandleType =
+ vkExternalMemoryHandleTypeList[emhtIdx];
+ log_info("External memory handle type: %d \n",
+ vkExternalMemoryHandleType);
+ if ((true == disableNTHandleType)
+ && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
+ == vkExternalMemoryHandleType))
+ {
+ // Skip running for WIN32 NT handle.
+ continue;
+ }
+ VulkanImage2D vkDummyImage2D(
+ vkDevice, vkFormatList[0], widthList[0],
+ heightList[0], 1, vkExternalMemoryHandleType);
+ const VulkanMemoryTypeList &memoryTypeList =
+ vkDummyImage2D.getMemoryTypeList();
+
+ for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
+ mtIdx++)
+ {
+ const VulkanMemoryType &memoryType =
+ memoryTypeList[mtIdx];
+ log_info("Memory type index: %d\n",
+ (uint32_t)memoryType);
+ log_info("Memory type property: %d\n",
+ memoryType.getMemoryTypeProperty());
+ if (!useDeviceLocal)
+ {
+ if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL
+ == memoryType.getMemoryTypeProperty())
+ {
+ continue;
+ }
+ }
+ size_t totalImageMemSize = 0;
+ uint64_t interImageOffset = 0;
+ {
+ VulkanImage2D vkImage2D(
+ vkDevice, vkFormat, width, height,
+ numMipLevels, vkExternalMemoryHandleType);
+ ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
+ totalImageMemSize =
+ ROUND_UP(vkImage2D.getSize(),
+ vkImage2D.getAlignment());
+ }
+ VulkanImage2DList vkNonDedicatedImage2DList(
+ num2DImages, vkDevice, vkFormat, width, height,
+ numMipLevels, vkExternalMemoryHandleType);
+ for (size_t bIdx = 0;
+ bIdx < vkNonDedicatedImage2DList.size();
+ bIdx++)
+ {
+ // Create list of Vulkan device memories and
+ // bind the list of Vulkan images.
+ vkNonDedicatedImage2DListDeviceMemory1
+ .push_back(new VulkanDeviceMemory(
+ vkDevice, totalImageMemSize, memoryType,
+ vkExternalMemoryHandleType));
+ vkNonDedicatedImage2DListDeviceMemory1[bIdx]
+ ->bindImage(vkNonDedicatedImage2DList[bIdx],
+ 0);
+ nonDedicatedExternalMemory1.push_back(
+ new clExternalMemoryImage(
+ *vkNonDedicatedImage2DListDeviceMemory1
+ [bIdx],
+ vkExternalMemoryHandleType, context,
+ totalImageMemSize, width, height, 0,
+ vkNonDedicatedImage2DList[bIdx],
+ deviceId));
+ }
+ VulkanImageViewList vkNonDedicatedImage2DViewList(
+ vkDevice, vkNonDedicatedImage2DList);
+
+ VulkanImage2DList vkNonDedicatedImage2DList2(
+ num2DImages, vkDevice, vkFormat, width, height,
+ numMipLevels, vkExternalMemoryHandleType);
+ for (size_t bIdx = 0;
+ bIdx < vkNonDedicatedImage2DList2.size();
+ bIdx++)
+ {
+ vkNonDedicatedImage2DListDeviceMemory2
+ .push_back(new VulkanDeviceMemory(
+ vkDevice, totalImageMemSize, memoryType,
+ vkExternalMemoryHandleType));
+ vkNonDedicatedImage2DListDeviceMemory2[bIdx]
+ ->bindImage(
+ vkNonDedicatedImage2DList2[bIdx], 0);
+ nonDedicatedExternalMemory2.push_back(
+ new clExternalMemoryImage(
+ *vkNonDedicatedImage2DListDeviceMemory2
+ [bIdx],
+ vkExternalMemoryHandleType, context,
+ totalImageMemSize, width, height, 0,
+ vkNonDedicatedImage2DList2[bIdx],
+ deviceId));
+ }
+ VulkanImageViewList vkDedicatedImage2DViewList(
+ vkDevice, vkNonDedicatedImage2DList2);
+ cl_mem external_mem_image1[4];
+ cl_mem external_mem_image2[4];
+ for (int i = 0; i < num2DImages; i++)
+ {
+ external_mem_image1[i] =
+ nonDedicatedExternalMemory1[i]
+ ->getExternalMemoryImage();
+ external_mem_image2[i] =
+ nonDedicatedExternalMemory2[i]
+ ->getExternalMemoryImage();
+ }
+ VulkanImage2DList &vkImage2DList =
+ vkNonDedicatedImage2DList;
+ VulkanImageViewList &vkImage2DViewList =
+ vkNonDedicatedImage2DViewList;
+
+ clCl2VkExternalSemaphore->signal(cmd_queue1);
+ if (!useSingleImageKernel)
+ {
+ for (size_t i2DIdx = 0;
+ i2DIdx < vkImage2DList.size(); i2DIdx++)
+ {
+ for (uint32_t mipLevel = 0;
+ mipLevel < numMipLevels; mipLevel++)
+ {
+ uint32_t i2DvIdx =
+ (uint32_t)(i2DIdx * numMipLevels)
+ + mipLevel;
+ vkDescriptorSet.update(
+ 1 + i2DvIdx,
+ vkImage2DViewList[i2DvIdx]);
+ }
+ }
+ vkCopyCommandBuffer.begin();
+ vkCopyCommandBuffer.pipelineBarrier(
+ vkImage2DList,
+ VULKAN_IMAGE_LAYOUT_UNDEFINED,
+ VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+ for (size_t i2DIdx = 0;
+ i2DIdx < vkImage2DList.size(); i2DIdx++)
+ {
+ vkCopyCommandBuffer.copyBufferToImage(
+ vkSrcBuffer, vkImage2DList[i2DIdx],
+ VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+ }
+ vkCopyCommandBuffer.pipelineBarrier(
+ vkImage2DList,
+ VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ VULKAN_IMAGE_LAYOUT_GENERAL);
+ vkCopyCommandBuffer.end();
+ memset(dstBufferPtr, 0, srcBufSize);
+ vkQueue.submit(vkCopyCommandBuffer);
+ vkShaderCommandBuffer.begin();
+ vkShaderCommandBuffer.bindPipeline(
+ vkComputePipeline);
+ vkShaderCommandBuffer.bindDescriptorSets(
+ vkComputePipeline, vkPipelineLayout,
+ vkDescriptorSet);
+ vkShaderCommandBuffer.dispatch(
+ NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X),
+ NUM_BLOCKS(height,
+ NUM_THREADS_PER_GROUP_Y / 2),
+ 1);
+ vkShaderCommandBuffer.end();
+ }
+ for (uint32_t iter = 0; iter < innerIterations;
+ iter++)
+ {
+ if (useSingleImageKernel)
+ {
+ for (size_t i2DIdx = 0;
+ i2DIdx < vkImage2DList.size();
+ i2DIdx++)
+ {
+ vkDescriptorSet.update(
+ 1, vkImage2DViewList[i2DIdx]);
+ vkCopyCommandBuffer.begin();
+ vkCopyCommandBuffer.pipelineBarrier(
+ vkImage2DList,
+ VULKAN_IMAGE_LAYOUT_UNDEFINED,
+ VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+
+ vkCopyCommandBuffer.copyBufferToImage(
+ vkSrcBuffer, vkImage2DList[i2DIdx],
+ VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+ vkCopyCommandBuffer.pipelineBarrier(
+ vkImage2DList,
+ VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ VULKAN_IMAGE_LAYOUT_GENERAL);
+ vkCopyCommandBuffer.end();
+ memset(dstBufferPtr, 0, srcBufSize);
+ vkQueue.submit(vkCopyCommandBuffer);
+ vkShaderCommandBuffer.begin();
+ vkShaderCommandBuffer.bindPipeline(
+ vkComputePipeline);
+ vkShaderCommandBuffer
+ .bindDescriptorSets(
+ vkComputePipeline,
+ vkPipelineLayout,
+ vkDescriptorSet);
+ vkShaderCommandBuffer.dispatch(
+ NUM_BLOCKS(width,
+ NUM_THREADS_PER_GROUP_X),
+ NUM_BLOCKS(height,
+ NUM_THREADS_PER_GROUP_Y
+ / 2),
+ 1);
+ vkShaderCommandBuffer.end();
+ if (i2DIdx < vkImage2DList.size() - 1)
+ {
+ vkQueue.submit(
+ vkShaderCommandBuffer);
+ }
+ }
+ }
+ vkQueue.submit(vkCl2VkSemaphore,
+ vkShaderCommandBuffer,
+ vkVk2CLSemaphore);
+ clVk2CLExternalSemaphore->wait(cmd_queue1);
+ switch (num2DImages)
+ {
+ case 1:
+ updateKernelCQ1 = getKernelType(
+ vkFormat, kernel_float[0],
+ kernel_signed[0],
+ kernel_unsigned[0]);
+ break;
+ case 2:
+ updateKernelCQ1 = getKernelType(
+ vkFormat, kernel_float[1],
+ kernel_signed[1],
+ kernel_unsigned[1]);
+ break;
+ case 4:
+ updateKernelCQ1 = getKernelType(
+ vkFormat, kernel_float[2],
+ kernel_signed[2],
+ kernel_unsigned[2]);
+ break;
+ }
+ int j = 0;
+ for (int i = 0; i < num2DImages; i++, ++j)
+ {
+ err = clSetKernelArg(
+ updateKernelCQ1, j, sizeof(cl_mem),
+ &external_mem_image1[i]);
+ err |= clSetKernelArg(
+ updateKernelCQ1, ++j, sizeof(cl_mem),
+ &external_mem_image2[i]);
+ }
+ err |= clSetKernelArg(updateKernelCQ1, j,
+ sizeof(unsigned int),
+ &num2DImages);
+ err |= clSetKernelArg(updateKernelCQ1, ++j,
+ sizeof(unsigned int),
+ &width);
+ err |= clSetKernelArg(updateKernelCQ1, ++j,
+ sizeof(unsigned int),
+ &height);
+ err |= clSetKernelArg(updateKernelCQ1, ++j,
+ sizeof(unsigned int),
+ &numMipLevels);
+
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "Error: Failed to set arg "
+ "values for kernel-1\n");
+ goto CLEANUP;
+ }
+
+ size_t global_work_size[3] = { width, height,
+ 1 };
+ err = clEnqueueNDRangeKernel(
+ cmd_queue1, updateKernelCQ1, 2, NULL,
+ global_work_size, NULL, 0, NULL, NULL);
+ if (err != CL_SUCCESS)
+ {
+ goto CLEANUP;
+ }
+ clCl2VkExternalSemaphore->signal(cmd_queue1);
+ }
+
+ unsigned int flags = 0;
+ size_t mipmapLevelOffset = 0;
+ cl_event eventReadImage = NULL;
+ for (int i = 0; i < num2DImages; i++)
+ {
+ err = clEnqueueReadImage(
+ cmd_queue1, external_mem_image2[i], CL_TRUE,
+ origin, region, 0, 0, dstBufferPtr, 0, NULL,
+ &eventReadImage);
+
+ if (err != CL_SUCCESS)
+ {
+ print_error(err,
+ "clEnqueueReadImage failed with"
+ "error\n");
+ }
+
+ if (memcmp(srcBufferPtr, dstBufferPtr,
+ srcBufSize))
+ {
+ log_info("Source and destination buffers "
+ "don't match\n");
+ if (debug_trace)
+ {
+ log_info("Source buffer contents: \n");
+ for (uint64_t sIdx = 0;
+ sIdx < srcBufSize; sIdx++)
+ {
+ log_info(
+ "%d",
+ (int)vkSrcBufferDeviceMemoryPtr
+ [sIdx]);
+ }
+ log_info(
+ "Destination buffer contents:");
+ for (uint64_t dIdx = 0;
+ dIdx < srcBufSize; dIdx++)
+ {
+ log_info("%d",
+ (int)dstBufferPtr[dIdx]);
+ }
+ }
+ err = -1;
+ break;
+ }
+ }
+ for (int i = 0; i < num2DImages; i++)
+ {
+ delete vkNonDedicatedImage2DListDeviceMemory1
+ [i];
+ delete vkNonDedicatedImage2DListDeviceMemory2
+ [i];
+ delete nonDedicatedExternalMemory1[i];
+ delete nonDedicatedExternalMemory2[i];
+ }
+ vkNonDedicatedImage2DListDeviceMemory1.erase(
+ vkNonDedicatedImage2DListDeviceMemory1.begin(),
+ vkNonDedicatedImage2DListDeviceMemory1.begin()
+ + num2DImages);
+ vkNonDedicatedImage2DListDeviceMemory2.erase(
+ vkNonDedicatedImage2DListDeviceMemory2.begin(),
+ vkNonDedicatedImage2DListDeviceMemory2.begin()
+ + num2DImages);
+ nonDedicatedExternalMemory1.erase(
+ nonDedicatedExternalMemory1.begin(),
+ nonDedicatedExternalMemory1.begin()
+ + num2DImages);
+ nonDedicatedExternalMemory2.erase(
+ nonDedicatedExternalMemory2.begin(),
+ nonDedicatedExternalMemory2.begin()
+ + num2DImages);
+ if (CL_SUCCESS != err)
+ {
+ goto CLEANUP;
+ }
+ }
+ }
+ }
+ }
+ }
+ vkImage2DShader.clear();
+ }
+CLEANUP:
+ if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
+ if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+
+ if (srcBufferPtr) free(srcBufferPtr);
+ if (dstBufferPtr) free(dstBufferPtr);
+ return err;
+}
+
+// Harness entry point for the Vulkan/OpenCL image interop tests.
+//
+// Selects the GPU whose CL_DEVICE_UUID_KHR matches the VulkanDevice UUID,
+// creates a context plus two command queues on it, builds the
+// float/signed/unsigned image-kernel variants for every image-count
+// configuration (plus one extra variant for the cross-command-queue case),
+// and then dispatches either the one-queue or the two-queue test depending
+// on the global numCQ.
+//
+// The device_/context_/queue_/numElements_ arguments supplied by the harness
+// are deliberately ignored: this test must pick its own interop-capable
+// device.  Returns CL_SUCCESS (0) on success, an error code otherwise.
+int test_image_common(cl_device_id device_, cl_context context_,
+                      cl_command_queue queue_, int numElements_)
+{
+    cl_int err = CL_SUCCESS;
+    cl_platform_id platform = NULL;
+    size_t extensionSize = 0;
+    cl_uint num_devices = 0;
+    cl_uint device_no = 0;
+    cl_device_id *devices = NULL; // NULL so CLEANUP may free() it safely
+    char *extensions = NULL;
+    cl_command_queue cmd_queue1 = NULL;
+    cl_command_queue cmd_queue2 = NULL;
+    cl_context context = NULL;
+    const uint32_t num_kernels = ARRAY_SIZE(num2DImagesList) + 1;
+    // One kernel for Cross-CQ case
+    const uint32_t num_kernel_types = 3;
+    const char *kernel_source[num_kernels] = { kernel_text_numImage_1,
+                                               kernel_text_numImage_2,
+                                               kernel_text_numImage_4 };
+    char source_1[4096];
+    char source_2[4096];
+    char source_3[4096];
+    size_t program_source_length;
+    // Zero-initialize so that an early "goto CLEANUP" does not read
+    // indeterminate handles in the release loops below (was uninitialized).
+    cl_program program[num_kernel_types] = { NULL, NULL, NULL };
+    cl_kernel kernel_float[num_kernels] = { NULL, NULL, NULL, NULL };
+    cl_kernel kernel_signed[num_kernels] = { NULL, NULL, NULL, NULL };
+    cl_kernel kernel_unsigned[num_kernels] = { NULL, NULL, NULL, NULL };
+
+    VulkanDevice vkDevice;
+
+    cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
+    // get the platform ID
+    err = clGetPlatformIDs(1, &platform, NULL);
+    if (err != CL_SUCCESS)
+    {
+        print_error(err, "Error: Failed to get platform\n");
+        goto CLEANUP;
+    }
+
+    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
+    if (CL_SUCCESS != err)
+    {
+        print_error(err, "clGetDeviceIDs failed in returning no. of devices\n");
+        goto CLEANUP;
+    }
+    devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
+    if (NULL == devices)
+    {
+        err = CL_OUT_OF_HOST_MEMORY;
+        print_error(err, "Unable to allocate memory for devices\n");
+        goto CLEANUP;
+    }
+    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices,
+                         NULL);
+    if (CL_SUCCESS != err)
+    {
+        print_error(err, "Failed to get deviceID.\n");
+        goto CLEANUP;
+    }
+    contextProperties[1] = (cl_context_properties)platform;
+    log_info("Assigned contextproperties for platform\n");
+    // Find the CL device whose UUID matches the Vulkan physical device.
+    for (device_no = 0; device_no < num_devices; device_no++)
+    {
+        err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, NULL,
+                              &extensionSize);
+        if (CL_SUCCESS != err)
+        {
+            print_error(
+                err,
+                "Error in clGetDeviceInfo for getting device_extension size\n");
+            goto CLEANUP;
+        }
+        extensions = (char *)malloc(extensionSize);
+        if (NULL == extensions)
+        {
+            err = CL_OUT_OF_HOST_MEMORY;
+            print_error(err, "Unable to allocate memory for extensions\n");
+            goto CLEANUP;
+        }
+        err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS,
+                              extensionSize, extensions, NULL);
+        // The extension string is only fetched, never inspected; release it
+        // right away so the loop does not leak one buffer per device
+        // (previously only the last allocation was freed, at CLEANUP).
+        free(extensions);
+        extensions = NULL;
+        if (CL_SUCCESS != err)
+        {
+            print_error(
+                err, "Error in clGetDeviceInfo for getting device_extension\n");
+            goto CLEANUP;
+        }
+        err = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR,
+                              CL_UUID_SIZE_KHR, uuid, &extensionSize);
+        if (CL_SUCCESS != err)
+        {
+            print_error(err, "clGetDeviceInfo failed with error");
+            goto CLEANUP;
+        }
+        err =
+            memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE);
+        if (err == 0)
+        {
+            break;
+        }
+    }
+    if (device_no >= num_devices)
+    {
+        err = EXIT_FAILURE;
+        print_error(err,
+                    "OpenCL error:"
+                    "No Vulkan-OpenCL Interop capable GPU found.\n");
+        goto CLEANUP;
+    }
+    deviceId = devices[device_no];
+    err = setMaxImageDimensions(deviceId, max_width, max_height);
+    if (CL_SUCCESS != err)
+    {
+        print_error(err, "error setting max image dimensions");
+        goto CLEANUP;
+    }
+    // %zu: max_width/max_height are size_t (was %lu).
+    log_info("Set max_width to %zu and max_height to %zu\n", max_width,
+             max_height);
+    context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
+                                      NULL, NULL, &err);
+    if (CL_SUCCESS != err)
+    {
+        print_error(err, "error creating context");
+        goto CLEANUP;
+    }
+    log_info("Successfully created context !!!\n");
+
+    cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &err);
+    if (CL_SUCCESS != err)
+    {
+        err = CL_INVALID_COMMAND_QUEUE;
+        print_error(err, "Error: Failed to create command queue!\n");
+        goto CLEANUP;
+    }
+    log_info("clCreateCommandQueue successfull \n");
+
+    cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &err);
+    if (CL_SUCCESS != err)
+    {
+        err = CL_INVALID_COMMAND_QUEUE;
+        print_error(err, "Error: Failed to create command queue!\n");
+        goto CLEANUP;
+    }
+    log_info("clCreateCommandQueue2 successful \n");
+
+    // Instantiate the kernel sources for each image-count configuration and
+    // build the float / signed-int / unsigned-int program variants.
+    for (uint32_t i = 0; i < num_kernels; i++)
+    {
+        switch (i)
+        {
+            case 0:
+                sprintf(source_1, kernel_source[i], "float4", "f", "float4",
+                        "f", "f", "f");
+                sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
+                        "i", "i");
+                sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
+                        "ui", "ui", "ui");
+                break;
+            case 1:
+                sprintf(source_1, kernel_source[i], "float4", "f", "float4",
+                        "f", "float4", "f", "float4", "f", "f", "f", "f", "f");
+                sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
+                        "int4", "i", "int4", "i", "i", "i", "i", "i");
+                sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
+                        "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui",
+                        "ui");
+                break;
+            case 2:
+                sprintf(source_1, kernel_source[i], "float4", "f", "float4",
+                        "f", "float4", "f", "float4", "f", "float4", "f",
+                        "float4", "f", "float4", "f", "float4", "f", "f", "f",
+                        "f", "f", "f", "f", "f", "f");
+                sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
+                        "int4", "i", "int4", "i", "int4", "i", "int4", "i",
+                        "int4", "i", "int4", "i", "i", "i", "i", "i", "i", "i",
+                        "i", "i");
+                sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
+                        "ui", "uint4", "ui", "uint4", "ui", "uint4", "ui",
+                        "uint4", "ui", "uint4", "ui", "uint4", "ui", "ui", "ui",
+                        "ui", "ui", "ui", "ui", "ui", "ui");
+                break;
+            case 3:
+                // Additional case for creating updateKernelCQ2 which takes two
+                // images
+                sprintf(source_1, kernel_source[1], "float4", "f", "float4",
+                        "f", "float4", "f", "float4", "f", "f", "f", "f", "f");
+                sprintf(source_2, kernel_source[1], "int4", "i", "int4", "i",
+                        "int4", "i", "int4", "i", "i", "i", "i", "i");
+                sprintf(source_3, kernel_source[1], "uint4", "ui", "uint4",
+                        "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui",
+                        "ui");
+                break;
+        }
+        const char *sourceTexts[num_kernel_types] = { source_1, source_2,
+                                                      source_3 };
+        for (uint32_t k = 0; k < num_kernel_types; k++)
+        {
+            program_source_length = strlen(sourceTexts[k]);
+            program[k] = clCreateProgramWithSource(
+                context, 1, &sourceTexts[k], &program_source_length, &err);
+            err |= clBuildProgram(program[k], 0, NULL, NULL, NULL, NULL);
+        }
+
+        if (err != CL_SUCCESS)
+        {
+            print_error(err, "Error: Failed to build program");
+            goto CLEANUP;
+        }
+        // create the kernel
+        kernel_float[i] = clCreateKernel(program[0], "image2DKernel", &err);
+        if (err != CL_SUCCESS)
+        {
+            print_error(err, "clCreateKernel failed");
+            goto CLEANUP;
+        }
+        kernel_signed[i] = clCreateKernel(program[1], "image2DKernel", &err);
+        if (err != CL_SUCCESS)
+        {
+            print_error(err, "clCreateKernel failed");
+            goto CLEANUP;
+        }
+        kernel_unsigned[i] = clCreateKernel(program[2], "image2DKernel", &err);
+        if (err != CL_SUCCESS)
+        {
+            print_error(err, "clCreateKernel failed ");
+            goto CLEANUP;
+        }
+        // The kernels retain their program, so drop our references now;
+        // previously each iteration overwrote (and leaked) the prior set of
+        // program handles.
+        for (uint32_t k = 0; k < num_kernel_types; k++)
+        {
+            clReleaseProgram(program[k]);
+            program[k] = NULL;
+        }
+    }
+    if (numCQ == 2)
+    {
+        err = run_test_with_two_queue(context, cmd_queue1, cmd_queue2,
+                                      kernel_unsigned, kernel_signed,
+                                      kernel_float, vkDevice);
+    }
+    else
+    {
+        err = run_test_with_one_queue(context, cmd_queue1, kernel_unsigned,
+                                      kernel_signed, kernel_float, vkDevice);
+    }
+CLEANUP:
+    for (uint32_t i = 0; i < num_kernels; i++)
+    {
+        if (kernel_float[i])
+        {
+            clReleaseKernel(kernel_float[i]);
+        }
+        if (kernel_unsigned[i])
+        {
+            clReleaseKernel(kernel_unsigned[i]);
+        }
+        if (kernel_signed[i])
+        {
+            clReleaseKernel(kernel_signed[i]);
+        }
+    }
+    for (uint32_t i = 0; i < num_kernel_types; i++)
+    {
+        if (program[i])
+        {
+            clReleaseProgram(program[i]);
+        }
+    }
+    if (cmd_queue1) clReleaseCommandQueue(cmd_queue1);
+    if (cmd_queue2) clReleaseCommandQueue(cmd_queue2);
+    if (context) clReleaseContext(context);
+
+    if (extensions) free(extensions);
+    if (devices) free(devices);
+
+    return err;
+}
diff --git a/test_conformance/vulkan/test_vulkan_platform_device_info.cpp b/test_conformance/vulkan/test_vulkan_platform_device_info.cpp
new file mode 100644
index 00000000..12f373b5
--- /dev/null
+++ b/test_conformance/vulkan/test_vulkan_platform_device_info.cpp
@@ -0,0 +1,146 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+#include "harness/testHarness.h"
+#include <iostream>
+#include <string>
+
+// Pairs an OpenCL query enum with its printable name so the tables below can
+// be iterated generically when dumping platform/device interop capabilities.
+typedef struct
+{
+    cl_uint info;
+    const char *name;
+} _info;
+
+// Platform-level queries for external memory / semaphore handle types
+// (cl_khr_external_memory and cl_khr_external_semaphore).
+_info platform_info_table[] = {
+#define STRING(x)                                                              \
+    {                                                                          \
+        x, #x                                                                  \
+    }
+    STRING(CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR),
+    STRING(CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
+    STRING(CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR)
+#undef STRING
+};
+
+// Device-level queries for external memory / semaphore handle types.
+_info device_info_table[] = {
+#define STRING(x)                                                              \
+    {                                                                          \
+        x, #x                                                                  \
+    }
+    STRING(CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR),
+    STRING(CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
+    STRING(CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR)
+#undef STRING
+};
+
+// Queries every available OpenCL platform for the external memory/semaphore
+// handle types listed in platform_info_table and prints them.  The
+// deviceID/_context/_queue/num_elements harness parameters are unused.
+// Returns TEST_PASS on success, TEST_FAIL on allocation failure.
+int test_platform_info(cl_device_id deviceID, cl_context _context,
+                       cl_command_queue _queue, int num_elements)
+{
+    cl_uint num_platforms;
+    cl_uint i, j;
+    cl_platform_id *platforms;
+    cl_int errNum;
+    cl_uint *handle_type;
+    size_t handle_type_size = 0;
+    cl_uint num_handles = 0;
+
+    // get total # of platforms
+    errNum = clGetPlatformIDs(0, NULL, &num_platforms);
+    test_error(errNum, "clGetPlatformIDs (getting count) failed");
+
+    platforms =
+        (cl_platform_id *)malloc(num_platforms * sizeof(cl_platform_id));
+    if (!platforms)
+    {
+        printf("error allocating memory\n");
+        exit(1);
+    }
+    log_info("%d platforms available\n", num_platforms);
+    errNum = clGetPlatformIDs(num_platforms, platforms, NULL);
+    test_error(errNum, "clGetPlatformIDs (getting IDs) failed");
+
+    for (i = 0; i < num_platforms; i++)
+    {
+        log_info("Platform%d (id %lu) info:\n", i, (unsigned long)platforms[i]);
+        for (j = 0;
+             j < sizeof(platform_info_table) / sizeof(platform_info_table[0]);
+             j++)
+        {
+            // Two-step query: size first, then the actual handle-type list.
+            errNum =
+                clGetPlatformInfo(platforms[i], platform_info_table[j].info, 0,
+                                  NULL, &handle_type_size);
+            test_error(errNum, "clGetPlatformInfo failed");
+            num_handles = handle_type_size / sizeof(cl_uint);
+            handle_type = (cl_uint *)malloc(handle_type_size);
+            if (!handle_type && handle_type_size != 0)
+            {
+                // Was: unchecked malloc result passed to clGetPlatformInfo.
+                printf("error allocating memory\n");
+                free(platforms);
+                return TEST_FAIL;
+            }
+            errNum =
+                clGetPlatformInfo(platforms[i], platform_info_table[j].info,
+                                  handle_type_size, handle_type, NULL);
+            test_error(errNum, "clGetPlatformInfo failed");
+
+            log_info("%s: \n", platform_info_table[j].name);
+            while (num_handles--)
+            {
+                log_info("%x \n", handle_type[num_handles]);
+            }
+            free(handle_type); // free(NULL) is a no-op; guard removed
+        }
+    }
+    free(platforms);
+    return TEST_PASS;
+}
+
+// Queries the given device for the external memory/semaphore handle types
+// listed in device_info_table and prints them.  The _context/_queue/
+// num_elements harness parameters are unused.  Returns TEST_PASS on success,
+// TEST_FAIL on allocation failure.
+int test_device_info(cl_device_id deviceID, cl_context _context,
+                     cl_command_queue _queue, int num_elements)
+{
+    cl_uint j;
+    cl_uint *handle_type;
+    size_t handle_type_size = 0;
+    cl_uint num_handles = 0;
+    cl_int errNum = CL_SUCCESS;
+    for (j = 0; j < sizeof(device_info_table) / sizeof(device_info_table[0]);
+         j++)
+    {
+        // Two-step query: size first, then the actual handle-type list.
+        errNum = clGetDeviceInfo(deviceID, device_info_table[j].info, 0, NULL,
+                                 &handle_type_size);
+        test_error(errNum, "clGetDeviceInfo failed");
+
+        num_handles = handle_type_size / sizeof(cl_uint);
+        handle_type = (cl_uint *)malloc(handle_type_size);
+        if (!handle_type && handle_type_size != 0)
+        {
+            // Was: unchecked malloc result passed to clGetDeviceInfo.
+            printf("error allocating memory\n");
+            return TEST_FAIL;
+        }
+
+        errNum = clGetDeviceInfo(deviceID, device_info_table[j].info,
+                                 handle_type_size, handle_type, NULL);
+        test_error(errNum, "clGetDeviceInfo failed");
+
+        log_info("%s: \n", device_info_table[j].name);
+        while (num_handles--)
+        {
+            log_info("%x \n", handle_type[num_handles]);
+        }
+        free(handle_type); // free(NULL) is a no-op; guard removed
+    }
+    return TEST_PASS;
+}
diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp
new file mode 100644
index 00000000..9d9a6601
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp
@@ -0,0 +1,853 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <CL/cl_ext.h>
+#include "opencl_vulkan_wrapper.hpp"
+#include "vulkan_wrapper.hpp"
+#include "harness/errorHelpers.h"
+#include "harness/deviceInfo.h"
+#include <assert.h>
+#include <iostream>
+#include <stdexcept>
+
+#define ASSERT(x) assert((x))
+// Converts a gigabyte count to bytes; used for memory-size thresholds.
+#define GB(x) ((unsigned long long)(x) << 30)
+
+// Extension entry points resolved at runtime.  init_cl_vk_ext() below fills
+// in the semaphore-related pointers; the acquire/release pointers are only
+// declared here — presumably resolved elsewhere (TODO confirm against the
+// rest of this file).
+pfnclCreateSemaphoreWithPropertiesKHR clCreateSemaphoreWithPropertiesKHRptr;
+pfnclEnqueueWaitSemaphoresKHR clEnqueueWaitSemaphoresKHRptr;
+pfnclEnqueueSignalSemaphoresKHR clEnqueueSignalSemaphoresKHRptr;
+pfnclEnqueueAcquireExternalMemObjectsKHR
+    clEnqueueAcquireExternalMemObjectsKHRptr;
+pfnclEnqueueReleaseExternalMemObjectsKHR
+    clEnqueueReleaseExternalMemObjectsKHRptr;
+pfnclReleaseSemaphoreKHR clReleaseSemaphoreKHRptr;
+
+// Resolves the cl_khr_external_semaphore entry points from the given OpenCL
+// platform into the global function pointers above.  Each lookup must
+// succeed; a missing entry point aborts setup by throwing
+// std::runtime_error.
+void init_cl_vk_ext(cl_platform_id opencl_platform)
+{
+    clEnqueueWaitSemaphoresKHRptr =
+        (pfnclEnqueueWaitSemaphoresKHR)clGetExtensionFunctionAddressForPlatform(
+            opencl_platform, "clEnqueueWaitSemaphoresKHR");
+    if (clEnqueueWaitSemaphoresKHRptr == NULL)
+    {
+        throw std::runtime_error("Failed to get the function pointer of "
+                                 "clEnqueueWaitSemaphoresKHRptr!");
+    }
+
+    clEnqueueSignalSemaphoresKHRptr = (pfnclEnqueueSignalSemaphoresKHR)
+        clGetExtensionFunctionAddressForPlatform(
+            opencl_platform, "clEnqueueSignalSemaphoresKHR");
+    if (clEnqueueSignalSemaphoresKHRptr == NULL)
+    {
+        throw std::runtime_error("Failed to get the function pointer of "
+                                 "clEnqueueSignalSemaphoresKHRptr!");
+    }
+
+    clReleaseSemaphoreKHRptr =
+        (pfnclReleaseSemaphoreKHR)clGetExtensionFunctionAddressForPlatform(
+            opencl_platform, "clReleaseSemaphoreKHR");
+    if (clReleaseSemaphoreKHRptr == NULL)
+    {
+        throw std::runtime_error("Failed to get the function pointer of "
+                                 "clReleaseSemaphoreKHRptr!");
+    }
+
+    clCreateSemaphoreWithPropertiesKHRptr =
+        (pfnclCreateSemaphoreWithPropertiesKHR)
+            clGetExtensionFunctionAddressForPlatform(
+                opencl_platform, "clCreateSemaphoreWithPropertiesKHR");
+    if (clCreateSemaphoreWithPropertiesKHRptr == NULL)
+    {
+        throw std::runtime_error("Failed to get the function pointer of "
+                                 "clCreateSemaphoreWithPropertiesKHRptr!");
+    }
+}
+
+// Chooses conservative maximum test-image dimensions from the device's
+// global memory size, so that the test's allocations stay well within the
+// device's capacity.  Returns the clGetDeviceInfo status on failure.
+cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &max_width,
+                             size_t &max_height)
+{
+    cl_ulong globalMemSize = 0;
+    size_t retSize = 0;
+
+    cl_int status = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
+                                    sizeof(cl_ulong), &globalMemSize, &retSize);
+    if (CL_SUCCESS != status)
+    {
+        return status;
+    }
+
+    if (globalMemSize < GB(4))
+    {
+        // Small devices: keep both dimensions modest.
+        max_width = 256;
+        max_height = 256;
+    }
+    else if (globalMemSize < GB(8))
+    {
+        max_width = 512;
+        max_height = 256;
+    }
+    else
+    {
+        max_width = 1024;
+        max_height = 512;
+    }
+
+    return CL_SUCCESS;
+}
+
+// Translates a Vulkan image format into the equivalent OpenCL image format
+// (channel order + channel data type).  Returns CL_SUCCESS on a successful
+// mapping, CL_INVALID_VALUE for the alpha-first packed formats that have no
+// CL equivalent, and asserts on any format not listed here.
+cl_int getCLFormatFromVkFormat(VkFormat vkFormat,
+                               cl_image_format *clImageFormat)
+{
+    cl_int result = CL_SUCCESS;
+    switch (vkFormat)
+    {
+        case VK_FORMAT_R8G8B8A8_UNORM:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_UNORM_INT8;
+            break;
+        case VK_FORMAT_B8G8R8A8_UNORM:
+            clImageFormat->image_channel_order = CL_BGRA;
+            clImageFormat->image_channel_data_type = CL_UNORM_INT8;
+            break;
+        case VK_FORMAT_R16G16B16A16_UNORM:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_UNORM_INT16;
+            break;
+        case VK_FORMAT_R8G8B8A8_SINT:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_SIGNED_INT8;
+            break;
+        case VK_FORMAT_R16G16B16A16_SINT:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_SIGNED_INT16;
+            break;
+        case VK_FORMAT_R32G32B32A32_SINT:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_SIGNED_INT32;
+            break;
+        case VK_FORMAT_R8G8B8A8_UINT:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8;
+            break;
+        case VK_FORMAT_R16G16B16A16_UINT:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_UNSIGNED_INT16;
+            break;
+        case VK_FORMAT_R32G32B32A32_UINT:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_UNSIGNED_INT32;
+            break;
+        case VK_FORMAT_R16G16B16A16_SFLOAT:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_HALF_FLOAT;
+            break;
+        case VK_FORMAT_R32G32B32A32_SFLOAT:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_FLOAT;
+            break;
+        // Single-channel (R) formats.
+        case VK_FORMAT_R8_SNORM:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_SNORM_INT8;
+            break;
+        case VK_FORMAT_R16_SNORM:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_SNORM_INT16;
+            break;
+        case VK_FORMAT_R8_UNORM:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_UNORM_INT8;
+            break;
+        case VK_FORMAT_R16_UNORM:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_UNORM_INT16;
+            break;
+        case VK_FORMAT_R8_SINT:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_SIGNED_INT8;
+            break;
+        case VK_FORMAT_R16_SINT:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_SIGNED_INT16;
+            break;
+        case VK_FORMAT_R32_SINT:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_SIGNED_INT32;
+            break;
+        case VK_FORMAT_R8_UINT:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8;
+            break;
+        case VK_FORMAT_R16_UINT:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_UNSIGNED_INT16;
+            break;
+        case VK_FORMAT_R32_UINT:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_UNSIGNED_INT32;
+            break;
+        case VK_FORMAT_R16_SFLOAT:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_HALF_FLOAT;
+            break;
+        case VK_FORMAT_R32_SFLOAT:
+            clImageFormat->image_channel_order = CL_R;
+            clImageFormat->image_channel_data_type = CL_FLOAT;
+            break;
+        // Two-channel (RG) formats.
+        case VK_FORMAT_R8G8_SNORM:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_SNORM_INT8;
+            break;
+        case VK_FORMAT_R16G16_SNORM:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_SNORM_INT16;
+            break;
+        case VK_FORMAT_R8G8_UNORM:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_UNORM_INT8;
+            break;
+        case VK_FORMAT_R16G16_UNORM:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_UNORM_INT16;
+            break;
+        case VK_FORMAT_R8G8_SINT:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_SIGNED_INT8;
+            break;
+        case VK_FORMAT_R16G16_SINT:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_SIGNED_INT16;
+            break;
+        case VK_FORMAT_R32G32_SINT:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_SIGNED_INT32;
+            break;
+        case VK_FORMAT_R8G8_UINT:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8;
+            break;
+        case VK_FORMAT_R16G16_UINT:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_UNSIGNED_INT16;
+            break;
+        case VK_FORMAT_R32G32_UINT:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_UNSIGNED_INT32;
+            break;
+        case VK_FORMAT_R16G16_SFLOAT:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_HALF_FLOAT;
+            break;
+        case VK_FORMAT_R32G32_SFLOAT:
+            clImageFormat->image_channel_order = CL_RG;
+            clImageFormat->image_channel_data_type = CL_FLOAT;
+            break;
+        // Packed 16-bit formats.
+        case VK_FORMAT_R5G6B5_UNORM_PACK16:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_UNORM_SHORT_565;
+            break;
+        case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_UNORM_SHORT_555;
+            break;
+        case VK_FORMAT_R8G8B8A8_SNORM:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_SNORM_INT8;
+            break;
+        case VK_FORMAT_R16G16B16A16_SNORM:
+            clImageFormat->image_channel_order = CL_RGBA;
+            clImageFormat->image_channel_data_type = CL_SNORM_INT16;
+            break;
+        case VK_FORMAT_B8G8R8A8_SNORM:
+            clImageFormat->image_channel_order = CL_BGRA;
+            clImageFormat->image_channel_data_type = CL_SNORM_INT8;
+            break;
+        case VK_FORMAT_B5G6R5_UNORM_PACK16:
+            clImageFormat->image_channel_order = CL_BGRA;
+            clImageFormat->image_channel_data_type = CL_UNORM_SHORT_565;
+            break;
+        case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+            clImageFormat->image_channel_order = CL_BGRA;
+            clImageFormat->image_channel_data_type = CL_UNORM_SHORT_555;
+            break;
+        case VK_FORMAT_B8G8R8A8_SINT:
+            clImageFormat->image_channel_order = CL_BGRA;
+            clImageFormat->image_channel_data_type = CL_SIGNED_INT8;
+            break;
+        case VK_FORMAT_B8G8R8A8_UINT:
+            clImageFormat->image_channel_order = CL_BGRA;
+            clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8;
+            break;
+        // Alpha-first packed formats: no matching CL channel order exists.
+        case VK_FORMAT_A8B8G8R8_SNORM_PACK32: result = CL_INVALID_VALUE; break;
+        case VK_FORMAT_A8B8G8R8_UNORM_PACK32: result = CL_INVALID_VALUE; break;
+        case VK_FORMAT_A8B8G8R8_SINT_PACK32: result = CL_INVALID_VALUE; break;
+        case VK_FORMAT_A8B8G8R8_UINT_PACK32: result = CL_INVALID_VALUE; break;
+        default:
+            log_error("Unsupported format\n");
+            ASSERT(0);
+            break;
+    }
+    return result;
+}
+
+// Maps a Vulkan image type onto the corresponding OpenCL image object type.
+// Unrecognized types yield CL_INVALID_VALUE.
+cl_mem_object_type getImageTypeFromVk(VkImageType imageType)
+{
+    if (imageType == VK_IMAGE_TYPE_1D)
+    {
+        return CL_MEM_OBJECT_IMAGE1D;
+    }
+    if (imageType == VK_IMAGE_TYPE_2D)
+    {
+        return CL_MEM_OBJECT_IMAGE2D;
+    }
+    if (imageType == VK_IMAGE_TYPE_3D)
+    {
+        return CL_MEM_OBJECT_IMAGE3D;
+    }
+    return CL_INVALID_VALUE;
+}
+
+// Returns the size in bytes of one image element (pixel) for the given CL
+// image format, or 0 when the channel order / data type combination is not
+// recognized or is invalid (e.g. a packed type with a non-3-channel order).
+size_t GetElementNBytes(const cl_image_format *format)
+{
+    // Number of channels implied by the channel order; 0 means "unknown".
+    size_t channels;
+    switch (format->image_channel_order)
+    {
+        case CL_R:
+        case CL_A:
+        case CL_INTENSITY:
+        case CL_LUMINANCE:
+        case CL_DEPTH: channels = 1; break;
+        case CL_RG:
+        case CL_RA: channels = 2; break;
+        case CL_RGB: channels = 3; break;
+        case CL_RGBA:
+        case CL_ARGB:
+        case CL_BGRA:
+        case CL_sRGBA: channels = 4; break;
+        default: channels = 0; break;
+    }
+
+    switch (format->image_channel_data_type)
+    {
+        // One byte per channel.
+        case CL_SNORM_INT8:
+        case CL_UNORM_INT8:
+        case CL_SIGNED_INT8:
+        case CL_UNSIGNED_INT8: return channels;
+
+        // Two bytes per channel.
+        case CL_SNORM_INT16:
+        case CL_UNORM_INT16:
+        case CL_SIGNED_INT16:
+        case CL_UNSIGNED_INT16:
+        case CL_HALF_FLOAT: return channels * 2;
+
+        // Four bytes per channel.
+        case CL_SIGNED_INT32:
+        case CL_UNSIGNED_INT32:
+        case CL_FLOAT: return channels * 4;
+
+        // Packed 16-bit formats are only valid with a 3-channel order.
+        case CL_UNORM_SHORT_565:
+        case CL_UNORM_SHORT_555: return (channels == 3) ? 2 : 0;
+
+        // Packed 32-bit format, likewise 3-channel only.
+        case CL_UNORM_INT_101010: return (channels == 3) ? 4 : 0;
+
+        default: return 0;
+    }
+}
+
+// Derives the width/height of a 2D OpenCL image covering totalImageSize
+// bytes of imported memory, with each row padded up to a 64-byte pitch.
+// Returns CL_INVALID_VALUE for a zero-sized allocation or an unsupported
+// format instead of dividing by zero (the original kept computing after
+// flagging the error and could divide by 0 when GetElementNBytes returned 0).
+// On failure, 'width' and 'height' are left unmodified.
+cl_int get2DImageDimensions(const VkImageCreateInfo *VulkanImageCreateInfo,
+                            cl_image_format *img_fmt, size_t totalImageSize,
+                            size_t &width, size_t &height)
+{
+    if (totalImageSize == 0)
+    {
+        return CL_INVALID_VALUE;
+    }
+    size_t element_size = GetElementNBytes(img_fmt);
+    if (element_size == 0)
+    {
+        // Unsupported channel order / data type combination.
+        return CL_INVALID_VALUE;
+    }
+    size_t row_pitch = element_size * VulkanImageCreateInfo->extent.width;
+    // Round the row pitch up to the next multiple of 64 bytes.
+    row_pitch = row_pitch % 64 == 0 ? row_pitch : ((row_pitch / 64) + 1) * 64;
+    if (row_pitch == 0)
+    {
+        // extent.width was 0; avoid dividing by zero below.
+        return CL_INVALID_VALUE;
+    }
+
+    width = row_pitch / element_size;
+    height = totalImageSize / row_pitch;
+
+    return CL_SUCCESS;
+}
+
+// Translates a VkImageCreateInfo into an OpenCL image format and descriptor
+// describing a 2D image spanning totalImageSize bytes.
+// Returns a CL error code if the Vulkan format or image type is unsupported.
+// NOTE(review): error handling is mixed — unsupported format/type returns an
+// error code, but a dimension-computation failure throws; confirm intended.
+cl_int
+getCLImageInfoFromVkImageInfo(const VkImageCreateInfo *VulkanImageCreateInfo,
+                              size_t totalImageSize, cl_image_format *img_fmt,
+                              cl_image_desc *img_desc)
+{
+    cl_int result = CL_SUCCESS;
+
+    cl_image_format clImgFormat = { 0 };
+    result =
+        getCLFormatFromVkFormat(VulkanImageCreateInfo->format, &clImgFormat);
+    if (CL_SUCCESS != result)
+    {
+        return result;
+    }
+    memcpy(img_fmt, &clImgFormat, sizeof(cl_image_format));
+
+    img_desc->image_type = getImageTypeFromVk(VulkanImageCreateInfo->imageType);
+    if (CL_INVALID_VALUE == img_desc->image_type)
+    {
+        return CL_INVALID_VALUE;
+    }
+
+    // Width/height are derived from the total allocation size with rows
+    // padded to a 64-byte pitch (see get2DImageDimensions).
+    result =
+        get2DImageDimensions(VulkanImageCreateInfo, img_fmt, totalImageSize,
+                             img_desc->image_width, img_desc->image_height);
+    if (CL_SUCCESS != result)
+    {
+        throw std::runtime_error("get2DImageDimensions failed!!!");
+    }
+
+    img_desc->image_depth = 0; // VulkanImageCreateInfo->extent.depth;
+    img_desc->image_array_size = 0;
+    img_desc->image_row_pitch = 0; // Row pitch set to zero as host_ptr is NULL
+    // With image_row_pitch fixed at 0, this product is always 0 as well.
+    img_desc->image_slice_pitch =
+        img_desc->image_row_pitch * img_desc->image_height;
+    img_desc->num_mip_levels = 1;
+    img_desc->num_samples = 0;
+    img_desc->buffer = NULL;
+
+    return result;
+}
+
+// Returns CL_SUCCESS iff the device lists requiredHandleType among its
+// importable external-memory handle types, CL_INVALID_VALUE otherwise.
+cl_int check_external_memory_handle_type(
+    cl_device_id deviceID,
+    cl_external_memory_handle_type_khr requiredHandleType)
+{
+    size_t handle_type_size = 0;
+
+    cl_int errNum = clGetDeviceInfo(
+        deviceID, CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, 0, NULL,
+        &handle_type_size);
+    test_error(
+        errNum,
+        "Unable to query CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR \n");
+
+    // clGetDeviceInfo reports the size in BYTES; the original looped over
+    // that byte count as if it were an element count, reading past the end
+    // of the array. Convert to elements, and use a vector so the storage is
+    // released on every path (the malloc'd buffer used to leak).
+    size_t num_handle_types =
+        handle_type_size / sizeof(cl_external_memory_handle_type_khr);
+    std::vector<cl_external_memory_handle_type_khr> handle_type(
+        num_handle_types);
+
+    errNum = clGetDeviceInfo(deviceID,
+                             CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR,
+                             handle_type_size, handle_type.data(), NULL);
+    test_error(
+        errNum,
+        "Unable to query CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR \n");
+
+    for (size_t i = 0; i < num_handle_types; i++)
+    {
+        if (requiredHandleType == handle_type[i])
+        {
+            return CL_SUCCESS;
+        }
+    }
+    log_error("cl_khr_external_memory extension is missing support for %d\n",
+              requiredHandleType);
+
+    return CL_INVALID_VALUE;
+}
+
+// Returns CL_SUCCESS iff the device lists requiredHandleType among its
+// importable external-semaphore handle types, CL_INVALID_VALUE otherwise.
+cl_int check_external_semaphore_handle_type(
+    cl_device_id deviceID,
+    cl_external_semaphore_handle_type_khr requiredHandleType)
+{
+    size_t handle_type_size = 0;
+
+    cl_int errNum =
+        clGetDeviceInfo(deviceID, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
+                        0, NULL, &handle_type_size);
+    test_error(
+        errNum,
+        "Unable to query CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR \n");
+
+    // clGetDeviceInfo reports the size in BYTES; the original looped over
+    // that byte count as if it were an element count, reading past the end
+    // of the array. Convert to elements, and use a vector so the storage is
+    // released on every path (the malloc'd buffer used to leak).
+    size_t num_handle_types =
+        handle_type_size / sizeof(cl_external_semaphore_handle_type_khr);
+    std::vector<cl_external_semaphore_handle_type_khr> handle_type(
+        num_handle_types);
+
+    errNum =
+        clGetDeviceInfo(deviceID, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
+                        handle_type_size, handle_type.data(), NULL);
+    test_error(
+        errNum,
+        "Unable to query CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR \n");
+
+    for (size_t i = 0; i < num_handle_types; i++)
+    {
+        if (requiredHandleType == handle_type[i])
+        {
+            return CL_SUCCESS;
+        }
+    }
+    log_error("cl_khr_external_semaphore extension is missing support for %d\n",
+              requiredHandleType);
+
+    return CL_INVALID_VALUE;
+}
+// Default constructor: initialise the handle so the destructor's
+// unconditional clReleaseMemObject() never sees an indeterminate pointer
+// (reading an uninitialised member is undefined behavior).
+clExternalMemory::clExternalMemory(): m_externalMemory(NULL) {}
+
+// Shallow-copies the cl_mem handle.
+// NOTE(review): no clRetainMemObject() here, so the copy and the source
+// share one reference while both destructors release it — confirm copies
+// cannot cause a double release.
+clExternalMemory::clExternalMemory(const clExternalMemory &externalMemory)
+    : m_externalMemory(externalMemory.m_externalMemory)
+{}
+
+// Imports a Vulkan device memory allocation into OpenCL as a buffer:
+// verifies the platform-appropriate cl_khr_external_memory_* extension,
+// packs the exported handle (POSIX fd or Win32 handle) into
+// cl_mem_properties, and creates the buffer via
+// clCreateBufferWithProperties. Throws std::runtime_error on any failure.
+// NOTE(review): the 'offset' parameter is accepted but never used — confirm
+// whether the import is expected to honour it.
+clExternalMemory::clExternalMemory(
+    const VulkanDeviceMemory *deviceMemory,
+    VulkanExternalMemoryHandleType externalMemoryHandleType, uint64_t offset,
+    uint64_t size, cl_context context, cl_device_id deviceId)
+{
+    int err = 0;
+    m_externalMemory = NULL;
+    // Only devList[0] is used below; the trailing NULL is a sentinel.
+    cl_device_id devList[] = { deviceId, NULL };
+    std::vector<cl_mem_properties> extMemProperties;
+#ifdef _WIN32
+    if (!is_extension_available(devList[0], "cl_khr_external_memory_win32"))
+    {
+        throw std::runtime_error(
+            "Device does not support cl_khr_external_memory_win32 extension\n");
+    }
+#else
+    if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd"))
+    {
+        throw std::runtime_error(
+            "Device does not support cl_khr_external_memory_opaque_fd "
+            "extension \n");
+    }
+#endif
+
+    switch (externalMemoryHandleType)
+    {
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD:
+#ifdef _WIN32
+            ASSERT(0);
+#endif
+            // NOTE(review): this message is printed on every platform that
+            // takes the fd path and its wording does not match the action —
+            // looks copy-pasted; verify.
+            log_info("Opaque file descriptors are not supported on Windows\n");
+            fd = (int)deviceMemory->getHandle(externalMemoryHandleType);
+            err = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR);
+            extMemProperties.push_back(
+                (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR);
+            extMemProperties.push_back((cl_mem_properties)fd);
+            break;
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT:
+#ifndef _WIN32
+            ASSERT(0);
+#else
+            log_info(" Opaque NT handles are only supported on Windows\n");
+            handle = deviceMemory->getHandle(externalMemoryHandleType);
+            err = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR);
+            extMemProperties.push_back(
+                (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR);
+            extMemProperties.push_back((cl_mem_properties)handle);
+#endif
+            break;
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+#ifndef _WIN32
+            ASSERT(0);
+#else
+            log_info("Opaque D3DKMT handles are only supported on Windows\n");
+            handle = deviceMemory->getHandle(externalMemoryHandleType);
+            err = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR);
+            extMemProperties.push_back(
+                (cl_mem_properties)
+                    CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR);
+            extMemProperties.push_back((cl_mem_properties)handle);
+#endif
+            break;
+        default:
+            ASSERT(0);
+            log_error("Unsupported external memory handle type\n");
+            break;
+    }
+    if (CL_SUCCESS != err)
+    {
+        throw std::runtime_error("Unsupported external memory type\n ");
+    }
+
+    // Terminate the property list with the device handle list and a 0.
+    extMemProperties.push_back((cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR);
+    extMemProperties.push_back((cl_mem_properties)devList[0]);
+    extMemProperties.push_back(
+        (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR);
+    extMemProperties.push_back(0);
+
+    // NOTE(review): the literal 1 is the cl_mem_flags argument
+    // (== CL_MEM_READ_WRITE); consider naming the constant.
+    m_externalMemory = clCreateBufferWithProperties(
+        context, extMemProperties.data(), 1, size, NULL, &err);
+    if (CL_SUCCESS != err)
+    {
+        log_error("clCreateBufferWithProperties failed with %d\n", err);
+        throw std::runtime_error("clCreateBufferWithProperties failed ");
+    }
+}
+// Imports Vulkan device memory backing a 2D image into OpenCL as an image
+// object: verifies the platform extension, packs the exported handle into
+// cl_mem_properties, derives the CL format/descriptor from the
+// VkImageCreateInfo, and creates the image via clCreateImageWithProperties.
+// Throws std::runtime_error on any failure.
+// NOTE(review): totalImageMemSize, imageWidth, imageHeight and totalSize
+// are currently unused — confirm they can be dropped from the interface.
+// (Change vs. original: removed the unused local 'clImageFormatSize'.)
+clExternalMemoryImage::clExternalMemoryImage(
+    const VulkanDeviceMemory &deviceMemory,
+    VulkanExternalMemoryHandleType externalMemoryHandleType, cl_context context,
+    size_t totalImageMemSize, size_t imageWidth, size_t imageHeight,
+    size_t totalSize, const VulkanImage2D &image2D, cl_device_id deviceId)
+{
+    cl_int errcode_ret = 0;
+    std::vector<cl_mem_properties> extMemProperties1;
+    cl_device_id devList[] = { deviceId, NULL };
+
+#ifdef _WIN32
+    if (!is_extension_available(devList[0], "cl_khr_external_memory_win32"))
+    {
+        throw std::runtime_error("Device does not support "
+                                 "cl_khr_external_memory_win32 extension \n");
+    }
+#elif !defined(__APPLE__)
+    if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd"))
+    {
+        throw std::runtime_error(
+            "Device does not support cl_khr_external_memory_opaque_fd "
+            "extension\n");
+    }
+#endif
+
+    switch (externalMemoryHandleType)
+    {
+#ifdef _WIN32
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT:
+            log_info("Opaque NT handles are only supported on Windows\n");
+            handle = deviceMemory.getHandle(externalMemoryHandleType);
+            errcode_ret = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR);
+            extMemProperties1.push_back(
+                (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR);
+            extMemProperties1.push_back((cl_mem_properties)handle);
+            break;
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+            log_info("Opaque D3DKMT handles are only supported on Windows\n");
+            handle = deviceMemory.getHandle(externalMemoryHandleType);
+            errcode_ret = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR);
+            extMemProperties1.push_back(
+                (cl_mem_properties)
+                    CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR);
+            extMemProperties1.push_back((cl_mem_properties)handle);
+            break;
+#elif !defined(__APPLE__)
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD:
+            log_info(" Opaque file descriptors are not supported on Windows\n");
+            fd = (int)deviceMemory.getHandle(externalMemoryHandleType);
+            errcode_ret = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR);
+            extMemProperties1.push_back(
+                (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR);
+            extMemProperties1.push_back((cl_mem_properties)fd);
+            break;
+#endif
+        default:
+            ASSERT(0);
+            log_error("Unsupported external memory handle type\n");
+            break;
+    }
+    if (CL_SUCCESS != errcode_ret)
+    {
+        throw std::runtime_error("Unsupported external memory type\n ");
+    }
+    // Set cl_image_desc
+    cl_image_desc image_desc;
+    memset(&image_desc, 0x0, sizeof(cl_image_desc));
+    cl_image_format img_format = { 0 };
+    const VkImageCreateInfo VulkanImageCreateInfo =
+        image2D.getVkImageCreateInfo();
+
+    errcode_ret = getCLImageInfoFromVkImageInfo(
+        &VulkanImageCreateInfo, image2D.getSize(), &img_format, &image_desc);
+    if (CL_SUCCESS != errcode_ret)
+    {
+        throw std::runtime_error("getCLImageInfoFromVkImageInfo failed!!!");
+    }
+
+    // Terminate the property list with the device handle list and a 0.
+    extMemProperties1.push_back((cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR);
+    extMemProperties1.push_back((cl_mem_properties)devList[0]);
+    extMemProperties1.push_back(
+        (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR);
+    extMemProperties1.push_back(0);
+    m_externalMemory = clCreateImageWithProperties(
+        context, extMemProperties1.data(), CL_MEM_READ_WRITE, &img_format,
+        &image_desc, NULL, &errcode_ret);
+    if (CL_SUCCESS != errcode_ret)
+    {
+        throw std::runtime_error("clCreateImageWithProperties failed!!!");
+    }
+}
+
+// Accessor: the cl_mem buffer created from the imported Vulkan memory.
+cl_mem clExternalMemory::getExternalMemoryBuffer()
+{
+    return m_externalMemory;
+}
+
+// Accessor: the cl_mem image created from the imported Vulkan memory.
+cl_mem clExternalMemoryImage::getExternalMemoryImage() { return m_externalMemory; }
+
+// Releases the imported OpenCL image; the clReleaseMemObject result is
+// ignored. NOTE(review): handles are not reference-counted by this class,
+// so copies would release the same cl_mem twice — confirm copies never
+// outlive the source.
+clExternalMemoryImage::~clExternalMemoryImage()
+{
+    clReleaseMemObject(m_externalMemory);
+}
+
+// Releases the imported OpenCL buffer; the clReleaseMemObject result is
+// ignored.
+clExternalMemory::~clExternalMemory() { clReleaseMemObject(m_externalMemory); }
+
+// Default constructor: initialise the handle so the destructor's
+// unconditional clReleaseMemObject() never sees an indeterminate pointer
+// (reading an uninitialised member is undefined behavior).
+clExternalMemoryImage::clExternalMemoryImage(): m_externalMemory(NULL) {}
+
+
+//////////////////////////////////////////
+// clExternalSemaphore implementation //
+//////////////////////////////////////////
+
+// Shallow-copies the cl_semaphore_khr handle.
+// NOTE(review): no retain is performed, and the destructor releases the
+// handle unconditionally — copies share one reference; confirm this cannot
+// double-release.
+clExternalSemaphore::clExternalSemaphore(
+    const clExternalSemaphore &externalSemaphore)
+    : m_externalSemaphore(externalSemaphore.m_externalSemaphore)
+{}
+
+// Imports a Vulkan semaphore into OpenCL as a binary cl_semaphore_khr:
+// verifies the platform cl_khr_external_semaphore_* extension, packs the
+// exported handle (POSIX fd or Win32 handle) into semaphore properties,
+// and creates it via clCreateSemaphoreWithPropertiesKHR. Throws
+// std::runtime_error on any failure.
+clExternalSemaphore::clExternalSemaphore(
+    const VulkanSemaphore &semaphore, cl_context context,
+    VulkanExternalSemaphoreHandleType externalSemaphoreHandleType,
+    cl_device_id deviceId)
+{
+
+    cl_int err = 0;
+    // Only devList[0] is used below; the trailing NULL is a sentinel.
+    cl_device_id devList[] = { deviceId, NULL };
+
+#ifdef _WIN32
+    if (!is_extension_available(devList[0], "cl_khr_external_semaphore_win32"))
+    {
+        throw std::runtime_error("Device does not support "
+                                 "cl_khr_external_semaphore_win32 extension\n");
+    }
+#elif !defined(__APPLE__)
+    if (!is_extension_available(devList[0],
+                                "cl_khr_external_semaphore_opaque_fd"))
+    {
+        throw std::runtime_error(
+            "Device does not support cl_khr_external_semaphore_opaque_fd "
+            "extension \n");
+    }
+#endif
+
+    // Start the property list with the semaphore type (binary), then append
+    // the handle-specific properties below.
+    std::vector<cl_semaphore_properties_khr> sema_props{
+        (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR,
+        (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR,
+    };
+    switch (externalSemaphoreHandleType)
+    {
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD:
+#ifdef _WIN32
+            ASSERT(0);
+#else
+            log_info(" Opaque file descriptors are not supported on Windows\n");
+            fd = (int)semaphore.getHandle(externalSemaphoreHandleType);
+            err = check_external_semaphore_handle_type(
+                devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR);
+            sema_props.push_back(
+                (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR);
+            sema_props.push_back((cl_semaphore_properties_khr)fd);
+#endif
+            break;
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT:
+#ifndef _WIN32
+            ASSERT(0);
+#else
+            log_info(" Opaque NT handles are only supported on Windows\n");
+            // NOTE(review): when the semaphore has a name, NULL is passed as
+            // the handle — presumably the name identifies it instead; confirm
+            // against the cl_khr_external_semaphore_win32 usage.
+            handle = semaphore.getName().size()
+                ? NULL
+                : semaphore.getHandle(externalSemaphoreHandleType);
+            err = check_external_semaphore_handle_type(
+                devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR);
+            sema_props.push_back((cl_semaphore_properties_khr)
+                                     CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR);
+            sema_props.push_back((cl_semaphore_properties_khr)handle);
+#endif
+            break;
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+#ifndef _WIN32
+            ASSERT(0);
+#else
+            log_info(" Opaque D3DKMT handles are only supported on Windows\n");
+            handle = semaphore.getHandle(externalSemaphoreHandleType);
+            err = check_external_semaphore_handle_type(
+                devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR);
+            sema_props.push_back((cl_semaphore_properties_khr)
+                                     CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR);
+            sema_props.push_back((cl_semaphore_properties_khr)handle);
+#endif
+            break;
+        default:
+            ASSERT(0);
+            log_error("Unsupported external memory handle type\n");
+            break;
+    }
+    if (CL_SUCCESS != err)
+    {
+        throw std::runtime_error(
+            "Unsupported external sempahore handle type\n ");
+    }
+
+    // Terminate the property list with the device handle list and a 0.
+    sema_props.push_back(
+        (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_KHR);
+    sema_props.push_back((cl_semaphore_properties_khr)devList[0]);
+    sema_props.push_back(
+        (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_END_KHR);
+    sema_props.push_back(0);
+    m_externalSemaphore =
+        clCreateSemaphoreWithPropertiesKHRptr(context, sema_props.data(), &err);
+    if (CL_SUCCESS != err)
+    {
+        log_error("clCreateSemaphoreWithPropertiesKHRptr failed with %d\n",
+                  err);
+        throw std::runtime_error(
+            "clCreateSemaphoreWithPropertiesKHRptr failed! ");
+    }
+}
+
+// Releases the imported semaphore.
+// A destructor must not throw: if it runs during stack unwinding, an
+// escaping exception calls std::terminate. Log the failure instead of
+// throwing (the original threw std::runtime_error here).
+clExternalSemaphore::~clExternalSemaphore()
+{
+    cl_int err = clReleaseSemaphoreKHRptr(m_externalSemaphore);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clReleaseSemaphoreKHR failed with %d\n", err);
+    }
+}
+
+// Enqueues a signal operation on the imported semaphore (non-blocking).
+// NOTE(review): the cl_int result of the enqueue call is discarded, so
+// failures are silent — consider surfacing it.
+void clExternalSemaphore::signal(cl_command_queue cmd_queue)
+{
+    clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, &m_externalSemaphore, NULL, 0,
+                                    NULL, NULL);
+}
+
+// Enqueues a wait operation on the imported semaphore (non-blocking enqueue).
+// NOTE(review): the cl_int result of the enqueue call is discarded, so
+// failures are silent — consider surfacing it.
+void clExternalSemaphore::wait(cl_command_queue cmd_queue)
+{
+    clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, &m_externalSemaphore, NULL, 0,
+                                  NULL, NULL);
+}
diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp
new file mode 100644
index 00000000..d9f8dccb
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp
@@ -0,0 +1,131 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _opencl_vulkan_wrapper_hpp_
+#define _opencl_vulkan_wrapper_hpp_
+
+#include "vulkan_wrapper.hpp"
+
+#if !defined(__APPLE__)
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+#else
+#include <OpenCL/cl.h>
+#include <OpenCL/cl_ext.h>
+#endif
+
+// Function-pointer types for the cl_khr_external_semaphore and
+// cl_khr_external_memory extension entry points; these are not exported by
+// the ICD loader and must be queried at runtime.
+typedef cl_semaphore_khr (*pfnclCreateSemaphoreWithPropertiesKHR)(
+    cl_context context, cl_semaphore_properties_khr *sema_props,
+    cl_int *errcode_ret);
+typedef cl_int (*pfnclEnqueueWaitSemaphoresKHR)(
+    cl_command_queue command_queue, cl_uint num_semaphores,
+    const cl_semaphore_khr *sema_list,
+    const cl_semaphore_payload_khr *sema_payload_list,
+    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
+    cl_event *event);
+typedef cl_int (*pfnclEnqueueSignalSemaphoresKHR)(
+    cl_command_queue command_queue, cl_uint num_semaphores,
+    const cl_semaphore_khr *sema_list,
+    const cl_semaphore_payload_khr *sema_payload_list,
+    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
+    cl_event *event);
+typedef cl_int (*pfnclEnqueueAcquireExternalMemObjectsKHR)(
+    cl_command_queue command_queue, cl_uint num_mem_objects,
+    const cl_mem *mem_objects, cl_uint num_events_in_wait_list,
+    const cl_event *event_wait_list, cl_event *event);
+typedef cl_int (*pfnclEnqueueReleaseExternalMemObjectsKHR)(
+    cl_command_queue command_queue, cl_uint num_mem_objects,
+    const cl_mem *mem_objects, cl_uint num_events_in_wait_list,
+    const cl_event *event_wait_list, cl_event *event);
+typedef cl_int (*pfnclReleaseSemaphoreKHR)(cl_semaphore_khr sema_object);
+
+// Global pointers to the extension entry points; populated by
+// init_cl_vk_ext() and NULL until that has run.
+extern pfnclCreateSemaphoreWithPropertiesKHR
+    clCreateSemaphoreWithPropertiesKHRptr;
+extern pfnclEnqueueWaitSemaphoresKHR clEnqueueWaitSemaphoresKHRptr;
+extern pfnclEnqueueSignalSemaphoresKHR clEnqueueSignalSemaphoresKHRptr;
+extern pfnclEnqueueAcquireExternalMemObjectsKHR
+    clEnqueueAcquireExternalMemObjectsKHRptr;
+extern pfnclEnqueueReleaseExternalMemObjectsKHR
+    clEnqueueReleaseExternalMemObjectsKHRptr;
+extern pfnclReleaseSemaphoreKHR clReleaseSemaphoreKHRptr;
+
+// Translate a Vulkan image description into an OpenCL format + descriptor.
+cl_int getCLImageInfoFromVkImageInfo(const VkImageCreateInfo *, size_t,
+                                     cl_image_format *, cl_image_desc *);
+// Return CL_SUCCESS iff the device can import the given external-memory
+// handle type.
+cl_int check_external_memory_handle_type(
+    cl_device_id deviceID,
+    cl_external_memory_handle_type_khr requiredHandleType);
+// Return CL_SUCCESS iff the device can import the given external-semaphore
+// handle type.
+cl_int check_external_semaphore_handle_type(
+    cl_device_id deviceID,
+    cl_external_semaphore_handle_type_khr requiredHandleType);
+cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &width,
+                             size_t &height);
+
+// Owns a cl_mem buffer imported from Vulkan device memory; the destructor
+// releases it.
+class clExternalMemory {
+protected:
+    cl_mem m_externalMemory; // imported buffer, released in the destructor
+    int fd; // exported POSIX fd (non-Windows path)
+    void *handle; // exported Win32 handle (Windows path)
+    clExternalMemory(const clExternalMemory &externalMemory);
+
+public:
+    clExternalMemory();
+    clExternalMemory(const VulkanDeviceMemory *deviceMemory,
+                     VulkanExternalMemoryHandleType externalMemoryHandleType,
+                     uint64_t offset, uint64_t size, cl_context context,
+                     cl_device_id deviceId);
+
+    virtual ~clExternalMemory();
+    cl_mem getExternalMemoryBuffer();
+};
+// Owns a cl_mem image imported from Vulkan device memory backing a 2D
+// image; the destructor releases it.
+class clExternalMemoryImage {
+protected:
+    cl_mem m_externalMemory; // imported image, released in the destructor
+    int fd; // exported POSIX fd (non-Windows path)
+    void *handle; // exported Win32 handle (Windows path)
+    cl_command_queue cmd_queue;
+    clExternalMemoryImage();
+
+public:
+    clExternalMemoryImage(
+        const VulkanDeviceMemory &deviceMemory,
+        VulkanExternalMemoryHandleType externalMemoryHandleType,
+        cl_context context, size_t totalImageMemSize, size_t imageWidth,
+        size_t imageHeight, size_t totalSize, const VulkanImage2D &image2D,
+        cl_device_id deviceId);
+    virtual ~clExternalMemoryImage();
+    cl_mem getExternalMemoryImage();
+};
+
+// Owns a cl_semaphore_khr imported from a Vulkan semaphore and exposes
+// enqueue-signal/enqueue-wait helpers; the destructor releases the handle.
+class clExternalSemaphore {
+protected:
+    cl_semaphore_khr m_externalSemaphore; // released in the destructor
+    int fd; // exported POSIX fd (non-Windows path)
+    void *handle; // exported Win32 handle (Windows path)
+    clExternalSemaphore(const clExternalSemaphore &externalSemaphore);
+
+public:
+    clExternalSemaphore(
+        const VulkanSemaphore &deviceSemaphore, cl_context context,
+        VulkanExternalSemaphoreHandleType externalSemaphoreHandleType,
+        cl_device_id deviceId);
+    virtual ~clExternalSemaphore();
+    void signal(cl_command_queue command_queue);
+    void wait(cl_command_queue command_queue);
+    // operator openclExternalSemaphore_t() const;
+};
+
+extern void init_cl_vk_ext(cl_platform_id);
+
+#endif // _opencl_vulkan_wrapper_hpp_
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_api_list.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_api_list.hpp
new file mode 100644
index 00000000..017aefd2
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_api_list.hpp
@@ -0,0 +1,195 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef _vulkan_api_list_hpp_
+#define _vulkan_api_list_hpp_
+
+#define VK_FUNC_LIST \
+ VK_FUNC_DECL(vkEnumerateInstanceVersion) \
+ VK_FUNC_DECL(vkEnumerateInstanceExtensionProperties) \
+ VK_FUNC_DECL(vkEnumerateInstanceLayerProperties) \
+ VK_FUNC_DECL(vkCreateInstance) \
+ VK_FUNC_DECL(vkGetInstanceProcAddr) \
+ VK_FUNC_DECL(vkGetDeviceProcAddr) \
+ VK_FUNC_DECL(vkEnumeratePhysicalDevices) \
+ VK_FUNC_DECL(vkGetPhysicalDeviceProperties) \
+ VK_FUNC_DECL(vkCreateDevice) \
+ VK_FUNC_DECL(vkDestroyDevice) \
+ VK_FUNC_DECL(vkGetDeviceQueue) \
+ VK_FUNC_DECL(vkQueueWaitIdle) \
+ VK_FUNC_DECL(vkCreateDescriptorSetLayout) \
+ VK_FUNC_DECL(vkCreatePipelineLayout) \
+ VK_FUNC_DECL(vkCreateShaderModule) \
+ VK_FUNC_DECL(vkCreateComputePipelines) \
+ VK_FUNC_DECL(vkCreateDescriptorPool) \
+ VK_FUNC_DECL(vkAllocateDescriptorSets) \
+ VK_FUNC_DECL(vkFreeDescriptorSets) \
+ VK_FUNC_DECL(vkAllocateCommandBuffers) \
+ VK_FUNC_DECL(vkBeginCommandBuffer) \
+ VK_FUNC_DECL(vkCmdBindPipeline) \
+ VK_FUNC_DECL(vkCmdBindDescriptorSets) \
+ VK_FUNC_DECL(vkCmdPipelineBarrier) \
+ VK_FUNC_DECL(vkCmdDispatch) \
+ VK_FUNC_DECL(vkCmdFillBuffer) \
+ VK_FUNC_DECL(vkCmdCopyBuffer) \
+ VK_FUNC_DECL(vkCmdUpdateBuffer) \
+ VK_FUNC_DECL(vkCmdCopyBufferToImage) \
+ VK_FUNC_DECL(vkCmdCopyImageToBuffer) \
+ VK_FUNC_DECL(vkEndCommandBuffer) \
+ VK_FUNC_DECL(vkCreateBuffer) \
+ VK_FUNC_DECL(vkCreateImageView) \
+ VK_FUNC_DECL(vkAllocateMemory) \
+ VK_FUNC_DECL(vkMapMemory) \
+ VK_FUNC_DECL(vkBindBufferMemory) \
+ VK_FUNC_DECL(vkBindImageMemory) \
+ VK_FUNC_DECL(vkUnmapMemory) \
+ VK_FUNC_DECL(vkFreeMemory) \
+ VK_FUNC_DECL(vkCreateCommandPool) \
+ VK_FUNC_DECL(vkResetCommandPool) \
+ VK_FUNC_DECL(vkDestroyCommandPool) \
+ VK_FUNC_DECL(vkResetCommandBuffer) \
+ VK_FUNC_DECL(vkFreeCommandBuffers) \
+ VK_FUNC_DECL(vkQueueSubmit) \
+ VK_FUNC_DECL(vkCmdExecuteCommands) \
+ VK_FUNC_DECL(vkCreateFence) \
+ VK_FUNC_DECL(vkDestroyFence) \
+ VK_FUNC_DECL(vkGetFenceStatus) \
+ VK_FUNC_DECL(vkResetFences) \
+ VK_FUNC_DECL(vkWaitForFences) \
+ VK_FUNC_DECL(vkCreateSemaphore) \
+ VK_FUNC_DECL(vkDestroySemaphore) \
+ VK_FUNC_DECL(vkCreateEvent) \
+ VK_FUNC_DECL(vkDestroyImageView) \
+ VK_FUNC_DECL(vkCreateImage) \
+ VK_FUNC_DECL(vkGetImageMemoryRequirements) \
+ VK_FUNC_DECL(vkDestroyImage) \
+ VK_FUNC_DECL(vkDestroyBuffer) \
+ VK_FUNC_DECL(vkDestroyPipeline) \
+ VK_FUNC_DECL(vkDestroyShaderModule) \
+ VK_FUNC_DECL(vkGetPhysicalDeviceMemoryProperties) \
+ VK_FUNC_DECL(vkDestroyInstance) \
+ VK_FUNC_DECL(vkUpdateDescriptorSets) \
+ VK_FUNC_DECL(vkDestroyDescriptorPool) \
+ VK_FUNC_DECL(vkDestroyPipelineLayout) \
+ VK_FUNC_DECL(vkDestroyDescriptorSetLayout) \
+ VK_FUNC_DECL(vkGetPhysicalDeviceQueueFamilyProperties) \
+ VK_FUNC_DECL(vkGetPhysicalDeviceFeatures) \
+ VK_FUNC_DECL(vkGetPhysicalDeviceProperties2KHR) \
+ VK_FUNC_DECL(vkGetBufferMemoryRequirements) \
+ VK_FUNC_DECL(vkGetMemoryFdKHR) \
+ VK_FUNC_DECL(vkGetSemaphoreFdKHR) \
+ VK_FUNC_DECL(vkEnumeratePhysicalDeviceGroups) \
+ VK_FUNC_DECL(vkGetPhysicalDeviceSurfaceCapabilitiesKHR) \
+ VK_FUNC_DECL(vkGetPhysicalDeviceSurfaceFormatsKHR) \
+ VK_FUNC_DECL(vkGetPhysicalDeviceSurfacePresentModesKHR) \
+ VK_FUNC_DECL(vkEnumerateDeviceExtensionProperties) \
+ VK_FUNC_DECL(vkGetPhysicalDeviceSurfaceSupportKHR)
+
+#define VK_WINDOWS_FUNC_LIST \
+ VK_FUNC_DECL(vkGetMemoryWin32HandleKHR) \
+ VK_FUNC_DECL(vkGetSemaphoreWin32HandleKHR)
+
+#define vkEnumerateInstanceVersion _vkEnumerateInstanceVersion
+#define vkEnumerateInstanceExtensionProperties \
+ _vkEnumerateInstanceExtensionProperties
+#define vkEnumerateInstanceLayerProperties _vkEnumerateInstanceLayerProperties
+#define vkCreateInstance _vkCreateInstance
+#define vkGetInstanceProcAddr _vkGetInstanceProcAddr
+#define vkGetDeviceProcAddr _vkGetDeviceProcAddr
+#define vkEnumeratePhysicalDevices _vkEnumeratePhysicalDevices
+#define vkGetPhysicalDeviceProperties _vkGetPhysicalDeviceProperties
+#define vkCreateDevice _vkCreateDevice
+#define vkDestroyDevice _vkDestroyDevice
+#define vkGetDeviceQueue _vkGetDeviceQueue
+#define vkQueueWaitIdle _vkQueueWaitIdle
+#define vkCreateDescriptorSetLayout _vkCreateDescriptorSetLayout
+#define vkCreatePipelineLayout _vkCreatePipelineLayout
+#define vkCreateShaderModule _vkCreateShaderModule
+#define vkCreateComputePipelines _vkCreateComputePipelines
+#define vkCreateDescriptorPool _vkCreateDescriptorPool
+#define vkAllocateDescriptorSets _vkAllocateDescriptorSets
+#define vkFreeDescriptorSets _vkFreeDescriptorSets
+#define vkAllocateCommandBuffers _vkAllocateCommandBuffers
+#define vkBeginCommandBuffer _vkBeginCommandBuffer
+#define vkCmdBindPipeline _vkCmdBindPipeline
+#define vkCmdBindDescriptorSets _vkCmdBindDescriptorSets
+#define vkCmdPipelineBarrier _vkCmdPipelineBarrier
+#define vkCmdDispatch _vkCmdDispatch
+#define vkCmdFillBuffer _vkCmdFillBuffer
+#define vkCmdCopyBuffer _vkCmdCopyBuffer
+#define vkCmdUpdateBuffer _vkCmdUpdateBuffer
+#define vkCmdCopyBufferToImage _vkCmdCopyBufferToImage
+#define vkCmdCopyImageToBuffer _vkCmdCopyImageToBuffer
+#define vkEndCommandBuffer _vkEndCommandBuffer
+#define vkCreateBuffer _vkCreateBuffer
+#define vkCreateImageView _vkCreateImageView
+#define vkAllocateMemory _vkAllocateMemory
+#define vkMapMemory _vkMapMemory
+#define vkBindBufferMemory _vkBindBufferMemory
+#define vkBindImageMemory _vkBindImageMemory
+#define vkUnmapMemory _vkUnmapMemory
+#define vkFreeMemory _vkFreeMemory
+#define vkCreateCommandPool _vkCreateCommandPool
+#define vkResetCommandPool _vkResetCommandPool
+#define vkDestroyCommandPool _vkDestroyCommandPool
+#define vkResetCommandBuffer _vkResetCommandBuffer
+#define vkFreeCommandBuffers _vkFreeCommandBuffers
+#define vkQueueSubmit _vkQueueSubmit
+#define vkCmdExecuteCommands _vkCmdExecuteCommands
+#define vkCreateFence _vkCreateFence
+#define vkDestroyFence _vkDestroyFence
+#define vkGetFenceStatus _vkGetFenceStatus
+#define vkResetFences _vkResetFences
+#define vkWaitForFences _vkWaitForFences
+#define vkCreateSemaphore _vkCreateSemaphore
+#define vkDestroySemaphore _vkDestroySemaphore
+#define vkCreateEvent _vkCreateEvent
+#define vkDestroyImageView _vkDestroyImageView
+#define vkCreateImage _vkCreateImage
+#define vkGetImageMemoryRequirements _vkGetImageMemoryRequirements
+#define vkDestroyImage _vkDestroyImage
+#define vkDestroyBuffer _vkDestroyBuffer
+#define vkDestroyPipeline _vkDestroyPipeline
+#define vkDestroyShaderModule _vkDestroyShaderModule
+#define vkGetPhysicalDeviceMemoryProperties _vkGetPhysicalDeviceMemoryProperties
+#define vkDestroyInstance _vkDestroyInstance
+#define vkUpdateDescriptorSets _vkUpdateDescriptorSets
+#define vkDestroyDescriptorPool _vkDestroyDescriptorPool
+#define vkDestroyPipelineLayout _vkDestroyPipelineLayout
+#define vkDestroyDescriptorSetLayout _vkDestroyDescriptorSetLayout
+#define vkGetPhysicalDeviceQueueFamilyProperties \
+ _vkGetPhysicalDeviceQueueFamilyProperties
+#define vkGetPhysicalDeviceFeatures _vkGetPhysicalDeviceFeatures
+#define vkGetPhysicalDeviceProperties2KHR _vkGetPhysicalDeviceProperties2KHR
+#define vkGetBufferMemoryRequirements _vkGetBufferMemoryRequirements
+#define vkGetMemoryFdKHR _vkGetMemoryFdKHR
+#define vkGetSemaphoreFdKHR _vkGetSemaphoreFdKHR
+#define vkEnumeratePhysicalDeviceGroups _vkEnumeratePhysicalDeviceGroups
+#define vkGetPhysicalDeviceSurfaceCapabilitiesKHR \
+ _vkGetPhysicalDeviceSurfaceCapabilitiesKHR
+#define vkGetPhysicalDeviceSurfaceFormatsKHR \
+ _vkGetPhysicalDeviceSurfaceFormatsKHR
+#define vkGetPhysicalDeviceSurfacePresentModesKHR \
+ _vkGetPhysicalDeviceSurfacePresentModesKHR
+#define vkEnumerateDeviceExtensionProperties \
+ _vkEnumerateDeviceExtensionProperties
+#define vkGetPhysicalDeviceSurfaceSupportKHR \
+ _vkGetPhysicalDeviceSurfaceSupportKHR
+
+#define vkGetMemoryWin32HandleKHR _vkGetMemoryWin32HandleKHR
+#define vkGetSemaphoreWin32HandleKHR _vkGetSemaphoreWin32HandleKHR
+
+#endif //_vulkan_api_list_hpp_
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.cpp
new file mode 100644
index 00000000..db9d168f
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.cpp
@@ -0,0 +1,22 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "vulkan_interop_common.hpp"
+
+// Default iteration counts for the Vulkan interop tests. These are declared
+// 'extern unsigned int' in vulkan_interop_common.hpp; define them with the
+// same type here so declaration and definition cannot diverge on platforms
+// where uint32_t is not 'unsigned int' (the original defined them as
+// uint32_t).
+unsigned int innerIterations = 5;
+unsigned int perfIterations = 100;
+unsigned int stressIterations = 1000;
+size_t cpuThreadsPerGpu = 3;
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp
new file mode 100644
index 00000000..18d84f09
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp
@@ -0,0 +1,50 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef _vulkan_interop_common_hpp_
+#define _vulkan_interop_common_hpp_
+
+#include "vulkan_wrapper_types.hpp"
+#include "vulkan_wrapper.hpp"
+#include "vulkan_list_map.hpp"
+#include "vulkan_utility.hpp"
+#include "opencl_vulkan_wrapper.hpp"
+
+// Number of iterations for loops within tests (default value 5)
+extern unsigned int innerIterations;
+// Number of iterations for loops within perf tests (default value 100)
+extern unsigned int perfIterations;
+// Number of iterations for loops within stress tests (default value 1000)
+extern unsigned int stressIterations;
+// Number of CPU threads per GPU (default value 3)
+extern size_t cpuThreadsPerGpu;
+// Number of command queues (default value 1)
+extern unsigned int numCQ;
+// Enable Multi-import of vulkan device memory
+extern bool multiImport;
+// Enable Multi-import of vulkan device memory under different context
+extern bool multiCtx;
+// Enable additional debug info logging
+extern bool debug_trace;
+
+// NOTE(review): the toggles below are defined by the test binaries; their
+// semantics are inferred from the names — confirm against the definitions.
+extern bool useSingleImageKernel;
+extern bool useDeviceLocal;
+extern bool disableNTHandleType;
+// Enable offset for multiImport of vulkan device memory
+extern bool enableOffset;
+extern bool non_dedicated;
+
+#endif // _vulkan_interop_common_hpp_
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp
new file mode 100644
index 00000000..bdae5d22
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp
@@ -0,0 +1,424 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifdef _WIN32
+#define NOMINMAX
+#endif
+#include "vulkan_list_map.hpp"
+#include "vulkan_utility.hpp"
+#include "vulkan_wrapper.hpp"
+
+/////////////////////////////////////////////
+// VulkanPhysicalDeviceList implementation //
+/////////////////////////////////////////////
+
+// NOTE(review): all copy constructors in this file ignore their source; the
+// "copy" starts empty because the VulkanList base is default-constructed
+// (no base appears in the mem-initializer list) — confirm intended.
+VulkanPhysicalDeviceList::VulkanPhysicalDeviceList(
+    const VulkanPhysicalDeviceList &physicalDeviceList)
+{}
+
+VulkanPhysicalDeviceList::VulkanPhysicalDeviceList() {}
+
+VulkanPhysicalDeviceList::~VulkanPhysicalDeviceList() {}
+
+/////////////////////////////////////////
+// VulkanMemoryHeapList implementation //
+/////////////////////////////////////////
+
+VulkanMemoryHeapList::VulkanMemoryHeapList(
+    const VulkanMemoryHeapList &memoryHeapList)
+{}
+
+VulkanMemoryHeapList::VulkanMemoryHeapList() {}
+
+VulkanMemoryHeapList::~VulkanMemoryHeapList() {}
+
+/////////////////////////////////////////
+// VulkanMemoryTypeList implementation //
+/////////////////////////////////////////
+
+VulkanMemoryTypeList::VulkanMemoryTypeList(
+    const VulkanMemoryTypeList &memoryTypeList)
+{}
+
+VulkanMemoryTypeList::VulkanMemoryTypeList() {}
+
+VulkanMemoryTypeList::~VulkanMemoryTypeList() {}
+
+//////////////////////////////////////////
+// VulkanQueueFamilyList implementation //
+//////////////////////////////////////////
+
+VulkanQueueFamilyList::VulkanQueueFamilyList(
+    const VulkanQueueFamilyList &queueFamilyList)
+{}
+
+VulkanQueueFamilyList::VulkanQueueFamilyList() {}
+
+VulkanQueueFamilyList::~VulkanQueueFamilyList() {}
+
+/////////////////////////////////////////////////////
+// VulkanQueueFamilyToQueueCountMap implementation //
+/////////////////////////////////////////////////////
+
+VulkanQueueFamilyToQueueCountMap::VulkanQueueFamilyToQueueCountMap(
+    const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap)
+{}
+
+// Maps every queue family index (0 .. max family count over all physical
+// devices - 1) to the same fixed queue count.
+VulkanQueueFamilyToQueueCountMap::VulkanQueueFamilyToQueueCountMap(
+    uint32_t numQueuesPerFamily)
+{
+    const VulkanPhysicalDeviceList &devices =
+        getVulkanInstance().getPhysicalDeviceList();
+
+    // Largest queue family count across all physical devices.
+    uint32_t familyCount = 0;
+    for (size_t devIdx = 0; devIdx < devices.size(); devIdx++)
+    {
+        uint32_t count =
+            (uint32_t)devices[devIdx].getQueueFamilyList().size();
+        if (count > familyCount)
+        {
+            familyCount = count;
+        }
+    }
+
+    for (uint32_t familyIdx = 0; familyIdx < familyCount; familyIdx++)
+    {
+        insert(familyIdx, numQueuesPerFamily);
+    }
+}
+
+VulkanQueueFamilyToQueueCountMap::~VulkanQueueFamilyToQueueCountMap() {}
+
+////////////////////////////////////////////////////
+// VulkanQueueFamilyToQueueListMap implementation //
+////////////////////////////////////////////////////
+
+VulkanQueueFamilyToQueueListMap::VulkanQueueFamilyToQueueListMap(
+    const VulkanQueueFamilyToQueueListMap &queueFamilyToQueueMap)
+{}
+
+VulkanQueueFamilyToQueueListMap::VulkanQueueFamilyToQueueListMap() {}
+
+VulkanQueueFamilyToQueueListMap::~VulkanQueueFamilyToQueueListMap() {}
+
+// Associates a queue family index with an externally owned queue list.
+// Like std::map::insert, an existing key is left untouched.
+void VulkanQueueFamilyToQueueListMap::insert(uint32_t key,
+                                             VulkanQueueList &queueList)
+{
+    m_map.emplace(key, std::reference_wrapper<VulkanQueueList>(queueList));
+}
+
+VulkanQueueList &VulkanQueueFamilyToQueueListMap::operator[](uint32_t key)
+{
+    // std::map::at throws std::out_of_range when the key is absent.
+    return m_map.at(key).get();
+}
+
+////////////////////////////////////
+// VulkanQueueList implementation //
+////////////////////////////////////
+
+// NOTE(review): copy constructor ignores its source (base is
+// default-constructed); the copy starts empty — confirm intended.
+VulkanQueueList::VulkanQueueList(const VulkanQueueList &queueList) {}
+
+VulkanQueueList::VulkanQueueList() {}
+
+VulkanQueueList::~VulkanQueueList() {}
+
+/////////////////////////////////////////////////////////
+// VulkanDescriptorSetLayoutBindingList implementation //
+/////////////////////////////////////////////////////////
+
+VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList(
+    const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList)
+{}
+
+VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList() {}
+
+// Creates numDescriptorSetLayoutBindings bindings, all with the same
+// descriptor type/count/stage and binding numbers 0..n-1. The list owns the
+// heap-allocated bindings and deletes them in the destructor.
+VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList(
+    size_t numDescriptorSetLayoutBindings, VulkanDescriptorType descriptorType,
+    uint32_t descriptorCount, VulkanShaderStage shaderStage)
+{
+    for (size_t idx = 0; idx < numDescriptorSetLayoutBindings; idx++)
+    {
+        VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding =
+            new VulkanDescriptorSetLayoutBinding((uint32_t)idx, descriptorType,
+                                                 descriptorCount, shaderStage);
+        add(*descriptorSetLayoutBinding);
+    }
+}
+
+// Creates descriptorCount0 bindings of descriptorType0 (binding numbers
+// 0..count0-1) followed by descriptorCount1 bindings of descriptorType1
+// (binding numbers count0..count0+count1-1), each with descriptor count 1.
+VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList(
+    VulkanDescriptorType descriptorType0, uint32_t descriptorCount0,
+    VulkanDescriptorType descriptorType1, uint32_t descriptorCount1,
+    VulkanShaderStage shaderStage)
+{
+    for (uint32_t idx = 0; idx < descriptorCount0; idx++)
+    {
+        VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding0 =
+            new VulkanDescriptorSetLayoutBinding(idx, descriptorType0, 1,
+                                                 shaderStage);
+        add(*descriptorSetLayoutBinding0);
+    }
+    for (uint32_t idx = 0; idx < descriptorCount1; idx++)
+    {
+        VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding1 =
+            new VulkanDescriptorSetLayoutBinding(
+                descriptorCount0 + idx, descriptorType1, 1, shaderStage);
+        add(*descriptorSetLayoutBinding1);
+    }
+}
+
+// Deletes the bindings allocated by the constructors above (the wrapper
+// list stores references to the heap objects; delete through their address).
+VulkanDescriptorSetLayoutBindingList::~VulkanDescriptorSetLayoutBindingList()
+{
+    for (size_t idx = 0; idx < m_wrapperList.size(); idx++)
+    {
+        VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding =
+            m_wrapperList[idx];
+        delete &descriptorSetLayoutBinding;
+    }
+}
+
+//////////////////////////////////////////////////
+// VulkanDescriptorSetLayoutList implementation //
+//////////////////////////////////////////////////
+
+VulkanDescriptorSetLayoutList::VulkanDescriptorSetLayoutList(
+    const VulkanDescriptorSetLayoutList &descriptorSetLayoutList)
+{}
+
+VulkanDescriptorSetLayoutList::VulkanDescriptorSetLayoutList() {}
+
+VulkanDescriptorSetLayoutList::~VulkanDescriptorSetLayoutList() {}
+
+////////////////////////////////////////////
+// VulkanCommandBufferList implementation //
+////////////////////////////////////////////
+
+VulkanCommandBufferList::VulkanCommandBufferList(
+    const VulkanCommandBufferList &commandBufferList)
+{}
+
+VulkanCommandBufferList::VulkanCommandBufferList() {}
+
+// Creates numCommandBuffers command buffers on (device, commandPool); the
+// list owns them and deletes them in the destructor.
+VulkanCommandBufferList::VulkanCommandBufferList(
+    size_t numCommandBuffers, const VulkanDevice &device,
+    const VulkanCommandPool &commandPool)
+{
+    for (size_t idx = 0; idx < numCommandBuffers; idx++)
+    {
+        VulkanCommandBuffer *commandBuffer =
+            new VulkanCommandBuffer(device, commandPool);
+        add(*commandBuffer);
+    }
+}
+
+VulkanCommandBufferList::~VulkanCommandBufferList()
+{
+    for (size_t idx = 0; idx < m_wrapperList.size(); idx++)
+    {
+        VulkanCommandBuffer &commandBuffer = m_wrapperList[idx];
+        delete &commandBuffer;
+    }
+}
+
+/////////////////////////////////////
+// VulkanBufferList implementation //
+/////////////////////////////////////
+
+VulkanBufferList::VulkanBufferList(const VulkanBufferList &bufferList) {}
+
+// Creates numBuffers identically configured buffers of `size` bytes; the
+// list owns them and deletes them in the destructor.
+VulkanBufferList::VulkanBufferList(
+    size_t numBuffers, const VulkanDevice &device, uint64_t size,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanBufferUsage bufferUsage, VulkanSharingMode sharingMode,
+    const VulkanQueueFamilyList &queueFamilyList)
+{
+    for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
+    {
+        VulkanBuffer *buffer =
+            new VulkanBuffer(device, size, externalMemoryHandleType,
+                             bufferUsage, sharingMode, queueFamilyList);
+        add(*buffer);
+    }
+}
+
+VulkanBufferList::~VulkanBufferList()
+{
+    for (size_t bIdx = 0; bIdx < m_wrapperList.size(); bIdx++)
+    {
+        VulkanBuffer &buffer = m_wrapperList[bIdx];
+        delete &buffer;
+    }
+}
+
+//////////////////////////////////////
+// VulkanImage2DList implementation //
+//////////////////////////////////////
+
+VulkanImage2DList::VulkanImage2DList(const VulkanImage2DList &image2DList) {}
+
+// Creates numImages 2D images and binds image i to deviceMemory[i] at
+// offset baseOffset + i * interImageOffset. The list owns the images and
+// deletes them in the destructor (the device memory is NOT owned here).
+VulkanImage2DList::VulkanImage2DList(
+    size_t numImages, std::vector<VulkanDeviceMemory *> &deviceMemory,
+    uint64_t baseOffset, uint64_t interImageOffset, const VulkanDevice &device,
+    VulkanFormat format, uint32_t width, uint32_t height, uint32_t mipLevels,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
+    VulkanSharingMode sharingMode)
+{
+    for (size_t i2DIdx = 0; i2DIdx < numImages; i2DIdx++)
+    {
+        VulkanImage2D *image2D = new VulkanImage2D(
+            device, format, width, height, mipLevels, externalMemoryHandleType,
+            imageCreateFlag, imageUsage, sharingMode);
+        add(*image2D);
+        deviceMemory[i2DIdx]->bindImage(
+            *image2D, baseOffset + (i2DIdx * interImageOffset));
+    }
+}
+
+// Creates numImages 2D images without binding any device memory.
+VulkanImage2DList::VulkanImage2DList(
+    size_t numImages, const VulkanDevice &device, VulkanFormat format,
+    uint32_t width, uint32_t height, uint32_t mipLevels,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
+    VulkanSharingMode sharingMode)
+{
+    for (size_t bIdx = 0; bIdx < numImages; bIdx++)
+    {
+        VulkanImage2D *image2D = new VulkanImage2D(
+            device, format, width, height, mipLevels, externalMemoryHandleType,
+            imageCreateFlag, imageUsage, sharingMode);
+        add(*image2D);
+    }
+}
+
+VulkanImage2DList::~VulkanImage2DList()
+{
+    for (size_t i2DIdx = 0; i2DIdx < m_wrapperList.size(); i2DIdx++)
+    {
+        VulkanImage2D &image2D = m_wrapperList[i2DIdx];
+        delete &image2D;
+    }
+}
+
+////////////////////////////////////////
+// VulkanImageViewList implementation //
+////////////////////////////////////////
+
+VulkanImageViewList::VulkanImageViewList(const VulkanImageViewList &image2DList)
+{}
+
+// Creates a 2D view for every image in image2DList. When
+// createImageViewPerMipLevel is true, one single-mip view is created per
+// mip level of each image; otherwise one view per image. The list owns the
+// views and deletes them in the destructor.
+VulkanImageViewList::VulkanImageViewList(const VulkanDevice &device,
+                                         const VulkanImage2DList &image2DList,
+                                         bool createImageViewPerMipLevel)
+{
+    for (size_t i2DIdx = 0; i2DIdx < image2DList.size(); i2DIdx++)
+    {
+        if (createImageViewPerMipLevel)
+        {
+            for (uint32_t mipLevel = 0;
+                 mipLevel < image2DList[i2DIdx].getNumMipLevels(); mipLevel++)
+            {
+                VulkanImageView *image2DView =
+                    new VulkanImageView(device, image2DList[i2DIdx],
+                                        VULKAN_IMAGE_VIEW_TYPE_2D, mipLevel, 1);
+                add(*image2DView);
+            }
+        }
+        else
+        {
+            VulkanImageView *image2DView = new VulkanImageView(
+                device, image2DList[i2DIdx], VULKAN_IMAGE_VIEW_TYPE_2D);
+            add(*image2DView);
+        }
+    }
+}
+
+VulkanImageViewList::~VulkanImageViewList()
+{
+    for (size_t ivIdx = 0; ivIdx < m_wrapperList.size(); ivIdx++)
+    {
+        VulkanImageView &imageView = m_wrapperList[ivIdx];
+        delete &imageView;
+    }
+}
+
+///////////////////////////////////////////
+// VulkanDeviceMemoryList implementation //
+///////////////////////////////////////////
+
+VulkanDeviceMemoryList::VulkanDeviceMemoryList(
+    const VulkanDeviceMemoryList &deviceMemoryList)
+{}
+
+// Allocates one device memory object per image in image2DList (sized for
+// that image) and binds the image to it. The list owns the allocations and
+// deletes them in the destructor.
+// NOTE(review): the numImages parameter is unused; iteration is driven by
+// image2DList.size() — confirm callers pass matching values.
+VulkanDeviceMemoryList::VulkanDeviceMemoryList(
+    size_t numImages, const VulkanImage2DList &image2DList,
+    const VulkanDevice &device, const VulkanMemoryType &memoryType,
+    VulkanExternalMemoryHandleType externalMemoryHandleType)
+{
+    for (size_t i2DIdx = 0; i2DIdx < image2DList.size(); i2DIdx++)
+    {
+        VulkanDeviceMemory *deviceMemory = new VulkanDeviceMemory(
+            device, image2DList[i2DIdx], memoryType, externalMemoryHandleType);
+        add(*deviceMemory);
+        deviceMemory->bindImage(image2DList[i2DIdx]);
+    }
+}
+
+VulkanDeviceMemoryList::~VulkanDeviceMemoryList()
+{
+    for (size_t dmIdx = 0; dmIdx < m_wrapperList.size(); dmIdx++)
+    {
+        VulkanDeviceMemory &deviceMemory = m_wrapperList[dmIdx];
+        delete &deviceMemory;
+    }
+}
+
+////////////////////////////////////////
+// VulkanSemaphoreList implementation //
+////////////////////////////////////////
+
+VulkanSemaphoreList::VulkanSemaphoreList(
+    const VulkanSemaphoreList &semaphoreList)
+{}
+
+VulkanSemaphoreList::VulkanSemaphoreList() {}
+
+// Creates numSemaphores semaphores on `device`, exportable via
+// externalSemaphoreHandleType. When namePrefix is non-empty, semaphore i is
+// named "<namePrefix><i>". The list owns the semaphores and deletes them in
+// the destructor.
+VulkanSemaphoreList::VulkanSemaphoreList(
+    size_t numSemaphores, const VulkanDevice &device,
+    VulkanExternalSemaphoreHandleType externalSemaphoreHandleType,
+    const std::wstring namePrefix)
+{
+    std::wstring name = L"";
+    for (size_t idx = 0; idx < numSemaphores; idx++)
+    {
+        if (namePrefix.size())
+        {
+            const size_t maxNameSize = 256;
+            wchar_t tempName[maxNameSize];
+            // %ls, not %s: per C99, a wchar_t* argument to a wide format
+            // string requires %ls. MSVC's "%s means wide in wide functions"
+            // behavior is non-portable; the original L"%s%d" was undefined
+            // on non-Windows platforms.
+            swprintf(tempName, maxNameSize, L"%ls%d", namePrefix.c_str(),
+                     (int)idx);
+            name = tempName;
+        }
+        VulkanSemaphore *semaphore =
+            new VulkanSemaphore(device, externalSemaphoreHandleType, name);
+        add(*semaphore);
+    }
+}
+
+VulkanSemaphoreList::~VulkanSemaphoreList()
+{
+    for (size_t idx = 0; idx < m_wrapperList.size(); idx++)
+    {
+        VulkanSemaphore &Semaphore = m_wrapperList[idx];
+        delete &Semaphore;
+    }
+}
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp
new file mode 100644
index 00000000..52206779
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp
@@ -0,0 +1,386 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef _vulkan_list_map_hpp_
+#define _vulkan_list_map_hpp_
+
+#include <functional>
+#include <iostream>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "vulkan_wrapper_types.hpp"
+#include "vulkan_utility.hpp"
+// Ordered container pairing C++ wrapper objects with their native Vulkan
+// handles. A list holds EITHER mutable wrappers (m_wrapperList) OR const
+// wrappers (m_constWrapperList), never both — the add() overloads enforce
+// this. m_nativeList mirrors the wrappers as raw handles so operator() can
+// expose a contiguous handle array.
+template <class VulkanWrapper, class VulkanNative> class VulkanList {
+protected:
+    std::vector<std::reference_wrapper<VulkanWrapper>> m_wrapperList;
+    std::vector<std::reference_wrapper<const VulkanWrapper>> m_constWrapperList;
+    std::vector<VulkanNative> m_nativeList;
+
+    VulkanList(const VulkanList &list);
+    VulkanList();
+    virtual ~VulkanList();
+    virtual void add(VulkanWrapper &wrapper);
+
+public:
+    virtual void add(const VulkanWrapper &wrapper);
+    virtual size_t size() const;
+    virtual const VulkanWrapper &operator[](size_t idx) const;
+    virtual VulkanWrapper &operator[](size_t idx);
+    virtual const VulkanNative *operator()() const;
+};
+
+// Thin key→value wrapper around std::map. Lookup uses std::map::at, which
+// throws std::out_of_range for a missing key; insert() does not overwrite
+// an existing key.
+template <class VulkanKey, class VulkanValue> class VulkanMap {
+protected:
+    std::map<VulkanKey, VulkanValue> m_map;
+
+    VulkanMap(const VulkanMap &map);
+    VulkanMap();
+    virtual ~VulkanMap();
+
+public:
+    void insert(const VulkanKey &key, VulkanValue &value);
+    const VulkanValue &operator[](const VulkanKey &key) const;
+    VulkanValue &operator[](const VulkanKey &key);
+};
+
+// Typed list specializations. Copy constructors are protected and, per the
+// .cpp implementations, ignore their source — copies start empty.
+
+// Physical devices enumerated by a VulkanInstance (which is a friend and
+// populates the list).
+class VulkanPhysicalDeviceList
+    : public VulkanList<VulkanPhysicalDevice, VkPhysicalDevice> {
+    friend class VulkanInstance;
+
+protected:
+    VulkanPhysicalDeviceList(
+        const VulkanPhysicalDeviceList &physicalDeviceList);
+
+public:
+    VulkanPhysicalDeviceList();
+    virtual ~VulkanPhysicalDeviceList();
+};
+
+// Queue families of a physical device (populated by VulkanPhysicalDevice).
+class VulkanQueueFamilyList : public VulkanList<VulkanQueueFamily, uint32_t> {
+    friend class VulkanPhysicalDevice;
+
+protected:
+    VulkanQueueFamilyList(const VulkanQueueFamilyList &queueFamilyList);
+
+public:
+    VulkanQueueFamilyList();
+    virtual ~VulkanQueueFamilyList();
+};
+
+// Memory heaps of a physical device.
+class VulkanMemoryHeapList : public VulkanList<VulkanMemoryHeap, uint32_t> {
+    friend class VulkanPhysicalDevice;
+
+protected:
+    VulkanMemoryHeapList(const VulkanMemoryHeapList &memoryHeapList);
+
+public:
+    VulkanMemoryHeapList();
+    virtual ~VulkanMemoryHeapList();
+};
+
+// Memory types of a physical device (also populated for buffer/image
+// memory requirement queries — see friends).
+class VulkanMemoryTypeList : public VulkanList<VulkanMemoryType, uint32_t> {
+    friend class VulkanPhysicalDevice;
+    friend class VulkanBuffer;
+    friend class VulkanImage;
+
+protected:
+    VulkanMemoryTypeList(const VulkanMemoryTypeList &memoryTypeList);
+
+public:
+    VulkanMemoryTypeList();
+    virtual ~VulkanMemoryTypeList();
+};
+
+// queue family index → number of queues to create in that family.
+class VulkanQueueFamilyToQueueCountMap : public VulkanMap<uint32_t, uint32_t> {
+protected:
+    VulkanQueueFamilyToQueueCountMap(
+        const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap);
+
+public:
+    VulkanQueueFamilyToQueueCountMap(uint32_t numQueuesPerFamily = 0);
+    virtual ~VulkanQueueFamilyToQueueCountMap();
+};
+
+// Queues of one family on a logical device (populated by VulkanDevice).
+class VulkanQueueList : public VulkanList<VulkanQueue, VkQueue> {
+    friend class VulkanDevice;
+
+protected:
+    VulkanQueueList(const VulkanQueueList &queueList);
+
+public:
+    VulkanQueueList();
+    virtual ~VulkanQueueList();
+};
+
+// queue family index → externally owned VulkanQueueList (stored by
+// reference; the map does not own the lists).
+class VulkanQueueFamilyToQueueListMap
+    : public VulkanMap<uint32_t, std::reference_wrapper<VulkanQueueList>> {
+protected:
+    VulkanQueueFamilyToQueueListMap(
+        const VulkanQueueFamilyToQueueListMap &queueFamilyToQueueMap);
+
+public:
+    VulkanQueueFamilyToQueueListMap();
+    virtual ~VulkanQueueFamilyToQueueListMap();
+    void insert(uint32_t key, VulkanQueueList &queueList);
+    VulkanQueueList &operator[](uint32_t key);
+};
+
+// Owning list of heap-allocated descriptor set layout bindings; see .cpp
+// constructors for the binding-number assignment rules.
+class VulkanDescriptorSetLayoutBindingList
+    : public VulkanList<VulkanDescriptorSetLayoutBinding,
+                        VkDescriptorSetLayoutBinding> {
+protected:
+    VulkanDescriptorSetLayoutBindingList(
+        const VulkanDescriptorSetLayoutBindingList
+            &descriptorSetLayoutBindingList);
+
+public:
+    VulkanDescriptorSetLayoutBindingList();
+    VulkanDescriptorSetLayoutBindingList(
+        size_t numDescriptorSetLayoutBindings,
+        VulkanDescriptorType descriptorType, uint32_t descriptorCount = 1,
+        VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE);
+    VulkanDescriptorSetLayoutBindingList(
+        VulkanDescriptorType descriptorType0, uint32_t descriptorCount0,
+        VulkanDescriptorType descriptorType1, uint32_t descriptorCount1,
+        VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE);
+    virtual ~VulkanDescriptorSetLayoutBindingList();
+};
+
+// Non-owning list of descriptor set layouts.
+class VulkanDescriptorSetLayoutList
+    : public VulkanList<VulkanDescriptorSetLayout, VkDescriptorSetLayout> {
+protected:
+    VulkanDescriptorSetLayoutList(
+        const VulkanDescriptorSetLayoutList &descriptorSetLayoutList);
+
+public:
+    VulkanDescriptorSetLayoutList();
+    virtual ~VulkanDescriptorSetLayoutList();
+};
+
+// Owning list of command buffers allocated from a single command pool.
+class VulkanCommandBufferList
+    : public VulkanList<VulkanCommandBuffer, VkCommandBuffer> {
+protected:
+    VulkanCommandBufferList(const VulkanCommandBufferList &commandBufferList);
+
+public:
+    VulkanCommandBufferList();
+    VulkanCommandBufferList(size_t numCommandBuffers,
+                            const VulkanDevice &device,
+                            const VulkanCommandPool &commandPool);
+    virtual ~VulkanCommandBufferList();
+};
+
+// Owning list of identically configured buffers.
+class VulkanBufferList : public VulkanList<VulkanBuffer, VkBuffer> {
+protected:
+    VulkanBufferList(const VulkanBufferList &bufferList);
+
+public:
+    VulkanBufferList(
+        size_t numBuffers, const VulkanDevice &device, uint64_t size,
+        VulkanExternalMemoryHandleType externalMemoryHandleType =
+            VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+        VulkanBufferUsage bufferUsage =
+            VULKAN_BUFFER_USAGE_STORAGE_BUFFER_TRANSFER_SRC_DST,
+        VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE,
+        const VulkanQueueFamilyList &queueFamilyList =
+            getEmptyVulkanQueueFamilyList());
+    virtual ~VulkanBufferList();
+};
+
+// Owning list of 2D images; the first constructor also binds each image to
+// caller-provided device memory at baseOffset + i * interImageOffset.
+class VulkanImage2DList : public VulkanList<VulkanImage2D, VkImage> {
+protected:
+    VulkanImage2DList(const VulkanImage2DList &image2DList);
+
+public:
+    VulkanImage2DList(
+        size_t numImages, std::vector<VulkanDeviceMemory *> &deviceMemory,
+        uint64_t baseOffset, uint64_t interImageOffset,
+        const VulkanDevice &device, VulkanFormat format, uint32_t width,
+        uint32_t height, uint32_t mipLevels,
+        VulkanExternalMemoryHandleType externalMemoryHandleType =
+            VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+        VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
+        VulkanImageUsage imageUsage =
+            VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST,
+        VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
+    VulkanImage2DList(
+        size_t numImages, const VulkanDevice &device, VulkanFormat format,
+        uint32_t width, uint32_t height, uint32_t mipLevels = 1,
+        VulkanExternalMemoryHandleType externalMemoryHandleType =
+            VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+        VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
+        VulkanImageUsage imageUsage =
+            VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST,
+        VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
+    virtual ~VulkanImage2DList();
+};
+
+// Owning list of image views — one per image, or one per mip level of each
+// image when createImageViewPerMipLevel is true.
+class VulkanImageViewList : public VulkanList<VulkanImageView, VkImageView> {
+protected:
+    VulkanImageViewList(const VulkanImageViewList &imageViewList);
+
+public:
+    VulkanImageViewList(const VulkanDevice &device,
+                        const VulkanImage2DList &image2DList,
+                        bool createImageViewPerMipLevel = true);
+    virtual ~VulkanImageViewList();
+};
+
+// Owning list of device memory allocations, one per image in image2DList;
+// each image is bound to its allocation on construction.
+class VulkanDeviceMemoryList
+    : public VulkanList<VulkanDeviceMemory, VkDeviceMemory> {
+protected:
+    VulkanDeviceMemoryList(const VulkanDeviceMemoryList &deviceMemoryList);
+
+public:
+    VulkanDeviceMemoryList(
+        size_t numImages, const VulkanImage2DList &image2DList,
+        const VulkanDevice &device, const VulkanMemoryType &memoryType,
+        VulkanExternalMemoryHandleType externalMemoryHandleType =
+            VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE);
+    virtual ~VulkanDeviceMemoryList();
+};
+
+// Owning list of semaphores, optionally named "<namePrefix><index>".
+class VulkanSemaphoreList : public VulkanList<VulkanSemaphore, VkSemaphore> {
+protected:
+    VulkanSemaphoreList(const VulkanSemaphoreList &semaphoreList);
+
+public:
+    VulkanSemaphoreList();
+    VulkanSemaphoreList(
+        size_t numSemaphores, const VulkanDevice &device,
+        VulkanExternalSemaphoreHandleType externalSemaphoreHandleType =
+            VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE,
+        const std::wstring namePrefix = L"");
+    virtual ~VulkanSemaphoreList();
+};
+
+///////////////////////////////
+// VulkanList implementation //
+///////////////////////////////
+
+// Copies all three member vectors. Note this performs a shallow copy of
+// reference wrappers: both lists refer to the SAME underlying wrapper
+// objects. NOTE(review): derived copy constructors in vulkan_list_map.cpp
+// do not invoke this base copy constructor (no mem-initializer), so it
+// appears unused — confirm.
+template <class VulkanWrapper, class VulkanNative>
+VulkanList<VulkanWrapper, VulkanNative>::VulkanList(const VulkanList &list)
+    : m_wrapperList(list.m_wrapperList),
+      m_constWrapperList(list.m_constWrapperList),
+      m_nativeList(list.m_nativeList)
+{}
+
+template <class VulkanWrapper, class VulkanNative>
+VulkanList<VulkanWrapper, VulkanNative>::VulkanList()
+{}
+
+template <class VulkanWrapper, class VulkanNative>
+VulkanList<VulkanWrapper, VulkanNative>::~VulkanList()
+{}
+
+// Appends a mutable wrapper and its native handle. A list may hold either
+// mutable or const wrappers, never both; mixing is rejected with a message
+// on stdout (no exception).
+template <class VulkanWrapper, class VulkanNative>
+void VulkanList<VulkanWrapper, VulkanNative>::add(VulkanWrapper &wrapper)
+{
+
+    if (m_constWrapperList.size() != size_t(0))
+    {
+        std::cout << "This list can only contain externally allocated objects"
+                  << std::endl;
+        return;
+    }
+    m_wrapperList.push_back(std::reference_wrapper<VulkanWrapper>(wrapper));
+    m_nativeList.push_back((VulkanNative)wrapper);
+}
+
+// Appends a const wrapper and its native handle; rejected (message on
+// stdout) if the list already holds mutable wrappers.
+template <class VulkanWrapper, class VulkanNative>
+void VulkanList<VulkanWrapper, VulkanNative>::add(const VulkanWrapper &wrapper)
+{
+    if (m_wrapperList.size() != size_t(0))
+    {
+        std::cout << "This list cannot contain externally allocated objects"
+                  << std::endl;
+        return;
+    }
+
+    m_constWrapperList.push_back(
+        std::reference_wrapper<const VulkanWrapper>(wrapper));
+    m_nativeList.push_back((VulkanNative)wrapper);
+}
+
+// Number of elements, whichever of the two wrapper lists is populated.
+template <class VulkanWrapper, class VulkanNative>
+size_t VulkanList<VulkanWrapper, VulkanNative>::size() const
+{
+    return (m_wrapperList.size() > 0) ? m_wrapperList.size()
+                                      : m_constWrapperList.size();
+}
+
+template <class VulkanWrapper, class VulkanNative>
+const VulkanWrapper &
+    VulkanList<VulkanWrapper, VulkanNative>::operator[](size_t idx) const
+{
+    // The original fell off the end of this value-returning function when
+    // idx was out of range (undefined behavior); report the error instead.
+    if (idx >= size())
+    {
+        throw std::out_of_range("VulkanList index out of range");
+    }
+    return (m_wrapperList.size() > 0) ? m_wrapperList[idx].get()
+                                      : m_constWrapperList[idx].get();
+}
+
+template <class VulkanWrapper, class VulkanNative>
+VulkanWrapper &VulkanList<VulkanWrapper, VulkanNative>::operator[](size_t idx)
+{
+    // NOTE(review): no bounds check here; an out-of-range idx indexes the
+    // vector out of bounds, and a const-populated list is not handled.
+    return m_wrapperList[idx].get();
+}
+
+// Contiguous array of native handles (e.g. to feed Vk*CreateInfo arrays).
+template <class VulkanWrapper, class VulkanNative>
+const VulkanNative *VulkanList<VulkanWrapper, VulkanNative>::operator()() const
+{
+    return m_nativeList.data();
+}
+
+//////////////////////////////
+// VulkanMap implementation //
+//////////////////////////////
+
+template <class VulkanKey, class VulkanValue>
+VulkanMap<VulkanKey, VulkanValue>::VulkanMap(const VulkanMap &map)
+    : m_map(map.m_map)
+{}
+
+template <class VulkanKey, class VulkanValue>
+VulkanMap<VulkanKey, VulkanValue>::VulkanMap()
+{}
+
+template <class VulkanKey, class VulkanValue>
+VulkanMap<VulkanKey, VulkanValue>::~VulkanMap()
+{}
+
+// Inserts key→value; the value is stored via reference_wrapper conversion
+// to VulkanValue. Like std::map::insert, an existing key is not replaced.
+template <class VulkanKey, class VulkanValue>
+void VulkanMap<VulkanKey, VulkanValue>::insert(const VulkanKey &key,
+                                               VulkanValue &value)
+{
+    m_map.insert(std::pair<VulkanKey, std::reference_wrapper<VulkanValue>>(
+        key, std::reference_wrapper<VulkanValue>(value)));
+}
+
+// Lookup via std::map::at — throws std::out_of_range for a missing key.
+template <class VulkanKey, class VulkanValue>
+const VulkanValue &
+    VulkanMap<VulkanKey, VulkanValue>::operator[](const VulkanKey &key) const
+{
+    return m_map.at(key);
+}
+
+template <class VulkanKey, class VulkanValue>
+VulkanValue &VulkanMap<VulkanKey, VulkanValue>::operator[](const VulkanKey &key)
+{
+    return m_map.at(key);
+}
+
+#endif // _vulkan_list_map_hpp_
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp
new file mode 100644
index 00000000..1a313cce
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp
@@ -0,0 +1,692 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "vulkan_utility.hpp"
+#include "vulkan_wrapper.hpp"
+#include <assert.h>
+#include <iostream>
+#include <fstream>
+#include <set>
+#include <string>
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+#if defined(_WIN32) || defined(_WIN64)
+#include <versionhelpers.h>
+#endif
+#define ASSERT(x) assert((x))
+#define BUFFERSIZE 3000
+
+
+// Process-wide VulkanInstance, constructed on first use (Meyers singleton).
+const VulkanInstance &getVulkanInstance()
+{
+    static VulkanInstance instance;
+    return instance;
+}
+
+// Selects the Vulkan physical device whose device UUID matches an OpenCL
+// GPU device exposing CL_DEVICE_UUID_KHR (cl_khr_device_uuid). Throws
+// std::runtime_error when any OpenCL query fails or no matching device is
+// found.
+const VulkanPhysicalDevice &getVulkanPhysicalDevice()
+{
+    cl_int errNum = 0;
+    cl_platform_id platform = NULL;
+    cl_uchar uuid[CL_UUID_SIZE_KHR];
+    cl_device_id *devices;
+    char *extensions = NULL;
+    size_t extensionSize = 0;
+    cl_uint num_devices = 0;
+    cl_uint device_no = 0;
+    const VulkanInstance &instance = getVulkanInstance();
+    const VulkanPhysicalDeviceList &physicalDeviceList =
+        instance.getPhysicalDeviceList();
+    // Initialized past-the-end so an empty OpenCL device list produces the
+    // "no suitable GPU" error below instead of reading an uninitialized
+    // index (a bug in the original).
+    size_t pdIdx = physicalDeviceList.size();
+
+    // get the platform ID
+    errNum = clGetPlatformIDs(1, &platform, NULL);
+    if (errNum != CL_SUCCESS)
+    {
+        printf("Error: Failed to get platform\n");
+        throw std::runtime_error("Error: Failed to get number of platform\n");
+    }
+
+    errNum =
+        clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
+    if (CL_SUCCESS != errNum)
+    {
+        throw std::runtime_error(
+            "Error: clGetDeviceIDs failed in returning of devices\n");
+    }
+    devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
+    if (NULL == devices)
+    {
+        throw std::runtime_error(
+            "Error: Unable to allocate memory for devices\n");
+    }
+    errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices,
+                            NULL);
+    if (CL_SUCCESS != errNum)
+    {
+        free(devices);
+        throw std::runtime_error("Error: Failed to get deviceID.\n");
+    }
+    bool is_selected = false;
+    for (device_no = 0; device_no < num_devices; device_no++)
+    {
+        errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0,
+                                 NULL, &extensionSize);
+        if (CL_SUCCESS != errNum)
+        {
+            free(devices);
+            throw std::runtime_error("Error in clGetDeviceInfo for getting "
+                                     "device_extension size....\n");
+        }
+        extensions = (char *)malloc(extensionSize);
+        if (NULL == extensions)
+        {
+            free(devices);
+            throw std::runtime_error(
+                "Unable to allocate memory for extensions\n");
+        }
+        errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS,
+                                 extensionSize, extensions, NULL);
+        if (CL_SUCCESS != errNum)
+        {
+            free(extensions);
+            free(devices);
+            throw std::runtime_error("Error: Error in clGetDeviceInfo for "
+                                     "getting device_extension\n");
+        }
+        errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR,
+                                 CL_UUID_SIZE_KHR, uuid, &extensionSize);
+        if (CL_SUCCESS != errNum)
+        {
+            free(extensions);
+            free(devices);
+            throw std::runtime_error(
+                "Error: clGetDeviceInfo failed with error\n");
+        }
+        free(extensions);
+        for (pdIdx = 0; pdIdx < physicalDeviceList.size(); pdIdx++)
+        {
+            if (!memcmp(&uuid, physicalDeviceList[pdIdx].getUUID(),
+                        VK_UUID_SIZE))
+            {
+                std::cout << "Selected physical device = "
+                          << physicalDeviceList[pdIdx] << std::endl;
+                is_selected = true;
+                break;
+            }
+        }
+        if (is_selected)
+        {
+            break;
+        }
+    }
+    // The device array is no longer needed (the original leaked it).
+    free(devices);
+
+    if ((pdIdx >= physicalDeviceList.size())
+        || (physicalDeviceList[pdIdx] == (VkPhysicalDevice)VK_NULL_HANDLE))
+    {
+        throw std::runtime_error("failed to find a suitable GPU!");
+    }
+    std::cout << "Selected physical device is: " << physicalDeviceList[pdIdx]
+              << std::endl;
+    return physicalDeviceList[pdIdx];
+}
+
+// Returns the first queue family of the selected physical device that
+// supports every bit in queueFlags. Throws std::runtime_error when no such
+// family exists (the original indexed one past the end of the list in that
+// case — undefined behavior).
+const VulkanQueueFamily &getVulkanQueueFamily(uint32_t queueFlags)
+{
+    size_t qfIdx;
+    const VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice();
+    const VulkanQueueFamilyList &queueFamilyList =
+        physicalDevice.getQueueFamilyList();
+
+    for (qfIdx = 0; qfIdx < queueFamilyList.size(); qfIdx++)
+    {
+        if ((queueFamilyList[qfIdx].getQueueFlags() & queueFlags) == queueFlags)
+        {
+            return queueFamilyList[qfIdx];
+        }
+    }
+
+    throw std::runtime_error(
+        "No Vulkan queue family supports the requested queue flags");
+}
+
+// Returns the first memory type of `device`'s physical device that has all
+// bits of memoryTypeProperty set. Throws std::runtime_error when none
+// matches (the original indexed one past the end — undefined behavior; the
+// commented-out CHECK_LT hinted at the intended bounds check).
+const VulkanMemoryType &
+getVulkanMemoryType(const VulkanDevice &device,
+                    VulkanMemoryTypeProperty memoryTypeProperty)
+{
+    size_t mtIdx;
+    const VulkanMemoryTypeList &memoryTypeList =
+        device.getPhysicalDevice().getMemoryTypeList();
+
+    for (mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
+    {
+        if ((memoryTypeList[mtIdx].getMemoryTypeProperty() & memoryTypeProperty)
+            == memoryTypeProperty)
+        {
+            return memoryTypeList[mtIdx];
+        }
+    }
+
+    throw std::runtime_error(
+        "No Vulkan memory type has the requested properties");
+}
+
+// Returns true when at least one Vulkan-capable physical device was
+// enumerated; otherwise logs and returns false.
+bool checkVkSupport()
+{
+    const VulkanInstance &instance = getVulkanInstance();
+    const VulkanPhysicalDeviceList &physicalDeviceList =
+        instance.getPhysicalDeviceList();
+    if (physicalDeviceList() == NULL)
+    {
+        std::cout << "physicalDeviceList is null, No GPUs found with "
+                     "Vulkan support !!!\n";
+        return false;
+    }
+    return true;
+}
+
+// Shared empty list used as the default queueFamilyList argument in
+// vulkan_list_map.hpp (function-local static, constructed on first use).
+const VulkanQueueFamilyList &getEmptyVulkanQueueFamilyList()
+{
+    static VulkanQueueFamilyList queueFamilyList;
+    return queueFamilyList;
+}
+
+// Shared empty descriptor set layout list (function-local static).
+const VulkanDescriptorSetLayoutList &getEmptyVulkanDescriptorSetLayoutList()
+{
+    static VulkanDescriptorSetLayoutList descriptorSetLayoutList;
+
+    return descriptorSetLayoutList;
+}
+
+// Default map: one queue per queue family (function-local static).
+const VulkanQueueFamilyToQueueCountMap &
+getDefaultVulkanQueueFamilyToQueueCountMap()
+{
+    static VulkanQueueFamilyToQueueCountMap queueFamilyToQueueCountMap(1);
+
+    return queueFamilyToQueueCountMap;
+}
+
+// External-memory handle types to test: on Windows, NT handles (Windows 8+
+// only) plus KMT handles; elsewhere, opaque POSIX file descriptors.
+const std::vector<VulkanExternalMemoryHandleType>
+getSupportedVulkanExternalMemoryHandleTypeList()
+{
+    std::vector<VulkanExternalMemoryHandleType> externalMemoryHandleTypeList;
+
+#if _WIN32
+    if (IsWindows8OrGreater())
+    {
+        externalMemoryHandleTypeList.push_back(
+            VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT);
+    }
+    externalMemoryHandleTypeList.push_back(
+        VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT);
+#else
+    externalMemoryHandleTypeList.push_back(
+        VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD);
+#endif
+
+    return externalMemoryHandleTypeList;
+}
+
+// Builds the list of external-semaphore handle types usable on this
+// platform; mirrors getSupportedVulkanExternalMemoryHandleTypeList().
+const std::vector<VulkanExternalSemaphoreHandleType>
+getSupportedVulkanExternalSemaphoreHandleTypeList()
+{
+    std::vector<VulkanExternalSemaphoreHandleType> handleTypes;
+
+#if _WIN32
+    if (IsWindows8OrGreater())
+    {
+        handleTypes.push_back(
+            VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT);
+    }
+    handleTypes.push_back(
+        VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT);
+#else
+    handleTypes.push_back(VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD);
+#endif
+
+    return handleTypes;
+}
+
+// Returns the Vulkan formats exercised by the CL/Vulkan interop tests.
+//
+// The validation loop below sanity-checks that every entry pushed above is
+// one of the supported formats; hitting the default branch indicates a bug
+// in this function itself.
+//
+// Fixes relative to the previous version:
+//  - ~160 explicit `case` labels for every unsupported VkFormat collapsed
+//    into a single `default:` (runtime behavior is identical, since only
+//    the 21 supported formats are ever placed in the list);
+//  - the diagnostic is printed *before* ASSERT(0), so it is visible even
+//    when the assert aborts, and the "Unsupport" typo is fixed.
+const std::vector<VulkanFormat> getSupportedVulkanFormatList()
+{
+    std::vector<VulkanFormat> formatList;
+
+    formatList.push_back(VULKAN_FORMAT_R8_UINT);
+    formatList.push_back(VULKAN_FORMAT_R8_SINT);
+    formatList.push_back(VULKAN_FORMAT_R8G8_UINT);
+    formatList.push_back(VULKAN_FORMAT_R8G8_SINT);
+    formatList.push_back(VULKAN_FORMAT_R8G8B8A8_UINT);
+    formatList.push_back(VULKAN_FORMAT_R8G8B8A8_SINT);
+    formatList.push_back(VULKAN_FORMAT_R16_UINT);
+    formatList.push_back(VULKAN_FORMAT_R16_SINT);
+    formatList.push_back(VULKAN_FORMAT_R16G16_UINT);
+    formatList.push_back(VULKAN_FORMAT_R16G16_SINT);
+    formatList.push_back(VULKAN_FORMAT_R16G16B16A16_UINT);
+    formatList.push_back(VULKAN_FORMAT_R16G16B16A16_SINT);
+    formatList.push_back(VULKAN_FORMAT_R32_UINT);
+    formatList.push_back(VULKAN_FORMAT_R32_SINT);
+    formatList.push_back(VULKAN_FORMAT_R32_SFLOAT);
+    formatList.push_back(VULKAN_FORMAT_R32G32_UINT);
+    formatList.push_back(VULKAN_FORMAT_R32G32_SINT);
+    formatList.push_back(VULKAN_FORMAT_R32G32_SFLOAT);
+    formatList.push_back(VULKAN_FORMAT_R32G32B32A32_UINT);
+    formatList.push_back(VULKAN_FORMAT_R32G32B32A32_SINT);
+    formatList.push_back(VULKAN_FORMAT_R32G32B32A32_SFLOAT);
+
+    for (size_t fIdx = 0; fIdx < formatList.size(); fIdx++)
+    {
+        switch (formatList[fIdx])
+        {
+            case VULKAN_FORMAT_R8_UINT:
+            case VULKAN_FORMAT_R8_SINT:
+            case VULKAN_FORMAT_R8G8_UINT:
+            case VULKAN_FORMAT_R8G8_SINT:
+            case VULKAN_FORMAT_R8G8B8A8_UINT:
+            case VULKAN_FORMAT_R8G8B8A8_SINT:
+            case VULKAN_FORMAT_R16_UINT:
+            case VULKAN_FORMAT_R16_SINT:
+            case VULKAN_FORMAT_R16G16_UINT:
+            case VULKAN_FORMAT_R16G16_SINT:
+            case VULKAN_FORMAT_R16G16B16A16_UINT:
+            case VULKAN_FORMAT_R16G16B16A16_SINT:
+            case VULKAN_FORMAT_R32_UINT:
+            case VULKAN_FORMAT_R32_SINT:
+            case VULKAN_FORMAT_R32_SFLOAT:
+            case VULKAN_FORMAT_R32G32_UINT:
+            case VULKAN_FORMAT_R32G32_SINT:
+            case VULKAN_FORMAT_R32G32_SFLOAT:
+            case VULKAN_FORMAT_R32G32B32A32_UINT:
+            case VULKAN_FORMAT_R32G32B32A32_SINT:
+            case VULKAN_FORMAT_R32G32B32A32_SFLOAT: break;
+
+            default:
+                std::cout << "Unsupported texture format";
+                ASSERT(0);
+        }
+    }
+
+    return formatList;
+}
+
+// Size in bytes of a single texel of `format`. Asserts (and returns 0)
+// for formats outside the supported interop set.
+uint32_t getVulkanFormatElementSize(VulkanFormat format)
+{
+    switch (format)
+    {
+        case VULKAN_FORMAT_R8_UINT:
+        case VULKAN_FORMAT_R8_SINT: return 1;
+        case VULKAN_FORMAT_R8G8_UINT:
+        case VULKAN_FORMAT_R8G8_SINT:
+        case VULKAN_FORMAT_R16_UINT:
+        case VULKAN_FORMAT_R16_SINT: return 2;
+        case VULKAN_FORMAT_R8G8B8A8_UINT:
+        case VULKAN_FORMAT_R8G8B8A8_SINT:
+        case VULKAN_FORMAT_R16G16_UINT:
+        case VULKAN_FORMAT_R16G16_SINT:
+        case VULKAN_FORMAT_R32_UINT:
+        case VULKAN_FORMAT_R32_SINT:
+        case VULKAN_FORMAT_R32_SFLOAT: return 4;
+        case VULKAN_FORMAT_R16G16B16A16_UINT:
+        case VULKAN_FORMAT_R16G16B16A16_SINT:
+        case VULKAN_FORMAT_R32G32_UINT:
+        case VULKAN_FORMAT_R32G32_SINT:
+        case VULKAN_FORMAT_R32G32_SFLOAT: return 8;
+        case VULKAN_FORMAT_R32G32B32A32_UINT:
+        case VULKAN_FORMAT_R32G32B32A32_SINT:
+        case VULKAN_FORMAT_R32G32B32A32_SFLOAT: return 16;
+        default: ASSERT(0); std::cout << "Unknown format";
+    }
+
+    return 0;
+}
+
+// Maps `format` to the matching GLSL image format layout qualifier string
+// (e.g. "rgba32f"). Asserts and returns NULL for unsupported formats.
+//
+// Fix: the fallthrough return was the contorted `(const char *)size_t(0)`;
+// replaced with the idiomatic NULL.
+const char *getVulkanFormatGLSLFormat(VulkanFormat format)
+{
+    switch (format)
+    {
+        case VULKAN_FORMAT_R8_UINT: return "r8ui";
+        case VULKAN_FORMAT_R8_SINT: return "r8i";
+        case VULKAN_FORMAT_R8G8_UINT: return "rg8ui";
+        case VULKAN_FORMAT_R8G8_SINT: return "rg8i";
+        case VULKAN_FORMAT_R8G8B8A8_UINT: return "rgba8ui";
+        case VULKAN_FORMAT_R8G8B8A8_SINT: return "rgba8i";
+        case VULKAN_FORMAT_R16_UINT: return "r16ui";
+        case VULKAN_FORMAT_R16_SINT: return "r16i";
+        case VULKAN_FORMAT_R16G16_UINT: return "rg16ui";
+        case VULKAN_FORMAT_R16G16_SINT: return "rg16i";
+        case VULKAN_FORMAT_R16G16B16A16_UINT: return "rgba16ui";
+        case VULKAN_FORMAT_R16G16B16A16_SINT: return "rgba16i";
+        case VULKAN_FORMAT_R32_UINT: return "r32ui";
+        case VULKAN_FORMAT_R32_SINT: return "r32i";
+        case VULKAN_FORMAT_R32_SFLOAT: return "r32f";
+        case VULKAN_FORMAT_R32G32_UINT: return "rg32ui";
+        case VULKAN_FORMAT_R32G32_SINT: return "rg32i";
+        case VULKAN_FORMAT_R32G32_SFLOAT: return "rg32f";
+        case VULKAN_FORMAT_R32G32B32A32_UINT: return "rgba32ui";
+        case VULKAN_FORMAT_R32G32B32A32_SINT: return "rgba32i";
+        case VULKAN_FORMAT_R32G32B32A32_SFLOAT: return "rgba32f";
+        default: ASSERT(0); std::cout << "Unknown format";
+    }
+
+    return NULL;
+}
+
+// Streams a human-readable name for a memory-type property combination.
+// Values with no matching case print nothing (same as the original switch,
+// which simply fell through to `return os`).
+std::ostream &operator<<(std::ostream &os,
+                         VulkanMemoryTypeProperty memoryTypeProperty)
+{
+    const char *label = NULL;
+    switch (memoryTypeProperty)
+    {
+        case VULKAN_MEMORY_TYPE_PROPERTY_NONE: label = "None"; break;
+        case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL:
+            label = "Device local";
+            break;
+        case VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT:
+            label = "Host visible and coherent";
+            break;
+        case VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED:
+            label = "Host visible and cached";
+            break;
+        case VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED_COHERENT:
+            label = "Host visible, cached and coherent";
+            break;
+        case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_COHERENT:
+            label = "Device local, Host visible and coherent";
+            break;
+        case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED:
+            label = "Device local, Host visible and cached";
+            break;
+        case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED_COHERENT:
+            label = "Device local, Host visible, cached and coherent";
+            break;
+    }
+    if (label)
+    {
+        os << label;
+    }
+    return os;
+}
+
+// Streams a human-readable name for an external-memory handle type.
+// Unrecognized values print nothing, matching the original switch.
+std::ostream &
+operator<<(std::ostream &os,
+           VulkanExternalMemoryHandleType externalMemoryHandleType)
+{
+    const char *label = NULL;
+    switch (externalMemoryHandleType)
+    {
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE: label = "None"; break;
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD:
+            label = "Opaque file descriptor";
+            break;
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT:
+            label = "Opaque NT handle";
+            break;
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+            label = "Opaque D3DKMT handle";
+            break;
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT:
+            label = "Opaque NT and D3DKMT handle";
+            break;
+    }
+    if (label)
+    {
+        os << label;
+    }
+    return os;
+}
+
+// Streams a human-readable name for an external-semaphore handle type.
+// Unrecognized values print nothing, matching the original switch.
+std::ostream &
+operator<<(std::ostream &os,
+           VulkanExternalSemaphoreHandleType externalSemaphoreHandleType)
+{
+    const char *label = NULL;
+    switch (externalSemaphoreHandleType)
+    {
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE: label = "None"; break;
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD:
+            label = "Opaque file descriptor";
+            break;
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT:
+            label = "Opaque NT handle";
+            break;
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+            label = "Opaque D3DKMT handle";
+            break;
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT:
+            label = "Opaque NT and D3DKMT handle";
+            break;
+    }
+    if (label)
+    {
+        os << label;
+    }
+    return os;
+}
+
+// Streams the symbolic name of `format`. Asserts on formats outside the
+// supported interop set.
+//
+// Fix: removed the unreachable `break;` that followed the final `return`.
+std::ostream &operator<<(std::ostream &os, VulkanFormat format)
+{
+    switch (format)
+    {
+        case VULKAN_FORMAT_R8_UINT: return os << "R8_UINT";
+        case VULKAN_FORMAT_R8_SINT: return os << "R8_SINT";
+        case VULKAN_FORMAT_R8G8_UINT: return os << "R8G8_UINT";
+        case VULKAN_FORMAT_R8G8_SINT: return os << "R8G8_SINT";
+        case VULKAN_FORMAT_R8G8B8A8_UINT: return os << "R8G8B8A8_UINT";
+        case VULKAN_FORMAT_R8G8B8A8_SINT: return os << "R8G8B8A8_SINT";
+        case VULKAN_FORMAT_R16_UINT: return os << "R16_UINT";
+        case VULKAN_FORMAT_R16_SINT: return os << "R16_SINT";
+        case VULKAN_FORMAT_R16G16_UINT: return os << "R16G16_UINT";
+        case VULKAN_FORMAT_R16G16_SINT: return os << "R16G16_SINT";
+        case VULKAN_FORMAT_R16G16B16A16_UINT: return os << "R16G16B16A16_UINT";
+        case VULKAN_FORMAT_R16G16B16A16_SINT: return os << "R16G16B16A16_SINT";
+        case VULKAN_FORMAT_R32_UINT: return os << "R32_UINT";
+        case VULKAN_FORMAT_R32_SINT: return os << "R32_SINT";
+        case VULKAN_FORMAT_R32_SFLOAT: return os << "R32_SFLOAT";
+        case VULKAN_FORMAT_R32G32_UINT: return os << "R32G32_UINT";
+        case VULKAN_FORMAT_R32G32_SINT: return os << "R32G32_SINT";
+        case VULKAN_FORMAT_R32G32_SFLOAT: return os << "R32G32_SFLOAT";
+        case VULKAN_FORMAT_R32G32B32A32_UINT: return os << "R32G32B32A32_UINT";
+        case VULKAN_FORMAT_R32G32B32A32_SINT: return os << "R32G32B32A32_SINT";
+        case VULKAN_FORMAT_R32G32B32A32_SFLOAT:
+            return os << "R32G32B32A32_SFLOAT";
+        default: ASSERT(0); std::cout << "Unknown format";
+    }
+
+    return os;
+}
+
+// Searches a fixed set of directories for `filename` and returns a
+// heap-allocated copy of the first path under which the file can be opened,
+// or NULL when the file is not found. The caller owns the returned string
+// and must free() it.
+//
+// Fixes: removed the unreachable `if (fp) fclose(fp);` (a non-NULL fp was
+// already closed and returned above), and replaced strncpy with memcpy —
+// the length is exact, and memcpy of length()+1 always copies the NUL.
+static char *findFilePath(const std::string filename)
+{
+    const char *searchPath[] = {
+        "./", // Same dir
+        "./shaders/", // In shaders folder in same dir
+        "../test_conformance/vulkan/shaders/" // In src folder
+    };
+    for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i)
+    {
+        std::string path(searchPath[i]);
+        path.append(filename);
+
+        FILE *fp = fopen(path.c_str(), "rb");
+        if (fp != NULL)
+        {
+            // Only existence matters here; close immediately.
+            fclose(fp);
+            char *file_path = (char *)malloc(path.length() + 1);
+            memcpy(file_path, path.c_str(), path.length() + 1);
+            return file_path;
+        }
+    }
+    // File not found
+    return 0;
+}
+
+// Reads an entire shader SPIR-V binary into memory.
+// Throws std::runtime_error when the file cannot be located or opened.
+//
+// Fixes: findFilePath() may return NULL (previously fed straight into
+// std::ifstream — undefined behavior); the returned path was leaked; and
+// printf used %d for a size_t (format-specifier UB) — now %zu.
+std::vector<char> readFile(const std::string &filename)
+{
+    char *file_path = findFilePath(filename);
+    if (file_path == NULL)
+    {
+        throw std::runtime_error("failed to locate shader spv file!\n");
+    }
+
+    std::ifstream file(file_path, std::ios::ate | std::ios::binary);
+    free(file_path); // findFilePath() transferred ownership to us
+
+    if (!file.is_open())
+    {
+        throw std::runtime_error("failed to open shader spv file!\n");
+    }
+    size_t fileSize = (size_t)file.tellg();
+    std::vector<char> buffer(fileSize);
+    file.seekg(0);
+    file.read(buffer.data(), fileSize);
+    file.close();
+    printf("filesize is %zu", fileSize);
+    return buffer;
+}
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp
new file mode 100644
index 00000000..04f5a594
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp
@@ -0,0 +1,70 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef _vulkan_utility_hpp_
+#define _vulkan_utility_hpp_
+
+#include "vulkan_wrapper_types.hpp"
+#include <vector>
+#include <ostream>
+#include <string.h>
+#include <map>
+#include "../../../test_common/harness/testHarness.h"
+
+// Two-step stringification so macro arguments are expanded before being
+// turned into string literals.
+#define STRING_(str) #str
+#define STRING(str) STRING_(str)
+
+// Rounds n up to the next multiple of `multiple`.
+// NOTE: both arguments are evaluated more than once — do not pass
+// expressions with side effects.
+#define ROUND_UP(n, multiple)                                                  \
+    (((n) + (multiple)-1) - ((((n) + (multiple)-1)) % (multiple)))
+
+// Singleton accessors for the process-wide Vulkan objects used by the tests.
+const VulkanInstance& getVulkanInstance();
+const VulkanPhysicalDevice& getVulkanPhysicalDevice();
+const VulkanQueueFamily&
+getVulkanQueueFamily(uint32_t queueFlags = VULKAN_QUEUE_FLAG_MASK_ALL);
+const VulkanMemoryType&
+getVulkanMemoryType(const VulkanDevice& device,
+                    VulkanMemoryTypeProperty memoryTypeProperty);
+// True when at least one Vulkan-capable physical device is present.
+bool checkVkSupport();
+const VulkanQueueFamilyList& getEmptyVulkanQueueFamilyList();
+const VulkanDescriptorSetLayoutList& getEmptyVulkanDescriptorSetLayoutList();
+const VulkanQueueFamilyToQueueCountMap&
+getDefaultVulkanQueueFamilyToQueueCountMap();
+// Platform-dependent lists of handle types / formats supported by the tests.
+const std::vector<VulkanExternalMemoryHandleType>
+getSupportedVulkanExternalMemoryHandleTypeList();
+const std::vector<VulkanExternalSemaphoreHandleType>
+getSupportedVulkanExternalSemaphoreHandleTypeList();
+const std::vector<VulkanFormat> getSupportedVulkanFormatList();
+
+// Format introspection helpers (texel size, GLSL layout qualifiers).
+uint32_t getVulkanFormatElementSize(VulkanFormat format);
+const char* getVulkanFormatGLSLFormat(VulkanFormat format);
+const char* getVulkanFormatGLSLTypePrefix(VulkanFormat format);
+
+// Replaces each key of patternToSubstituteMap found in shaderCode with its
+// mapped value and returns the resulting source.
+std::string prepareVulkanShader(
+    std::string shaderCode,
+    const std::map<std::string, std::string>& patternToSubstituteMap);
+
+// Pretty-printers for diagnostics/logging.
+std::ostream& operator<<(std::ostream& os,
+                         VulkanMemoryTypeProperty memoryTypeProperty);
+std::ostream&
+operator<<(std::ostream& os,
+           VulkanExternalMemoryHandleType externalMemoryHandleType);
+std::ostream&
+operator<<(std::ostream& os,
+           VulkanExternalSemaphoreHandleType externalSemaphoreHandleType);
+std::ostream& operator<<(std::ostream& os, VulkanFormat format);
+
+// Loads an entire file (shader binary) into memory; throws on failure.
+std::vector<char> readFile(const std::string& filename);
+#endif // _vulkan_utility_hpp_
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp
new file mode 100644
index 00000000..6209a747
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp
@@ -0,0 +1,2072 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifdef _WIN32
+#define NOMINMAX
+#include <Windows.h>
+#include <dxgi1_2.h>
+#include <aclapi.h>
+#endif
+#include <vulkan/vulkan.h>
+#include "vulkan_wrapper.hpp"
+#if defined(__linux__) && !defined(__ANDROID__)
+#include <gnu/libc-version.h>
+#include <dlfcn.h>
+#elif defined(__ANDROID__)
+#include <dlfcn.h>
+#endif
+#if defined _WIN32
+#define LoadFunction GetProcAddress
+#elif defined __linux
+#define LoadFunction dlsym
+#endif
+
+extern "C" {
+// Define one function-pointer variable (e.g. PFN_vkCreateInstance
+// _vkCreateInstance) for every Vulkan entry point in VK_FUNC_LIST.
+// They start NULL and are resolved at runtime via vkGetInstanceProcAddr
+// in the VulkanInstance constructor.
+#define VK_FUNC_DECL(name) PFN_##name _##name = NULL;
+VK_FUNC_LIST
+#if defined(_WIN32) || defined(_WIN64)
+VK_WINDOWS_FUNC_LIST
+#endif
+#undef VK_FUNC_DECL
+}
+
+#define WAIVED 2
+#define HANDLE_ERROR -1
+
+#define CHECK_VK(call) \
+ if (call != VK_SUCCESS) return call;
+///////////////////////////////////
+// VulkanInstance implementation //
+///////////////////////////////////
+
+// Copy constructor.
+// NOTE(review): this is a shallow copy — both objects share the same
+// VkInstance handle and the same VulkanPhysicalDevice pointers, while the
+// destructor destroys the instance and deletes those pointers. Destroying
+// both the copy and the original would double-destroy/double-delete;
+// confirm copies are never destructed independently of the original.
+VulkanInstance::VulkanInstance(const VulkanInstance &instance)
+    : m_vkInstance(instance.m_vkInstance),
+      m_physicalDeviceList(instance.m_physicalDeviceList)
+{}
+
+// Default constructor: bootstraps the Vulkan loader at runtime.
+// Loads the platform loader library, resolves vkGetInstanceProcAddr,
+// checks the required instance extensions, creates the VkInstance,
+// resolves all entry points in VK_FUNC_LIST, and wraps every enumerated
+// physical device in a heap-allocated VulkanPhysicalDevice (freed in the
+// destructor). On a non-fatal setup failure it returns early, leaving
+// m_vkInstance as VK_NULL_HANDLE and the device list empty.
+VulkanInstance::VulkanInstance(): m_vkInstance(VK_NULL_HANDLE)
+{
+#if defined(__linux__) && !defined(__ANDROID__)
+    // glibc older than 2.17 is insufficient for the loader path below.
+    char *glibcVersion = strdup(gnu_get_libc_version());
+    int majNum = (int)atoi(strtok(glibcVersion, "."));
+    int minNum = (int)atoi(strtok(NULL, "."));
+    free(glibcVersion);
+    if ((majNum < 2) || (majNum == 2 && minNum < 17))
+    {
+        // WAIVE_TEST() << "Insufficient GLIBC version. Test waived!";
+    }
+#endif
+
+#if defined(_WIN32) || defined(_WIN64)
+    const char *vulkanLoaderLibraryName = "vulkan-1.dll";
+#elif defined(__linux__)
+    const char *vulkanLoaderLibraryName = "libvulkan.so.1";
+#endif
+#ifdef _WIN32
+    HINSTANCE hDLL;
+    hDLL = LoadLibrary(vulkanLoaderLibraryName);
+    if (hDLL == NULL)
+    {
+        throw std::runtime_error("LoadLibrary failed!");
+    }
+    vkGetInstanceProcAddr =
+        (PFN_vkGetInstanceProcAddr)LoadFunction(hDLL, "vkGetInstanceProcAddr");
+#else
+#if !defined(__APPLE__)
+    void *handle;
+    handle = dlopen(vulkanLoaderLibraryName, RTLD_LAZY);
+    if (!handle)
+    {
+        fputs(dlerror(), stderr);
+        throw std::runtime_error("dlopen failed !!!");
+    }
+    vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)LoadFunction(
+        handle, "vkGetInstanceProcAddr");
+#endif
+#endif
+    if ((unsigned long long)vkGetInstanceProcAddr == (unsigned long long)NULL)
+    {
+        throw std::runtime_error("vkGetInstanceProcAddr() not found!");
+    }
+// Resolve pre-instance entry points (NULL instance is valid for these).
+#define VK_GET_NULL_INSTANCE_PROC_ADDR(name)                                   \
+    _##name = (PFN_##name)vkGetInstanceProcAddr(NULL, #name);
+
+    if ((unsigned long long)vkGetInstanceProcAddr == (unsigned long long)NULL)
+    {
+        throw std::runtime_error("Couldn't obtain address for function");
+    }
+    VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceExtensionProperties);
+    uint32_t instanceExtensionPropertiesCount;
+    VkResult vkStatus = VK_SUCCESS;
+    vkStatus = vkEnumerateInstanceExtensionProperties(
+        NULL, &instanceExtensionPropertiesCount, NULL);
+    // Something went wrong in vulkan initialization (most likely incompatible
+    // device/driver combination)
+    if (vkStatus == VK_ERROR_INCOMPATIBLE_DRIVER)
+    {
+        throw std::runtime_error(
+            "Waiving vulkan test because "
+            "vkEnumerateInstanceExtensionProperties failed.");
+        // return WAIVED;
+    }
+
+    VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceVersion);
+    VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceLayerProperties);
+    VK_GET_NULL_INSTANCE_PROC_ADDR(vkCreateInstance);
+#undef VK_GET_NULL_INSTANCE_PROC_ADDR
+
+    VkApplicationInfo vkApplicationInfo = {};
+    vkApplicationInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
+    vkApplicationInfo.pNext = NULL;
+    vkApplicationInfo.pApplicationName = "Default app";
+    vkApplicationInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
+    vkApplicationInfo.pEngineName = "No engine";
+    vkApplicationInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
+    vkApplicationInfo.apiVersion = VK_API_VERSION_1_0;
+
+    // Extensions required for external memory/semaphore capability queries.
+    std::vector<const char *> enabledExtensionNameList;
+    enabledExtensionNameList.push_back(
+        VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
+    enabledExtensionNameList.push_back(
+        VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME);
+    enabledExtensionNameList.push_back(
+        VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME);
+
+    std::vector<VkExtensionProperties> vkExtensionPropertiesList(
+        instanceExtensionPropertiesCount);
+    vkEnumerateInstanceExtensionProperties(NULL,
+                                           &instanceExtensionPropertiesCount,
+                                           vkExtensionPropertiesList.data());
+
+    // Bail out early (leaving the object in its "no support" state) if any
+    // required extension is missing.
+    for (size_t eenIdx = 0; eenIdx < enabledExtensionNameList.size(); eenIdx++)
+    {
+        bool isSupported = false;
+        for (size_t epIdx = 0; epIdx < vkExtensionPropertiesList.size();
+             epIdx++)
+        {
+            if (!strcmp(enabledExtensionNameList[eenIdx],
+                        vkExtensionPropertiesList[epIdx].extensionName))
+            {
+                isSupported = true;
+                break;
+            }
+        }
+        if (!isSupported)
+        {
+            return;
+        }
+    }
+
+    VkInstanceCreateInfo vkInstanceCreateInfo = {};
+    vkInstanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+    vkInstanceCreateInfo.pNext = NULL;
+    vkInstanceCreateInfo.flags = 0;
+    vkInstanceCreateInfo.pApplicationInfo = &vkApplicationInfo;
+    vkInstanceCreateInfo.enabledLayerCount = 0;
+    vkInstanceCreateInfo.ppEnabledLayerNames = NULL;
+    vkInstanceCreateInfo.enabledExtensionCount =
+        (uint32_t)enabledExtensionNameList.size();
+    vkInstanceCreateInfo.ppEnabledExtensionNames =
+        enabledExtensionNameList.data();
+
+    vkCreateInstance(&vkInstanceCreateInfo, NULL, &m_vkInstance);
+
+// Resolve every instance-level entry point declared via VK_FUNC_LIST.
+#define VK_FUNC_DECL(name)                                                     \
+    _##name = (PFN_##name)vkGetInstanceProcAddr(m_vkInstance, #name);          \
+    // ASSERT_NEQ((unsigned long long)name, 0ULL) << "Couldn't obtain address
+    // for function" << #name;
+
+    VK_FUNC_LIST
+#if defined(_WIN32) || defined(_WIN64)
+    VK_WINDOWS_FUNC_LIST
+#endif
+#undef VK_FUNC_DECL
+
+    uint32_t physicalDeviceCount = 0;
+    vkEnumeratePhysicalDevices(m_vkInstance, &physicalDeviceCount, NULL);
+    // CHECK_NEQ(physicalDeviceCount, uint32_t(0));
+
+    if (physicalDeviceCount == uint32_t(0))
+    {
+        std::cout << "failed to find GPUs with Vulkan support!\n";
+        return;
+    }
+
+    std::vector<VkPhysicalDevice> vkPhysicalDeviceList(physicalDeviceCount,
+                                                       VK_NULL_HANDLE);
+    vkEnumeratePhysicalDevices(m_vkInstance, &physicalDeviceCount,
+                               vkPhysicalDeviceList.data());
+
+    // Wrap each enumerated device; ownership stays with this instance and
+    // is released in ~VulkanInstance().
+    for (size_t ppdIdx = 0; ppdIdx < vkPhysicalDeviceList.size(); ppdIdx++)
+    {
+        VulkanPhysicalDevice *physicalDevice =
+            new VulkanPhysicalDevice(vkPhysicalDeviceList[ppdIdx]);
+        m_physicalDeviceList.add(*physicalDevice);
+    }
+}
+
+// Releases the VulkanPhysicalDevice wrappers allocated in the constructor,
+// then destroys the VkInstance if one was successfully created.
+VulkanInstance::~VulkanInstance()
+{
+    for (size_t idx = 0; idx < m_physicalDeviceList.size(); idx++)
+    {
+        delete &m_physicalDeviceList[idx];
+    }
+    if (m_vkInstance)
+    {
+        vkDestroyInstance(m_vkInstance, NULL);
+    }
+}
+
+// Accessor for the devices enumerated at construction time.
+const VulkanPhysicalDeviceList &VulkanInstance::getPhysicalDeviceList() const
+{
+    return m_physicalDeviceList;
+}
+
+// Implicit conversion to the raw Vulkan handle for direct API calls.
+VulkanInstance::operator VkInstance() const { return m_vkInstance; }
+
+/////////////////////////////////////////
+// VulkanPhysicalDevice implementation //
+/////////////////////////////////////////
+
+// Copy constructor: shallow-copies the device handle and cached properties.
+//
+// Fix: m_vkDeviceLUID was previously left uninitialized in copies even
+// though the main constructor fills it and getLUID() exposes it; copy it
+// alongside the UUID.
+// NOTE(review): m_memoryHeapList / m_memoryTypeList are still not copied
+// here, so copies start with empty heap/type lists — confirm intended.
+VulkanPhysicalDevice::VulkanPhysicalDevice(
+    const VulkanPhysicalDevice &physicalDevice)
+    : m_vkPhysicalDevice(physicalDevice.m_vkPhysicalDevice),
+      m_vkPhysicalDeviceProperties(physicalDevice.m_vkPhysicalDeviceProperties),
+      m_vkDeviceNodeMask(physicalDevice.m_vkDeviceNodeMask),
+      m_vkPhysicalDeviceFeatures(physicalDevice.m_vkPhysicalDeviceFeatures),
+      m_vkPhysicalDeviceMemoryProperties(
+          physicalDevice.m_vkPhysicalDeviceMemoryProperties),
+      m_queueFamilyList(physicalDevice.m_queueFamilyList)
+{
+    memcpy(m_vkDeviceUUID, physicalDevice.m_vkDeviceUUID, VK_UUID_SIZE);
+    memcpy(m_vkDeviceLUID, physicalDevice.m_vkDeviceLUID,
+           sizeof(m_vkDeviceLUID));
+}
+
+// Wraps a raw VkPhysicalDevice: caches its properties, features, UUID/LUID
+// (via VK_KHR_get_physical_device_properties2), queue families, memory
+// heaps and memory types. The queue-family/heap/type wrapper objects are
+// heap-allocated here and deleted in the destructor.
+VulkanPhysicalDevice::VulkanPhysicalDevice(VkPhysicalDevice vkPhysicalDevice)
+    : m_vkPhysicalDevice(vkPhysicalDevice)
+{
+    if (m_vkPhysicalDevice == (VkPhysicalDevice)VK_NULL_HANDLE)
+    {
+        throw std::runtime_error("failed to find a suitable GPU!");
+    }
+
+    vkGetPhysicalDeviceProperties(m_vkPhysicalDevice,
+                                  &m_vkPhysicalDeviceProperties);
+    vkGetPhysicalDeviceFeatures(m_vkPhysicalDevice,
+                                &m_vkPhysicalDeviceFeatures);
+
+    // Chain VkPhysicalDeviceIDPropertiesKHR to fetch UUID/LUID/node mask,
+    // which are needed to match this device against an OpenCL device.
+    VkPhysicalDeviceIDPropertiesKHR vkPhysicalDeviceIDPropertiesKHR = {};
+    vkPhysicalDeviceIDPropertiesKHR.sType =
+        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR;
+    vkPhysicalDeviceIDPropertiesKHR.pNext = NULL;
+
+    VkPhysicalDeviceProperties2KHR vkPhysicalDeviceProperties2KHR = {};
+    vkPhysicalDeviceProperties2KHR.sType =
+        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+    vkPhysicalDeviceProperties2KHR.pNext = &vkPhysicalDeviceIDPropertiesKHR;
+
+    vkGetPhysicalDeviceProperties2KHR(m_vkPhysicalDevice,
+                                      &vkPhysicalDeviceProperties2KHR);
+
+    memcpy(m_vkDeviceUUID, vkPhysicalDeviceIDPropertiesKHR.deviceUUID,
+           sizeof(m_vkDeviceUUID));
+    memcpy(m_vkDeviceLUID, vkPhysicalDeviceIDPropertiesKHR.deviceLUID,
+           sizeof(m_vkDeviceLUID));
+    m_vkDeviceNodeMask = vkPhysicalDeviceIDPropertiesKHR.deviceNodeMask;
+
+    // Enumerate queue families (two-call pattern: count, then fill).
+    uint32_t queueFamilyCount = 0;
+    vkGetPhysicalDeviceQueueFamilyProperties(m_vkPhysicalDevice,
+                                             &queueFamilyCount, NULL);
+
+    std::vector<VkQueueFamilyProperties> vkQueueFamilyPropertiesList(
+        queueFamilyCount);
+    vkGetPhysicalDeviceQueueFamilyProperties(
+        m_vkPhysicalDevice, &queueFamilyCount,
+        vkQueueFamilyPropertiesList.data());
+
+    for (size_t qfpIdx = 0; qfpIdx < vkQueueFamilyPropertiesList.size();
+         qfpIdx++)
+    {
+        VulkanQueueFamily *queueFamily = new VulkanQueueFamily(
+            uint32_t(qfpIdx), vkQueueFamilyPropertiesList[qfpIdx]);
+        m_queueFamilyList.add(*queueFamily);
+    }
+
+    vkGetPhysicalDeviceMemoryProperties(m_vkPhysicalDevice,
+                                        &m_vkPhysicalDeviceMemoryProperties);
+
+    // Wrap every memory heap...
+    for (uint32_t mhIdx = 0;
+         mhIdx < m_vkPhysicalDeviceMemoryProperties.memoryHeapCount; mhIdx++)
+    {
+        VulkanMemoryHeap *memoryHeap = new VulkanMemoryHeap(
+            mhIdx, m_vkPhysicalDeviceMemoryProperties.memoryHeaps[mhIdx].size,
+            (VulkanMemoryHeapFlag)m_vkPhysicalDeviceMemoryProperties
+                .memoryHeaps[mhIdx]
+                .flags);
+        m_memoryHeapList.add(*memoryHeap);
+    }
+
+    // ...then every memory type, each referencing its owning heap wrapper.
+    for (uint32_t mtIdx = 0;
+         mtIdx < m_vkPhysicalDeviceMemoryProperties.memoryTypeCount; mtIdx++)
+    {
+        const VulkanMemoryHeap &memoryHeap = m_memoryHeapList
+            [m_vkPhysicalDeviceMemoryProperties.memoryTypes[mtIdx].heapIndex];
+        VulkanMemoryType *memoryType = new VulkanMemoryType(
+            mtIdx,
+            (VulkanMemoryTypeProperty)m_vkPhysicalDeviceMemoryProperties
+                .memoryTypes[mtIdx]
+                .propertyFlags,
+            memoryHeap);
+        m_memoryTypeList.add(*memoryType);
+    }
+}
+
+// Frees the memory-type, memory-heap and queue-family wrapper objects that
+// the constructor heap-allocated into the member lists.
+VulkanPhysicalDevice::~VulkanPhysicalDevice()
+{
+    for (size_t idx = 0; idx < m_memoryTypeList.size(); idx++)
+    {
+        delete &m_memoryTypeList[idx];
+    }
+
+    for (size_t idx = 0; idx < m_memoryHeapList.size(); idx++)
+    {
+        delete &m_memoryHeapList[idx];
+    }
+
+    for (size_t idx = 0; idx < m_queueFamilyList.size(); idx++)
+    {
+        delete &m_queueFamilyList[idx];
+    }
+}
+
+
+// Simple accessors for the data cached by the constructor.
+const VulkanQueueFamilyList &VulkanPhysicalDevice::getQueueFamilyList() const
+{
+    return m_queueFamilyList;
+}
+
+const VulkanMemoryHeapList &VulkanPhysicalDevice::getMemoryHeapList() const
+{
+    return m_memoryHeapList;
+}
+
+const VulkanMemoryTypeList &VulkanPhysicalDevice::getMemoryTypeList() const
+{
+    return m_memoryTypeList;
+}
+
+// UUID/LUID copied from VkPhysicalDeviceIDPropertiesKHR at construction.
+const uint8_t *VulkanPhysicalDevice::getUUID() const { return m_vkDeviceUUID; }
+
+const uint8_t *VulkanPhysicalDevice::getLUID() const { return m_vkDeviceLUID; }
+
+uint32_t VulkanPhysicalDevice::getNodeMask() const
+{
+    return m_vkDeviceNodeMask;
+}
+
+// Implicit conversion to the raw Vulkan handle for direct API calls.
+VulkanPhysicalDevice::operator VkPhysicalDevice() const
+{
+    return m_vkPhysicalDevice;
+}
+
+// Orders queue families by index (both sides convert to their uint32_t
+// queue-family index via the conversion operator).
+bool operator<(const VulkanQueueFamily &queueFamilyA,
+               const VulkanQueueFamily &queueFamilyB)
+{
+    return (uint32_t)queueFamilyA < (uint32_t)queueFamilyB;
+}
+
+/////////////////////////////////////
+// VulkanMemoryHeap implementation //
+/////////////////////////////////////
+
+VulkanMemoryHeap::VulkanMemoryHeap(const VulkanMemoryHeap &memoryHeap)
+ : m_memoryHeapIndex(memoryHeap.m_memoryHeapIndex),
+ m_size(memoryHeap.m_size), m_memoryHeapFlag(memoryHeap.m_memoryHeapFlag)
+{}
+
+VulkanMemoryHeap::VulkanMemoryHeap(uint32_t memoryHeapIndex, uint64_t size,
+ VulkanMemoryHeapFlag memoryHeapFlag)
+ : m_memoryHeapIndex(memoryHeapIndex), m_size(size),
+ m_memoryHeapFlag(memoryHeapFlag)
+{}
+
+VulkanMemoryHeap::~VulkanMemoryHeap() {}
+
+uint64_t VulkanMemoryHeap::getSize() const { return m_size; }
+
+
+VulkanMemoryHeapFlag VulkanMemoryHeap::getMemoryHeapFlag() const
+{
+ return m_memoryHeapFlag;
+}
+
+VulkanMemoryHeap::operator uint32_t() const { return m_memoryHeapIndex; }
+
+/////////////////////////////////////
+// VulkanMemoryType implementation //
+/////////////////////////////////////
+
+VulkanMemoryType::VulkanMemoryType(const VulkanMemoryType &memoryType)
+ : m_memoryTypeIndex(memoryType.m_memoryTypeIndex),
+ m_memoryTypeProperty(memoryType.m_memoryTypeProperty),
+ m_memoryHeap(memoryType.m_memoryHeap)
+{}
+
+VulkanMemoryType::VulkanMemoryType(uint32_t memoryTypeIndex,
+ VulkanMemoryTypeProperty memoryTypeProperty,
+ const VulkanMemoryHeap &memoryHeap)
+ : m_memoryTypeIndex(memoryTypeIndex),
+ m_memoryTypeProperty(memoryTypeProperty), m_memoryHeap(memoryHeap)
+{}
+
+VulkanMemoryType::~VulkanMemoryType() {}
+
+VulkanMemoryTypeProperty VulkanMemoryType::getMemoryTypeProperty() const
+{
+ return m_memoryTypeProperty;
+}
+
+const VulkanMemoryHeap &VulkanMemoryType::getMemoryHeap() const
+{
+ return m_memoryHeap;
+}
+
+VulkanMemoryType::operator uint32_t() const { return m_memoryTypeIndex; }
+
+//////////////////////////////////////
+// VulkanQueueFamily implementation //
+//////////////////////////////////////
+
+VulkanQueueFamily::VulkanQueueFamily(const VulkanQueueFamily &queueFamily)
+ : m_queueFamilyIndex(queueFamily.m_queueFamilyIndex),
+ m_vkQueueFamilyProperties(queueFamily.m_vkQueueFamilyProperties)
+{}
+
+VulkanQueueFamily::VulkanQueueFamily(
+ uint32_t queueFamilyIndex, VkQueueFamilyProperties vkQueueFamilyProperties)
+ : m_queueFamilyIndex(queueFamilyIndex),
+ m_vkQueueFamilyProperties(vkQueueFamilyProperties)
+{}
+
+VulkanQueueFamily::~VulkanQueueFamily() {}
+
+uint32_t VulkanQueueFamily::getQueueFlags() const
+{
+ return m_vkQueueFamilyProperties.queueFlags
+ & (uint32_t)VULKAN_QUEUE_FLAG_MASK_ALL;
+}
+
+uint32_t VulkanQueueFamily::getQueueCount() const
+{
+ return m_vkQueueFamilyProperties.queueCount;
+}
+
+VulkanQueueFamily::operator uint32_t() const { return m_queueFamilyIndex; }
+
+/////////////////////////////////
+// VulkanDevice implementation //
+/////////////////////////////////
+
// Shallow copy: the new wrapper shares the VkDevice handle with the source.
// NOTE(review): both wrappers' destructors call vkDestroyDevice on the same
// handle — confirm callers never let two copies of one device be destroyed.
VulkanDevice::VulkanDevice(const VulkanDevice &device)
    : m_physicalDevice(device.m_physicalDevice), m_vkDevice(device.m_vkDevice)
{}

// Create a logical device on `physicalDevice`, requesting the number of
// queues per queue family given by `queueFamilyToQueueCountMap`, then fetch
// a VulkanQueue wrapper for every created queue.
VulkanDevice::VulkanDevice(
    const VulkanPhysicalDevice &physicalDevice,
    const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap)
    : m_physicalDevice(physicalDevice), m_vkDevice(NULL)
{
    // Find the largest per-family queue request so one priority array can be
    // shared by every VkDeviceQueueCreateInfo below.
    uint32_t maxQueueCount = 0;
    for (uint32_t qfIdx = 0;
         qfIdx < (uint32_t)physicalDevice.getQueueFamilyList().size(); qfIdx++)
    {
        maxQueueCount =
            std::max(maxQueueCount, queueFamilyToQueueCountMap[qfIdx]);
    }

    // One create-info per family with a non-zero request. All queues get
    // priority 0.0 — the vector is value-initialized.
    std::vector<VkDeviceQueueCreateInfo> vkDeviceQueueCreateInfoList;
    std::vector<float> queuePriorities(maxQueueCount);
    for (uint32_t qfIdx = 0;
         qfIdx < (uint32_t)physicalDevice.getQueueFamilyList().size(); qfIdx++)
    {
        if (queueFamilyToQueueCountMap[qfIdx])
        {
            VkDeviceQueueCreateInfo vkDeviceQueueCreateInfo = {};
            vkDeviceQueueCreateInfo.sType =
                VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
            vkDeviceQueueCreateInfo.pNext = NULL;
            vkDeviceQueueCreateInfo.flags = 0;
            vkDeviceQueueCreateInfo.queueFamilyIndex = qfIdx;
            vkDeviceQueueCreateInfo.queueCount =
                queueFamilyToQueueCountMap[qfIdx];
            vkDeviceQueueCreateInfo.pQueuePriorities = queuePriorities.data();

            vkDeviceQueueCreateInfoList.push_back(vkDeviceQueueCreateInfo);
        }
    }

    // External-memory / external-semaphore extensions used by the CL/VK
    // interop tests; the handle-type flavor is platform specific.
    std::vector<const char *> enabledExtensionNameList;
    enabledExtensionNameList.push_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME);
    enabledExtensionNameList.push_back(
        VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME);
#if defined(_WIN32) || defined(_WIN64)
    enabledExtensionNameList.push_back(
        VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME);
    enabledExtensionNameList.push_back(
        VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME);
#else
    enabledExtensionNameList.push_back(
        VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME);
    enabledExtensionNameList.push_back(
        VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME);
#endif


    VkDeviceCreateInfo vkDeviceCreateInfo = {};
    vkDeviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
    vkDeviceCreateInfo.pNext = NULL;
    vkDeviceCreateInfo.flags = 0;
    vkDeviceCreateInfo.queueCreateInfoCount =
        (uint32_t)vkDeviceQueueCreateInfoList.size();
    vkDeviceCreateInfo.pQueueCreateInfos = vkDeviceQueueCreateInfoList.data();
    vkDeviceCreateInfo.enabledLayerCount = 0;
    vkDeviceCreateInfo.ppEnabledLayerNames = NULL;
    vkDeviceCreateInfo.enabledExtensionCount =
        (uint32_t)enabledExtensionNameList.size();
    vkDeviceCreateInfo.ppEnabledExtensionNames =
        enabledExtensionNameList.data();
    vkDeviceCreateInfo.pEnabledFeatures = NULL;

    // NOTE(review): the VkResult is discarded; on failure m_vkDevice stays
    // NULL and the queue retrieval below operates on a null device.
    vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice);

    // Heap-allocate a queue list per family and a wrapper per queue; both are
    // owned by this object and released in ~VulkanDevice.
    for (uint32_t qfIdx = 0;
         qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size();
         qfIdx++)
    {
        VulkanQueueList *queueList = new VulkanQueueList();
        m_queueFamilyIndexToQueueListMap.insert(qfIdx, *queueList);
        for (uint32_t qIdx = 0; qIdx < queueFamilyToQueueCountMap[qfIdx];
             qIdx++)
        {
            VkQueue vkQueue;
            vkGetDeviceQueue(m_vkDevice, qfIdx, qIdx, &vkQueue);
            VulkanQueue *queue = new VulkanQueue(vkQueue);
            m_queueFamilyIndexToQueueListMap[qfIdx].add(*queue);
        }
    }
}
+
+VulkanDevice::~VulkanDevice()
+{
+ for (uint32_t qfIdx = 0;
+ qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size();
+ qfIdx++)
+ {
+ for (size_t qIdx = 0;
+ qIdx < m_queueFamilyIndexToQueueListMap[qfIdx].size(); qIdx++)
+ {
+ VulkanQueue &queue = m_queueFamilyIndexToQueueListMap[qfIdx][qIdx];
+ delete &queue;
+ }
+ VulkanQueueList &queueList = m_queueFamilyIndexToQueueListMap[qfIdx];
+ delete &queueList;
+ }
+ vkDestroyDevice(m_vkDevice, NULL);
+}
+
+const VulkanPhysicalDevice &VulkanDevice::getPhysicalDevice() const
+{
+ return m_physicalDevice;
+}
+
+VulkanQueue &VulkanDevice::getQueue(const VulkanQueueFamily &queueFamily,
+ uint32_t queueIndex)
+{
+ return m_queueFamilyIndexToQueueListMap[queueFamily][queueIndex];
+}
+
+VulkanDevice::operator VkDevice() const { return m_vkDevice; }
+
+////////////////////////////////
+// VulkanQueue implementation //
+////////////////////////////////
+
+VulkanQueue::VulkanQueue(const VulkanQueue &queue): m_vkQueue(queue.m_vkQueue)
+{}
+
+VulkanQueue::VulkanQueue(VkQueue vkQueue): m_vkQueue(vkQueue) {}
+
+VulkanQueue::~VulkanQueue() {}
+
+void VulkanQueue::submit(const VulkanSemaphoreList &waitSemaphoreList,
+ const VulkanCommandBufferList &commandBufferList,
+ const VulkanSemaphoreList &signalSemaphoreList)
+{
+ std::vector<VkPipelineStageFlags> vkPipelineStageFlagsList(
+ waitSemaphoreList.size(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+
+ VkSubmitInfo vkSubmitInfo = {};
+ vkSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ vkSubmitInfo.pNext = NULL;
+ vkSubmitInfo.waitSemaphoreCount = (uint32_t)waitSemaphoreList.size();
+ vkSubmitInfo.pWaitSemaphores = waitSemaphoreList();
+ vkSubmitInfo.pWaitDstStageMask = vkPipelineStageFlagsList.data();
+ vkSubmitInfo.commandBufferCount = (uint32_t)commandBufferList.size();
+ vkSubmitInfo.pCommandBuffers = commandBufferList();
+ vkSubmitInfo.signalSemaphoreCount = (uint32_t)signalSemaphoreList.size();
+ vkSubmitInfo.pSignalSemaphores = signalSemaphoreList();
+
+ vkQueueSubmit(m_vkQueue, 1, &vkSubmitInfo, NULL);
+}
+
+void VulkanQueue::submit(const VulkanSemaphore &waitSemaphore,
+ const VulkanCommandBuffer &commandBuffer,
+ const VulkanSemaphore &signalSemaphore)
+{
+ VulkanSemaphoreList waitSemaphoreList;
+ VulkanCommandBufferList commandBufferList;
+ VulkanSemaphoreList signalSemaphoreList;
+
+ waitSemaphoreList.add(waitSemaphore);
+ commandBufferList.add(commandBuffer);
+ signalSemaphoreList.add(signalSemaphore);
+
+ submit(waitSemaphoreList, commandBufferList, signalSemaphoreList);
+}
+
+void VulkanQueue::submit(const VulkanCommandBuffer &commandBuffer,
+ const VulkanSemaphore &signalSemaphore)
+{
+ VulkanSemaphoreList waitSemaphoreList;
+ VulkanCommandBufferList commandBufferList;
+ VulkanSemaphoreList signalSemaphoreList;
+
+ commandBufferList.add(commandBuffer);
+ signalSemaphoreList.add(signalSemaphore);
+
+ submit(waitSemaphoreList, commandBufferList, signalSemaphoreList);
+}
+
+void VulkanQueue::submit(const VulkanCommandBuffer &commandBuffer)
+{
+ VulkanSemaphoreList waitSemaphoreList;
+ VulkanCommandBufferList commandBufferList;
+ VulkanSemaphoreList signalSemaphoreList;
+
+ commandBufferList.add(commandBuffer);
+
+ submit(waitSemaphoreList, commandBufferList, signalSemaphoreList);
+}
+
+void VulkanQueue::waitIdle() { vkQueueWaitIdle(m_vkQueue); }
+
+VulkanQueue::operator VkQueue() const { return m_vkQueue; }
+
+/////////////////////////////////////////////////////
+// VulkanDescriptorSetLayoutBinding implementation //
+/////////////////////////////////////////////////////
+
+VulkanDescriptorSetLayoutBinding::VulkanDescriptorSetLayoutBinding(
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding)
+ : m_vkDescriptorSetLayoutBinding(
+ descriptorSetLayoutBinding.m_vkDescriptorSetLayoutBinding)
+{}
+
+VulkanDescriptorSetLayoutBinding::VulkanDescriptorSetLayoutBinding(
+ uint32_t binding, VulkanDescriptorType descriptorType,
+ uint32_t descriptorCount, VulkanShaderStage shaderStage)
+{
+ m_vkDescriptorSetLayoutBinding.binding = binding;
+ m_vkDescriptorSetLayoutBinding.descriptorType =
+ (VkDescriptorType)descriptorType;
+ m_vkDescriptorSetLayoutBinding.descriptorCount = descriptorCount;
+ m_vkDescriptorSetLayoutBinding.stageFlags =
+ (VkShaderStageFlags)(VkShaderStageFlagBits)shaderStage;
+ m_vkDescriptorSetLayoutBinding.pImmutableSamplers = NULL;
+}
+
+VulkanDescriptorSetLayoutBinding::~VulkanDescriptorSetLayoutBinding() {}
+
+VulkanDescriptorSetLayoutBinding::operator VkDescriptorSetLayoutBinding() const
+{
+ return m_vkDescriptorSetLayoutBinding;
+}
+
+//////////////////////////////////////////////
+// VulkanDescriptorSetLayout implementation //
+//////////////////////////////////////////////
+
+VulkanDescriptorSetLayout::VulkanDescriptorSetLayout(
+ const VulkanDescriptorSetLayout &descriptorSetLayout)
+ : m_device(descriptorSetLayout.m_device),
+ m_vkDescriptorSetLayout(descriptorSetLayout.m_vkDescriptorSetLayout)
+{}
+
+void VulkanDescriptorSetLayout::VulkanDescriptorSetLayoutCommon(
+ const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList)
+{
+ VkDescriptorSetLayoutCreateInfo vkDescriptorSetLayoutCreateInfo = {};
+ vkDescriptorSetLayoutCreateInfo.sType =
+ VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+ vkDescriptorSetLayoutCreateInfo.pNext = NULL;
+ vkDescriptorSetLayoutCreateInfo.flags = 0;
+ vkDescriptorSetLayoutCreateInfo.bindingCount =
+ (uint32_t)descriptorSetLayoutBindingList.size();
+ vkDescriptorSetLayoutCreateInfo.pBindings =
+ descriptorSetLayoutBindingList();
+
+ vkCreateDescriptorSetLayout(m_device, &vkDescriptorSetLayoutCreateInfo,
+ NULL, &m_vkDescriptorSetLayout);
+}
+
+VulkanDescriptorSetLayout::VulkanDescriptorSetLayout(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding)
+ : m_device(device), m_vkDescriptorSetLayout(VK_NULL_HANDLE)
+{
+ VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList;
+ descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding);
+
+ VulkanDescriptorSetLayoutCommon(descriptorSetLayoutBindingList);
+}
+
+VulkanDescriptorSetLayout::VulkanDescriptorSetLayout(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1)
+ : m_device(device), m_vkDescriptorSetLayout(VK_NULL_HANDLE)
+{
+ VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList;
+ descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding0);
+ descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding1);
+
+ VulkanDescriptorSetLayoutCommon(descriptorSetLayoutBindingList);
+}
+
+VulkanDescriptorSetLayout::VulkanDescriptorSetLayout(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList)
+ : m_device(device), m_vkDescriptorSetLayout(VK_NULL_HANDLE)
+{
+ VulkanDescriptorSetLayoutCommon(descriptorSetLayoutBindingList);
+}
+
+VulkanDescriptorSetLayout::~VulkanDescriptorSetLayout()
+{
+ if (m_vkDescriptorSetLayout != VK_NULL_HANDLE)
+ {
+ vkDestroyDescriptorSetLayout(m_device, m_vkDescriptorSetLayout, NULL);
+ }
+}
+
+VulkanDescriptorSetLayout::operator VkDescriptorSetLayout() const
+{
+ return m_vkDescriptorSetLayout;
+}
+
+/////////////////////////////////////////
+// VulkanPipelineLayout implementation //
+/////////////////////////////////////////
+
+VulkanPipelineLayout::VulkanPipelineLayout(
+ const VulkanPipelineLayout &pipelineLayout)
+ : m_device(pipelineLayout.m_device),
+ m_vkPipelineLayout(pipelineLayout.m_vkPipelineLayout)
+{}
+
+void VulkanPipelineLayout::VulkanPipelineLayoutCommon(
+ const VulkanDescriptorSetLayoutList &descriptorSetLayoutList)
+{
+ VkPipelineLayoutCreateInfo vkPipelineLayoutCreateInfo = {};
+ vkPipelineLayoutCreateInfo.sType =
+ VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+ vkPipelineLayoutCreateInfo.pNext = NULL;
+ vkPipelineLayoutCreateInfo.flags = 0;
+ vkPipelineLayoutCreateInfo.setLayoutCount =
+ (uint32_t)descriptorSetLayoutList.size();
+ vkPipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayoutList();
+ vkPipelineLayoutCreateInfo.pushConstantRangeCount = 0;
+ vkPipelineLayoutCreateInfo.pPushConstantRanges = NULL;
+
+ vkCreatePipelineLayout(m_device, &vkPipelineLayoutCreateInfo, NULL,
+ &m_vkPipelineLayout);
+}
+
+VulkanPipelineLayout::VulkanPipelineLayout(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayout &descriptorSetLayout)
+ : m_device(device), m_vkPipelineLayout(VK_NULL_HANDLE)
+{
+ VulkanDescriptorSetLayoutList descriptorSetLayoutList;
+ descriptorSetLayoutList.add(descriptorSetLayout);
+
+ VulkanPipelineLayoutCommon(descriptorSetLayoutList);
+}
+
+VulkanPipelineLayout::VulkanPipelineLayout(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutList &descriptorSetLayoutList)
+ : m_device(device), m_vkPipelineLayout(VK_NULL_HANDLE)
+{
+ VulkanPipelineLayoutCommon(descriptorSetLayoutList);
+}
+
+VulkanPipelineLayout::~VulkanPipelineLayout()
+{
+ vkDestroyPipelineLayout(m_device, m_vkPipelineLayout, NULL);
+}
+
+VulkanPipelineLayout::operator VkPipelineLayout() const
+{
+ return m_vkPipelineLayout;
+}
+
+///////////////////////////////////////
+// VulkanShaderModule implementation //
+///////////////////////////////////////
+
+VulkanShaderModule::VulkanShaderModule(const VulkanShaderModule &shaderModule)
+ : m_device(shaderModule.m_device),
+ m_vkShaderModule(shaderModule.m_vkShaderModule)
+{}
+
+VulkanShaderModule::VulkanShaderModule(const VulkanDevice &device,
+ const std::vector<char> &code)
+ : m_device(device)
+{
+
+ VkShaderModuleCreateInfo vkShaderModuleCreateInfo = {};
+ vkShaderModuleCreateInfo.sType =
+ VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+ vkShaderModuleCreateInfo.pNext = NULL;
+ vkShaderModuleCreateInfo.flags = 0;
+ vkShaderModuleCreateInfo.codeSize = code.size();
+ vkShaderModuleCreateInfo.pCode =
+ reinterpret_cast<const uint32_t *>(code.data());
+
+ vkCreateShaderModule(m_device, &vkShaderModuleCreateInfo, NULL,
+ &m_vkShaderModule);
+}
+
+VulkanShaderModule::~VulkanShaderModule()
+{
+ vkDestroyShaderModule(m_device, m_vkShaderModule, NULL);
+}
+
+VulkanShaderModule::operator VkShaderModule() const { return m_vkShaderModule; }
+
+///////////////////////////////////
+// VulkanPipeline implementation //
+///////////////////////////////////
+
+VulkanPipeline::VulkanPipeline(const VulkanPipeline &pipeline)
+ : m_device(pipeline.m_device), m_vkPipeline(pipeline.m_vkPipeline)
+{}
+
+VulkanPipeline::VulkanPipeline(const VulkanDevice &device)
+ : m_device(device), m_vkPipeline(VK_NULL_HANDLE)
+{}
+
+VulkanPipeline::~VulkanPipeline()
+{
+ vkDestroyPipeline(m_device, m_vkPipeline, NULL);
+}
+
+VulkanPipeline::operator VkPipeline() const { return m_vkPipeline; }
+
+//////////////////////////////////////////
+// VulkanComputePipeline implementation //
+//////////////////////////////////////////
+
+VulkanComputePipeline::VulkanComputePipeline(
+ const VulkanComputePipeline &computePipeline)
+ : VulkanPipeline(computePipeline)
+{}
+
+VulkanComputePipeline::VulkanComputePipeline(
+ const VulkanDevice &device, const VulkanPipelineLayout &pipelineLayout,
+ const VulkanShaderModule &shaderModule, const std::string &entryFuncName)
+ : VulkanPipeline(device)
+{
+ VkPipelineShaderStageCreateInfo vkPipelineShaderStageCreateInfo = {};
+ vkPipelineShaderStageCreateInfo.sType =
+ VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+ vkPipelineShaderStageCreateInfo.pNext = NULL;
+ vkPipelineShaderStageCreateInfo.flags = 0;
+ vkPipelineShaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT;
+ vkPipelineShaderStageCreateInfo.module = shaderModule;
+ vkPipelineShaderStageCreateInfo.pName = entryFuncName.c_str();
+ vkPipelineShaderStageCreateInfo.pSpecializationInfo = NULL;
+
+ VkComputePipelineCreateInfo vkComputePipelineCreateInfo = {};
+ vkComputePipelineCreateInfo.sType =
+ VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
+ vkComputePipelineCreateInfo.pNext = NULL;
+ vkComputePipelineCreateInfo.flags = 0;
+ vkComputePipelineCreateInfo.stage = vkPipelineShaderStageCreateInfo;
+ vkComputePipelineCreateInfo.layout = pipelineLayout;
+ vkComputePipelineCreateInfo.basePipelineHandle = VK_NULL_HANDLE;
+ vkComputePipelineCreateInfo.basePipelineIndex = 0;
+
+ vkCreateComputePipelines(device, VK_NULL_HANDLE, 1,
+ &vkComputePipelineCreateInfo, NULL, &m_vkPipeline);
+}
+
+VulkanComputePipeline::~VulkanComputePipeline() {}
+
+VulkanPipelineBindPoint VulkanComputePipeline::getPipelineBindPoint() const
+{
+ return VULKAN_PIPELINE_BIND_POINT_COMPUTE;
+}
+
+/////////////////////////////////////////
+// VulkanDescriptorPool implementation //
+/////////////////////////////////////////
+
+VulkanDescriptorPool::VulkanDescriptorPool(
+ const VulkanDescriptorPool &descriptorPool)
+ : m_device(descriptorPool.m_device),
+ m_vkDescriptorPool(descriptorPool.m_vkDescriptorPool)
+{}
+
+void VulkanDescriptorPool::VulkanDescriptorPoolCommon(
+ const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList)
+{
+ if (descriptorSetLayoutBindingList.size())
+ {
+ std::map<VkDescriptorType, uint32_t>
+ vkDescriptorTypeToDescriptorCountMap;
+
+ for (size_t dslbIdx = 0;
+ dslbIdx < descriptorSetLayoutBindingList.size(); dslbIdx++)
+ {
+ VkDescriptorSetLayoutBinding vkDescriptorSetLayoutBinding =
+ descriptorSetLayoutBindingList[dslbIdx];
+ if (vkDescriptorTypeToDescriptorCountMap.find(
+ vkDescriptorSetLayoutBinding.descriptorType)
+ == vkDescriptorTypeToDescriptorCountMap.end())
+ {
+ vkDescriptorTypeToDescriptorCountMap
+ [vkDescriptorSetLayoutBinding.descriptorType] = 1;
+ }
+ else
+ {
+ vkDescriptorTypeToDescriptorCountMap
+ [vkDescriptorSetLayoutBinding.descriptorType]++;
+ }
+ }
+
+ std::vector<VkDescriptorPoolSize> vkDescriptorPoolSizeList;
+ std::map<VkDescriptorType, uint32_t>::iterator dtdcIt;
+ for (dtdcIt = vkDescriptorTypeToDescriptorCountMap.begin();
+ dtdcIt != vkDescriptorTypeToDescriptorCountMap.end(); ++dtdcIt)
+ {
+ VkDescriptorPoolSize vkDescriptorPoolSize = {};
+ vkDescriptorPoolSize.type = dtdcIt->first;
+ vkDescriptorPoolSize.descriptorCount = dtdcIt->second;
+
+ vkDescriptorPoolSizeList.push_back(vkDescriptorPoolSize);
+ }
+
+ VkDescriptorPoolCreateInfo vkDescriptorPoolCreateInfo = {};
+ vkDescriptorPoolCreateInfo.sType =
+ VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+ vkDescriptorPoolCreateInfo.pNext = NULL;
+ vkDescriptorPoolCreateInfo.flags =
+ VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
+ vkDescriptorPoolCreateInfo.maxSets = 1;
+ vkDescriptorPoolCreateInfo.poolSizeCount =
+ (uint32_t)vkDescriptorPoolSizeList.size();
+ vkDescriptorPoolCreateInfo.pPoolSizes = vkDescriptorPoolSizeList.data();
+
+ vkCreateDescriptorPool(m_device, &vkDescriptorPoolCreateInfo, NULL,
+ &m_vkDescriptorPool);
+ }
+}
+
+VulkanDescriptorPool::VulkanDescriptorPool(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding)
+ : m_device(device), m_vkDescriptorPool(VK_NULL_HANDLE)
+{
+ VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList;
+ descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding);
+
+ VulkanDescriptorPoolCommon(descriptorSetLayoutBindingList);
+}
+
+VulkanDescriptorPool::VulkanDescriptorPool(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1)
+ : m_device(device), m_vkDescriptorPool(VK_NULL_HANDLE)
+{
+ VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList;
+ descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding0);
+ descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding1);
+
+ VulkanDescriptorPoolCommon(descriptorSetLayoutBindingList);
+}
+
+VulkanDescriptorPool::VulkanDescriptorPool(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList)
+ : m_device(device), m_vkDescriptorPool(VK_NULL_HANDLE)
+{
+ VulkanDescriptorPoolCommon(descriptorSetLayoutBindingList);
+}
+
+VulkanDescriptorPool::~VulkanDescriptorPool()
+{
+ if (m_vkDescriptorPool != VK_NULL_HANDLE)
+ {
+ vkDestroyDescriptorPool(m_device, m_vkDescriptorPool, NULL);
+ }
+}
+
+VulkanDescriptorPool::operator VkDescriptorPool() const
+{
+ return m_vkDescriptorPool;
+}
+
+////////////////////////////////////////
+// VulkanDescriptorSet implementation //
+////////////////////////////////////////
+
+VulkanDescriptorSet::VulkanDescriptorSet(
+ const VulkanDescriptorSet &descriptorSet)
+ : m_device(descriptorSet.m_device),
+ m_descriptorPool(descriptorSet.m_descriptorPool),
+ m_vkDescriptorSet(descriptorSet.m_vkDescriptorSet)
+{}
+
+VulkanDescriptorSet::VulkanDescriptorSet(
+ const VulkanDevice &device, const VulkanDescriptorPool &descriptorPool,
+ const VulkanDescriptorSetLayout &descriptorSetLayout)
+ : m_device(device), m_descriptorPool(descriptorPool),
+ m_vkDescriptorSet(VK_NULL_HANDLE)
+{
+ VkDescriptorSetLayout vkDescriptorSetLayout = descriptorSetLayout;
+
+ if ((VkDescriptorPool)m_descriptorPool)
+ {
+ VkDescriptorSetAllocateInfo vkDescriptorSetAllocateInfo = {};
+ vkDescriptorSetAllocateInfo.sType =
+ VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+ vkDescriptorSetAllocateInfo.pNext = NULL;
+ vkDescriptorSetAllocateInfo.descriptorPool = descriptorPool;
+ vkDescriptorSetAllocateInfo.descriptorSetCount = 1;
+ vkDescriptorSetAllocateInfo.pSetLayouts = &vkDescriptorSetLayout;
+
+ vkAllocateDescriptorSets(m_device, &vkDescriptorSetAllocateInfo,
+ &m_vkDescriptorSet);
+ }
+}
+
+VulkanDescriptorSet::~VulkanDescriptorSet()
+{
+ if ((VkDescriptorPool)m_descriptorPool)
+ {
+ vkFreeDescriptorSets(m_device, m_descriptorPool, 1, &m_vkDescriptorSet);
+ }
+}
+
+void VulkanDescriptorSet::update(uint32_t binding, const VulkanBuffer &buffer)
+{
+ VkDescriptorBufferInfo vkDescriptorBufferInfo = {};
+ vkDescriptorBufferInfo.buffer = buffer;
+ vkDescriptorBufferInfo.offset = 0;
+ vkDescriptorBufferInfo.range = VK_WHOLE_SIZE;
+
+ VkWriteDescriptorSet vkWriteDescriptorSet = {};
+ vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+ vkWriteDescriptorSet.pNext = NULL;
+ vkWriteDescriptorSet.dstSet = m_vkDescriptorSet;
+ vkWriteDescriptorSet.dstBinding = binding;
+ vkWriteDescriptorSet.dstArrayElement = 0;
+ vkWriteDescriptorSet.descriptorCount = 1;
+ vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+ vkWriteDescriptorSet.pImageInfo = NULL;
+ vkWriteDescriptorSet.pBufferInfo = &vkDescriptorBufferInfo;
+ vkWriteDescriptorSet.pTexelBufferView = NULL;
+
+ vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL);
+}
+
+void VulkanDescriptorSet::update(uint32_t binding,
+ const VulkanImageView &imageView)
+{
+ VkDescriptorImageInfo vkDescriptorImageInfo = {};
+ vkDescriptorImageInfo.sampler = VK_NULL_HANDLE;
+ vkDescriptorImageInfo.imageView = imageView;
+ vkDescriptorImageInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+
+ VkWriteDescriptorSet vkWriteDescriptorSet = {};
+ vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+ vkWriteDescriptorSet.pNext = NULL;
+ vkWriteDescriptorSet.dstSet = m_vkDescriptorSet;
+ vkWriteDescriptorSet.dstBinding = binding;
+ vkWriteDescriptorSet.dstArrayElement = 0;
+ vkWriteDescriptorSet.descriptorCount = 1;
+ vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+ vkWriteDescriptorSet.pImageInfo = &vkDescriptorImageInfo;
+ vkWriteDescriptorSet.pBufferInfo = NULL;
+ vkWriteDescriptorSet.pTexelBufferView = NULL;
+
+ vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL);
+}
+
+VulkanDescriptorSet::operator VkDescriptorSet() const
+{
+ return m_vkDescriptorSet;
+}
+
+///////////////////////////////////
+// VulkanOffset3D implementation //
+///////////////////////////////////
+
+VulkanOffset3D::VulkanOffset3D(const VulkanOffset3D &offset3D)
+ : m_vkOffset3D(offset3D.m_vkOffset3D)
+{}
+
+VulkanOffset3D::VulkanOffset3D(uint32_t x, uint32_t y, uint32_t z)
+{
+ m_vkOffset3D.x = x;
+ m_vkOffset3D.y = y;
+ m_vkOffset3D.z = z;
+}
+
+VulkanOffset3D::~VulkanOffset3D() {}
+
+uint32_t VulkanOffset3D::getX() const { return m_vkOffset3D.x; }
+
+uint32_t VulkanOffset3D::getY() const { return m_vkOffset3D.y; }
+
+uint32_t VulkanOffset3D::getZ() const { return m_vkOffset3D.z; }
+
+VulkanOffset3D::operator VkOffset3D() const { return m_vkOffset3D; }
+
+///////////////////////////////////
+// VulkanExtent3D implementation //
+///////////////////////////////////
+
+VulkanExtent3D::VulkanExtent3D(const VulkanExtent3D &extent3D)
+ : m_vkExtent3D(extent3D.m_vkExtent3D)
+{}
+
+VulkanExtent3D::VulkanExtent3D(uint32_t width, uint32_t height, uint32_t depth)
+{
+ m_vkExtent3D.width = width;
+ m_vkExtent3D.height = height;
+ m_vkExtent3D.depth = depth;
+}
+
+VulkanExtent3D::~VulkanExtent3D() {}
+
+uint32_t VulkanExtent3D::getWidth() const { return m_vkExtent3D.width; }
+
+uint32_t VulkanExtent3D::getHeight() const { return m_vkExtent3D.height; }
+
+uint32_t VulkanExtent3D::getDepth() const { return m_vkExtent3D.depth; }
+
+VulkanExtent3D::operator VkExtent3D() const { return m_vkExtent3D; }
+
+//////////////////////////////////////
+// VulkanCommandPool implementation //
+//////////////////////////////////////
+
+VulkanCommandPool::VulkanCommandPool(const VulkanCommandPool &commandPool)
+ : m_device(commandPool.m_device),
+ m_vkCommandPool(commandPool.m_vkCommandPool)
+{}
+
+VulkanCommandPool::VulkanCommandPool(const VulkanDevice &device,
+ const VulkanQueueFamily &queueFamily)
+ : m_device(device)
+{
+ VkCommandPoolCreateInfo vkCommandPoolCreateInfo = {};
+ vkCommandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+ vkCommandPoolCreateInfo.pNext = NULL;
+ vkCommandPoolCreateInfo.flags =
+ VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+ vkCommandPoolCreateInfo.queueFamilyIndex = queueFamily;
+
+ vkCreateCommandPool(m_device, &vkCommandPoolCreateInfo, NULL,
+ &m_vkCommandPool);
+}
+
+VulkanCommandPool::~VulkanCommandPool()
+{
+ vkDestroyCommandPool(m_device, m_vkCommandPool, NULL);
+}
+
+VulkanCommandPool::operator VkCommandPool() const { return m_vkCommandPool; }
+
+////////////////////////////////////////
+// VulkanCommandBuffer implementation //
+////////////////////////////////////////
+
+VulkanCommandBuffer::VulkanCommandBuffer(
+ const VulkanCommandBuffer &commandBuffer)
+ : m_device(commandBuffer.m_device),
+ m_commandPool(commandBuffer.m_commandPool),
+ m_vkCommandBuffer(commandBuffer.m_vkCommandBuffer)
+{}
+
+VulkanCommandBuffer::VulkanCommandBuffer(const VulkanDevice &device,
+ const VulkanCommandPool &commandPool)
+ : m_device(device), m_commandPool(commandPool)
+{
+ VkCommandBufferAllocateInfo vkCommandBufferAllocateInfo = {};
+ vkCommandBufferAllocateInfo.sType =
+ VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+ vkCommandBufferAllocateInfo.pNext = NULL;
+ vkCommandBufferAllocateInfo.commandPool = commandPool;
+ vkCommandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+ vkCommandBufferAllocateInfo.commandBufferCount = 1;
+
+ vkAllocateCommandBuffers(m_device, &vkCommandBufferAllocateInfo,
+ &m_vkCommandBuffer);
+}
+
+VulkanCommandBuffer::~VulkanCommandBuffer()
+{
+ vkFreeCommandBuffers(m_device, m_commandPool, 1, &m_vkCommandBuffer);
+}
+
+void VulkanCommandBuffer::begin()
+{
+ VkCommandBufferBeginInfo vkCommandBufferBeginInfo = {};
+ vkCommandBufferBeginInfo.sType =
+ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+ vkCommandBufferBeginInfo.pNext = NULL;
+ vkCommandBufferBeginInfo.flags =
+ VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
+ vkCommandBufferBeginInfo.pInheritanceInfo = NULL;
+
+ vkBeginCommandBuffer(m_vkCommandBuffer, &vkCommandBufferBeginInfo);
+}
+
+void VulkanCommandBuffer::bindPipeline(const VulkanPipeline &pipeline)
+{
+ VkPipelineBindPoint vkPipelineBindPoint =
+ (VkPipelineBindPoint)pipeline.getPipelineBindPoint();
+
+ vkCmdBindPipeline(m_vkCommandBuffer, vkPipelineBindPoint, pipeline);
+}
+
+void VulkanCommandBuffer::bindDescriptorSets(
+ const VulkanPipeline &pipeline, const VulkanPipelineLayout &pipelineLayout,
+ const VulkanDescriptorSet &descriptorSet)
+{
+ VkPipelineBindPoint vkPipelineBindPoint =
+ (VkPipelineBindPoint)pipeline.getPipelineBindPoint();
+ VkDescriptorSet vkDescriptorSet = descriptorSet;
+
+ vkCmdBindDescriptorSets(m_vkCommandBuffer, vkPipelineBindPoint,
+ pipelineLayout, 0, 1, &vkDescriptorSet, 0, NULL);
+}
+
+// Record a layout transition (oldImageLayout -> newImageLayout) for every
+// image in `image2DList`, covering all mip levels and array layers.
+// Access masks are 0 and both stage masks are ALL_COMMANDS, i.e. this is a
+// heavyweight full barrier around the transition.
+void VulkanCommandBuffer::pipelineBarrier(const VulkanImage2DList &image2DList,
+                                          VulkanImageLayout oldImageLayout,
+                                          VulkanImageLayout newImageLayout)
+{
+    std::vector<VkImageMemoryBarrier> vkImageMemoryBarrierList;
+    for (size_t i2DIdx = 0; i2DIdx < image2DList.size(); i2DIdx++)
+    {
+        // Whole-image subresource range: every level and layer.
+        VkImageSubresourceRange vkImageSubresourceRange = {};
+        vkImageSubresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+        vkImageSubresourceRange.baseMipLevel = 0;
+        vkImageSubresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
+        vkImageSubresourceRange.baseArrayLayer = 0;
+        vkImageSubresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
+
+        VkImageMemoryBarrier vkImageMemoryBarrier = {};
+        vkImageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+        vkImageMemoryBarrier.pNext = NULL;
+        vkImageMemoryBarrier.srcAccessMask = 0;
+        vkImageMemoryBarrier.dstAccessMask = 0;
+        vkImageMemoryBarrier.oldLayout = (VkImageLayout)oldImageLayout;
+        vkImageMemoryBarrier.newLayout = (VkImageLayout)newImageLayout;
+        // No queue family ownership transfer.
+        vkImageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        vkImageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        vkImageMemoryBarrier.image = image2DList[i2DIdx];
+        vkImageMemoryBarrier.subresourceRange = vkImageSubresourceRange;
+
+        vkImageMemoryBarrierList.push_back(vkImageMemoryBarrier);
+    }
+
+    // One vkCmdPipelineBarrier call carrying all image barriers; no memory
+    // or buffer barriers are recorded.
+    vkCmdPipelineBarrier(m_vkCommandBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, NULL, 0,
+                         NULL, (uint32_t)vkImageMemoryBarrierList.size(),
+                         vkImageMemoryBarrierList.data());
+}
+
+// Record a compute dispatch of groupCountX x groupCountY x groupCountZ
+// workgroups using the currently bound compute pipeline.
+void VulkanCommandBuffer::dispatch(uint32_t groupCountX, uint32_t groupCountY,
+                                   uint32_t groupCountZ)
+{
+    vkCmdDispatch(m_vkCommandBuffer, groupCountX, groupCountY, groupCountZ);
+}
+
+// Fill `size` bytes of `buffer` starting at `offset` with the repeated
+// 32-bit pattern `data`. Note the wrapper's (data, offset, size) parameter
+// order differs from vkCmdFillBuffer's (offset, size, data).
+void VulkanCommandBuffer::fillBuffer(const VulkanBuffer &buffer, uint32_t data,
+                                     uint64_t offset, uint64_t size)
+{
+    vkCmdFillBuffer(m_vkCommandBuffer, buffer, offset, size, data);
+}
+
+// Record an inline update of `size` bytes from host memory `pdata` into
+// `buffer` at `offset`. Per the Vulkan spec, vkCmdUpdateBuffer requires
+// offset and size to be multiples of 4 and size <= 65536; callers must
+// honor this (not validated here).
+void VulkanCommandBuffer::updateBuffer(const VulkanBuffer &buffer, void *pdata,
+                                       uint64_t offset, uint64_t size)
+{
+    vkCmdUpdateBuffer(m_vkCommandBuffer, buffer, offset, size, pdata);
+}
+
+// Record a copy of every mip level of `image` from `buffer`, assuming the
+// mip levels are packed consecutively in the buffer (tightly, with each
+// level's start rounded up to the copy-offset alignment).
+void VulkanCommandBuffer::copyBufferToImage(const VulkanBuffer &buffer,
+                                            const VulkanImage &image,
+                                            VulkanImageLayout imageLayout)
+{
+    VkDeviceSize bufferOffset = 0;
+
+    std::vector<VkBufferImageCopy> vkBufferImageCopyList;
+    for (uint32_t mipLevel = 0; mipLevel < image.getNumMipLevels(); mipLevel++)
+    {
+        VulkanExtent3D extent3D = image.getExtent3D(mipLevel);
+        size_t elementSize = getVulkanFormatElementSize(image.getFormat());
+
+        // All layers of this mip level in one region.
+        VkImageSubresourceLayers vkImageSubresourceLayers = {};
+        vkImageSubresourceLayers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+        vkImageSubresourceLayers.mipLevel = mipLevel;
+        vkImageSubresourceLayers.baseArrayLayer = 0;
+        vkImageSubresourceLayers.layerCount = image.getNumLayers();
+
+        VkBufferImageCopy vkBufferImageCopy = {};
+        vkBufferImageCopy.bufferOffset = bufferOffset;
+        // 0/0 => buffer data is tightly packed to the image extent.
+        vkBufferImageCopy.bufferRowLength = 0;
+        vkBufferImageCopy.bufferImageHeight = 0;
+        vkBufferImageCopy.imageSubresource = vkImageSubresourceLayers;
+        vkBufferImageCopy.imageOffset = VulkanOffset3D(0, 0, 0);
+        vkBufferImageCopy.imageExtent = extent3D;
+
+        vkBufferImageCopyList.push_back(vkBufferImageCopy);
+
+        // Advance past this level's data, then round up so the next level
+        // starts at an offset aligned for both the texel size and the
+        // minimum copy-offset alignment.
+        bufferOffset += extent3D.getWidth() * extent3D.getHeight()
+            * extent3D.getDepth() * elementSize;
+        bufferOffset =
+            ROUND_UP(bufferOffset,
+                     std::max(elementSize,
+                              (size_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT));
+    }
+
+    vkCmdCopyBufferToImage(
+        m_vkCommandBuffer, buffer, image, (VkImageLayout)imageLayout,
+        (uint32_t)vkBufferImageCopyList.size(), vkBufferImageCopyList.data());
+}
+
+// Record a copy of a single region of one mip level from `buffer` into
+// `image`. An all-zero `extent3D` means "the full extent of `mipLevel`".
+// The image is assumed to be in TRANSFER_DST_OPTIMAL layout (hard-coded).
+void VulkanCommandBuffer::copyBufferToImage(
+    const VulkanBuffer &buffer, const VulkanImage &image, uint64_t bufferOffset,
+    uint32_t mipLevel, uint32_t baseArrayLayer, uint32_t layerCount,
+    VulkanOffset3D offset3D, VulkanExtent3D extent3D)
+{
+    VkImageSubresourceLayers vkImageSubresourceLayers = {};
+    vkImageSubresourceLayers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    vkImageSubresourceLayers.mipLevel = mipLevel;
+    vkImageSubresourceLayers.baseArrayLayer = baseArrayLayer;
+    vkImageSubresourceLayers.layerCount = layerCount;
+
+    // Zero extent is a sentinel for "whole mip level".
+    VkExtent3D vkExtent3D = extent3D;
+    if ((extent3D.getWidth() == 0) && (extent3D.getHeight() == 0)
+        && (extent3D.getDepth() == 0))
+    {
+        vkExtent3D = image.getExtent3D(mipLevel);
+    }
+
+    VkBufferImageCopy vkBufferImageCopy = {};
+    vkBufferImageCopy.bufferOffset = bufferOffset;
+    // 0/0 => buffer data is tightly packed to the copy extent.
+    vkBufferImageCopy.bufferRowLength = 0;
+    vkBufferImageCopy.bufferImageHeight = 0;
+    vkBufferImageCopy.imageSubresource = vkImageSubresourceLayers;
+    vkBufferImageCopy.imageOffset = offset3D;
+    vkBufferImageCopy.imageExtent = vkExtent3D;
+
+    vkCmdCopyBufferToImage(m_vkCommandBuffer, buffer, image,
+                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
+                           &vkBufferImageCopy);
+}
+
+// Record a copy of a single region of one mip level from `image` into
+// `buffer`. An all-zero `extent3D` means "the full extent of `mipLevel`".
+// The image is assumed to be in GENERAL layout (hard-coded), unlike the
+// buffer-to-image path which assumes TRANSFER_DST_OPTIMAL.
+void VulkanCommandBuffer::copyImageToBuffer(
+    const VulkanImage &image, const VulkanBuffer &buffer, uint64_t bufferOffset,
+    uint32_t mipLevel, uint32_t baseArrayLayer, uint32_t layerCount,
+    VulkanOffset3D offset3D, VulkanExtent3D extent3D)
+{
+    VkImageSubresourceLayers vkImageSubresourceLayers = {};
+    vkImageSubresourceLayers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    vkImageSubresourceLayers.mipLevel = mipLevel;
+    vkImageSubresourceLayers.baseArrayLayer = baseArrayLayer;
+    vkImageSubresourceLayers.layerCount = layerCount;
+
+    // Zero extent is a sentinel for "whole mip level".
+    VkExtent3D vkExtent3D = extent3D;
+    if ((extent3D.getWidth() == 0) && (extent3D.getHeight() == 0)
+        && (extent3D.getDepth() == 0))
+    {
+        vkExtent3D = image.getExtent3D(mipLevel);
+    }
+
+    VkBufferImageCopy vkBufferImageCopy = {};
+    vkBufferImageCopy.bufferOffset = bufferOffset;
+    vkBufferImageCopy.bufferRowLength = 0;
+    vkBufferImageCopy.bufferImageHeight = 0;
+    vkBufferImageCopy.imageSubresource = vkImageSubresourceLayers;
+    vkBufferImageCopy.imageOffset = offset3D;
+    vkBufferImageCopy.imageExtent = vkExtent3D;
+
+    vkCmdCopyImageToBuffer(m_vkCommandBuffer, image, VK_IMAGE_LAYOUT_GENERAL,
+                           buffer, 1, &vkBufferImageCopy);
+}
+
+void VulkanCommandBuffer::end() { vkEndCommandBuffer(m_vkCommandBuffer); }
+
+// Implicit conversion to the underlying Vulkan handle.
+VulkanCommandBuffer::operator VkCommandBuffer() const
+{
+    return m_vkCommandBuffer;
+}
+
+/////////////////////////////////
+// VulkanBuffer implementation //
+/////////////////////////////////
+
+// Copy constructor: copies the VkBuffer handle itself, not the buffer.
+// NOTE(review): both copies refer to the same VkBuffer and the destructor
+// destroys it -- copies must not outlive the original; confirm intended
+// ownership semantics with the callers.
+VulkanBuffer::VulkanBuffer(const VulkanBuffer &buffer)
+    : m_device(buffer.m_device), m_vkBuffer(buffer.m_vkBuffer),
+      m_size(buffer.m_size), m_alignment(buffer.m_alignment),
+      m_memoryTypeList(buffer.m_memoryTypeList)
+{}
+
+// Create a VkBuffer of `size` bytes, optionally exportable through
+// `externalMemoryHandleType`, and cache its memory requirements. The buffer
+// is usable on the given queue families, or on all of the device's queue
+// families when `queueFamilyList` is empty.
+VulkanBuffer::VulkanBuffer(
+    const VulkanDevice &device, uint64_t size,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanBufferUsage bufferUsage, VulkanSharingMode sharingMode,
+    const VulkanQueueFamilyList &queueFamilyList)
+    : m_device(device), m_vkBuffer(VK_NULL_HANDLE)
+{
+    // Resolve the queue family indices the buffer may be used on.
+    std::vector<uint32_t> queueFamilyIndexList;
+    if (queueFamilyList.size() == 0)
+    {
+        for (size_t qfIdx = 0;
+             qfIdx < device.getPhysicalDevice().getQueueFamilyList().size();
+             qfIdx++)
+        {
+            queueFamilyIndexList.push_back(
+                device.getPhysicalDevice().getQueueFamilyList()[qfIdx]);
+        }
+    }
+    else
+    {
+        for (size_t qfIdx = 0; qfIdx < queueFamilyList.size(); qfIdx++)
+        {
+            queueFamilyIndexList.push_back(queueFamilyList[qfIdx]);
+        }
+    }
+
+    VkBufferCreateInfo vkBufferCreateInfo = {};
+    vkBufferCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+    vkBufferCreateInfo.pNext = NULL;
+    vkBufferCreateInfo.flags = 0;
+    vkBufferCreateInfo.size = (VkDeviceSize)size;
+    vkBufferCreateInfo.usage = (VkBufferUsageFlags)bufferUsage;
+    vkBufferCreateInfo.sharingMode = (VkSharingMode)sharingMode;
+    vkBufferCreateInfo.queueFamilyIndexCount =
+        (uint32_t)queueFamilyIndexList.size();
+    vkBufferCreateInfo.pQueueFamilyIndices = queueFamilyIndexList.data();
+
+    // Chain the external-memory create info only when export is requested.
+    VkExternalMemoryBufferCreateInfo vkExternalMemoryBufferCreateInfo = {};
+    if (externalMemoryHandleType != VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE)
+    {
+        vkExternalMemoryBufferCreateInfo.sType =
+            VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR;
+        vkExternalMemoryBufferCreateInfo.pNext = NULL;
+        vkExternalMemoryBufferCreateInfo.handleTypes =
+            (VkExternalMemoryHandleTypeFlags)externalMemoryHandleType;
+
+        vkBufferCreateInfo.pNext = &vkExternalMemoryBufferCreateInfo;
+    }
+
+    // NOTE(review): the VkResult is not checked; on failure m_vkBuffer stays
+    // VK_NULL_HANDLE and the queries below operate on a null handle.
+    vkCreateBuffer(m_device, &vkBufferCreateInfo, NULL, &m_vkBuffer);
+
+    // Cache size/alignment and collect the memory types this buffer can be
+    // bound to.
+    VkMemoryRequirements vkMemoryRequirements = {};
+    vkGetBufferMemoryRequirements(m_device, m_vkBuffer, &vkMemoryRequirements);
+    m_size = vkMemoryRequirements.size;
+    m_alignment = vkMemoryRequirements.alignment;
+    const VulkanMemoryTypeList &memoryTypeList =
+        m_device.getPhysicalDevice().getMemoryTypeList();
+    for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
+    {
+        uint32_t memoryTypeIndex = memoryTypeList[mtIdx];
+        // 1u: Vulkan allows up to 32 memory types, and left-shifting a
+        // signed 1 into bit 31 is undefined behavior.
+        if ((1u << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits)
+        {
+            m_memoryTypeList.add(memoryTypeList[mtIdx]);
+        }
+    }
+}
+
+// Destroys the underlying VkBuffer (bound memory is owned elsewhere).
+VulkanBuffer::~VulkanBuffer() { vkDestroyBuffer(m_device, m_vkBuffer, NULL); }
+
+// Size reported by vkGetBufferMemoryRequirements (may exceed the requested
+// creation size).
+uint64_t VulkanBuffer::getSize() const { return m_size; }
+
+// Required memory alignment for binding this buffer.
+uint64_t VulkanBuffer::getAlignment() const { return m_alignment; }
+
+// Memory types compatible with this buffer's memoryTypeBits.
+const VulkanMemoryTypeList &VulkanBuffer::getMemoryTypeList() const
+{
+    return m_memoryTypeList;
+}
+
+// Implicit conversion to the underlying Vulkan handle.
+VulkanBuffer::operator VkBuffer() const { return m_vkBuffer; }
+
+////////////////////////////////
+// VulkanImage implementation //
+////////////////////////////////
+
+// Copy constructor: copies the VkImage handle itself, not the image.
+// NOTE(review): both copies refer to the same VkImage and the destructor
+// destroys it -- copies must not outlive the original.
+VulkanImage::VulkanImage(const VulkanImage &image)
+    : m_device(image.m_device), m_imageType(image.m_imageType),
+      m_extent3D(image.m_extent3D), m_format(image.m_format),
+      m_numMipLevels(image.m_numMipLevels), m_numLayers(image.m_numLayers),
+      m_vkImage(image.m_vkImage), m_size(image.m_size),
+      m_alignment(image.m_alignment), m_memoryTypeList(image.m_memoryTypeList)
+{}
+
+// Create a VkImage with the requested type/format/extent/mips/layers,
+// optionally exportable via `externalMemoryHandleType`, and cache its
+// memory requirements. The image is shared across all of the device's
+// queue families.
+VulkanImage::VulkanImage(
+    const VulkanDevice &device, VulkanImageType imageType, VulkanFormat format,
+    const VulkanExtent3D &extent3D, uint32_t numMipLevels, uint32_t arrayLayers,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanImageCreateFlag imageCreateFlag, VulkanImageTiling imageTiling,
+    VulkanImageUsage imageUsage, VulkanSharingMode sharingMode)
+    : m_device(device), m_imageType(imageType), m_extent3D(extent3D),
+      m_format(format), m_numMipLevels(numMipLevels), m_numLayers(arrayLayers),
+      m_vkImage(VK_NULL_HANDLE)
+{
+    VkImageCreateInfo vkImageCreateInfo = {};
+    vkImageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
+    vkImageCreateInfo.pNext = NULL;
+    vkImageCreateInfo.flags = (VkImageCreateFlags)imageCreateFlag;
+    vkImageCreateInfo.imageType = (VkImageType)imageType;
+    vkImageCreateInfo.format = (VkFormat)format;
+    vkImageCreateInfo.extent = extent3D;
+    vkImageCreateInfo.mipLevels = numMipLevels;
+    vkImageCreateInfo.arrayLayers = arrayLayers;
+    vkImageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+    vkImageCreateInfo.tiling = (VkImageTiling)imageTiling;
+    vkImageCreateInfo.usage = (VkImageUsageFlags)imageUsage;
+    vkImageCreateInfo.sharingMode = (VkSharingMode)sharingMode;
+    vkImageCreateInfo.queueFamilyIndexCount =
+        (uint32_t)m_device.getPhysicalDevice().getQueueFamilyList().size();
+    // operator() on the queue family list presumably yields the raw index
+    // array -- TODO confirm against VulkanQueueFamilyList's definition.
+    vkImageCreateInfo.pQueueFamilyIndices =
+        m_device.getPhysicalDevice().getQueueFamilyList()();
+    vkImageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+
+    // Chain the external-memory create info only when export is requested.
+    VkExternalMemoryImageCreateInfo vkExternalMemoryImageCreateInfo = {};
+    if (externalMemoryHandleType != VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE)
+    {
+        vkExternalMemoryImageCreateInfo.sType =
+            VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
+        vkExternalMemoryImageCreateInfo.pNext = NULL;
+        vkExternalMemoryImageCreateInfo.handleTypes =
+            (VkExternalMemoryHandleTypeFlags)externalMemoryHandleType;
+
+        vkImageCreateInfo.pNext = &vkExternalMemoryImageCreateInfo;
+    }
+
+    vkCreateImage(m_device, &vkImageCreateInfo, NULL, &m_vkImage);
+    // Keep a copy of the create info for getVkImageCreateInfo().
+    VulkanImageCreateInfo = vkImageCreateInfo;
+
+    // Cache size/alignment and collect the memory types this image can be
+    // bound to.
+    VkMemoryRequirements vkMemoryRequirements = {};
+    vkGetImageMemoryRequirements(m_device, m_vkImage, &vkMemoryRequirements);
+    m_size = vkMemoryRequirements.size;
+    m_alignment = vkMemoryRequirements.alignment;
+    const VulkanMemoryTypeList &memoryTypeList =
+        m_device.getPhysicalDevice().getMemoryTypeList();
+    for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
+    {
+        uint32_t memoryTypeIndex = memoryTypeList[mtIdx];
+        // 1u: Vulkan allows up to 32 memory types, and left-shifting a
+        // signed 1 into bit 31 is undefined behavior.
+        if ((1u << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits)
+        {
+            m_memoryTypeList.add(memoryTypeList[mtIdx]);
+        }
+    }
+}
+
+VulkanImage::~VulkanImage() { vkDestroyImage(m_device, m_vkImage, NULL); }
+
+// Extent of the given mip level, derived from the full image extent with
+// the standard Vulkan mip-chain rule (each dimension halves per level,
+// clamped to 1). The previous implementation ignored both the stored
+// extent and `mipLevel` and always returned (0, 0, 0), which produces
+// zero-sized copies when called through the base class (e.g. from
+// copyBufferToImage). For 2D images this matches VulkanImage2D's override.
+VulkanExtent3D VulkanImage::getExtent3D(uint32_t mipLevel) const
+{
+    uint32_t width = std::max(m_extent3D.getWidth() >> mipLevel, uint32_t(1));
+    uint32_t height = std::max(m_extent3D.getHeight() >> mipLevel, uint32_t(1));
+    uint32_t depth = std::max(m_extent3D.getDepth() >> mipLevel, uint32_t(1));
+
+    return VulkanExtent3D(width, height, depth);
+}
+
+// Format the image was created with.
+VulkanFormat VulkanImage::getFormat() const { return m_format; }
+
+// Copy of the VkImageCreateInfo used at creation time.
+VkImageCreateInfo VulkanImage::getVkImageCreateInfo() const
+{
+    return VulkanImageCreateInfo;
+}
+
+uint32_t VulkanImage::getNumMipLevels() const { return m_numMipLevels; }
+
+uint32_t VulkanImage::getNumLayers() const { return m_numLayers; }
+
+// Size reported by vkGetImageMemoryRequirements.
+uint64_t VulkanImage::getSize() const { return m_size; }
+
+// Required memory alignment for binding this image.
+uint64_t VulkanImage::getAlignment() const { return m_alignment; }
+
+// Memory types compatible with this image's memoryTypeBits.
+const VulkanMemoryTypeList &VulkanImage::getMemoryTypeList() const
+{
+    return m_memoryTypeList;
+}
+
+// Implicit conversion to the underlying Vulkan handle.
+VulkanImage::operator VkImage() const { return m_vkImage; }
+
+//////////////////////////////////
+// VulkanImage2D implementation //
+//////////////////////////////////
+
+// Copy constructor: shares the VkImage handle via the base-class copy.
+VulkanImage2D::VulkanImage2D(const VulkanImage2D &image2D): VulkanImage(image2D)
+{}
+
+// Create a single-layer, optimally-tiled 2D image of width x height with
+// `numMipLevels` mip levels.
+VulkanImage2D::VulkanImage2D(
+    const VulkanDevice &device, VulkanFormat format, uint32_t width,
+    uint32_t height, uint32_t numMipLevels,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
+    VulkanSharingMode sharingMode)
+    : VulkanImage(device, VULKAN_IMAGE_TYPE_2D, format,
+                  VulkanExtent3D(width, height, 1), numMipLevels, 1,
+                  externalMemoryHandleType, imageCreateFlag,
+                  VULKAN_IMAGE_TILING_OPTIMAL, imageUsage, sharingMode)
+{}
+
+VulkanImage2D::~VulkanImage2D() {}
+
+// Extent of the given mip level for a 2D image: width and height halve per
+// level (clamped to 1); depth is always 1.
+VulkanExtent3D VulkanImage2D::getExtent3D(uint32_t mipLevel) const
+{
+    const uint32_t one = 1;
+    const uint32_t mipWidth = std::max(m_extent3D.getWidth() >> mipLevel, one);
+    const uint32_t mipHeight =
+        std::max(m_extent3D.getHeight() >> mipLevel, one);
+
+    return VulkanExtent3D(mipWidth, mipHeight, one);
+}
+
+////////////////////////////////////
+// VulkanImageView implementation //
+////////////////////////////////////
+
+// Copy constructor: copies the VkImageView handle; both copies refer to
+// the same view and the destructor destroys it.
+VulkanImageView::VulkanImageView(const VulkanImageView &imageView)
+    : m_device(imageView.m_device), m_vkImageView(imageView.m_vkImageView)
+{}
+
+// Create a view over the given mip-level/array-layer range of `image`,
+// using identity swizzles and the image's own format.
+VulkanImageView::VulkanImageView(const VulkanDevice &device,
+                                 const VulkanImage &image,
+                                 VulkanImageViewType imageViewType,
+                                 uint32_t baseMipLevel, uint32_t levelCount,
+                                 uint32_t baseArrayLayer, uint32_t layerCount)
+    : m_device(device), m_vkImageView(VK_NULL_HANDLE)
+{
+    VkComponentMapping vkComponentMapping = {};
+    vkComponentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY;
+    vkComponentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+    vkComponentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY;
+    vkComponentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+
+    VkImageSubresourceRange vkImageSubresourceRange = {};
+    vkImageSubresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    vkImageSubresourceRange.baseMipLevel = baseMipLevel;
+    vkImageSubresourceRange.levelCount = levelCount;
+    vkImageSubresourceRange.baseArrayLayer = baseArrayLayer;
+    vkImageSubresourceRange.layerCount = layerCount;
+
+    VkImageViewCreateInfo vkImageViewCreateInfo = {};
+    vkImageViewCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+    vkImageViewCreateInfo.pNext = NULL;
+    vkImageViewCreateInfo.flags = 0;
+    vkImageViewCreateInfo.image = image;
+    vkImageViewCreateInfo.viewType = (VkImageViewType)imageViewType;
+    vkImageViewCreateInfo.format = (VkFormat)image.getFormat();
+    vkImageViewCreateInfo.components = vkComponentMapping;
+    vkImageViewCreateInfo.subresourceRange = vkImageSubresourceRange;
+
+    // NOTE(review): VkResult is not checked here.
+    vkCreateImageView(m_device, &vkImageViewCreateInfo, NULL, &m_vkImageView);
+}
+
+VulkanImageView::~VulkanImageView()
+{
+    vkDestroyImageView(m_device, m_vkImageView, NULL);
+}
+
+// Implicit conversion to the underlying Vulkan handle.
+VulkanImageView::operator VkImageView() const { return m_vkImageView; }
+
+///////////////////////////////////////
+// VulkanDeviceMemory implementation //
+///////////////////////////////////////
+
+#if defined(_WIN32) || defined(_WIN64)
+
+// RAII helper that builds a SECURITY_ATTRIBUTES granting full access to
+// Everyone, used when creating shareable Win32 handles. Note the unusual
+// operator& overload: taking the address of an instance yields a pointer
+// to the wrapped SECURITY_ATTRIBUTES, not to the object itself.
+class WindowsSecurityAttributes {
+protected:
+    SECURITY_ATTRIBUTES m_winSecurityAttributes;
+    PSECURITY_DESCRIPTOR m_winPSecurityDescriptor;
+
+public:
+    WindowsSecurityAttributes();
+    SECURITY_ATTRIBUTES *operator&();
+    ~WindowsSecurityAttributes();
+};
+
+
+// Allocate a security descriptor (with trailing storage for the SID and
+// ACL pointers, freed by the destructor) and configure it so the handle is
+// inheritable and grants full access to the well-known "world" (Everyone)
+// SID.
+// NOTE(review): the results of calloc and the Win32 calls below are not
+// checked; a failure would dereference/propagate bad pointers.
+WindowsSecurityAttributes::WindowsSecurityAttributes()
+{
+    m_winPSecurityDescriptor = (PSECURITY_DESCRIPTOR)calloc(
+        1, SECURITY_DESCRIPTOR_MIN_LENGTH + 2 * sizeof(void **));
+    // CHECK_NEQ(m_winPSecurityDescriptor, (PSECURITY_DESCRIPTOR)NULL);
+    // The SID and ACL pointers live in the extra space past the descriptor.
+    PSID *ppSID = (PSID *)((PBYTE)m_winPSecurityDescriptor
+                           + SECURITY_DESCRIPTOR_MIN_LENGTH);
+    PACL *ppACL = (PACL *)((PBYTE)ppSID + sizeof(PSID *));
+    InitializeSecurityDescriptor(m_winPSecurityDescriptor,
+                                 SECURITY_DESCRIPTOR_REVISION);
+    SID_IDENTIFIER_AUTHORITY sidIdentifierAuthority =
+        SECURITY_WORLD_SID_AUTHORITY;
+    AllocateAndInitializeSid(&sidIdentifierAuthority, 1, SECURITY_WORLD_RID, 0,
+                             0, 0, 0, 0, 0, 0, ppSID);
+    // Grant Everyone all standard and specific rights.
+    EXPLICIT_ACCESS explicitAccess;
+    ZeroMemory(&explicitAccess, sizeof(EXPLICIT_ACCESS));
+    explicitAccess.grfAccessPermissions =
+        STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL;
+    explicitAccess.grfAccessMode = SET_ACCESS;
+    explicitAccess.grfInheritance = INHERIT_ONLY;
+    explicitAccess.Trustee.TrusteeForm = TRUSTEE_IS_SID;
+    explicitAccess.Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP;
+    explicitAccess.Trustee.ptstrName = (LPTSTR)*ppSID;
+    SetEntriesInAcl(1, &explicitAccess, NULL, ppACL);
+    SetSecurityDescriptorDacl(m_winPSecurityDescriptor, TRUE, *ppACL, FALSE);
+    m_winSecurityAttributes.nLength = sizeof(m_winSecurityAttributes);
+    m_winSecurityAttributes.lpSecurityDescriptor = m_winPSecurityDescriptor;
+    m_winSecurityAttributes.bInheritHandle = TRUE;
+}
+
+// Address-of is overloaded to hand out the wrapped SECURITY_ATTRIBUTES,
+// so `&winSecurityAttributes` plugs directly into Win32/Vulkan structs.
+SECURITY_ATTRIBUTES *WindowsSecurityAttributes::operator&()
+{
+    return &m_winSecurityAttributes;
+}
+
+// Free the SID, the ACL, and the descriptor block allocated by the
+// constructor (pointers are recovered from the trailing storage).
+WindowsSecurityAttributes::~WindowsSecurityAttributes()
+{
+    PSID *ppSID = (PSID *)((PBYTE)m_winPSecurityDescriptor
+                           + SECURITY_DESCRIPTOR_MIN_LENGTH);
+    PACL *ppACL = (PACL *)((PBYTE)ppSID + sizeof(PSID *));
+    if (*ppSID)
+    {
+        FreeSid(*ppSID);
+    }
+    if (*ppACL)
+    {
+        LocalFree(*ppACL);
+    }
+    free(m_winPSecurityDescriptor);
+}
+
+#endif
+
+// Copy constructor: copies the VkDeviceMemory handle; the destructor frees
+// it, so copies must not outlive the original.
+VulkanDeviceMemory::VulkanDeviceMemory(const VulkanDeviceMemory &deviceMemory)
+    : m_device(deviceMemory.m_device),
+      m_vkDeviceMemory(deviceMemory.m_vkDeviceMemory),
+      m_size(deviceMemory.m_size), m_isDedicated(deviceMemory.m_isDedicated)
+{}
+
+// Allocate `size` bytes from `memoryType`, optionally exportable through
+// `externalMemoryHandleType`. On Windows, NT handles are created with a
+// permissive security descriptor and an optional name; `name` is
+// interpreted as an LPCWSTR there and is unused elsewhere.
+VulkanDeviceMemory::VulkanDeviceMemory(
+    const VulkanDevice &device, uint64_t size,
+    const VulkanMemoryType &memoryType,
+    VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name)
+    : m_device(device), m_size(size), m_isDedicated(false)
+{
+#if defined(_WIN32) || defined(_WIN64)
+    WindowsSecurityAttributes winSecurityAttributes;
+
+    VkExportMemoryWin32HandleInfoKHR vkExportMemoryWin32HandleInfoKHR = {};
+    vkExportMemoryWin32HandleInfoKHR.sType =
+        VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR;
+    vkExportMemoryWin32HandleInfoKHR.pNext = NULL;
+    vkExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes;
+    vkExportMemoryWin32HandleInfoKHR.dwAccess =
+        DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE;
+    vkExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)name;
+
+#endif
+
+    // pNext chain: allocate-info -> export-info -> (Win32 handle info when
+    // an NT handle type was requested).
+    VkExportMemoryAllocateInfoKHR vkExportMemoryAllocateInfoKHR = {};
+    vkExportMemoryAllocateInfoKHR.sType =
+        VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR;
+#if defined(_WIN32) || defined(_WIN64)
+    vkExportMemoryAllocateInfoKHR.pNext = externalMemoryHandleType
+            & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
+        ? &vkExportMemoryWin32HandleInfoKHR
+        : NULL;
+#else
+    vkExportMemoryAllocateInfoKHR.pNext = NULL;
+#endif
+    vkExportMemoryAllocateInfoKHR.handleTypes =
+        (VkExternalMemoryHandleTypeFlagsKHR)externalMemoryHandleType;
+
+    VkMemoryAllocateInfo vkMemoryAllocateInfo = {};
+    vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+    vkMemoryAllocateInfo.pNext =
+        externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL;
+    vkMemoryAllocateInfo.allocationSize = m_size;
+    vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType;
+
+    // NOTE(review): VkResult is not checked.
+    vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL, &m_vkDeviceMemory);
+}
+
+// Dedicated allocation for `image` (VK_KHR_dedicated_allocation): sized
+// from the image's own requirements and optionally exportable. The
+// dedicated-allocate info is always chained; the export info is chained
+// behind it only when an external handle type is requested.
+VulkanDeviceMemory::VulkanDeviceMemory(
+    const VulkanDevice &device, const VulkanImage &image,
+    const VulkanMemoryType &memoryType,
+    VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name)
+    : m_device(device), m_size(image.getSize()), m_isDedicated(true)
+{
+#if defined(_WIN32) || defined(_WIN64)
+    WindowsSecurityAttributes winSecurityAttributes;
+
+    VkExportMemoryWin32HandleInfoKHR vkExportMemoryWin32HandleInfoKHR = {};
+    vkExportMemoryWin32HandleInfoKHR.sType =
+        VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR;
+    vkExportMemoryWin32HandleInfoKHR.pNext = NULL;
+    vkExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes;
+    vkExportMemoryWin32HandleInfoKHR.dwAccess =
+        DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE;
+    vkExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)name;
+
+#endif
+
+    VkExportMemoryAllocateInfoKHR vkExportMemoryAllocateInfoKHR = {};
+    vkExportMemoryAllocateInfoKHR.sType =
+        VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR;
+#if defined(_WIN32) || defined(_WIN64)
+    vkExportMemoryAllocateInfoKHR.pNext = externalMemoryHandleType
+            & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
+        ? &vkExportMemoryWin32HandleInfoKHR
+        : NULL;
+#else
+    vkExportMemoryAllocateInfoKHR.pNext = NULL;
+#endif
+    vkExportMemoryAllocateInfoKHR.handleTypes =
+        (VkExternalMemoryHandleTypeFlagsKHR)externalMemoryHandleType;
+
+    // Tie the allocation to this specific image.
+    VkMemoryDedicatedAllocateInfo vkMemoryDedicatedAllocateInfo = {};
+    vkMemoryDedicatedAllocateInfo.sType =
+        VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
+    vkMemoryDedicatedAllocateInfo.pNext =
+        externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL;
+    vkMemoryDedicatedAllocateInfo.image = image;
+    vkMemoryDedicatedAllocateInfo.buffer = VK_NULL_HANDLE;
+
+    VkMemoryAllocateInfo vkMemoryAllocateInfo = {};
+    vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+    vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo;
+    vkMemoryAllocateInfo.allocationSize = m_size;
+    vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType;
+
+    // NOTE(review): VkResult is not checked.
+    vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL, &m_vkDeviceMemory);
+}
+
+// Free the allocation; any buffers/images still bound to it become invalid.
+VulkanDeviceMemory::~VulkanDeviceMemory()
+{
+    vkFreeMemory(m_device, m_vkDeviceMemory, NULL);
+}
+
+// Allocation size in bytes.
+uint64_t VulkanDeviceMemory::getSize() const { return m_size; }
+
+// Export the allocation as a platform handle: a Win32 HANDLE on Windows,
+// an opaque POSIX fd elsewhere (HANDLE_ERROR for unsupported types).
+// Ownership of the returned handle/fd transfers to the caller.
+// NOTE(review): this guard is `#ifdef _WIN32` while the rest of the file
+// uses `#if defined(_WIN32) || defined(_WIN64)` -- harmless with MSVC
+// (which defines _WIN32 for 64-bit too) but inconsistent.
+#ifdef _WIN32
+HANDLE VulkanDeviceMemory::getHandle(
+    VulkanExternalMemoryHandleType externalMemoryHandleType) const
+{
+    // NOTE(review): `handle` stays uninitialized if the call fails; the
+    // VkResult is not checked.
+    HANDLE handle;
+
+    VkMemoryGetWin32HandleInfoKHR vkMemoryGetWin32HandleInfoKHR = {};
+    vkMemoryGetWin32HandleInfoKHR.sType =
+        VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR;
+    vkMemoryGetWin32HandleInfoKHR.pNext = NULL;
+    vkMemoryGetWin32HandleInfoKHR.memory = m_vkDeviceMemory;
+    vkMemoryGetWin32HandleInfoKHR.handleType =
+        (VkExternalMemoryHandleTypeFlagBitsKHR)externalMemoryHandleType;
+
+    vkGetMemoryWin32HandleKHR(m_device, &vkMemoryGetWin32HandleInfoKHR,
+                              &handle);
+
+    return handle;
+}
+#else
+int VulkanDeviceMemory::getHandle(
+    VulkanExternalMemoryHandleType externalMemoryHandleType) const
+{
+    // Only opaque fds are supported on non-Windows platforms.
+    if (externalMemoryHandleType
+        == VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD)
+    {
+        int fd;
+
+        VkMemoryGetFdInfoKHR vkMemoryGetFdInfoKHR = {};
+        vkMemoryGetFdInfoKHR.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
+        vkMemoryGetFdInfoKHR.pNext = NULL;
+        vkMemoryGetFdInfoKHR.memory = m_vkDeviceMemory;
+        vkMemoryGetFdInfoKHR.handleType =
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+
+        vkGetMemoryFdKHR(m_device, &vkMemoryGetFdInfoKHR, &fd);
+
+        return fd;
+    }
+    return HANDLE_ERROR;
+}
+#endif
+
+bool VulkanDeviceMemory::isDedicated() const { return m_isDedicated; }
+
+// Map `size` bytes of the allocation starting at `offset` into host
+// address space. Returns NULL when vkMapMemory fails; the previous
+// implementation returned an uninitialized pointer in that case, which is
+// undefined behavior for the caller to even inspect.
+void *VulkanDeviceMemory::map(size_t offset, size_t size)
+{
+    void *pData = NULL;
+
+    vkMapMemory(m_device, m_vkDeviceMemory, (VkDeviceSize)offset,
+                (VkDeviceSize)size, 0, &pData);
+
+    return pData;
+}
+
+// Unmap a mapping previously created with map().
+void VulkanDeviceMemory::unmap() { vkUnmapMemory(m_device, m_vkDeviceMemory); }
+
+// Bind `buffer` to this allocation at `offset` (must satisfy the buffer's
+// alignment requirement).
+void VulkanDeviceMemory::bindBuffer(const VulkanBuffer &buffer, uint64_t offset)
+{
+    vkBindBufferMemory(m_device, buffer, m_vkDeviceMemory, offset);
+}
+
+// Bind `image` to this allocation at `offset` (must satisfy the image's
+// alignment requirement).
+void VulkanDeviceMemory::bindImage(const VulkanImage &image, uint64_t offset)
+{
+    vkBindImageMemory(m_device, image, m_vkDeviceMemory, offset);
+}
+
+// Implicit conversion to the underlying Vulkan handle.
+VulkanDeviceMemory::operator VkDeviceMemory() const { return m_vkDeviceMemory; }
+
+////////////////////////////////////
+// VulkanSemaphore implementation //
+////////////////////////////////////
+
+// Copy constructor: copies the VkSemaphore handle; the destructor destroys
+// it, so copies must not outlive the original.
+VulkanSemaphore::VulkanSemaphore(const VulkanSemaphore &semaphore)
+    : m_device(semaphore.m_device), m_vkSemaphore(semaphore.m_vkSemaphore)
+{}
+
+// Create a binary semaphore, optionally exportable through
+// `externalSemaphoreHandleType`; on Windows an NT handle can be given a
+// name via `name`.
+VulkanSemaphore::VulkanSemaphore(
+    const VulkanDevice &device,
+    VulkanExternalSemaphoreHandleType externalSemaphoreHandleType,
+    const std::wstring name)
+    : m_device(device), m_name(name)
+{
+#if defined(_WIN32) || defined(_WIN64)
+    WindowsSecurityAttributes winSecurityAttributes;
+
+    VkExportSemaphoreWin32HandleInfoKHR
+        vkExportSemaphoreWin32HandleInfoKHR = {};
+    vkExportSemaphoreWin32HandleInfoKHR.sType =
+        VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR;
+    vkExportSemaphoreWin32HandleInfoKHR.pNext = NULL;
+    vkExportSemaphoreWin32HandleInfoKHR.pAttributes = &winSecurityAttributes;
+    vkExportSemaphoreWin32HandleInfoKHR.dwAccess =
+        DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE;
+    vkExportSemaphoreWin32HandleInfoKHR.name =
+        m_name.size() ? (LPCWSTR)m_name.c_str() : NULL;
+#endif
+
+    VkExportSemaphoreCreateInfoKHR vkExportSemaphoreCreateInfoKHR = {};
+    vkExportSemaphoreCreateInfoKHR.sType =
+        VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR;
+#if defined(_WIN32) || defined(_WIN64)
+    // NOTE(review): this masks a *semaphore* handle type with the *memory*
+    // enumerator VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT --
+    // looks like it should be the semaphore-enum counterpart; verify the
+    // two enums' bit values actually coincide.
+    vkExportSemaphoreCreateInfoKHR.pNext =
+        (externalSemaphoreHandleType
+         & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT)
+        ? &vkExportSemaphoreWin32HandleInfoKHR
+        : NULL;
+#else
+    vkExportSemaphoreCreateInfoKHR.pNext = NULL;
+#endif
+    vkExportSemaphoreCreateInfoKHR.handleTypes =
+        (VkExternalSemaphoreHandleTypeFlagsKHR)externalSemaphoreHandleType;
+
+    VkSemaphoreCreateInfo vkSemaphoreCreateInfo = {};
+    vkSemaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+    // Chain the export info only when an external handle type is requested.
+    vkSemaphoreCreateInfo.pNext =
+        (externalSemaphoreHandleType
+         != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE)
+        ? &vkExportSemaphoreCreateInfoKHR
+        : NULL;
+    vkSemaphoreCreateInfo.flags = 0;
+
+    // NOTE(review): VkResult is not checked.
+    vkCreateSemaphore(m_device, &vkSemaphoreCreateInfo, NULL, &m_vkSemaphore);
+}
+
+// Destroy the semaphore; it must not be pending in any queue submission.
+VulkanSemaphore::~VulkanSemaphore()
+{
+    vkDestroySemaphore(m_device, m_vkSemaphore, NULL);
+}
+
+// Export the semaphore as a platform handle: a Win32 HANDLE on Windows, an
+// opaque POSIX fd elsewhere (HANDLE_ERROR for unsupported types).
+// Ownership of the returned handle/fd transfers to the caller.
+#if defined(_WIN32) || defined(_WIN64)
+HANDLE VulkanSemaphore::getHandle(
+    VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const
+{
+    // NOTE(review): `handle` stays uninitialized if the call fails; the
+    // VkResult is not checked.
+    HANDLE handle;
+
+    VkSemaphoreGetWin32HandleInfoKHR vkSemaphoreGetWin32HandleInfoKHR = {};
+    vkSemaphoreGetWin32HandleInfoKHR.sType =
+        VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR;
+    vkSemaphoreGetWin32HandleInfoKHR.pNext = NULL;
+    vkSemaphoreGetWin32HandleInfoKHR.semaphore = m_vkSemaphore;
+    vkSemaphoreGetWin32HandleInfoKHR.handleType =
+        (VkExternalSemaphoreHandleTypeFlagBitsKHR)externalSemaphoreHandleType;
+
+    vkGetSemaphoreWin32HandleKHR(m_device, &vkSemaphoreGetWin32HandleInfoKHR,
+                                 &handle);
+
+    return handle;
+}
+#else
+int VulkanSemaphore::getHandle(
+    VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const
+{
+    // Only opaque fds are supported on non-Windows platforms.
+    if (externalSemaphoreHandleType
+        == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD)
+    {
+        int fd;
+
+        VkSemaphoreGetFdInfoKHR vkSemaphoreGetFdInfoKHR = {};
+        vkSemaphoreGetFdInfoKHR.sType =
+            VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR;
+        vkSemaphoreGetFdInfoKHR.pNext = NULL;
+        vkSemaphoreGetFdInfoKHR.semaphore = m_vkSemaphore;
+        vkSemaphoreGetFdInfoKHR.handleType =
+            VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+
+        vkGetSemaphoreFdKHR(m_device, &vkSemaphoreGetFdInfoKHR, &fd);
+
+        return fd;
+    }
+    return HANDLE_ERROR;
+}
+#endif
+
+// Name used for the Windows NT handle export (may be empty).
+const std::wstring &VulkanSemaphore::getName() const { return m_name; }
+
+// Implicit conversion to the underlying Vulkan handle.
+VulkanSemaphore::operator VkSemaphore() const { return m_vkSemaphore; }
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp
new file mode 100644
index 00000000..37925ee4
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp
@@ -0,0 +1,580 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef _vulkan_wrapper_hpp_
+#define _vulkan_wrapper_hpp_
+
+#include <vulkan/vulkan.h>
+#include "vulkan_wrapper_types.hpp"
+#include "vulkan_list_map.hpp"
+#include "vulkan_api_list.hpp"
+
+class VulkanInstance {
+ friend const VulkanInstance &getVulkanInstance();
+
+protected:
+ VkInstance m_vkInstance;
+ VulkanPhysicalDeviceList m_physicalDeviceList;
+
+ VulkanInstance();
+ VulkanInstance(const VulkanInstance &);
+ virtual ~VulkanInstance();
+
+public:
+ const VulkanPhysicalDeviceList &getPhysicalDeviceList() const;
+ operator VkInstance() const;
+};
+
+class VulkanPhysicalDevice {
+ friend class VulkanInstance;
+
+protected:
+ VkPhysicalDevice m_vkPhysicalDevice;
+ VkPhysicalDeviceProperties m_vkPhysicalDeviceProperties;
+ uint8_t m_vkDeviceUUID[VK_UUID_SIZE];
+ uint8_t m_vkDeviceLUID[VK_LUID_SIZE];
+ uint32_t m_vkDeviceNodeMask;
+ VkPhysicalDeviceFeatures m_vkPhysicalDeviceFeatures;
+ VkPhysicalDeviceMemoryProperties m_vkPhysicalDeviceMemoryProperties;
+ VulkanQueueFamilyList m_queueFamilyList;
+ VulkanMemoryHeapList m_memoryHeapList;
+ VulkanMemoryTypeList m_memoryTypeList;
+
+ VulkanPhysicalDevice(const VulkanPhysicalDevice &physicalDevice);
+ VulkanPhysicalDevice(VkPhysicalDevice vkPhysicalDevice);
+ virtual ~VulkanPhysicalDevice();
+
+public:
+ const VulkanQueueFamilyList &getQueueFamilyList() const;
+ const VulkanMemoryHeapList &getMemoryHeapList() const;
+ const VulkanMemoryTypeList &getMemoryTypeList() const;
+ const uint8_t *getUUID() const;
+ const uint8_t *getLUID() const;
+ uint32_t getNodeMask() const;
+ operator VkPhysicalDevice() const;
+};
+
+class VulkanMemoryHeap {
+ friend class VulkanPhysicalDevice;
+
+protected:
+ uint32_t m_memoryHeapIndex;
+ uint64_t m_size;
+ VulkanMemoryHeapFlag m_memoryHeapFlag;
+
+ VulkanMemoryHeap(const VulkanMemoryHeap &memoryHeap);
+ VulkanMemoryHeap(uint32_t m_memoryHeapIndex, uint64_t m_size,
+ VulkanMemoryHeapFlag m_memoryHeapFlag);
+ virtual ~VulkanMemoryHeap();
+
+public:
+ uint64_t getSize() const;
+ VulkanMemoryHeapFlag getMemoryHeapFlag() const;
+ operator uint32_t() const;
+};
+
+class VulkanMemoryType {
+ friend class VulkanPhysicalDevice;
+
+protected:
+ uint32_t m_memoryTypeIndex;
+ const VulkanMemoryTypeProperty m_memoryTypeProperty;
+ const VulkanMemoryHeap &m_memoryHeap;
+
+ VulkanMemoryType(const VulkanMemoryType &memoryType);
+ VulkanMemoryType(uint32_t memoryTypeIndex,
+ VulkanMemoryTypeProperty memoryTypeProperty,
+ const VulkanMemoryHeap &memoryHeap);
+ virtual ~VulkanMemoryType();
+
+public:
+ VulkanMemoryTypeProperty getMemoryTypeProperty() const;
+ const VulkanMemoryHeap &getMemoryHeap() const;
+ operator uint32_t() const;
+};
+
+class VulkanQueueFamily {
+ friend class VulkanPhysicalDevice;
+
+protected:
+ uint32_t m_queueFamilyIndex;
+ VkQueueFamilyProperties m_vkQueueFamilyProperties;
+
+ VulkanQueueFamily(const VulkanQueueFamily &queueFamily);
+ VulkanQueueFamily(uint32_t queueFamilyIndex,
+ VkQueueFamilyProperties vkQueueFamilyProperties);
+ virtual ~VulkanQueueFamily();
+
+public:
+ uint32_t getQueueFlags() const;
+ uint32_t getQueueCount() const;
+ operator uint32_t() const;
+};
+
+class VulkanDevice {
+protected:
+ const VulkanPhysicalDevice &m_physicalDevice;
+ VkDevice m_vkDevice;
+ VulkanQueueFamilyToQueueListMap m_queueFamilyIndexToQueueListMap;
+
+ VulkanDevice(const VulkanDevice &device);
+
+public:
+ VulkanDevice(
+ const VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice(),
+ const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap =
+ getDefaultVulkanQueueFamilyToQueueCountMap());
+ virtual ~VulkanDevice();
+ const VulkanPhysicalDevice &getPhysicalDevice() const;
+ VulkanQueue &
+ getQueue(const VulkanQueueFamily &queueFamily = getVulkanQueueFamily(),
+ uint32_t queueIndex = 0);
+ operator VkDevice() const;
+};
+
+class VulkanQueue {
+ friend class VulkanDevice;
+
+protected:
+ VkQueue m_vkQueue;
+
+ VulkanQueue(VkQueue vkQueue);
+ VulkanQueue(const VulkanQueue &queue);
+ virtual ~VulkanQueue();
+
+public:
+ const VulkanQueueFamily &getQueueFamily();
+ void submit(const VulkanSemaphoreList &waitSemaphoreList,
+ const VulkanCommandBufferList &commandBufferList,
+ const VulkanSemaphoreList &signalSemaphoreList);
+ void submit(const VulkanSemaphore &waitSemaphore,
+ const VulkanCommandBuffer &commandBuffer,
+ const VulkanSemaphore &signalSemaphore);
+ void submit(const VulkanCommandBuffer &commandBuffer,
+ const VulkanSemaphore &signalSemaphore);
+ void submit(const VulkanCommandBuffer &commandBuffer);
+ void waitIdle();
+ operator VkQueue() const;
+};
+
+class VulkanDescriptorSetLayoutBinding {
+protected:
+ VkDescriptorSetLayoutBinding m_vkDescriptorSetLayoutBinding;
+
+ VulkanDescriptorSetLayoutBinding(
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding);
+
+public:
+ VulkanDescriptorSetLayoutBinding(
+ uint32_t binding, VulkanDescriptorType descriptorType,
+ uint32_t descriptorCount = 1,
+ VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE);
+ virtual ~VulkanDescriptorSetLayoutBinding();
+ operator VkDescriptorSetLayoutBinding() const;
+};
+
+class VulkanDescriptorSetLayout {
+protected:
+ const VulkanDevice &m_device;
+ VkDescriptorSetLayout m_vkDescriptorSetLayout;
+
+ VulkanDescriptorSetLayout(
+ const VulkanDescriptorSetLayout &descriptorSetLayout);
+ void
+ VulkanDescriptorSetLayoutCommon(const VulkanDescriptorSetLayoutBindingList
+ &descriptorSetLayoutBindingList);
+
+public:
+ VulkanDescriptorSetLayout(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding);
+ VulkanDescriptorSetLayout(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1);
+ VulkanDescriptorSetLayout(const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBindingList
+ &descriptorSetLayoutBindingList);
+ virtual ~VulkanDescriptorSetLayout();
+ operator VkDescriptorSetLayout() const;
+};
+
+class VulkanPipelineLayout {
+protected:
+ const VulkanDevice &m_device;
+ VkPipelineLayout m_vkPipelineLayout;
+
+ VulkanPipelineLayout(const VulkanPipelineLayout &pipelineLayout);
+ void VulkanPipelineLayoutCommon(
+ const VulkanDescriptorSetLayoutList &descriptorSetLayoutList);
+
+public:
+ VulkanPipelineLayout(const VulkanDevice &device,
+ const VulkanDescriptorSetLayout &descriptorSetLayout);
+ VulkanPipelineLayout(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutList &descriptorSetLayoutList =
+ getEmptyVulkanDescriptorSetLayoutList());
+ virtual ~VulkanPipelineLayout();
+ operator VkPipelineLayout() const;
+};
+
+class VulkanShaderModule {
+protected:
+ const VulkanDevice &m_device;
+ VkShaderModule m_vkShaderModule;
+
+ VulkanShaderModule(const VulkanShaderModule &shaderModule);
+
+public:
+ VulkanShaderModule(const VulkanDevice &device,
+ const std::vector<char> &code);
+ virtual ~VulkanShaderModule();
+ operator VkShaderModule() const;
+};
+
+class VulkanPipeline {
+protected:
+ const VulkanDevice &m_device;
+ VkPipeline m_vkPipeline;
+
+ VulkanPipeline(const VulkanPipeline &pipeline);
+
+public:
+ VulkanPipeline(const VulkanDevice &device);
+ virtual ~VulkanPipeline();
+ virtual VulkanPipelineBindPoint getPipelineBindPoint() const = 0;
+ operator VkPipeline() const;
+};
+
+class VulkanComputePipeline : public VulkanPipeline {
+protected:
+ VulkanComputePipeline(const VulkanComputePipeline &computePipeline);
+
+public:
+ VulkanComputePipeline(const VulkanDevice &device,
+ const VulkanPipelineLayout &pipelineLayout,
+ const VulkanShaderModule &shaderModule,
+ const std::string &entryFuncName = "main");
+ virtual ~VulkanComputePipeline();
+ VulkanPipelineBindPoint getPipelineBindPoint() const;
+};
+
+class VulkanDescriptorPool {
+protected:
+ const VulkanDevice &m_device;
+ VkDescriptorPool m_vkDescriptorPool;
+
+ VulkanDescriptorPool(const VulkanDescriptorPool &descriptorPool);
+ void VulkanDescriptorPoolCommon(const VulkanDescriptorSetLayoutBindingList
+ &descriptorSetLayoutBindingList);
+
+public:
+ VulkanDescriptorPool(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding);
+ VulkanDescriptorPool(
+ const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0,
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1);
+ VulkanDescriptorPool(const VulkanDevice &device,
+ const VulkanDescriptorSetLayoutBindingList
+ &descriptorSetLayoutBindingList);
+ virtual ~VulkanDescriptorPool();
+ operator VkDescriptorPool() const;
+};
+
+class VulkanDescriptorSet {
+protected:
+ const VulkanDevice &m_device;
+ const VulkanDescriptorPool &m_descriptorPool;
+ VkDescriptorSet m_vkDescriptorSet;
+
+ VulkanDescriptorSet(const VulkanDescriptorSet &descriptorSet);
+
+public:
+ VulkanDescriptorSet(const VulkanDevice &device,
+ const VulkanDescriptorPool &descriptorPool,
+ const VulkanDescriptorSetLayout &descriptorSetLayout);
+ virtual ~VulkanDescriptorSet();
+ void update(uint32_t binding, const VulkanBuffer &buffer);
+ void update(uint32_t binding, const VulkanImageView &imageView);
+ operator VkDescriptorSet() const;
+};
+
+class VulkanOffset3D {
+protected:
+ VkOffset3D m_vkOffset3D;
+
+public:
+ VulkanOffset3D(const VulkanOffset3D &extent3D);
+ VulkanOffset3D(uint32_t x = 0, uint32_t y = 0, uint32_t z = 0);
+ virtual ~VulkanOffset3D();
+ uint32_t getX() const;
+ uint32_t getY() const;
+ uint32_t getZ() const;
+ operator VkOffset3D() const;
+};
+
+class VulkanExtent3D {
+protected:
+ VkExtent3D m_vkExtent3D;
+
+public:
+ VulkanExtent3D(const VulkanExtent3D &extent3D);
+ VulkanExtent3D(uint32_t width, uint32_t height = 1, uint32_t depth = 1);
+ virtual ~VulkanExtent3D();
+ uint32_t getWidth() const;
+ uint32_t getHeight() const;
+ uint32_t getDepth() const;
+ operator VkExtent3D() const;
+};
+
+class VulkanCommandPool {
+protected:
+ const VulkanDevice &m_device;
+ VkCommandPool m_vkCommandPool;
+
+ VulkanCommandPool(const VulkanCommandPool &commandPool);
+
+public:
+ VulkanCommandPool(
+ const VulkanDevice &device,
+ const VulkanQueueFamily &queueFamily = getVulkanQueueFamily());
+ virtual ~VulkanCommandPool();
+ operator VkCommandPool() const;
+};
+
+class VulkanCommandBuffer {
+protected:
+ const VulkanDevice &m_device;
+ const VulkanCommandPool &m_commandPool;
+ VkCommandBuffer m_vkCommandBuffer;
+
+ VulkanCommandBuffer(const VulkanCommandBuffer &commandBuffer);
+
+public:
+ VulkanCommandBuffer(const VulkanDevice &device,
+ const VulkanCommandPool &commandPool);
+ virtual ~VulkanCommandBuffer();
+ void begin();
+ void bindPipeline(const VulkanPipeline &pipeline);
+ void bindDescriptorSets(const VulkanPipeline &pipeline,
+ const VulkanPipelineLayout &pipelineLayout,
+ const VulkanDescriptorSet &descriptorSet);
+ void pipelineBarrier(const VulkanImage2DList &image2DList,
+ VulkanImageLayout oldImageLayout,
+ VulkanImageLayout newImageLayout);
+ void dispatch(uint32_t groupCountX, uint32_t groupCountY,
+ uint32_t groupCountZ);
+ void fillBuffer(const VulkanBuffer &buffer, uint32_t data,
+ uint64_t offset = 0, uint64_t size = VK_WHOLE_SIZE);
+ void updateBuffer(const VulkanBuffer &buffer, void *pdata,
+ uint64_t offset = 0, uint64_t size = VK_WHOLE_SIZE);
+ void copyBufferToImage(const VulkanBuffer &buffer, const VulkanImage &image,
+ VulkanImageLayout imageLayout =
+ VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+ void copyBufferToImage(const VulkanBuffer &buffer, const VulkanImage &image,
+ uint64_t bufferOffset = 0, uint32_t mipLevel = 0,
+ uint32_t baseArrayLayer = 0, uint32_t layerCount = 1,
+ VulkanOffset3D offset3D = VulkanOffset3D(0, 0, 0),
+ VulkanExtent3D extent3D = VulkanExtent3D(0, 0, 0));
+ void copyImageToBuffer(const VulkanImage &image, const VulkanBuffer &buffer,
+ uint64_t bufferOffset = 0, uint32_t mipLevel = 0,
+ uint32_t baseArrayLayer = 0, uint32_t layerCount = 1,
+ VulkanOffset3D offset3D = VulkanOffset3D(0, 0, 0),
+ VulkanExtent3D extent3D = VulkanExtent3D(0, 0, 0));
+ void end();
+ operator VkCommandBuffer() const;
+};
+
+class VulkanBuffer {
+protected:
+ const VulkanDevice &m_device;
+ VkBuffer m_vkBuffer;
+ uint64_t m_size;
+ uint64_t m_alignment;
+ VulkanMemoryTypeList m_memoryTypeList;
+
+ VulkanBuffer(const VulkanBuffer &buffer);
+
+public:
+ VulkanBuffer(const VulkanDevice &device, uint64_t size,
+ VulkanExternalMemoryHandleType externalMemoryHandleType =
+ VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+ VulkanBufferUsage bufferUsage =
+ VULKAN_BUFFER_USAGE_STORAGE_BUFFER_TRANSFER_SRC_DST,
+ VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE,
+ const VulkanQueueFamilyList &queueFamilyList =
+ getEmptyVulkanQueueFamilyList());
+ virtual ~VulkanBuffer();
+ uint64_t getSize() const;
+ uint64_t getAlignment() const;
+ const VulkanMemoryTypeList &getMemoryTypeList() const;
+ operator VkBuffer() const;
+};
+
+class VulkanImage {
+protected:
+ const VulkanDevice &m_device;
+ const VulkanImageType m_imageType;
+ const VulkanExtent3D m_extent3D;
+ const VulkanFormat m_format;
+ const uint32_t m_numMipLevels;
+ const uint32_t m_numLayers;
+ VkImage m_vkImage;
+ uint64_t m_size;
+ uint64_t m_alignment;
+ VulkanMemoryTypeList m_memoryTypeList;
+ VkImageCreateInfo VulkanImageCreateInfo;
+ VulkanImage(const VulkanImage &image);
+
+public:
+ VulkanImage(
+ const VulkanDevice &device, VulkanImageType imageType,
+ VulkanFormat format, const VulkanExtent3D &extent3D,
+ uint32_t numMipLevels = 1, uint32_t arrayLayers = 1,
+ VulkanExternalMemoryHandleType externalMemoryHandleType =
+ VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+ VulkanImageCreateFlag imageCreateFlags = VULKAN_IMAGE_CREATE_FLAG_NONE,
+ VulkanImageTiling imageTiling = VULKAN_IMAGE_TILING_OPTIMAL,
+ VulkanImageUsage imageUsage =
+ VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST,
+ VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
+ virtual ~VulkanImage();
+ virtual VulkanExtent3D getExtent3D(uint32_t mipLevel = 0) const;
+ VulkanFormat getFormat() const;
+ uint32_t getNumMipLevels() const;
+ uint32_t getNumLayers() const;
+ uint64_t getSize() const;
+ uint64_t getAlignment() const;
+ const VulkanMemoryTypeList &getMemoryTypeList() const;
+ VkImageCreateInfo getVkImageCreateInfo() const;
+ operator VkImage() const;
+};
+
+class VulkanImage2D : public VulkanImage {
+protected:
+ VkImageView m_vkImageView;
+
+ VulkanImage2D(const VulkanImage2D &image2D);
+
+public:
+ VulkanImage2D(
+ const VulkanDevice &device, VulkanFormat format, uint32_t width,
+ uint32_t height, uint32_t numMipLevels = 1,
+ VulkanExternalMemoryHandleType externalMemoryHandleType =
+ VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+ VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
+ VulkanImageUsage imageUsage =
+ VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST,
+ VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
+ virtual ~VulkanImage2D();
+ virtual VulkanExtent3D getExtent3D(uint32_t mipLevel = 0) const;
+};
+
+class VulkanImageView {
+protected:
+ const VulkanDevice &m_device;
+ VkImageView m_vkImageView;
+
+ VulkanImageView(const VulkanImageView &imageView);
+
+public:
+ VulkanImageView(const VulkanDevice &device, const VulkanImage &image,
+ VulkanImageViewType imageViewType,
+ uint32_t baseMipLevel = 0,
+ uint32_t mipLevelCount = VULKAN_REMAINING_MIP_LEVELS,
+ uint32_t baseArrayLayer = 0,
+ uint32_t layerCount = VULKAN_REMAINING_ARRAY_LAYERS);
+ virtual ~VulkanImageView();
+ operator VkImageView() const;
+};
+
+class VulkanDeviceMemory {
+protected:
+ const VulkanDevice &m_device;
+ VkDeviceMemory m_vkDeviceMemory;
+ uint64_t m_size;
+ bool m_isDedicated;
+
+ VulkanDeviceMemory(const VulkanDeviceMemory &deviceMemory);
+
+public:
+ VulkanDeviceMemory(const VulkanDevice &device, uint64_t size,
+ const VulkanMemoryType &memoryType,
+ VulkanExternalMemoryHandleType externalMemoryHandleType =
+ VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+ const void *name = NULL);
+ VulkanDeviceMemory(const VulkanDevice &device, const VulkanImage &image,
+ const VulkanMemoryType &memoryType,
+ VulkanExternalMemoryHandleType externalMemoryHandleType =
+ VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+ const void *name = NULL);
+ virtual ~VulkanDeviceMemory();
+ uint64_t getSize() const;
+#ifdef _WIN32
+ HANDLE
+ getHandle(VulkanExternalMemoryHandleType externalMemoryHandleType) const;
+#else
+ int
+ getHandle(VulkanExternalMemoryHandleType externalMemoryHandleType) const;
+#endif
+ bool isDedicated() const;
+ void *map(size_t offset = 0, size_t size = VK_WHOLE_SIZE);
+ void unmap();
+ void bindBuffer(const VulkanBuffer &buffer, uint64_t offset = 0);
+ void bindImage(const VulkanImage &image, uint64_t offset = 0);
+ operator VkDeviceMemory() const;
+};
+
+class VulkanSemaphore {
+ friend class VulkanQueue;
+
+protected:
+ const VulkanDevice &m_device;
+ VkSemaphore m_vkSemaphore;
+ const std::wstring m_name;
+
+ VulkanSemaphore(const VulkanSemaphore &semaphore);
+
+public:
+ VulkanSemaphore(
+ const VulkanDevice &device,
+ VulkanExternalSemaphoreHandleType externalSemaphoreHandleType =
+ VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE,
+ const std::wstring name = L"");
+ virtual ~VulkanSemaphore();
+#ifdef _WIN32
+ HANDLE getHandle(
+ VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const;
+#else
+ int getHandle(
+ VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const;
+#endif
+ const std::wstring &getName() const;
+ operator VkSemaphore() const;
+};
+
+
+#define VK_FUNC_DECL(name) extern "C" PFN_##name _##name;
+VK_FUNC_LIST
+#if defined(_WIN32) || defined(_WIN64)
+VK_WINDOWS_FUNC_LIST
+#endif
+#undef VK_FUNC_DECL
+
+#endif // _vulkan_wrapper_hpp_
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp
new file mode 100644
index 00000000..359bcae4
--- /dev/null
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp
@@ -0,0 +1,463 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef _vulkan_wrapper_types_hpp_
+#define _vulkan_wrapper_types_hpp_
+
+#include <vulkan/vulkan.h>
+
+#define VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT 4
+#define VULKAN_REMAINING_MIP_LEVELS VK_REMAINING_MIP_LEVELS
+#define VULKAN_REMAINING_ARRAY_LAYERS VK_REMAINING_ARRAY_LAYERS
+
+class VulkanInstance;
+class VulkanPhysicalDevice;
+class VulkanMemoryHeap;
+class VulkanMemoryType;
+class VulkanQueueFamily;
+class VulkanDevice;
+class VulkanQueue;
+class VulkanDescriptorSetLayoutBinding;
+class VulkanDescriptorSetLayout;
+class VulkanPipelineLayout;
+class VulkanShaderModule;
+class VulkanPipeline;
+class VulkanComputePipeline;
+class VulkanDescriptorPool;
+class VulkanDescriptorSet;
+class VulkanCommandPool;
+class VulkanCommandBuffer;
+class VulkanBuffer;
+class VulkanOffset3D;
+class VulkanExtent3D;
+class VulkanImage;
+class VulkanImage2D;
+class VulkanImageView;
+class VulkanDeviceMemory;
+class VulkanSemaphore;
+
+class VulkanPhysicalDeviceList;
+class VulkanMemoryHeapList;
+class VulkanMemoryTypeList;
+class VulkanQueueFamilyList;
+class VulkanQueueFamilyToQueueCountMap;
+class VulkanQueueFamilyToQueueListMap;
+class VulkanQueueList;
+class VulkanCommandBufferList;
+class VulkanDescriptorSetLayoutList;
+class VulkanBufferList;
+class VulkanImage2DList;
+class VulkanImageViewList;
+class VulkanDeviceMemoryList;
+class VulkanSemaphoreList;
+
+enum VulkanQueueFlag
+{
+ VULKAN_QUEUE_FLAG_GRAPHICS = VK_QUEUE_GRAPHICS_BIT,
+ VULKAN_QUEUE_FLAG_COMPUTE = VK_QUEUE_COMPUTE_BIT,
+ VULKAN_QUEUE_FLAG_TRANSFER = VK_QUEUE_TRANSFER_BIT,
+ VULKAN_QUEUE_FLAG_MASK_ALL = VULKAN_QUEUE_FLAG_GRAPHICS
+ | VULKAN_QUEUE_FLAG_COMPUTE | VULKAN_QUEUE_FLAG_TRANSFER
+};
+
+enum VulkanDescriptorType
+{
+ VULKAN_DESCRIPTOR_TYPE_SAMPLER = VK_DESCRIPTOR_TYPE_SAMPLER,
+ VULKAN_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER =
+ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ VULKAN_DESCRIPTOR_TYPE_SAMPLED_IMAGE = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ VULKAN_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER =
+ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ VULKAN_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER =
+ VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ VULKAN_DESCRIPTOR_TYPE_UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ VULKAN_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC =
+ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
+ VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC =
+ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC,
+ VULKAN_DESCRIPTOR_TYPE_INPUT_ATTACHMENT =
+ VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT,
+};
+
+enum VulkanShaderStage
+{
+ VULKAN_SHADER_STAGE_VERTEX = VK_SHADER_STAGE_VERTEX_BIT,
+ VULKAN_SHADER_STAGE_FRAGMENT = VK_SHADER_STAGE_FRAGMENT_BIT,
+ VULKAN_SHADER_STAGE_COMPUTE = VK_SHADER_STAGE_COMPUTE_BIT,
+ VULKAN_SHADER_STAGE_ALL_GRAPHICS = VK_SHADER_STAGE_ALL_GRAPHICS,
+ VULKAN_SHADER_STAGE_ALL = VK_SHADER_STAGE_ALL
+};
+
+enum VulkanPipelineBindPoint
+{
+ VULKAN_PIPELINE_BIND_POINT_GRAPHICS = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ VULKAN_PIPELINE_BIND_POINT_COMPUTE = VK_PIPELINE_BIND_POINT_COMPUTE
+};
+
+enum VulkanMemoryTypeProperty
+{
+ VULKAN_MEMORY_TYPE_PROPERTY_NONE = 0,
+ VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL =
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT =
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+ | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED =
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+ | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+ VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED_COHERENT =
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT
+ | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_COHERENT =
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
+ | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+ | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED =
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
+ | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+ | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+ VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED_COHERENT =
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
+ | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+ | VK_MEMORY_PROPERTY_HOST_CACHED_BIT
+ | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
+};
+
+enum VulkanMemoryHeapFlag
+{
+ VULKAN_MEMORY_HEAP_FLAG_NONE = 0,
+ VULKAN_MEMORY_HEAP_FLAG_DEVICE_LOCAL = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
+};
+
+enum VulkanExternalMemoryHandleType
+{
+ VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE = 0,
+ VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD =
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
+ VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT =
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
+ VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT =
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
+ VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT =
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
+ | VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR
+};
+
+enum VulkanExternalSemaphoreHandleType
+{
+ VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE = 0,
+ VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
+ VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
+ VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
+ VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
+ | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR
+};
+
+enum VulkanBufferUsage
+{
+ VULKAN_BUFFER_USAGE_TRANSFER_SRC = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+ VULKAN_BUFFER_USAGE_TRANSFER_DST = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ VULKAN_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER =
+ VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
+ VULKAN_BUFFER_USAGE_STORAGE_TEXEL_BUFFER =
+ VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
+ VULKAN_BUFFER_USAGE_UNIFORM_BUFFER = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+ VULKAN_BUFFER_USAGE_STORAGE_BUFFER = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ VULKAN_BUFFER_USAGE_INDEX_BUFFER = VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+ VULKAN_BUFFER_USAGE_VERTEX_BUFFER = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+ VULKAN_BUFFER_USAGE_INDIRECT_BUFFER = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
+ VULKAN_BUFFER_USAGE_STORAGE_BUFFER_TRANSFER_SRC_DST =
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT
+ | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ VULKAN_BUFFER_USAGE_UNIFORM_BUFFER_TRANSFER_SRC_DST =
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT
+ | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+};
+
+enum VulkanSharingMode
+{
+ VULKAN_SHARING_MODE_EXCLUSIVE = VK_SHARING_MODE_EXCLUSIVE,
+ VULKAN_SHARING_MODE_CONCURRENT = VK_SHARING_MODE_CONCURRENT
+};
+
+enum VulkanImageType
+{
+ VULKAN_IMAGE_TYPE_1D = VK_IMAGE_TYPE_1D,
+ VULKAN_IMAGE_TYPE_2D = VK_IMAGE_TYPE_2D,
+ VULKAN_IMAGE_TYPE_3D = VK_IMAGE_TYPE_3D
+};
+
+enum VulkanFormat
+{
+ VULKAN_FORMAT_UNDEFINED = VK_FORMAT_UNDEFINED,
+ VULKAN_FORMAT_R4G4_UNORM_PACK8 = VK_FORMAT_R4G4_UNORM_PACK8,
+ VULKAN_FORMAT_R4G4B4A4_UNORM_PACK16 = VK_FORMAT_R4G4B4A4_UNORM_PACK16,
+ VULKAN_FORMAT_B4G4R4A4_UNORM_PACK16 = VK_FORMAT_B4G4R4A4_UNORM_PACK16,
+ VULKAN_FORMAT_R5G6B5_UNORM_PACK16 = VK_FORMAT_R5G6B5_UNORM_PACK16,
+ VULKAN_FORMAT_B5G6R5_UNORM_PACK16 = VK_FORMAT_B5G6R5_UNORM_PACK16,
+ VULKAN_FORMAT_R5G5B5A1_UNORM_PACK16 = VK_FORMAT_R5G5B5A1_UNORM_PACK16,
+ VULKAN_FORMAT_B5G5R5A1_UNORM_PACK16 = VK_FORMAT_B5G5R5A1_UNORM_PACK16,
+ VULKAN_FORMAT_A1R5G5B5_UNORM_PACK16 = VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+ VULKAN_FORMAT_R8_UNORM = VK_FORMAT_R8_UNORM,
+ VULKAN_FORMAT_R8_SNORM = VK_FORMAT_R8_SNORM,
+ VULKAN_FORMAT_R8_USCALED = VK_FORMAT_R8_USCALED,
+ VULKAN_FORMAT_R8_SSCALED = VK_FORMAT_R8_SSCALED,
+ VULKAN_FORMAT_R8_UINT = VK_FORMAT_R8_UINT,
+ VULKAN_FORMAT_R8_SINT = VK_FORMAT_R8_SINT,
+ VULKAN_FORMAT_R8_SRGB = VK_FORMAT_R8_SRGB,
+ VULKAN_FORMAT_R8G8_SNORM = VK_FORMAT_R8G8_SNORM,
+ VULKAN_FORMAT_R8G8_UNORM = VK_FORMAT_R8G8_UNORM,
+ VULKAN_FORMAT_R8G8_USCALED = VK_FORMAT_R8G8_USCALED,
+ VULKAN_FORMAT_R8G8_SSCALED = VK_FORMAT_R8G8_SSCALED,
+ VULKAN_FORMAT_R8G8_UINT = VK_FORMAT_R8G8_UINT,
+ VULKAN_FORMAT_R8G8_SINT = VK_FORMAT_R8G8_SINT,
+ VULKAN_FORMAT_R8G8_SRGB = VK_FORMAT_R8G8_SRGB,
+ VULKAN_FORMAT_R8G8B8_UNORM = VK_FORMAT_R8G8B8_UNORM,
+ VULKAN_FORMAT_R8G8B8_SNORM = VK_FORMAT_R8G8B8_SNORM,
+ VULKAN_FORMAT_R8G8B8_USCALED = VK_FORMAT_R8G8B8_USCALED,
+ VULKAN_FORMAT_R8G8B8_SSCALED = VK_FORMAT_R8G8B8_SSCALED,
+ VULKAN_FORMAT_R8G8B8_UINT = VK_FORMAT_R8G8B8_UINT,
+ VULKAN_FORMAT_R8G8B8_SINT = VK_FORMAT_R8G8B8_SINT,
+ VULKAN_FORMAT_R8G8B8_SRGB = VK_FORMAT_R8G8B8_SRGB,
+ VULKAN_FORMAT_B8G8R8_UNORM = VK_FORMAT_B8G8R8_UNORM,
+ VULKAN_FORMAT_B8G8R8_SNORM = VK_FORMAT_B8G8R8_SNORM,
+ VULKAN_FORMAT_B8G8R8_USCALED = VK_FORMAT_B8G8R8_USCALED,
+ VULKAN_FORMAT_B8G8R8_SSCALED = VK_FORMAT_B8G8R8_SSCALED,
+ VULKAN_FORMAT_B8G8R8_UINT = VK_FORMAT_B8G8R8_UINT,
+ VULKAN_FORMAT_B8G8R8_SINT = VK_FORMAT_B8G8R8_SINT,
+ VULKAN_FORMAT_B8G8R8_SRGB = VK_FORMAT_B8G8R8_SRGB,
+ VULKAN_FORMAT_R8G8B8A8_UNORM = VK_FORMAT_R8G8B8A8_UNORM,
+ VULKAN_FORMAT_R8G8B8A8_SNORM = VK_FORMAT_R8G8B8A8_SNORM,
+ VULKAN_FORMAT_R8G8B8A8_USCALED = VK_FORMAT_R8G8B8A8_USCALED,
+ VULKAN_FORMAT_R8G8B8A8_SSCALED = VK_FORMAT_R8G8B8A8_SSCALED,
+ VULKAN_FORMAT_R8G8B8A8_UINT = VK_FORMAT_R8G8B8A8_UINT,
+ VULKAN_FORMAT_R8G8B8A8_SINT = VK_FORMAT_R8G8B8A8_SINT,
+ VULKAN_FORMAT_R8G8B8A8_SRGB = VK_FORMAT_R8G8B8A8_SRGB,
+ VULKAN_FORMAT_B8G8R8A8_UNORM = VK_FORMAT_B8G8R8A8_UNORM,
+ VULKAN_FORMAT_B8G8R8A8_SNORM = VK_FORMAT_B8G8R8A8_SNORM,
+ VULKAN_FORMAT_B8G8R8A8_USCALED = VK_FORMAT_B8G8R8A8_USCALED,
+ VULKAN_FORMAT_B8G8R8A8_SSCALED = VK_FORMAT_B8G8R8A8_SSCALED,
+ VULKAN_FORMAT_B8G8R8A8_UINT = VK_FORMAT_B8G8R8A8_UINT,
+ VULKAN_FORMAT_B8G8R8A8_SINT = VK_FORMAT_B8G8R8A8_SINT,
+ VULKAN_FORMAT_B8G8R8A8_SRGB = VK_FORMAT_B8G8R8A8_SRGB,
+ VULKAN_FORMAT_A8B8G8R8_UNORM_PACK32 = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+ VULKAN_FORMAT_A8B8G8R8_SNORM_PACK32 = VK_FORMAT_A8B8G8R8_SNORM_PACK32,
+ VULKAN_FORMAT_A8B8G8R8_USCALED_PACK32 = VK_FORMAT_A8B8G8R8_USCALED_PACK32,
+ VULKAN_FORMAT_A8B8G8R8_SSCALED_PACK32 = VK_FORMAT_A8B8G8R8_SSCALED_PACK32,
+ VULKAN_FORMAT_A8B8G8R8_UINT_PACK32 = VK_FORMAT_A8B8G8R8_UINT_PACK32,
+ VULKAN_FORMAT_A8B8G8R8_SINT_PACK32 = VK_FORMAT_A8B8G8R8_SINT_PACK32,
+ VULKAN_FORMAT_A8B8G8R8_SRGB_PACK32 = VK_FORMAT_A8B8G8R8_SRGB_PACK32,
+ VULKAN_FORMAT_A2R10G10B10_UNORM_PACK32 = VK_FORMAT_A2R10G10B10_UNORM_PACK32,
+ VULKAN_FORMAT_A2R10G10B10_SNORM_PACK32 = VK_FORMAT_A2R10G10B10_SNORM_PACK32,
+ VULKAN_FORMAT_A2R10G10B10_USCALED_PACK32 =
+ VK_FORMAT_A2R10G10B10_USCALED_PACK32,
+ VULKAN_FORMAT_A2R10G10B10_SSCALED_PACK32 =
+ VK_FORMAT_A2R10G10B10_SSCALED_PACK32,
+ VULKAN_FORMAT_A2R10G10B10_UINT_PACK32 = VK_FORMAT_A2R10G10B10_UINT_PACK32,
+ VULKAN_FORMAT_A2R10G10B10_SINT_PACK32 = VK_FORMAT_A2R10G10B10_SINT_PACK32,
+ VULKAN_FORMAT_A2B10G10R10_UNORM_PACK32 = VK_FORMAT_A2B10G10R10_UNORM_PACK32,
+ VULKAN_FORMAT_A2B10G10R10_SNORM_PACK32 = VK_FORMAT_A2B10G10R10_SNORM_PACK32,
+ VULKAN_FORMAT_A2B10G10R10_USCALED_PACK32 =
+ VK_FORMAT_A2B10G10R10_USCALED_PACK32,
+ VULKAN_FORMAT_A2B10G10R10_SSCALED_PACK32 =
+ VK_FORMAT_A2B10G10R10_SSCALED_PACK32,
+ VULKAN_FORMAT_A2B10G10R10_UINT_PACK32 = VK_FORMAT_A2B10G10R10_UINT_PACK32,
+ VULKAN_FORMAT_A2B10G10R10_SINT_PACK32 = VK_FORMAT_A2B10G10R10_SINT_PACK32,
+ VULKAN_FORMAT_R16_UNORM = VK_FORMAT_R16_UNORM,
+ VULKAN_FORMAT_R16_SNORM = VK_FORMAT_R16_SNORM,
+ VULKAN_FORMAT_R16_USCALED = VK_FORMAT_R16_USCALED,
+ VULKAN_FORMAT_R16_SSCALED = VK_FORMAT_R16_SSCALED,
+ VULKAN_FORMAT_R16_UINT = VK_FORMAT_R16_UINT,
+ VULKAN_FORMAT_R16_SINT = VK_FORMAT_R16_SINT,
+ VULKAN_FORMAT_R16_SFLOAT = VK_FORMAT_R16_SFLOAT,
+ VULKAN_FORMAT_R16G16_UNORM = VK_FORMAT_R16G16_UNORM,
+ VULKAN_FORMAT_R16G16_SNORM = VK_FORMAT_R16G16_SNORM,
+ VULKAN_FORMAT_R16G16_USCALED = VK_FORMAT_R16G16_USCALED,
+ VULKAN_FORMAT_R16G16_SSCALED = VK_FORMAT_R16G16_SSCALED,
+ VULKAN_FORMAT_R16G16_UINT = VK_FORMAT_R16G16_UINT,
+ VULKAN_FORMAT_R16G16_SINT = VK_FORMAT_R16G16_SINT,
+ VULKAN_FORMAT_R16G16_SFLOAT = VK_FORMAT_R16G16_SFLOAT,
+ VULKAN_FORMAT_R16G16B16_UNORM = VK_FORMAT_R16G16B16_UNORM,
+ VULKAN_FORMAT_R16G16B16_SNORM = VK_FORMAT_R16G16B16_SNORM,
+ VULKAN_FORMAT_R16G16B16_USCALED = VK_FORMAT_R16G16B16_USCALED,
+ VULKAN_FORMAT_R16G16B16_SSCALED = VK_FORMAT_R16G16B16_SSCALED,
+ VULKAN_FORMAT_R16G16B16_UINT = VK_FORMAT_R16G16B16_UINT,
+ VULKAN_FORMAT_R16G16B16_SINT = VK_FORMAT_R16G16B16_SINT,
+ VULKAN_FORMAT_R16G16B16_SFLOAT = VK_FORMAT_R16G16B16_SFLOAT,
+ VULKAN_FORMAT_R16G16B16A16_UNORM = VK_FORMAT_R16G16B16A16_UNORM,
+ VULKAN_FORMAT_R16G16B16A16_SNORM = VK_FORMAT_R16G16B16A16_SNORM,
+ VULKAN_FORMAT_R16G16B16A16_USCALED = VK_FORMAT_R16G16B16A16_USCALED,
+ VULKAN_FORMAT_R16G16B16A16_SSCALED = VK_FORMAT_R16G16B16A16_SSCALED,
+ VULKAN_FORMAT_R16G16B16A16_UINT = VK_FORMAT_R16G16B16A16_UINT,
+ VULKAN_FORMAT_R16G16B16A16_SINT = VK_FORMAT_R16G16B16A16_SINT,
+ VULKAN_FORMAT_R16G16B16A16_SFLOAT = VK_FORMAT_R16G16B16A16_SFLOAT,
+ VULKAN_FORMAT_R32_UINT = VK_FORMAT_R32_UINT,
+ VULKAN_FORMAT_R32_SINT = VK_FORMAT_R32_SINT,
+ VULKAN_FORMAT_R32_SFLOAT = VK_FORMAT_R32_SFLOAT,
+ VULKAN_FORMAT_R32G32_UINT = VK_FORMAT_R32G32_UINT,
+ VULKAN_FORMAT_R32G32_SINT = VK_FORMAT_R32G32_SINT,
+ VULKAN_FORMAT_R32G32_SFLOAT = VK_FORMAT_R32G32_SFLOAT,
+ VULKAN_FORMAT_R32G32B32_UINT = VK_FORMAT_R32G32B32_UINT,
+ VULKAN_FORMAT_R32G32B32_SINT = VK_FORMAT_R32G32B32_SINT,
+ VULKAN_FORMAT_R32G32B32_SFLOAT = VK_FORMAT_R32G32B32_SFLOAT,
+ VULKAN_FORMAT_R32G32B32A32_UINT = VK_FORMAT_R32G32B32A32_UINT,
+ VULKAN_FORMAT_R32G32B32A32_SINT = VK_FORMAT_R32G32B32A32_SINT,
+ VULKAN_FORMAT_R32G32B32A32_SFLOAT = VK_FORMAT_R32G32B32A32_SFLOAT,
+ VULKAN_FORMAT_R64_UINT = VK_FORMAT_R64_UINT,
+ VULKAN_FORMAT_R64_SINT = VK_FORMAT_R64_SINT,
+ VULKAN_FORMAT_R64_SFLOAT = VK_FORMAT_R64_SFLOAT,
+ VULKAN_FORMAT_R64G64_UINT = VK_FORMAT_R64G64_UINT,
+ VULKAN_FORMAT_R64G64_SINT = VK_FORMAT_R64G64_SINT,
+ VULKAN_FORMAT_R64G64_SFLOAT = VK_FORMAT_R64G64_SFLOAT,
+ VULKAN_FORMAT_R64G64B64_UINT = VK_FORMAT_R64G64B64_UINT,
+ VULKAN_FORMAT_R64G64B64_SINT = VK_FORMAT_R64G64B64_SINT,
+ VULKAN_FORMAT_R64G64B64_SFLOAT = VK_FORMAT_R64G64B64_SFLOAT,
+ VULKAN_FORMAT_R64G64B64A64_UINT = VK_FORMAT_R64G64B64A64_UINT,
+ VULKAN_FORMAT_R64G64B64A64_SINT = VK_FORMAT_R64G64B64A64_SINT,
+ VULKAN_FORMAT_R64G64B64A64_SFLOAT = VK_FORMAT_R64G64B64A64_SFLOAT,
+ VULKAN_FORMAT_B10G11R11_UFLOAT_PACK32 = VK_FORMAT_B10G11R11_UFLOAT_PACK32,
+ VULKAN_FORMAT_E5B9G9R9_UFLOAT_PACK32 = VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
+ VULKAN_FORMAT_D16_UNORM = VK_FORMAT_D16_UNORM,
+ VULKAN_FORMAT_X8_D24_UNORM_PACK32 = VK_FORMAT_X8_D24_UNORM_PACK32,
+ VULKAN_FORMAT_D32_SFLOAT = VK_FORMAT_D32_SFLOAT,
+ VULKAN_FORMAT_S8_UINT = VK_FORMAT_S8_UINT,
+ VULKAN_FORMAT_D16_UNORM_S8_UINT = VK_FORMAT_D16_UNORM_S8_UINT,
+ VULKAN_FORMAT_D24_UNORM_S8_UINT = VK_FORMAT_D24_UNORM_S8_UINT,
+ VULKAN_FORMAT_D32_SFLOAT_S8_UINT = VK_FORMAT_D32_SFLOAT_S8_UINT,
+ VULKAN_FORMAT_BC1_RGB_UNORM_BLOCK = VK_FORMAT_BC1_RGB_UNORM_BLOCK,
+ VULKAN_FORMAT_BC1_RGB_SRGB_BLOCK = VK_FORMAT_BC1_RGB_SRGB_BLOCK,
+ VULKAN_FORMAT_BC1_RGBA_UNORM_BLOCK = VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
+ VULKAN_FORMAT_BC1_RGBA_SRGB_BLOCK = VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
+ VULKAN_FORMAT_BC2_UNORM_BLOCK = VK_FORMAT_BC2_UNORM_BLOCK,
+ VULKAN_FORMAT_BC2_SRGB_BLOCK = VK_FORMAT_BC2_SRGB_BLOCK,
+ VULKAN_FORMAT_BC3_UNORM_BLOCK = VK_FORMAT_BC3_UNORM_BLOCK,
+ VULKAN_FORMAT_BC3_SRGB_BLOCK = VK_FORMAT_BC3_SRGB_BLOCK,
+ VULKAN_FORMAT_BC4_UNORM_BLOCK = VK_FORMAT_BC4_UNORM_BLOCK,
+ VULKAN_FORMAT_BC4_SNORM_BLOCK = VK_FORMAT_BC4_SNORM_BLOCK,
+ VULKAN_FORMAT_BC5_UNORM_BLOCK = VK_FORMAT_BC5_UNORM_BLOCK,
+ VULKAN_FORMAT_BC5_SNORM_BLOCK = VK_FORMAT_BC5_SNORM_BLOCK,
+ VULKAN_FORMAT_BC6H_UFLOAT_BLOCK = VK_FORMAT_BC6H_UFLOAT_BLOCK,
+ VULKAN_FORMAT_BC6H_SFLOAT_BLOCK = VK_FORMAT_BC6H_SFLOAT_BLOCK,
+ VULKAN_FORMAT_BC7_UNORM_BLOCK = VK_FORMAT_BC7_UNORM_BLOCK,
+ VULKAN_FORMAT_BC7_SRGB_BLOCK = VK_FORMAT_BC7_SRGB_BLOCK,
+ VULKAN_FORMAT_ETC2_R8G8B8_UNORM_BLOCK = VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK,
+ VULKAN_FORMAT_ETC2_R8G8B8_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK,
+ VULKAN_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK =
+ VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK,
+ VULKAN_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK,
+ VULKAN_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK =
+ VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK,
+ VULKAN_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK,
+ VULKAN_FORMAT_EAC_R11_UNORM_BLOCK = VK_FORMAT_EAC_R11_UNORM_BLOCK,
+ VULKAN_FORMAT_EAC_R11_SNORM_BLOCK = VK_FORMAT_EAC_R11_SNORM_BLOCK,
+ VULKAN_FORMAT_EAC_R11G11_UNORM_BLOCK = VK_FORMAT_EAC_R11G11_UNORM_BLOCK,
+ VULKAN_FORMAT_EAC_R11G11_SNORM_BLOCK = VK_FORMAT_EAC_R11G11_SNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_4x4_UNORM_BLOCK = VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_4x4_SRGB_BLOCK = VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_5x4_UNORM_BLOCK = VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_5x4_SRGB_BLOCK = VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_5x5_UNORM_BLOCK = VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_5x5_SRGB_BLOCK = VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_6x5_UNORM_BLOCK = VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_6x5_SRGB_BLOCK = VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_6x6_UNORM_BLOCK = VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_6x6_SRGB_BLOCK = VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_8x5_UNORM_BLOCK = VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_8x5_SRGB_BLOCK = VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_8x6_UNORM_BLOCK = VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_8x6_SRGB_BLOCK = VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_8x8_UNORM_BLOCK = VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_8x8_SRGB_BLOCK = VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_10x5_UNORM_BLOCK = VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_10x5_SRGB_BLOCK = VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_10x6_UNORM_BLOCK = VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_10x6_SRGB_BLOCK = VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_10x8_UNORM_BLOCK = VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_10x8_SRGB_BLOCK = VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_10x10_UNORM_BLOCK = VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_10x10_SRGB_BLOCK = VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_12x10_UNORM_BLOCK = VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_12x10_SRGB_BLOCK = VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
+ VULKAN_FORMAT_ASTC_12x12_UNORM_BLOCK = VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
+ VULKAN_FORMAT_ASTC_12x12_SRGB_BLOCK = VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
+};
+
+enum VulkanImageLayout
+{
+ VULKAN_IMAGE_LAYOUT_UNDEFINED = VK_IMAGE_LAYOUT_UNDEFINED,
+ VULKAN_IMAGE_LAYOUT_GENERAL = VK_IMAGE_LAYOUT_GENERAL,
+ VULKAN_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL =
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL =
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+};
+
+enum VulkanImageUsage
+{
+ VULKAN_IMAGE_USAGE_TRANSFER_SRC = VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
+ VULKAN_IMAGE_USAGE_TRANSFER_DST = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+ VULKAN_IMAGE_USAGE_SAMPLED = VK_IMAGE_USAGE_SAMPLED_BIT,
+ VULKAN_IMAGE_USAGE_STORAGE = VK_IMAGE_USAGE_STORAGE_BIT,
+ VULKAN_IMAGE_USAGE_COLOR_ATTACHMENT = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ VULKAN_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT =
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ VULKAN_IMAGE_USAGE_TRANSIENT_ATTACHMENT =
+ VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT,
+ VULKAN_IMAGE_USAGE_INPUT_ATTACHMENT = VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT,
+ VULKAN_IMAGE_USAGE_TRANSFER_SRC_DST =
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+ VULKAN_IMAGE_USAGE_STORAGE_TRANSFER_SRC_DST = VULKAN_IMAGE_USAGE_STORAGE
+ | VULKAN_IMAGE_USAGE_TRANSFER_SRC | VULKAN_IMAGE_USAGE_TRANSFER_DST,
+ VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST =
+ VK_IMAGE_USAGE_SAMPLED_BIT | VULKAN_IMAGE_USAGE_STORAGE
+ | VULKAN_IMAGE_USAGE_TRANSFER_SRC | VULKAN_IMAGE_USAGE_TRANSFER_DST
+};
+
+enum VulkanImageTiling
+{
+ VULKAN_IMAGE_TILING_OPTIMAL = VK_IMAGE_TILING_OPTIMAL,
+ VULKAN_IMAGE_TILING_LINEAR = VK_IMAGE_TILING_LINEAR
+};
+
+enum VulkanImageCreateFlag
+{
+ VULKAN_IMAGE_CREATE_FLAG_NONE = 0,
+ VULKAN_IMAGE_CREATE_FLAG_MUTABLE_FORMAT =
+ VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
+ VULKAN_IMAGE_CREATE_FLAG_CUBE_COMPATIBLE =
+ VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT,
+ VULKAN_IMAGE_CREATE_FLAG_CUBE_COMPATIBLE_MUTABLE_FORMAT =
+ VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT | VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
+};
+
+enum VulkanImageViewType
+{
+ VULKAN_IMAGE_VIEW_TYPE_1D = VK_IMAGE_VIEW_TYPE_1D,
+ VULKAN_IMAGE_VIEW_TYPE_2D = VK_IMAGE_VIEW_TYPE_2D,
+ VULKAN_IMAGE_VIEW_TYPE_3D = VK_IMAGE_VIEW_TYPE_3D,
+ VULKAN_IMAGE_VIEW_TYPE_CUBE = VK_IMAGE_VIEW_TYPE_CUBE,
+ VULKAN_IMAGE_VIEW_TYPE_1D_ARRAY = VK_IMAGE_VIEW_TYPE_1D_ARRAY,
+ VULKAN_IMAGE_VIEW_TYPE_2D_ARRAY = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
+ VULKAN_IMAGE_VIEW_TYPE_CUBE_ARRAY = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
+};
+
+#endif // _vulkan_wrapper_types_hpp_
diff --git a/test_conformance/workgroups/CMakeLists.txt b/test_conformance/workgroups/CMakeLists.txt
index 08886086..0c004b32 100644
--- a/test_conformance/workgroups/CMakeLists.txt
+++ b/test_conformance/workgroups/CMakeLists.txt
@@ -5,15 +5,8 @@ set(${MODULE_NAME}_SOURCES
test_wg_all.cpp
test_wg_any.cpp
test_wg_broadcast.cpp
- test_wg_reduce.cpp
- test_wg_reduce_max.cpp
- test_wg_reduce_min.cpp
- test_wg_scan_exclusive_add.cpp
- test_wg_scan_exclusive_min.cpp
- test_wg_scan_exclusive_max.cpp
- test_wg_scan_inclusive_add.cpp
- test_wg_scan_inclusive_min.cpp
- test_wg_scan_inclusive_max.cpp
+ test_wg_scan_reduce.cpp
+ test_wg_suggested_local_work_size.cpp
)
include(../CMakeCommon.txt)
diff --git a/test_conformance/workgroups/main.cpp b/test_conformance/workgroups/main.cpp
index 41ffa741..abb1145b 100644
--- a/test_conformance/workgroups/main.cpp
+++ b/test_conformance/workgroups/main.cpp
@@ -24,27 +24,30 @@
#endif
test_definition test_list[] = {
- ADD_TEST(work_group_all),
- ADD_TEST(work_group_any),
- ADD_TEST(work_group_reduce_add),
- ADD_TEST(work_group_reduce_min),
- ADD_TEST(work_group_reduce_max),
- ADD_TEST(work_group_scan_inclusive_add),
- ADD_TEST(work_group_scan_inclusive_min),
- ADD_TEST(work_group_scan_inclusive_max),
- ADD_TEST(work_group_scan_exclusive_add),
- ADD_TEST(work_group_scan_exclusive_min),
- ADD_TEST(work_group_scan_exclusive_max),
- ADD_TEST(work_group_broadcast_1D),
- ADD_TEST(work_group_broadcast_2D),
- ADD_TEST(work_group_broadcast_3D),
+ ADD_TEST_VERSION(work_group_all, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_any, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_reduce_add, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_reduce_min, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_reduce_max, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_scan_inclusive_add, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_scan_inclusive_min, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_scan_inclusive_max, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_scan_exclusive_add, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_scan_exclusive_min, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_scan_exclusive_max, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_broadcast_1D, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_broadcast_2D, Version(2, 0)),
+ ADD_TEST_VERSION(work_group_broadcast_3D, Version(2, 0)),
+ ADD_TEST(work_group_suggested_local_size_1D),
+ ADD_TEST(work_group_suggested_local_size_2D),
+ ADD_TEST(work_group_suggested_local_size_3D)
};
const int test_num = ARRAY_SIZE(test_list);
test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device);
- auto expected_min_version = Version(2, 0);
+ auto expected_min_version = Version(1, 2);
if (version < expected_min_version)
{
version_expected_info("Test", "OpenCL",
diff --git a/test_conformance/workgroups/procs.h b/test_conformance/workgroups/procs.h
index 2e6e79e2..6143d525 100644
--- a/test_conformance/workgroups/procs.h
+++ b/test_conformance/workgroups/procs.h
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
+// Copyright (c) 2017, 2021 The Khronos Group Inc.
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -16,6 +16,7 @@
#include "harness/testHarness.h"
#include "harness/kernelHelpers.h"
#include "harness/errorHelpers.h"
+#include "harness/typeWrappers.h"
#include "harness/conversions.h"
#include "harness/mt19937.h"
@@ -36,3 +37,16 @@ extern int test_work_group_scan_exclusive_max(cl_device_id deviceID, cl_context
extern int test_work_group_scan_inclusive_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_work_group_scan_inclusive_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_work_group_scan_inclusive_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int test_work_group_suggested_local_size_1D(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int n_elems);
+extern int test_work_group_suggested_local_size_2D(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int n_elems);
+extern int test_work_group_suggested_local_size_3D(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int n_elems);
diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp b/test_conformance/workgroups/test_wg_broadcast.cpp
index 35559476..29380211 100644
--- a/test_conformance/workgroups/test_wg_broadcast.cpp
+++ b/test_conformance/workgroups/test_wg_broadcast.cpp
@@ -20,6 +20,8 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <algorithm>
+
#include "procs.h"
@@ -310,7 +312,7 @@ test_work_group_broadcast_2D(cl_device_id device, cl_context context, cl_command
localsize[0] = localsize[1] = 1;
}
- num_workgroups = MAX(n_elems/wg_size[0], 16);
+ num_workgroups = std::max(n_elems / wg_size[0], (size_t)16);
globalsize[0] = num_workgroups * localsize[0];
globalsize[1] = num_workgroups * localsize[1];
num_elements = globalsize[0] * globalsize[1];
@@ -437,7 +439,7 @@ test_work_group_broadcast_3D(cl_device_id device, cl_context context, cl_command
localsize[0] = localsize[1] = localsize[2] = 1;
}
- num_workgroups = MAX(n_elems/wg_size[0], 8);
+ num_workgroups = std::max(n_elems / wg_size[0], (size_t)8);
globalsize[0] = num_workgroups * localsize[0];
globalsize[1] = num_workgroups * localsize[1];
globalsize[2] = num_workgroups * localsize[2];
diff --git a/test_conformance/workgroups/test_wg_reduce.cpp b/test_conformance/workgroups/test_wg_reduce.cpp
deleted file mode 100644
index eb26f498..00000000
--- a/test_conformance/workgroups/test_wg_reduce.cpp
+++ /dev/null
@@ -1,596 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-
-const char *wg_reduce_add_kernel_code_int =
-"__kernel void test_wg_reduce_add_int(global int *input, global int *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int result = work_group_reduce_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_reduce_add_kernel_code_uint =
-"__kernel void test_wg_reduce_add_uint(global uint *input, global uint *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" uint result = work_group_reduce_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-const char *wg_reduce_add_kernel_code_long =
-"__kernel void test_wg_reduce_add_long(global long *input, global long *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" long result = work_group_reduce_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_reduce_add_kernel_code_ulong =
-"__kernel void test_wg_reduce_add_ulong(global ulong *input, global ulong *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" ulong result = work_group_reduce_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-static int
-verify_wg_reduce_add_int(int *inptr, int *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- int sum = 0;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- sum += inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( sum != outptr[i+j] )
- {
- log_info("work_group_reduce_add int: Error at %u: expected = %d, got = %d\n", i+j, sum, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_reduce_add_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- unsigned int sum = 0;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- sum += inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( sum != outptr[i+j] )
- {
- log_info("work_group_reduce_add uint: Error at %u: expected = %d, got = %d\n", i+j, sum, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_reduce_add_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- cl_long sum = 0;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- sum += inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( sum != outptr[i+j] )
- {
- log_info("work_group_reduce_add long: Error at %u: expected = %lld, got = %lld\n", i+j, sum, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_reduce_add_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- cl_ulong sum = 0;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- sum += inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( sum != outptr[i+j] )
- {
- log_info("work_group_reduce_add ulong: Error at %u: expected = %llu, got = %llu\n", i+j, sum, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-
-
-int
-test_work_group_reduce_add_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_int *input_ptr[1], *p;
- cl_int *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_add_kernel_code_int,
- "test_wg_reduce_add_int");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)num_elements;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_add_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_add int failed\n");
- return -1;
- }
- log_info("work_group_reduce_add int passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_reduce_add_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_uint *input_ptr[1], *p;
- cl_uint *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_add_kernel_code_uint,
- "test_wg_reduce_add_uint");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_add_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_add uint failed\n");
- return -1;
- }
- log_info("work_group_reduce_add uint passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-int
-test_work_group_reduce_add_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_long *input_ptr[1], *p;
- cl_long *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_add_kernel_code_long,
- "test_wg_reduce_add_long");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_add_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_add long failed\n");
- return -1;
- }
- log_info("work_group_reduce_add long passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_reduce_add_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_ulong *input_ptr[1], *p;
- cl_ulong *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_add_kernel_code_ulong,
- "test_wg_reduce_add_ulong");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_add_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_add ulong failed\n");
- return -1;
- }
- log_info("work_group_reduce_add ulong passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_reduce_add(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- int err;
-
- err = test_work_group_reduce_add_int(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_reduce_add_uint(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_reduce_add_long(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_reduce_add_ulong(device, context, queue, n_elems);
- return err;
-}
-
diff --git a/test_conformance/workgroups/test_wg_reduce_max.cpp b/test_conformance/workgroups/test_wg_reduce_max.cpp
deleted file mode 100644
index 3bbd3f25..00000000
--- a/test_conformance/workgroups/test_wg_reduce_max.cpp
+++ /dev/null
@@ -1,632 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-
-const char *wg_reduce_max_kernel_code_int =
-"__kernel void test_wg_reduce_max_int(global int *input, global int *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int result = work_group_reduce_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_reduce_max_kernel_code_uint =
-"__kernel void test_wg_reduce_max_uint(global uint *input, global uint *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" uint result = work_group_reduce_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-const char *wg_reduce_max_kernel_code_long =
-"__kernel void test_wg_reduce_max_long(global long *input, global long *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" long result = work_group_reduce_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_reduce_max_kernel_code_ulong =
-"__kernel void test_wg_reduce_max_ulong(global ulong *input, global ulong *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" ulong result = work_group_reduce_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-static int
-verify_wg_reduce_max_int(int *inptr, int *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- int max = CL_INT_MIN;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- max = (max > inptr[i+j]) ? max : inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( max != outptr[i+j] )
- {
- log_info("work_group_reduce_max int: Error at %u: expected = %d, got = %d\n", i+j, max, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_reduce_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- unsigned int max = 0;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- max = (max > inptr[i+j]) ? max : inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( max != outptr[i+j] )
- {
- log_info("work_group_reduce_max uint: Error at %u: expected = %d, got = %d\n", i+j, max, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_reduce_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- cl_long max = CL_LONG_MIN;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- max = (max > inptr[i+j]) ? max : inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( max != outptr[i+j] )
- {
- log_info("work_group_reduce_max long: Error at %u: expected = %lld, got = %lld\n", i+j, max, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_reduce_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- cl_ulong max = 0;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- max = (max > inptr[i+j]) ? max : inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( max != outptr[i+j] )
- {
- log_info("work_group_reduce_max ulong: Error at %u: expected = %llu, got = %llu\n", i+j, max, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-
-
-int
-test_work_group_reduce_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_int *input_ptr[1], *p;
- cl_int *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_max_kernel_code_int,
- "test_wg_reduce_max_int");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)num_elements;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_max_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_max int failed\n");
- return -1;
- }
- log_info("work_group_reduce_max int passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_reduce_max_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_uint *input_ptr[1], *p;
- cl_uint *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_max_kernel_code_uint,
- "test_wg_reduce_max_uint");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_max_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_max uint failed\n");
- return -1;
- }
- log_info("work_group_reduce_max uint passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-int
-test_work_group_reduce_max_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_long *input_ptr[1], *p;
- cl_long *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_max_kernel_code_long,
- "test_wg_reduce_max_long");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_max_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_max long failed\n");
- return -1;
- }
- log_info("work_group_reduce_max long passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_reduce_max_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_ulong *input_ptr[1], *p;
- cl_ulong *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_max_kernel_code_ulong,
- "test_wg_reduce_max_ulong");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_max_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_max ulong failed\n");
- return -1;
- }
- log_info("work_group_reduce_max ulong passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_reduce_max(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- int err;
-
- err = test_work_group_reduce_max_int(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_reduce_max_uint(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_reduce_max_long(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_reduce_max_ulong(device, context, queue, n_elems);
- return err;
-}
-
diff --git a/test_conformance/workgroups/test_wg_reduce_min.cpp b/test_conformance/workgroups/test_wg_reduce_min.cpp
deleted file mode 100644
index 7b1b22e8..00000000
--- a/test_conformance/workgroups/test_wg_reduce_min.cpp
+++ /dev/null
@@ -1,632 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-
-const char *wg_reduce_min_kernel_code_int =
-"__kernel void test_wg_reduce_min_int(global int *input, global int *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int result = work_group_reduce_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_reduce_min_kernel_code_uint =
-"__kernel void test_wg_reduce_min_uint(global uint *input, global uint *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" uint result = work_group_reduce_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-const char *wg_reduce_min_kernel_code_long =
-"__kernel void test_wg_reduce_min_long(global long *input, global long *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" long result = work_group_reduce_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_reduce_min_kernel_code_ulong =
-"__kernel void test_wg_reduce_min_ulong(global ulong *input, global ulong *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" ulong result = work_group_reduce_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-static int
-verify_wg_reduce_min_int(int *inptr, int *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- int min = CL_INT_MAX;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- min = (min < inptr[i+j]) ? min : inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( min != outptr[i+j] )
- {
- log_info("work_group_reduce_min int: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_reduce_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- unsigned int min = CL_UINT_MAX;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- min = (min < inptr[i+j]) ? min : inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( min != outptr[i+j] )
- {
- log_info("work_group_reduce_min uint: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_reduce_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- cl_long min = CL_ULONG_MAX;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- min = (min < inptr[i+j]) ? min : inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( min != outptr[i+j] )
- {
- log_info("work_group_reduce_min long: Error at %u: expected = %lld, got = %lld\n", i+j, min, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_reduce_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size)
-{
- size_t i, j;
-
- for (i=0; i<n; i+=wg_size)
- {
- cl_ulong min = CL_ULONG_MAX;
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- min = (min < inptr[i+j]) ? min : inptr[i+j];
-
- for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
- {
- if ( min != outptr[i+j] )
- {
- log_info("work_group_reduce_min ulong: Error at %u: expected = %llu, got = %llu\n", i+j, min, outptr[i+j]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-
-
-int
-test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_int *input_ptr[1], *p;
- cl_int *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_min_kernel_code_int,
- "test_wg_reduce_min_int");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)num_elements;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_min_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_min int failed\n");
- return -1;
- }
- log_info("work_group_reduce_min int passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_reduce_min_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_uint *input_ptr[1], *p;
- cl_uint *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_min_kernel_code_uint,
- "test_wg_reduce_min_uint");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_min_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_min uint failed\n");
- return -1;
- }
- log_info("work_group_reduce_min uint passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-int
-test_work_group_reduce_min_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_long *input_ptr[1], *p;
- cl_long *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_min_kernel_code_long,
- "test_wg_reduce_min_long");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_min_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_min long failed\n");
- return -1;
- }
- log_info("work_group_reduce_min long passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_reduce_min_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_ulong *input_ptr[1], *p;
- cl_ulong *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_reduce_min_kernel_code_ulong,
- "test_wg_reduce_min_ulong");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_reduce_min_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_reduce_min ulong failed\n");
- return -1;
- }
- log_info("work_group_reduce_min ulong passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_reduce_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- int err;
-
- err = test_work_group_reduce_min_int(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_reduce_min_uint(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_reduce_min_long(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_reduce_min_ulong(device, context, queue, n_elems);
- return err;
-}
-
diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp
deleted file mode 100644
index e695a165..00000000
--- a/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp
+++ /dev/null
@@ -1,604 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-
-const char *wg_scan_exclusive_add_kernel_code_int =
-"__kernel void test_wg_scan_exclusive_add_int(global int *input, global int *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int result = work_group_scan_exclusive_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_exclusive_add_kernel_code_uint =
-"__kernel void test_wg_scan_exclusive_add_uint(global uint *input, global uint *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" uint result = work_group_scan_exclusive_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-const char *wg_scan_exclusive_add_kernel_code_long =
-"__kernel void test_wg_scan_exclusive_add_long(global long *input, global long *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" long result = work_group_scan_exclusive_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_exclusive_add_kernel_code_ulong =
-"__kernel void test_wg_scan_exclusive_add_ulong(global ulong *input, global ulong *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" ulong result = work_group_scan_exclusive_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-static int
-verify_wg_scan_exclusive_add_int(int *inptr, int *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
- int s, lasts;
-
-
-
- for (j = 0; j < n; j += wg_size) {
- m = n - j;
- if (m > wg_size) m = wg_size;
-
- s = 0;
- lasts = 0;
- for (i = 0; i < m; ++i) {
- s += inptr[j + i];
- if (outptr[j + i] != lasts) {
- log_info("work_group_scan_exclusive_add int: Error at %u: expected = %d, got = %d\n",
- (unsigned int)(j + i), lasts, outptr[j + i]);
- return -1;
- }
- lasts = s;
- }
- }
- return 0;
-}
-
-static int
-verify_wg_scan_exclusive_add_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
- unsigned int s, lasts;
-
- for (j = 0; j < n; j += wg_size) {
- m = n - j;
- if (m > wg_size) m = wg_size;
- s = 0;
- lasts = 0;
- for (i = 0; i < m; ++i) {
- s += inptr[j + i];
- if (outptr[j + i] != lasts) {
- log_info("work_group_scan_exclusive_add uint: Error at %u: expected = %u, got = %u\n",
- (unsigned int)(j + i), lasts, outptr[j + i]);
- return -1;
- }
- lasts = s;
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_exclusive_add_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
- cl_long s, lasts;
-
- for (j = 0; j < n; j += wg_size) {
- m = n - j;
- if (m > wg_size) m = wg_size;
- s = 0;
-
- lasts = 0;
- for (i = 0; i < m; ++i) {
- s += inptr[j + i];
-
- if (outptr[j + i] != lasts) {
- log_info("work_group_scan_exclusive_add long: Error at %u: expected = %lld, got = %lld\n",
- (unsigned int)(j + i), (long long)lasts, (long long)outptr[j + i]);
- return -1;
- }
- lasts = s;
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_exclusive_add_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- cl_ulong s, lasts;
-
- for (j = 0; j < n; j += wg_size) {
- m = n - j;
- if (m > wg_size) m = wg_size;
-
- s = 0;
- lasts = 0;
- for (i = 0; i < m; ++i) {
- s += inptr[j + i];
- if (outptr[j + i] != lasts) {
- log_info("work_group_scan_exclusive_add ulong: Error at %u: expected = %llu, got = %llu\n",
- (unsigned int)(j + i), (unsigned long long)lasts, (unsigned long long)outptr[j + i]);
- return -1;
- }
- lasts = s;
- }
- }
- return 0;
-}
-
-
-int
-test_work_group_scan_exclusive_add_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_int *input_ptr[1], *p;
- cl_int *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_add_kernel_code_int,
- "test_wg_scan_exclusive_add_int");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)num_elements;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_add_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusive_add int failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_add int passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_exclusive_add_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_uint *input_ptr[1], *p;
- cl_uint *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_add_kernel_code_uint,
- "test_wg_scan_exclusive_add_uint");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_add_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusive_add uint failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_add uint passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-int
-test_work_group_scan_exclusive_add_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_long *input_ptr[1], *p;
- cl_long *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_add_kernel_code_long,
- "test_wg_scan_exclusive_add_long");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_add_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusive_add long failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_add long passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_exclusive_add_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_ulong *input_ptr[1], *p;
- cl_ulong *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_add_kernel_code_ulong,
- "test_wg_scan_exclusive_add_ulong");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_add_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusiveadd ulong failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_add ulong passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_exclusive_add(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- int err;
-
- err = test_work_group_scan_exclusive_add_int(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_exclusive_add_uint(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_exclusive_add_long(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_exclusive_add_ulong(device, context, queue, n_elems);
- return err;
-}
-
diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp
deleted file mode 100644
index 12338b68..00000000
--- a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp
+++ /dev/null
@@ -1,631 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-
-const char *wg_scan_exclusive_max_kernel_code_int =
-"__kernel void test_wg_scan_exclusive_max_int(global int *input, global int *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int result = work_group_scan_exclusive_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_exclusive_max_kernel_code_uint =
-"__kernel void test_wg_scan_exclusive_max_uint(global uint *input, global uint *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" uint result = work_group_scan_exclusive_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-const char *wg_scan_exclusive_max_kernel_code_long =
-"__kernel void test_wg_scan_exclusive_max_long(global long *input, global long *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" long result = work_group_scan_exclusive_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_exclusive_max_kernel_code_ulong =
-"__kernel void test_wg_scan_exclusive_max_ulong(global ulong *input, global ulong *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" ulong result = work_group_scan_exclusive_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-static int
-verify_wg_scan_exclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- int max_ = 0x80000000;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- if (outptr[j+i] != max_) {
- log_info("work_group_scan_exclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]);
- return -1;
- }
- max_ = MAX(inptr[j+i], max_);
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_exclusive_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- unsigned int max_ = 0x0;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- if (outptr[j+i] != max_) {
- log_info("work_group_scan_exclusive_max int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), max_, outptr[j+i]);
- return -1;
- }
- max_ = MAX(inptr[j+i], max_);
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_exclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- cl_long max_ = 0x8000000000000000ULL;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- if (outptr[j+i] != max_) {
- log_info("work_group_scan_exclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]);
- return -1;
- }
- max_ = MAX(inptr[j+i], max_);
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_exclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- cl_ulong max_ = 0x0;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- if (outptr[j+i] != max_) {
- log_info("work_group_scan_exclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]);
- return -1;
- }
- max_ = MAX(inptr[j+i], max_);
- }
- }
-
- return 0;
-}
-
-
-int
-test_work_group_scan_exclusive_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_int *input_ptr[1], *p;
- cl_int *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_max_kernel_code_int,
- "test_wg_scan_exclusive_max_int");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)num_elements;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_max_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusive_max int failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_max int passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_exclusive_max_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_uint *input_ptr[1], *p;
- cl_uint *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_max_kernel_code_uint,
- "test_wg_scan_exclusive_max_uint");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_max_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusive_max uint failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_max uint passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-int
-test_work_group_scan_exclusive_max_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_long *input_ptr[1], *p;
- cl_long *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_max_kernel_code_long,
- "test_wg_scan_exclusive_max_long");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_max_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusive_max long failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_max long passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_exclusive_max_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_ulong *input_ptr[1], *p;
- cl_ulong *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_max_kernel_code_ulong,
- "test_wg_scan_exclusive_max_ulong");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_max_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusiveadd ulong failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_max ulong passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_exclusive_max(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- int err;
-
- err = test_work_group_scan_exclusive_max_int(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_exclusive_max_uint(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_exclusive_max_long(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_exclusive_max_ulong(device, context, queue, n_elems);
- return err;
-}
-
diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp
deleted file mode 100644
index f4e6bf97..00000000
--- a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp
+++ /dev/null
@@ -1,632 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-
-const char *wg_scan_exclusive_min_kernel_code_int =
-"__kernel void test_wg_scan_exclusive_min_int(global int *input, global int *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int result = work_group_scan_exclusive_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_exclusive_min_kernel_code_uint =
-"__kernel void test_wg_scan_exclusive_min_uint(global uint *input, global uint *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" uint result = work_group_scan_exclusive_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-const char *wg_scan_exclusive_min_kernel_code_long =
-"__kernel void test_wg_scan_exclusive_min_long(global long *input, global long *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" long result = work_group_scan_exclusive_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_exclusive_min_kernel_code_ulong =
-"__kernel void test_wg_scan_exclusive_min_ulong(global ulong *input, global ulong *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" ulong result = work_group_scan_exclusive_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-
-static int
-verify_wg_scan_exclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- int min_ = 0x7fffffff;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- if (outptr[j+i] != min_) {
- log_info("work_group_scan_exclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]);
- return -1;
- }
- min_ = MIN(inptr[j+i], min_);
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_exclusive_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- unsigned int min_ = 0xffffffff;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- if (outptr[j+i] != min_) {
- log_info("work_group_scan_exclusive_min int: Error at %u: expected = %u, got = %u\n", j+i, min_, outptr[j+i]);
- return -1;
- }
- min_ = MIN(inptr[j+i], min_);
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_exclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- cl_long min_ = 0x7fffffffffffffffULL;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- if (outptr[j+i] != min_) {
- log_info("work_group_scan_exclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]);
- return -1;
- }
- min_ = MIN(inptr[j+i], min_);
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_exclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- cl_ulong min_ = 0xffffffffffffffffULL;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- if (outptr[j+i] != min_) {
- log_info("work_group_scan_exclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]);
- return -1;
- }
- min_ = MIN(inptr[j+i], min_);
- }
- }
-
- return 0;
-}
-
-
-int
-test_work_group_scan_exclusive_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_int *input_ptr[1], *p;
- cl_int *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_min_kernel_code_int,
- "test_wg_scan_exclusive_min_int");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)num_elements;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_min_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusive_min int failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_min int passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_exclusive_min_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_uint *input_ptr[1], *p;
- cl_uint *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_min_kernel_code_uint,
- "test_wg_scan_exclusive_min_uint");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_min_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusive_min uint failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_min uint passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-int
-test_work_group_scan_exclusive_min_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_long *input_ptr[1], *p;
- cl_long *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_min_kernel_code_long,
- "test_wg_scan_exclusive_min_long");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_min_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusive_min long failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_min long passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_exclusive_min_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_ulong *input_ptr[1], *p;
- cl_ulong *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t wg_sizes_per_dimension[3];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_exclusive_min_kernel_code_ulong,
- "test_wg_scan_exclusive_min_ulong");
- if (err)
- return -1;
-
- err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
- if (err)
- return -1;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
- if (err)
- return -1;
- if(wg_sizes_per_dimension[0] < wg_size[0])
- {
- wg_size[0] = wg_sizes_per_dimension[0];
- }
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_exclusive_min_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_exclusiveadd ulong failed\n");
- return -1;
- }
- log_info("work_group_scan_exclusive_min ulong passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_exclusive_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- int err;
-
- err = test_work_group_scan_exclusive_min_int(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_exclusive_min_uint(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_exclusive_min_long(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_exclusive_min_ulong(device, context, queue, n_elems);
- return err;
-}
-
diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp
deleted file mode 100644
index 51c98a4e..00000000
--- a/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp
+++ /dev/null
@@ -1,593 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-
-const char *wg_scan_inclusive_add_kernel_code_int =
-"__kernel void test_wg_scan_inclusive_add_int(global int *input, global int *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int result = work_group_scan_inclusive_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_inclusive_add_kernel_code_uint =
-"__kernel void test_wg_scan_inclusive_add_uint(global uint *input, global uint *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" uint result = work_group_scan_inclusive_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-const char *wg_scan_inclusive_add_kernel_code_long =
-"__kernel void test_wg_scan_inclusive_add_long(global long *input, global long *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" long result = work_group_scan_inclusive_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_inclusive_add_kernel_code_ulong =
-"__kernel void test_wg_scan_inclusive_add_ulong(global ulong *input, global ulong *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" ulong result = work_group_scan_inclusive_add(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-static int
-verify_wg_scan_inclusive_add_int(int *inptr, int *outptr, size_t n, size_t wg_size)
-{
- size_t i, j, m;
- int s;
-
- for (j=0; j<n; j+=wg_size) {
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- s = 0;
- for (i=0; i<m; ++i) {
- s += inptr[j+i];
- if (outptr[j+i] != s) {
- log_info("work_group_scan_inclusive_add int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), s, outptr[j+i]);
- return -1;
- }
- }
- }
- return 0;
-}
-
-static int
-verify_wg_scan_inclusive_add_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size)
-{
- size_t i, j, m;
- unsigned int s;
-
- for (j=0; j<n; j+=wg_size) {
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- s = 0;
- for (i=0; i<m; ++i) {
- s += inptr[j+i];
- if (outptr[j+i] != s) {
- log_info("work_group_scan_inclusive_add uint: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), s, outptr[j+i]);
- return -1;
- }
- }
- }
- return 0;
-}
-
-static int
-verify_wg_scan_inclusive_add_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size)
-{
- size_t i, j, m;
- cl_long s;
-
- for (j=0; j<n; j+=wg_size) {
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- s = 0;
- for (i=0; i<m; ++i) {
- s += inptr[j+i];
- if (outptr[j+i] != s) {
- log_info("work_group_scan_inclusive_add long: Error at %u: expected = %lld, got = %lld\n",
- (unsigned int)(j+i), (long long)s, (long long)outptr[j+i]);
- return -1;
- }
- }
- }
- return 0;
-}
-
-static int
-verify_wg_scan_inclusive_add_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size)
-{
- size_t i, j, m;
- cl_ulong s;
-
- for (j=0; j<n; j+=wg_size) {
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- s = 0;
- for (i=0; i<m; ++i) {
- s += inptr[j+i];
- if (outptr[j+i] != s) {
- log_info("work_group_scan_inclusive_add int: Error at %u: expected = %llu, got = %llu\n",
- (unsigned int)(j+i), (unsigned long long)s, (unsigned long long)outptr[j+i]);
- return -1;
- }
- }
- }
- return 0;
-}
-
-
-int
-test_work_group_scan_inclusive_add_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_int *input_ptr[1], *p;
- cl_int *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_add_kernel_code_int,
- "test_wg_scan_inclusive_add_int");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)num_elements;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_add_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusive_add int failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_add int passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_inclusive_add_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_uint *input_ptr[1], *p;
- cl_uint *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_add_kernel_code_uint,
- "test_wg_scan_inclusive_add_uint");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_add_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusive_add uint failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_add uint passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-int
-test_work_group_scan_inclusive_add_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_long *input_ptr[1], *p;
- cl_long *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_add_kernel_code_long,
- "test_wg_scan_inclusive_add_long");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_add_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusive_add long failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_add long passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_inclusive_add_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_ulong *input_ptr[1], *p;
- cl_ulong *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_add_kernel_code_ulong,
- "test_wg_scan_inclusive_add_ulong");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_add_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusiveadd ulong failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_add ulong passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_inclusive_add(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- int err;
-
- err = test_work_group_scan_inclusive_add_int(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_inclusive_add_uint(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_inclusive_add_long(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_inclusive_add_ulong(device, context, queue, n_elems);
- return err;
-}
-
diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp
deleted file mode 100644
index 44ebf805..00000000
--- a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp
+++ /dev/null
@@ -1,595 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-
-const char *wg_scan_inclusive_max_kernel_code_int =
-"__kernel void test_wg_scan_inclusive_max_int(global int *input, global int *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int result = work_group_scan_inclusive_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_inclusive_max_kernel_code_uint =
-"__kernel void test_wg_scan_inclusive_max_uint(global uint *input, global uint *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" uint result = work_group_scan_inclusive_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-const char *wg_scan_inclusive_max_kernel_code_long =
-"__kernel void test_wg_scan_inclusive_max_long(global long *input, global long *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" long result = work_group_scan_inclusive_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_inclusive_max_kernel_code_ulong =
-"__kernel void test_wg_scan_inclusive_max_ulong(global ulong *input, global ulong *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" ulong result = work_group_scan_inclusive_max(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-static int
-verify_wg_scan_inclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- int max_ = 0x80000000;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- max_ = MAX(inptr[j+i], max_);
- if (outptr[j+i] != max_) {
- log_info("work_group_scan_inclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_inclusive_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- unsigned int max_ = 0x0;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- max_ = MAX(inptr[j+i], max_);
- if (outptr[j+i] != max_) {
- log_info("work_group_scan_inclusive_max int: Error at %lu: expected = %u, got = %u\n", (unsigned long)(j+i), max_, outptr[j+i]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_inclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- cl_long max_ = 0x8000000000000000ULL;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- max_ = MAX(inptr[j+i], max_);
- if (outptr[j+i] != max_) {
- log_info("work_group_scan_inclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_inclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- cl_ulong max_ = 0x0;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- max_ = MAX(inptr[j+i], max_);
- if (outptr[j+i] != max_) {
- log_info("work_group_scan_inclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-
-int
-test_work_group_scan_inclusive_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_int *input_ptr[1], *p;
- cl_int *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_max_kernel_code_int,
- "test_wg_scan_inclusive_max_int");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)num_elements;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_max_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusive_max int failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_max int passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_inclusive_max_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_uint *input_ptr[1], *p;
- cl_uint *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_max_kernel_code_uint,
- "test_wg_scan_inclusive_max_uint");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_max_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusive_max uint failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_max uint passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-int
-test_work_group_scan_inclusive_max_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_long *input_ptr[1], *p;
- cl_long *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_max_kernel_code_long,
- "test_wg_scan_inclusive_max_long");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_max_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusive_max long failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_max long passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_inclusive_max_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_ulong *input_ptr[1], *p;
- cl_ulong *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_max_kernel_code_ulong,
- "test_wg_scan_inclusive_max_ulong");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_max_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusiveadd ulong failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_max ulong passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_inclusive_max(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- int err;
-
- err = test_work_group_scan_inclusive_max_int(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_inclusive_max_uint(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_inclusive_max_long(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_inclusive_max_ulong(device, context, queue, n_elems);
- return err;
-}
-
diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp
deleted file mode 100644
index f2f05788..00000000
--- a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp
+++ /dev/null
@@ -1,595 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-
-const char *wg_scan_inclusive_min_kernel_code_int =
-"__kernel void test_wg_scan_inclusive_min_int(global int *input, global int *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int result = work_group_scan_inclusive_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_inclusive_min_kernel_code_uint =
-"__kernel void test_wg_scan_inclusive_min_uint(global uint *input, global uint *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" uint result = work_group_scan_inclusive_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-const char *wg_scan_inclusive_min_kernel_code_long =
-"__kernel void test_wg_scan_inclusive_min_long(global long *input, global long *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" long result = work_group_scan_inclusive_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-const char *wg_scan_inclusive_min_kernel_code_ulong =
-"__kernel void test_wg_scan_inclusive_min_ulong(global ulong *input, global ulong *output)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" ulong result = work_group_scan_inclusive_min(input[tid]);\n"
-" output[tid] = result;\n"
-"}\n";
-
-
-static int
-verify_wg_scan_inclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- int min_ = 0x7fffffff;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- min_ = MIN(inptr[j+i], min_);
- if (outptr[j+i] != min_) {
- log_info("work_group_scan_inclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_inclusive_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- unsigned int min_ = 0xffffffff;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- min_ = MIN(inptr[j+i], min_);
- if (outptr[j+i] != min_) {
- log_info("work_group_scan_inclusive_min int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), min_, outptr[j+i]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_inclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- cl_long min_ = 0x7fffffffffffffffULL;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- min_ = MIN(inptr[j+i], min_);
- if (outptr[j+i] != min_) {
- log_info("work_group_scan_inclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-static int
-verify_wg_scan_inclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) {
-
- size_t i, j, m;
-
- for (j=0; j<n; j+=wg_size) {
- cl_ulong min_ = 0xffffffffffffffffULL;
-
- m = n - j;
- if (m > wg_size)
- m = wg_size;
-
- for (i = 0; i < m; ++i) {
- min_ = MIN(inptr[j+i], min_);
- if (outptr[j+i] != min_) {
- log_info("work_group_scan_inclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]);
- return -1;
- }
- }
- }
-
- return 0;
-}
-
-
-int
-test_work_group_scan_inclusive_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_int *input_ptr[1], *p;
- cl_int *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_min_kernel_code_int,
- "test_wg_scan_inclusive_min_int");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)num_elements;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_min_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusive_min int failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_min int passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_inclusive_min_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_uint *input_ptr[1], *p;
- cl_uint *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_min_kernel_code_uint,
- "test_wg_scan_inclusive_min_uint");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uint) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_min_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusive_min uint failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_min uint passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-int
-test_work_group_scan_inclusive_min_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_long *input_ptr[1], *p;
- cl_long *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_min_kernel_code_long,
- "test_wg_scan_inclusive_min_long");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_long) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_min_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusive_min long failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_min long passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_inclusive_min_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- cl_mem streams[2];
- cl_ulong *input_ptr[1], *p;
- cl_ulong *output_ptr;
- cl_program program;
- cl_kernel kernel;
- void *values[2];
- size_t threads[1];
- size_t wg_size[1];
- size_t num_elements;
- int err;
- int i;
- MTdata d;
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &wg_scan_inclusive_min_kernel_code_ulong,
- "test_wg_scan_inclusive_min_ulong");
- if (err)
- return -1;
-
- // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
- test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
-
- num_elements = n_elems;
-
- input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_ulong) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- p = input_ptr[0];
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- p[i] = genrand_int64(d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- values[0] = streams[0];
- values[1] = streams[1];
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- // Line below is troublesome...
- threads[0] = (size_t)n_elems;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- cl_uint dead = 0xdeaddead;
- memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- if (verify_wg_scan_inclusive_min_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
- {
- log_error("work_group_scan_inclusiveadd ulong failed\n");
- return -1;
- }
- log_info("work_group_scan_inclusive_min ulong passed\n");
-
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr[0]);
- free(output_ptr);
-
- return err;
-}
-
-
-int
-test_work_group_scan_inclusive_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
- int err;
-
- err = test_work_group_scan_inclusive_min_int(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_inclusive_min_uint(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_inclusive_min_long(device, context, queue, n_elems);
- if (err) return err;
- err = test_work_group_scan_inclusive_min_ulong(device, context, queue, n_elems);
- return err;
-}
-
diff --git a/test_conformance/workgroups/test_wg_scan_reduce.cpp b/test_conformance/workgroups/test_wg_scan_reduce.cpp
new file mode 100644
index 00000000..bf4dc89e
--- /dev/null
+++ b/test_conformance/workgroups/test_wg_scan_reduce.cpp
@@ -0,0 +1,456 @@
+//
+// Copyright (c) 2017-2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/compat.h"
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include "procs.h"
+
+static std::string make_kernel_string(const std::string &type,
+ const std::string &kernelName,
+ const std::string &func)
+{
+ // Build a kernel string of the form:
+ // __kernel void KERNEL_NAME(global TYPE *input, global TYPE *output) {
+ // int tid = get_global_id(0);
+ // output[tid] = FUNC(input[tid]);
+ // }
+
+ std::ostringstream os;
+ os << "__kernel void " << kernelName << "(global " << type
+ << " *input, global " << type << " *output) {\n";
+ os << " int tid = get_global_id(0);\n";
+ os << " output[tid] = " << func << "(input[tid]);\n";
+ os << "}\n";
+ return os.str();
+}
+
+template <typename T> struct TestTypeInfo
+{
+};
+
+template <> struct TestTypeInfo<cl_int>
+{
+ static constexpr const char *deviceName = "int";
+};
+
+template <> struct TestTypeInfo<cl_uint>
+{
+ static constexpr const char *deviceName = "uint";
+};
+
+template <> struct TestTypeInfo<cl_long>
+{
+ static constexpr const char *deviceName = "long";
+};
+
+template <> struct TestTypeInfo<cl_ulong>
+{
+ static constexpr const char *deviceName = "ulong";
+};
+
+template <typename T> struct Add
+{
+ using Type = T;
+ static constexpr const char *opName = "add";
+ static constexpr T identityValue = 0;
+ static T combine(T a, T b) { return a + b; }
+};
+
+template <typename T> struct Max
+{
+ using Type = T;
+ static constexpr const char *opName = "max";
+ static constexpr T identityValue = std::numeric_limits<T>::min();
+ static T combine(T a, T b) { return std::max(a, b); }
+};
+
+template <typename T> struct Min
+{
+ using Type = T;
+ static constexpr const char *opName = "min";
+ static constexpr T identityValue = std::numeric_limits<T>::max();
+ static T combine(T a, T b) { return std::min(a, b); }
+};
+
+template <typename C> struct Reduce
+{
+ using Type = typename C::Type;
+
+ static constexpr const char *testName = "work_group_reduce";
+ static constexpr const char *testOpName = C::opName;
+ static constexpr const char *deviceTypeName =
+ TestTypeInfo<Type>::deviceName;
+ static constexpr const char *kernelName = "test_wg_reduce";
+ static int verify(Type *inptr, Type *outptr, size_t n_elems,
+ size_t max_wg_size)
+ {
+ for (size_t i = 0; i < n_elems; i += max_wg_size)
+ {
+ size_t wg_size = std::min(max_wg_size, n_elems - i);
+
+ Type result = C::identityValue;
+ for (size_t j = 0; j < wg_size; j++)
+ {
+ result = C::combine(result, inptr[i + j]);
+ }
+
+ for (size_t j = 0; j < wg_size; j++)
+ {
+ if (result != outptr[i + j])
+ {
+ log_info("%s_%s: Error at %zu\n", testName, testOpName,
+ i + j);
+ return -1;
+ }
+ }
+ }
+ return 0;
+ }
+};
+
+template <typename C> struct ScanInclusive
+{
+ using Type = typename C::Type;
+
+ static constexpr const char *testName = "work_group_scan_inclusive";
+ static constexpr const char *testOpName = C::opName;
+ static constexpr const char *deviceTypeName =
+ TestTypeInfo<Type>::deviceName;
+ static constexpr const char *kernelName = "test_wg_scan_inclusive";
+ static int verify(Type *inptr, Type *outptr, size_t n_elems,
+ size_t max_wg_size)
+ {
+ for (size_t i = 0; i < n_elems; i += max_wg_size)
+ {
+ size_t wg_size = std::min(max_wg_size, n_elems - i);
+
+ Type result = C::identityValue;
+ for (size_t j = 0; j < wg_size; ++j)
+ {
+ result = C::combine(result, inptr[i + j]);
+ if (result != outptr[i + j])
+ {
+ log_info("%s_%s: Error at %zu\n", testName, testOpName,
+ i + j);
+ return -1;
+ }
+ }
+ }
+ return 0;
+ }
+};
+
+template <typename C> struct ScanExclusive
+{
+ using Type = typename C::Type;
+
+ static constexpr const char *testName = "work_group_scan_exclusive";
+ static constexpr const char *testOpName = C::opName;
+ static constexpr const char *deviceTypeName =
+ TestTypeInfo<Type>::deviceName;
+ static constexpr const char *kernelName = "test_wg_scan_exclusive";
+ static int verify(Type *inptr, Type *outptr, size_t n_elems,
+ size_t max_wg_size)
+ {
+ for (size_t i = 0; i < n_elems; i += max_wg_size)
+ {
+ size_t wg_size = std::min(max_wg_size, n_elems - i);
+
+ Type result = C::identityValue;
+ for (size_t j = 0; j < wg_size; ++j)
+ {
+ if (result != outptr[i + j])
+ {
+ log_info("%s_%s: Error at %zu\n", testName, testOpName,
+ i + j);
+ return -1;
+ }
+ result = C::combine(result, inptr[i + j]);
+ }
+ }
+ return 0;
+ }
+};
+
+template <typename TestInfo>
+static int run_test(cl_device_id device, cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ using T = typename TestInfo::Type;
+
+ cl_int err = CL_SUCCESS;
+
+ clProgramWrapper program;
+ clKernelWrapper kernel;
+
+ std::string funcName = TestInfo::testName;
+ funcName += "_";
+ funcName += TestInfo::testOpName;
+
+ std::string kernelName = TestInfo::kernelName;
+ kernelName += "_";
+ kernelName += TestInfo::testOpName;
+ kernelName += "_";
+ kernelName += TestInfo::deviceTypeName;
+
+ std::string kernelString =
+ make_kernel_string(TestInfo::deviceTypeName, kernelName, funcName);
+
+ const char *kernel_source = kernelString.c_str();
+ err = create_single_kernel_helper(context, &program, &kernel, 1,
+ &kernel_source, kernelName.c_str());
+ test_error(err, "Unable to create test kernel");
+
+ size_t wg_size[1];
+ err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
+ test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
+
+ clMemWrapper src = clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(T) * n_elems, NULL, &err);
+ test_error(err, "Unable to create source buffer");
+
+ clMemWrapper dst = clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(T) * n_elems, NULL, &err);
+ test_error(err, "Unable to create destination buffer");
+
+ std::vector<T> input_ptr(n_elems);
+
+ MTdataHolder d(gRandomSeed);
+ for (int i = 0; i < n_elems; i++)
+ {
+ input_ptr[i] = (T)genrand_int64(d);
+ }
+
+ err = clEnqueueWriteBuffer(queue, src, CL_TRUE, 0, sizeof(T) * n_elems,
+ input_ptr.data(), 0, NULL, NULL);
+ test_error(err, "clWriteBuffer to initialize src buffer failed");
+
+ err = clSetKernelArg(kernel, 0, sizeof(src), &src);
+ test_error(err, "Unable to set src buffer kernel arg");
+ err |= clSetKernelArg(kernel, 1, sizeof(dst), &dst);
+ test_error(err, "Unable to set dst buffer kernel arg");
+
+ size_t global_work_size[] = { (size_t)n_elems };
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
+ wg_size, 0, NULL, NULL);
+ test_error(err, "Unable to enqueue test kernel");
+
+ std::vector<T> output_ptr(n_elems);
+
+ cl_uint dead = 0xdeaddead;
+ memset_pattern4(output_ptr.data(), &dead, sizeof(T) * n_elems);
+ err = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(T) * n_elems,
+ output_ptr.data(), 0, NULL, NULL);
+    test_error(err, "clEnqueueReadBuffer to read dst buffer failed");
+
+ if (TestInfo::verify(input_ptr.data(), output_ptr.data(), n_elems,
+ wg_size[0]))
+ {
+ log_error("%s_%s %s failed\n", TestInfo::testName, TestInfo::testOpName,
+ TestInfo::deviceTypeName);
+ return TEST_FAIL;
+ }
+
+ log_info("%s_%s %s passed\n", TestInfo::testName, TestInfo::testOpName,
+ TestInfo::deviceTypeName);
+ return TEST_PASS;
+}
+
+int test_work_group_reduce_add(cl_device_id device, cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ int result = TEST_PASS;
+
+ result |= run_test<Reduce<Add<cl_int>>>(device, context, queue, n_elems);
+ result |= run_test<Reduce<Add<cl_uint>>>(device, context, queue, n_elems);
+
+ if (gHasLong)
+ {
+ result |=
+ run_test<Reduce<Add<cl_long>>>(device, context, queue, n_elems);
+ result |=
+ run_test<Reduce<Add<cl_ulong>>>(device, context, queue, n_elems);
+ }
+
+ return result;
+}
+
+int test_work_group_reduce_max(cl_device_id device, cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ int result = TEST_PASS;
+
+ result |= run_test<Reduce<Max<cl_int>>>(device, context, queue, n_elems);
+ result |= run_test<Reduce<Max<cl_uint>>>(device, context, queue, n_elems);
+
+ if (gHasLong)
+ {
+ result |=
+ run_test<Reduce<Max<cl_long>>>(device, context, queue, n_elems);
+ result |=
+ run_test<Reduce<Max<cl_ulong>>>(device, context, queue, n_elems);
+ }
+
+ return result;
+}
+
+int test_work_group_reduce_min(cl_device_id device, cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ int result = TEST_PASS;
+
+ result |= run_test<Reduce<Min<cl_int>>>(device, context, queue, n_elems);
+ result |= run_test<Reduce<Min<cl_uint>>>(device, context, queue, n_elems);
+
+ if (gHasLong)
+ {
+ result |=
+ run_test<Reduce<Min<cl_long>>>(device, context, queue, n_elems);
+ result |=
+ run_test<Reduce<Min<cl_ulong>>>(device, context, queue, n_elems);
+ }
+
+ return result;
+}
+
+int test_work_group_scan_inclusive_add(cl_device_id device, cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ int result = TEST_PASS;
+
+ result |=
+ run_test<ScanInclusive<Add<cl_int>>>(device, context, queue, n_elems);
+ result |=
+ run_test<ScanInclusive<Add<cl_uint>>>(device, context, queue, n_elems);
+
+ if (gHasLong)
+ {
+ result |= run_test<ScanInclusive<Add<cl_long>>>(device, context, queue,
+ n_elems);
+ result |= run_test<ScanInclusive<Add<cl_ulong>>>(device, context, queue,
+ n_elems);
+ }
+
+ return result;
+}
+
+int test_work_group_scan_inclusive_max(cl_device_id device, cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ int result = TEST_PASS;
+
+ result |=
+ run_test<ScanInclusive<Max<cl_int>>>(device, context, queue, n_elems);
+ result |=
+ run_test<ScanInclusive<Max<cl_uint>>>(device, context, queue, n_elems);
+
+ if (gHasLong)
+ {
+ result |= run_test<ScanInclusive<Max<cl_long>>>(device, context, queue,
+ n_elems);
+ result |= run_test<ScanInclusive<Max<cl_ulong>>>(device, context, queue,
+ n_elems);
+ }
+
+ return result;
+}
+
+int test_work_group_scan_inclusive_min(cl_device_id device, cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ int result = TEST_PASS;
+
+ result |=
+ run_test<ScanInclusive<Min<cl_int>>>(device, context, queue, n_elems);
+ result |=
+ run_test<ScanInclusive<Min<cl_uint>>>(device, context, queue, n_elems);
+
+ if (gHasLong)
+ {
+ result |= run_test<ScanInclusive<Min<cl_long>>>(device, context, queue,
+ n_elems);
+ result |= run_test<ScanInclusive<Min<cl_ulong>>>(device, context, queue,
+ n_elems);
+ }
+
+ return result;
+}
+
+int test_work_group_scan_exclusive_add(cl_device_id device, cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ int result = TEST_PASS;
+
+ result |=
+ run_test<ScanExclusive<Add<cl_int>>>(device, context, queue, n_elems);
+ result |=
+ run_test<ScanExclusive<Add<cl_uint>>>(device, context, queue, n_elems);
+
+ if (gHasLong)
+ {
+ result |= run_test<ScanExclusive<Add<cl_long>>>(device, context, queue,
+ n_elems);
+ result |= run_test<ScanExclusive<Add<cl_ulong>>>(device, context, queue,
+ n_elems);
+ }
+
+ return result;
+}
+
+int test_work_group_scan_exclusive_max(cl_device_id device, cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ int result = TEST_PASS;
+
+ result |=
+ run_test<ScanExclusive<Max<cl_int>>>(device, context, queue, n_elems);
+ result |=
+ run_test<ScanExclusive<Max<cl_uint>>>(device, context, queue, n_elems);
+
+ if (gHasLong)
+ {
+ result |= run_test<ScanExclusive<Max<cl_long>>>(device, context, queue,
+ n_elems);
+ result |= run_test<ScanExclusive<Max<cl_ulong>>>(device, context, queue,
+ n_elems);
+ }
+
+ return result;
+}
+
+int test_work_group_scan_exclusive_min(cl_device_id device, cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ int result = TEST_PASS;
+
+ result |=
+ run_test<ScanExclusive<Min<cl_int>>>(device, context, queue, n_elems);
+ result |=
+ run_test<ScanExclusive<Min<cl_uint>>>(device, context, queue, n_elems);
+
+ if (gHasLong)
+ {
+ result |= run_test<ScanExclusive<Min<cl_long>>>(device, context, queue,
+ n_elems);
+ result |= run_test<ScanExclusive<Min<cl_ulong>>>(device, context, queue,
+ n_elems);
+ }
+
+ return result;
+}
diff --git a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp
new file mode 100644
index 00000000..aa02391c
--- /dev/null
+++ b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp
@@ -0,0 +1,611 @@
+//
+// Copyright (c) 2021 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/compat.h"
+
+#include <stdio.h>
+#include <iostream>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+#include <CL/cl_ext.h>
+
+/** @brief Gets the number of elements of type s in a fixed length array of s */
+#define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
+#define test_error_ret_and_free(errCode, msg, retValue, ptr) \
+ { \
+ auto errCodeResult = errCode; \
+ if (errCodeResult != CL_SUCCESS) \
+ { \
+ print_error(errCodeResult, msg); \
+ free(ptr); \
+ return retValue; \
+ } \
+ }
+
+const char* wg_scan_local_work_group_size = R"(
+ bool is_zero_linear_id()
+ {
+ size_t linear_id;
+#if __OPENCL_VERSION__ < CL_VERSION_2_0
+ linear_id = ((get_global_id(2) - get_global_offset(2)) * get_global_size(1) * get_global_size(0)) +
+ ((get_global_id(1) - get_global_offset(1)) * get_global_size(0)) +
+ (get_global_id(0) - get_global_offset(0));
+#else
+ linear_id = get_global_linear_id();
+#endif
+ return linear_id == 0;
+ }
+
+ uint get_l_size(size_t dim)
+ {
+#if __OPENCL_VERSION__ < CL_VERSION_2_0
+ return get_local_size(dim);
+#else
+ return get_enqueued_local_size(dim);
+#endif
+ }
+
+ __kernel void test_wg_scan_local_work_group_size(global uint *output)
+ {
+ if(!is_zero_linear_id()) return;
+ for (uint i = 0; i < 3; i++)
+ {
+ output[i] = get_l_size(i);
+ }
+ }
+ __kernel void test_wg_scan_local_work_group_size_static_local(
+ global uint *output)
+ {
+ __local char c[LOCAL_MEM_SIZE];
+
+ if(!is_zero_linear_id()) return;
+ for (uint i = 0; i < 3; i++)
+ {
+ output[i] = get_l_size(i);
+ }
+ }
+ __kernel void test_wg_scan_local_work_group_size_dynlocal(
+ global uint *output,
+ __local char * c)
+ {
+ if(!is_zero_linear_id()) return;
+ for (uint i = 0; i < 3; i++)
+ {
+ output[i] = get_l_size(i);
+ }
+ };)";
+
+bool is_prime(size_t a)
+{
+ size_t c;
+
+ for (c = 2; c < a; c++)
+ {
+ if (a % c == 0) return false;
+ }
+ return true;
+}
+
+bool is_not_prime(size_t a) { return !is_prime(a); }
+
+bool is_not_even(size_t a) { return (is_prime(a) || (a % 2 == 1)); }
+
+bool is_not_odd(size_t a) { return (is_prime(a) || (a % 2 == 0)); }
+
+#define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
+/* The numbers we chose in the value_range are to be used for the second and
+ third dimension of the global work group size. The numbers below cover many
+ different cases: 1024 is a power of 2, 3 is an odd and small prime number, 12
+ is a multiple of 4 but not a power of 2, 1031 is a large odd and prime number
+ and 1 is to test the lack of this dimension if the others are present */
+const size_t value_range[] = { 1024, 3, 12, 1031, 1 };
+/* The value_range_nD contains numbers to be used for the experiments with 2D
+ and 3D global work sizes. This is because we need smaller numbers so that the
+ resulting number of work items is meaningful and does not become too large.
+ The cases here are: 64 that is a power of 2, 3 is an odd and small prime
+ number, 12 is a multiple of 4 but not a power of 2, 113 is a large prime
+ number
+ and 1 is to test the lack of this dimension if the others are present */
+const size_t value_range_nD[] = { 64, 3, 12, 113, 1 };
+const size_t basic_increment = 16;
+const size_t primes_increment = 1;
+enum num_dims
+{
+ _1D = 1,
+ _2D = 2,
+ _3D = 3
+};
+
+int do_test(cl_device_id device, cl_context context, cl_command_queue queue,
+ cl_kernel scan_kernel, int work_dim, size_t global_work_offset[3],
+ size_t test_values[3], size_t dyn_mem_size)
+{
+ size_t local_work_size[] = { 1, 1, 1 };
+ size_t suggested_total_size;
+ size_t workgroupinfo_size;
+ cl_uint kernel_work_size[3] = { 0 };
+ clMemWrapper buffer;
+ cl_platform_id platform;
+
+ int err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
+ &platform, NULL);
+ test_error_ret(err, "clGetDeviceInfo failed", -1);
+ clGetKernelSuggestedLocalWorkSizeKHR_fn
+ clGetKernelSuggestedLocalWorkSizeKHR =
+ (clGetKernelSuggestedLocalWorkSizeKHR_fn)
+ clGetExtensionFunctionAddressForPlatform(
+ platform, "clGetKernelSuggestedLocalWorkSizeKHR");
+
+ if (clGetKernelSuggestedLocalWorkSizeKHR == NULL)
+ {
+ log_info("Extension 'cl_khr_suggested_local_work_size' could not be "
+ "found.\n");
+ return TEST_FAIL;
+ }
+
+ /* Create the actual buffer, using local_buffer as the host pointer, and ask
+ * to copy that into the buffer */
+ buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(kernel_work_size), NULL, &err);
+ test_error_ret(err, "clCreateBuffer failed", -1);
+ err = clSetKernelArg(scan_kernel, 0, sizeof(buffer), &buffer);
+ test_error_ret(err, "clSetKernelArg failed", -1);
+ if (dyn_mem_size)
+ {
+ err = clSetKernelArg(scan_kernel, 1, dyn_mem_size, NULL);
+ test_error_ret(err, "clSetKernelArg failed", -1);
+ }
+ err = clGetKernelSuggestedLocalWorkSizeKHR(queue, scan_kernel, work_dim,
+ global_work_offset, test_values,
+ local_work_size);
+ test_error_ret(err, "clGetKernelSuggestedLocalWorkSizeKHR failed", -1);
+ suggested_total_size =
+ local_work_size[0] * local_work_size[1] * local_work_size[2];
+ err = clGetKernelWorkGroupInfo(
+ scan_kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
+ sizeof(workgroupinfo_size), &workgroupinfo_size, NULL);
+ test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1);
+ if (suggested_total_size > workgroupinfo_size)
+ {
+ std::cout << "The suggested work group size consist of "
+ << suggested_total_size << " work items.\n"
+ << "Work items are limited by " << workgroupinfo_size
+ << std::endl;
+ std::cout << "Size from clGetKernelWorkGroupInfo: "
+ << workgroupinfo_size;
+ std::cout << "\nSize from clGetKernelSuggestedLocalWorkSizeKHR: "
+ << local_work_size[0] * local_work_size[1]
+ * local_work_size[2]
+ << std::endl;
+ return -1;
+ }
+
+ err =
+ clEnqueueNDRangeKernel(queue, scan_kernel, work_dim, global_work_offset,
+ test_values, // global work size
+ NULL, 0, NULL, NULL);
+ test_error_ret(err, "clEnqueueNDRangeKernel failed", -1);
+ err = clEnqueueReadBuffer(queue, buffer, CL_NON_BLOCKING, 0,
+ sizeof(kernel_work_size), kernel_work_size, 0,
+ NULL, NULL);
+ test_error_ret(err, "clEnqueueReadBuffer failed", -1);
+ err = clFinish(queue);
+ test_error_ret(err, "clFinish failed", -1);
+
+ if (kernel_work_size[0] != local_work_size[0]
+ || kernel_work_size[1] != local_work_size[1]
+ || kernel_work_size[2] != local_work_size[2])
+ {
+ std::cout
+ << "Kernel work size differs from local work size suggested:\n"
+ << "Kernel work size: (" << kernel_work_size[0] << ", "
+ << kernel_work_size[1] << ", " << kernel_work_size[2] << ")"
+ << "Local work size: (" << local_work_size[0] << ", "
+ << local_work_size[1] << ", " << local_work_size[2] << ")\n";
+ return -1;
+ }
+ return err;
+}
+
+int do_test_work_group_suggested_local_size(
+ cl_device_id device, cl_context context, cl_command_queue queue,
+ bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr,
+ cl_long max_local_mem_size, size_t global_work_offset[], num_dims dim)
+{
+ clProgramWrapper scan_program;
+ clKernelWrapper scan_kernel;
+ int err;
+ size_t test_values[] = { 1, 1, 1 };
+ std::string kernel_names[6] = {
+ "test_wg_scan_local_work_group_size",
+ "test_wg_scan_local_work_group_size_static_local",
+ "test_wg_scan_local_work_group_size_static_local",
+ "test_wg_scan_local_work_group_size_static_local",
+ "test_wg_scan_local_work_group_size_static_local",
+ "test_wg_scan_local_work_group_size_dynlocal"
+ };
+ std::string str_local_mem_size[6] = {
+ "-DLOCAL_MEM_SIZE=1", "-DLOCAL_MEM_SIZE=1024",
+ "-DLOCAL_MEM_SIZE=4096", "-DLOCAL_MEM_SIZE=16384",
+ "-DLOCAL_MEM_SIZE=32768", "-DLOCAL_MEM_SIZE=1"
+ };
+ size_t local_mem_size[6] = { 1, 1024, 4096, 16384, 32768, 1 };
+ size_t dyn_mem_size[6] = { 0, 0, 0, 0, 0, 1024 };
+ cl_ulong kernel_local_mem_size;
+ for (int kernel_num = 0; kernel_num < 6; kernel_num++)
+ {
+ if (max_local_mem_size < local_mem_size[kernel_num]) continue;
+ // Create the kernel
+ err = create_single_kernel_helper(
+ context, &scan_program, &scan_kernel, 1,
+ &wg_scan_local_work_group_size, (kernel_names[kernel_num]).c_str(),
+ (str_local_mem_size[kernel_num]).c_str());
+ test_error_ret(err,
+ ("create_single_kernel_helper failed for kernel "
+ + kernel_names[kernel_num])
+ .c_str(),
+ -1);
+
+ // Check if the local memory used by the kernel is going to exceed the
+ // max_local_mem_size
+ err = clGetKernelWorkGroupInfo(
+ scan_kernel, device, CL_KERNEL_LOCAL_MEM_SIZE,
+ sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL);
+ test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1);
+ if (kernel_local_mem_size > max_local_mem_size) continue;
+ // return error if no number is found due to the skip condition
+ err = -1;
+ unsigned int j = 0;
+ size_t num_elems = NELEMS(value_range);
+ for (size_t i = start; i < end; i += incr)
+ {
+ if (skip_cond(i)) continue;
+ err = 0;
+ test_values[0] = i;
+ if (dim == _2D) test_values[1] = value_range_nD[j++ % num_elems];
+ if (dim == _3D)
+ {
+ test_values[1] = value_range_nD[j++ % num_elems];
+ test_values[2] = value_range_nD[rand() % num_elems];
+ }
+ err |= do_test(device, context, queue, scan_kernel, dim,
+ global_work_offset, test_values,
+ dyn_mem_size[kernel_num]);
+ test_error_ret(
+ err,
+ ("do_test failed for kernel " + kernel_names[kernel_num])
+ .c_str(),
+ -1);
+ }
+ }
+ return err;
+}
+
+int test_work_group_suggested_local_size_1D(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ if (!is_extension_available(device, "cl_khr_suggested_local_work_size"))
+ {
+ log_info("Device does not support 'cl_khr_suggested_local_work_size'. "
+ "Skipping the test.\n");
+ return TEST_SKIPPED_ITSELF;
+ }
+ cl_long max_local_mem_size;
+ cl_int err =
+ clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE,
+ sizeof(max_local_mem_size), &max_local_mem_size, NULL);
+ test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.",
+ -1);
+
+ size_t start, end, incr;
+ size_t global_work_offset[] = { 0, 0, 0 };
+ size_t max_work_items = 0;
+ clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof(max_work_items), &max_work_items, NULL);
+
+ // odds
+ start = 1;
+ end = max_work_items;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_odd, start, end, incr,
+ max_local_mem_size, global_work_offset, _1D);
+ test_error_ret(
+ err, "test_work_group_suggested_local_size_1D for odds failed.", -1);
+ log_info("test_work_group_suggested_local_size_1D odds passed\n");
+
+ // evens
+ start = 2;
+ end = max_work_items;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_even, start, end, incr,
+ max_local_mem_size, global_work_offset, _1D);
+ test_error_ret(
+ err, "test_work_group_suggested_local_size_1D for evens failed.", -1);
+ log_info("test_work_group_suggested_local_size_1D evens passed\n");
+
+ // primes
+ start = max_work_items + 1;
+ end = 2 * max_work_items;
+ incr = primes_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_prime, start, end, incr,
+ max_local_mem_size, global_work_offset, _1D);
+ test_error_ret(
+ err, "test_work_group_suggested_local_size_1D for primes failed.", -1);
+ log_info("test_work_group_suggested_local_size_1D primes passed\n");
+
+ global_work_offset[0] = 10;
+ global_work_offset[1] = 10;
+ global_work_offset[2] = 10;
+ // odds
+ start = 1;
+ end = max_work_items;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_odd, start, end, incr,
+ max_local_mem_size, global_work_offset, _1D);
+ test_error_ret(err,
+ "test_work_group_suggested_local_size_1D for odds with "
+ "global_work_offset failed.",
+ -1);
+ log_info("test_work_group_suggested_local_size_1D odds with "
+ "global_work_offset passed\n");
+
+ // evens
+ start = 2;
+ end = max_work_items;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_even, start, end, incr,
+ max_local_mem_size, global_work_offset, _1D);
+ test_error_ret(err,
+ "test_work_group_suggested_local_size_1D for evens with "
+ "global_work_offset failed.",
+ -1);
+ log_info("test_work_group_suggested_local_size_1D evens with "
+ "global_work_offset passed\n");
+
+ // primes
+ start = max_work_items + 1;
+ end = 2 * max_work_items;
+ incr = primes_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_prime, start, end, incr,
+ max_local_mem_size, global_work_offset, _1D);
+ test_error_ret(err,
+ "test_work_group_suggested_local_size_1D for primes with "
+ "global_work_offset failed.",
+ -1);
+ log_info("test_work_group_suggested_local_size_1D primes with "
+ "global_work_offset passed\n");
+
+ return err;
+}
+
+int test_work_group_suggested_local_size_2D(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ if (!is_extension_available(device, "cl_khr_suggested_local_work_size"))
+ {
+ log_info("Device does not support 'cl_khr_suggested_local_work_size'. "
+ "Skipping the test.\n");
+ return TEST_SKIPPED_ITSELF;
+ }
+ cl_long max_local_mem_size;
+ cl_int err =
+ clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE,
+ sizeof(max_local_mem_size), &max_local_mem_size, NULL);
+ test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.",
+ -1);
+
+ size_t start, end, incr;
+ size_t global_work_offset[] = { 0, 0, 0 };
+ size_t max_work_items = 0;
+ clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof(max_work_items), &max_work_items, NULL);
+
+ // odds
+ start = 1;
+ end = max_work_items;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_odd, start, end, incr,
+ max_local_mem_size, global_work_offset, _2D);
+ test_error_ret(
+ err, "test_work_group_suggested_local_size_2D for odds failed.", -1);
+ log_info("test_work_group_suggested_local_size_2D odds passed\n");
+
+ // evens
+ start = 2;
+ end = max_work_items;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_even, start, end, incr,
+ max_local_mem_size, global_work_offset, _2D);
+ test_error_ret(
+ err, "test_work_group_suggested_local_size_2D for evens failed.", -1);
+ log_info("test_work_group_suggested_local_size_2D evens passed\n");
+
+ // primes
+ start = max_work_items + 1;
+ end = max_work_items + max_work_items / 4;
+ incr = primes_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_prime, start, end, incr,
+ max_local_mem_size, global_work_offset, _2D);
+ test_error_ret(
+ err, "test_work_group_suggested_local_size_2D for primes failed.", -1);
+ log_info("test_work_group_suggested_local_size_2D primes passed\n");
+
+ global_work_offset[0] = 10;
+ global_work_offset[1] = 10;
+ global_work_offset[2] = 10;
+
+ // odds
+ start = 1;
+ end = max_work_items;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_odd, start, end, incr,
+ max_local_mem_size, global_work_offset, _2D);
+ test_error_ret(err,
+ "test_work_group_suggested_local_size_2D for odds with "
+ "global_work_offset failed.",
+ -1);
+ log_info("test_work_group_suggested_local_size_2D odds with "
+ "global_work_offset passed\n");
+
+ // evens
+ start = 2;
+ end = max_work_items;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_even, start, end, incr,
+ max_local_mem_size, global_work_offset, _2D);
+ test_error_ret(err,
+ "test_work_group_suggested_local_size_2D for evens with "
+ "global_work_offset failed.",
+ -1);
+ log_info("test_work_group_suggested_local_size_2D evens with "
+ "global_work_offset passed\n");
+
+ // primes
+ start = max_work_items + 1;
+ end = max_work_items + max_work_items / 4;
+ incr = primes_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_prime, start, end, incr,
+ max_local_mem_size, global_work_offset, _2D);
+ test_error_ret(err,
+ "test_work_group_suggested_local_size_2D for primes with "
+ "global_work_offset failed.",
+ -1);
+ log_info("test_work_group_suggested_local_size_2D primes with "
+ "global_work_offset passed\n");
+
+ return err;
+}
+
+int test_work_group_suggested_local_size_3D(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue, int n_elems)
+{
+ if (!is_extension_available(device, "cl_khr_suggested_local_work_size"))
+ {
+ log_info("Device does not support 'cl_khr_suggested_local_work_size'. "
+ "Skipping the test.\n");
+ return TEST_SKIPPED_ITSELF;
+ }
+ cl_long max_local_mem_size;
+ cl_int err =
+ clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE,
+ sizeof(max_local_mem_size), &max_local_mem_size, NULL);
+ test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.",
+ -1);
+
+ size_t start, end, incr;
+ size_t global_work_offset[] = { 0, 0, 0 };
+ size_t max_work_items = 0;
+ clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof(max_work_items), &max_work_items, NULL);
+
+ // odds
+ start = 1;
+ end = max_work_items / 2;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_odd, start, end, incr,
+ max_local_mem_size, global_work_offset, _3D);
+ test_error_ret(
+ err, "test_work_group_suggested_local_size_3D for odds failed.", -1);
+ log_info("test_work_group_suggested_local_size_3D odds passed\n");
+
+ // evens
+ start = 2;
+ end = max_work_items / 2;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_even, start, end, incr,
+ max_local_mem_size, global_work_offset, _3D);
+ test_error_ret(
+ err, "test_work_group_suggested_local_size_3D for evens failed.", -1);
+ log_info("test_work_group_suggested_local_size_3D evens passed\n");
+
+ // primes
+ start = max_work_items + 1;
+ end = max_work_items + max_work_items / 4;
+ incr = primes_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_prime, start, end, incr,
+ max_local_mem_size, global_work_offset, _3D);
+ test_error_ret(
+ err, "test_work_group_suggested_local_size_3D for primes failed.", -1);
+ log_info("test_work_group_suggested_local_size_3D primes passed\n");
+
+ global_work_offset[0] = 10;
+ global_work_offset[1] = 10;
+ global_work_offset[2] = 10;
+
+ // odds
+ start = 1;
+ end = max_work_items / 2;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_odd, start, end, incr,
+ max_local_mem_size, global_work_offset, _3D);
+ test_error_ret(err,
+ "test_work_group_suggested_local_size_3D for odds with "
+ "global_work_offset failed.",
+ -1);
+ log_info("test_work_group_suggested_local_size_3D odds with "
+ "global_work_offset passed\n");
+
+ // evens
+ start = 2;
+ end = max_work_items / 2;
+ incr = basic_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_even, start, end, incr,
+ max_local_mem_size, global_work_offset, _3D);
+ test_error_ret(err,
+ "test_work_group_suggested_local_size_3D for evens with "
+ "global_work_offset failed.",
+ -1);
+ log_info("test_work_group_suggested_local_size_3D evens with "
+ "global_work_offset passed\n");
+
+ // primes
+ start = max_work_items + 1;
+ end = max_work_items + max_work_items / 4;
+ incr = primes_increment;
+ err = do_test_work_group_suggested_local_size(
+ device, context, queue, is_not_prime, start, end, incr,
+ max_local_mem_size, global_work_offset, _3D);
+ test_error_ret(err,
+ "test_work_group_suggested_local_size_3D for primes with "
+ "global_work_offset failed.",
+ -1);
+ log_info("test_work_group_suggested_local_size_3D primes with "
+ "global_work_offset passed\n");
+
+ return err;
+}