diff options
author | Sven van Haastregt <sven.vanhaastregt@arm.com> | 2022-09-27 17:32:23 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-27 09:32:23 -0700 |
commit | 9bf6486352bf4c87a49ecb212ae71f96c293c26f (patch) | |
tree | 97793a4fefa8ec0817fba4e6e50f331e3c4fd4db | |
parent | 9b21e9f06b88e7ce96b76b0e94c6dfef644ac1ee (diff) | |
download | OpenCL-CTS-9bf6486352bf4c87a49ecb212ae71f96c293c26f.tar.gz |
[NFC] clang-format test_atomics (#1516)
Add some clang-format off/on comments to keep lists and kernel code
readable.
Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
-rw-r--r-- | test_conformance/atomics/main.cpp | 7 | ||||
-rw-r--r-- | test_conformance/atomics/procs.h | 49 | ||||
-rw-r--r-- | test_conformance/atomics/testBase.h | 5 | ||||
-rw-r--r-- | test_conformance/atomics/test_atomics.cpp | 1255 | ||||
-rw-r--r-- | test_conformance/atomics/test_indexed_cases.cpp | 507 |
5 files changed, 1143 insertions, 680 deletions
diff --git a/test_conformance/atomics/main.cpp b/test_conformance/atomics/main.cpp index afdea376..987d6bfa 100644 --- a/test_conformance/atomics/main.cpp +++ b/test_conformance/atomics/main.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -24,6 +24,7 @@ #include <unistd.h> #endif +// clang-format off test_definition test_list[] = { ADD_TEST( atomic_add ), ADD_TEST( atomic_sub ), @@ -40,11 +41,11 @@ test_definition test_list[] = { ADD_TEST( atomic_add_index ), ADD_TEST( atomic_add_index_bin ), }; +// clang-format on -const int test_num = ARRAY_SIZE( test_list ); +const int test_num = ARRAY_SIZE(test_list); int main(int argc, const char *argv[]) { return runTestHarness(argc, argv, test_num, test_list, false, 0); } - diff --git a/test_conformance/atomics/procs.h b/test_conformance/atomics/procs.h index bf053f25..fa85aad5 100644 --- a/test_conformance/atomics/procs.h +++ b/test_conformance/atomics/procs.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,22 +18,35 @@ #include "harness/threadTesting.h" #include "harness/typeWrappers.h" -extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret); - -extern int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); - -extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); - +extern int create_program_and_kernel(const char *source, + const char *kernel_name, + cl_program *program_ret, + cl_kernel *kernel_ret); +extern int test_atomic_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_inc(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_dec(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); diff --git a/test_conformance/atomics/testBase.h b/test_conformance/atomics/testBase.h index ba67d140..22bce1d2 100644 --- a/test_conformance/atomics/testBase.h +++ b/test_conformance/atomics/testBase.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -26,6 +26,3 @@ #include "procs.h" #endif // _testBase_h - - - diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp index c0c01363..31d08500 100644 --- a/test_conformance/atomics/test_atomics.cpp +++ b/test_conformance/atomics/test_atomics.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -22,7 +22,7 @@ #define INT_TEST_VALUE 402258822 #define LONG_TEST_VALUE 515154531254381446LL - +// clang-format off const char *atomic_global_pattern[] = { "__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n" "{\n" @@ -36,19 +36,20 @@ const char *atomic_local_pattern[] = { "__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n" "{\n" " int tid = get_global_id(0);\n" - " int dstItemIdx;\n" + " int dstItemIdx;\n" "\n" " // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n" - " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" + " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" " destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n" " barrier( CLK_LOCAL_MEM_FENCE );\n" "\n" , " barrier( CLK_LOCAL_MEM_FENCE );\n" " // Finally, write out the last value. Again, we're synced, so everyone will be writing the same value\n" - " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" + " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" " finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n" "}\n" }; +// clang-format on #define TEST_COUNT 128 * 1024 @@ -56,41 +57,48 @@ const char *atomic_local_pattern[] = { struct TestFns { - cl_int mIntStartValue; - cl_long mLongStartValue; + cl_int mIntStartValue; + cl_long mLongStartValue; - size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType ); + size_t (*NumResultsFn)(size_t threadSize, ExplicitType dataType); // Integer versions - cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue ); - void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d ); - bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue ); + cl_int (*ExpectedValueIntFn)(size_t size, cl_int *startRefValues, + size_t whichDestValue); + void (*GenerateRefsIntFn)(size_t size, cl_int *startRefValues, MTdata d); + bool (*VerifyRefsIntFn)(size_t size, cl_int *refValues, cl_int finalValue); // Long versions - cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue ); - void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d ); - bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue ); + cl_long (*ExpectedValueLongFn)(size_t size, cl_long *startRefValues, + size_t whichDestValue); + void (*GenerateRefsLongFn)(size_t size, cl_long *startRefValues, MTdata d); + bool (*VerifyRefsLongFn)(size_t size, cl_long *refValues, + cl_long finalValue); // Float versions - cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue ); - void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d ); - bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue ); + cl_float (*ExpectedValueFloatFn)(size_t size, cl_float *startRefValues, + size_t whichDestValue); + void (*GenerateRefsFloatFn)(size_t size, cl_float *startRefValues, + MTdata d); + bool (*VerifyRefsFloatFn)(size_t size, cl_float *refValues, + cl_float finalValue); }; -bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType ) +bool check_atomic_support(cl_device_id device, bool extended, bool isLocal, + ExplicitType dataType) { + // clang-format off const char *extensionNames[8] = { "cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics", "cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics", "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics", "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics" // this line intended to be the same as the last one }; + // clang-format on size_t index = 0; - if( extended ) - index += 1; - if( isLocal ) - index += 2; + if (extended) index += 1; + if (isLocal) index += 2; Version version = get_device_cl_version(device); @@ -98,26 +106,28 @@ bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, Exp { case kInt: case kUInt: - if( version >= Version(1,1) ) - return 1; + if (version >= Version(1, 1)) return 1; break; case kLong: - case kULong: - index += 4; - break; - case kFloat: // this has to stay separate since the float atomics arent in the 1.0 extensions - return version >= Version(1,1); + case kULong: index += 4; break; + case kFloat: // this has to stay separate since the float atomics arent + // in the 1.0 extensions + return version >= Version(1, 1); default: - log_error( "ERROR: Unsupported data type (%d) in check_atomic_support\n", dataType ); + log_error( + "ERROR: Unsupported data type (%d) in check_atomic_support\n", + dataType); return 0; } - return is_extension_available( device, extensionNames[index] ); + return is_extension_available(device, extensionNames[index]); } -int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore, - TestFns testFns, - bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize ) +int test_atomic_function(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + const char *programCore, TestFns testFns, + bool extended, bool isLocal, ExplicitType dataType, + bool matchGroupSize) { clProgramWrapper program; clKernelWrapper kernel; @@ -127,55 +137,65 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q void *refValues, *startRefValues; size_t threadSize, groupSize; const char *programLines[4]; - char pragma[ 512 ]; - char programHeader[ 512 ]; + char pragma[512]; + char programHeader[512]; MTdata d; - size_t typeSize = get_explicit_type_size( dataType ); + size_t typeSize = get_explicit_type_size(dataType); // Verify we can run first - bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt ); - if( !check_atomic_support( deviceID, extended, isLocal, dataType ) ) + bool isUnsigned = (dataType == kULong) || (dataType == kUInt); + if (!check_atomic_support(deviceID, extended, isLocal, dataType)) { - // Only print for the signed (unsigned comes right after, and if signed isn't supported, unsigned isn't either) - if( dataType == kFloat ) - log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" ); - else if( !isUnsigned ) - log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 ); + // Only print for the signed (unsigned comes right after, and if signed + // isn't supported, unsigned isn't either) + if (dataType == kFloat) + log_info("\t%s float not supported\n", + isLocal ? "Local" : "Global"); + else if (!isUnsigned) + log_info("\t%s %sint%d not supported\n", + isLocal ? "Local" : "Global", isUnsigned ? "u" : "", + (int)typeSize * 8); // Since we don't support the operation, they implicitly pass return 0; } else { - if( dataType == kFloat ) - log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" ); + if (dataType == kFloat) + log_info("\t%s float%s...", isLocal ? "local" : "global", + isLocal ? " " : ""); else - log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "", - (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? " " : "" ); + log_info("\t%s %sint%d%s%s...", isLocal ? "local" : "global", + isUnsigned ? "u" : "", (int)typeSize * 8, + isUnsigned ? "" : " ", isLocal ? " " : ""); } //// Set up the kernel code // Create the pragma line for this kernel - bool isLong = ( dataType == kLong || dataType == kULong ); - sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n", - isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32", - extended ? "extended" : "base" ); + bool isLong = (dataType == kLong || dataType == kULong); + sprintf(pragma, + "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n", + isLong ? "" : (isLocal ? "_local" : "_global"), + isLong ? "64" : "32", extended ? "extended" : "base"); // Now create the program header - const char *typeName = get_explicit_type_name( dataType ); - if( isLocal ) - sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName ); + const char *typeName = get_explicit_type_name(dataType); + if (isLocal) + sprintf(programHeader, atomic_local_pattern[0], typeName, typeName, + typeName); else - sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName ); + sprintf(programHeader, atomic_global_pattern[0], typeName, typeName); // Set up our entire program now - programLines[ 0 ] = pragma; - programLines[ 1 ] = programHeader; - programLines[ 2 ] = programCore; - programLines[ 3 ] = ( isLocal ) ? atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ]; - - if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) ) + programLines[0] = pragma; + programLines[1] = programHeader; + programLines[2] = programCore; + programLines[3] = + (isLocal) ? atomic_local_pattern[1] : atomic_global_pattern[1]; + + if (create_single_kernel_helper(context, &program, &kernel, 4, programLines, + "test_atomic_fn")) { return -1; } @@ -183,29 +203,37 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q //// Set up to actually run threadSize = num_elements; - error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize ); - test_error( error, "Unable to get thread group max size" ); + error = + get_max_common_work_group_size(context, kernel, threadSize, &groupSize); + test_error(error, "Unable to get thread group max size"); - if( matchGroupSize ) + if (matchGroupSize) // HACK because xchg and cmpxchg apparently are limited by hardware threadSize = groupSize; - if( isLocal ) + if (isLocal) { - size_t maxSizes[3] = {0, 0, 0}; - error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3*sizeof(size_t), maxSizes, 0); - test_error( error, "Unable to obtain max work item sizes for the device" ); + size_t maxSizes[3] = { 0, 0, 0 }; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, + 3 * sizeof(size_t), maxSizes, 0); + test_error(error, + "Unable to obtain max work item sizes for the device"); size_t workSize; - error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL ); - test_error( error, "Unable to obtain max work group size for device and kernel combo" ); + error = clGetKernelWorkGroupInfo(kernel, deviceID, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(workSize), &workSize, NULL); + test_error( + error, + "Unable to obtain max work group size for device and kernel combo"); // Limit workSize to avoid extremely large local buffer size and slow // run. if (workSize > 65536) workSize = 65536; - // "workSize" is limited to that of the first dimension as only a 1DRange is executed. - if( maxSizes[0] < workSize ) + // "workSize" is limited to that of the first dimension as only a + // 1DRange is executed. + if (maxSizes[0] < workSize) { workSize = maxSizes[0]; } @@ -214,38 +242,43 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q } - log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize ); + log_info("\t(thread count %d, group size %d)\n", (int)threadSize, + (int)groupSize); - refValues = (cl_int *)malloc( typeSize * threadSize ); + refValues = (cl_int *)malloc(typeSize * threadSize); - if( testFns.GenerateRefsIntFn != NULL ) + if (testFns.GenerateRefsIntFn != NULL) { // We have a ref generator provided - d = init_genrand( gRandomSeed ); - startRefValues = malloc( typeSize * threadSize ); - if( typeSize == 4 ) - testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d ); + d = init_genrand(gRandomSeed); + startRefValues = malloc(typeSize * threadSize); + if (typeSize == 4) + testFns.GenerateRefsIntFn(threadSize, (cl_int *)startRefValues, d); else - testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d ); + testFns.GenerateRefsLongFn(threadSize, (cl_long *)startRefValues, + d); free_mtdata(d); d = NULL; } else startRefValues = NULL; - // If we're given a num_results function, we need to determine how many result objects we need. If - // we don't have it, we assume it's just 1 - size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1; + // If we're given a num_results function, we need to determine how many + // result objects we need. If we don't have it, we assume it's just 1 + size_t numDestItems = (testFns.NumResultsFn != NULL) + ? testFns.NumResultsFn(threadSize, dataType) + : 1; - char * destItems = new char[ typeSize * numDestItems ]; - if( destItems == NULL ) + char *destItems = new char[typeSize * numDestItems]; + if (destItems == NULL) { - log_error( "ERROR: Unable to allocate memory!\n" ); + log_error("ERROR: Unable to allocate memory!\n"); return -1; } - void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue; - for( size_t i = 0; i < numDestItems; i++ ) - memcpy( destItems + i * typeSize, startValue, typeSize ); + void *startValue = (typeSize == 4) ? (void *)&testFns.mIntStartValue + : (void *)&testFns.mLongStartValue; + for (size_t i = 0; i < numDestItems; i++) + memcpy(destItems + i * typeSize, startValue, typeSize); streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, typeSize * numDestItems, destItems, NULL); @@ -265,82 +298,96 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q } /* Set the arguments */ - error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); - test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); - if( isLocal ) + if (isLocal) { - error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL ); - test_error( error, "Unable to set indexed local kernel argument" ); + error = clSetKernelArg(kernel, 2, typeSize * numDestItems, NULL); + test_error(error, "Unable to set indexed local kernel argument"); cl_int numDestItemsInt = (cl_int)numDestItems; - error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt ); - test_error( error, "Unable to set indexed kernel argument" ); + error = clSetKernelArg(kernel, 3, sizeof(cl_int), &numDestItemsInt); + test_error(error, "Unable to set indexed kernel argument"); } /* Run the kernel */ threads[0] = threadSize; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); - - error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL ); - test_error( error, "Unable to read result value!" ); - - error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL ); - test_error( error, "Unable to read reference values!" ); - - // If we have an expectedFn, then we need to generate a final value to compare against. If we don't - // have one, it's because we're comparing ref values only - if( testFns.ExpectedValueIntFn != NULL ) + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, &groupSize, + 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + + error = + clEnqueueReadBuffer(queue, streams[0], true, 0, typeSize * numDestItems, + destItems, 0, NULL, NULL); + test_error(error, "Unable to read result value!"); + + error = + clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize * threadSize, + refValues, 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); + + // If we have an expectedFn, then we need to generate a final value to + // compare against. If we don't have one, it's because we're comparing ref + // values only + if (testFns.ExpectedValueIntFn != NULL) { - for( size_t i = 0; i < numDestItems; i++ ) + for (size_t i = 0; i < numDestItems; i++) { - char expected[ 8 ]; + char expected[8]; cl_int intVal; cl_long longVal; - if( typeSize == 4 ) + if (typeSize == 4) { // Int version - intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i ); - memcpy( expected, &intVal, sizeof( intVal ) ); + intVal = testFns.ExpectedValueIntFn( + threadSize, (cl_int *)startRefValues, i); + memcpy(expected, &intVal, sizeof(intVal)); } else { // Long version - longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i ); - memcpy( expected, &longVal, sizeof( longVal ) ); + longVal = testFns.ExpectedValueLongFn( + threadSize, (cl_long *)startRefValues, i); + memcpy(expected, &longVal, sizeof(longVal)); } - if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 ) + if (memcmp(expected, destItems + i * typeSize, typeSize) != 0) { - if( typeSize == 4 ) + if (typeSize == 4) { - cl_int *outValue = (cl_int *)( destItems + i * typeSize ); - log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue ); + cl_int *outValue = (cl_int *)(destItems + i * typeSize); + log_error("ERROR: Result %ld from kernel does not " + "validate! (should be %d, was %d)\n", + i, intVal, *outValue); cl_int *startRefs = (cl_int *)startRefValues; cl_int *refs = (cl_int *)refValues; - for( i = 0; i < threadSize; i++ ) + for (i = 0; i < threadSize; i++) { - if( startRefs != NULL ) - log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] ); + if (startRefs != NULL) + log_info(" --- %ld - %d --- %d\n", i, startRefs[i], + refs[i]); else - log_info( " --- %ld --- %d\n", i, refs[i] ); + log_info(" --- %ld --- %d\n", i, refs[i]); } } else { - cl_long *outValue = (cl_long *)( destItems + i * typeSize ); - log_error( "ERROR: Result %ld from kernel does not validate! (should be %lld, was %lld)\n", i, longVal, *outValue ); + cl_long *outValue = (cl_long *)(destItems + i * typeSize); + log_error("ERROR: Result %ld from kernel does not " + "validate! (should be %lld, was %lld)\n", + i, longVal, *outValue); cl_long *startRefs = (cl_long *)startRefValues; cl_long *refs = (cl_long *)refValues; - for( i = 0; i < threadSize; i++ ) + for (i = 0; i < threadSize; i++) { - if( startRefs != NULL ) - log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] ); + if (startRefs != NULL) + log_info(" --- %ld - %lld --- %lld\n", i, + startRefs[i], refs[i]); else - log_info( " --- %ld --- %lld\n", i, refs[i] ); + log_info(" --- %ld --- %lld\n", i, refs[i]); } } return -1; @@ -348,104 +395,140 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q } } - if( testFns.VerifyRefsIntFn != NULL ) + if (testFns.VerifyRefsIntFn != NULL) { /* Use the verify function to also check the results */ - if( dataType == kFloat ) + if (dataType == kFloat) { cl_float *outValue = (cl_float *)destItems; - if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) != 0 ) + if (!testFns.VerifyRefsFloatFn(threadSize, (cl_float *)refValues, + *outValue) + != 0) { - log_error( "ERROR: Reference values did not validate!\n" ); + log_error("ERROR: Reference values did not validate!\n"); return -1; } } - else if( typeSize == 4 ) + else if (typeSize == 4) { cl_int *outValue = (cl_int *)destItems; - if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) != 0 ) + if (!testFns.VerifyRefsIntFn(threadSize, (cl_int *)refValues, + *outValue) + != 0) { - log_error( "ERROR: Reference values did not validate!\n" ); + log_error("ERROR: Reference values did not validate!\n"); return -1; } } else { cl_long *outValue = (cl_long *)destItems; - if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) != 0 ) + if (!testFns.VerifyRefsLongFn(threadSize, (cl_long *)refValues, + *outValue) + != 0) { - log_error( "ERROR: Reference values did not validate!\n" ); + log_error("ERROR: Reference values did not validate!\n"); return -1; } } } - else if( testFns.ExpectedValueIntFn == NULL ) + else if (testFns.ExpectedValueIntFn == NULL) { - log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" ); + log_error("ERROR: Test doesn't check total or refs; no values are " + "verified!\n"); return -1; } /* Re-write the starting value */ - for( size_t i = 0; i < numDestItems; i++ ) - memcpy( destItems + i * typeSize, startValue, typeSize ); - error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL ); - test_error( error, "Unable to write starting values!" ); - - /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */ + for (size_t i = 0; i < numDestItems; i++) + memcpy(destItems + i * typeSize, startValue, typeSize); + error = + clEnqueueWriteBuffer(queue, streams[0], true, 0, + typeSize * numDestItems, destItems, 0, NULL, NULL); + test_error(error, "Unable to write starting values!"); + + /* Run the kernel once for a single thread, so we can verify that the + * returned value is the original one */ threads[0] = 1; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, threads, 0, + NULL, NULL); + test_error(error, "Unable to execute test kernel"); - error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL ); - test_error( error, "Unable to read reference values!" ); + error = clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize, refValues, + 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); - if( memcmp( refValues, destItems, typeSize ) != 0 ) + if (memcmp(refValues, destItems, typeSize) != 0) { - if( typeSize == 4 ) + if (typeSize == 4) { cl_int *s = (cl_int *)destItems; cl_int *r = (cl_int *)refValues; - log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value " - " (should have been %d, returned %d)!\n", *s, *r ); + log_error("ERROR: atomic function operated correctly but did NOT " + "return correct 'old' value " + " (should have been %d, returned %d)!\n", + *s, *r); } else { cl_long *s = (cl_long *)destItems; cl_long *r = (cl_long *)refValues; - log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value " - " (should have been %lld, returned %lld)!\n", *s, *r ); + log_error("ERROR: atomic function operated correctly but did NOT " + "return correct 'old' value " + " (should have been %lld, returned %lld)!\n", + *s, *r); } return -1; } - delete [] destItems; - free( refValues ); - if( startRefValues != NULL ) - free( startRefValues ); + delete[] destItems; + free(refValues); + if (startRefValues != NULL) free(startRefValues); return 0; } -int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore, - TestFns testFns, - bool extended, bool matchGroupSize, bool usingAtomicPrefix ) +int test_atomic_function_set(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + const char *programCore, TestFns testFns, + bool extended, bool matchGroupSize, + bool usingAtomicPrefix) { - log_info(" Testing %s functions...\n", usingAtomicPrefix ? "atomic_" : "atom_"); + log_info(" Testing %s functions...\n", + usingAtomicPrefix ? "atomic_" : "atom_"); int errors = 0; - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kInt, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kUInt, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kInt, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kUInt, matchGroupSize ); - - // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 bit functions still use the "atom" prefix. - // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic" prefix. - if (!usingAtomicPrefix) { - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kLong, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kULong, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kLong, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kULong, matchGroupSize ); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, kInt, + matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, kUInt, + matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, kInt, + matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, kUInt, + matchGroupSize); + + // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 + // bit functions still use the "atom" prefix. The argument usingAtomicPrefix + // is set to true if programCore was generated with the "atomic" prefix. + if (!usingAtomicPrefix) + { + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, + kLong, matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, + kULong, matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, + kLong, matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, + kULong, matchGroupSize); } return errors; @@ -454,265 +537,345 @@ int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_comma #pragma mark ---- add const char atom_add_core[] = -" oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n" -" atom_add( &destMemory[0], tid + 3 );\n" -" atom_add( &destMemory[0], tid + 3 );\n" -" atom_add( &destMemory[0], tid + 3 );\n"; + " oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n" + " atom_add( &destMemory[0], tid + 3 );\n" + " atom_add( &destMemory[0], tid + 3 );\n" + " atom_add( &destMemory[0], tid + 3 );\n"; const char atomic_add_core[] = -" oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n" -" atomic_add( &destMemory[0], tid + 3 );\n" -" atomic_add( &destMemory[0], tid + 3 );\n" -" atomic_add( &destMemory[0], tid + 3 );\n"; + " oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n" + " atomic_add( &destMemory[0], tid + 3 );\n" + " atomic_add( &destMemory[0], tid + 3 );\n" + " atomic_add( &destMemory[0], tid + 3 );\n"; -cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_add_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = 0; - for( size_t i = 0; i < size; i++ ) - total += ( (cl_int)i + 3 ) * 4; + for (size_t i = 0; i < size; i++) total += ((cl_int)i + 3) * 4; return total; } -cl_long test_atomic_add_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_add_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = 0; - for( size_t i = 0; i < size; i++ ) - total += ( ( i + 3 ) * 4 ); + for (size_t i = 0; i < size; i++) total += ((i + 3) * 4); return total; } -int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0, + 0LL, + NULL, + test_atomic_add_result_int, + NULL, + NULL, + test_atomic_add_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_add_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) + return -1; + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_add_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) - return -1; return 0; } #pragma mark ---- sub -const char atom_sub_core[] = " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n"; +const char atom_sub_core[] = + " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n"; -const char atomic_sub_core[] = " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n"; +const char atomic_sub_core[] = + " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n"; -cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_sub_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = INT_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total -= (cl_int)i + 3; + for (size_t i = 0; i < size; i++) total -= (cl_int)i + 3; return total; } -cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_sub_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = LONG_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total -= i + 3; + for (size_t i = 0; i < size; i++) total -= i + 3; return total; } -int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_sub_result_int, + NULL, + NULL, + test_atomic_sub_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_sub_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_sub_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } #pragma mark ---- xchg -const char atom_xchg_core[] = " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n"; +const char atom_xchg_core[] = + " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n"; -const char atomic_xchg_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; -const char atomic_xchg_float_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; +const char atomic_xchg_core[] = + " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; +const char atomic_xchg_float_core[] = + " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; -bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue ) +bool test_atomic_xchg_verify_int(size_t size, cl_int *refValues, + cl_int finalValue) { - /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref + * array, and ONLY one entry */ char *valids; size_t i; char originalValidCount = 0; - valids = (char *)malloc( sizeof( char ) * size ); - memset( valids, 0, sizeof( char ) * size ); + valids = (char *)malloc(sizeof(char) * size); + memset(valids, 0, sizeof(char) * size); - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( refValues[ i ] == INT_TEST_VALUE ) + if (refValues[i] == INT_TEST_VALUE) { // Special initial value originalValidCount++; continue; } - if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + if (refValues[i] < 0 || (size_t)refValues[i] >= size) { - log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] ); + log_error( + "ERROR: Reference value %ld outside of valid range! (%d)\n", i, + refValues[i]); return false; } - valids[ refValues[ i ] ] ++; + valids[refValues[i]]++; } - /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be - the final value outputted */ - if( valids[ finalValue ] > 0 ) + /* Note: ONE entry will have zero count. It'll be the last one that + executed, because that value should be the final value outputted */ + if (valids[finalValue] > 0) { - log_error( "ERROR: Final value %d was also in ref list!\n", finalValue ); + log_error("ERROR: Final value %d was also in ref list!\n", finalValue); return false; } else - valids[ finalValue ] = 1; // So the following loop will be okay + valids[finalValue] = 1; // So the following loop will be okay /* Now check that every entry has one and only one count */ - if( originalValidCount != 1 ) + if (originalValidCount != 1) { - log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + log_error("ERROR: Starting reference value %d did not occur " + "once-and-only-once (occurred %d)\n", + 65191, originalValidCount); return false; } - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( valids[ i ] != 1 ) + if (valids[i] != 1) { - log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); - for( size_t j = 0; j < size; j++ ) - log_info( "%d: %d\n", (int)j, (int)valids[ j ] ); + log_error("ERROR: Reference value %ld did not occur " + "once-and-only-once (occurred %d)\n", + i, valids[i]); + for (size_t j = 0; j < size; j++) + log_info("%d: %d\n", (int)j, (int)valids[j]); return false; } } - free( valids ); + free(valids); return true; } -bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue ) +bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues, + cl_long finalValue) { - /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref + * array, and ONLY one entry */ char *valids; size_t i; char originalValidCount = 0; - valids = (char *)malloc( sizeof( char ) * size ); - memset( valids, 0, sizeof( char ) * size ); + valids = (char *)malloc(sizeof(char) * size); + memset(valids, 0, sizeof(char) * size); - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( refValues[ i ] == LONG_TEST_VALUE ) + if (refValues[i] == LONG_TEST_VALUE) { // Special initial value originalValidCount++; continue; } - if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + if (refValues[i] < 0 || (size_t)refValues[i] >= size) { - log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] ); + log_error( + "ERROR: Reference value %ld outside of valid range! (%lld)\n", + i, refValues[i]); return false; } - valids[ refValues[ i ] ] ++; + valids[refValues[i]]++; } - /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be - the final value outputted */ - if( valids[ finalValue ] > 0 ) + /* Note: ONE entry will have zero count. It'll be the last one that + executed, because that value should be the final value outputted */ + if (valids[finalValue] > 0) { - log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue ); + log_error("ERROR: Final value %lld was also in ref list!\n", + finalValue); return false; } else - valids[ finalValue ] = 1; // So the following loop will be okay + valids[finalValue] = 1; // So the following loop will be okay /* Now check that every entry has one and only one count */ - if( originalValidCount != 1 ) + if (originalValidCount != 1) { - log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + log_error("ERROR: Starting reference value %d did not occur " + "once-and-only-once (occurred %d)\n", + 65191, originalValidCount); return false; } - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( valids[ i ] != 1 ) + if (valids[i] != 1) { - log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); - for( size_t j = 0; j < size; j++ ) - log_info( "%d: %d\n", (int)j, (int)valids[ j ] ); + log_error("ERROR: Reference value %ld did not occur " + "once-and-only-once (occurred %d)\n", + i, valids[i]); + for (size_t j = 0; j < size; j++) + log_info("%d: %d\n", (int)j, (int)valids[j]); return false; } } - free( valids ); + free(valids); return true; } -bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue ) +bool test_atomic_xchg_verify_float(size_t size, cl_float *refValues, + cl_float finalValue) { - /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref + * array, and ONLY one entry */ char *valids; size_t i; char originalValidCount = 0; - valids = (char *)malloc( sizeof( char ) * size ); - memset( valids, 0, sizeof( char ) * size ); + valids = (char *)malloc(sizeof(char) * size); + memset(valids, 0, sizeof(char) * size); - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - cl_int *intRefValue = (cl_int *)( &refValues[ i ] ); - if( *intRefValue == INT_TEST_VALUE ) + cl_int *intRefValue = (cl_int *)(&refValues[i]); + if (*intRefValue == INT_TEST_VALUE) { // Special initial value originalValidCount++; continue; } - if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + if (refValues[i] < 0 || (size_t)refValues[i] >= size) { - log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] ); + log_error( + "ERROR: Reference value %ld outside of valid range! (%a)\n", i, + refValues[i]); return false; } - valids[ (int)refValues[ i ] ] ++; + valids[(int)refValues[i]]++; } - /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be - the final value outputted */ - if( valids[ (int)finalValue ] > 0 ) + /* Note: ONE entry will have zero count. It'll be the last one that + executed, because that value should be the final value outputted */ + if (valids[(int)finalValue] > 0) { - log_error( "ERROR: Final value %a was also in ref list!\n", finalValue ); + log_error("ERROR: Final value %a was also in ref list!\n", finalValue); return false; } else - valids[ (int)finalValue ] = 1; // So the following loop will be okay + valids[(int)finalValue] = 1; // So the following loop will be okay /* Now check that every entry has one and only one count */ - if( originalValidCount != 1 ) + if (originalValidCount != 1) { - log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + log_error("ERROR: Starting reference value %d did not occur " + "once-and-only-once (occurred %d)\n", + 65191, originalValidCount); return false; } - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( valids[ i ] != 1 ) + if (valids[i] != 1) { - log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); - for( size_t j = 0; j < size; j++ ) - log_info( "%d: %d\n", (int)j, (int)valids[ j ] ); + log_error("ERROR: Reference value %ld did not occur " + "once-and-only-once (occurred %d)\n", + i, valids[i]); + for (size_t j = 0; j < size; j++) + log_info("%d: %d\n", (int)j, (int)valids[j]); return false; } } - free( valids ); + free(valids); return true; } -int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_xchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float }; - - int errors = test_atomic_function_set( deviceID, context, queue, num_elements, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false ); - errors |= test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true ); - - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, false, kFloat, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, true, kFloat, true ); + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + NULL, + NULL, + test_atomic_xchg_verify_int, + NULL, + NULL, + test_atomic_xchg_verify_long, + NULL, + NULL, + test_atomic_xchg_verify_float }; + + int errors = test_atomic_function_set( + deviceID, context, queue, num_elements, atom_xchg_core, set, false, + true, /*usingAtomicPrefix*/ false); + errors |= test_atomic_function_set(deviceID, context, queue, num_elements, + atomic_xchg_core, set, false, true, + /*usingAtomicPrefix*/ true); + + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_xchg_float_core, set, false, false, + kFloat, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_xchg_float_core, set, false, true, + kFloat, true); return errors; } @@ -720,51 +883,71 @@ int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- min -const char atom_min_core[] = " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n"; +const char atom_min_core[] = + " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n"; -const char atomic_min_core[] = " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n"; +const char atomic_min_core[] = + " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n"; -cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_min_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = 0x7fffffffL; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i < size; i++) { - if( startRefValues[ i ] < total ) - total = startRefValues[ i ]; + if (startRefValues[i] < total) total = startRefValues[i]; } return total; } -void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d ) +void test_atomic_min_gen_int(size_t size, cl_int *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff; + for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff; } -cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_min_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = 0x7fffffffffffffffLL; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i < size; i++) { - if( startRefValues[ i ] < total ) - total = startRefValues[ i ]; + if (startRefValues[i] < total) total = startRefValues[i]; } return total; } -void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d ) +void test_atomic_min_gen_long(size_t size, cl_long *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) ); + for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_long)(genrand_int32(d) + | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16)); } -int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0x7fffffffL, + 0x7fffffffffffffffLL, + NULL, + test_atomic_min_result_int, + test_atomic_min_gen_int, + NULL, + test_atomic_min_result_long, + test_atomic_min_gen_long, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_min_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_min_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -772,79 +955,118 @@ int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- max -const char atom_max_core[] = " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n"; +const char atom_max_core[] = + " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n"; -const char atomic_max_core[] = " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n"; +const char atomic_max_core[] = + " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n"; -cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_max_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = 0; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i < size; i++) { - if( startRefValues[ i ] > total ) - total = startRefValues[ i ]; + if (startRefValues[i] > total) total = startRefValues[i]; } return total; } -void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d ) +void test_atomic_max_gen_int(size_t size, cl_int *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff; + for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff; } -cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_max_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = 0; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i < size; i++) { - if( startRefValues[ i ] > total ) - total = startRefValues[ i ]; + if (startRefValues[i] > total) total = startRefValues[i]; } return total; } -void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d ) +void test_atomic_max_gen_long(size_t size, cl_long *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) ); + for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_long)(genrand_int32(d) + | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16)); } -int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0, + 0, + NULL, + test_atomic_max_result_int, + test_atomic_max_gen_int, + NULL, + test_atomic_max_result_long, + test_atomic_max_gen_long, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_max_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) + return -1; + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_max_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) - return -1; return 0; } #pragma mark ---- inc -const char atom_inc_core[] = " oldValues[tid] = atom_inc( &destMemory[0] );\n"; +const char atom_inc_core[] = + " oldValues[tid] = atom_inc( &destMemory[0] );\n"; -const char atomic_inc_core[] = " oldValues[tid] = atomic_inc( &destMemory[0] );\n"; +const char atomic_inc_core[] = + " oldValues[tid] = atomic_inc( &destMemory[0] );\n"; -cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_inc_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { return INT_TEST_VALUE + (cl_int)size; } -cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_inc_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { return LONG_TEST_VALUE + size; } -int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_inc(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_inc_result_int, + NULL, + NULL, + test_atomic_inc_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_inc_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_inc_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -852,27 +1074,46 @@ int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- dec -const char atom_dec_core[] = " oldValues[tid] = atom_dec( &destMemory[0] );\n"; +const char atom_dec_core[] = + " oldValues[tid] = atom_dec( &destMemory[0] );\n"; -const char atomic_dec_core[] = " oldValues[tid] = atomic_dec( &destMemory[0] );\n"; +const char atomic_dec_core[] = + " oldValues[tid] = atomic_dec( &destMemory[0] );\n"; -cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_dec_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { return INT_TEST_VALUE - (cl_int)size; } -cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_dec_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { return LONG_TEST_VALUE - size; } -int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_dec(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_dec_result_int, + NULL, + NULL, + test_atomic_dec_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_dec_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_dec_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -881,129 +1122,159 @@ int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- cmpxchg /* We test cmpxchg by implementing (the long way) atom_add */ +// clang-format off const char atom_cmpxchg_core[] = -" int oldValue, origValue, newValue;\n" -" do { \n" -" origValue = destMemory[0];\n" -" newValue = origValue + tid + 2;\n" -" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" -" } while( oldValue != origValue );\n" -" oldValues[tid] = oldValue;\n" -; + " int oldValue, origValue, newValue;\n" + " do { \n" + " origValue = destMemory[0];\n" + " newValue = origValue + tid + 2;\n" + " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" + " } while( oldValue != origValue );\n" + " oldValues[tid] = oldValue;\n"; const char atom_cmpxchg64_core[] = -" long oldValue, origValue, newValue;\n" -" do { \n" -" origValue = destMemory[0];\n" -" newValue = origValue + tid + 2;\n" -" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" -" } while( oldValue != origValue );\n" -" oldValues[tid] = oldValue;\n" -; + " long oldValue, origValue, newValue;\n" + " do { \n" + " origValue = destMemory[0];\n" + " newValue = origValue + tid + 2;\n" + " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" + " } while( oldValue != origValue );\n" + " oldValues[tid] = oldValue;\n"; const char atomic_cmpxchg_core[] = -" int oldValue, origValue, newValue;\n" -" do { \n" -" origValue = destMemory[0];\n" -" newValue = origValue + tid + 2;\n" -" oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n" -" } while( oldValue != origValue );\n" -" oldValues[tid] = oldValue;\n" -; - -cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) + " int oldValue, origValue, newValue;\n" + " do { \n" + " origValue = destMemory[0];\n" + " newValue = origValue + tid + 2;\n" + " oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n" + " } while( oldValue != origValue );\n" + " oldValues[tid] = oldValue;\n"; +// clang-format on + +cl_int test_atomic_cmpxchg_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = INT_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total += (cl_int)i + 2; + for (size_t i = 0; i < size; i++) total += (cl_int)i + 2; return total; } -cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_cmpxchg_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = LONG_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total += i + 2; + for (size_t i = 0; i < size; i++) total += i + 2; return total; } -int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL }; + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_cmpxchg_result_int, + NULL, + NULL, + test_atomic_cmpxchg_result_long, + NULL, + NULL }; int errors = 0; log_info(" Testing atom_ functions...\n"); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kUInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kUInt, true ); - - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kLong, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kULong, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kLong, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kULong, true ); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, false, kInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, false, kUInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, true, kInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, true, kUInt, true); + + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, false, + kLong, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, false, + kULong, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, true, kLong, + true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, true, + kULong, true); log_info(" Testing atomic_ functions...\n"); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kUInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kUInt, true ); - - if( errors ) - return -1; + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, false, kInt, + true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, false, + kUInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, true, kInt, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, true, kUInt, + true); + + if (errors) return -1; return 0; } #pragma mark -------- Bitwise functions -size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType ) +size_t test_bitwise_num_results(size_t threadCount, ExplicitType dataType) { - size_t numBits = get_explicit_type_size( dataType ) * 8; + size_t numBits = get_explicit_type_size(dataType) * 8; - return ( threadCount + numBits - 1 ) / numBits; + return (threadCount + numBits - 1) / numBits; } #pragma mark ---- and +// clang-format off const char atom_and_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n" -; + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"; const char atomic_and_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n" -; + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"; +// clang-format on -cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult ) +cl_int test_atomic_and_result_int(size_t size, cl_int *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 31 ) / 32; - if( whichResult < numThreads - 1 ) - return 0; + size_t numThreads = ((size_t)size + 31) / 32; + if (whichResult < numThreads - 1) return 0; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 32; cl_int bits = (cl_int)0xffffffffL; - for( size_t i = 0; i < numBits; i++ ) - bits &= ~( 1 << i ); + for (size_t i = 0; i < numBits; i++) bits &= ~(1 << i); return bits; } -cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +cl_long test_atomic_and_result_long(size_t size, cl_long *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 63 ) / 64; - if( whichResult < numThreads - 1 ) - return 0; + size_t numThreads = ((size_t)size + 63) / 64; + if (whichResult < numThreads - 1) return 0; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 64; @@ -1013,14 +1284,28 @@ cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_ return bits; } -int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results, - test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0xffffffff, + 0xffffffffffffffffLL, + test_bitwise_num_results, + test_atomic_and_result_int, + NULL, + NULL, + test_atomic_and_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_and_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_and_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -1028,59 +1313,68 @@ int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- or +// clang-format off const char atom_or_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n" -; + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"; const char atomic_or_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n" -; - -cl_int test_atomic_or_result_int( size_t size, cl_int *startRefValues, size_t whichResult ) + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"; +// clang-format on + +cl_int test_atomic_or_result_int(size_t size, cl_int *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 31 ) / 32; - if( whichResult < numThreads - 1 ) - return 0xffffffff; + size_t numThreads = ((size_t)size + 31) / 32; + if (whichResult < numThreads - 1) return 0xffffffff; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 32; cl_int bits = 0; - for( size_t i = 0; i < numBits; i++ ) - bits |= ( 1 << i ); + for (size_t i = 0; i < numBits; i++) bits |= (1 << i); return bits; } -cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +cl_long test_atomic_or_result_long(size_t size, cl_long *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 63 ) / 64; - if( whichResult < numThreads - 1 ) - return 0x0ffffffffffffffffLL; + size_t numThreads = ((size_t)size + 63) / 64; + if (whichResult < numThreads - 1) return 0x0ffffffffffffffffLL; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 64; cl_long bits = 0; - for( size_t i = 0; i < numBits; i++ ) - bits |= ( 1LL << i ); + for (size_t i = 0; i < numBits; i++) bits |= (1LL << i); return bits; } -int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL }; + TestFns set = { + 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, + NULL, NULL, test_atomic_or_result_long, NULL, + NULL + }; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_or_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_or_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -1100,33 +1394,44 @@ const char atomic_xor_core[] = "\n" " oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n"; -cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult ) +cl_int test_atomic_xor_result_int(size_t size, cl_int *startRefValues, + size_t whichResult) { cl_int total = 0x2f08ab41; - for( size_t i = 0; i < size; i++ ) - total ^= ( 1 << ( i & 31 ) ); + for (size_t i = 0; i < size; i++) total ^= (1 << (i & 31)); return total; } -cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +cl_long test_atomic_xor_result_long(size_t size, cl_long *startRefValues, + size_t whichResult) { cl_long total = 0x2f08ab418ba0541LL; - for( size_t i = 0; i < size; i++ ) - total ^= ( 1LL << ( i & 63 ) ); + for (size_t i = 0; i < size; i++) total ^= (1LL << (i & 63)); return total; } -int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0x2f08ab41, + 0x2f08ab418ba0541LL, + NULL, + test_atomic_xor_result_int, + NULL, + NULL, + test_atomic_xor_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_xor_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_xor_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } - - - - diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp index b85e3d24..d625d8b4 100644 --- a/test_conformance/atomics/test_indexed_cases.cpp +++ b/test_conformance/atomics/test_indexed_cases.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -16,22 +16,25 @@ #include "testBase.h" #include "harness/conversions.h" -const char * atomic_index_source = -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"// Counter keeps track of which index in counts we are using.\n" -"// We get that value, increment it, and then set that index in counts to our thread ID.\n" -"// At the end of this we should have all thread IDs in some random location in counts\n" -"// exactly once. If atom_add failed then we will write over various thread IDs and we\n" -"// will be missing some.\n" -"\n" -"__kernel void add_index_test(__global int *counter, __global int *counts) {\n" -" int tid = get_global_id(0);\n" -" \n" -" int counter_to_use = atom_add(counter, 1);\n" -" counts[counter_to_use] = tid;\n" -"}"; - -int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +// clang-format off +const char *atomic_index_source = + "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" + "// Counter keeps track of which index in counts we are using.\n" + "// We get that value, increment it, and then set that index in counts to our thread ID.\n" + "// At the end of this we should have all thread IDs in some random location in counts\n" + "// exactly once. If atom_add failed then we will write over various thread IDs and we\n" + "// will be missing some.\n" + "\n" + "__kernel void add_index_test(__global int *counter, __global int *counts) {\n" + " int tid = get_global_id(0);\n" + " \n" + " int counter_to_use = atom_add(counter, 1);\n" + " counts[counter_to_use] = tid;\n" + "}"; +// clang-format on + +int test_atomic_add_index(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { clProgramWrapper program; clKernelWrapper kernel; @@ -39,25 +42,29 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_ size_t numGlobalThreads, numLocalThreads; int fail = 0, succeed = 0, err; - /* Check if atomics are supported. */ - if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) { - log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n"); - return 0; - } + /* Check if atomics are supported. */ + if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) + { + log_info("Base atomics not supported " + "(cl_khr_global_int32_base_atomics). Skipping test.\n"); + return 0; + } //===== add_index test // The index test replicates what particles does. - // It uses one memory location to keep track of the current index and then each thread - // does an atomic add to it to get its new location. The threads then write to their - // assigned location. At the end we check to make sure that each thread's ID shows up - // exactly once in the output. + // It uses one memory location to keep track of the current index and then + // each thread does an atomic add to it to get its new location. The threads + // then write to their assigned location. At the end we check to make sure + // that each thread's ID shows up exactly once in the output. numGlobalThreads = 2048; - if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + &atomic_index_source, "add_index_test")) return -1; - if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) ) + if (get_max_common_work_group_size(context, kernel, numGlobalThreads, + &numLocalThreads)) return -1; log_info("Execute global_threads:%d local_threads:%d\n", @@ -72,86 +79,133 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_ sizeof(cl_int) * numGlobalThreads, NULL, NULL); // Reset all those locations to -1 to indciate they have not been used. - cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads); - if (values == NULL) { - log_error("add_index_test FAILED to allocate memory for initial values.\n"); - fail = 1; succeed = -1; - } else { + cl_int *values = (cl_int *)malloc(sizeof(cl_int) * numGlobalThreads); + if (values == NULL) + { + log_error( + "add_index_test FAILED to allocate memory for initial values.\n"); + fail = 1; + succeed = -1; + } + else + { memset(values, -1, numLocalThreads); - unsigned int i=0; - for (i=0; i<numGlobalThreads; i++) - values[i] = -1; - int init=0; - err = clEnqueueWriteBuffer(queue, counters, true, 0, numGlobalThreads*sizeof(cl_int), values, 0, NULL, NULL); - err |= clEnqueueWriteBuffer(queue, counter, true, 0,1*sizeof(cl_int), &init, 0, NULL, NULL); - if (err) { - log_error("add_index_test FAILED to write initial values to arrays: %d\n", err); - fail=1; succeed=-1; - } else { + unsigned int i = 0; + for (i = 0; i < numGlobalThreads; i++) values[i] = -1; + int init = 0; + err = clEnqueueWriteBuffer(queue, counters, true, 0, + numGlobalThreads * sizeof(cl_int), values, 0, + NULL, NULL); + err |= clEnqueueWriteBuffer(queue, counter, true, 0, 1 * sizeof(cl_int), + &init, 0, NULL, NULL); + if (err) + { + log_error( + "add_index_test FAILED to write initial values to arrays: %d\n", + err); + fail = 1; + succeed = -1; + } + else + { err = clSetKernelArg(kernel, 0, sizeof(counter), &counter); err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters); - if (err) { - log_error("add_index_test FAILED to set kernel arguments: %d\n", err); - fail=1; succeed=-1; - } else { - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numGlobalThreads, &numLocalThreads, 0, NULL, NULL ); - if (err) { - log_error("add_index_test FAILED to execute kernel: %d\n", err); - fail=1; succeed=-1; - } else { - err = clEnqueueReadBuffer( queue, counters, true, 0, sizeof(cl_int)*numGlobalThreads, values, 0, NULL, NULL ); - if (err) { - log_error("add_index_test FAILED to read back results: %d\n", err); - fail = 1; succeed=-1; - } else { + if (err) + { + log_error("add_index_test FAILED to set kernel arguments: %d\n", + err); + fail = 1; + succeed = -1; + } + else + { + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, + &numGlobalThreads, + &numLocalThreads, 0, NULL, NULL); + if (err) + { + log_error("add_index_test FAILED to execute kernel: %d\n", + err); + fail = 1; + succeed = -1; + } + else + { + err = clEnqueueReadBuffer(queue, counters, true, 0, + sizeof(cl_int) * numGlobalThreads, + values, 0, NULL, NULL); + if (err) + { + log_error( + "add_index_test FAILED to read back results: %d\n", + err); + fail = 1; + succeed = -1; + } + else + { unsigned int looking_for, index; - for (looking_for=0; looking_for<numGlobalThreads; looking_for++) { - int instances_found=0; - for (index=0; index<numGlobalThreads; index++) { - if (values[index]==(int)looking_for) + for (looking_for = 0; looking_for < numGlobalThreads; + looking_for++) + { + int instances_found = 0; + for (index = 0; index < numGlobalThreads; index++) + { + if (values[index] == (int)looking_for) instances_found++; } - if (instances_found != 1) { - log_error("add_index_test FAILED: wrong number of instances (%d!=1) for counter %d.\n", instances_found, looking_for); - fail = 1; succeed=-1; + if (instances_found != 1) + { + log_error( + "add_index_test FAILED: wrong number of " + "instances (%d!=1) for counter %d.\n", + instances_found, looking_for); + fail = 1; + succeed = -1; } } } } } } - if (!fail) { - log_info("add_index_test passed. Each thread used exactly one index.\n"); + if (!fail) + { + log_info( + "add_index_test passed. Each thread used exactly one index.\n"); } free(values); } return fail; } +// clang-format off const char *add_index_bin_kernel[] = { -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n" -"// using an atomic add to keep track of the current location to write into in each bin.\n" -"// This is the same as the memory update for the particles demo.\n" -"\n" -"__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n" -" int tid = get_global_id(0);\n" -"\n" -" int location = bin_assignments[tid];\n" -" int counter = atom_add(&bin_counters[location], 1);\n" -" bins[location*max_counts_per_bin + counter] = tid;\n" -"}" }; - -// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel -// using an atomic add to keep track of the current location to write into in each bin. -// This is the same as the memory update for the particles demo. -int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_context context, MTdata d) + "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" + "// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n" + "// using an atomic add to keep track of the current location to write into in each bin.\n" + "// This is the same as the memory update for the particles demo.\n" + "\n" + "__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n" + " int tid = get_global_id(0);\n" + "\n" + " int location = bin_assignments[tid];\n" + " int counter = atom_add(&bin_counters[location], 1);\n" + " bins[location*max_counts_per_bin + counter] = tid;\n" + "}" }; +// clang-format on + +// This test assigns a bunch of values to bins and then tries to put them in the +// bins in parallel using an atomic add to keep track of the current location to +// write into in each bin. This is the same as the memory update for the +// particles demo. +int add_index_bin_test(size_t *global_threads, cl_command_queue queue, + cl_context context, MTdata d) { int number_of_items = (int)global_threads[0]; size_t local_threads[1]; int divisor = 12; - int number_of_bins = number_of_items/divisor; - int max_counts_per_bin = divisor*2; + int number_of_bins = number_of_items / divisor; + int max_counts_per_bin = divisor * 2; int fail = 0; int succeed = 0; @@ -160,15 +214,20 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_contex clProgramWrapper program; clKernelWrapper kernel; - // log_info("add_index_bin_test: %d items, into %d bins, with a max of %d items per bin (bins is %d long).\n", - // number_of_items, number_of_bins, max_counts_per_bin, number_of_bins*max_counts_per_bin); + // log_info("add_index_bin_test: %d items, into %d bins, with a max of %d + // items per bin (bins is %d long).\n", + // number_of_items, number_of_bins, max_counts_per_bin, + // number_of_bins*max_counts_per_bin); //===== add_index_bin test // The index test replicates what particles does. - err = create_single_kernel_helper(context, &program, &kernel, 1, add_index_bin_kernel, "add_index_bin_test" ); - test_error( err, "Unable to create testing kernel" ); + err = + create_single_kernel_helper(context, &program, &kernel, 1, + add_index_bin_kernel, "add_index_bin_test"); + test_error(err, "Unable to create testing kernel"); - if( get_max_common_work_group_size( context, kernel, global_threads[0], &local_threads[0] ) ) + if (get_max_common_work_group_size(context, kernel, global_threads[0], + &local_threads[0])) return -1; log_info("Execute global_threads:%d local_threads:%d\n", @@ -185,152 +244,232 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_contex clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * number_of_items, NULL, NULL); - if (bin_counters == NULL) { + if (bin_counters == NULL) + { log_error("add_index_bin_test FAILED to allocate bin_counters.\n"); return -1; } - if (bins == NULL) { + if (bins == NULL) + { log_error("add_index_bin_test FAILED to allocate bins.\n"); return -1; } - if (bin_assignments == NULL) { + if (bin_assignments == NULL) + { log_error("add_index_bin_test FAILED to allocate bin_assignments.\n"); return -1; } // Initialize our storage - cl_int *l_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins); - if (!l_bin_counts) { - log_error("add_index_bin_test FAILED to allocate initial values for bin_counters.\n"); + cl_int *l_bin_counts = (cl_int *)malloc(sizeof(cl_int) * number_of_bins); + if (!l_bin_counts) + { + log_error("add_index_bin_test FAILED to allocate initial values for " + "bin_counters.\n"); return -1; } int i; - for (i=0; i<number_of_bins; i++) - l_bin_counts[i] = 0; - err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, l_bin_counts, 0, NULL, NULL); - if (err) { - log_error("add_index_bin_test FAILED to set initial values for bin_counters: %d\n", err); + for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0; + err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, + sizeof(cl_int) * number_of_bins, l_bin_counts, 0, + NULL, NULL); + if (err) + { + log_error("add_index_bin_test FAILED to set initial values for " + "bin_counters: %d\n", + err); return -1; } - cl_int *values = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin); - if (!values) { - log_error("add_index_bin_test FAILED to allocate initial values for bins.\n"); + cl_int *values = + (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin); + if (!values) + { + log_error( + "add_index_bin_test FAILED to allocate initial values for bins.\n"); return -1; } - for (i=0; i<number_of_bins*max_counts_per_bin; i++) - values[i] = -1; - err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, values, 0, NULL, NULL); - if (err) { - log_error("add_index_bin_test FAILED to set initial values for bins: %d\n", err); + for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1; + err = clEnqueueWriteBuffer(queue, bins, true, 0, + sizeof(cl_int) * number_of_bins + * max_counts_per_bin, + values, 0, NULL, NULL); + if (err) + { + log_error( + "add_index_bin_test FAILED to set initial values for bins: %d\n", + err); return -1; } free(values); - cl_int *l_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_items); - if (!l_bin_assignments) { - log_error("add_index_bin_test FAILED to allocate initial values for l_bin_assignments.\n"); + cl_int *l_bin_assignments = + (cl_int *)malloc(sizeof(cl_int) * number_of_items); + if (!l_bin_assignments) + { + log_error("add_index_bin_test FAILED to allocate initial values for " + "l_bin_assignments.\n"); return -1; } - for (i=0; i<number_of_items; i++) { - int bin = random_in_range(0, number_of_bins-1, d); - while (l_bin_counts[bin] >= max_counts_per_bin) { - bin = random_in_range(0, number_of_bins-1, d); + for (i = 0; i < number_of_items; i++) + { + int bin = random_in_range(0, number_of_bins - 1, d); + while (l_bin_counts[bin] >= max_counts_per_bin) + { + bin = random_in_range(0, number_of_bins - 1, d); } if (bin >= number_of_bins) - log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins); - if (l_bin_counts[bin]+1 > max_counts_per_bin) - log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin); + log_error("add_index_bin_test internal error generating bin " + "assignments: bin %d >= number_of_bins %d.\n", + bin, number_of_bins); + if (l_bin_counts[bin] + 1 > max_counts_per_bin) + log_error( + "add_index_bin_test internal error generating bin assignments: " + "bin %d has more entries (%d) than max_counts_per_bin (%d).\n", + bin, l_bin_counts[bin], max_counts_per_bin); l_bin_counts[bin]++; l_bin_assignments[i] = bin; - // log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]); + // log_info("item %d assigned to bin %d (%d items)\n", i, bin, + // l_bin_counts[bin]); } - err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL); - if (err) { - log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", err); + err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, + sizeof(cl_int) * number_of_items, + l_bin_assignments, 0, NULL, NULL); + if (err) + { + log_error("add_index_bin_test FAILED to set initial values for " + "bin_assignments: %d\n", + err); return -1; } // Setup the kernel err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters); err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins); err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments); - err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin); - if (err) { - log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err); - fail=1; succeed=-1; + err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), + &max_counts_per_bin); + if (err) + { + log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", + err); + fail = 1; + succeed = -1; return -1; } - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL ); - if (err) { + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, + local_threads, 0, NULL, NULL); + if (err) + { log_error("add_index_bin_test FAILED to execute kernel: %d\n", err); - fail=1; succeed=-1; + fail = 1; + succeed = -1; } - cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin); - if (!final_bin_assignments) { - log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n"); + cl_int *final_bin_assignments = + (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin); + if (!final_bin_assignments) + { + log_error("add_index_bin_test FAILED to allocate initial values for " + "final_bin_assignments.\n"); return -1; } - err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL ); - if (err) { + err = clEnqueueReadBuffer(queue, bins, true, 0, + sizeof(cl_int) * number_of_bins + * max_counts_per_bin, + final_bin_assignments, 0, NULL, NULL); + if (err) + { log_error("add_index_bin_test FAILED to read back bins: %d\n", err); - fail = 1; succeed=-1; + fail = 1; + succeed = -1; } - cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins); - if (!final_bin_counts) { - log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n"); + cl_int *final_bin_counts = + (cl_int *)malloc(sizeof(cl_int) * number_of_bins); + if (!final_bin_counts) + { + log_error("add_index_bin_test FAILED to allocate initial values for " + "final_bin_counts.\n"); return -1; } - err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL ); - if (err) { - log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err); - fail = 1; succeed=-1; + err = clEnqueueReadBuffer(queue, bin_counters, true, 0, + sizeof(cl_int) * number_of_bins, final_bin_counts, + 0, NULL, NULL); + if (err) + { + log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", + err); + fail = 1; + succeed = -1; } // Verification. - int errors=0; + int errors = 0; int current_bin; int search; // Print out all the contents of the bins. // for (current_bin=0; current_bin<number_of_bins; current_bin++) // for (search=0; search<max_counts_per_bin; search++) - // log_info("[bin %d, entry %d] = %d\n", current_bin, search, final_bin_assignments[current_bin*max_counts_per_bin+search]); + // log_info("[bin %d, entry %d] = %d\n", current_bin, search, + // final_bin_assignments[current_bin*max_counts_per_bin+search]); // First verify that there are the correct number in each bin. - for (current_bin=0; current_bin<number_of_bins; current_bin++) { + for (current_bin = 0; current_bin < number_of_bins; current_bin++) + { int expected_number = l_bin_counts[current_bin]; int actual_number = final_bin_counts[current_bin]; - if (expected_number != actual_number) { - log_error("add_index_bin_test FAILED: bin %d reported %d entries when %d were expected.\n", current_bin, actual_number, expected_number); + if (expected_number != actual_number) + { + log_error("add_index_bin_test FAILED: bin %d reported %d entries " + "when %d were expected.\n", + current_bin, actual_number, expected_number); errors++; } - for (search=0; search<expected_number; search++) { - if (final_bin_assignments[current_bin*max_counts_per_bin+search] == -1) { - log_error("add_index_bin_test FAILED: bin %d had no entry at position %d when it should have had %d entries.\n", current_bin, search, expected_number); + for (search = 0; search < expected_number; search++) + { + if (final_bin_assignments[current_bin * max_counts_per_bin + search] + == -1) + { + log_error("add_index_bin_test FAILED: bin %d had no entry at " + "position %d when it should have had %d entries.\n", + current_bin, search, expected_number); errors++; } } - for (search=expected_number; search<max_counts_per_bin; search++) { - if (final_bin_assignments[current_bin*max_counts_per_bin+search] != -1) { - log_error("add_index_bin_test FAILED: bin %d had an extra entry at position %d when it should have had only %d entries.\n", current_bin, search, expected_number); + for (search = expected_number; search < max_counts_per_bin; search++) + { + if (final_bin_assignments[current_bin * max_counts_per_bin + search] + != -1) + { + log_error( + "add_index_bin_test FAILED: bin %d had an extra entry at " + "position %d when it should have had only %d entries.\n", + current_bin, search, expected_number); errors++; } } } // Now verify that the correct ones are in each bin int index; - for (index=0; index<number_of_items; index++) { + for (index = 0; index < number_of_items; index++) + { int expected_bin = l_bin_assignments[index]; int found_it = 0; - for (search=0; search<l_bin_counts[expected_bin]; search++) { - if (final_bin_assignments[expected_bin*max_counts_per_bin+search] == index) { + for (search = 0; search < l_bin_counts[expected_bin]; search++) + { + if (final_bin_assignments[expected_bin * max_counts_per_bin + + search] + == index) + { found_it = 1; } } - if (found_it == 0) { - log_error("add_index_bin_test FAILED: did not find item %d in bin %d.\n", index, expected_bin); + if (found_it == 0) + { + log_error( + "add_index_bin_test FAILED: did not find item %d in bin %d.\n", + index, expected_bin); errors++; } } @@ -341,41 +480,49 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_contex clReleaseMemObject(bin_counters); clReleaseMemObject(bins); clReleaseMemObject(bin_assignments); - if (errors == 0) { - log_info("add_index_bin_test passed. Each item was put in the correct bin in parallel.\n"); + if (errors == 0) + { + log_info("add_index_bin_test passed. Each item was put in the correct " + "bin in parallel.\n"); return 0; - } else { + } + else + { log_error("add_index_bin_test FAILED: %d errors.\n", errors); return -1; } } -int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { //===== add_index_bin test size_t numGlobalThreads = 2048; - int iteration=0; + int iteration = 0; int err, failed = 0; - MTdata d = init_genrand( gRandomSeed ); - - /* Check if atomics are supported. */ - if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) { - log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n"); - free_mtdata( d ); - return 0; - } + MTdata d = init_genrand(gRandomSeed); + + /* Check if atomics are supported. */ + if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) + { + log_info("Base atomics not supported " + "(cl_khr_global_int32_base_atomics). Skipping test.\n"); + free_mtdata(d); + return 0; + } - for(iteration=0; iteration<10; iteration++) { - log_info("add_index_bin_test with %d elements:\n", (int)numGlobalThreads); - err = add_index_bin_test(&numGlobalThreads, queue, context, d); - if (err) { + for (iteration = 0; iteration < 10; iteration++) + { + log_info("add_index_bin_test with %d elements:\n", + (int)numGlobalThreads); + err = add_index_bin_test(&numGlobalThreads, queue, context, d); + if (err) + { failed++; break; } - numGlobalThreads*=2; + numGlobalThreads *= 2; } - free_mtdata( d ); + free_mtdata(d); return failed; } - - |