aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSven van Haastregt <sven.vanhaastregt@arm.com>2022-09-27 17:32:23 +0100
committerGitHub <noreply@github.com>2022-09-27 09:32:23 -0700
commit9bf6486352bf4c87a49ecb212ae71f96c293c26f (patch)
tree97793a4fefa8ec0817fba4e6e50f331e3c4fd4db
parent9b21e9f06b88e7ce96b76b0e94c6dfef644ac1ee (diff)
downloadOpenCL-CTS-9bf6486352bf4c87a49ecb212ae71f96c293c26f.tar.gz
[NFC] clang-format test_atomics (#1516)
Add some clang-format off/on comments to keep lists and kernel code readable. Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com> Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
-rw-r--r--test_conformance/atomics/main.cpp7
-rw-r--r--test_conformance/atomics/procs.h49
-rw-r--r--test_conformance/atomics/testBase.h5
-rw-r--r--test_conformance/atomics/test_atomics.cpp1255
-rw-r--r--test_conformance/atomics/test_indexed_cases.cpp507
5 files changed, 1143 insertions, 680 deletions
diff --git a/test_conformance/atomics/main.cpp b/test_conformance/atomics/main.cpp
index afdea376..987d6bfa 100644
--- a/test_conformance/atomics/main.cpp
+++ b/test_conformance/atomics/main.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -24,6 +24,7 @@
#include <unistd.h>
#endif
+// clang-format off
test_definition test_list[] = {
ADD_TEST( atomic_add ),
ADD_TEST( atomic_sub ),
@@ -40,11 +41,11 @@ test_definition test_list[] = {
ADD_TEST( atomic_add_index ),
ADD_TEST( atomic_add_index_bin ),
};
+// clang-format on
-const int test_num = ARRAY_SIZE( test_list );
+const int test_num = ARRAY_SIZE(test_list);
int main(int argc, const char *argv[])
{
return runTestHarness(argc, argv, test_num, test_list, false, 0);
}
-
diff --git a/test_conformance/atomics/procs.h b/test_conformance/atomics/procs.h
index bf053f25..fa85aad5 100644
--- a/test_conformance/atomics/procs.h
+++ b/test_conformance/atomics/procs.h
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -18,22 +18,35 @@
#include "harness/threadTesting.h"
#include "harness/typeWrappers.h"
-extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);
-
-extern int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-
-extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-
+extern int create_program_and_kernel(const char *source,
+ const char *kernel_name,
+ cl_program *program_ret,
+ cl_kernel *kernel_ret);
+extern int test_atomic_add(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_sub(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_xchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_min(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_max(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_inc(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_dec(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_and(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_or(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_xor(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_add_index(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
diff --git a/test_conformance/atomics/testBase.h b/test_conformance/atomics/testBase.h
index ba67d140..22bce1d2 100644
--- a/test_conformance/atomics/testBase.h
+++ b/test_conformance/atomics/testBase.h
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -26,6 +26,3 @@
#include "procs.h"
#endif // _testBase_h
-
-
-
diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp
index c0c01363..31d08500 100644
--- a/test_conformance/atomics/test_atomics.cpp
+++ b/test_conformance/atomics/test_atomics.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -22,7 +22,7 @@
#define INT_TEST_VALUE 402258822
#define LONG_TEST_VALUE 515154531254381446LL
-
+// clang-format off
const char *atomic_global_pattern[] = {
"__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n"
"{\n"
@@ -36,19 +36,20 @@ const char *atomic_local_pattern[] = {
"__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n"
"{\n"
" int tid = get_global_id(0);\n"
- " int dstItemIdx;\n"
+ " int dstItemIdx;\n"
"\n"
" // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n"
- " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
+ " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
" destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n"
" barrier( CLK_LOCAL_MEM_FENCE );\n"
"\n"
,
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" // Finally, write out the last value. Again, we're synced, so everyone will be writing the same value\n"
- " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
+ " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
" finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n"
"}\n" };
+// clang-format on
#define TEST_COUNT 128 * 1024
@@ -56,41 +57,48 @@ const char *atomic_local_pattern[] = {
struct TestFns
{
- cl_int mIntStartValue;
- cl_long mLongStartValue;
+ cl_int mIntStartValue;
+ cl_long mLongStartValue;
- size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType );
+ size_t (*NumResultsFn)(size_t threadSize, ExplicitType dataType);
// Integer versions
- cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue );
- void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d );
- bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue );
+ cl_int (*ExpectedValueIntFn)(size_t size, cl_int *startRefValues,
+ size_t whichDestValue);
+ void (*GenerateRefsIntFn)(size_t size, cl_int *startRefValues, MTdata d);
+ bool (*VerifyRefsIntFn)(size_t size, cl_int *refValues, cl_int finalValue);
// Long versions
- cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue );
- void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d );
- bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue );
+ cl_long (*ExpectedValueLongFn)(size_t size, cl_long *startRefValues,
+ size_t whichDestValue);
+ void (*GenerateRefsLongFn)(size_t size, cl_long *startRefValues, MTdata d);
+ bool (*VerifyRefsLongFn)(size_t size, cl_long *refValues,
+ cl_long finalValue);
// Float versions
- cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue );
- void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d );
- bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue );
+ cl_float (*ExpectedValueFloatFn)(size_t size, cl_float *startRefValues,
+ size_t whichDestValue);
+ void (*GenerateRefsFloatFn)(size_t size, cl_float *startRefValues,
+ MTdata d);
+ bool (*VerifyRefsFloatFn)(size_t size, cl_float *refValues,
+ cl_float finalValue);
};
-bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType )
+bool check_atomic_support(cl_device_id device, bool extended, bool isLocal,
+ ExplicitType dataType)
{
+ // clang-format off
const char *extensionNames[8] = {
"cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics",
"cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics",
"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics",
"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics" // this line intended to be the same as the last one
};
+ // clang-format on
size_t index = 0;
- if( extended )
- index += 1;
- if( isLocal )
- index += 2;
+ if (extended) index += 1;
+ if (isLocal) index += 2;
Version version = get_device_cl_version(device);
@@ -98,26 +106,28 @@ bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, Exp
{
case kInt:
case kUInt:
- if( version >= Version(1,1) )
- return 1;
+ if (version >= Version(1, 1)) return 1;
break;
case kLong:
- case kULong:
- index += 4;
- break;
- case kFloat: // this has to stay separate since the float atomics arent in the 1.0 extensions
- return version >= Version(1,1);
+ case kULong: index += 4; break;
+ case kFloat: // this has to stay separate since the float atomics arent
+ // in the 1.0 extensions
+ return version >= Version(1, 1);
default:
- log_error( "ERROR: Unsupported data type (%d) in check_atomic_support\n", dataType );
+ log_error(
+ "ERROR: Unsupported data type (%d) in check_atomic_support\n",
+ dataType);
return 0;
}
- return is_extension_available( device, extensionNames[index] );
+ return is_extension_available(device, extensionNames[index]);
}
-int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
- TestFns testFns,
- bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize )
+int test_atomic_function(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ const char *programCore, TestFns testFns,
+ bool extended, bool isLocal, ExplicitType dataType,
+ bool matchGroupSize)
{
clProgramWrapper program;
clKernelWrapper kernel;
@@ -127,55 +137,65 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q
void *refValues, *startRefValues;
size_t threadSize, groupSize;
const char *programLines[4];
- char pragma[ 512 ];
- char programHeader[ 512 ];
+ char pragma[512];
+ char programHeader[512];
MTdata d;
- size_t typeSize = get_explicit_type_size( dataType );
+ size_t typeSize = get_explicit_type_size(dataType);
// Verify we can run first
- bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt );
- if( !check_atomic_support( deviceID, extended, isLocal, dataType ) )
+ bool isUnsigned = (dataType == kULong) || (dataType == kUInt);
+ if (!check_atomic_support(deviceID, extended, isLocal, dataType))
{
- // Only print for the signed (unsigned comes right after, and if signed isn't supported, unsigned isn't either)
- if( dataType == kFloat )
- log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" );
- else if( !isUnsigned )
- log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 );
+ // Only print for the signed (unsigned comes right after, and if signed
+ // isn't supported, unsigned isn't either)
+ if (dataType == kFloat)
+ log_info("\t%s float not supported\n",
+ isLocal ? "Local" : "Global");
+ else if (!isUnsigned)
+ log_info("\t%s %sint%d not supported\n",
+ isLocal ? "Local" : "Global", isUnsigned ? "u" : "",
+ (int)typeSize * 8);
// Since we don't support the operation, they implicitly pass
return 0;
}
else
{
- if( dataType == kFloat )
- log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" );
+ if (dataType == kFloat)
+ log_info("\t%s float%s...", isLocal ? "local" : "global",
+ isLocal ? " " : "");
else
- log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "",
- (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? " " : "" );
+ log_info("\t%s %sint%d%s%s...", isLocal ? "local" : "global",
+ isUnsigned ? "u" : "", (int)typeSize * 8,
+ isUnsigned ? "" : " ", isLocal ? " " : "");
}
//// Set up the kernel code
// Create the pragma line for this kernel
- bool isLong = ( dataType == kLong || dataType == kULong );
- sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
- isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32",
- extended ? "extended" : "base" );
+ bool isLong = (dataType == kLong || dataType == kULong);
+ sprintf(pragma,
+ "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
+ isLong ? "" : (isLocal ? "_local" : "_global"),
+ isLong ? "64" : "32", extended ? "extended" : "base");
// Now create the program header
- const char *typeName = get_explicit_type_name( dataType );
- if( isLocal )
- sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName );
+ const char *typeName = get_explicit_type_name(dataType);
+ if (isLocal)
+ sprintf(programHeader, atomic_local_pattern[0], typeName, typeName,
+ typeName);
else
- sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName );
+ sprintf(programHeader, atomic_global_pattern[0], typeName, typeName);
// Set up our entire program now
- programLines[ 0 ] = pragma;
- programLines[ 1 ] = programHeader;
- programLines[ 2 ] = programCore;
- programLines[ 3 ] = ( isLocal ) ? atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ];
-
- if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) )
+ programLines[0] = pragma;
+ programLines[1] = programHeader;
+ programLines[2] = programCore;
+ programLines[3] =
+ (isLocal) ? atomic_local_pattern[1] : atomic_global_pattern[1];
+
+ if (create_single_kernel_helper(context, &program, &kernel, 4, programLines,
+ "test_atomic_fn"))
{
return -1;
}
@@ -183,29 +203,37 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q
//// Set up to actually run
threadSize = num_elements;
- error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize );
- test_error( error, "Unable to get thread group max size" );
+ error =
+ get_max_common_work_group_size(context, kernel, threadSize, &groupSize);
+ test_error(error, "Unable to get thread group max size");
- if( matchGroupSize )
+ if (matchGroupSize)
// HACK because xchg and cmpxchg apparently are limited by hardware
threadSize = groupSize;
- if( isLocal )
+ if (isLocal)
{
- size_t maxSizes[3] = {0, 0, 0};
- error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3*sizeof(size_t), maxSizes, 0);
- test_error( error, "Unable to obtain max work item sizes for the device" );
+ size_t maxSizes[3] = { 0, 0, 0 };
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+ 3 * sizeof(size_t), maxSizes, 0);
+ test_error(error,
+ "Unable to obtain max work item sizes for the device");
size_t workSize;
- error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL );
- test_error( error, "Unable to obtain max work group size for device and kernel combo" );
+ error = clGetKernelWorkGroupInfo(kernel, deviceID,
+ CL_KERNEL_WORK_GROUP_SIZE,
+ sizeof(workSize), &workSize, NULL);
+ test_error(
+ error,
+ "Unable to obtain max work group size for device and kernel combo");
// Limit workSize to avoid extremely large local buffer size and slow
// run.
if (workSize > 65536) workSize = 65536;
- // "workSize" is limited to that of the first dimension as only a 1DRange is executed.
- if( maxSizes[0] < workSize )
+ // "workSize" is limited to that of the first dimension as only a
+ // 1DRange is executed.
+ if (maxSizes[0] < workSize)
{
workSize = maxSizes[0];
}
@@ -214,38 +242,43 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q
}
- log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize );
+ log_info("\t(thread count %d, group size %d)\n", (int)threadSize,
+ (int)groupSize);
- refValues = (cl_int *)malloc( typeSize * threadSize );
+ refValues = (cl_int *)malloc(typeSize * threadSize);
- if( testFns.GenerateRefsIntFn != NULL )
+ if (testFns.GenerateRefsIntFn != NULL)
{
// We have a ref generator provided
- d = init_genrand( gRandomSeed );
- startRefValues = malloc( typeSize * threadSize );
- if( typeSize == 4 )
- testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d );
+ d = init_genrand(gRandomSeed);
+ startRefValues = malloc(typeSize * threadSize);
+ if (typeSize == 4)
+ testFns.GenerateRefsIntFn(threadSize, (cl_int *)startRefValues, d);
else
- testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d );
+ testFns.GenerateRefsLongFn(threadSize, (cl_long *)startRefValues,
+ d);
free_mtdata(d);
d = NULL;
}
else
startRefValues = NULL;
- // If we're given a num_results function, we need to determine how many result objects we need. If
- // we don't have it, we assume it's just 1
- size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1;
+ // If we're given a num_results function, we need to determine how many
+ // result objects we need. If we don't have it, we assume it's just 1
+ size_t numDestItems = (testFns.NumResultsFn != NULL)
+ ? testFns.NumResultsFn(threadSize, dataType)
+ : 1;
- char * destItems = new char[ typeSize * numDestItems ];
- if( destItems == NULL )
+ char *destItems = new char[typeSize * numDestItems];
+ if (destItems == NULL)
{
- log_error( "ERROR: Unable to allocate memory!\n" );
+ log_error("ERROR: Unable to allocate memory!\n");
return -1;
}
- void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue;
- for( size_t i = 0; i < numDestItems; i++ )
- memcpy( destItems + i * typeSize, startValue, typeSize );
+ void *startValue = (typeSize == 4) ? (void *)&testFns.mIntStartValue
+ : (void *)&testFns.mLongStartValue;
+ for (size_t i = 0; i < numDestItems; i++)
+ memcpy(destItems + i * typeSize, startValue, typeSize);
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
typeSize * numDestItems, destItems, NULL);
@@ -265,82 +298,96 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q
}
/* Set the arguments */
- error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
- test_error( error, "Unable to set indexed kernel arguments" );
- error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
- test_error( error, "Unable to set indexed kernel arguments" );
+ error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+ test_error(error, "Unable to set indexed kernel arguments");
+ error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+ test_error(error, "Unable to set indexed kernel arguments");
- if( isLocal )
+ if (isLocal)
{
- error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL );
- test_error( error, "Unable to set indexed local kernel argument" );
+ error = clSetKernelArg(kernel, 2, typeSize * numDestItems, NULL);
+ test_error(error, "Unable to set indexed local kernel argument");
cl_int numDestItemsInt = (cl_int)numDestItems;
- error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt );
- test_error( error, "Unable to set indexed kernel argument" );
+ error = clSetKernelArg(kernel, 3, sizeof(cl_int), &numDestItemsInt);
+ test_error(error, "Unable to set indexed kernel argument");
}
/* Run the kernel */
threads[0] = threadSize;
- error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL );
- test_error( error, "Unable to execute test kernel" );
-
- error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
- test_error( error, "Unable to read result value!" );
-
- error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL );
- test_error( error, "Unable to read reference values!" );
-
- // If we have an expectedFn, then we need to generate a final value to compare against. If we don't
- // have one, it's because we're comparing ref values only
- if( testFns.ExpectedValueIntFn != NULL )
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, &groupSize,
+ 0, NULL, NULL);
+ test_error(error, "Unable to execute test kernel");
+
+ error =
+ clEnqueueReadBuffer(queue, streams[0], true, 0, typeSize * numDestItems,
+ destItems, 0, NULL, NULL);
+ test_error(error, "Unable to read result value!");
+
+ error =
+ clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize * threadSize,
+ refValues, 0, NULL, NULL);
+ test_error(error, "Unable to read reference values!");
+
+ // If we have an expectedFn, then we need to generate a final value to
+ // compare against. If we don't have one, it's because we're comparing ref
+ // values only
+ if (testFns.ExpectedValueIntFn != NULL)
{
- for( size_t i = 0; i < numDestItems; i++ )
+ for (size_t i = 0; i < numDestItems; i++)
{
- char expected[ 8 ];
+ char expected[8];
cl_int intVal;
cl_long longVal;
- if( typeSize == 4 )
+ if (typeSize == 4)
{
// Int version
- intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i );
- memcpy( expected, &intVal, sizeof( intVal ) );
+ intVal = testFns.ExpectedValueIntFn(
+ threadSize, (cl_int *)startRefValues, i);
+ memcpy(expected, &intVal, sizeof(intVal));
}
else
{
// Long version
- longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i );
- memcpy( expected, &longVal, sizeof( longVal ) );
+ longVal = testFns.ExpectedValueLongFn(
+ threadSize, (cl_long *)startRefValues, i);
+ memcpy(expected, &longVal, sizeof(longVal));
}
- if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 )
+ if (memcmp(expected, destItems + i * typeSize, typeSize) != 0)
{
- if( typeSize == 4 )
+ if (typeSize == 4)
{
- cl_int *outValue = (cl_int *)( destItems + i * typeSize );
- log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue );
+ cl_int *outValue = (cl_int *)(destItems + i * typeSize);
+ log_error("ERROR: Result %ld from kernel does not "
+ "validate! (should be %d, was %d)\n",
+ i, intVal, *outValue);
cl_int *startRefs = (cl_int *)startRefValues;
cl_int *refs = (cl_int *)refValues;
- for( i = 0; i < threadSize; i++ )
+ for (i = 0; i < threadSize; i++)
{
- if( startRefs != NULL )
- log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] );
+ if (startRefs != NULL)
+ log_info(" --- %ld - %d --- %d\n", i, startRefs[i],
+ refs[i]);
else
- log_info( " --- %ld --- %d\n", i, refs[i] );
+ log_info(" --- %ld --- %d\n", i, refs[i]);
}
}
else
{
- cl_long *outValue = (cl_long *)( destItems + i * typeSize );
- log_error( "ERROR: Result %ld from kernel does not validate! (should be %lld, was %lld)\n", i, longVal, *outValue );
+ cl_long *outValue = (cl_long *)(destItems + i * typeSize);
+ log_error("ERROR: Result %ld from kernel does not "
+ "validate! (should be %lld, was %lld)\n",
+ i, longVal, *outValue);
cl_long *startRefs = (cl_long *)startRefValues;
cl_long *refs = (cl_long *)refValues;
- for( i = 0; i < threadSize; i++ )
+ for (i = 0; i < threadSize; i++)
{
- if( startRefs != NULL )
- log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] );
+ if (startRefs != NULL)
+ log_info(" --- %ld - %lld --- %lld\n", i,
+ startRefs[i], refs[i]);
else
- log_info( " --- %ld --- %lld\n", i, refs[i] );
+ log_info(" --- %ld --- %lld\n", i, refs[i]);
}
}
return -1;
@@ -348,104 +395,140 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q
}
}
- if( testFns.VerifyRefsIntFn != NULL )
+ if (testFns.VerifyRefsIntFn != NULL)
{
/* Use the verify function to also check the results */
- if( dataType == kFloat )
+ if (dataType == kFloat)
{
cl_float *outValue = (cl_float *)destItems;
- if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) != 0 )
+ if (!testFns.VerifyRefsFloatFn(threadSize, (cl_float *)refValues,
+ *outValue)
+ != 0)
{
- log_error( "ERROR: Reference values did not validate!\n" );
+ log_error("ERROR: Reference values did not validate!\n");
return -1;
}
}
- else if( typeSize == 4 )
+ else if (typeSize == 4)
{
cl_int *outValue = (cl_int *)destItems;
- if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) != 0 )
+ if (!testFns.VerifyRefsIntFn(threadSize, (cl_int *)refValues,
+ *outValue)
+ != 0)
{
- log_error( "ERROR: Reference values did not validate!\n" );
+ log_error("ERROR: Reference values did not validate!\n");
return -1;
}
}
else
{
cl_long *outValue = (cl_long *)destItems;
- if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) != 0 )
+ if (!testFns.VerifyRefsLongFn(threadSize, (cl_long *)refValues,
+ *outValue)
+ != 0)
{
- log_error( "ERROR: Reference values did not validate!\n" );
+ log_error("ERROR: Reference values did not validate!\n");
return -1;
}
}
}
- else if( testFns.ExpectedValueIntFn == NULL )
+ else if (testFns.ExpectedValueIntFn == NULL)
{
- log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" );
+ log_error("ERROR: Test doesn't check total or refs; no values are "
+ "verified!\n");
return -1;
}
/* Re-write the starting value */
- for( size_t i = 0; i < numDestItems; i++ )
- memcpy( destItems + i * typeSize, startValue, typeSize );
- error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
- test_error( error, "Unable to write starting values!" );
-
- /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */
+ for (size_t i = 0; i < numDestItems; i++)
+ memcpy(destItems + i * typeSize, startValue, typeSize);
+ error =
+ clEnqueueWriteBuffer(queue, streams[0], true, 0,
+ typeSize * numDestItems, destItems, 0, NULL, NULL);
+ test_error(error, "Unable to write starting values!");
+
+ /* Run the kernel once for a single thread, so we can verify that the
+ * returned value is the original one */
threads[0] = 1;
- error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL );
- test_error( error, "Unable to execute test kernel" );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, threads, 0,
+ NULL, NULL);
+ test_error(error, "Unable to execute test kernel");
- error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL );
- test_error( error, "Unable to read reference values!" );
+ error = clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize, refValues,
+ 0, NULL, NULL);
+ test_error(error, "Unable to read reference values!");
- if( memcmp( refValues, destItems, typeSize ) != 0 )
+ if (memcmp(refValues, destItems, typeSize) != 0)
{
- if( typeSize == 4 )
+ if (typeSize == 4)
{
cl_int *s = (cl_int *)destItems;
cl_int *r = (cl_int *)refValues;
- log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
- " (should have been %d, returned %d)!\n", *s, *r );
+ log_error("ERROR: atomic function operated correctly but did NOT "
+ "return correct 'old' value "
+ " (should have been %d, returned %d)!\n",
+ *s, *r);
}
else
{
cl_long *s = (cl_long *)destItems;
cl_long *r = (cl_long *)refValues;
- log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
- " (should have been %lld, returned %lld)!\n", *s, *r );
+ log_error("ERROR: atomic function operated correctly but did NOT "
+ "return correct 'old' value "
+ " (should have been %lld, returned %lld)!\n",
+ *s, *r);
}
return -1;
}
- delete [] destItems;
- free( refValues );
- if( startRefValues != NULL )
- free( startRefValues );
+ delete[] destItems;
+ free(refValues);
+ if (startRefValues != NULL) free(startRefValues);
return 0;
}
-int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
- TestFns testFns,
- bool extended, bool matchGroupSize, bool usingAtomicPrefix )
+int test_atomic_function_set(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ const char *programCore, TestFns testFns,
+ bool extended, bool matchGroupSize,
+ bool usingAtomicPrefix)
{
- log_info(" Testing %s functions...\n", usingAtomicPrefix ? "atomic_" : "atom_");
+ log_info(" Testing %s functions...\n",
+ usingAtomicPrefix ? "atomic_" : "atom_");
int errors = 0;
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kInt, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kUInt, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kInt, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kUInt, matchGroupSize );
-
- // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 bit functions still use the "atom" prefix.
- // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic" prefix.
- if (!usingAtomicPrefix) {
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kLong, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kULong, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kLong, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kULong, matchGroupSize );
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false, kInt,
+ matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false, kUInt,
+ matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true, kInt,
+ matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true, kUInt,
+ matchGroupSize);
+
+ // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64
+ // bit functions still use the "atom" prefix. The argument usingAtomicPrefix
+ // is set to true if programCore was generated with the "atomic" prefix.
+ if (!usingAtomicPrefix)
+ {
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false,
+ kLong, matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false,
+ kULong, matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true,
+ kLong, matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true,
+ kULong, matchGroupSize);
}
return errors;
@@ -454,265 +537,345 @@ int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_comma
#pragma mark ---- add
const char atom_add_core[] =
-" oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
-" atom_add( &destMemory[0], tid + 3 );\n"
-" atom_add( &destMemory[0], tid + 3 );\n"
-" atom_add( &destMemory[0], tid + 3 );\n";
+ " oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
+ " atom_add( &destMemory[0], tid + 3 );\n"
+ " atom_add( &destMemory[0], tid + 3 );\n"
+ " atom_add( &destMemory[0], tid + 3 );\n";
const char atomic_add_core[] =
-" oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
-" atomic_add( &destMemory[0], tid + 3 );\n"
-" atomic_add( &destMemory[0], tid + 3 );\n"
-" atomic_add( &destMemory[0], tid + 3 );\n";
+ " oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
+ " atomic_add( &destMemory[0], tid + 3 );\n"
+ " atomic_add( &destMemory[0], tid + 3 );\n"
+ " atomic_add( &destMemory[0], tid + 3 );\n";
-cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_add_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = 0;
- for( size_t i = 0; i < size; i++ )
- total += ( (cl_int)i + 3 ) * 4;
+ for (size_t i = 0; i < size; i++) total += ((cl_int)i + 3) * 4;
return total;
}
-cl_long test_atomic_add_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_add_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = 0;
- for( size_t i = 0; i < size; i++ )
- total += ( ( i + 3 ) * 4 );
+ for (size_t i = 0; i < size; i++) total += ((i + 3) * 4);
return total;
}
-int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_add(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { 0,
+ 0LL,
+ NULL,
+ test_atomic_add_result_int,
+ NULL,
+ NULL,
+ test_atomic_add_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_add_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
+ return -1;
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_add_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
- return -1;
return 0;
}
#pragma mark ---- sub
-const char atom_sub_core[] = " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";
+const char atom_sub_core[] =
+ " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";
-const char atomic_sub_core[] = " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";
+const char atomic_sub_core[] =
+ " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";
-cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_sub_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = INT_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total -= (cl_int)i + 3;
+ for (size_t i = 0; i < size; i++) total -= (cl_int)i + 3;
return total;
}
-cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_sub_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = LONG_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total -= i + 3;
+ for (size_t i = 0; i < size; i++) total -= i + 3;
return total;
}
-int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_sub(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_sub_result_int,
+ NULL,
+ NULL,
+ test_atomic_sub_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_sub_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_sub_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
#pragma mark ---- xchg
-const char atom_xchg_core[] = " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";
+const char atom_xchg_core[] =
+ " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";
-const char atomic_xchg_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
-const char atomic_xchg_float_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
+const char atomic_xchg_core[] =
+ " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
+const char atomic_xchg_float_core[] =
+ " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
-bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue )
+bool test_atomic_xchg_verify_int(size_t size, cl_int *refValues,
+ cl_int finalValue)
{
- /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+ /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+ * array, and ONLY one entry */
char *valids;
size_t i;
char originalValidCount = 0;
- valids = (char *)malloc( sizeof( char ) * size );
- memset( valids, 0, sizeof( char ) * size );
+ valids = (char *)malloc(sizeof(char) * size);
+ memset(valids, 0, sizeof(char) * size);
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( refValues[ i ] == INT_TEST_VALUE )
+ if (refValues[i] == INT_TEST_VALUE)
{
// Special initial value
originalValidCount++;
continue;
}
- if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+ if (refValues[i] < 0 || (size_t)refValues[i] >= size)
{
- log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] );
+ log_error(
+ "ERROR: Reference value %ld outside of valid range! (%d)\n", i,
+ refValues[i]);
return false;
}
- valids[ refValues[ i ] ] ++;
+ valids[refValues[i]]++;
}
- /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
- the final value outputted */
- if( valids[ finalValue ] > 0 )
+ /* Note: ONE entry will have zero count. It'll be the last one that
+ executed, because that value should be the final value outputted */
+ if (valids[finalValue] > 0)
{
- log_error( "ERROR: Final value %d was also in ref list!\n", finalValue );
+ log_error("ERROR: Final value %d was also in ref list!\n", finalValue);
return false;
}
else
- valids[ finalValue ] = 1; // So the following loop will be okay
+ valids[finalValue] = 1; // So the following loop will be okay
/* Now check that every entry has one and only one count */
- if( originalValidCount != 1 )
+ if (originalValidCount != 1)
{
- log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+ log_error("ERROR: Starting reference value %d did not occur "
+ "once-and-only-once (occurred %d)\n",
+ 65191, originalValidCount);
return false;
}
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( valids[ i ] != 1 )
+ if (valids[i] != 1)
{
- log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
- for( size_t j = 0; j < size; j++ )
- log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+ log_error("ERROR: Reference value %ld did not occur "
+ "once-and-only-once (occurred %d)\n",
+ i, valids[i]);
+ for (size_t j = 0; j < size; j++)
+ log_info("%d: %d\n", (int)j, (int)valids[j]);
return false;
}
}
- free( valids );
+ free(valids);
return true;
}
-bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue )
+bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues,
+ cl_long finalValue)
{
- /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+ /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+ * array, and ONLY one entry */
char *valids;
size_t i;
char originalValidCount = 0;
- valids = (char *)malloc( sizeof( char ) * size );
- memset( valids, 0, sizeof( char ) * size );
+ valids = (char *)malloc(sizeof(char) * size);
+ memset(valids, 0, sizeof(char) * size);
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( refValues[ i ] == LONG_TEST_VALUE )
+ if (refValues[i] == LONG_TEST_VALUE)
{
// Special initial value
originalValidCount++;
continue;
}
- if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+ if (refValues[i] < 0 || (size_t)refValues[i] >= size)
{
- log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] );
+ log_error(
+ "ERROR: Reference value %ld outside of valid range! (%lld)\n",
+ i, refValues[i]);
return false;
}
- valids[ refValues[ i ] ] ++;
+ valids[refValues[i]]++;
}
- /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
- the final value outputted */
- if( valids[ finalValue ] > 0 )
+ /* Note: ONE entry will have zero count. It'll be the last one that
+ executed, because that value should be the final value outputted */
+ if (valids[finalValue] > 0)
{
- log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue );
+ log_error("ERROR: Final value %lld was also in ref list!\n",
+ finalValue);
return false;
}
else
- valids[ finalValue ] = 1; // So the following loop will be okay
+ valids[finalValue] = 1; // So the following loop will be okay
/* Now check that every entry has one and only one count */
- if( originalValidCount != 1 )
+ if (originalValidCount != 1)
{
- log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+ log_error("ERROR: Starting reference value %d did not occur "
+ "once-and-only-once (occurred %d)\n",
+ 65191, originalValidCount);
return false;
}
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( valids[ i ] != 1 )
+ if (valids[i] != 1)
{
- log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
- for( size_t j = 0; j < size; j++ )
- log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+ log_error("ERROR: Reference value %ld did not occur "
+ "once-and-only-once (occurred %d)\n",
+ i, valids[i]);
+ for (size_t j = 0; j < size; j++)
+ log_info("%d: %d\n", (int)j, (int)valids[j]);
return false;
}
}
- free( valids );
+ free(valids);
return true;
}
-bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue )
+bool test_atomic_xchg_verify_float(size_t size, cl_float *refValues,
+ cl_float finalValue)
{
- /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+ /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+ * array, and ONLY one entry */
char *valids;
size_t i;
char originalValidCount = 0;
- valids = (char *)malloc( sizeof( char ) * size );
- memset( valids, 0, sizeof( char ) * size );
+ valids = (char *)malloc(sizeof(char) * size);
+ memset(valids, 0, sizeof(char) * size);
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- cl_int *intRefValue = (cl_int *)( &refValues[ i ] );
- if( *intRefValue == INT_TEST_VALUE )
+ cl_int *intRefValue = (cl_int *)(&refValues[i]);
+ if (*intRefValue == INT_TEST_VALUE)
{
// Special initial value
originalValidCount++;
continue;
}
- if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+ if (refValues[i] < 0 || (size_t)refValues[i] >= size)
{
- log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] );
+ log_error(
+ "ERROR: Reference value %ld outside of valid range! (%a)\n", i,
+ refValues[i]);
return false;
}
- valids[ (int)refValues[ i ] ] ++;
+ valids[(int)refValues[i]]++;
}
- /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
- the final value outputted */
- if( valids[ (int)finalValue ] > 0 )
+ /* Note: ONE entry will have zero count. It'll be the last one that
+ executed, because that value should be the final value outputted */
+ if (valids[(int)finalValue] > 0)
{
- log_error( "ERROR: Final value %a was also in ref list!\n", finalValue );
+ log_error("ERROR: Final value %a was also in ref list!\n", finalValue);
return false;
}
else
- valids[ (int)finalValue ] = 1; // So the following loop will be okay
+ valids[(int)finalValue] = 1; // So the following loop will be okay
/* Now check that every entry has one and only one count */
- if( originalValidCount != 1 )
+ if (originalValidCount != 1)
{
- log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+ log_error("ERROR: Starting reference value %d did not occur "
+ "once-and-only-once (occurred %d)\n",
+ 65191, originalValidCount);
return false;
}
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( valids[ i ] != 1 )
+ if (valids[i] != 1)
{
- log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
- for( size_t j = 0; j < size; j++ )
- log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+ log_error("ERROR: Reference value %ld did not occur "
+ "once-and-only-once (occurred %d)\n",
+ i, valids[i]);
+ for (size_t j = 0; j < size; j++)
+ log_info("%d: %d\n", (int)j, (int)valids[j]);
return false;
}
}
- free( valids );
+ free(valids);
return true;
}
-int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_xchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float };
-
- int errors = test_atomic_function_set( deviceID, context, queue, num_elements, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false );
- errors |= test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true );
-
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, false, kFloat, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, true, kFloat, true );
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ NULL,
+ NULL,
+ test_atomic_xchg_verify_int,
+ NULL,
+ NULL,
+ test_atomic_xchg_verify_long,
+ NULL,
+ NULL,
+ test_atomic_xchg_verify_float };
+
+ int errors = test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_xchg_core, set, false,
+ true, /*usingAtomicPrefix*/ false);
+ errors |= test_atomic_function_set(deviceID, context, queue, num_elements,
+ atomic_xchg_core, set, false, true,
+ /*usingAtomicPrefix*/ true);
+
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_xchg_float_core, set, false, false,
+ kFloat, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_xchg_float_core, set, false, true,
+ kFloat, true);
return errors;
}
@@ -720,51 +883,71 @@ int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue
#pragma mark ---- min
-const char atom_min_core[] = " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";
+const char atom_min_core[] =
+ " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";
-const char atomic_min_core[] = " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";
+const char atomic_min_core[] =
+ " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";
-cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_min_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = 0x7fffffffL;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] < total )
- total = startRefValues[ i ];
+ if (startRefValues[i] < total) total = startRefValues[i];
}
return total;
}
-void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d )
+void test_atomic_min_gen_int(size_t size, cl_int *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff;
}
-cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_min_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = 0x7fffffffffffffffLL;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] < total )
- total = startRefValues[ i ];
+ if (startRefValues[i] < total) total = startRefValues[i];
}
return total;
}
-void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d )
+void test_atomic_min_gen_long(size_t size, cl_long *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_long)(genrand_int32(d)
+ | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16));
}
-int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_min(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { 0x7fffffffL,
+ 0x7fffffffffffffffLL,
+ NULL,
+ test_atomic_min_result_int,
+ test_atomic_min_gen_int,
+ NULL,
+ test_atomic_min_result_long,
+ test_atomic_min_gen_long,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_min_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_min_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -772,79 +955,118 @@ int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue
#pragma mark ---- max
-const char atom_max_core[] = " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";
+const char atom_max_core[] =
+ " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";
-const char atomic_max_core[] = " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";
+const char atomic_max_core[] =
+ " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";
-cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_max_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = 0;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] > total )
- total = startRefValues[ i ];
+ if (startRefValues[i] > total) total = startRefValues[i];
}
return total;
}
-void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d )
+void test_atomic_max_gen_int(size_t size, cl_int *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff;
}
-cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_max_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = 0;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] > total )
- total = startRefValues[ i ];
+ if (startRefValues[i] > total) total = startRefValues[i];
}
return total;
}
-void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d )
+void test_atomic_max_gen_long(size_t size, cl_long *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_long)(genrand_int32(d)
+ | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16));
}
-int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_max(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { 0,
+ 0,
+ NULL,
+ test_atomic_max_result_int,
+ test_atomic_max_gen_int,
+ NULL,
+ test_atomic_max_result_long,
+ test_atomic_max_gen_long,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_max_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
+ return -1;
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_max_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
- return -1;
return 0;
}
#pragma mark ---- inc
-const char atom_inc_core[] = " oldValues[tid] = atom_inc( &destMemory[0] );\n";
+const char atom_inc_core[] =
+ " oldValues[tid] = atom_inc( &destMemory[0] );\n";
-const char atomic_inc_core[] = " oldValues[tid] = atomic_inc( &destMemory[0] );\n";
+const char atomic_inc_core[] =
+ " oldValues[tid] = atomic_inc( &destMemory[0] );\n";
-cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_inc_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
return INT_TEST_VALUE + (cl_int)size;
}
-cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_inc_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
return LONG_TEST_VALUE + size;
}
-int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_inc(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_inc_result_int,
+ NULL,
+ NULL,
+ test_atomic_inc_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_inc_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_inc_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -852,27 +1074,46 @@ int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue
#pragma mark ---- dec
-const char atom_dec_core[] = " oldValues[tid] = atom_dec( &destMemory[0] );\n";
+const char atom_dec_core[] =
+ " oldValues[tid] = atom_dec( &destMemory[0] );\n";
-const char atomic_dec_core[] = " oldValues[tid] = atomic_dec( &destMemory[0] );\n";
+const char atomic_dec_core[] =
+ " oldValues[tid] = atomic_dec( &destMemory[0] );\n";
-cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_dec_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
return INT_TEST_VALUE - (cl_int)size;
}
-cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_dec_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
return LONG_TEST_VALUE - size;
}
-int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_dec(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_dec_result_int,
+ NULL,
+ NULL,
+ test_atomic_dec_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_dec_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_dec_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -881,129 +1122,159 @@ int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue
#pragma mark ---- cmpxchg
/* We test cmpxchg by implementing (the long way) atom_add */
+// clang-format off
const char atom_cmpxchg_core[] =
-" int oldValue, origValue, newValue;\n"
-" do { \n"
-" origValue = destMemory[0];\n"
-" newValue = origValue + tid + 2;\n"
-" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
-" } while( oldValue != origValue );\n"
-" oldValues[tid] = oldValue;\n"
-;
+ " int oldValue, origValue, newValue;\n"
+ " do { \n"
+ " origValue = destMemory[0];\n"
+ " newValue = origValue + tid + 2;\n"
+ " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
+ " } while( oldValue != origValue );\n"
+ " oldValues[tid] = oldValue;\n";
const char atom_cmpxchg64_core[] =
-" long oldValue, origValue, newValue;\n"
-" do { \n"
-" origValue = destMemory[0];\n"
-" newValue = origValue + tid + 2;\n"
-" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
-" } while( oldValue != origValue );\n"
-" oldValues[tid] = oldValue;\n"
-;
+ " long oldValue, origValue, newValue;\n"
+ " do { \n"
+ " origValue = destMemory[0];\n"
+ " newValue = origValue + tid + 2;\n"
+ " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
+ " } while( oldValue != origValue );\n"
+ " oldValues[tid] = oldValue;\n";
const char atomic_cmpxchg_core[] =
-" int oldValue, origValue, newValue;\n"
-" do { \n"
-" origValue = destMemory[0];\n"
-" newValue = origValue + tid + 2;\n"
-" oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
-" } while( oldValue != origValue );\n"
-" oldValues[tid] = oldValue;\n"
-;
-
-cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+ " int oldValue, origValue, newValue;\n"
+ " do { \n"
+ " origValue = destMemory[0];\n"
+ " newValue = origValue + tid + 2;\n"
+ " oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
+ " } while( oldValue != origValue );\n"
+ " oldValues[tid] = oldValue;\n";
+// clang-format on
+
+cl_int test_atomic_cmpxchg_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = INT_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total += (cl_int)i + 2;
+ for (size_t i = 0; i < size; i++) total += (cl_int)i + 2;
return total;
}
-cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_cmpxchg_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = LONG_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total += i + 2;
+ for (size_t i = 0; i < size; i++) total += i + 2;
return total;
}
-int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL };
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_cmpxchg_result_int,
+ NULL,
+ NULL,
+ test_atomic_cmpxchg_result_long,
+ NULL,
+ NULL };
int errors = 0;
log_info(" Testing atom_ functions...\n");
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kUInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kUInt, true );
-
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kLong, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kULong, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kLong, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kULong, true );
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, false, kInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, false, kUInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, true, kInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, true, kUInt, true);
+
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, false,
+ kLong, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, false,
+ kULong, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, true, kLong,
+ true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, true,
+ kULong, true);
log_info(" Testing atomic_ functions...\n");
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kUInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kUInt, true );
-
- if( errors )
- return -1;
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, false, kInt,
+ true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, false,
+ kUInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, true, kInt, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, true, kUInt,
+ true);
+
+ if (errors) return -1;
return 0;
}
#pragma mark -------- Bitwise functions
-size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType )
+size_t test_bitwise_num_results(size_t threadCount, ExplicitType dataType)
{
- size_t numBits = get_explicit_type_size( dataType ) * 8;
+ size_t numBits = get_explicit_type_size(dataType) * 8;
- return ( threadCount + numBits - 1 ) / numBits;
+ return (threadCount + numBits - 1) / numBits;
}
#pragma mark ---- and
+// clang-format off
const char atom_and_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
-;
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n";
const char atomic_and_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
-;
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n";
+// clang-format on
-cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+cl_int test_atomic_and_result_int(size_t size, cl_int *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 31 ) / 32;
- if( whichResult < numThreads - 1 )
- return 0;
+ size_t numThreads = ((size_t)size + 31) / 32;
+ if (whichResult < numThreads - 1) return 0;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 32;
cl_int bits = (cl_int)0xffffffffL;
- for( size_t i = 0; i < numBits; i++ )
- bits &= ~( 1 << i );
+ for (size_t i = 0; i < numBits; i++) bits &= ~(1 << i);
return bits;
}
-cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_and_result_long(size_t size, cl_long *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 63 ) / 64;
- if( whichResult < numThreads - 1 )
- return 0;
+ size_t numThreads = ((size_t)size + 63) / 64;
+ if (whichResult < numThreads - 1) return 0;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 64;
@@ -1013,14 +1284,28 @@ cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_
return bits;
}
-int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_and(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results,
- test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { 0xffffffff,
+ 0xffffffffffffffffLL,
+ test_bitwise_num_results,
+ test_atomic_and_result_int,
+ NULL,
+ NULL,
+ test_atomic_and_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_and_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_and_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -1028,59 +1313,68 @@ int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue
#pragma mark ---- or
+// clang-format off
const char atom_or_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
-;
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n";
const char atomic_or_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
-;
-
-cl_int test_atomic_or_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n";
+// clang-format on
+
+cl_int test_atomic_or_result_int(size_t size, cl_int *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 31 ) / 32;
- if( whichResult < numThreads - 1 )
- return 0xffffffff;
+ size_t numThreads = ((size_t)size + 31) / 32;
+ if (whichResult < numThreads - 1) return 0xffffffff;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 32;
cl_int bits = 0;
- for( size_t i = 0; i < numBits; i++ )
- bits |= ( 1 << i );
+ for (size_t i = 0; i < numBits; i++) bits |= (1 << i);
return bits;
}
-cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_or_result_long(size_t size, cl_long *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 63 ) / 64;
- if( whichResult < numThreads - 1 )
- return 0x0ffffffffffffffffLL;
+ size_t numThreads = ((size_t)size + 63) / 64;
+ if (whichResult < numThreads - 1) return 0x0ffffffffffffffffLL;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 64;
cl_long bits = 0;
- for( size_t i = 0; i < numBits; i++ )
- bits |= ( 1LL << i );
+ for (size_t i = 0; i < numBits; i++) bits |= (1LL << i);
return bits;
}
-int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_or(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL };
+ TestFns set = {
+ 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int,
+ NULL, NULL, test_atomic_or_result_long, NULL,
+ NULL
+ };
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_or_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_or_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -1100,33 +1394,44 @@ const char atomic_xor_core[] =
"\n"
" oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n";
-cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+cl_int test_atomic_xor_result_int(size_t size, cl_int *startRefValues,
+ size_t whichResult)
{
cl_int total = 0x2f08ab41;
- for( size_t i = 0; i < size; i++ )
- total ^= ( 1 << ( i & 31 ) );
+ for (size_t i = 0; i < size; i++) total ^= (1 << (i & 31));
return total;
}
-cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_xor_result_long(size_t size, cl_long *startRefValues,
+ size_t whichResult)
{
cl_long total = 0x2f08ab418ba0541LL;
- for( size_t i = 0; i < size; i++ )
- total ^= ( 1LL << ( i & 63 ) );
+ for (size_t i = 0; i < size; i++) total ^= (1LL << (i & 63));
return total;
}
-int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_xor(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, NULL };
-
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ TestFns set = { 0x2f08ab41,
+ 0x2f08ab418ba0541LL,
+ NULL,
+ test_atomic_xor_result_int,
+ NULL,
+ NULL,
+ test_atomic_xor_result_long,
+ NULL,
+ NULL };
+
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_xor_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_xor_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
-
-
-
-
diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp
index b85e3d24..d625d8b4 100644
--- a/test_conformance/atomics/test_indexed_cases.cpp
+++ b/test_conformance/atomics/test_indexed_cases.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -16,22 +16,25 @@
#include "testBase.h"
#include "harness/conversions.h"
-const char * atomic_index_source =
-"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
-"// Counter keeps track of which index in counts we are using.\n"
-"// We get that value, increment it, and then set that index in counts to our thread ID.\n"
-"// At the end of this we should have all thread IDs in some random location in counts\n"
-"// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
-"// will be missing some.\n"
-"\n"
-"__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
-" int tid = get_global_id(0);\n"
-" \n"
-" int counter_to_use = atom_add(counter, 1);\n"
-" counts[counter_to_use] = tid;\n"
-"}";
-
-int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+// clang-format off
+const char *atomic_index_source =
+ "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
+ "// Counter keeps track of which index in counts we are using.\n"
+ "// We get that value, increment it, and then set that index in counts to our thread ID.\n"
+ "// At the end of this we should have all thread IDs in some random location in counts\n"
+ "// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
+ "// will be missing some.\n"
+ "\n"
+ "__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
+ " int tid = get_global_id(0);\n"
+ " \n"
+ " int counter_to_use = atom_add(counter, 1);\n"
+ " counts[counter_to_use] = tid;\n"
+ "}";
+// clang-format on
+
+int test_atomic_add_index(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
clProgramWrapper program;
clKernelWrapper kernel;
@@ -39,25 +42,29 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_
size_t numGlobalThreads, numLocalThreads;
int fail = 0, succeed = 0, err;
- /* Check if atomics are supported. */
- if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
- log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
- return 0;
- }
+ /* Check if atomics are supported. */
+ if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
+ {
+ log_info("Base atomics not supported "
+ "(cl_khr_global_int32_base_atomics). Skipping test.\n");
+ return 0;
+ }
//===== add_index test
// The index test replicates what particles does.
- // It uses one memory location to keep track of the current index and then each thread
- // does an atomic add to it to get its new location. The threads then write to their
- // assigned location. At the end we check to make sure that each thread's ID shows up
- // exactly once in the output.
+ // It uses one memory location to keep track of the current index and then
+ // each thread does an atomic add to it to get its new location. The threads
+ // then write to their assigned location. At the end we check to make sure
+ // that each thread's ID shows up exactly once in the output.
numGlobalThreads = 2048;
- if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) )
+ if (create_single_kernel_helper(context, &program, &kernel, 1,
+ &atomic_index_source, "add_index_test"))
return -1;
- if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) )
+ if (get_max_common_work_group_size(context, kernel, numGlobalThreads,
+ &numLocalThreads))
return -1;
log_info("Execute global_threads:%d local_threads:%d\n",
@@ -72,86 +79,133 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_
sizeof(cl_int) * numGlobalThreads, NULL, NULL);
// Reset all those locations to -1 to indciate they have not been used.
- cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads);
- if (values == NULL) {
- log_error("add_index_test FAILED to allocate memory for initial values.\n");
- fail = 1; succeed = -1;
- } else {
+ cl_int *values = (cl_int *)malloc(sizeof(cl_int) * numGlobalThreads);
+ if (values == NULL)
+ {
+ log_error(
+ "add_index_test FAILED to allocate memory for initial values.\n");
+ fail = 1;
+ succeed = -1;
+ }
+ else
+ {
memset(values, -1, numLocalThreads);
- unsigned int i=0;
- for (i=0; i<numGlobalThreads; i++)
- values[i] = -1;
- int init=0;
- err = clEnqueueWriteBuffer(queue, counters, true, 0, numGlobalThreads*sizeof(cl_int), values, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(queue, counter, true, 0,1*sizeof(cl_int), &init, 0, NULL, NULL);
- if (err) {
- log_error("add_index_test FAILED to write initial values to arrays: %d\n", err);
- fail=1; succeed=-1;
- } else {
+ unsigned int i = 0;
+ for (i = 0; i < numGlobalThreads; i++) values[i] = -1;
+ int init = 0;
+ err = clEnqueueWriteBuffer(queue, counters, true, 0,
+ numGlobalThreads * sizeof(cl_int), values, 0,
+ NULL, NULL);
+ err |= clEnqueueWriteBuffer(queue, counter, true, 0, 1 * sizeof(cl_int),
+ &init, 0, NULL, NULL);
+ if (err)
+ {
+ log_error(
+ "add_index_test FAILED to write initial values to arrays: %d\n",
+ err);
+ fail = 1;
+ succeed = -1;
+ }
+ else
+ {
err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
- if (err) {
- log_error("add_index_test FAILED to set kernel arguments: %d\n", err);
- fail=1; succeed=-1;
- } else {
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numGlobalThreads, &numLocalThreads, 0, NULL, NULL );
- if (err) {
- log_error("add_index_test FAILED to execute kernel: %d\n", err);
- fail=1; succeed=-1;
- } else {
- err = clEnqueueReadBuffer( queue, counters, true, 0, sizeof(cl_int)*numGlobalThreads, values, 0, NULL, NULL );
- if (err) {
- log_error("add_index_test FAILED to read back results: %d\n", err);
- fail = 1; succeed=-1;
- } else {
+ if (err)
+ {
+ log_error("add_index_test FAILED to set kernel arguments: %d\n",
+ err);
+ fail = 1;
+ succeed = -1;
+ }
+ else
+ {
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
+ &numGlobalThreads,
+ &numLocalThreads, 0, NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_test FAILED to execute kernel: %d\n",
+ err);
+ fail = 1;
+ succeed = -1;
+ }
+ else
+ {
+ err = clEnqueueReadBuffer(queue, counters, true, 0,
+ sizeof(cl_int) * numGlobalThreads,
+ values, 0, NULL, NULL);
+ if (err)
+ {
+ log_error(
+ "add_index_test FAILED to read back results: %d\n",
+ err);
+ fail = 1;
+ succeed = -1;
+ }
+ else
+ {
unsigned int looking_for, index;
- for (looking_for=0; looking_for<numGlobalThreads; looking_for++) {
- int instances_found=0;
- for (index=0; index<numGlobalThreads; index++) {
- if (values[index]==(int)looking_for)
+ for (looking_for = 0; looking_for < numGlobalThreads;
+ looking_for++)
+ {
+ int instances_found = 0;
+ for (index = 0; index < numGlobalThreads; index++)
+ {
+ if (values[index] == (int)looking_for)
instances_found++;
}
- if (instances_found != 1) {
- log_error("add_index_test FAILED: wrong number of instances (%d!=1) for counter %d.\n", instances_found, looking_for);
- fail = 1; succeed=-1;
+ if (instances_found != 1)
+ {
+ log_error(
+ "add_index_test FAILED: wrong number of "
+ "instances (%d!=1) for counter %d.\n",
+ instances_found, looking_for);
+ fail = 1;
+ succeed = -1;
}
}
}
}
}
}
- if (!fail) {
- log_info("add_index_test passed. Each thread used exactly one index.\n");
+ if (!fail)
+ {
+ log_info(
+ "add_index_test passed. Each thread used exactly one index.\n");
}
free(values);
}
return fail;
}
+// clang-format off
const char *add_index_bin_kernel[] = {
-"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
-"// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
-"// using an atomic add to keep track of the current location to write into in each bin.\n"
-"// This is the same as the memory update for the particles demo.\n"
-"\n"
-"__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int location = bin_assignments[tid];\n"
-" int counter = atom_add(&bin_counters[location], 1);\n"
-" bins[location*max_counts_per_bin + counter] = tid;\n"
-"}" };
-
-// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel
-// using an atomic add to keep track of the current location to write into in each bin.
-// This is the same as the memory update for the particles demo.
-int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_context context, MTdata d)
+ "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
+ "// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
+ "// using an atomic add to keep track of the current location to write into in each bin.\n"
+ "// This is the same as the memory update for the particles demo.\n"
+ "\n"
+ "__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
+ " int tid = get_global_id(0);\n"
+ "\n"
+ " int location = bin_assignments[tid];\n"
+ " int counter = atom_add(&bin_counters[location], 1);\n"
+ " bins[location*max_counts_per_bin + counter] = tid;\n"
+ "}" };
+// clang-format on
+
+// This test assigns a bunch of values to bins and then tries to put them in the
+// bins in parallel using an atomic add to keep track of the current location to
+// write into in each bin. This is the same as the memory update for the
+// particles demo.
+int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
+ cl_context context, MTdata d)
{
int number_of_items = (int)global_threads[0];
size_t local_threads[1];
int divisor = 12;
- int number_of_bins = number_of_items/divisor;
- int max_counts_per_bin = divisor*2;
+ int number_of_bins = number_of_items / divisor;
+ int max_counts_per_bin = divisor * 2;
int fail = 0;
int succeed = 0;
@@ -160,15 +214,20 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_contex
clProgramWrapper program;
clKernelWrapper kernel;
- // log_info("add_index_bin_test: %d items, into %d bins, with a max of %d items per bin (bins is %d long).\n",
- // number_of_items, number_of_bins, max_counts_per_bin, number_of_bins*max_counts_per_bin);
+ // log_info("add_index_bin_test: %d items, into %d bins, with a max of %d
+ // items per bin (bins is %d long).\n",
+ // number_of_items, number_of_bins, max_counts_per_bin,
+ // number_of_bins*max_counts_per_bin);
//===== add_index_bin test
// The index test replicates what particles does.
- err = create_single_kernel_helper(context, &program, &kernel, 1, add_index_bin_kernel, "add_index_bin_test" );
- test_error( err, "Unable to create testing kernel" );
+ err =
+ create_single_kernel_helper(context, &program, &kernel, 1,
+ add_index_bin_kernel, "add_index_bin_test");
+ test_error(err, "Unable to create testing kernel");
- if( get_max_common_work_group_size( context, kernel, global_threads[0], &local_threads[0] ) )
+ if (get_max_common_work_group_size(context, kernel, global_threads[0],
+ &local_threads[0]))
return -1;
log_info("Execute global_threads:%d local_threads:%d\n",
@@ -185,152 +244,232 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_contex
clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(cl_int) * number_of_items, NULL, NULL);
- if (bin_counters == NULL) {
+ if (bin_counters == NULL)
+ {
log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
return -1;
}
- if (bins == NULL) {
+ if (bins == NULL)
+ {
log_error("add_index_bin_test FAILED to allocate bins.\n");
return -1;
}
- if (bin_assignments == NULL) {
+ if (bin_assignments == NULL)
+ {
log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
return -1;
}
// Initialize our storage
- cl_int *l_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
- if (!l_bin_counts) {
- log_error("add_index_bin_test FAILED to allocate initial values for bin_counters.\n");
+ cl_int *l_bin_counts = (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
+ if (!l_bin_counts)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "bin_counters.\n");
return -1;
}
int i;
- for (i=0; i<number_of_bins; i++)
- l_bin_counts[i] = 0;
- err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, l_bin_counts, 0, NULL, NULL);
- if (err) {
- log_error("add_index_bin_test FAILED to set initial values for bin_counters: %d\n", err);
+ for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
+ err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
+ sizeof(cl_int) * number_of_bins, l_bin_counts, 0,
+ NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to set initial values for "
+ "bin_counters: %d\n",
+ err);
return -1;
}
- cl_int *values = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
- if (!values) {
- log_error("add_index_bin_test FAILED to allocate initial values for bins.\n");
+ cl_int *values =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
+ if (!values)
+ {
+ log_error(
+ "add_index_bin_test FAILED to allocate initial values for bins.\n");
return -1;
}
- for (i=0; i<number_of_bins*max_counts_per_bin; i++)
- values[i] = -1;
- err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, values, 0, NULL, NULL);
- if (err) {
- log_error("add_index_bin_test FAILED to set initial values for bins: %d\n", err);
+ for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1;
+ err = clEnqueueWriteBuffer(queue, bins, true, 0,
+ sizeof(cl_int) * number_of_bins
+ * max_counts_per_bin,
+ values, 0, NULL, NULL);
+ if (err)
+ {
+ log_error(
+ "add_index_bin_test FAILED to set initial values for bins: %d\n",
+ err);
return -1;
}
free(values);
- cl_int *l_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_items);
- if (!l_bin_assignments) {
- log_error("add_index_bin_test FAILED to allocate initial values for l_bin_assignments.\n");
+ cl_int *l_bin_assignments =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_items);
+ if (!l_bin_assignments)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "l_bin_assignments.\n");
return -1;
}
- for (i=0; i<number_of_items; i++) {
- int bin = random_in_range(0, number_of_bins-1, d);
- while (l_bin_counts[bin] >= max_counts_per_bin) {
- bin = random_in_range(0, number_of_bins-1, d);
+ for (i = 0; i < number_of_items; i++)
+ {
+ int bin = random_in_range(0, number_of_bins - 1, d);
+ while (l_bin_counts[bin] >= max_counts_per_bin)
+ {
+ bin = random_in_range(0, number_of_bins - 1, d);
}
if (bin >= number_of_bins)
- log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins);
- if (l_bin_counts[bin]+1 > max_counts_per_bin)
- log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin);
+ log_error("add_index_bin_test internal error generating bin "
+ "assignments: bin %d >= number_of_bins %d.\n",
+ bin, number_of_bins);
+ if (l_bin_counts[bin] + 1 > max_counts_per_bin)
+ log_error(
+ "add_index_bin_test internal error generating bin assignments: "
+ "bin %d has more entries (%d) than max_counts_per_bin (%d).\n",
+ bin, l_bin_counts[bin], max_counts_per_bin);
l_bin_counts[bin]++;
l_bin_assignments[i] = bin;
- // log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]);
+ // log_info("item %d assigned to bin %d (%d items)\n", i, bin,
+ // l_bin_counts[bin]);
}
- err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL);
- if (err) {
- log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", err);
+ err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
+ sizeof(cl_int) * number_of_items,
+ l_bin_assignments, 0, NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to set initial values for "
+ "bin_assignments: %d\n",
+ err);
return -1;
}
// Setup the kernel
err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
- err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin);
- if (err) {
- log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err);
- fail=1; succeed=-1;
+ err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin),
+ &max_counts_per_bin);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to set kernel arguments: %d\n",
+ err);
+ fail = 1;
+ succeed = -1;
return -1;
}
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
- if (err) {
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads,
+ local_threads, 0, NULL, NULL);
+ if (err)
+ {
log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
- fail=1; succeed=-1;
+ fail = 1;
+ succeed = -1;
}
- cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
- if (!final_bin_assignments) {
- log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n");
+ cl_int *final_bin_assignments =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
+ if (!final_bin_assignments)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "final_bin_assignments.\n");
return -1;
}
- err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL );
- if (err) {
+ err = clEnqueueReadBuffer(queue, bins, true, 0,
+ sizeof(cl_int) * number_of_bins
+ * max_counts_per_bin,
+ final_bin_assignments, 0, NULL, NULL);
+ if (err)
+ {
log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
- fail = 1; succeed=-1;
+ fail = 1;
+ succeed = -1;
}
- cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
- if (!final_bin_counts) {
- log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n");
+ cl_int *final_bin_counts =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
+ if (!final_bin_counts)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "final_bin_counts.\n");
return -1;
}
- err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL );
- if (err) {
- log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err);
- fail = 1; succeed=-1;
+ err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
+ sizeof(cl_int) * number_of_bins, final_bin_counts,
+ 0, NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
+ err);
+ fail = 1;
+ succeed = -1;
}
// Verification.
- int errors=0;
+ int errors = 0;
int current_bin;
int search;
// Print out all the contents of the bins.
// for (current_bin=0; current_bin<number_of_bins; current_bin++)
// for (search=0; search<max_counts_per_bin; search++)
- // log_info("[bin %d, entry %d] = %d\n", current_bin, search, final_bin_assignments[current_bin*max_counts_per_bin+search]);
+ // log_info("[bin %d, entry %d] = %d\n", current_bin, search,
+ // final_bin_assignments[current_bin*max_counts_per_bin+search]);
// First verify that there are the correct number in each bin.
- for (current_bin=0; current_bin<number_of_bins; current_bin++) {
+ for (current_bin = 0; current_bin < number_of_bins; current_bin++)
+ {
int expected_number = l_bin_counts[current_bin];
int actual_number = final_bin_counts[current_bin];
- if (expected_number != actual_number) {
- log_error("add_index_bin_test FAILED: bin %d reported %d entries when %d were expected.\n", current_bin, actual_number, expected_number);
+ if (expected_number != actual_number)
+ {
+ log_error("add_index_bin_test FAILED: bin %d reported %d entries "
+ "when %d were expected.\n",
+ current_bin, actual_number, expected_number);
errors++;
}
- for (search=0; search<expected_number; search++) {
- if (final_bin_assignments[current_bin*max_counts_per_bin+search] == -1) {
- log_error("add_index_bin_test FAILED: bin %d had no entry at position %d when it should have had %d entries.\n", current_bin, search, expected_number);
+ for (search = 0; search < expected_number; search++)
+ {
+ if (final_bin_assignments[current_bin * max_counts_per_bin + search]
+ == -1)
+ {
+ log_error("add_index_bin_test FAILED: bin %d had no entry at "
+ "position %d when it should have had %d entries.\n",
+ current_bin, search, expected_number);
errors++;
}
}
- for (search=expected_number; search<max_counts_per_bin; search++) {
- if (final_bin_assignments[current_bin*max_counts_per_bin+search] != -1) {
- log_error("add_index_bin_test FAILED: bin %d had an extra entry at position %d when it should have had only %d entries.\n", current_bin, search, expected_number);
+ for (search = expected_number; search < max_counts_per_bin; search++)
+ {
+ if (final_bin_assignments[current_bin * max_counts_per_bin + search]
+ != -1)
+ {
+ log_error(
+ "add_index_bin_test FAILED: bin %d had an extra entry at "
+ "position %d when it should have had only %d entries.\n",
+ current_bin, search, expected_number);
errors++;
}
}
}
// Now verify that the correct ones are in each bin
int index;
- for (index=0; index<number_of_items; index++) {
+ for (index = 0; index < number_of_items; index++)
+ {
int expected_bin = l_bin_assignments[index];
int found_it = 0;
- for (search=0; search<l_bin_counts[expected_bin]; search++) {
- if (final_bin_assignments[expected_bin*max_counts_per_bin+search] == index) {
+ for (search = 0; search < l_bin_counts[expected_bin]; search++)
+ {
+ if (final_bin_assignments[expected_bin * max_counts_per_bin
+ + search]
+ == index)
+ {
found_it = 1;
}
}
- if (found_it == 0) {
- log_error("add_index_bin_test FAILED: did not find item %d in bin %d.\n", index, expected_bin);
+ if (found_it == 0)
+ {
+ log_error(
+ "add_index_bin_test FAILED: did not find item %d in bin %d.\n",
+ index, expected_bin);
errors++;
}
}
@@ -341,41 +480,49 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_contex
clReleaseMemObject(bin_counters);
clReleaseMemObject(bins);
clReleaseMemObject(bin_assignments);
- if (errors == 0) {
- log_info("add_index_bin_test passed. Each item was put in the correct bin in parallel.\n");
+ if (errors == 0)
+ {
+ log_info("add_index_bin_test passed. Each item was put in the correct "
+ "bin in parallel.\n");
return 0;
- } else {
+ }
+ else
+ {
log_error("add_index_bin_test FAILED: %d errors.\n", errors);
return -1;
}
}
-int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
//===== add_index_bin test
size_t numGlobalThreads = 2048;
- int iteration=0;
+ int iteration = 0;
int err, failed = 0;
- MTdata d = init_genrand( gRandomSeed );
-
- /* Check if atomics are supported. */
- if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
- log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
- free_mtdata( d );
- return 0;
- }
+ MTdata d = init_genrand(gRandomSeed);
+
+ /* Check if atomics are supported. */
+ if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
+ {
+ log_info("Base atomics not supported "
+ "(cl_khr_global_int32_base_atomics). Skipping test.\n");
+ free_mtdata(d);
+ return 0;
+ }
- for(iteration=0; iteration<10; iteration++) {
- log_info("add_index_bin_test with %d elements:\n", (int)numGlobalThreads);
- err = add_index_bin_test(&numGlobalThreads, queue, context, d);
- if (err) {
+ for (iteration = 0; iteration < 10; iteration++)
+ {
+ log_info("add_index_bin_test with %d elements:\n",
+ (int)numGlobalThreads);
+ err = add_index_bin_test(&numGlobalThreads, queue, context, d);
+ if (err)
+ {
failed++;
break;
}
- numGlobalThreads*=2;
+ numGlobalThreads *= 2;
}
- free_mtdata( d );
+ free_mtdata(d);
return failed;
}
-
-