diff options
Diffstat (limited to 'test_conformance/math_brute_force/macro_unary_float.cpp')
-rw-r--r-- | test_conformance/math_brute_force/macro_unary_float.cpp | 212 |
1 files changed, 67 insertions, 145 deletions
diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 6a1b9b9a..53679788 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -23,93 +23,15 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - const char *c[] = { "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global int", - sizeNames[vectorSize], - "* out, __global float", - sizeNames[vectorSize], - "* in )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = ", - name, - "( in[i] );\n" - "}\n" }; - - const char *c3[] = { - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global int* out, __global float* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " int3 i0 = ", - name, - "( f0 );\n" - " vstore3( i0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are " - "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " - "buffer size \n" - " int3 i0;\n" - " float3 f0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], 0xdead, 0xdead ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], 0xdead ); \n" - " break;\n" - " }\n" - " i0 = ", - name, - "( f0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = i0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = i0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c) / sizeof(c[0]); - - if (sizeValues[vectorSize] == 3) - { - kern = c3; - kernSize = sizeof(c3) / sizeof(c3[0]); - } - - char testName[32]; - snprintf(testName, sizeof(testName) - 1, "math_kernel%s", - sizeNames[vectorSize]); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetUnaryKernel(kernel_name, builtin, ParameterType::Int, + ParameterType::Float, vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -167,24 +89,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) #define ref_func(s) (signbit_test ? func.i_f_f(s) : func.i_f(s)) - // start the map of the output arrays cl_event e[VECTOR_SIZE_COUNT]; cl_int *out[VECTOR_SIZE_COUNT]; - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + if (gHostFill) { - out[j] = (cl_int *)clEnqueueMapBuffer( - tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, - buffer_size, 0, NULL, e + j, &error); - if (error || NULL == out[j]) + // start the map of the output arrays + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, - error); - return error; + out[j] = (cl_int *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + return error; + } } - } - // Get that moving - if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + } // Init input array cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; @@ -199,31 +124,48 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - // Wait for the map to finish - if ((error = clWaitForEvents(1, e + j))) - { - vlog_error("Error: clWaitForEvents failed! err: %d\n", error); - return error; - } - if ((error = clReleaseEvent(e[j]))) + if (gHostFill) { - vlog_error("Error: clReleaseEvent failed! err: %d\n", error); - return error; + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + return error; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! err: %d\n", error); + return error; + } } // Fill the result buffer with garbage, so that old results don't carry // over uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], - out[j], 0, NULL, NULL))) + if (gHostFill) { - vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", - error); - return error; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject( + tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); + return error; + } + } + else + { + if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j], + &pattern, sizeof(pattern), 0, + buffer_size, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n", + error); + return error; + } } - // run the kernel + // Run the kernel size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its @@ -407,13 +349,6 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -429,7 +364,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) vlog_error("Error: Unable to create sub-buffer of gInBuffer for " "region {%zd, %zd}\n", region.origin, region.size); - goto exit; + return error; } for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) @@ -442,7 +377,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) vlog_error("Error: Unable to create sub-buffer of " "gOutBuffer[%d] for region {%zd, %zd}\n", (int)j, region.origin, region.size); - goto exit; + return error; } } test_info.tinfo[i].tQueue = @@ -450,27 +385,24 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) if (NULL == test_info.tinfo[i].tQueue || error) { vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; + return error; } } // Init the kernels - { - BuildKernelInfo build_info{ test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, - relaxedMode }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; // Run the kernels if (!gSkipCorrectnessTesting) { error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; + if (error) return error; if (gWimpyMode) vlog("Wimp pass"); @@ -480,15 +412,5 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - - return error; + return CL_SUCCESS; } |