diff options
Diffstat (limited to 'test_conformance/math_brute_force/binary_i_double.cpp')
-rw-r--r-- | test_conformance/math_brute_force/binary_i_double.cpp | 240 |
1 files changed, 77 insertions, 163 deletions
diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index f8786e68..a6c28557 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -24,100 +24,16 @@ namespace { -int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) -{ - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global double", - sizeNames[vectorSize], - "* out, __global double", - sizeNames[vectorSize], - "* in1, __global int", - sizeNames[vectorSize], - "* in2 )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = ", - name, - "( in1[i], in2[i] );\n" - "}\n" }; - - const char *c3[] = { - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global double* out, __global double* in, __global int* in2)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " int3 i0 = vload3( 0, in2 + 3 * i );\n" - " d0 = ", - name, - "( d0, i0 );\n" - " vstore3( d0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are " - "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " - "buffer size \n" - " double3 d0;\n" - " int3 i0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n" - " break;\n" - " }\n" - " d0 = ", - name, - "( d0, i0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = d0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = d0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c) / sizeof(c[0]); - - if (sizeValues[vectorSize] == 3) - { - kern = c3; - kernSize = sizeof(c3) / sizeof(c3[0]); - } - - char testName[32]; - snprintf(testName, sizeof(testName) - 1, "math_kernel%s", - sizeNames[vectorSize]); - - return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p, - relaxedMode); -} - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint vectorSize = gMinVectorSizeIndex + job_id; - return BuildKernel(info->nameInCode, vectorSize, info->threadCount, - info->kernels[vectorSize].data(), - &(info->programs[vectorSize]), info->relaxedMode); + BuildKernelInfo &info = *(BuildKernelInfo *)p; + auto generator = [](const std::string &kernel_name, const char *builtin, + cl_uint vector_size_index) { + return GetBinaryKernel(kernel_name, builtin, ParameterType::Double, + ParameterType::Double, ParameterType::Int, + vector_size_index); + }; + return BuildKernels(info, job_id, generator); } // Thread specific data for a worker thread @@ -309,24 +225,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) Force64BitFPUPrecision(); - // start the map of the output arrays cl_event e[VECTOR_SIZE_COUNT]; cl_ulong *out[VECTOR_SIZE_COUNT]; - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + if (gHostFill) { - out[j] = (cl_ulong *)clEnqueueMapBuffer( - tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, - buffer_size, 0, NULL, e + j, &error); - if (error || NULL == out[j]) + // Start the map of the output arrays + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, - error); - return error; + out[j] = (cl_ulong *)clEnqueueMapBuffer( + tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, + buffer_size, 0, NULL, e + j, &error); + if (error || NULL == out[j]) + { + vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, + error); + return error; + } } - } - // Get that moving - if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + // Get that moving + if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); + } // Init input array cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; @@ -335,8 +254,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) int totalSpecialValueCount = specialValuesCount * specialValuesIntCount; int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements; + // Test edge cases if (job_id <= (cl_uint)lastSpecialJobIndex) - { // test edge cases + { cl_double *fp = (cl_double *)p; cl_int *ip2 = (cl_int *)p2; uint32_t x, y; @@ -368,43 +288,60 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) buffer_size, p, 0, NULL, NULL))) { vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); - goto exit; + return error; } if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size / 2, p2, 0, NULL, NULL))) { vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error); - goto exit; + return error; } for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - // Wait for the map to finish - if ((error = clWaitForEvents(1, e + j))) - { - vlog_error("Error: clWaitForEvents failed! err: %d\n", error); - goto exit; - } - if ((error = clReleaseEvent(e[j]))) + if (gHostFill) { - vlog_error("Error: clReleaseEvent failed! err: %d\n", error); - goto exit; + // Wait for the map to finish + if ((error = clWaitForEvents(1, e + j))) + { + vlog_error("Error: clWaitForEvents failed! err: %d\n", error); + return error; + } + if ((error = clReleaseEvent(e[j]))) + { + vlog_error("Error: clReleaseEvent failed! err: %d\n", error); + return error; + } } // Fill the result buffer with garbage, so that old results don't carry // over uint32_t pattern = 0xffffdead; - memset_pattern4(out[j], &pattern, buffer_size); - if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], - out[j], 0, NULL, NULL))) + if (gHostFill) { - vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", - error); - goto exit; + memset_pattern4(out[j], &pattern, buffer_size); + if ((error = clEnqueueUnmapMemObject( + tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); + return error; + } + } + else + { + if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j], + &pattern, sizeof(pattern), 0, + buffer_size, 0, NULL, NULL))) + { + vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n", + error); + return error; + } } - // run the kernel + // Run the kernel size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its @@ -434,7 +371,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) &vectorCount, NULL, 0, NULL, NULL))) { vlog_error("FAILED -- could not execute kernel\n"); - goto exit; + return error; } } @@ -462,7 +399,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error); - goto exit; + return error; } } @@ -528,8 +465,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) "*%.13la vs. %.13la\n", name, sizeNames[k], err, s[j], s2[j], r[j], test); - error = -1; - goto exit; + return -1; } } } @@ -548,7 +484,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n"); - if (0 == (base & 0x0fffffff)) { if (gVerboseBruteForce) @@ -565,8 +500,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) fflush(stdout); } -exit: - return error; + return CL_SUCCESS; } } // anonymous namespace @@ -603,13 +537,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) test_info.ftz = f->ftz || gForceFTZ; test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - test_info.k[i].resize(test_info.threadCount, nullptr); - } - test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { @@ -625,7 +552,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) vlog_error("Error: Unable to create sub-buffer of gInBuffer for " "region {%zd, %zd}\n", region.origin, region.size); - goto exit; + return error; } cl_buffer_region region2 = { i * test_info.subBufferSize * sizeof(cl_int), @@ -638,7 +565,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " "region {%zd, %zd}\n", region.origin, region.size); - goto exit; + return error; } for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) @@ -651,7 +578,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) vlog_error("Error: Unable to create sub-buffer of " "gOutBuffer[%d] for region {%zd, %zd}\n", (int)j, region.origin, region.size); - goto exit; + return error; } } test_info.tinfo[i].tQueue = @@ -659,27 +586,26 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) if (NULL == test_info.tinfo[i].tQueue || error) { vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; + return error; } test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels - { - BuildKernelInfo build_info{ test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, - relaxedMode }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; // Run the kernels if (!gSkipCorrectnessTesting) { error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + if (error) return error; // Accumulate the arithmetic errors for (cl_uint i = 0; i < test_info.threadCount; i++) @@ -692,8 +618,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) } } - if (error) goto exit; - if (gWimpyMode) vlog("Wimp pass"); else @@ -704,15 +628,5 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - for (auto &kernel : test_info.k[i]) - { - clReleaseKernel(kernel); - } - } - - return error; + return CL_SUCCESS; } |