aboutsummaryrefslogtreecommitdiff
path: root/test_conformance/math_brute_force/common.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'test_conformance/math_brute_force/common.cpp')
-rw-r--r--test_conformance/math_brute_force/common.cpp447
1 files changed, 438 insertions, 9 deletions
diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp
index f5e9f993..47f493e7 100644
--- a/test_conformance/math_brute_force/common.cpp
+++ b/test_conformance/math_brute_force/common.cpp
@@ -29,6 +29,10 @@ const char *GetTypeName(ParameterType type)
{
case ParameterType::Float: return "float";
case ParameterType::Double: return "double";
+ case ParameterType::Int: return "int";
+ case ParameterType::UInt: return "uint";
+ case ParameterType::Long: return "long";
+ case ParameterType::ULong: return "ulong";
}
return nullptr;
}
@@ -39,6 +43,12 @@ const char *GetUndefValue(ParameterType type)
{
case ParameterType::Float:
case ParameterType::Double: return "NAN";
+
+ case ParameterType::Int:
+ case ParameterType::UInt: return "0x12345678";
+
+ case ParameterType::Long:
+ case ParameterType::ULong: return "0x0ddf00dbadc0ffee";
}
return nullptr;
}
@@ -57,18 +67,50 @@ void EmitDefineUndef(std::ostringstream &kernel, const char *name,
kernel << "#define " << name << " " << GetUndefValue(type) << '\n';
}
-void EmitEnableExtension(std::ostringstream &kernel, ParameterType type)
+void EmitEnableExtension(std::ostringstream &kernel,
+ const std::initializer_list<ParameterType> &types)
{
- switch (type)
+ bool needsFp64 = false;
+
+ for (const auto &type : types)
{
- case ParameterType::Double:
- kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
- break;
+ switch (type)
+ {
+ case ParameterType::Double: needsFp64 = true; break;
- case ParameterType::Float:
- // No extension required.
- break;
+ case ParameterType::Float:
+ case ParameterType::Int:
+ case ParameterType::UInt:
+ case ParameterType::Long:
+ case ParameterType::ULong:
+ // No extension required.
+ break;
+ }
+ }
+
+ if (needsFp64) kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+}
+
+std::string GetBuildOptions(bool relaxed_mode)
+{
+ std::ostringstream options;
+
+ if (gForceFTZ)
+ {
+ options << " -cl-denorms-are-zero";
+ }
+
+ if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
+ {
+ options << " -cl-fp32-correctly-rounded-divide-sqrt";
+ }
+
+ if (relaxed_mode)
+ {
+ options << " -cl-fast-relaxed-math";
}
+
+ return options.str();
}
} // anonymous namespace
@@ -78,6 +120,354 @@ std::string GetKernelName(int vector_size_index)
return std::string("math_kernel") + sizeNames[vector_size_index];
}
+std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
+ ParameterType retType, ParameterType type1,
+ int vector_size_index)
+{
+ // To keep the kernel code readable, use macros for types and undef values.
+ std::ostringstream kernel;
+ EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
+ EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
+ EmitDefineUndef(kernel, "UNDEF1", type1);
+ EmitEnableExtension(kernel, { retType, type1 });
+
+ // clang-format off
+ const char *kernel_nonvec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
+ __global TYPE1* in1)
+{
+ size_t i = get_global_id(0);
+ out[i] = )", builtin, R"((in1[i]);
+}
+)" };
+
+ const char *kernel_vec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
+ __global TYPE1_SCALAR* in1)
+{
+ size_t i = get_global_id(0);
+
+ if (i + 1 < get_global_size(0))
+ {
+ TYPE1 a = vload3(0, in1 + 3 * i);
+ RETTYPE res = )", builtin, R"((a);
+ vstore3(res, 0, out + 3 * i);
+ }
+ else
+ {
+ // Figure out how many elements are left over after
+ // BUFFER_SIZE % (3 * sizeof(type)).
+ // Assume power of two buffer size.
+ size_t parity = i & 1;
+ TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
+ switch (parity)
+ {
+ case 0:
+ a.y = in1[3 * i + 1];
+ // fall through
+ case 1:
+ a.x = in1[3 * i];
+ break;
+ }
+
+ RETTYPE res = )", builtin, R"((a);
+
+ switch (parity)
+ {
+ case 0:
+ out[3 * i + 1] = res.y;
+ // fall through
+ case 1:
+ out[3 * i] = res.x;
+ break;
+ }
+ }
+}
+)" };
+ // clang-format on
+
+ if (sizeValues[vector_size_index] != 3)
+ for (const auto &chunk : kernel_nonvec3) kernel << chunk;
+ else
+ for (const auto &chunk : kernel_vec3) kernel << chunk;
+
+ return kernel.str();
+}
+
+std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
+ ParameterType retType1, ParameterType retType2,
+ ParameterType type1, int vector_size_index)
+{
+ // To keep the kernel code readable, use macros for types and undef values.
+ std::ostringstream kernel;
+ EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
+ EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
+ EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
+ EmitDefineUndef(kernel, "UNDEF1", type1);
+ EmitDefineUndef(kernel, "UNDEFR2", retType2);
+ EmitEnableExtension(kernel, { retType1, retType2, type1 });
+
+ // clang-format off
+ const char *kernel_nonvec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
+ __global RETTYPE2* out2,
+ __global TYPE1* in1)
+{
+ size_t i = get_global_id(0);
+ out1[i] = )", builtin, R"((in1[i], out2 + i);
+}
+)" };
+
+ const char *kernel_vec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
+ __global RETTYPE2_SCALAR* out2,
+ __global TYPE1_SCALAR* in1)
+{
+ size_t i = get_global_id(0);
+
+ if (i + 1 < get_global_size(0))
+ {
+ TYPE1 a = vload3(0, in1 + 3 * i);
+ RETTYPE2 res2 = UNDEFR2;
+ RETTYPE1 res1 = )", builtin, R"((a, &res2);
+ vstore3(res1, 0, out1 + 3 * i);
+ vstore3(res2, 0, out2 + 3 * i);
+ }
+ else
+ {
+ // Figure out how many elements are left over after
+ // BUFFER_SIZE % (3 * sizeof(type)).
+ // Assume power of two buffer size.
+ size_t parity = i & 1;
+ TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
+ switch (parity)
+ {
+ case 0:
+ a.y = in1[3 * i + 1];
+ // fall through
+ case 1:
+ a.x = in1[3 * i];
+ break;
+ }
+
+ RETTYPE2 res2 = UNDEFR2;
+ RETTYPE1 res1 = )", builtin, R"((a, &res2);
+
+ switch (parity)
+ {
+ case 0:
+ out1[3 * i + 1] = res1.y;
+ out2[3 * i + 1] = res2.y;
+ // fall through
+ case 1:
+ out1[3 * i] = res1.x;
+ out2[3 * i] = res2.x;
+ break;
+ }
+ }
+}
+)" };
+ // clang-format on
+
+ if (sizeValues[vector_size_index] != 3)
+ for (const auto &chunk : kernel_nonvec3) kernel << chunk;
+ else
+ for (const auto &chunk : kernel_vec3) kernel << chunk;
+
+ return kernel.str();
+}
+
+std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
+ ParameterType retType, ParameterType type1,
+ ParameterType type2, int vector_size_index)
+{
+ // To keep the kernel code readable, use macros for types and undef values.
+ std::ostringstream kernel;
+ EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
+ EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
+ EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
+ EmitDefineUndef(kernel, "UNDEF1", type1);
+ EmitDefineUndef(kernel, "UNDEF2", type2);
+ EmitEnableExtension(kernel, { retType, type1, type2 });
+
+ const bool is_vec3 = sizeValues[vector_size_index] == 3;
+
+ std::string invocation;
+ if (strlen(builtin) == 1)
+ {
+ // Assume a single-character builtin is an operator (e.g., +, *, ...).
+ invocation = is_vec3 ? "a" : "in1[i] ";
+ invocation += builtin;
+ invocation += is_vec3 ? "b" : " in2[i]";
+ }
+ else
+ {
+ // Otherwise call the builtin as a function with two arguments.
+ invocation = builtin;
+ invocation += is_vec3 ? "(a, b)" : "(in1[i], in2[i])";
+ }
+
+ // clang-format off
+ const char *kernel_nonvec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
+ __global TYPE1* in1,
+ __global TYPE2* in2)
+{
+ size_t i = get_global_id(0);
+ out[i] = )", invocation.c_str(), R"(;
+}
+)" };
+
+ const char *kernel_vec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
+ __global TYPE1_SCALAR* in1,
+ __global TYPE2_SCALAR* in2)
+{
+ size_t i = get_global_id(0);
+
+ if (i + 1 < get_global_size(0))
+ {
+ TYPE1 a = vload3(0, in1 + 3 * i);
+ TYPE2 b = vload3(0, in2 + 3 * i);
+ RETTYPE res = )", invocation.c_str(), R"(;
+ vstore3(res, 0, out + 3 * i);
+ }
+ else
+ {
+ // Figure out how many elements are left over after
+ // BUFFER_SIZE % (3 * sizeof(type)).
+ // Assume power of two buffer size.
+ size_t parity = i & 1;
+ TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
+ TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
+ switch (parity)
+ {
+ case 0:
+ a.y = in1[3 * i + 1];
+ b.y = in2[3 * i + 1];
+ // fall through
+ case 1:
+ a.x = in1[3 * i];
+ b.x = in2[3 * i];
+ break;
+ }
+
+ RETTYPE res = )", invocation.c_str(), R"(;
+
+ switch (parity)
+ {
+ case 0:
+ out[3 * i + 1] = res.y;
+ // fall through
+ case 1:
+ out[3 * i] = res.x;
+ break;
+ }
+ }
+}
+)" };
+ // clang-format on
+
+ if (!is_vec3)
+ for (const auto &chunk : kernel_nonvec3) kernel << chunk;
+ else
+ for (const auto &chunk : kernel_vec3) kernel << chunk;
+
+ return kernel.str();
+}
+
+std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
+ ParameterType retType1, ParameterType retType2,
+ ParameterType type1, ParameterType type2,
+ int vector_size_index)
+{
+ // To keep the kernel code readable, use macros for types and undef values.
+ std::ostringstream kernel;
+ EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
+ EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
+ EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
+ EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
+ EmitDefineUndef(kernel, "UNDEF1", type1);
+ EmitDefineUndef(kernel, "UNDEF2", type2);
+ EmitDefineUndef(kernel, "UNDEFR2", retType2);
+ EmitEnableExtension(kernel, { retType1, retType2, type1, type2 });
+
+ // clang-format off
+ const char *kernel_nonvec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
+ __global RETTYPE2* out2,
+ __global TYPE1* in1,
+ __global TYPE2* in2)
+{
+ size_t i = get_global_id(0);
+ out1[i] = )", builtin, R"((in1[i], in2[i], out2 + i);
+}
+)" };
+
+ const char *kernel_vec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
+ __global RETTYPE2_SCALAR* out2,
+ __global TYPE1_SCALAR* in1,
+ __global TYPE2_SCALAR* in2)
+{
+ size_t i = get_global_id(0);
+
+ if (i + 1 < get_global_size(0))
+ {
+ TYPE1 a = vload3(0, in1 + 3 * i);
+ TYPE2 b = vload3(0, in2 + 3 * i);
+ RETTYPE2 res2 = UNDEFR2;
+ RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
+ vstore3(res1, 0, out1 + 3 * i);
+ vstore3(res2, 0, out2 + 3 * i);
+ }
+ else
+ {
+ // Figure out how many elements are left over after
+ // BUFFER_SIZE % (3 * sizeof(type)).
+ // Assume power of two buffer size.
+ size_t parity = i & 1;
+ TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
+ TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
+ switch (parity)
+ {
+ case 0:
+ a.y = in1[3 * i + 1];
+ b.y = in2[3 * i + 1];
+ // fall through
+ case 1:
+ a.x = in1[3 * i];
+ b.x = in2[3 * i];
+ break;
+ }
+
+ RETTYPE2 res2 = UNDEFR2;
+ RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
+
+ switch (parity)
+ {
+ case 0:
+ out1[3 * i + 1] = res1.y;
+ out2[3 * i + 1] = res2.y;
+ // fall through
+ case 1:
+ out1[3 * i] = res1.x;
+ out2[3 * i] = res2.x;
+ break;
+ }
+ }
+}
+)" };
+ // clang-format on
+
+ if (sizeValues[vector_size_index] != 3)
+ for (const auto &chunk : kernel_nonvec3) kernel << chunk;
+ else
+ for (const auto &chunk : kernel_vec3) kernel << chunk;
+
+ return kernel.str();
+}
+
std::string GetTernaryKernel(const std::string &kernel_name,
const char *builtin, ParameterType retType,
ParameterType type1, ParameterType type2,
@@ -92,7 +482,7 @@ std::string GetTernaryKernel(const std::string &kernel_name,
EmitDefineUndef(kernel, "UNDEF1", type1);
EmitDefineUndef(kernel, "UNDEF2", type2);
EmitDefineUndef(kernel, "UNDEF3", type3);
- EmitEnableExtension(kernel, type1);
+ EmitEnableExtension(kernel, { retType, type1, type2, type3 });
// clang-format off
const char *kernel_nonvec3[] = { R"(
@@ -168,3 +558,42 @@ __kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
return kernel.str();
}
+
+cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
+ SourceGenerator generator)
+{
+ // Generate the kernel code.
+ cl_uint vector_size_index = gMinVectorSizeIndex + job_id;
+ auto kernel_name = GetKernelName(vector_size_index);
+ auto source = generator(kernel_name, info.nameInCode, vector_size_index);
+ std::array<const char *, 1> sources{ source.c_str() };
+
+ // Create the program.
+ clProgramWrapper &program = info.programs[vector_size_index];
+ auto options = GetBuildOptions(info.relaxedMode);
+ int error =
+ create_single_kernel_helper(gContext, &program, nullptr, sources.size(),
+ sources.data(), nullptr, options.c_str());
+ if (error != CL_SUCCESS)
+ {
+ vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
+ return error;
+ }
+
+ // Create a kernel for each thread. cl_kernels aren't thread safe, so make
+ // one for every thread
+ auto &kernels = info.kernels[vector_size_index];
+ assert(kernels.empty() && "Dirty BuildKernelInfo");
+ kernels.resize(info.threadCount);
+ for (auto &kernel : kernels)
+ {
+ kernel = clCreateKernel(program, kernel_name.c_str(), &error);
+ if (!kernel || error != CL_SUCCESS)
+ {
+ vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
+ return error;
+ }
+ }
+
+ return CL_SUCCESS;
+}