about summary refs log tree commit diff
diff options
context:
space:
mode:
author David Gross <dgross@google.com> 2016-06-13 18:06:26 +0000
committer android-build-merger <android-build-merger@google.com> 2016-06-13 18:06:26 +0000
commit d2e35f95915d480fe203d6614df8b0f5d14de0e5 (patch)
tree 821153077b140abfea3b6d53d9c5436595ffca81
parent c6c9c1f04b480a395daa1bdd5d634060e505bd80 (diff)
parent a48ea364652efcf947dd33c8a6ba893e9c00dd6a (diff)
download libbcc-nougat-mr1-flounder-release.tar.gz
Delete simple reduction implementation.android-cts_7.1_r1android-cts-7.1_r9android-cts-7.1_r8android-cts-7.1_r7android-cts-7.1_r6android-cts-7.1_r5android-cts-7.1_r4android-cts-7.1_r3android-cts-7.1_r29android-cts-7.1_r28android-cts-7.1_r27android-cts-7.1_r26android-cts-7.1_r25android-cts-7.1_r24android-cts-7.1_r23android-cts-7.1_r22android-cts-7.1_r21android-cts-7.1_r20android-cts-7.1_r2android-cts-7.1_r19android-cts-7.1_r18android-cts-7.1_r17android-cts-7.1_r16android-cts-7.1_r15android-cts-7.1_r14android-cts-7.1_r13android-cts-7.1_r12android-cts-7.1_r11android-cts-7.1_r10android-cts-7.1_r1android-7.1.2_r9android-7.1.2_r8android-7.1.2_r6android-7.1.2_r5android-7.1.2_r4android-7.1.2_r39android-7.1.2_r38android-7.1.2_r37android-7.1.2_r36android-7.1.2_r33android-7.1.2_r32android-7.1.2_r30android-7.1.2_r3android-7.1.2_r29android-7.1.2_r28android-7.1.2_r27android-7.1.2_r25android-7.1.2_r24android-7.1.2_r23android-7.1.2_r2android-7.1.2_r19android-7.1.2_r18android-7.1.2_r17android-7.1.2_r16android-7.1.2_r15android-7.1.2_r14android-7.1.2_r13android-7.1.2_r12android-7.1.2_r11android-7.1.2_r10android-7.1.2_r1android-7.1.1_r9android-7.1.1_r8android-7.1.1_r7android-7.1.1_r61android-7.1.1_r60android-7.1.1_r6android-7.1.1_r59android-7.1.1_r58android-7.1.1_r57android-7.1.1_r56android-7.1.1_r55android-7.1.1_r54android-7.1.1_r53android-7.1.1_r52android-7.1.1_r51android-7.1.1_r50android-7.1.1_r49android-7.1.1_r48android-7.1.1_r47android-7.1.1_r46android-7.1.1_r45android-7.1.1_r44android-7.1.1_r43android-7.1.1_r42android-7.1.1_r41android-7.1.1_r40android-7.1.1_r4android-7.1.1_r39android-7.1.1_r38android-7.1.1_r35android-7.1.1_r33android-7.1.1_r32android-7.1.1_r31android-7.1.1_r3android-7.1.1_r28android-7.1.1_r27android-7.1.1_r26android-7.1.1_r25android-7.1.1_r24android-7.1.1_r23android-7.1.1_r22android-7.1.1_r21android-7.1.1_r20android-7.1.1_r2android-7.1.1_r17android-7.1.1_r16android-7.1.1_r15android-7.1.1_r14android-7.1.1_r13android-7.1.1_r12android-7.1.1_r11android-7.1.1_r10andro
id-7.1.1_r1android-7.1.0_r7android-7.1.0_r6android-7.1.0_r5android-7.1.0_r4android-7.1.0_r3android-7.1.0_r2android-7.1.0_r1nougat-mr2.3-releasenougat-mr2.2-releasenougat-mr2.1-releasenougat-mr2-security-releasenougat-mr2-releasenougat-mr2-pixel-releasenougat-mr2-devnougat-mr1.8-releasenougat-mr1.7-releasenougat-mr1.6-releasenougat-mr1.5-releasenougat-mr1.4-releasenougat-mr1.3-releasenougat-mr1.2-releasenougat-mr1.1-releasenougat-mr1-volantis-releasenougat-mr1-security-releasenougat-mr1-releasenougat-mr1-flounder-releasenougat-mr1-devnougat-mr1-cts-releasenougat-dr1-release
am: a48ea36465
Change-Id: Ie88be61a5b49bf4c0147db7d05c5cf00bb028ff3
-rw-r--r-- bcinfo/MetadataExtractor.cpp 69
-rw-r--r-- bcinfo/tools/main.cpp 53
-rw-r--r-- include/bcc/Renderscript/RSUtils.h 2
-rw-r--r-- include/bcinfo/MetadataExtractor.h 38
-rw-r--r-- lib/Core/Compiler.cpp 33
-rw-r--r-- lib/Renderscript/RSEmbedInfo.cpp 58
-rw-r--r-- lib/Renderscript/RSKernelExpand.cpp 373
-rw-r--r-- tests/libbcc/test_reduce_general_metadata.ll 18
8 files changed, 137 insertions, 507 deletions
diff --git a/bcinfo/MetadataExtractor.cpp b/bcinfo/MetadataExtractor.cpp
index ba0548f..48a2ecb 100644
--- a/bcinfo/MetadataExtractor.cpp
+++ b/bcinfo/MetadataExtractor.cpp
@@ -155,7 +155,7 @@ static const llvm::StringRef ExportForEachMetadataName = "#rs_export_foreach";
// Name of metadata node where exported general reduce information resides
// (should be synced with slang_rs_metadata.h)
-static const llvm::StringRef ExportReduceNewMetadataName = "#rs_export_reduce";
+static const llvm::StringRef ExportReduceMetadataName = "#rs_export_reduce";
// Name of metadata node where RS object slot info resides (should be
// synced with slang_rs_metadata.h)
@@ -175,11 +175,11 @@ static const llvm::StringRef DebugInfoMetadataName = "llvm.dbg.cu";
MetadataExtractor::MetadataExtractor(const char *bitcode, size_t bitcodeSize)
: mModule(nullptr), mBitcode(bitcode), mBitcodeSize(bitcodeSize),
mExportVarCount(0), mExportFuncCount(0), mExportForEachSignatureCount(0),
- mExportReduceCount(0), mExportReduceNewCount(0), mExportVarNameList(nullptr),
+ mExportReduceCount(0), mExportVarNameList(nullptr),
mExportFuncNameList(nullptr), mExportForEachNameList(nullptr),
mExportForEachSignatureList(nullptr),
- mExportForEachInputCountList(nullptr), mExportReduceNameList(nullptr),
- mExportReduceNewList(nullptr),
+ mExportForEachInputCountList(nullptr),
+ mExportReduceList(nullptr),
mPragmaCount(0), mPragmaKeyList(nullptr), mPragmaValueList(nullptr),
mObjectSlotCount(0), mObjectSlotList(nullptr),
mRSFloatPrecision(RS_FP_Full), mIsThreadable(true),
@@ -193,11 +193,11 @@ MetadataExtractor::MetadataExtractor(const char *bitcode, size_t bitcodeSize)
MetadataExtractor::MetadataExtractor(const llvm::Module *module)
: mModule(module), mBitcode(nullptr), mBitcodeSize(0),
mExportVarCount(0), mExportFuncCount(0), mExportForEachSignatureCount(0),
- mExportReduceCount(0), mExportReduceNewCount(0), mExportVarNameList(nullptr),
+ mExportReduceCount(0), mExportVarNameList(nullptr),
mExportFuncNameList(nullptr), mExportForEachNameList(nullptr),
mExportForEachSignatureList(nullptr),
- mExportForEachInputCountList(nullptr), mExportReduceNameList(nullptr),
- mExportReduceNewList(nullptr),
+ mExportForEachInputCountList(nullptr),
+ mExportReduceList(nullptr),
mPragmaCount(0), mPragmaKeyList(nullptr), mPragmaValueList(nullptr),
mObjectSlotCount(0), mObjectSlotList(nullptr),
mRSFloatPrecision(RS_FP_Full), mIsThreadable(true),
@@ -241,17 +241,8 @@ MetadataExtractor::~MetadataExtractor() {
delete [] mExportForEachInputCountList;
mExportForEachInputCountList = nullptr;
- if (mExportReduceNameList) {
- for (size_t i = 0; i < mExportReduceCount; i++) {
- delete [] mExportReduceNameList[i];
- mExportReduceNameList[i] = nullptr;
- }
- }
- delete [] mExportReduceNameList;
- mExportReduceNameList = nullptr;
-
- delete [] mExportReduceNewList;
- mExportReduceNewList = nullptr;
+ delete [] mExportReduceList;
+ mExportReduceList = nullptr;
for (size_t i = 0; i < mPragmaCount; i++) {
if (mPragmaKeyList) {
@@ -496,25 +487,25 @@ bool MetadataExtractor::populateForEachMetadata(
}
-bool MetadataExtractor::populateReduceNewMetadata(const llvm::NamedMDNode *ReduceNewMetadata) {
- mExportReduceNewCount = 0;
- mExportReduceNewList = nullptr;
+bool MetadataExtractor::populateReduceMetadata(const llvm::NamedMDNode *ReduceMetadata) {
+ mExportReduceCount = 0;
+ mExportReduceList = nullptr;
- if (!ReduceNewMetadata || !(mExportReduceNewCount = ReduceNewMetadata->getNumOperands()))
+ if (!ReduceMetadata || !(mExportReduceCount = ReduceMetadata->getNumOperands()))
return true;
- ReduceNew *TmpReduceNewList = new ReduceNew[mExportReduceNewCount];
+ Reduce *TmpReduceList = new Reduce[mExportReduceCount];
- for (size_t i = 0; i < mExportReduceNewCount; i++) {
- llvm::MDNode *Node = ReduceNewMetadata->getOperand(i);
+ for (size_t i = 0; i < mExportReduceCount; i++) {
+ llvm::MDNode *Node = ReduceMetadata->getOperand(i);
if (!Node || Node->getNumOperands() < 3) {
ALOGE("Missing reduce metadata");
return false;
}
- TmpReduceNewList[i].mReduceName = createStringFromValue(Node->getOperand(0));
+ TmpReduceList[i].mReduceName = createStringFromValue(Node->getOperand(0));
- if (!extractUIntFromMetadataString(&TmpReduceNewList[i].mAccumulatorDataSize,
+ if (!extractUIntFromMetadataString(&TmpReduceList[i].mAccumulatorDataSize,
Node->getOperand(1))) {
ALOGE("Non-integer accumulator data size value in reduce metadata");
return false;
@@ -525,8 +516,8 @@ bool MetadataExtractor::populateReduceNewMetadata(const llvm::NamedMDNode *Reduc
ALOGE("Malformed accumulator node in reduce metadata");
return false;
}
- TmpReduceNewList[i].mAccumulatorName = createStringFromValue(AccumulatorNode->getOperand(0));
- if (!extractUIntFromMetadataString(&TmpReduceNewList[i].mSignature,
+ TmpReduceList[i].mAccumulatorName = createStringFromValue(AccumulatorNode->getOperand(0));
+ if (!extractUIntFromMetadataString(&TmpReduceList[i].mSignature,
AccumulatorNode->getOperand(1))) {
ALOGE("Non-integer signature value in reduce metadata");
return false;
@@ -540,18 +531,18 @@ bool MetadataExtractor::populateReduceNewMetadata(const llvm::NamedMDNode *Reduc
// into the expanded accumulator function and is otherwise
// unreferenced).
llvm::Function *Func =
- mModule->getFunction(llvm::StringRef(TmpReduceNewList[i].mAccumulatorName));
+ mModule->getFunction(llvm::StringRef(TmpReduceList[i].mAccumulatorName));
// Why calculateNumInputs() - 1? The "-1" is because we don't
// want to treat the accumulator argument as an input.
- TmpReduceNewList[i].mInputCount = (Func ? calculateNumInputs(Func, TmpReduceNewList[i].mSignature) - 1 : 0);
+ TmpReduceList[i].mInputCount = (Func ? calculateNumInputs(Func, TmpReduceList[i].mSignature) - 1 : 0);
- TmpReduceNewList[i].mInitializerName = createStringFromOptionalValue(Node, 3);
- TmpReduceNewList[i].mCombinerName = createStringFromOptionalValue(Node, 4);
- TmpReduceNewList[i].mOutConverterName = createStringFromOptionalValue(Node, 5);
- TmpReduceNewList[i].mHalterName = createStringFromOptionalValue(Node, 6);
+ TmpReduceList[i].mInitializerName = createStringFromOptionalValue(Node, 3);
+ TmpReduceList[i].mCombinerName = createStringFromOptionalValue(Node, 4);
+ TmpReduceList[i].mOutConverterName = createStringFromOptionalValue(Node, 5);
+ TmpReduceList[i].mHalterName = createStringFromOptionalValue(Node, 6);
}
- mExportReduceNewList = TmpReduceNewList;
+ mExportReduceList = TmpReduceList;
return true;
}
@@ -631,8 +622,8 @@ bool MetadataExtractor::extract() {
mModule->getNamedMetadata(ExportForEachNameMetadataName);
const llvm::NamedMDNode *ExportForEachMetadata =
mModule->getNamedMetadata(ExportForEachMetadataName);
- const llvm::NamedMDNode *ExportReduceNewMetadata =
- mModule->getNamedMetadata(ExportReduceNewMetadataName);
+ const llvm::NamedMDNode *ExportReduceMetadata =
+ mModule->getNamedMetadata(ExportReduceMetadataName);
const llvm::NamedMDNode *PragmaMetadata =
mModule->getNamedMetadata(PragmaMetadataName);
const llvm::NamedMDNode *ObjectSlotMetadata =
@@ -662,7 +653,7 @@ bool MetadataExtractor::extract() {
goto err;
}
- if (!populateReduceNewMetadata(ExportReduceNewMetadata)) {
+ if (!populateReduceMetadata(ExportReduceMetadata)) {
ALOGE("Could not populate export general reduction metadata");
goto err;
}
diff --git a/bcinfo/tools/main.cpp b/bcinfo/tools/main.cpp
index b823bf9..c85fdc4 100644
--- a/bcinfo/tools/main.cpp
+++ b/bcinfo/tools/main.cpp
@@ -110,7 +110,7 @@ static int parseOption(int argc, char** argv) {
}
-static void dumpReduceNewInfo(FILE *info, const char *Kind, const char *Name) {
+static void dumpReduceInfo(FILE *info, const char *Kind, const char *Name) {
if (Name)
fprintf(info, " %s(%s)\n", Kind, Name);
}
@@ -149,23 +149,17 @@ static int dumpInfo(bcinfo::MetadataExtractor *ME) {
}
fprintf(info, "exportReduceCount: %zu\n", ME->getExportReduceCount());
- const char **reduceNameList = ME->getExportReduceNameList();
+ const bcinfo::MetadataExtractor::Reduce *reduceList =
+ ME->getExportReduceList();
for (size_t i = 0; i < ME->getExportReduceCount(); i++) {
- fprintf(info, "%s\n", reduceNameList[i]);
- }
-
- fprintf(info, "exportReduceNewCount: %zu\n", ME->getExportReduceNewCount());
- const bcinfo::MetadataExtractor::ReduceNew *reduceNewList =
- ME->getExportReduceNewList();
- for (size_t i = 0; i < ME->getExportReduceNewCount(); i++) {
- const bcinfo::MetadataExtractor::ReduceNew &reduceNew = reduceNewList[i];
- fprintf(info, "%u - %s - %u - %u\n", reduceNew.mSignature, reduceNew.mReduceName,
- reduceNew.mInputCount, reduceNew.mAccumulatorDataSize);
- dumpReduceNewInfo(info, "initializer", reduceNew.mInitializerName);
- dumpReduceNewInfo(info, "accumulator", reduceNew.mAccumulatorName);
- dumpReduceNewInfo(info, "combiner", reduceNew.mCombinerName);
- dumpReduceNewInfo(info, "outconverter", reduceNew.mOutConverterName);
- dumpReduceNewInfo(info, "halter", reduceNew.mHalterName);
+ const bcinfo::MetadataExtractor::Reduce &reduce = reduceList[i];
+ fprintf(info, "%u - %s - %u - %u\n", reduce.mSignature, reduce.mReduceName,
+ reduce.mInputCount, reduce.mAccumulatorDataSize);
+ dumpReduceInfo(info, "initializer", reduce.mInitializerName);
+ dumpReduceInfo(info, "accumulator", reduce.mAccumulatorName);
+ dumpReduceInfo(info, "combiner", reduce.mCombinerName);
+ dumpReduceInfo(info, "outconverter", reduce.mOutConverterName);
+ dumpReduceInfo(info, "halter", reduce.mHalterName);
}
fprintf(info, "objectSlotCount: %zu\n", ME->getObjectSlotCount());
@@ -223,23 +217,16 @@ static void dumpMetadata(bcinfo::MetadataExtractor *ME) {
printf("\n");
printf("exportReduceCount: %zu\n", ME->getExportReduceCount());
- const char **reduceNameList = ME->getExportReduceNameList();
+ const bcinfo::MetadataExtractor::Reduce *reduceList = ME->getExportReduceList();
for (size_t i = 0; i < ME->getExportReduceCount(); i++) {
- printf("func[%zu]: %s\n", i, reduceNameList[i]);
- }
- printf("\n");
-
- printf("exportReduceNewCount: %zu\n", ME->getExportReduceNewCount());
- const bcinfo::MetadataExtractor::ReduceNew *reduceNewList = ME->getExportReduceNewList();
- for (size_t i = 0; i < ME->getExportReduceNewCount(); i++) {
- const bcinfo::MetadataExtractor::ReduceNew &reduceNew = reduceNewList[i];
- printf("exportReduceNewList[%zu]: %s - 0x%08x - %u - %u\n", i, reduceNew.mReduceName,
- reduceNew.mSignature, reduceNew.mInputCount, reduceNew.mAccumulatorDataSize);
- dumpReduceNewInfo(stdout, "initializer", reduceNew.mInitializerName);
- dumpReduceNewInfo(stdout, "accumulator", reduceNew.mAccumulatorName);
- dumpReduceNewInfo(stdout, "combiner", reduceNew.mCombinerName);
- dumpReduceNewInfo(stdout, "outconverter", reduceNew.mOutConverterName);
- dumpReduceNewInfo(stdout, "halter", reduceNew.mHalterName);
+ const bcinfo::MetadataExtractor::Reduce &reduce = reduceList[i];
+ printf("exportReduceList[%zu]: %s - 0x%08x - %u - %u\n", i, reduce.mReduceName,
+ reduce.mSignature, reduce.mInputCount, reduce.mAccumulatorDataSize);
+ dumpReduceInfo(stdout, "initializer", reduce.mInitializerName);
+ dumpReduceInfo(stdout, "accumulator", reduce.mAccumulatorName);
+ dumpReduceInfo(stdout, "combiner", reduce.mCombinerName);
+ dumpReduceInfo(stdout, "outconverter", reduce.mOutConverterName);
+ dumpReduceInfo(stdout, "halter", reduce.mHalterName);
}
printf("\n");
diff --git a/include/bcc/Renderscript/RSUtils.h b/include/bcc/Renderscript/RSUtils.h
index fbd5ed0..4e80c4e 100644
--- a/include/bcc/Renderscript/RSUtils.h
+++ b/include/bcc/Renderscript/RSUtils.h
@@ -109,7 +109,7 @@ static inline bool isRsObjectType(const llvm::Type *T) {
// we will synthesize a combiner function from the accumulator
// function. Given the accumulator function name, what should be the
// name of the combiner function?
-static inline std::string nameReduceNewCombinerFromAccumulator(llvm::StringRef accumName) {
+static inline std::string nameReduceCombinerFromAccumulator(llvm::StringRef accumName) {
return std::string(accumName) + ".combiner";
}
diff --git a/include/bcinfo/MetadataExtractor.h b/include/bcinfo/MetadataExtractor.h
index 71a6de0..95fc27b 100644
--- a/include/bcinfo/MetadataExtractor.h
+++ b/include/bcinfo/MetadataExtractor.h
@@ -49,8 +49,8 @@ enum MetadataSignatureBitval {
class MetadataExtractor {
public:
- struct ReduceNew {
- // These strings are owned by the ReduceNew instance, and deleted upon its destruction.
+ struct Reduce {
+ // These strings are owned by the Reduce instance, and deleted upon its destruction.
// They are assumed to have been allocated by "new []" and hence are deleted by "delete []".
const char *mReduceName;
const char *mInitializerName;
@@ -63,13 +63,13 @@ class MetadataExtractor {
uint32_t mInputCount; // of accumulator function (and of kernel itself)
uint32_t mAccumulatorDataSize; // in bytes
- ReduceNew() :
+ Reduce() :
mReduceName(nullptr),
mInitializerName(nullptr), mAccumulatorName(nullptr), mCombinerName(nullptr),
mOutConverterName(nullptr), mHalterName(nullptr),
mSignature(0), mInputCount(0), mAccumulatorDataSize(0) {
}
- ~ReduceNew() {
+ ~Reduce() {
delete [] mReduceName;
delete [] mInitializerName;
delete [] mAccumulatorName;
@@ -78,8 +78,8 @@ class MetadataExtractor {
delete [] mHalterName;
}
- ReduceNew(const ReduceNew &) = delete;
- void operator=(const ReduceNew &) = delete;
+ Reduce(const Reduce &) = delete;
+ void operator=(const Reduce &) = delete;
};
private:
@@ -91,14 +91,12 @@ class MetadataExtractor {
size_t mExportFuncCount;
size_t mExportForEachSignatureCount;
size_t mExportReduceCount;
- size_t mExportReduceNewCount;
const char **mExportVarNameList;
const char **mExportFuncNameList;
const char **mExportForEachNameList;
const uint32_t *mExportForEachSignatureList;
const uint32_t *mExportForEachInputCountList;
- const char **mExportReduceNameList;
- const ReduceNew *mExportReduceNewList;
+ const Reduce *mExportReduceList;
size_t mPragmaCount;
const char **mPragmaKeyList;
@@ -123,7 +121,7 @@ class MetadataExtractor {
// Helper functions for extraction
bool populateForEachMetadata(const llvm::NamedMDNode *Names,
const llvm::NamedMDNode *Signatures);
- bool populateReduceNewMetadata(const llvm::NamedMDNode *ReduceNewMetadata);
+ bool populateReduceMetadata(const llvm::NamedMDNode *ReduceMetadata);
bool populateObjectSlotMetadata(const llvm::NamedMDNode *ObjectSlotMetadata);
void populatePragmaMetadata(const llvm::NamedMDNode *PragmaMetadata);
void readThreadableFlag(const llvm::NamedMDNode *ThreadableMetadata);
@@ -224,31 +222,17 @@ class MetadataExtractor {
}
/**
- * \return number of exported simple reduce kernels (slots) in this script/module.
+ * \return number of exported general reduce kernels (slots) in this script/module.
*/
size_t getExportReduceCount() const {
return mExportReduceCount;
}
/**
- * \return array of exported simple reduce kernel names.
- */
- const char **getExportReduceNameList() const {
- return mExportReduceNameList;
- }
-
- /**
- * \return number of exported general reduce kernels (slots) in this script/module.
- */
- size_t getExportReduceNewCount() const {
- return mExportReduceNewCount;
- }
-
- /**
* \return array of exported general reduce kernel descriptions.
*/
- const ReduceNew *getExportReduceNewList() const {
- return mExportReduceNewList;
+ const Reduce *getExportReduceList() const {
+ return mExportReduceList;
}
/**
diff --git a/lib/Core/Compiler.cpp b/lib/Core/Compiler.cpp
index 1988da3..759844f 100644
--- a/lib/Core/Compiler.cpp
+++ b/lib/Core/Compiler.cpp
@@ -354,12 +354,10 @@ bool Compiler::addInternalizeSymbolsPass(Script &pScript, llvm::legacy::PassMana
size_t exportFuncCount = me.getExportFuncCount();
size_t exportForEachCount = me.getExportForEachSignatureCount();
size_t exportReduceCount = me.getExportReduceCount();
- size_t exportReduceNewCount = me.getExportReduceNewCount();
const char **exportVarNameList = me.getExportVarNameList();
const char **exportFuncNameList = me.getExportFuncNameList();
const char **exportForEachNameList = me.getExportForEachNameList();
- const char **exportReduceNameList = me.getExportReduceNameList();
- const bcinfo::MetadataExtractor::ReduceNew *exportReduceNewList = me.getExportReduceNewList();
+ const bcinfo::MetadataExtractor::Reduce *exportReduceList = me.getExportReduceList();
size_t i;
for (i = 0; i < exportVarCount; ++i) {
@@ -370,32 +368,29 @@ bool Compiler::addInternalizeSymbolsPass(Script &pScript, llvm::legacy::PassMana
export_symbols.push_back(exportFuncNameList[i]);
}
- // Expanded foreach and reduce functions should not be internalized;
- // nor should general reduction initializer, combiner, and
- // outconverter functions. keep_funcs keeps the names of these
- // functions around until createInternalizePass() is finished making
- // its own copy of the visible symbols.
+ // Expanded foreach functions should not be internalized; nor should
+ // general reduction initializer, combiner, and outconverter
+ // functions. keep_funcs keeps the names of these functions around
+ // until createInternalizePass() is finished making its own copy of
+ // the visible symbols.
std::vector<std::string> keep_funcs;
- keep_funcs.reserve(exportForEachCount + exportReduceCount + exportReduceNewCount*4);
+ keep_funcs.reserve(exportForEachCount + exportReduceCount*4);
for (i = 0; i < exportForEachCount; ++i) {
keep_funcs.push_back(std::string(exportForEachNameList[i]) + ".expand");
}
- for (i = 0; i < exportReduceCount; ++i) {
- keep_funcs.push_back(std::string(exportReduceNameList[i]) + ".expand");
- }
auto keepFuncsPushBackIfPresent = [&keep_funcs](const char *Name) {
if (Name) keep_funcs.push_back(Name);
};
- for (i = 0; i < exportReduceNewCount; ++i) {
- keep_funcs.push_back(std::string(exportReduceNewList[i].mAccumulatorName) + ".expand");
- keepFuncsPushBackIfPresent(exportReduceNewList[i].mInitializerName);
- if (exportReduceNewList[i].mCombinerName != nullptr) {
- keep_funcs.push_back(exportReduceNewList[i].mCombinerName);
+ for (i = 0; i < exportReduceCount; ++i) {
+ keep_funcs.push_back(std::string(exportReduceList[i].mAccumulatorName) + ".expand");
+ keepFuncsPushBackIfPresent(exportReduceList[i].mInitializerName);
+ if (exportReduceList[i].mCombinerName != nullptr) {
+ keep_funcs.push_back(exportReduceList[i].mCombinerName);
} else {
- keep_funcs.push_back(nameReduceNewCombinerFromAccumulator(exportReduceNewList[i].mAccumulatorName));
+ keep_funcs.push_back(nameReduceCombinerFromAccumulator(exportReduceList[i].mAccumulatorName));
}
- keepFuncsPushBackIfPresent(exportReduceNewList[i].mOutConverterName);
+ keepFuncsPushBackIfPresent(exportReduceList[i].mOutConverterName);
}
for (auto &symbol_name : keep_funcs) {
diff --git a/lib/Renderscript/RSEmbedInfo.cpp b/lib/Renderscript/RSEmbedInfo.cpp
index 2d2e69f..081bed6 100644
--- a/lib/Renderscript/RSEmbedInfo.cpp
+++ b/lib/Renderscript/RSEmbedInfo.cpp
@@ -75,17 +75,15 @@ public:
size_t exportFuncCount = me.getExportFuncCount();
size_t exportForEachCount = me.getExportForEachSignatureCount();
size_t exportReduceCount = me.getExportReduceCount();
- size_t exportReduceNewCount = me.getExportReduceNewCount();
size_t objectSlotCount = me.getObjectSlotCount();
size_t pragmaCount = me.getPragmaCount();
const char **exportVarNameList = me.getExportVarNameList();
const char **exportFuncNameList = me.getExportFuncNameList();
const char **exportForEachNameList = me.getExportForEachNameList();
- const char **exportReduceNameList = me.getExportReduceNameList();
const uint32_t *exportForEachSignatureList =
me.getExportForEachSignatureList();
- const bcinfo::MetadataExtractor::ReduceNew *exportReduceNewList =
- me.getExportReduceNewList();
+ const bcinfo::MetadataExtractor::Reduce *exportReduceList =
+ me.getExportReduceList();
const uint32_t *objectSlotList = me.getObjectSlotList();
const char **pragmaKeyList = me.getPragmaKeyList();
const char **pragmaValueList = me.getPragmaValueList();
@@ -97,18 +95,17 @@ public:
// We use a simple text format here that the compatibility library
// can easily parse. Each section starts out with its name
// followed by a count. The count denotes the number of lines to
- // parse for that particular category. Variables and Functions and
- // simple reduce kernels merely put the appropriate identifier on
- // the line. ForEach kernels have the encoded int signature,
- // followed by a hyphen followed by the identifier (function to
- // look up). General reduce kernels have the encoded int
- // signature, followed by a hyphen followed by the accumulator
- // data size, followed by a hyphen followed by the identifier
- // (reduction name); and then for each possible constituent
- // function, a hyphen followed by the identifier (function name)
- // -- in the case where the function is omitted, "." is used in
- // place of the identifier. Object Slots are just listed as one
- // integer per line.
+ // parse for that particular category. Variables and Functions
+ // merely put the appropriate identifier on the line. ForEach
+ // kernels have the encoded int signature, followed by a hyphen
+ // followed by the identifier (function to look up). General
+ // reduce kernels have the encoded int signature, followed by a
+ // hyphen followed by the accumulator data size, followed by a
+ // hyphen followed by the identifier (reduction name); and then
+ // for each possible constituent function, a hyphen followed by
+ // the identifier (function name) -- in the case where the
+ // function is omitted, "." is used in place of the identifier.
+ // Object Slots are just listed as one integer per line.
s << "exportVarCount: " << exportVarCount << "\n";
for (i = 0; i < exportVarCount; ++i) {
@@ -127,24 +124,19 @@ public:
}
s << "exportReduceCount: " << exportReduceCount << "\n";
+ auto reduceFnName = [](const char *Name) { return Name ? Name : "."; };
for (i = 0; i < exportReduceCount; ++i) {
- s << exportReduceNameList[i] << "\n";
- }
-
- s << "exportReduceNewCount: " << exportReduceNewCount << "\n";
- auto reduceNewFnName = [](const char *Name) { return Name ? Name : "."; };
- for (i = 0; i < exportReduceNewCount; ++i) {
- const bcinfo::MetadataExtractor::ReduceNew &reduceNew = exportReduceNewList[i];
- s << reduceNew.mSignature << " - "
- << reduceNew.mAccumulatorDataSize << " - "
- << reduceNew.mReduceName << " - "
- << reduceNewFnName(reduceNew.mInitializerName) << " - "
- << reduceNewFnName(reduceNew.mAccumulatorName) << " - "
- << ((reduceNew.mCombinerName != nullptr)
- ? reduceNew.mCombinerName
- : nameReduceNewCombinerFromAccumulator(reduceNew.mAccumulatorName)) << " - "
- << reduceNewFnName(reduceNew.mOutConverterName) << " - "
- << reduceNewFnName(reduceNew.mHalterName)
+ const bcinfo::MetadataExtractor::Reduce &reduce = exportReduceList[i];
+ s << reduce.mSignature << " - "
+ << reduce.mAccumulatorDataSize << " - "
+ << reduce.mReduceName << " - "
+ << reduceFnName(reduce.mInitializerName) << " - "
+ << reduceFnName(reduce.mAccumulatorName) << " - "
+ << ((reduce.mCombinerName != nullptr)
+ ? reduce.mCombinerName
+ : nameReduceCombinerFromAccumulator(reduce.mAccumulatorName)) << " - "
+ << reduceFnName(reduce.mOutConverterName) << " - "
+ << reduceFnName(reduce.mHalterName)
<< "\n";
}
diff --git a/lib/Renderscript/RSKernelExpand.cpp b/lib/Renderscript/RSKernelExpand.cpp
index d7e4996..1e27e22 100644
--- a/lib/Renderscript/RSKernelExpand.cpp
+++ b/lib/Renderscript/RSKernelExpand.cpp
@@ -43,8 +43,7 @@
#ifndef __DISABLE_ASSERTS
// Only used in bccAssert()
const int kNumExpandedForeachParams = 4;
-const int kNumExpandedReduceParams = 3;
-const int kNumExpandedReduceNewAccumulatorParams = 4;
+const int kNumExpandedReduceAccumulatorParams = 4;
#endif
const char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA";
@@ -109,16 +108,13 @@ private:
* for expanded functions. These must be re-calculated for each module
* the pass is run on.
*/
- llvm::FunctionType *ExpandedForEachType, *ExpandedReduceType;
+ llvm::FunctionType *ExpandedForEachType;
llvm::Type *RsExpandKernelDriverInfoPfxTy;
uint32_t mExportForEachCount;
const char **mExportForEachNameList;
const uint32_t *mExportForEachSignatureList;
- uint32_t mExportReduceCount;
- const char **mExportReduceNameList;
-
// Turns on optimization of allocation stride values.
bool mEnableStepOpt;
@@ -310,9 +306,6 @@ private:
// void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep)
ExpandedForEachType = llvm::FunctionType::get(VoidTy,
{RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false);
-
- // void (void *inBuf, void *outBuf, uint32_t len)
- ExpandedReduceType = llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy, Int32Ty}, false);
}
/// @brief Create skeleton of the expanded foreach kernel.
@@ -340,41 +333,6 @@ private:
return ExpandedFunction;
}
- // Create skeleton of the expanded reduce kernel.
- //
- // This creates a function with the following signature:
- //
- // void @func.expand(i8* nocapture %inBuf, i8* nocapture %outBuf, i32 len)
- //
- llvm::Function *createEmptyExpandedReduceKernel(llvm::StringRef OldName) {
- llvm::Function *ExpandedFunction =
- llvm::Function::Create(ExpandedReduceType,
- llvm::GlobalValue::ExternalLinkage,
- OldName + ".expand", Module);
- bccAssert(ExpandedFunction->arg_size() == kNumExpandedReduceParams);
-
- llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
-
- using llvm::Attribute;
-
- llvm::Argument *InBuf = &(*AI++);
- InBuf->setName("inBuf");
- InBuf->addAttr(llvm::AttributeSet::get(*Context, InBuf->getArgNo() + 1, llvm::makeArrayRef(Attribute::NoCapture)));
-
- llvm::Argument *OutBuf = &(*AI++);
- OutBuf->setName("outBuf");
- OutBuf->addAttr(llvm::AttributeSet::get(*Context, OutBuf->getArgNo() + 1, llvm::makeArrayRef(Attribute::NoCapture)));
-
- (AI++)->setName("len");
-
- llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
- ExpandedFunction);
- llvm::IRBuilder<> Builder(Begin);
- Builder.CreateRetVoid();
-
- return ExpandedFunction;
- }
-
// Create skeleton of a general reduce kernel's expanded accumulator.
//
// This creates a function with the following signature:
@@ -382,19 +340,19 @@ private:
// void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p,
// i32 %x1, i32 %x2, accumType* nocapture %accum)
//
- llvm::Function *createEmptyExpandedReduceNewAccumulator(llvm::StringRef OldName,
- llvm::Type *AccumArgTy) {
+ llvm::Function *createEmptyExpandedReduceAccumulator(llvm::StringRef OldName,
+ llvm::Type *AccumArgTy) {
llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
- llvm::FunctionType *ExpandedReduceNewAccumulatorType =
+ llvm::FunctionType *ExpandedReduceAccumulatorType =
llvm::FunctionType::get(VoidTy,
{RsExpandKernelDriverInfoPfxTy->getPointerTo(),
Int32Ty, Int32Ty, AccumArgTy}, false);
llvm::Function *FnExpandedAccumulator =
- llvm::Function::Create(ExpandedReduceNewAccumulatorType,
+ llvm::Function::Create(ExpandedReduceAccumulatorType,
llvm::GlobalValue::ExternalLinkage,
OldName + ".expand", Module);
- bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams);
+ bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin();
@@ -1115,272 +1073,6 @@ public:
return true;
}
- // Expand a simple reduce-style kernel function.
- //
- // The input is a kernel which represents a binary operation,
- // of the form
- //
- // define foo @func(foo %a, foo %b),
- //
- // (More generally, it can be of the forms
- //
- // define void @func(foo* %ret, foo* %a, foo* %b)
- // define void @func(foo* %ret, foo1 %a, foo1 %b)
- // define foo1 @func(foo2 %a, foo2 %b)
- //
- // as a result of argument / return value conversions. Here, "foo1"
- // and "foo2" refer to possibly coerced types, and the coerced
- // argument type may be different from the coerced return type. See
- // "Note on coercion" below.)
- //
- // Note also, we do not expect to encounter any case when the
- // arguments are promoted to pointers but the return value is
- // unpromoted to pointer, e.g.
- //
- // define foo1 @func(foo* %a, foo* %b)
- //
- // and we will throw an assertion in this case.)
- //
- // The input kernel gets expanded into a kernel of the form
- //
- // define void @func.expand(i8* %inBuf, i8* outBuf, i32 len)
- //
- // which performs a serial reduction of `len` elements from `inBuf`,
- // and stores the result into `outBuf`. In pseudocode, @func.expand
- // does:
- //
- // inArr := (foo *)inBuf;
- // accum := inArr[0];
- // for (i := 1; i < len; ++i) {
- // accum := foo(accum, inArr[i]);
- // }
- // *(foo *)outBuf := accum;
- //
- // Note on coercion
- //
- // Both the return value and the argument types may undergo internal
- // coercion in clang as part of call lowering. As a result, the
- // return value type may differ from the argument type even if the
- // types in the RenderScript signaure are the same. For instance, the
- // kernel
- //
- // int3 add(int3 a, int3 b) { return a + b; }
- //
- // gets lowered by clang as
- //
- // define <3 x i32> @add(<4 x i32> %a.coerce, <4 x i32> %b.coerce)
- //
- // under AArch64. The details of this process are found in clang,
- // lib/CodeGen/TargetInfo.cpp, under classifyArgumentType() and
- // classifyReturnType() in ARMABIInfo, AArch64ABIInfo. If the value
- // is passed by pointer, then the pointed-to type is not coerced.
- //
- // Since we lack the original type information, this code does loads
- // and stores of allocation data by way of pointers to the coerced
- // type.
- bool ExpandReduce(llvm::Function *Function) {
- bccAssert(Function);
-
- ALOGV("Expanding simple reduce kernel %s", Function->getName().str().c_str());
-
- llvm::DataLayout DL(Module);
- if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) {
- DL.reset(X86_CUSTOM_DL_STRING);
- }
-
- // TBAA Metadata
- llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, *TBAAAllocation;
- llvm::MDBuilder MDHelper(*Context);
-
- TBAARenderScriptDistinct =
- MDHelper.createTBAARoot(kRenderScriptTBAARootName);
- TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
- TBAARenderScriptDistinct);
- TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
- TBAARenderScript);
- TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
- TBAAAllocation, 0);
-
- llvm::Function *ExpandedFunction =
- createEmptyExpandedReduceKernel(Function->getName());
-
- // Extract the expanded kernel's parameters. It is guaranteed by
- // createEmptyExpandedReduceKernel that there will be 3 parameters.
- auto ExpandedFunctionArgIter = ExpandedFunction->arg_begin();
-
- llvm::Value *Arg_inBuf = &*(ExpandedFunctionArgIter++);
- llvm::Value *Arg_outBuf = &*(ExpandedFunctionArgIter++);
- llvm::Value *Arg_len = &*(ExpandedFunctionArgIter++);
-
- bccAssert(Function->arg_size() == 2 || Function->arg_size() == 3);
-
- // Check if, instead of returning a value, the original kernel has
- // a pointer parameter which points to a temporary buffer into
- // which the return value gets written.
- const bool ReturnValuePointerStyle = (Function->arg_size() == 3);
- bccAssert(Function->getReturnType()->isVoidTy() == ReturnValuePointerStyle);
-
- // Check if, instead of being passed by value, the inputs to the
- // original kernel are passed by pointer.
- auto FirstArgIter = Function->arg_begin();
- // The second argument is always an input to the original kernel.
- auto SecondArgIter = std::next(FirstArgIter);
- const bool InputsPointerStyle = SecondArgIter->getType()->isPointerTy();
-
- // Get the output type (i.e. return type of the original kernel).
- llvm::PointerType *OutPtrTy = nullptr;
- llvm::Type *OutTy = nullptr;
- if (ReturnValuePointerStyle) {
- OutPtrTy = llvm::dyn_cast<llvm::PointerType>(FirstArgIter->getType());
- bccAssert(OutPtrTy && "Expected a pointer parameter to kernel");
- OutTy = OutPtrTy->getElementType();
- } else {
- OutTy = Function->getReturnType();
- bccAssert(!OutTy->isVoidTy());
- OutPtrTy = OutTy->getPointerTo();
- }
-
- // Get the input type (type of the arguments to the original
- // kernel). Some input types are different from the output type,
- // due to explicit coercion that the compiler performs when
- // lowering the parameters. See "Note on coercion" above.
- llvm::PointerType *InPtrTy;
- llvm::Type *InTy;
- if (InputsPointerStyle) {
- InPtrTy = llvm::dyn_cast<llvm::PointerType>(SecondArgIter->getType());
- bccAssert(InPtrTy && "Expected a pointer parameter to kernel");
- bccAssert(ReturnValuePointerStyle);
- bccAssert(std::next(SecondArgIter)->getType() == InPtrTy &&
- "Input type mismatch");
- InTy = InPtrTy->getElementType();
- } else {
- InTy = SecondArgIter->getType();
- InPtrTy = InTy->getPointerTo();
- if (!ReturnValuePointerStyle) {
- bccAssert(InTy == FirstArgIter->getType() && "Input type mismatch");
- } else {
- bccAssert(InTy == std::next(SecondArgIter)->getType() &&
- "Input type mismatch");
- }
- }
-
- // The input type should take up the same amount of space in
- // memory as the output type.
- bccAssert(DL.getTypeAllocSize(InTy) == DL.getTypeAllocSize(OutTy));
-
- // Construct the actual function body.
- llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());
-
- // Cast input and output buffers to appropriate types.
- llvm::Value *InBuf = Builder.CreatePointerCast(Arg_inBuf, InPtrTy);
- llvm::Value *OutBuf = Builder.CreatePointerCast(Arg_outBuf, OutPtrTy);
-
- // Create a slot to pass temporary results back. This needs to be
- // separate from the accumulator slot because the kernel may mark
- // the return value slot as noalias.
- llvm::Value *ReturnBuf = nullptr;
- if (ReturnValuePointerStyle) {
- ReturnBuf = Builder.CreateAlloca(OutTy, nullptr, "ret.tmp");
- }
-
- // Create a slot to hold the second input if the inputs are passed
- // by pointer to the original kernel. We cannot directly pass a
- // pointer to the input buffer, because the kernel may modify its
- // inputs.
- llvm::Value *SecondInputTempBuf = nullptr;
- if (InputsPointerStyle) {
- SecondInputTempBuf = Builder.CreateAlloca(InTy, nullptr, "in.tmp");
- }
-
- // Create a slot to accumulate temporary results, and fill it with
- // the first value.
- llvm::Value *AccumBuf = Builder.CreateAlloca(OutTy, nullptr, "accum");
- // Cast to OutPtrTy before loading, since AccumBuf has type OutPtrTy.
- llvm::LoadInst *FirstElementLoad = Builder.CreateLoad(
- Builder.CreatePointerCast(InBuf, OutPtrTy));
- if (gEnableRsTbaa) {
- FirstElementLoad->setMetadata("tbaa", TBAAAllocation);
- }
- // Memory operations with AccumBuf shouldn't be marked with
- // RenderScript TBAA, since this might conflict with TBAA metadata
- // in the kernel function when AccumBuf is passed by pointer.
- Builder.CreateStore(FirstElementLoad, AccumBuf);
-
- // Loop body
-
- // Create the loop structure. Note that the first input in the input buffer
- // has already been accumulated, so that we start at index 1.
- llvm::Value *IndVar;
- llvm::Value *Start = llvm::ConstantInt::get(Arg_len->getType(), 1);
- llvm::BasicBlock *Exit = createLoop(Builder, Start, Arg_len, &IndVar);
-
- llvm::Value *InputPtr = Builder.CreateInBoundsGEP(InBuf, IndVar, "next_input.gep");
-
- // Set up arguments and call the original (unexpanded) kernel.
- //
- // The original kernel can have at most 3 arguments, which is
- // achieved when the signature looks like:
- //
- // define void @func(foo* %ret, bar %a, bar %b)
- //
- // (bar can be one of foo/foo.coerce/foo*).
- llvm::SmallVector<llvm::Value *, 3> KernelArgs;
-
- if (ReturnValuePointerStyle) {
- KernelArgs.push_back(ReturnBuf);
- }
-
- if (InputsPointerStyle) {
- bccAssert(ReturnValuePointerStyle);
- // Because the return buffer is copied back into the
- // accumulator, it's okay if the accumulator is overwritten.
- KernelArgs.push_back(AccumBuf);
-
- llvm::LoadInst *InputLoad = Builder.CreateLoad(InputPtr);
- if (gEnableRsTbaa) {
- InputLoad->setMetadata("tbaa", TBAAAllocation);
- }
- Builder.CreateStore(InputLoad, SecondInputTempBuf);
-
- KernelArgs.push_back(SecondInputTempBuf);
- } else {
- // InPtrTy may be different from OutPtrTy (the type of
- // AccumBuf), so first cast the accumulator buffer to the
- // pointer type corresponding to the input argument type.
- KernelArgs.push_back(
- Builder.CreateLoad(Builder.CreatePointerCast(AccumBuf, InPtrTy)));
-
- llvm::LoadInst *LoadedArg = Builder.CreateLoad(InputPtr);
- if (gEnableRsTbaa) {
- LoadedArg->setMetadata("tbaa", TBAAAllocation);
- }
- KernelArgs.push_back(LoadedArg);
- }
-
- llvm::Value *RetVal = Builder.CreateCall(Function, KernelArgs);
-
- const uint64_t ElementSize = DL.getTypeStoreSize(OutTy);
- const uint64_t ElementAlign = DL.getABITypeAlignment(OutTy);
-
- // Store the output in the accumulator.
- if (ReturnValuePointerStyle) {
- Builder.CreateMemCpy(AccumBuf, ReturnBuf, ElementSize, ElementAlign);
- } else {
- Builder.CreateStore(RetVal, AccumBuf);
- }
-
- // Loop exit
- Builder.SetInsertPoint(Exit, Exit->begin());
-
- llvm::LoadInst *OutputLoad = Builder.CreateLoad(AccumBuf);
- llvm::StoreInst *OutputStore = Builder.CreateStore(OutputLoad, OutBuf);
- if (gEnableRsTbaa) {
- OutputStore->setMetadata("tbaa", TBAAAllocation);
- }
-
- return true;
- }
-
// Certain categories of functions that make up a general
// reduce-style kernel are called directly from the driver with no
// expansion needed. For a function in such a category, we need to
@@ -1389,7 +1081,7 @@ public:
// This promotion is safe because we don't have any kind of cross
// translation unit linkage model (except for linking against
// RenderScript libraries), so we do not risk name clashes.
- bool PromoteReduceNewFunction(const char *Name, FunctionSet &PromotedFunctions) {
+ bool PromoteReduceFunction(const char *Name, FunctionSet &PromotedFunctions) {
if (!Name) // a presumably-optional function that is not present
return false;
@@ -1427,7 +1119,7 @@ public:
// }
//
// This is very similar to foreach kernel expansion with no output.
- bool ExpandReduceNewAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) {
+ bool ExpandReduceAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) {
ALOGV("Expanding accumulator %s for general reduce kernel",
FnAccumulator->getName().str().c_str());
@@ -1451,13 +1143,13 @@ public:
// Create empty accumulator function.
llvm::Function *FnExpandedAccumulator =
- createEmptyExpandedReduceNewAccumulator(FnAccumulator->getName(),
- (AccumulatorArgIter++)->getType());
+ createEmptyExpandedReduceAccumulator(FnAccumulator->getName(),
+ (AccumulatorArgIter++)->getType());
// Extract the expanded accumulator's parameters. It is
- // guaranteed by createEmptyExpandedReduceNewAccumulator that
+ // guaranteed by createEmptyExpandedReduceAccumulator that
// there will be 4 parameters.
- bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams);
+ bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin();
llvm::Value *Arg_p = &*(ExpandedAccumulatorArgIter++);
llvm::Value *Arg_x1 = &*(ExpandedAccumulatorArgIter++);
@@ -1507,7 +1199,7 @@ public:
// %1 = load accumType, accumType* %other
// call void @accumFn(accumType* %accum, accumType %1);
// }
- bool CreateReduceNewCombinerFromAccumulator(llvm::Function *FnAccumulator) {
+ bool CreateReduceCombinerFromAccumulator(llvm::Function *FnAccumulator) {
ALOGV("Creating combiner from accumulator %s for general reduce kernel",
FnAccumulator->getName().str().c_str());
@@ -1525,7 +1217,7 @@ public:
llvm::FunctionType::get(VoidTy, { AccumulatorArgType, AccumulatorArgType }, false);
llvm::Function *FnCombiner =
llvm::Function::Create(CombinerType, llvm::GlobalValue::ExternalLinkage,
- nameReduceNewCombinerFromAccumulator(FnAccumulator->getName()),
+ nameReduceCombinerFromAccumulator(FnAccumulator->getName()),
Module);
auto CombinerArgIter = FnCombiner->arg_begin();
@@ -1687,38 +1379,27 @@ public:
}
}
- // Expand simple reduce_* style kernels.
- mExportReduceCount = me.getExportReduceCount();
- mExportReduceNameList = me.getExportReduceNameList();
-
- for (size_t i = 0; i < mExportReduceCount; ++i) {
- llvm::Function *kernel = Module.getFunction(mExportReduceNameList[i]);
- if (kernel) {
- Changed |= ExpandReduce(kernel);
- }
- }
-
// Process general reduce_* style functions.
- const size_t ExportReduceNewCount = me.getExportReduceNewCount();
- const bcinfo::MetadataExtractor::ReduceNew *ExportReduceNewList = me.getExportReduceNewList();
+ const size_t ExportReduceCount = me.getExportReduceCount();
+ const bcinfo::MetadataExtractor::Reduce *ExportReduceList = me.getExportReduceList();
// Note that functions can be shared between kernels
FunctionSet PromotedFunctions, ExpandedAccumulators, AccumulatorsForCombiners;
- for (size_t i = 0; i < ExportReduceNewCount; ++i) {
- Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mInitializerName, PromotedFunctions);
- Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mCombinerName, PromotedFunctions);
- Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mOutConverterName, PromotedFunctions);
+ for (size_t i = 0; i < ExportReduceCount; ++i) {
+ Changed |= PromoteReduceFunction(ExportReduceList[i].mInitializerName, PromotedFunctions);
+ Changed |= PromoteReduceFunction(ExportReduceList[i].mCombinerName, PromotedFunctions);
+ Changed |= PromoteReduceFunction(ExportReduceList[i].mOutConverterName, PromotedFunctions);
// Accumulator
- llvm::Function *accumulator = Module.getFunction(ExportReduceNewList[i].mAccumulatorName);
+ llvm::Function *accumulator = Module.getFunction(ExportReduceList[i].mAccumulatorName);
bccAssert(accumulator != nullptr);
if (ExpandedAccumulators.insert(accumulator).second)
- Changed |= ExpandReduceNewAccumulator(accumulator,
- ExportReduceNewList[i].mSignature,
- ExportReduceNewList[i].mInputCount);
- if (!ExportReduceNewList[i].mCombinerName) {
+ Changed |= ExpandReduceAccumulator(accumulator,
+ ExportReduceList[i].mSignature,
+ ExportReduceList[i].mInputCount);
+ if (!ExportReduceList[i].mCombinerName) {
if (AccumulatorsForCombiners.insert(accumulator).second)
- Changed |= CreateReduceNewCombinerFromAccumulator(accumulator);
+ Changed |= CreateReduceCombinerFromAccumulator(accumulator);
}
}
diff --git a/tests/libbcc/test_reduce_general_metadata.ll b/tests/libbcc/test_reduce_general_metadata.ll
index b41234a..ebf1df4 100644
--- a/tests/libbcc/test_reduce_general_metadata.ll
+++ b/tests/libbcc/test_reduce_general_metadata.ll
@@ -3,34 +3,34 @@
; RUN: llvm-rs-as %s -o %t
; RUN: bcinfo %t | FileCheck %s
-; CHECK: exportReduceNewCount: 8
-; CHECK: exportReduceNewList[0]: addint - 0x00000001 - 1 - 4
+; CHECK: exportReduceCount: 8
+; CHECK: exportReduceList[0]: addint - 0x00000001 - 1 - 4
; CHECK: accumulator(aiAccum)
-; CHECK: exportReduceNewList[1]: mpyint - 0x00000001 - 1 - 4
+; CHECK: exportReduceList[1]: mpyint - 0x00000001 - 1 - 4
; CHECK: initializer(mpyInit)
; CHECK: accumulator(mpyAccum)
-; CHECK: exportReduceNewList[2]: dp - 0x00000001 - 2 - 4
+; CHECK: exportReduceList[2]: dp - 0x00000001 - 2 - 4
; CHECK: accumulator(dpAccum)
; CHECK: combiner(dpSum)
-; CHECK: exportReduceNewList[3]: findMinAndMax - 0x00000009 - 1 - 16
+; CHECK: exportReduceList[3]: findMinAndMax - 0x00000009 - 1 - 16
; CHECK: initializer(fMMInit)
; CHECK: accumulator(fMMAccumulator)
; CHECK: combiner(fMMCombiner)
; CHECK: outconverter(fMMOutConverter)
-; CHECK: exportReduceNewList[4]: fz - 0x00000009 - 1 - 4
+; CHECK: exportReduceList[4]: fz - 0x00000009 - 1 - 4
; CHECK: initializer(fzInit)
; CHECK: accumulator(fzAccum)
; CHECK: combiner(fzCombine)
; CHECK: halter(fzFound)
-; CHECK: exportReduceNewList[5]: fz2 - 0x00000019 - 1 - 8
+; CHECK: exportReduceList[5]: fz2 - 0x00000019 - 1 - 8
; CHECK: initializer(fz2Init)
; CHECK: accumulator(fz2Accum)
; CHECK: combiner(fz2Combine)
; CHECK: halter(fz2Found)
-; CHECK: exportReduceNewList[6]: histogram - 0x00000001 - 1 - 1024
+; CHECK: exportReduceList[6]: histogram - 0x00000001 - 1 - 1024
; CHECK: accumulator(hsgAccum)
; CHECK: combiner(hsgCombine)
-; CHECK: exportReduceNewList[7]: mode - 0x00000001 - 1 - 1024
+; CHECK: exportReduceList[7]: mode - 0x00000001 - 1 - 1024
; CHECK: accumulator(hsgAccum)
; CHECK: combiner(hsgCombine)
; CHECK: outconverter(modeOutConvert)