diff options
author | David Gross <dgross@google.com> | 2016-06-13 18:06:26 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2016-06-13 18:06:26 +0000 |
commit | d2e35f95915d480fe203d6614df8b0f5d14de0e5 (patch) | |
tree | 821153077b140abfea3b6d53d9c5436595ffca81 | |
parent | c6c9c1f04b480a395daa1bdd5d634060e505bd80 (diff) | |
parent | a48ea364652efcf947dd33c8a6ba893e9c00dd6a (diff) | |
download | libbcc-nougat-mr1-flounder-release.tar.gz |
Delete simple reduction implementation.android-cts_7.1_r1android-cts-7.1_r9android-cts-7.1_r8android-cts-7.1_r7android-cts-7.1_r6android-cts-7.1_r5android-cts-7.1_r4android-cts-7.1_r3android-cts-7.1_r29android-cts-7.1_r28android-cts-7.1_r27android-cts-7.1_r26android-cts-7.1_r25android-cts-7.1_r24android-cts-7.1_r23android-cts-7.1_r22android-cts-7.1_r21android-cts-7.1_r20android-cts-7.1_r2android-cts-7.1_r19android-cts-7.1_r18android-cts-7.1_r17android-cts-7.1_r16android-cts-7.1_r15android-cts-7.1_r14android-cts-7.1_r13android-cts-7.1_r12android-cts-7.1_r11android-cts-7.1_r10android-cts-7.1_r1android-7.1.2_r9android-7.1.2_r8android-7.1.2_r6android-7.1.2_r5android-7.1.2_r4android-7.1.2_r39android-7.1.2_r38android-7.1.2_r37android-7.1.2_r36android-7.1.2_r33android-7.1.2_r32android-7.1.2_r30android-7.1.2_r3android-7.1.2_r29android-7.1.2_r28android-7.1.2_r27android-7.1.2_r25android-7.1.2_r24android-7.1.2_r23android-7.1.2_r2android-7.1.2_r19android-7.1.2_r18android-7.1.2_r17android-7.1.2_r16android-7.1.2_r15android-7.1.2_r14android-7.1.2_r13android-7.1.2_r12android-7.1.2_r11android-7.1.2_r10android-7.1.2_r1android-7.1.1_r9android-7.1.1_r8android-7.1.1_r7android-7.1.1_r61android-7.1.1_r60android-7.1.1_r6android-7.1.1_r59android-7.1.1_r58android-7.1.1_r57android-7.1.1_r56android-7.1.1_r55android-7.1.1_r54android-7.1.1_r53android-7.1.1_r52android-7.1.1_r51android-7.1.1_r50android-7.1.1_r49android-7.1.1_r48android-7.1.1_r47android-7.1.1_r46android-7.1.1_r45android-7.1.1_r44android-7.1.1_r43android-7.1.1_r42android-7.1.1_r41android-7.1.1_r40android-7.1.1_r4android-7.1.1_r39android-7.1.1_r38android-7.1.1_r35android-7.1.1_r33android-7.1.1_r32android-7.1.1_r31android-7.1.1_r3android-7.1.1_r28android-7.1.1_r27android-7.1.1_r26android-7.1.1_r25android-7.1.1_r24android-7.1.1_r23android-7.1.1_r22android-7.1.1_r21android-7.1.1_r20android-7.1.1_r2android-7.1.1_r17android-7.1.1_r16android-7.1.1_r15android-7.1.1_r14android-7.1.1_r13android-7.1.1_r12android-7.1.1_r11android-7.1.1_r10android-7.1.1_r1android-7.1.0_r7android-7.1.0_r6android-7.1.0_r5android-7.1.0_r4android-7.1.0_r3android-7.1.0_r2android-7.1.0_r1nougat-mr2.3-releasenougat-mr2.2-releasenougat-mr2.1-releasenougat-mr2-security-releasenougat-mr2-releasenougat-mr2-pixel-releasenougat-mr2-devnougat-mr1.8-releasenougat-mr1.7-releasenougat-mr1.6-releasenougat-mr1.5-releasenougat-mr1.4-releasenougat-mr1.3-releasenougat-mr1.2-releasenougat-mr1.1-releasenougat-mr1-volantis-releasenougat-mr1-security-releasenougat-mr1-releasenougat-mr1-flounder-releasenougat-mr1-devnougat-mr1-cts-releasenougat-dr1-release
am: a48ea36465
Change-Id: Ie88be61a5b49bf4c0147db7d05c5cf00bb028ff3
-rw-r--r-- | bcinfo/MetadataExtractor.cpp | 69 | ||||
-rw-r--r-- | bcinfo/tools/main.cpp | 53 | ||||
-rw-r--r-- | include/bcc/Renderscript/RSUtils.h | 2 | ||||
-rw-r--r-- | include/bcinfo/MetadataExtractor.h | 38 | ||||
-rw-r--r-- | lib/Core/Compiler.cpp | 33 | ||||
-rw-r--r-- | lib/Renderscript/RSEmbedInfo.cpp | 58 | ||||
-rw-r--r-- | lib/Renderscript/RSKernelExpand.cpp | 373 | ||||
-rw-r--r-- | tests/libbcc/test_reduce_general_metadata.ll | 18 |
8 files changed, 137 insertions, 507 deletions
diff --git a/bcinfo/MetadataExtractor.cpp b/bcinfo/MetadataExtractor.cpp index ba0548f..48a2ecb 100644 --- a/bcinfo/MetadataExtractor.cpp +++ b/bcinfo/MetadataExtractor.cpp @@ -155,7 +155,7 @@ static const llvm::StringRef ExportForEachMetadataName = "#rs_export_foreach"; // Name of metadata node where exported general reduce information resides // (should be synced with slang_rs_metadata.h) -static const llvm::StringRef ExportReduceNewMetadataName = "#rs_export_reduce"; +static const llvm::StringRef ExportReduceMetadataName = "#rs_export_reduce"; // Name of metadata node where RS object slot info resides (should be // synced with slang_rs_metadata.h) @@ -175,11 +175,11 @@ static const llvm::StringRef DebugInfoMetadataName = "llvm.dbg.cu"; MetadataExtractor::MetadataExtractor(const char *bitcode, size_t bitcodeSize) : mModule(nullptr), mBitcode(bitcode), mBitcodeSize(bitcodeSize), mExportVarCount(0), mExportFuncCount(0), mExportForEachSignatureCount(0), - mExportReduceCount(0), mExportReduceNewCount(0), mExportVarNameList(nullptr), + mExportReduceCount(0), mExportVarNameList(nullptr), mExportFuncNameList(nullptr), mExportForEachNameList(nullptr), mExportForEachSignatureList(nullptr), - mExportForEachInputCountList(nullptr), mExportReduceNameList(nullptr), - mExportReduceNewList(nullptr), + mExportForEachInputCountList(nullptr), + mExportReduceList(nullptr), mPragmaCount(0), mPragmaKeyList(nullptr), mPragmaValueList(nullptr), mObjectSlotCount(0), mObjectSlotList(nullptr), mRSFloatPrecision(RS_FP_Full), mIsThreadable(true), @@ -193,11 +193,11 @@ MetadataExtractor::MetadataExtractor(const char *bitcode, size_t bitcodeSize) MetadataExtractor::MetadataExtractor(const llvm::Module *module) : mModule(module), mBitcode(nullptr), mBitcodeSize(0), mExportVarCount(0), mExportFuncCount(0), mExportForEachSignatureCount(0), - mExportReduceCount(0), mExportReduceNewCount(0), mExportVarNameList(nullptr), + mExportReduceCount(0), mExportVarNameList(nullptr), mExportFuncNameList(nullptr), mExportForEachNameList(nullptr), mExportForEachSignatureList(nullptr), - mExportForEachInputCountList(nullptr), mExportReduceNameList(nullptr), - mExportReduceNewList(nullptr), + mExportForEachInputCountList(nullptr), + mExportReduceList(nullptr), mPragmaCount(0), mPragmaKeyList(nullptr), mPragmaValueList(nullptr), mObjectSlotCount(0), mObjectSlotList(nullptr), mRSFloatPrecision(RS_FP_Full), mIsThreadable(true), @@ -241,17 +241,8 @@ MetadataExtractor::~MetadataExtractor() { delete [] mExportForEachInputCountList; mExportForEachInputCountList = nullptr; - if (mExportReduceNameList) { - for (size_t i = 0; i < mExportReduceCount; i++) { - delete [] mExportReduceNameList[i]; - mExportReduceNameList[i] = nullptr; - } - } - delete [] mExportReduceNameList; - mExportReduceNameList = nullptr; - - delete [] mExportReduceNewList; - mExportReduceNewList = nullptr; + delete [] mExportReduceList; + mExportReduceList = nullptr; for (size_t i = 0; i < mPragmaCount; i++) { if (mPragmaKeyList) { @@ -496,25 +487,25 @@ bool MetadataExtractor::populateForEachMetadata( } -bool MetadataExtractor::populateReduceNewMetadata(const llvm::NamedMDNode *ReduceNewMetadata) { - mExportReduceNewCount = 0; - mExportReduceNewList = nullptr; +bool MetadataExtractor::populateReduceMetadata(const llvm::NamedMDNode *ReduceMetadata) { + mExportReduceCount = 0; + mExportReduceList = nullptr; - if (!ReduceNewMetadata || !(mExportReduceNewCount = ReduceNewMetadata->getNumOperands())) + if (!ReduceMetadata || !(mExportReduceCount = ReduceMetadata->getNumOperands())) return true; - ReduceNew *TmpReduceNewList = new ReduceNew[mExportReduceNewCount]; + Reduce *TmpReduceList = new Reduce[mExportReduceCount]; - for (size_t i = 0; i < mExportReduceNewCount; i++) { - llvm::MDNode *Node = ReduceNewMetadata->getOperand(i); + for (size_t i = 0; i < mExportReduceCount; i++) { + llvm::MDNode *Node = ReduceMetadata->getOperand(i); if (!Node || Node->getNumOperands() < 3) { ALOGE("Missing reduce metadata"); return false; } - TmpReduceNewList[i].mReduceName = createStringFromValue(Node->getOperand(0)); + TmpReduceList[i].mReduceName = createStringFromValue(Node->getOperand(0)); - if (!extractUIntFromMetadataString(&TmpReduceNewList[i].mAccumulatorDataSize, + if (!extractUIntFromMetadataString(&TmpReduceList[i].mAccumulatorDataSize, Node->getOperand(1))) { ALOGE("Non-integer accumulator data size value in reduce metadata"); return false; @@ -525,8 +516,8 @@ bool MetadataExtractor::populateReduceNewMetadata(const llvm::NamedMDNode *Reduc ALOGE("Malformed accumulator node in reduce metadata"); return false; } - TmpReduceNewList[i].mAccumulatorName = createStringFromValue(AccumulatorNode->getOperand(0)); - if (!extractUIntFromMetadataString(&TmpReduceNewList[i].mSignature, + TmpReduceList[i].mAccumulatorName = createStringFromValue(AccumulatorNode->getOperand(0)); + if (!extractUIntFromMetadataString(&TmpReduceList[i].mSignature, AccumulatorNode->getOperand(1))) { ALOGE("Non-integer signature value in reduce metadata"); return false; @@ -540,18 +531,18 @@ bool MetadataExtractor::populateReduceNewMetadata(const llvm::NamedMDNode *Reduc // into the expanded accumulator function and is otherwise // unreferenced). llvm::Function *Func = - mModule->getFunction(llvm::StringRef(TmpReduceNewList[i].mAccumulatorName)); + mModule->getFunction(llvm::StringRef(TmpReduceList[i].mAccumulatorName)); // Why calculateNumInputs() - 1? The "-1" is because we don't // want to treat the accumulator argument as an input. - TmpReduceNewList[i].mInputCount = (Func ? calculateNumInputs(Func, TmpReduceNewList[i].mSignature) - 1 : 0); + TmpReduceList[i].mInputCount = (Func ? calculateNumInputs(Func, TmpReduceList[i].mSignature) - 1 : 0); - TmpReduceNewList[i].mInitializerName = createStringFromOptionalValue(Node, 3); - TmpReduceNewList[i].mCombinerName = createStringFromOptionalValue(Node, 4); - TmpReduceNewList[i].mOutConverterName = createStringFromOptionalValue(Node, 5); - TmpReduceNewList[i].mHalterName = createStringFromOptionalValue(Node, 6); + TmpReduceList[i].mInitializerName = createStringFromOptionalValue(Node, 3); + TmpReduceList[i].mCombinerName = createStringFromOptionalValue(Node, 4); + TmpReduceList[i].mOutConverterName = createStringFromOptionalValue(Node, 5); + TmpReduceList[i].mHalterName = createStringFromOptionalValue(Node, 6); } - mExportReduceNewList = TmpReduceNewList; + mExportReduceList = TmpReduceList; return true; } @@ -631,8 +622,8 @@ bool MetadataExtractor::extract() { mModule->getNamedMetadata(ExportForEachNameMetadataName); const llvm::NamedMDNode *ExportForEachMetadata = mModule->getNamedMetadata(ExportForEachMetadataName); - const llvm::NamedMDNode *ExportReduceNewMetadata = - mModule->getNamedMetadata(ExportReduceNewMetadataName); + const llvm::NamedMDNode *ExportReduceMetadata = + mModule->getNamedMetadata(ExportReduceMetadataName); const llvm::NamedMDNode *PragmaMetadata = mModule->getNamedMetadata(PragmaMetadataName); const llvm::NamedMDNode *ObjectSlotMetadata = @@ -662,7 +653,7 @@ bool MetadataExtractor::extract() { goto err; } - if (!populateReduceNewMetadata(ExportReduceNewMetadata)) { + if (!populateReduceMetadata(ExportReduceMetadata)) { ALOGE("Could not populate export general reduction metadata"); goto err; } diff --git a/bcinfo/tools/main.cpp b/bcinfo/tools/main.cpp index b823bf9..c85fdc4 100644 --- a/bcinfo/tools/main.cpp +++ b/bcinfo/tools/main.cpp @@ -110,7 +110,7 @@ static int parseOption(int argc, char** argv) { } -static void dumpReduceNewInfo(FILE *info, const char *Kind, const char *Name) { +static void dumpReduceInfo(FILE *info, const char *Kind, const char *Name) { if (Name) fprintf(info, " %s(%s)\n", Kind, Name); } @@ -149,23 +149,17 @@ static int dumpInfo(bcinfo::MetadataExtractor *ME) { } fprintf(info, "exportReduceCount: %zu\n", ME->getExportReduceCount()); - const char **reduceNameList = ME->getExportReduceNameList(); + const bcinfo::MetadataExtractor::Reduce *reduceList = + ME->getExportReduceList(); for (size_t i = 0; i < ME->getExportReduceCount(); i++) { - fprintf(info, "%s\n", reduceNameList[i]); - } - - fprintf(info, "exportReduceNewCount: %zu\n", ME->getExportReduceNewCount()); - const bcinfo::MetadataExtractor::ReduceNew *reduceNewList = - ME->getExportReduceNewList(); - for (size_t i = 0; i < ME->getExportReduceNewCount(); i++) { - const bcinfo::MetadataExtractor::ReduceNew &reduceNew = reduceNewList[i]; - fprintf(info, "%u - %s - %u - %u\n", reduceNew.mSignature, reduceNew.mReduceName, - reduceNew.mInputCount, reduceNew.mAccumulatorDataSize); - dumpReduceNewInfo(info, "initializer", reduceNew.mInitializerName); - dumpReduceNewInfo(info, "accumulator", reduceNew.mAccumulatorName); - dumpReduceNewInfo(info, "combiner", reduceNew.mCombinerName); - dumpReduceNewInfo(info, "outconverter", reduceNew.mOutConverterName); - dumpReduceNewInfo(info, "halter", reduceNew.mHalterName); + const bcinfo::MetadataExtractor::Reduce &reduce = reduceList[i]; + fprintf(info, "%u - %s - %u - %u\n", reduce.mSignature, reduce.mReduceName, + reduce.mInputCount, reduce.mAccumulatorDataSize); + dumpReduceInfo(info, "initializer", reduce.mInitializerName); + dumpReduceInfo(info, "accumulator", reduce.mAccumulatorName); + dumpReduceInfo(info, "combiner", reduce.mCombinerName); + dumpReduceInfo(info, "outconverter", reduce.mOutConverterName); + dumpReduceInfo(info, "halter", reduce.mHalterName); } fprintf(info, "objectSlotCount: %zu\n", ME->getObjectSlotCount()); @@ -223,23 +217,16 @@ static void dumpMetadata(bcinfo::MetadataExtractor *ME) { printf("\n"); printf("exportReduceCount: %zu\n", ME->getExportReduceCount()); - const char **reduceNameList = ME->getExportReduceNameList(); + const bcinfo::MetadataExtractor::Reduce *reduceList = ME->getExportReduceList(); for (size_t i = 0; i < ME->getExportReduceCount(); i++) { - printf("func[%zu]: %s\n", i, reduceNameList[i]); - } - printf("\n"); - - printf("exportReduceNewCount: %zu\n", ME->getExportReduceNewCount()); - const bcinfo::MetadataExtractor::ReduceNew *reduceNewList = ME->getExportReduceNewList(); - for (size_t i = 0; i < ME->getExportReduceNewCount(); i++) { - const bcinfo::MetadataExtractor::ReduceNew &reduceNew = reduceNewList[i]; - printf("exportReduceNewList[%zu]: %s - 0x%08x - %u - %u\n", i, reduceNew.mReduceName, - reduceNew.mSignature, reduceNew.mInputCount, reduceNew.mAccumulatorDataSize); - dumpReduceNewInfo(stdout, "initializer", reduceNew.mInitializerName); - dumpReduceNewInfo(stdout, "accumulator", reduceNew.mAccumulatorName); - dumpReduceNewInfo(stdout, "combiner", reduceNew.mCombinerName); - dumpReduceNewInfo(stdout, "outconverter", reduceNew.mOutConverterName); - dumpReduceNewInfo(stdout, "halter", reduceNew.mHalterName); + const bcinfo::MetadataExtractor::Reduce &reduce = reduceList[i]; + printf("exportReduceList[%zu]: %s - 0x%08x - %u - %u\n", i, reduce.mReduceName, + reduce.mSignature, reduce.mInputCount, reduce.mAccumulatorDataSize); + dumpReduceInfo(stdout, "initializer", reduce.mInitializerName); + dumpReduceInfo(stdout, "accumulator", reduce.mAccumulatorName); + dumpReduceInfo(stdout, "combiner", reduce.mCombinerName); + dumpReduceInfo(stdout, "outconverter", reduce.mOutConverterName); + dumpReduceInfo(stdout, "halter", reduce.mHalterName); } printf("\n"); diff --git a/include/bcc/Renderscript/RSUtils.h b/include/bcc/Renderscript/RSUtils.h index fbd5ed0..4e80c4e 100644 --- a/include/bcc/Renderscript/RSUtils.h +++ b/include/bcc/Renderscript/RSUtils.h @@ -109,7 +109,7 @@ static inline bool isRsObjectType(const llvm::Type *T) { // we will synthesize a combiner function from the accumulator // function. Given the accumulator function name, what should be the // name of the combiner function? -static inline std::string nameReduceNewCombinerFromAccumulator(llvm::StringRef accumName) { +static inline std::string nameReduceCombinerFromAccumulator(llvm::StringRef accumName) { return std::string(accumName) + ".combiner"; } diff --git a/include/bcinfo/MetadataExtractor.h b/include/bcinfo/MetadataExtractor.h index 71a6de0..95fc27b 100644 --- a/include/bcinfo/MetadataExtractor.h +++ b/include/bcinfo/MetadataExtractor.h @@ -49,8 +49,8 @@ enum MetadataSignatureBitval { class MetadataExtractor { public: - struct ReduceNew { - // These strings are owned by the ReduceNew instance, and deleted upon its destruction. + struct Reduce { + // These strings are owned by the Reduce instance, and deleted upon its destruction. // They are assumed to have been allocated by "new []" and hence are deleted by "delete []". const char *mReduceName; const char *mInitializerName; @@ -63,13 +63,13 @@ class MetadataExtractor { uint32_t mInputCount; // of accumulator function (and of kernel itself) uint32_t mAccumulatorDataSize; // in bytes - ReduceNew() : + Reduce() : mReduceName(nullptr), mInitializerName(nullptr), mAccumulatorName(nullptr), mCombinerName(nullptr), mOutConverterName(nullptr), mHalterName(nullptr), mSignature(0), mInputCount(0), mAccumulatorDataSize(0) { } - ~ReduceNew() { + ~Reduce() { delete [] mReduceName; delete [] mInitializerName; delete [] mAccumulatorName; @@ -78,8 +78,8 @@ class MetadataExtractor { delete [] mHalterName; } - ReduceNew(const ReduceNew &) = delete; - void operator=(const ReduceNew &) = delete; + Reduce(const Reduce &) = delete; + void operator=(const Reduce &) = delete; }; private: @@ -91,14 +91,12 @@ class MetadataExtractor { size_t mExportFuncCount; size_t mExportForEachSignatureCount; size_t mExportReduceCount; - size_t mExportReduceNewCount; const char **mExportVarNameList; const char **mExportFuncNameList; const char **mExportForEachNameList; const uint32_t *mExportForEachSignatureList; const uint32_t *mExportForEachInputCountList; - const char **mExportReduceNameList; - const ReduceNew *mExportReduceNewList; + const Reduce *mExportReduceList; size_t mPragmaCount; const char **mPragmaKeyList; @@ -123,7 +121,7 @@ class MetadataExtractor { // Helper functions for extraction bool populateForEachMetadata(const llvm::NamedMDNode *Names, const llvm::NamedMDNode *Signatures); - bool populateReduceNewMetadata(const llvm::NamedMDNode *ReduceNewMetadata); + bool populateReduceMetadata(const llvm::NamedMDNode *ReduceMetadata); bool populateObjectSlotMetadata(const llvm::NamedMDNode *ObjectSlotMetadata); void populatePragmaMetadata(const llvm::NamedMDNode *PragmaMetadata); void readThreadableFlag(const llvm::NamedMDNode *ThreadableMetadata); @@ -224,31 +222,17 @@ class MetadataExtractor { } /** - * \return number of exported simple reduce kernels (slots) in this script/module. + * \return number of exported general reduce kernels (slots) in this script/module. */ size_t getExportReduceCount() const { return mExportReduceCount; } /** - * \return array of exported simple reduce kernel names. - */ - const char **getExportReduceNameList() const { - return mExportReduceNameList; - } - - /** - * \return number of exported general reduce kernels (slots) in this script/module. - */ - size_t getExportReduceNewCount() const { - return mExportReduceNewCount; - } - - /** * \return array of exported general reduce kernel descriptions. */ - const ReduceNew *getExportReduceNewList() const { - return mExportReduceNewList; + const Reduce *getExportReduceList() const { + return mExportReduceList; } /** diff --git a/lib/Core/Compiler.cpp b/lib/Core/Compiler.cpp index 1988da3..759844f 100644 --- a/lib/Core/Compiler.cpp +++ b/lib/Core/Compiler.cpp @@ -354,12 +354,10 @@ bool Compiler::addInternalizeSymbolsPass(Script &pScript, llvm::legacy::PassMana size_t exportFuncCount = me.getExportFuncCount(); size_t exportForEachCount = me.getExportForEachSignatureCount(); size_t exportReduceCount = me.getExportReduceCount(); - size_t exportReduceNewCount = me.getExportReduceNewCount(); const char **exportVarNameList = me.getExportVarNameList(); const char **exportFuncNameList = me.getExportFuncNameList(); const char **exportForEachNameList = me.getExportForEachNameList(); - const char **exportReduceNameList = me.getExportReduceNameList(); - const bcinfo::MetadataExtractor::ReduceNew *exportReduceNewList = me.getExportReduceNewList(); + const bcinfo::MetadataExtractor::Reduce *exportReduceList = me.getExportReduceList(); size_t i; for (i = 0; i < exportVarCount; ++i) { @@ -370,32 +368,29 @@ bool Compiler::addInternalizeSymbolsPass(Script &pScript, llvm::legacy::PassMana export_symbols.push_back(exportFuncNameList[i]); } - // Expanded foreach and reduce functions should not be internalized; - // nor should general reduction initializer, combiner, and - // outconverter functions. keep_funcs keeps the names of these - // functions around until createInternalizePass() is finished making - // its own copy of the visible symbols. + // Expanded foreach functions should not be internalized; nor should + // general reduction initializer, combiner, and outconverter + // functions. keep_funcs keeps the names of these functions around + // until createInternalizePass() is finished making its own copy of + // the visible symbols. std::vector<std::string> keep_funcs; - keep_funcs.reserve(exportForEachCount + exportReduceCount + exportReduceNewCount*4); + keep_funcs.reserve(exportForEachCount + exportReduceCount*4); for (i = 0; i < exportForEachCount; ++i) { keep_funcs.push_back(std::string(exportForEachNameList[i]) + ".expand"); } - for (i = 0; i < exportReduceCount; ++i) { - keep_funcs.push_back(std::string(exportReduceNameList[i]) + ".expand"); - } auto keepFuncsPushBackIfPresent = [&keep_funcs](const char *Name) { if (Name) keep_funcs.push_back(Name); }; - for (i = 0; i < exportReduceNewCount; ++i) { - keep_funcs.push_back(std::string(exportReduceNewList[i].mAccumulatorName) + ".expand"); - keepFuncsPushBackIfPresent(exportReduceNewList[i].mInitializerName); - if (exportReduceNewList[i].mCombinerName != nullptr) { - keep_funcs.push_back(exportReduceNewList[i].mCombinerName); + for (i = 0; i < exportReduceCount; ++i) { + keep_funcs.push_back(std::string(exportReduceList[i].mAccumulatorName) + ".expand"); + keepFuncsPushBackIfPresent(exportReduceList[i].mInitializerName); + if (exportReduceList[i].mCombinerName != nullptr) { + keep_funcs.push_back(exportReduceList[i].mCombinerName); } else { - keep_funcs.push_back(nameReduceNewCombinerFromAccumulator(exportReduceNewList[i].mAccumulatorName)); + keep_funcs.push_back(nameReduceCombinerFromAccumulator(exportReduceList[i].mAccumulatorName)); } - keepFuncsPushBackIfPresent(exportReduceNewList[i].mOutConverterName); + keepFuncsPushBackIfPresent(exportReduceList[i].mOutConverterName); } for (auto &symbol_name : keep_funcs) { diff --git a/lib/Renderscript/RSEmbedInfo.cpp b/lib/Renderscript/RSEmbedInfo.cpp index 2d2e69f..081bed6 100644 --- a/lib/Renderscript/RSEmbedInfo.cpp +++ b/lib/Renderscript/RSEmbedInfo.cpp @@ -75,17 +75,15 @@ public: size_t exportFuncCount = me.getExportFuncCount(); size_t exportForEachCount = me.getExportForEachSignatureCount(); size_t exportReduceCount = me.getExportReduceCount(); - size_t exportReduceNewCount = me.getExportReduceNewCount(); size_t objectSlotCount = me.getObjectSlotCount(); size_t pragmaCount = me.getPragmaCount(); const char **exportVarNameList = me.getExportVarNameList(); const char **exportFuncNameList = me.getExportFuncNameList(); const char **exportForEachNameList = me.getExportForEachNameList(); - const char **exportReduceNameList = me.getExportReduceNameList(); const uint32_t *exportForEachSignatureList = me.getExportForEachSignatureList(); - const bcinfo::MetadataExtractor::ReduceNew *exportReduceNewList = - me.getExportReduceNewList(); + const bcinfo::MetadataExtractor::Reduce *exportReduceList = + me.getExportReduceList(); const uint32_t *objectSlotList = me.getObjectSlotList(); const char **pragmaKeyList = me.getPragmaKeyList(); const char **pragmaValueList = me.getPragmaValueList(); @@ -97,18 +95,17 @@ public: // We use a simple text format here that the compatibility library // can easily parse. Each section starts out with its name // followed by a count. The count denotes the number of lines to - // parse for that particular category. Variables and Functions and - // simple reduce kernels merely put the appropriate identifier on - // the line. ForEach kernels have the encoded int signature, - // followed by a hyphen followed by the identifier (function to - // look up). General reduce kernels have the encoded int - // signature, followed by a hyphen followed by the accumulator - // data size, followed by a hyphen followed by the identifier - // (reduction name); and then for each possible constituent - // function, a hyphen followed by the identifier (function name) - // -- in the case where the function is omitted, "." is used in - // place of the identifier. Object Slots are just listed as one - // integer per line. + // parse for that particular category. Variables and Functions + // merely put the appropriate identifier on the line. ForEach + // kernels have the encoded int signature, followed by a hyphen + // followed by the identifier (function to look up). General + // reduce kernels have the encoded int signature, followed by a + // hyphen followed by the accumulator data size, followed by a + // hyphen followed by the identifier (reduction name); and then + // for each possible constituent function, a hyphen followed by + // the identifier (function name) -- in the case where the + // function is omitted, "." is used in place of the identifier. + // Object Slots are just listed as one integer per line. s << "exportVarCount: " << exportVarCount << "\n"; for (i = 0; i < exportVarCount; ++i) { @@ -127,24 +124,19 @@ public: } s << "exportReduceCount: " << exportReduceCount << "\n"; + auto reduceFnName = [](const char *Name) { return Name ? Name : "."; }; for (i = 0; i < exportReduceCount; ++i) { - s << exportReduceNameList[i] << "\n"; - } - - s << "exportReduceNewCount: " << exportReduceNewCount << "\n"; - auto reduceNewFnName = [](const char *Name) { return Name ? Name : "."; }; - for (i = 0; i < exportReduceNewCount; ++i) { - const bcinfo::MetadataExtractor::ReduceNew &reduceNew = exportReduceNewList[i]; - s << reduceNew.mSignature << " - " - << reduceNew.mAccumulatorDataSize << " - " - << reduceNew.mReduceName << " - " - << reduceNewFnName(reduceNew.mInitializerName) << " - " - << reduceNewFnName(reduceNew.mAccumulatorName) << " - " - << ((reduceNew.mCombinerName != nullptr) - ? reduceNew.mCombinerName - : nameReduceNewCombinerFromAccumulator(reduceNew.mAccumulatorName)) << " - " - << reduceNewFnName(reduceNew.mOutConverterName) << " - " - << reduceNewFnName(reduceNew.mHalterName) + const bcinfo::MetadataExtractor::Reduce &reduce = exportReduceList[i]; + s << reduce.mSignature << " - " + << reduce.mAccumulatorDataSize << " - " + << reduce.mReduceName << " - " + << reduceFnName(reduce.mInitializerName) << " - " + << reduceFnName(reduce.mAccumulatorName) << " - " + << ((reduce.mCombinerName != nullptr) + ? reduce.mCombinerName + : nameReduceCombinerFromAccumulator(reduce.mAccumulatorName)) << " - " + << reduceFnName(reduce.mOutConverterName) << " - " + << reduceFnName(reduce.mHalterName) << "\n"; } diff --git a/lib/Renderscript/RSKernelExpand.cpp b/lib/Renderscript/RSKernelExpand.cpp index d7e4996..1e27e22 100644 --- a/lib/Renderscript/RSKernelExpand.cpp +++ b/lib/Renderscript/RSKernelExpand.cpp @@ -43,8 +43,7 @@ #ifndef __DISABLE_ASSERTS // Only used in bccAssert() const int kNumExpandedForeachParams = 4; -const int kNumExpandedReduceParams = 3; -const int kNumExpandedReduceNewAccumulatorParams = 4; +const int kNumExpandedReduceAccumulatorParams = 4; #endif const char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA"; @@ -109,16 +108,13 @@ private: * for expanded functions. These must be re-calculated for each module * the pass is run on. */ - llvm::FunctionType *ExpandedForEachType, *ExpandedReduceType; + llvm::FunctionType *ExpandedForEachType; llvm::Type *RsExpandKernelDriverInfoPfxTy; uint32_t mExportForEachCount; const char **mExportForEachNameList; const uint32_t *mExportForEachSignatureList; - uint32_t mExportReduceCount; - const char **mExportReduceNameList; - // Turns on optimization of allocation stride values. bool mEnableStepOpt; @@ -310,9 +306,6 @@ private: // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep) ExpandedForEachType = llvm::FunctionType::get(VoidTy, {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false); - - // void (void *inBuf, void *outBuf, uint32_t len) - ExpandedReduceType = llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy, Int32Ty}, false); } /// @brief Create skeleton of the expanded foreach kernel. @@ -340,41 +333,6 @@ private: return ExpandedFunction; } - // Create skeleton of the expanded reduce kernel. - // - // This creates a function with the following signature: - // - // void @func.expand(i8* nocapture %inBuf, i8* nocapture %outBuf, i32 len) - // - llvm::Function *createEmptyExpandedReduceKernel(llvm::StringRef OldName) { - llvm::Function *ExpandedFunction = - llvm::Function::Create(ExpandedReduceType, - llvm::GlobalValue::ExternalLinkage, - OldName + ".expand", Module); - bccAssert(ExpandedFunction->arg_size() == kNumExpandedReduceParams); - - llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin(); - - using llvm::Attribute; - - llvm::Argument *InBuf = &(*AI++); - InBuf->setName("inBuf"); - InBuf->addAttr(llvm::AttributeSet::get(*Context, InBuf->getArgNo() + 1, llvm::makeArrayRef(Attribute::NoCapture))); - - llvm::Argument *OutBuf = &(*AI++); - OutBuf->setName("outBuf"); - OutBuf->addAttr(llvm::AttributeSet::get(*Context, OutBuf->getArgNo() + 1, llvm::makeArrayRef(Attribute::NoCapture))); - - (AI++)->setName("len"); - - llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin", - ExpandedFunction); - llvm::IRBuilder<> Builder(Begin); - Builder.CreateRetVoid(); - - return ExpandedFunction; - } - // Create skeleton of a general reduce kernel's expanded accumulator. // // This creates a function with the following signature: @@ -382,19 +340,19 @@ private: // void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p, // i32 %x1, i32 %x2, accumType* nocapture %accum) // - llvm::Function *createEmptyExpandedReduceNewAccumulator(llvm::StringRef OldName, - llvm::Type *AccumArgTy) { + llvm::Function *createEmptyExpandedReduceAccumulator(llvm::StringRef OldName, + llvm::Type *AccumArgTy) { llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context); llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context); - llvm::FunctionType *ExpandedReduceNewAccumulatorType = + llvm::FunctionType *ExpandedReduceAccumulatorType = llvm::FunctionType::get(VoidTy, {RsExpandKernelDriverInfoPfxTy->getPointerTo(), Int32Ty, Int32Ty, AccumArgTy}, false); llvm::Function *FnExpandedAccumulator = - llvm::Function::Create(ExpandedReduceNewAccumulatorType, + llvm::Function::Create(ExpandedReduceAccumulatorType, llvm::GlobalValue::ExternalLinkage, OldName + ".expand", Module); - bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams); + bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams); llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin(); @@ -1115,272 +1073,6 @@ public: return true; } - // Expand a simple reduce-style kernel function. - // - // The input is a kernel which represents a binary operation, - // of the form - // - // define foo @func(foo %a, foo %b), - // - // (More generally, it can be of the forms - // - // define void @func(foo* %ret, foo* %a, foo* %b) - // define void @func(foo* %ret, foo1 %a, foo1 %b) - // define foo1 @func(foo2 %a, foo2 %b) - // - // as a result of argument / return value conversions. Here, "foo1" - // and "foo2" refer to possibly coerced types, and the coerced - // argument type may be different from the coerced return type. See - // "Note on coercion" below.) - // - // Note also, we do not expect to encounter any case when the - // arguments are promoted to pointers but the return value is - // unpromoted to pointer, e.g. - // - // define foo1 @func(foo* %a, foo* %b) - // - // and we will throw an assertion in this case.) - // - // The input kernel gets expanded into a kernel of the form - // - // define void @func.expand(i8* %inBuf, i8* outBuf, i32 len) - // - // which performs a serial reduction of `len` elements from `inBuf`, - // and stores the result into `outBuf`. In pseudocode, @func.expand - // does: - // - // inArr := (foo *)inBuf; - // accum := inArr[0]; - // for (i := 1; i < len; ++i) { - // accum := foo(accum, inArr[i]); - // } - // *(foo *)outBuf := accum; - // - // Note on coercion - // - // Both the return value and the argument types may undergo internal - // coercion in clang as part of call lowering. As a result, the - // return value type may differ from the argument type even if the - // types in the RenderScript signaure are the same. For instance, the - // kernel - // - // int3 add(int3 a, int3 b) { return a + b; } - // - // gets lowered by clang as - // - // define <3 x i32> @add(<4 x i32> %a.coerce, <4 x i32> %b.coerce) - // - // under AArch64. The details of this process are found in clang, - // lib/CodeGen/TargetInfo.cpp, under classifyArgumentType() and - // classifyReturnType() in ARMABIInfo, AArch64ABIInfo. If the value - // is passed by pointer, then the pointed-to type is not coerced. - // - // Since we lack the original type information, this code does loads - // and stores of allocation data by way of pointers to the coerced - // type. - bool ExpandReduce(llvm::Function *Function) { - bccAssert(Function); - - ALOGV("Expanding simple reduce kernel %s", Function->getName().str().c_str()); - - llvm::DataLayout DL(Module); - if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) { - DL.reset(X86_CUSTOM_DL_STRING); - } - - // TBAA Metadata - llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, *TBAAAllocation; - llvm::MDBuilder MDHelper(*Context); - - TBAARenderScriptDistinct = - MDHelper.createTBAARoot(kRenderScriptTBAARootName); - TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName, - TBAARenderScriptDistinct); - TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", - TBAARenderScript); - TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, - TBAAAllocation, 0); - - llvm::Function *ExpandedFunction = - createEmptyExpandedReduceKernel(Function->getName()); - - // Extract the expanded kernel's parameters. It is guaranteed by - // createEmptyExpandedReduceKernel that there will be 3 parameters. - auto ExpandedFunctionArgIter = ExpandedFunction->arg_begin(); - - llvm::Value *Arg_inBuf = &*(ExpandedFunctionArgIter++); - llvm::Value *Arg_outBuf = &*(ExpandedFunctionArgIter++); - llvm::Value *Arg_len = &*(ExpandedFunctionArgIter++); - - bccAssert(Function->arg_size() == 2 || Function->arg_size() == 3); - - // Check if, instead of returning a value, the original kernel has - // a pointer parameter which points to a temporary buffer into - // which the return value gets written. - const bool ReturnValuePointerStyle = (Function->arg_size() == 3); - bccAssert(Function->getReturnType()->isVoidTy() == ReturnValuePointerStyle); - - // Check if, instead of being passed by value, the inputs to the - // original kernel are passed by pointer. - auto FirstArgIter = Function->arg_begin(); - // The second argument is always an input to the original kernel. - auto SecondArgIter = std::next(FirstArgIter); - const bool InputsPointerStyle = SecondArgIter->getType()->isPointerTy(); - - // Get the output type (i.e. return type of the original kernel). - llvm::PointerType *OutPtrTy = nullptr; - llvm::Type *OutTy = nullptr; - if (ReturnValuePointerStyle) { - OutPtrTy = llvm::dyn_cast<llvm::PointerType>(FirstArgIter->getType()); - bccAssert(OutPtrTy && "Expected a pointer parameter to kernel"); - OutTy = OutPtrTy->getElementType(); - } else { - OutTy = Function->getReturnType(); - bccAssert(!OutTy->isVoidTy()); - OutPtrTy = OutTy->getPointerTo(); - } - - // Get the input type (type of the arguments to the original - // kernel). Some input types are different from the output type, - // due to explicit coercion that the compiler performs when - // lowering the parameters. See "Note on coercion" above. - llvm::PointerType *InPtrTy; - llvm::Type *InTy; - if (InputsPointerStyle) { - InPtrTy = llvm::dyn_cast<llvm::PointerType>(SecondArgIter->getType()); - bccAssert(InPtrTy && "Expected a pointer parameter to kernel"); - bccAssert(ReturnValuePointerStyle); - bccAssert(std::next(SecondArgIter)->getType() == InPtrTy && - "Input type mismatch"); - InTy = InPtrTy->getElementType(); - } else { - InTy = SecondArgIter->getType(); - InPtrTy = InTy->getPointerTo(); - if (!ReturnValuePointerStyle) { - bccAssert(InTy == FirstArgIter->getType() && "Input type mismatch"); - } else { - bccAssert(InTy == std::next(SecondArgIter)->getType() && - "Input type mismatch"); - } - } - - // The input type should take up the same amount of space in - // memory as the output type. - bccAssert(DL.getTypeAllocSize(InTy) == DL.getTypeAllocSize(OutTy)); - - // Construct the actual function body. - llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin()); - - // Cast input and output buffers to appropriate types. - llvm::Value *InBuf = Builder.CreatePointerCast(Arg_inBuf, InPtrTy); - llvm::Value *OutBuf = Builder.CreatePointerCast(Arg_outBuf, OutPtrTy); - - // Create a slot to pass temporary results back. This needs to be - // separate from the accumulator slot because the kernel may mark - // the return value slot as noalias. - llvm::Value *ReturnBuf = nullptr; - if (ReturnValuePointerStyle) { - ReturnBuf = Builder.CreateAlloca(OutTy, nullptr, "ret.tmp"); - } - - // Create a slot to hold the second input if the inputs are passed - // by pointer to the original kernel. We cannot directly pass a - // pointer to the input buffer, because the kernel may modify its - // inputs. - llvm::Value *SecondInputTempBuf = nullptr; - if (InputsPointerStyle) { - SecondInputTempBuf = Builder.CreateAlloca(InTy, nullptr, "in.tmp"); - } - - // Create a slot to accumulate temporary results, and fill it with - // the first value. - llvm::Value *AccumBuf = Builder.CreateAlloca(OutTy, nullptr, "accum"); - // Cast to OutPtrTy before loading, since AccumBuf has type OutPtrTy. - llvm::LoadInst *FirstElementLoad = Builder.CreateLoad( - Builder.CreatePointerCast(InBuf, OutPtrTy)); - if (gEnableRsTbaa) { - FirstElementLoad->setMetadata("tbaa", TBAAAllocation); - } - // Memory operations with AccumBuf shouldn't be marked with - // RenderScript TBAA, since this might conflict with TBAA metadata - // in the kernel function when AccumBuf is passed by pointer. - Builder.CreateStore(FirstElementLoad, AccumBuf); - - // Loop body - - // Create the loop structure. Note that the first input in the input buffer - // has already been accumulated, so that we start at index 1. - llvm::Value *IndVar; - llvm::Value *Start = llvm::ConstantInt::get(Arg_len->getType(), 1); - llvm::BasicBlock *Exit = createLoop(Builder, Start, Arg_len, &IndVar); - - llvm::Value *InputPtr = Builder.CreateInBoundsGEP(InBuf, IndVar, "next_input.gep"); - - // Set up arguments and call the original (unexpanded) kernel. - // - // The original kernel can have at most 3 arguments, which is - // achieved when the signature looks like: - // - // define void @func(foo* %ret, bar %a, bar %b) - // - // (bar can be one of foo/foo.coerce/foo*). - llvm::SmallVector<llvm::Value *, 3> KernelArgs; - - if (ReturnValuePointerStyle) { - KernelArgs.push_back(ReturnBuf); - } - - if (InputsPointerStyle) { - bccAssert(ReturnValuePointerStyle); - // Because the return buffer is copied back into the - // accumulator, it's okay if the accumulator is overwritten. - KernelArgs.push_back(AccumBuf); - - llvm::LoadInst *InputLoad = Builder.CreateLoad(InputPtr); - if (gEnableRsTbaa) { - InputLoad->setMetadata("tbaa", TBAAAllocation); - } - Builder.CreateStore(InputLoad, SecondInputTempBuf); - - KernelArgs.push_back(SecondInputTempBuf); - } else { - // InPtrTy may be different from OutPtrTy (the type of - // AccumBuf), so first cast the accumulator buffer to the - // pointer type corresponding to the input argument type. - KernelArgs.push_back( - Builder.CreateLoad(Builder.CreatePointerCast(AccumBuf, InPtrTy))); - - llvm::LoadInst *LoadedArg = Builder.CreateLoad(InputPtr); - if (gEnableRsTbaa) { - LoadedArg->setMetadata("tbaa", TBAAAllocation); - } - KernelArgs.push_back(LoadedArg); - } - - llvm::Value *RetVal = Builder.CreateCall(Function, KernelArgs); - - const uint64_t ElementSize = DL.getTypeStoreSize(OutTy); - const uint64_t ElementAlign = DL.getABITypeAlignment(OutTy); - - // Store the output in the accumulator. - if (ReturnValuePointerStyle) { - Builder.CreateMemCpy(AccumBuf, ReturnBuf, ElementSize, ElementAlign); - } else { - Builder.CreateStore(RetVal, AccumBuf); - } - - // Loop exit - Builder.SetInsertPoint(Exit, Exit->begin()); - - llvm::LoadInst *OutputLoad = Builder.CreateLoad(AccumBuf); - llvm::StoreInst *OutputStore = Builder.CreateStore(OutputLoad, OutBuf); - if (gEnableRsTbaa) { - OutputStore->setMetadata("tbaa", TBAAAllocation); - } - - return true; - } - // Certain categories of functions that make up a general // reduce-style kernel are called directly from the driver with no // expansion needed. For a function in such a category, we need to @@ -1389,7 +1081,7 @@ public: // This promotion is safe because we don't have any kind of cross // translation unit linkage model (except for linking against // RenderScript libraries), so we do not risk name clashes. - bool PromoteReduceNewFunction(const char *Name, FunctionSet &PromotedFunctions) { + bool PromoteReduceFunction(const char *Name, FunctionSet &PromotedFunctions) { if (!Name) // a presumably-optional function that is not present return false; @@ -1427,7 +1119,7 @@ public: // } // // This is very similar to foreach kernel expansion with no output. - bool ExpandReduceNewAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) { + bool ExpandReduceAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) { ALOGV("Expanding accumulator %s for general reduce kernel", FnAccumulator->getName().str().c_str()); @@ -1451,13 +1143,13 @@ public: // Create empty accumulator function. llvm::Function *FnExpandedAccumulator = - createEmptyExpandedReduceNewAccumulator(FnAccumulator->getName(), - (AccumulatorArgIter++)->getType()); + createEmptyExpandedReduceAccumulator(FnAccumulator->getName(), + (AccumulatorArgIter++)->getType()); // Extract the expanded accumulator's parameters. It is - // guaranteed by createEmptyExpandedReduceNewAccumulator that + // guaranteed by createEmptyExpandedReduceAccumulator that // there will be 4 parameters. - bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams); + bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams); auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin(); llvm::Value *Arg_p = &*(ExpandedAccumulatorArgIter++); llvm::Value *Arg_x1 = &*(ExpandedAccumulatorArgIter++); @@ -1507,7 +1199,7 @@ public: // %1 = load accumType, accumType* %other // call void @accumFn(accumType* %accum, accumType %1); // } - bool CreateReduceNewCombinerFromAccumulator(llvm::Function *FnAccumulator) { + bool CreateReduceCombinerFromAccumulator(llvm::Function *FnAccumulator) { ALOGV("Creating combiner from accumulator %s for general reduce kernel", FnAccumulator->getName().str().c_str()); @@ -1525,7 +1217,7 @@ public: llvm::FunctionType::get(VoidTy, { AccumulatorArgType, AccumulatorArgType }, false); llvm::Function *FnCombiner = llvm::Function::Create(CombinerType, llvm::GlobalValue::ExternalLinkage, - nameReduceNewCombinerFromAccumulator(FnAccumulator->getName()), + nameReduceCombinerFromAccumulator(FnAccumulator->getName()), Module); auto CombinerArgIter = FnCombiner->arg_begin(); @@ -1687,38 +1379,27 @@ public: } } - // Expand simple reduce_* style kernels. - mExportReduceCount = me.getExportReduceCount(); - mExportReduceNameList = me.getExportReduceNameList(); - - for (size_t i = 0; i < mExportReduceCount; ++i) { - llvm::Function *kernel = Module.getFunction(mExportReduceNameList[i]); - if (kernel) { - Changed |= ExpandReduce(kernel); - } - } - // Process general reduce_* style functions. - const size_t ExportReduceNewCount = me.getExportReduceNewCount(); - const bcinfo::MetadataExtractor::ReduceNew *ExportReduceNewList = me.getExportReduceNewList(); + const size_t ExportReduceCount = me.getExportReduceCount(); + const bcinfo::MetadataExtractor::Reduce *ExportReduceList = me.getExportReduceList(); // Note that functions can be shared between kernels FunctionSet PromotedFunctions, ExpandedAccumulators, AccumulatorsForCombiners; - for (size_t i = 0; i < ExportReduceNewCount; ++i) { - Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mInitializerName, PromotedFunctions); - Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mCombinerName, PromotedFunctions); - Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mOutConverterName, PromotedFunctions); + for (size_t i = 0; i < ExportReduceCount; ++i) { + Changed |= PromoteReduceFunction(ExportReduceList[i].mInitializerName, PromotedFunctions); + Changed |= PromoteReduceFunction(ExportReduceList[i].mCombinerName, PromotedFunctions); + Changed |= PromoteReduceFunction(ExportReduceList[i].mOutConverterName, PromotedFunctions); // Accumulator - llvm::Function *accumulator = Module.getFunction(ExportReduceNewList[i].mAccumulatorName); + llvm::Function *accumulator = Module.getFunction(ExportReduceList[i].mAccumulatorName); bccAssert(accumulator != nullptr); if (ExpandedAccumulators.insert(accumulator).second) - Changed |= ExpandReduceNewAccumulator(accumulator, - ExportReduceNewList[i].mSignature, - ExportReduceNewList[i].mInputCount); - if (!ExportReduceNewList[i].mCombinerName) { + Changed |= ExpandReduceAccumulator(accumulator, + ExportReduceList[i].mSignature, + ExportReduceList[i].mInputCount); + if (!ExportReduceList[i].mCombinerName) { if (AccumulatorsForCombiners.insert(accumulator).second) - Changed |= CreateReduceNewCombinerFromAccumulator(accumulator); + Changed |= CreateReduceCombinerFromAccumulator(accumulator); } } diff --git a/tests/libbcc/test_reduce_general_metadata.ll b/tests/libbcc/test_reduce_general_metadata.ll index b41234a..ebf1df4 100644 --- a/tests/libbcc/test_reduce_general_metadata.ll +++ b/tests/libbcc/test_reduce_general_metadata.ll @@ -3,34 +3,34 @@ ; RUN: llvm-rs-as %s -o %t ; RUN: bcinfo %t | FileCheck %s -; CHECK: exportReduceNewCount: 8 -; CHECK: exportReduceNewList[0]: addint - 0x00000001 - 1 - 4 +; CHECK: exportReduceCount: 8 +; CHECK: exportReduceList[0]: addint - 0x00000001 - 1 - 4 ; CHECK: accumulator(aiAccum) -; CHECK: exportReduceNewList[1]: mpyint - 0x00000001 - 1 - 4 +; CHECK: exportReduceList[1]: mpyint - 0x00000001 - 1 - 4 ; CHECK: initializer(mpyInit) ; CHECK: accumulator(mpyAccum) -; CHECK: exportReduceNewList[2]: dp - 0x00000001 - 2 - 4 +; CHECK: exportReduceList[2]: dp - 0x00000001 - 2 - 4 ; CHECK: accumulator(dpAccum) ; CHECK: combiner(dpSum) -; CHECK: exportReduceNewList[3]: findMinAndMax - 0x00000009 - 1 - 16 +; CHECK: exportReduceList[3]: findMinAndMax - 0x00000009 - 1 - 16 ; CHECK: initializer(fMMInit) ; CHECK: accumulator(fMMAccumulator) ; CHECK: combiner(fMMCombiner) ; CHECK: outconverter(fMMOutConverter) -; CHECK: exportReduceNewList[4]: fz - 0x00000009 - 1 - 4 +; CHECK: exportReduceList[4]: fz - 0x00000009 - 1 - 4 ; CHECK: initializer(fzInit) ; CHECK: accumulator(fzAccum) ; CHECK: combiner(fzCombine) ; CHECK: halter(fzFound) -; CHECK: exportReduceNewList[5]: fz2 - 0x00000019 - 1 - 8 +; CHECK: exportReduceList[5]: fz2 - 0x00000019 - 1 - 8 ; CHECK: initializer(fz2Init) ; CHECK: accumulator(fz2Accum) ; CHECK: combiner(fz2Combine) ; CHECK: halter(fz2Found) -; CHECK: exportReduceNewList[6]: histogram - 0x00000001 - 1 - 1024 +; CHECK: exportReduceList[6]: histogram - 0x00000001 - 1 - 1024 ; CHECK: accumulator(hsgAccum) ; CHECK: combiner(hsgCombine) -; CHECK: exportReduceNewList[7]: mode - 0x00000001 - 1 - 1024 +; CHECK: exportReduceList[7]: mode - 0x00000001 - 1 - 1024 ; CHECK: accumulator(hsgAccum) ; CHECK: combiner(hsgCombine) ; CHECK: outconverter(modeOutConvert) |