Make startCompute and CPU Fallback return by value

Additionally, this CL includes the following changes: * rename startCompute to compute * rename startComputeOnCpuFallback to computeOnCpuFallback * make initializeOutputShapes return by-value * rename initializeOutputShapes to getInitialOutputShapes * make asyncStartComputePartitioned accept plan by-reference Bug: 122316159 Test: mma Test: atest NeuralNetworksTest_static Test: atest CtsNNAPITestCases Change-Id: Ib1a95b75819a5f7a67d0462389cb5fc155660d92 Merged-In: Ib1a95b75819a5f7a67d0462389cb5fc155660d92 (cherry picked from commit 1e2ea5586f00a0dde80e752d0c7cd6cd55b30e1c)
author: Michael Butler <butlermichael@google.com> 2019-08-19 12:05:45 -0700
committer: Michael Butler <butlermichael@google.com> 2019-10-28 16:50:58 -0700
commit: 9204430a2515c2e0cc35b32ea93bab580af95614 (patch)
tree: 5c7383d946d0a831dd24e9a87c8ec38733a3ddc1
parent: e5f8fe9800c2185b7340f3afe833baff0eba197a (diff)
download: ml-9204430a2515c2e0cc35b32ea93bab580af95614.tar.gz
3 files changed, 89 insertions, 137 deletions
diff --git a/nn/runtime/BurstBuilder.h b/nn/runtime/BurstBuilder.h
index bfb9a9a38..6a3ba783e 100644
--- a/nn/runtime/BurstBuilder.h
+++ b/nn/runtime/BurstBuilder.h
@@ -31,10 +31,10 @@ class CompilationBuilder;
  * TODO: Could we "hide" the per-step burst controller instance inside
  * StepExecutor? Today it's exposed as a "sibling" to StepExecutor:
  * ExecutionPlan::next both generates a StepExecutor instance and finds a
- * pointer to a burst controller; and StepExecutor::startCompute is passed a
- * pointer to a burst controller. Instead, could ExecutionPlan::next stash the
- * burst controller in the StepExecutor, so that it doesn't have to be passed
- * to any of the StepExecutor methods?
+ * pointer to a burst controller; and StepExecutor::compute is passed a pointer
+ * to a burst controller. Instead, could ExecutionPlan::next stash the burst
+ * controller in the StepExecutor, so that it doesn't have to be passed to any
+ * of the StepExecutor methods?
  */
 
 class BurstBuilder {
diff --git a/nn/runtime/ExecutionBuilder.cpp b/nn/runtime/ExecutionBuilder.cpp
index 97e847b24..9e73118e9 100644
--- a/nn/runtime/ExecutionBuilder.cpp
+++ b/nn/runtime/ExecutionBuilder.cpp
@@ -303,12 +303,9 @@ int ExecutionBuilder::getOutputOperandRank(uint32_t index, uint32_t* rank) {
 //       For Q this is irrelevant: We only support timing in conjunction
 //         with an explicit device list; and we do not support CPU fallback
 //         with an explicit device list.  See CompilationBuilder::mExplicitDeviceList.
-static int cpuFallbackFull(ExecutionBuilder* executionBuilder,
-                           sp<ExecutionCallback>* fallbackCallback) {
+static std::tuple<int, std::vector<OutputShape>, Timing> cpuFallbackFull(
+        ExecutionBuilder* executionBuilder) {
     CHECK(executionBuilder != nullptr);
-    CHECK(fallbackCallback != nullptr);
-    *fallbackCallback = nullptr;
-
     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackFull");
     VLOG(EXECUTION) << "cpuFallbackFull";
 
@@ -318,58 +315,45 @@ static int cpuFallbackFull(ExecutionBuilder* executionBuilder,
     executor.mapInputsAndOutputsTrivially();
 
     // Attempt fallback execution.
-    NN_RETURN_IF_ERROR(executor.startComputeOnCpuFallback(fallbackCallback));
-    CHECK(*fallbackCallback != nullptr);
-    (*fallbackCallback)->wait();
-    return ANEURALNETWORKS_NO_ERROR;
+    return executor.computeOnCpuFallback();
 }
 
 // Attempt synchronous execution on CPU.
-// fallbackExecutor is non-null i.f.f. ANEURALNETWORKS_NO_ERROR is returned.
-// fallbackCallback is non-null i.f.f. ANEURALNETWORKS_NO_ERROR is returned.
 // TODO: How should we handle timing in this case?
 //       For Q this is irrelevant: We only support timing in conjunction
 //         with an explicit device list; and we do not support CPU fallback
 //         with an explicit device list.  See CompilationBuilder::mExplicitDeviceList.
-static int cpuFallbackPartial(const ExecutionPlan* plan,
-                              std::shared_ptr<ExecutionPlan::Controller> controller,
-                              std::shared_ptr<StepExecutor>* fallbackExecutor,
-                              sp<ExecutionCallback>* fallbackCallback) {
-    CHECK(plan != nullptr);
-    CHECK(fallbackExecutor != nullptr);
-    *fallbackExecutor = nullptr;
-    CHECK(fallbackCallback != nullptr);
-    *fallbackCallback = nullptr;
-
+static std::tuple<int, std::vector<OutputShape>, Timing, std::shared_ptr<StepExecutor>>
+cpuFallbackPartial(const ExecutionPlan& plan,
+                   std::shared_ptr<ExecutionPlan::Controller> controller) {
     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackPartial");
     VLOG(EXECUTION) << "cpuFallbackPartial";
 
     // Get fallback executor.
     std::shared_ptr<StepExecutor> executor;
-    NN_RETURN_IF_ERROR(plan->fallback(controller, &executor));
+    int n1 = plan.fallback(controller, &executor);
+    if (n1 != ANEURALNETWORKS_NO_ERROR) {
+        return {n1, {}, kNoTiming, nullptr};
+    }
     CHECK(executor != nullptr);
 
     // Attempt fallback execution.
-    NN_RETURN_IF_ERROR(executor->startComputeOnCpuFallback(fallbackCallback));
-    CHECK(*fallbackCallback != nullptr);
-    (*fallbackCallback)->wait();
-    *fallbackExecutor = executor;
-    return ANEURALNETWORKS_NO_ERROR;
+    auto [n2, outputShapes, timing] = executor->computeOnCpuFallback();
+    return {n2, std::move(outputShapes), timing, executor};
 }
 
 static void asyncStartComputePartitioned(ExecutionBuilder* executionBuilder,
-                                         const ExecutionPlan* plan,
+                                         const ExecutionPlan& plan,
                                          std::shared_ptr<ExecutionPlan::Controller> controller,
                                          bool allowFallback,
                                          const sp<ExecutionCallback>& executionCallback) {
     CHECK(executionBuilder != nullptr);
-    CHECK(plan != nullptr);
     VLOG(EXECUTION) << "ExecutionBuilder::compute (from plan, iteratively)";
-    std::vector<OutputShape> outputShapes;
+
+    std::vector<OutputShape> outputShapes = executionBuilder->getInitialOutputShapes();
     Timing timing = kNoTiming;
     // Disallow fallback when the ExecutionPlan is simple on CPU.
-    allowFallback &= !plan->isSimpleCpu();
-    executionBuilder->initializeOutputShapes(&outputShapes);
+    allowFallback &= !plan.isSimpleCpu();
 
     while (true) {
         VLOG(EXECUTION) << "looking for next StepExecutor";
@@ -377,7 +361,7 @@ static void asyncStartComputePartitioned(ExecutionBuilder* executionBuilder,
         // Get the current step of the execution.
         std::shared_ptr<StepExecutor> executor;
         std::shared_ptr<ExecutionBurstController> burstController;
-        int n = plan->next(controller, &executor, &burstController);
+        int n = plan.next(controller, &executor, &burstController);
         if (n != ANEURALNETWORKS_NO_ERROR) {
             if (allowFallback) break;
             executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
@@ -393,99 +377,75 @@ static void asyncStartComputePartitioned(ExecutionBuilder* executionBuilder,
         const bool executorIsCpu = executor->isCpu();
 
         // Attempt to execute a single step of the execution.
-        sp<ExecutionCallback> stepCallback;
-        n = executor->startCompute(&stepCallback, burstController);
+        auto [stepN, stepOutputShapes, stepTiming] = executor->compute(burstController);
 
-        // Immediately end execution if there was an error and fallback is not
-        // allowed.
-        if (n != ANEURALNETWORKS_NO_ERROR && !allowFallback) {
-            executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
-            return;
+        // Update global outputs.
+        if (!executor->updateOutputShapes(stepOutputShapes, &outputShapes)) {
+            stepN = ANEURALNETWORKS_OP_FAILED;
         }
 
-        // If execution successfully launched, process the execution.
-        if (n == ANEURALNETWORKS_NO_ERROR) {
-            stepCallback->wait();
-            ErrorStatus status = stepCallback->getStatus();
-            const auto& stepOutputShapes = stepCallback->getOutputShapes();
-
-            // Update global outputs.
-            if (!executor->updateOutputShapes(stepOutputShapes, &outputShapes)) {
-                status = ErrorStatus::GENERAL_FAILURE;
-            }
-
-            // If execution was successful, continue to next step.
-            if (status == ErrorStatus::NONE) {
-                // We only support collection of timing information in the case of a
-                // single step, so it's safe to just keep track of the last step's
-                // timing information.
-                timing = stepCallback->getTiming();
-                continue;
-            }
-
-            // OUTPUT_INSUFFICIENT_SIZE is not recoverable, so end execution.
-            if (status == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
-                executionCallback->notify(status, outputShapes, kNoTiming);
-                return;
-            }
+        // If execution was successful, continue to next step.
+        if (stepN == ANEURALNETWORKS_NO_ERROR) {
+            // We only support collection of timing information in the case of a
+            // single step, so it's safe to just keep track of the last step's
+            // timing information.
+            timing = stepTiming;
+            continue;
+        }
 
-            // If fallback is not allowed and there was an error, end execution.
-            if (!allowFallback) {
-                executionCallback->notify(status, {}, kNoTiming);
-                return;
-            }
+        // OUTPUT_INSUFFICIENT_SIZE is not recoverable, so end execution.
+        if (stepN == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
+            const ErrorStatus stepStatus = convertResultCodeToErrorStatus(stepN);
+            executionCallback->notify(stepStatus, outputShapes, kNoTiming);
+            return;
+        }
 
-            // Propagate error to fallback path.
-            n = convertErrorStatusToResultCode(status);
+        // If fallback is not allowed and there was an error, end execution.
+        if (!allowFallback) {
+            const ErrorStatus stepStatus = convertResultCodeToErrorStatus(stepN);
+            executionCallback->notify(stepStatus, {}, kNoTiming);
+            return;
         }
 
         // If CPU execution was already attempted, either:
         // (1) perform a full fallback if the plan is not simple, or
         // (2) return from the function with an error
         if (executorIsCpu) {
-            if (!plan->isSimple()) break;
-            executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
+            if (!plan.isSimple()) break;
+            executionCallback->notify(convertResultCodeToErrorStatus(stepN), {}, kNoTiming);
             return;
         }
 
         // If the code reaches this point, attempt a partial fallback to CPU.
         CHECK(allowFallback);
-        std::shared_ptr<StepExecutor> fallbackExecutor;
-        sp<ExecutionCallback> fallbackCallback;
-        n = cpuFallbackPartial(plan, controller, &fallbackExecutor, &fallbackCallback);
-
-        // Immediately fallback to full CPU execution if there was an error with
-        // the partial CPU fallback.
-        if (n != ANEURALNETWORKS_NO_ERROR) {
-            break;
-        }
-
-        // Get fallback execution results.
-        ErrorStatus fallbackStatus = fallbackCallback->getStatus();
-        const auto& fallbackOutputShapes = fallbackCallback->getOutputShapes();
+        auto [fallbackN, fallbackOutputShapes, fallbackTiming, fallbackExecutor] =
+                cpuFallbackPartial(plan, controller);
 
         // Update global outputs.
-        if (!fallbackExecutor->updateOutputShapes(fallbackOutputShapes, &outputShapes)) {
-            fallbackStatus = ErrorStatus::GENERAL_FAILURE;
+        if (fallbackExecutor != nullptr &&
+            !fallbackExecutor->updateOutputShapes(fallbackOutputShapes, &outputShapes)) {
+            fallbackN = ANEURALNETWORKS_OP_FAILED;
         }
 
         // If execution was successful, continue to next step.
-        if (fallbackStatus == ErrorStatus::NONE) {
+        if (fallbackN == ANEURALNETWORKS_NO_ERROR) {
             // We only support collection of timing information in the case of a
             // single step, so it's safe to just keep track of the last step's
             // timing information.
-            timing = fallbackCallback->getTiming();
+            timing = fallbackTiming;
             continue;
         }
 
         // OUTPUT_INSUFFICIENT_SIZE is not recoverable, so end execution.
-        if (fallbackStatus == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
+        if (fallbackN == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
+            const ErrorStatus fallbackStatus = convertResultCodeToErrorStatus(fallbackN);
             executionCallback->notify(fallbackStatus, outputShapes, kNoTiming);
             return;
         }
 
         // Do not fallback twice if the ExecutionPlan is simple.
-        if (plan->isSimple()) {
+        if (plan.isSimple()) {
+            const ErrorStatus fallbackStatus = convertResultCodeToErrorStatus(fallbackN);
             executionCallback->notify(fallbackStatus, {}, kNoTiming);
             return;
         }
@@ -498,14 +458,9 @@ static void asyncStartComputePartitioned(ExecutionBuilder* executionBuilder,
     // If the code has reached this point, a potentially recoverable error
     // occurred during the step executions. Instead, do a full execution
     // fallback on the CPU.
-    sp<ExecutionCallback> fallbackCallback;
-    int n = cpuFallbackFull(executionBuilder, &fallbackCallback);
-    if (n != ANEURALNETWORKS_NO_ERROR) {
-        executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
-        return;
-    }
-    executionCallback->notify(fallbackCallback->getStatus(), fallbackCallback->getOutputShapes(),
-                              fallbackCallback->getTiming());
+    auto [fullN, fullOutputShapes, fullTiming] = cpuFallbackFull(executionBuilder);
+    const ErrorStatus fullStatus = convertResultCodeToErrorStatus(fullN);
+    executionCallback->notify(fullStatus, fullOutputShapes, fullTiming);
 }
 
 int ExecutionBuilder::compute(sp<ExecutionCallback>* synchronizationCallback,
@@ -558,7 +513,7 @@ int ExecutionBuilder::compute(sp<ExecutionCallback>* synchronizationCallback,
         VLOG(EXECUTION) << "ExecutionBuilder::compute (synchronous API)";
         sp<ExecutionCallback> localSynchronizationCallback = new ExecutionCallback();
         localSynchronizationCallback->setOnFinish(wrappedFinish);
-        asyncStartComputePartitioned(this, mPlan, controller, allowFallback,
+        asyncStartComputePartitioned(this, *mPlan, controller, allowFallback,
                                      localSynchronizationCallback);
         localSynchronizationCallback->wait();
         if (mMeasureTiming) {
@@ -579,24 +534,28 @@ int ExecutionBuilder::compute(sp<ExecutionCallback>* synchronizationCallback,
         executionCallback->setOnFinish(wrappedFinish);
         if (DeviceManager::get()->syncExecRuntime()) {
             VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API, non-threaded)";
-            asyncStartComputePartitioned(this, mPlan, controller, allowFallback, executionCallback);
+            asyncStartComputePartitioned(this, *mPlan, controller, allowFallback,
+                                         executionCallback);
         } else {
             VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API)";
-            std::thread thread(asyncStartComputePartitioned, this, mPlan, controller, allowFallback,
-                               executionCallback);
-            executionCallback->bindThread(std::move(thread));
+            std::thread asyncExecution([this, controller, allowFallback, executionCallback] {
+                asyncStartComputePartitioned(this, *mPlan, controller, allowFallback,
+                                             executionCallback);
+            });
+            executionCallback->bindThread(std::move(asyncExecution));
         }
         *synchronizationCallback = executionCallback;
         return ANEURALNETWORKS_NO_ERROR;
     }
 }
 
-void ExecutionBuilder::initializeOutputShapes(std::vector<OutputShape>* outputShapes) const {
-    outputShapes->resize(mOutputs.size());
-    for (uint32_t i = 0; i < mOutputs.size(); i++) {
-        (*outputShapes)[i].dimensions = mOutputs[i].dimensions;
-        (*outputShapes)[i].isSufficient = true;
-    }
+std::vector<OutputShape> ExecutionBuilder::getInitialOutputShapes() const {
+    std::vector<OutputShape> outputShapes(mOutputs.size());
+    std::transform(mOutputs.begin(), mOutputs.end(), outputShapes.begin(),
+                   [](const auto& x) -> OutputShape {
+                       return {.dimensions = x.dimensions, .isSufficient = true};
+                   });
+    return outputShapes;
 }
 
 // Check if the dimensions "to" is updatable by dimensions "from", where "from" must
@@ -741,11 +700,9 @@ bool StepExecutor::isCpu() const {
     return mDevice == DeviceManager::getCpuDevice();
 }
 
-int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback,
-                               const std::shared_ptr<ExecutionBurstController>& burstController) {
+std::tuple<int, std::vector<OutputShape>, Timing> StepExecutor::compute(
+        const std::shared_ptr<ExecutionBurstController>& burstController) {
     CHECK(mPreparedModel != nullptr);
-    CHECK(synchronizationCallback != nullptr);
-    *synchronizationCallback = nullptr;
 
     if (VLOG_IS_ON(EXECUTION)) {
         logArguments("input", mInputs);
@@ -757,19 +714,12 @@ int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback,
             mPreparedModel->execute(mInputs, mOutputs, mMemories, burstController, measure);
     mExecutionBuilder->reportTiming(timing);
 
-    if (n != ANEURALNETWORKS_NO_ERROR && n != ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
-        return n;
-    }
-
-    const ErrorStatus status = convertResultCodeToErrorStatus(n);
-    *synchronizationCallback = new ExecutionCallback();
-    (*synchronizationCallback)->notify_1_2(status, outputShapes, timing);
-    return ANEURALNETWORKS_NO_ERROR;
+    return {n, std::move(outputShapes), timing};
 }
 
 // For cpuFallback{Partial,Full}, recompile the model on CPU and then start compute.
-int StepExecutor::startComputeOnCpuFallback(sp<ExecutionCallback>* synchronizationCallback) {
-    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "StepExecutor::startComputeOnCpuFallback");
+std::tuple<int, std::vector<OutputShape>, Timing> StepExecutor::computeOnCpuFallback() {
+    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "StepExecutor::computeOnCpuFallback");
     VLOG(EXECUTION) << "Re-compile the model on CPU";
     mDevice = DeviceManager::getCpuDevice();
     mPreparedModel = nullptr;
@@ -780,8 +730,10 @@ int StepExecutor::startComputeOnCpuFallback(sp<ExecutionCallback>* synchronizati
             static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
     const auto [n, preparedModel] = mDevice->prepareModel(makeModel, preference, {}, {});
     mPreparedModel = preparedModel;
-    NN_RETURN_IF_ERROR(n);
-    return startCompute(synchronizationCallback, /*burstController=*/nullptr);
+    if (n != ANEURALNETWORKS_NO_ERROR) {
+        return {n, {}, kNoTiming};
+    }
+    return compute(/*burstController=*/nullptr);
 }
 
 }  // namespace nn
diff --git a/nn/runtime/ExecutionBuilder.h b/nn/runtime/ExecutionBuilder.h
index f07335a01..3d8ab3e6c 100644
--- a/nn/runtime/ExecutionBuilder.h
+++ b/nn/runtime/ExecutionBuilder.h
@@ -69,7 +69,7 @@ class ExecutionBuilder {
     int burstCompute(BurstBuilder* burst) { return compute(nullptr, burst); }
 
     // Initialize output dimensional information from ModelArgumentInfo.
-    void initializeOutputShapes(std::vector<hal::OutputShape>* outputShapes) const;
+    std::vector<hal::OutputShape> getInitialOutputShapes() const;
 
     int getOutputOperandDimensions(uint32_t index, uint32_t* dimensions);
     int getOutputOperandRank(uint32_t index, uint32_t* rank);
@@ -160,7 +160,7 @@ class StepExecutor {
     // is executing the entire model from the ExecutionBuilder).
     void mapInputsAndOutputsTrivially();
 
-    // Update output shapes returned from ExecutionCallback to ExecutionBuilder.
+    // Update output shapes with shapes returned from execution.
     bool updateOutputShapes(const std::vector<hal::OutputShape>& from,
                             std::vector<hal::OutputShape>* to);
 
@@ -189,12 +189,12 @@ class StepExecutor {
     }
 
     // Executes using the (driver, preparedModel) specified at construction time.
-    int startCompute(sp<ExecutionCallback>* synchronizationCallback,
-                     const std::shared_ptr<ExecutionBurstController>& burstController = nullptr);
+    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> compute(
+            const std::shared_ptr<ExecutionBurstController>& burstController = nullptr);
 
     // Re-compiles and executes using the CPU, regardless of the (driver,
     // preparedModel) specified at construction time.
-    int startComputeOnCpuFallback(sp<ExecutionCallback>* synchronizationCallback);
+    std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> computeOnCpuFallback();
 
     bool isCpu() const;
author	Michael Butler <butlermichael@google.com>	2019-08-19 12:05:45 -0700
committer	Michael Butler <butlermichael@google.com>	2019-10-28 16:50:58 -0700
commit	9204430a2515c2e0cc35b32ea93bab580af95614 (patch)
tree	5c7383d946d0a831dd24e9a87c8ec38733a3ddc1
parent	e5f8fe9800c2185b7340f3afe833baff0eba197a (diff)
download	ml-9204430a2515c2e0cc35b32ea93bab580af95614.tar.gz