From 11761e37a864761382303a75ab29bb0fc0f716c3 Mon Sep 17 00:00:00 2001
From: Michael Butler
Date: Tue, 15 Dec 2020 15:20:26 -0800
Subject: [PATCH] Remove 'blocking' param from NN ResilientPreparedModel and
 *Buffer

This change removes the 'blocking' parameter for the
ResilientPreparedModel::Factory and ResilientBuffer::Factory. The
'blocking' parameter is only useful for ResilientDevice::Factory, which
behind the scenes chooses between the HIDL calls IDevice::getService and
IDevice::tryGetService. The equivalent calls for IPreparedModel and
IBuffer are not used, as both are created from the IDevice object.

This change also modifies the ResilientDevice's device recovery
behavior. Prior to this change, ResilientDevice's recovery mechanism had
the following behavior:
* attempt to call a function
* if the function did not return a DEAD_OBJECT error, return
* if the function returned a DEAD_OBJECT error, attempt to recover the
  device
* whether or not the recovery succeeded, call the function again

This CL changes the behavior so that if device recovery fails,
ResilientDevice will not call the function the second time.

Bug: N/A
Test: mma
Change-Id: Icf37d05c884c740178324fcd046ea56914ef7d44
---
 .../include/nnapi/hal/ResilientBuffer.h       |  2 +-
 .../include/nnapi/hal/ResilientDevice.h       | 15 ++---
 .../nnapi/hal/ResilientPreparedModel.h        |  2 +-
 .../utils/common/src/ResilientBuffer.cpp      |  2 +-
 .../utils/common/src/ResilientDevice.cpp      | 65 +++++++++----------
 .../common/src/ResilientPreparedModel.cpp     |  2 +-
 6 files changed, 42 insertions(+), 46 deletions(-)

diff --git a/neuralnetworks/utils/common/include/nnapi/hal/ResilientBuffer.h b/neuralnetworks/utils/common/include/nnapi/hal/ResilientBuffer.h
index 996ec1ee81..9d5e3e6a05 100644
--- a/neuralnetworks/utils/common/include/nnapi/hal/ResilientBuffer.h
+++ b/neuralnetworks/utils/common/include/nnapi/hal/ResilientBuffer.h
@@ -34,7 +34,7 @@ class ResilientBuffer final : public nn::IBuffer {
     struct PrivateConstructorTag {};
 
   public:
-    using Factory = std::function<nn::GeneralResult<nn::SharedBuffer>(bool blocking)>;
+    using Factory = std::function<nn::GeneralResult<nn::SharedBuffer>()>;
 
     static nn::GeneralResult<std::shared_ptr<const ResilientBuffer>> create(Factory makeBuffer);
 
diff --git a/neuralnetworks/utils/common/include/nnapi/hal/ResilientDevice.h b/neuralnetworks/utils/common/include/nnapi/hal/ResilientDevice.h
index 4bfed6cd51..84ae799aad 100644
--- a/neuralnetworks/utils/common/include/nnapi/hal/ResilientDevice.h
+++ b/neuralnetworks/utils/common/include/nnapi/hal/ResilientDevice.h
@@ -46,8 +46,8 @@ class ResilientDevice final : public nn::IDevice,
                              nn::Capabilities capabilities, nn::SharedDevice device);
 
     nn::SharedDevice getDevice() const EXCLUDES(mMutex);
-    nn::SharedDevice recover(const nn::IDevice* failingDevice, bool blocking) const
-            EXCLUDES(mMutex);
+    nn::GeneralResult<nn::SharedDevice> recover(const nn::IDevice* failingDevice,
+                                                bool blocking) const EXCLUDES(mMutex);
 
     const std::string& getName() const override;
     const std::string& getVersionString() const override;
@@ -81,17 +81,14 @@ class ResilientDevice final : public nn::IDevice,
 
   private:
     bool isValidInternal() const EXCLUDES(mMutex);
     nn::GeneralResult<nn::SharedPreparedModel> prepareModelInternal(
-            bool blocking, const nn::Model& model, nn::ExecutionPreference preference,
-            nn::Priority priority, nn::OptionalTimePoint deadline,
-            const std::vector<nn::SharedHandle>& modelCache,
+            const nn::Model& model, nn::ExecutionPreference preference, nn::Priority priority,
+            nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
             const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const;
     nn::GeneralResult<nn::SharedPreparedModel> prepareModelFromCacheInternal(
-            bool blocking, nn::OptionalTimePoint deadline,
-            const std::vector<nn::SharedHandle>& modelCache,
+            nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
             const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const;
     nn::GeneralResult<nn::SharedBuffer> allocateInternal(
-            bool blocking, const nn::BufferDesc& desc,
-            const std::vector<nn::SharedPreparedModel>& preparedModels,
+            const nn::BufferDesc& desc, const std::vector<nn::SharedPreparedModel>& preparedModels,
             const std::vector<nn::BufferRole>& inputRoles,
             const std::vector<nn::BufferRole>& outputRoles) const;
diff --git a/neuralnetworks/utils/common/include/nnapi/hal/ResilientPreparedModel.h b/neuralnetworks/utils/common/include/nnapi/hal/ResilientPreparedModel.h
index d86c88be32..faae673ba7 100644
--- a/neuralnetworks/utils/common/include/nnapi/hal/ResilientPreparedModel.h
+++ b/neuralnetworks/utils/common/include/nnapi/hal/ResilientPreparedModel.h
@@ -34,7 +34,7 @@ class ResilientPreparedModel final : public nn::IPreparedModel {
     struct PrivateConstructorTag {};
 
   public:
-    using Factory = std::function<nn::GeneralResult<nn::SharedPreparedModel>(bool blocking)>;
+    using Factory = std::function<nn::GeneralResult<nn::SharedPreparedModel>()>;
 
     static nn::GeneralResult<std::shared_ptr<const ResilientPreparedModel>> create(
            Factory makePreparedModel);
diff --git a/neuralnetworks/utils/common/src/ResilientBuffer.cpp b/neuralnetworks/utils/common/src/ResilientBuffer.cpp
index 984295b729..cf5496ac39 100644
--- a/neuralnetworks/utils/common/src/ResilientBuffer.cpp
+++ b/neuralnetworks/utils/common/src/ResilientBuffer.cpp
@@ -36,7 +36,7 @@ nn::GeneralResult<std::shared_ptr<const ResilientBuffer>> ResilientBuffer::creat
         return NN_ERROR(nn::ErrorStatus::INVALID_ARGUMENT)
                << "utils::ResilientBuffer::create must have non-empty makeBuffer";
     }
-    auto buffer = NN_TRY(makeBuffer(/*blocking=*/true));
+    auto buffer = NN_TRY(makeBuffer());
     CHECK(buffer != nullptr);
     return std::make_shared<ResilientBuffer>(PrivateConstructorTag{}, std::move(makeBuffer),
                                              std::move(buffer));
diff --git a/neuralnetworks/utils/common/src/ResilientDevice.cpp b/neuralnetworks/utils/common/src/ResilientDevice.cpp
index 2f83c5c5bd..6ad3fadee6 100644
--- a/neuralnetworks/utils/common/src/ResilientDevice.cpp
+++ b/neuralnetworks/utils/common/src/ResilientDevice.cpp
@@ -49,7 +49,17 @@ auto protect(const ResilientDevice& resilientDevice, const FnType& fn, bool bloc
         return result;
     }
 
-    device = resilientDevice.recover(device.get(), blocking);
+    // Attempt recovery and return if it fails.
+    auto maybeDevice = resilientDevice.recover(device.get(), blocking);
+    if (!maybeDevice.has_value()) {
+        const auto& [resultErrorMessage, resultErrorCode] = result.error();
+        const auto& [recoveryErrorMessage, recoveryErrorCode] = maybeDevice.error();
+        return nn::error(resultErrorCode)
+               << resultErrorMessage << ", and failed to recover dead device with error "
+               << recoveryErrorCode << ": " << recoveryErrorMessage;
+    }
+    device = std::move(maybeDevice).value();
+
     return fn(*device);
 }
 
@@ -94,7 +104,8 @@ nn::SharedDevice ResilientDevice::getDevice() const {
     return mDevice;
 }
 
-nn::SharedDevice ResilientDevice::recover(const nn::IDevice* failingDevice, bool blocking) const {
+nn::GeneralResult<nn::SharedDevice> ResilientDevice::recover(const nn::IDevice* failingDevice,
+                                                             bool blocking) const {
     std::lock_guard guard(mMutex);
 
     // Another caller updated the failing device.
@@ -102,13 +113,7 @@ nn::SharedDevice ResilientDevice::recover(const nn::IDevice* failingDevice, bool
         return mDevice;
     }
 
-    auto maybeDevice = kMakeDevice(blocking);
-    if (!maybeDevice.has_value()) {
-        const auto& [message, code] = maybeDevice.error();
-        LOG(ERROR) << "Failed to recover dead device with error " << code << ": " << message;
-        return mDevice;
-    }
-    auto device = std::move(maybeDevice).value();
+    auto device = NN_TRY(kMakeDevice(blocking));
 
     // If recovered device has different metadata than what is cached (i.e., because it was
     // updated), mark the device as invalid and preserve the cached data.
@@ -176,11 +181,11 @@ nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModel(
         nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
         const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
     auto self = shared_from_this();
-    ResilientPreparedModel::Factory makePreparedModel =
-            [device = std::move(self), model, preference, priority, deadline, modelCache, dataCache,
-             token](bool blocking) -> nn::GeneralResult<nn::SharedPreparedModel> {
-        return device->prepareModelInternal(blocking, model, preference, priority, deadline,
-                                            modelCache, dataCache, token);
+    ResilientPreparedModel::Factory makePreparedModel = [device = std::move(self), model,
+                                                         preference, priority, deadline, modelCache,
+                                                         dataCache, token] {
+        return device->prepareModelInternal(model, preference, priority, deadline, modelCache,
+                                            dataCache, token);
     };
     return ResilientPreparedModel::create(std::move(makePreparedModel));
 }
@@ -189,11 +194,9 @@ nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelFromCach
         nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
         const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
     auto self = shared_from_this();
-    ResilientPreparedModel::Factory makePreparedModel =
-            [device = std::move(self), deadline, modelCache, dataCache,
-             token](bool blocking) -> nn::GeneralResult<nn::SharedPreparedModel> {
-        return device->prepareModelFromCacheInternal(blocking, deadline, modelCache, dataCache,
-                                                     token);
+    ResilientPreparedModel::Factory makePreparedModel = [device = std::move(self), deadline,
+                                                         modelCache, dataCache, token] {
+        return device->prepareModelFromCacheInternal(deadline, modelCache, dataCache, token);
     };
     return ResilientPreparedModel::create(std::move(makePreparedModel));
 }
@@ -203,10 +206,9 @@ nn::GeneralResult<nn::SharedBuffer> ResilientDevice::allocate(
         const std::vector<nn::BufferRole>& inputRoles,
         const std::vector<nn::BufferRole>& outputRoles) const {
     auto self = shared_from_this();
-    ResilientBuffer::Factory makeBuffer =
-            [device = std::move(self), desc, preparedModels, inputRoles,
-             outputRoles](bool blocking) -> nn::GeneralResult<nn::SharedBuffer> {
-        return device->allocateInternal(blocking, desc, preparedModels, inputRoles, outputRoles);
+    ResilientBuffer::Factory makeBuffer = [device = std::move(self), desc, preparedModels,
+                                           inputRoles, outputRoles] {
+        return device->allocateInternal(desc, preparedModels, inputRoles, outputRoles);
     };
     return ResilientBuffer::create(std::move(makeBuffer));
 }
@@ -217,9 +219,8 @@ bool ResilientDevice::isValidInternal() const {
 }
 nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelInternal(
-        bool blocking, const nn::Model& model, nn::ExecutionPreference preference,
-        nn::Priority priority, nn::OptionalTimePoint deadline,
-        const std::vector<nn::SharedHandle>& modelCache,
+        const nn::Model& model, nn::ExecutionPreference preference, nn::Priority priority,
+        nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
         const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
     if (!isValidInternal()) {
         return std::make_shared<const InvalidPreparedModel>();
     }
@@ -229,12 +230,11 @@ nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelInternal
         return device.prepareModel(model, preference, priority, deadline, modelCache, dataCache,
                                    token);
     };
-    return protect(*this, fn, blocking);
+    return protect(*this, fn, /*blocking=*/false);
 }
 
 nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelFromCacheInternal(
-        bool blocking, nn::OptionalTimePoint deadline,
-        const std::vector<nn::SharedHandle>& modelCache,
+        nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
         const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
     if (!isValidInternal()) {
         return std::make_shared<const InvalidPreparedModel>();
@@ -242,12 +242,11 @@ nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelFromCach
     const auto fn = [deadline, &modelCache, &dataCache, token](const nn::IDevice& device) {
         return device.prepareModelFromCache(deadline, modelCache, dataCache, token);
     };
-    return protect(*this, fn, blocking);
+    return protect(*this, fn, /*blocking=*/false);
 }
 
 nn::GeneralResult<nn::SharedBuffer> ResilientDevice::allocateInternal(
-        bool blocking, const nn::BufferDesc& desc,
-        const std::vector<nn::SharedPreparedModel>& preparedModels,
+        const nn::BufferDesc& desc, const std::vector<nn::SharedPreparedModel>& preparedModels,
         const std::vector<nn::BufferRole>& inputRoles,
         const std::vector<nn::BufferRole>& outputRoles) const {
     if (!isValidInternal()) {
@@ -256,7 +255,7 @@ nn::GeneralResult<nn::SharedBuffer> ResilientDevice::allocateInternal(
     const auto fn = [&desc, &preparedModels, &inputRoles, &outputRoles](const nn::IDevice& device) {
         return device.allocate(desc, preparedModels, inputRoles, outputRoles);
     };
-    return protect(*this, fn, blocking);
+    return protect(*this, fn, /*blocking=*/false);
 }
 
 }  // namespace android::hardware::neuralnetworks::utils
diff --git a/neuralnetworks/utils/common/src/ResilientPreparedModel.cpp b/neuralnetworks/utils/common/src/ResilientPreparedModel.cpp
index 012a1dedc3..b8acee16c9 100644
--- a/neuralnetworks/utils/common/src/ResilientPreparedModel.cpp
+++ b/neuralnetworks/utils/common/src/ResilientPreparedModel.cpp
@@ -36,7 +36,7 @@ nn::GeneralResult<std::shared_ptr<const ResilientPreparedModel>> ResilientPrepar
         return NN_ERROR(nn::ErrorStatus::INVALID_ARGUMENT)
                << "utils::ResilientPreparedModel::create must have non-empty makePreparedModel";
     }
-    auto preparedModel = NN_TRY(makePreparedModel(/*blocking=*/true));
+    auto preparedModel = NN_TRY(makePreparedModel());
     CHECK(preparedModel != nullptr);
     return std::make_shared<ResilientPreparedModel>(
             PrivateConstructorTag{}, std::move(makePreparedModel), std::move(preparedModel));