From 11761e37a864761382303a75ab29bb0fc0f716c3 Mon Sep 17 00:00:00 2001
From: Michael Butler
Date: Tue, 15 Dec 2020 15:20:26 -0800
Subject: [PATCH] Remove 'blocking' param from NN ResilientPreparedModel and
 *Buffer

This change removes the 'blocking' parameter for the
ResilientPreparedModel::Factory and ResilientBuffer::Factory. The
'blocking' parameter is only useful for ResilientDevice::Factory, which
behind the scenes chooses between the HIDL calls IDevice::getService and
IDevice::tryGetService. The equivalent calls for IPreparedModel and
IBuffer are not used, as both are created from the IDevice object.

This change also modifies the ResilientDevice's device recovery
behavior. Prior to this change, ResilientDevice's recovery mechanism had
the following behavior:
* attempt to call a function
* if the function did not return a DEAD_OBJECT error, return
* if the function returned a DEAD_OBJECT error, attempt to recover the
  device
* whether or not the recovery succeeded, call the function again

This CL changes the behavior so that if device recovery fails,
ResilientDevice will not call the function the second time.

Bug: N/A
Test: mma
Change-Id: Icf37d05c884c740178324fcd046ea56914ef7d44
---
 .../include/nnapi/hal/ResilientBuffer.h       |  2 +-
 .../include/nnapi/hal/ResilientDevice.h       | 15 ++---
 .../nnapi/hal/ResilientPreparedModel.h        |  2 +-
 .../utils/common/src/ResilientBuffer.cpp      |  2 +-
 .../utils/common/src/ResilientDevice.cpp      | 65 +++++++++----------
 .../common/src/ResilientPreparedModel.cpp     |  2 +-
 6 files changed, 42 insertions(+), 46 deletions(-)

diff --git a/neuralnetworks/utils/common/include/nnapi/hal/ResilientBuffer.h b/neuralnetworks/utils/common/include/nnapi/hal/ResilientBuffer.h
index 996ec1ee81..9d5e3e6a05 100644
--- a/neuralnetworks/utils/common/include/nnapi/hal/ResilientBuffer.h
+++ b/neuralnetworks/utils/common/include/nnapi/hal/ResilientBuffer.h
@@ -34,7 +34,7 @@ class ResilientBuffer final : public nn::IBuffer {
     struct PrivateConstructorTag {};
 
   public:
-    using Factory = std::function<nn::GeneralResult<nn::SharedBuffer>(bool blocking)>;
+    using Factory = std::function<nn::GeneralResult<nn::SharedBuffer>()>;
 
     static nn::GeneralResult<std::shared_ptr<const ResilientBuffer>> create(Factory makeBuffer);
 
diff --git a/neuralnetworks/utils/common/include/nnapi/hal/ResilientDevice.h b/neuralnetworks/utils/common/include/nnapi/hal/ResilientDevice.h
index 4bfed6cd51..84ae799aad 100644
--- a/neuralnetworks/utils/common/include/nnapi/hal/ResilientDevice.h
+++ b/neuralnetworks/utils/common/include/nnapi/hal/ResilientDevice.h
@@ -46,8 +46,8 @@ class ResilientDevice final : public nn::IDevice,
                              nn::Capabilities capabilities, nn::SharedDevice device);
 
     nn::SharedDevice getDevice() const EXCLUDES(mMutex);
-    nn::SharedDevice recover(const nn::IDevice* failingDevice, bool blocking) const
-            EXCLUDES(mMutex);
+    nn::GeneralResult<nn::SharedDevice> recover(const nn::IDevice* failingDevice,
+                                                bool blocking) const EXCLUDES(mMutex);
 
     const std::string& getName() const override;
     const std::string& getVersionString() const override;
@@ -81,17 +81,14 @@ class ResilientDevice final : public nn::IDevice,
 
   private:
     bool isValidInternal() const EXCLUDES(mMutex);
     nn::GeneralResult<nn::SharedPreparedModel> prepareModelInternal(
-            bool blocking, const nn::Model& model, nn::ExecutionPreference preference,
-            nn::Priority priority, nn::OptionalTimePoint deadline,
-            const std::vector<nn::SharedHandle>& modelCache,
+            const nn::Model& model, nn::ExecutionPreference preference, nn::Priority priority,
+            nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
             const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const;
     nn::GeneralResult<nn::SharedPreparedModel> prepareModelFromCacheInternal(
-            bool blocking, nn::OptionalTimePoint deadline,
-            const std::vector<nn::SharedHandle>& modelCache,
+            nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
             const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const;
     nn::GeneralResult<nn::SharedBuffer> allocateInternal(
-            bool blocking, const nn::BufferDesc& desc,
-            const std::vector<nn::SharedPreparedModel>& preparedModels,
+            const nn::BufferDesc& desc, const std::vector<nn::SharedPreparedModel>& preparedModels,
             const std::vector<nn::BufferRole>& inputRoles,
             const std::vector<nn::BufferRole>& outputRoles) const;
diff --git a/neuralnetworks/utils/common/include/nnapi/hal/ResilientPreparedModel.h b/neuralnetworks/utils/common/include/nnapi/hal/ResilientPreparedModel.h
index d86c88be32..faae673ba7 100644
--- a/neuralnetworks/utils/common/include/nnapi/hal/ResilientPreparedModel.h
+++ b/neuralnetworks/utils/common/include/nnapi/hal/ResilientPreparedModel.h
@@ -34,7 +34,7 @@ class ResilientPreparedModel final : public nn::IPreparedModel {
     struct PrivateConstructorTag {};
 
   public:
-    using Factory = std::function<nn::GeneralResult<nn::SharedPreparedModel>(bool blocking)>;
+    using Factory = std::function<nn::GeneralResult<nn::SharedPreparedModel>()>;
 
     static nn::GeneralResult<std::shared_ptr<const ResilientPreparedModel>> create(
            Factory makePreparedModel);
diff --git a/neuralnetworks/utils/common/src/ResilientBuffer.cpp b/neuralnetworks/utils/common/src/ResilientBuffer.cpp
index 984295b729..cf5496ac39 100644
--- a/neuralnetworks/utils/common/src/ResilientBuffer.cpp
+++ b/neuralnetworks/utils/common/src/ResilientBuffer.cpp
@@ -36,7 +36,7 @@ nn::GeneralResult<std::shared_ptr<const ResilientBuffer>> ResilientBuffer::creat
         return NN_ERROR(nn::ErrorStatus::INVALID_ARGUMENT)
                << "utils::ResilientBuffer::create must have non-empty makeBuffer";
     }
-    auto buffer = NN_TRY(makeBuffer(/*blocking=*/true));
+    auto buffer = NN_TRY(makeBuffer());
     CHECK(buffer != nullptr);
     return std::make_shared<ResilientBuffer>(PrivateConstructorTag{}, std::move(makeBuffer),
                                              std::move(buffer));
diff --git a/neuralnetworks/utils/common/src/ResilientDevice.cpp b/neuralnetworks/utils/common/src/ResilientDevice.cpp
index 2f83c5c5bd..6ad3fadee6 100644
--- a/neuralnetworks/utils/common/src/ResilientDevice.cpp
+++ b/neuralnetworks/utils/common/src/ResilientDevice.cpp
@@ -49,7 +49,17 @@ auto protect(const ResilientDevice& resilientDevice, const FnType& fn, bool bloc
         return result;
     }
 
-    device = resilientDevice.recover(device.get(), blocking);
+    // Attempt recovery and return if it fails.
+    auto maybeDevice = resilientDevice.recover(device.get(), blocking);
+    if (!maybeDevice.has_value()) {
+        const auto& [resultErrorMessage, resultErrorCode] = result.error();
+        const auto& [recoveryErrorMessage, recoveryErrorCode] = maybeDevice.error();
+        return nn::error(resultErrorCode)
+               << resultErrorMessage << ", and failed to recover dead device with error "
+               << recoveryErrorCode << ": " << recoveryErrorMessage;
+    }
+    device = std::move(maybeDevice).value();
+
     return fn(*device);
 }
 
@@ -94,7 +104,8 @@ nn::SharedDevice ResilientDevice::getDevice() const {
     return mDevice;
 }
 
-nn::SharedDevice ResilientDevice::recover(const nn::IDevice* failingDevice, bool blocking) const {
+nn::GeneralResult<nn::SharedDevice> ResilientDevice::recover(const nn::IDevice* failingDevice,
+                                                             bool blocking) const {
     std::lock_guard guard(mMutex);
 
     // Another caller updated the failing device.
@@ -102,13 +113,7 @@ nn::SharedDevice ResilientDevice::recover(const nn::IDevice* failingDevice, bool
         return mDevice;
     }
 
-    auto maybeDevice = kMakeDevice(blocking);
-    if (!maybeDevice.has_value()) {
-        const auto& [message, code] = maybeDevice.error();
-        LOG(ERROR) << "Failed to recover dead device with error " << code << ": " << message;
-        return mDevice;
-    }
-    auto device = std::move(maybeDevice).value();
+    auto device = NN_TRY(kMakeDevice(blocking));
 
     // If recovered device has different metadata than what is cached (i.e., because it was
     // updated), mark the device as invalid and preserve the cached data.
@@ -176,11 +181,11 @@ nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModel(
         nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
         const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
     auto self = shared_from_this();
-    ResilientPreparedModel::Factory makePreparedModel =
-            [device = std::move(self), model, preference, priority, deadline, modelCache, dataCache,
-             token](bool blocking) -> nn::GeneralResult<nn::SharedPreparedModel> {
-        return device->prepareModelInternal(blocking, model, preference, priority, deadline,
-                                            modelCache, dataCache, token);
+    ResilientPreparedModel::Factory makePreparedModel = [device = std::move(self), model,
+                                                         preference, priority, deadline, modelCache,
+                                                         dataCache, token] {
+        return device->prepareModelInternal(model, preference, priority, deadline, modelCache,
+                                            dataCache, token);
     };
     return ResilientPreparedModel::create(std::move(makePreparedModel));
 }
@@ -189,11 +194,9 @@ nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelFromCach
         nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
         const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
     auto self = shared_from_this();
-    ResilientPreparedModel::Factory makePreparedModel =
-            [device = std::move(self), deadline, modelCache, dataCache,
-             token](bool blocking) -> nn::GeneralResult<nn::SharedPreparedModel> {
-        return device->prepareModelFromCacheInternal(blocking, deadline, modelCache, dataCache,
-                                                     token);
+    ResilientPreparedModel::Factory makePreparedModel = [device = std::move(self), deadline,
+                                                         modelCache, dataCache, token] {
+        return device->prepareModelFromCacheInternal(deadline, modelCache, dataCache, token);
     };
     return ResilientPreparedModel::create(std::move(makePreparedModel));
 }
@@ -203,10 +206,9 @@ nn::GeneralResult<nn::SharedBuffer> ResilientDevice::allocate(
         const std::vector<nn::BufferRole>& inputRoles,
         const std::vector<nn::BufferRole>& outputRoles) const {
     auto self = shared_from_this();
-    ResilientBuffer::Factory makeBuffer =
-            [device = std::move(self), desc, preparedModels, inputRoles,
-             outputRoles](bool blocking) -> nn::GeneralResult<nn::SharedBuffer> {
-        return device->allocateInternal(blocking, desc, preparedModels, inputRoles, outputRoles);
+    ResilientBuffer::Factory makeBuffer = [device = std::move(self), desc, preparedModels,
+                                           inputRoles, outputRoles] {
+        return device->allocateInternal(desc, preparedModels, inputRoles, outputRoles);
     };
     return ResilientBuffer::create(std::move(makeBuffer));
 }
@@ -217,9 +219,8 @@ bool ResilientDevice::isValidInternal() const {
 }
 nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelInternal(
-        bool blocking, const nn::Model& model, nn::ExecutionPreference preference,
-        nn::Priority priority, nn::OptionalTimePoint deadline,
-        const std::vector<nn::SharedHandle>& modelCache,
+        const nn::Model& model, nn::ExecutionPreference preference, nn::Priority priority,
+        nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
         const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
     if (!isValidInternal()) {
         return std::make_shared<const InvalidPreparedModel>();
     }
@@ -229,12 +230,11 @@ nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelInternal
         return device.prepareModel(model, preference, priority, deadline, modelCache, dataCache,
                                    token);
     };
-    return protect(*this, fn, blocking);
+    return protect(*this, fn, /*blocking=*/false);
 }
 
 nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelFromCacheInternal(
-        bool blocking, nn::OptionalTimePoint deadline,
-        const std::vector<nn::SharedHandle>& modelCache,
+        nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
         const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
     if (!isValidInternal()) {
         return std::make_shared<const InvalidPreparedModel>();
@@ -242,12 +242,11 @@ nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelFromCach
     const auto fn = [deadline, &modelCache, &dataCache, token](const nn::IDevice& device) {
         return device.prepareModelFromCache(deadline, modelCache, dataCache, token);
     };
-    return protect(*this, fn, blocking);
+    return protect(*this, fn, /*blocking=*/false);
 }
 
 nn::GeneralResult<nn::SharedBuffer> ResilientDevice::allocateInternal(
-        bool blocking, const nn::BufferDesc& desc,
-        const std::vector<nn::SharedPreparedModel>& preparedModels,
+        const nn::BufferDesc& desc, const std::vector<nn::SharedPreparedModel>& preparedModels,
         const std::vector<nn::BufferRole>& inputRoles,
         const std::vector<nn::BufferRole>& outputRoles) const {
     if (!isValidInternal()) {
@@ -256,7 +255,7 @@ nn::GeneralResult<nn::SharedBuffer> ResilientDevice::allocateInternal(
     const auto fn = [&desc, &preparedModels, &inputRoles, &outputRoles](const nn::IDevice& device) {
         return device.allocate(desc, preparedModels, inputRoles, outputRoles);
     };
-    return protect(*this, fn, blocking);
+    return protect(*this, fn, /*blocking=*/false);
 }
 
 }  // namespace android::hardware::neuralnetworks::utils
diff --git a/neuralnetworks/utils/common/src/ResilientPreparedModel.cpp b/neuralnetworks/utils/common/src/ResilientPreparedModel.cpp
index 012a1dedc3..b8acee16c9 100644
--- a/neuralnetworks/utils/common/src/ResilientPreparedModel.cpp
+++ b/neuralnetworks/utils/common/src/ResilientPreparedModel.cpp
@@ -36,7 +36,7 @@ nn::GeneralResult<std::shared_ptr<const ResilientPreparedModel>> ResilientPrepar
         return NN_ERROR(nn::ErrorStatus::INVALID_ARGUMENT)
                << "utils::ResilientPreparedModel::create must have non-empty makePreparedModel";
     }
-    auto preparedModel = NN_TRY(makePreparedModel(/*blocking=*/true));
+    auto preparedModel = NN_TRY(makePreparedModel());
     CHECK(preparedModel != nullptr);
     return std::make_shared<ResilientPreparedModel>(
             PrivateConstructorTag{}, std::move(makePreparedModel), std::move(preparedModel));