diff --git a/neuralnetworks/1.2/utils/Android.bp b/neuralnetworks/1.2/utils/Android.bp index 2921141484..41281ee955 100644 --- a/neuralnetworks/1.2/utils/Android.bp +++ b/neuralnetworks/1.2/utils/Android.bp @@ -27,7 +27,6 @@ cc_library_static { name: "neuralnetworks_utils_hal_1_2", defaults: ["neuralnetworks_utils_defaults"], srcs: ["src/*"], - exclude_srcs: ["src/ExecutionBurst*"], local_include_dirs: ["include/nnapi/hal/1.2/"], export_include_dirs: ["include"], cflags: ["-Wthread-safety"], @@ -41,10 +40,16 @@ cc_library_static { "android.hardware.neuralnetworks@1.0", "android.hardware.neuralnetworks@1.1", "android.hardware.neuralnetworks@1.2", + "libfmq", ], export_static_lib_headers: [ "neuralnetworks_utils_hal_common", ], + product_variables: { + debuggable: { // eng and userdebug builds + cflags: ["-DNN_DEBUGGABLE"], + }, + }, } cc_test { diff --git a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Conversions.h b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Conversions.h index 6fd13379ef..272cee7e88 100644 --- a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Conversions.h +++ b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Conversions.h @@ -52,6 +52,7 @@ GeneralResult convert(const hal::V1_2::Capabilities& capabilities) GeneralResult convert(const hal::V1_2::Model& model); GeneralResult convert(const hal::V1_2::MeasureTiming& measureTiming); GeneralResult convert(const hal::V1_2::Timing& timing); +GeneralResult convert(const hardware::hidl_memory& memory); GeneralResult> convert( const hardware::hidl_vec& extensions); diff --git a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstController.h b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstController.h index 5356a912bd..6b6fc71f65 100644 --- a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstController.h +++ b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstController.h @@ -14,23 +14,28 @@ * limitations under the License. */ -#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H -#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H +#ifndef ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H +#define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H #include "ExecutionBurstUtils.h" -#include +#include #include -#include #include #include #include #include #include #include +#include +#include +#include +#include +#include #include #include +#include #include #include #include @@ -39,147 +44,145 @@ #include #include -namespace android::nn { +namespace android::hardware::neuralnetworks::V1_2::utils { /** - * The ExecutionBurstController class manages both the serialization and - * deserialization of data across FMQ, making it appear to the runtime as a - * regular synchronous inference. Additionally, this class manages the burst's - * memory cache. + * The ExecutionBurstController class manages both the serialization and deserialization of data + * across FMQ, making it appear to the runtime as a regular synchronous inference. Additionally, + * this class manages the burst's memory cache. */ -class ExecutionBurstController { - DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstController); +class ExecutionBurstController final : public nn::IBurst { + struct PrivateConstructorTag {}; public: + using FallbackFunction = + std::function, nn::Timing>>( + const nn::Request&, nn::MeasureTiming)>; + /** - * NN runtime burst callback object and memory cache. + * NN runtime memory cache. * - * ExecutionBurstCallback associates a hidl_memory object with a slot number - * to be passed across FMQ. The ExecutionBurstServer can use this callback - * to retrieve this hidl_memory corresponding to the slot via HIDL. + * MemoryCache associates a Memory object with a slot number to be passed across FMQ. The + * ExecutionBurstServer can use this callback to retrieve a hidl_memory corresponding to the + * slot via HIDL. * - * Whenever a hidl_memory object is copied, it will duplicate the underlying - * file descriptor. Because the NN runtime currently copies the hidl_memory - * on each execution, it is difficult to associate hidl_memory objects with - * previously cached hidl_memory objects. For this reason, callers of this - * class must pair each hidl_memory object with an associated key. For - * efficiency, if two hidl_memory objects represent the same underlying - * buffer, they must use the same key. + * Whenever a hidl_memory object is copied, it will duplicate the underlying file descriptor. + * Because the NN runtime currently copies the hidl_memory on each execution, it is difficult to + * associate hidl_memory objects with previously cached hidl_memory objects. For this reason, + * callers of this class must pair each hidl_memory object with an associated key. For + * efficiency, if two hidl_memory objects represent the same underlying buffer, they must use + * the same key. + * + * This class is thread-safe. */ - class ExecutionBurstCallback : public hardware::neuralnetworks::V1_2::IBurstCallback { - DISALLOW_COPY_AND_ASSIGN(ExecutionBurstCallback); + class MemoryCache : public std::enable_shared_from_this { + struct PrivateConstructorTag {}; public: - ExecutionBurstCallback() = default; + using Task = std::function; + using Cleanup = base::ScopeGuard; + using SharedCleanup = std::shared_ptr; + using WeakCleanup = std::weak_ptr; - hardware::Return getMemories(const hardware::hidl_vec& slots, - getMemories_cb cb) override; + // Custom constructor to pre-allocate cache sizes. + MemoryCache(); /** - * This function performs one of two different actions: - * 1) If a key corresponding to a memory resource is unrecognized by the - * ExecutionBurstCallback object, the ExecutionBurstCallback object - * will allocate a slot, bind the memory to the slot, and return the - * slot identifier. - * 2) If a key corresponding to a memory resource is recognized by the - * ExecutionBurstCallback object, the ExecutionBurstCallback object - * will return the existing slot identifier. + * Add a burst context to the MemoryCache object. * - * @param memories Memory resources used in an inference. - * @param keys Unique identifiers where each element corresponds to a - * memory resource element in "memories". - * @return Unique slot identifiers where each returned slot element - * corresponds to a memory resource element in "memories". + * If this method is called, it must be called before the MemoryCache::cacheMemory or + * MemoryCache::getMemory is used. + * + * @param burstContext Burst context to be added to the MemoryCache object. */ - std::vector getSlots(const hardware::hidl_vec& memories, - const std::vector& keys); + void setBurstContext(sp burstContext); - /* - * This function performs two different actions: - * 1) Removes an entry from the cache (if present), including the local - * storage of the hidl_memory object. Note that this call does not - * free any corresponding hidl_memory object in ExecutionBurstServer, - * which is separately freed via IBurstContext::freeMemory. - * 2) Return whether a cache entry was removed and which slot was removed if - * found. If the key did not to correspond to any entry in the cache, a - * slot number of 0 is returned. The slot number and whether the entry - * existed is useful so the same slot can be freed in the - * ExecutionBurstServer's cache via IBurstContext::freeMemory. + /** + * Cache a memory object in the MemoryCache object. + * + * @param memory Memory object to be cached while the returned `SharedCleanup` is alive. + * @return A pair of (1) a unique identifier for the cache entry and (2) a ref-counted + * "hold" object which preserves the cache as long as the hold object is alive. */ - std::pair freeMemory(intptr_t key); + std::pair cacheMemory(const nn::SharedMemory& memory); + + /** + * Get the memory object corresponding to a slot identifier. + * + * @param slot Slot which identifies the memory object to retrieve. + * @return The memory object corresponding to slot, otherwise GeneralError. + */ + nn::GeneralResult getMemory(int32_t slot); private: - int32_t getSlotLocked(const hardware::hidl_memory& memory, intptr_t key); - int32_t allocateSlotLocked(); + void freeMemory(const nn::SharedMemory& memory); + int32_t allocateSlotLocked() REQUIRES(mMutex); std::mutex mMutex; - std::stack> mFreeSlots; - std::map mMemoryIdToSlot; - std::vector mMemoryCache; + std::condition_variable mCond; + sp mBurstContext GUARDED_BY(mMutex); + std::stack> mFreeSlots GUARDED_BY(mMutex); + std::map mMemoryIdToSlot GUARDED_BY(mMutex); + std::vector mMemoryCache GUARDED_BY(mMutex); + std::vector mCacheCleaner GUARDED_BY(mMutex); + }; + + /** + * HIDL Callback class to pass memory objects to the Burst server when given corresponding + * slots. + */ + class ExecutionBurstCallback : public IBurstCallback { + public: + // Precondition: memoryCache must be non-null. + explicit ExecutionBurstCallback(const std::shared_ptr& memoryCache); + + // See IBurstCallback::getMemories for information on this method. + Return getMemories(const hidl_vec& slots, getMemories_cb cb) override; + + private: + const std::weak_ptr kMemoryCache; }; /** * Creates a burst controller on a prepared model. * - * Prefer this over ExecutionBurstController's constructor. - * * @param preparedModel Model prepared for execution to execute on. - * @param pollingTimeWindow How much time (in microseconds) the - * ExecutionBurstController is allowed to poll the FMQ before waiting on - * the blocking futex. Polling may result in lower latencies at the - * potential cost of more power usage. + * @param pollingTimeWindow How much time (in microseconds) the ExecutionBurstController is + * allowed to poll the FMQ before waiting on the blocking futex. Polling may result in lower + * latencies at the potential cost of more power usage. * @return ExecutionBurstController Execution burst controller object. */ - static std::unique_ptr create( - const sp& preparedModel, + static nn::GeneralResult> create( + const sp& preparedModel, FallbackFunction fallback, std::chrono::microseconds pollingTimeWindow); - // prefer calling ExecutionBurstController::create - ExecutionBurstController(const std::shared_ptr& requestChannelSender, - const std::shared_ptr& resultChannelReceiver, - const sp& burstContext, - const sp& callback, - const sp& deathHandler = nullptr); + ExecutionBurstController(PrivateConstructorTag tag, FallbackFunction fallback, + std::unique_ptr requestChannelSender, + std::unique_ptr resultChannelReceiver, + sp callback, sp burstContext, + std::shared_ptr memoryCache, + neuralnetworks::utils::DeathHandler deathHandler); - // explicit destructor to unregister the death recipient - ~ExecutionBurstController(); + // See IBurst::cacheMemory for information on this method. + OptionalCacheHold cacheMemory(const nn::SharedMemory& memory) const override; - /** - * Execute a request on a model. - * - * @param request Arguments to be executed on a model. - * @param measure Whether to collect timing measurements, either YES or NO - * @param memoryIds Identifiers corresponding to each memory object in the - * request's pools. - * @return A tuple of: - * - result code of the execution - * - dynamic output shapes from the execution - * - any execution time measurements of the execution - * - whether or not a failed burst execution should be re-run using a - * different path (e.g., IPreparedModel::executeSynchronously) - */ - std::tuple, - hardware::neuralnetworks::V1_2::Timing, bool> - compute(const hardware::neuralnetworks::V1_0::Request& request, - hardware::neuralnetworks::V1_2::MeasureTiming measure, - const std::vector& memoryIds); - - /** - * Propagate a user's freeing of memory to the service. - * - * @param key Key corresponding to the memory object. - */ - void freeMemory(intptr_t key); + // See IBurst::execute for information on this method. + nn::ExecutionResult, nn::Timing>> execute( + const nn::Request& request, nn::MeasureTiming measure) const override; private: - std::mutex mMutex; - const std::shared_ptr mRequestChannelSender; - const std::shared_ptr mResultChannelReceiver; - const sp mBurstContext; - const sp mMemoryCache; - const sp mDeathHandler; + mutable std::atomic_flag mExecutionInFlight = ATOMIC_FLAG_INIT; + const FallbackFunction kFallback; + const std::unique_ptr mRequestChannelSender; + const std::unique_ptr mResultChannelReceiver; + const sp mBurstCallback; + const sp mBurstContext; + const std::shared_ptr mMemoryCache; + // `kDeathHandler` must come after `mRequestChannelSender` and `mResultChannelReceiver` because + // it holds references to both objects. + const neuralnetworks::utils::DeathHandler kDeathHandler; }; -} // namespace android::nn +} // namespace android::hardware::neuralnetworks::V1_2::utils -#endif // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H +#endif // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_CONTROLLER_H diff --git a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstServer.h b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstServer.h index 2e109b2de7..f7926f5835 100644 --- a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstServer.h +++ b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstServer.h @@ -14,19 +14,22 @@ * limitations under the License. */ -#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H -#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H +#ifndef ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_SERVER_H +#define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_SERVER_H #include "ExecutionBurstUtils.h" -#include +#include #include -#include #include #include #include #include #include +#include +#include +#include +#include #include #include @@ -36,84 +39,61 @@ #include #include -namespace android::nn { +namespace android::hardware::neuralnetworks::V1_2::utils { /** - * The ExecutionBurstServer class is responsible for waiting for and - * deserializing a request object from a FMQ, performing the inference, and - * serializing the result back across another FMQ. + * The ExecutionBurstServer class is responsible for waiting for and deserializing a request object + * from a FMQ, performing the inference, and serializing the result back across another FMQ. */ -class ExecutionBurstServer : public hardware::neuralnetworks::V1_2::IBurstContext { - DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstServer); +class ExecutionBurstServer : public IBurstContext { + struct PrivateConstructorTag {}; public: /** - * IBurstExecutorWithCache is a callback object passed to - * ExecutionBurstServer's factory function that is used to perform an - * execution. Because some memory resources are needed across multiple - * executions, this object also contains a local cache that can directly be - * used in the execution. + * Class to cache the memory objects for a burst object. * - * ExecutionBurstServer will never access its IBurstExecutorWithCache object - * with concurrent calls. + * This class is thread-safe. */ - class IBurstExecutorWithCache { - DISALLOW_COPY_AND_ASSIGN(IBurstExecutorWithCache); - + class MemoryCache { public: - IBurstExecutorWithCache() = default; - virtual ~IBurstExecutorWithCache() = default; + // Precondition: burstExecutor != nullptr + // Precondition: burstCallback != nullptr + MemoryCache(nn::SharedBurst burstExecutor, sp burstCallback); /** - * Checks if a cache entry specified by a slot is present in the cache. + * Get the cached memory objects corresponding to provided slot identifiers. * - * @param slot Identifier of the cache entry. - * @return 'true' if the cache entry is present in the cache, 'false' - * otherwise. + * If the slot entry is not present in the cache, this class will use IBurstCallback to + * retrieve those entries that are not present in the cache, then cache them. + * + * @param slots Identifiers of memory objects to be retrieved. + * @return A vector where each element is the memory object and a ref-counted cache "hold" + * object to preserve the cache entry of the IBurst object as long as the "hold" object + * is alive, otherwise GeneralError. Each element of the vector corresponds to the + * element of slot. */ - virtual bool isCacheEntryPresent(int32_t slot) const = 0; + nn::GeneralResult>> + getCacheEntries(const std::vector& slots); /** - * Adds an entry specified by a slot to the cache. + * Remove an entry from the cache. * - * The caller of this function must ensure that the cache entry that is - * being added is not already present in the cache. This can be checked - * via isCacheEntryPresent. - * - * @param memory Memory resource to be cached. - * @param slot Slot identifier corresponding to the memory resource. + * @param slot Identifier of the memory object to be removed from the cache. */ - virtual void addCacheEntry(const hardware::hidl_memory& memory, int32_t slot) = 0; + void removeCacheEntry(int32_t slot); - /** - * Removes an entry specified by a slot from the cache. - * - * If the cache entry corresponding to the slot number does not exist, - * the call does nothing. - * - * @param slot Slot identifier corresponding to the memory resource. - */ - virtual void removeCacheEntry(int32_t slot) = 0; + private: + nn::GeneralResult ensureCacheEntriesArePresentLocked( + const std::vector& slots) REQUIRES(mMutex); + nn::GeneralResult> + getCacheEntryLocked(int32_t slot) REQUIRES(mMutex); + void addCacheEntryLocked(int32_t slot, nn::SharedMemory memory) REQUIRES(mMutex); - /** - * Perform an execution. - * - * @param request Request object with inputs and outputs specified. - * Request::pools is empty, and DataLocation::poolIndex instead - * refers to the 'slots' argument as if it were Request::pools. - * @param slots Slots corresponding to the cached memory entries to be - * used. - * @param measure Whether timing information is requested for the - * execution. - * @return Result of the execution, including the status of the - * execution, dynamic output shapes, and any timing information. - */ - virtual std::tuple, - hardware::neuralnetworks::V1_2::Timing> - execute(const hardware::neuralnetworks::V1_0::Request& request, - const std::vector& slots, - hardware::neuralnetworks::V1_2::MeasureTiming measure) = 0; + std::mutex mMutex; + std::map> mCache + GUARDED_BY(mMutex); + nn::SharedBurst kBurstExecutor; + const sp kBurstCallback; }; /** @@ -124,85 +104,52 @@ class ExecutionBurstServer : public hardware::neuralnetworks::V1_2::IBurstContex * 2) Execute a model with the given information * 3) Send the result to the created FMQ * - * @param callback Callback used to retrieve memories corresponding to - * unrecognized slots. - * @param requestChannel Input FMQ channel through which the client passes the - * request to the service. - * @param resultChannel Output FMQ channel from which the client can retrieve - * the result of the execution. - * @param executorWithCache Object which maintains a local cache of the - * memory pools and executes using the cached memory pools. - * @param pollingTimeWindow How much time (in microseconds) the - * ExecutionBurstServer is allowed to poll the FMQ before waiting on - * the blocking futex. Polling may result in lower latencies at the - * potential cost of more power usage. - * @result IBurstContext Handle to the burst context. - */ - static sp create( - const sp& callback, - const FmqRequestDescriptor& requestChannel, const FmqResultDescriptor& resultChannel, - std::shared_ptr executorWithCache, - std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0}); - - /** - * Create automated context to manage FMQ-based executions. - * - * This function is intended to be used by a service to automatically: - * 1) Receive data from a provided FMQ - * 2) Execute a model with the given information - * 3) Send the result to the created FMQ - * - * @param callback Callback used to retrieve memories corresponding to - * unrecognized slots. - * @param requestChannel Input FMQ channel through which the client passes the - * request to the service. - * @param resultChannel Output FMQ channel from which the client can retrieve - * the result of the execution. - * @param preparedModel PreparedModel that the burst object was created from. - * IPreparedModel::executeSynchronously will be used to perform the + * @param callback Callback used to retrieve memories corresponding to unrecognized slots. + * @param requestChannel Input FMQ channel through which the client passes the request to the + * service. + * @param resultChannel Output FMQ channel from which the client can retrieve the result of the * execution. - * @param pollingTimeWindow How much time (in microseconds) the - * ExecutionBurstServer is allowed to poll the FMQ before waiting on - * the blocking futex. Polling may result in lower latencies at the - * potential cost of more power usage. - * @result IBurstContext Handle to the burst context. + * @param burstExecutor Object which maintains a local cache of the memory pools and executes + * using the cached memory pools. + * @param pollingTimeWindow How much time (in microseconds) the ExecutionBurstServer is allowed + * to poll the FMQ before waiting on the blocking futex. Polling may result in lower + * latencies at the potential cost of more power usage. + * @return IBurstContext Handle to the burst context. */ - static sp create( - const sp& callback, - const FmqRequestDescriptor& requestChannel, const FmqResultDescriptor& resultChannel, - hardware::neuralnetworks::V1_2::IPreparedModel* preparedModel, + static nn::GeneralResult> create( + const sp& callback, + const MQDescriptorSync& requestChannel, + const MQDescriptorSync& resultChannel, nn::SharedBurst burstExecutor, std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0}); - ExecutionBurstServer(const sp& callback, + ExecutionBurstServer(PrivateConstructorTag tag, const sp& callback, std::unique_ptr requestChannel, std::unique_ptr resultChannel, - std::shared_ptr cachedExecutor); + nn::SharedBurst burstExecutor); ~ExecutionBurstServer(); - // Used by the NN runtime to preemptively remove any stored memory. - hardware::Return freeMemory(int32_t slot) override; + // Used by the NN runtime to preemptively remove any stored memory. See + // IBurstContext::freeMemory for more information. + Return freeMemory(int32_t slot) override; private: - // Ensures all cache entries contained in mExecutorWithCache are present in - // the cache. If they are not present, they are retrieved (via - // IBurstCallback::getMemories) and added to mExecutorWithCache. - // - // This method is locked via mMutex when it is called. - void ensureCacheEntriesArePresentLocked(const std::vector& slots); - - // Work loop that will continue processing execution requests until the - // ExecutionBurstServer object is freed. + // Work loop that will continue processing execution requests until the ExecutionBurstServer + // object is freed. void task(); + nn::ExecutionResult, Timing>> execute( + const V1_0::Request& requestWithoutPools, const std::vector& slotsOfPools, + MeasureTiming measure); + std::thread mWorker; - std::mutex mMutex; std::atomic mTeardown{false}; - const sp mCallback; + const sp mCallback; const std::unique_ptr mRequestChannelReceiver; const std::unique_ptr mResultChannelSender; - const std::shared_ptr mExecutorWithCache; + const nn::SharedBurst mBurstExecutor; + MemoryCache mMemoryCache; }; -} // namespace android::nn +} // namespace android::hardware::neuralnetworks::V1_2::utils -#endif // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_SERVER_H +#endif // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_SERVER_H diff --git a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstUtils.h b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstUtils.h index 8a4159122e..c662bc3eed 100644 --- a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstUtils.h +++ b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/ExecutionBurstUtils.h @@ -18,15 +18,16 @@ #define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_UTILS_H #include -#include #include #include #include +#include +#include +#include #include #include #include -#include #include #include #include @@ -38,159 +39,139 @@ namespace android::hardware::neuralnetworks::V1_2::utils { */ constexpr const size_t kExecutionBurstChannelLength = 1024; -using FmqRequestDescriptor = MQDescriptorSync; -using FmqResultDescriptor = MQDescriptorSync; +/** + * Get how long the burst controller should poll while waiting for results to be returned. + * + * This time can be affected by the property "debug.nn.burst-controller-polling-window". + * + * @return Polling time in microseconds. + */ +std::chrono::microseconds getBurstControllerPollingTimeWindow(); + +/** + * Get how long the burst server should poll while waiting for a request to be received. + * + * This time can be affected by the property "debug.nn.burst-server-polling-window". + * + * @return Polling time in microseconds. + */ +std::chrono::microseconds getBurstServerPollingTimeWindow(); /** * Function to serialize a request. * - * Prefer calling RequestChannelSender::send. - * * @param request Request object without the pool information. * @param measure Whether to collect timing information for the execution. - * @param memoryIds Slot identifiers corresponding to memory resources for the - * request. + * @param memoryIds Slot identifiers corresponding to memory resources for the request. * @return Serialized FMQ request data. */ -std::vector serialize( - const hardware::neuralnetworks::V1_0::Request& request, - hardware::neuralnetworks::V1_2::MeasureTiming measure, const std::vector& slots); +std::vector serialize(const V1_0::Request& request, MeasureTiming measure, + const std::vector& slots); /** * Deserialize the FMQ request data. * - * The three resulting fields are the Request object (where Request::pools is - * empty), slot identifiers (which are stand-ins for Request::pools), and - * whether timing information must be collected for the run. + * The three resulting fields are the Request object (where Request::pools is empty), slot + * identifiers (which are stand-ins for Request::pools), and whether timing information must be + * collected for the run. * * @param data Serialized FMQ request data. - * @return Request object if successfully deserialized, std::nullopt otherwise. + * @return Request object if successfully deserialized, otherwise an error message. */ -std::optional, - hardware::neuralnetworks::V1_2::MeasureTiming>> -deserialize(const std::vector& data); +nn::Result, MeasureTiming>> deserialize( + const std::vector& data); /** * Function to serialize results. * - * Prefer calling ResultChannelSender::send. - * * @param errorStatus Status of the execution. * @param outputShapes Dynamic shapes of the output tensors. * @param timing Timing information of the execution. * @return Serialized FMQ result data. */ -std::vector serialize( - hardware::neuralnetworks::V1_0::ErrorStatus errorStatus, - const std::vector& outputShapes, - hardware::neuralnetworks::V1_2::Timing timing); +std::vector serialize(V1_0::ErrorStatus errorStatus, + const std::vector& outputShapes, Timing timing); /** * Deserialize the FMQ result data. * - * The three resulting fields are the status of the execution, the dynamic - * shapes of the output tensors, and the timing information of the execution. + * The three resulting fields are the status of the execution, the dynamic shapes of the output + * tensors, and the timing information of the execution. * * @param data Serialized FMQ result data. - * @return Result object if successfully deserialized, std::nullopt otherwise. + * @return Result object if successfully deserialized, otherwise an error message. */ -std::optional, - hardware::neuralnetworks::V1_2::Timing>> -deserialize(const std::vector& data); +nn::Result, Timing>> deserialize( + const std::vector& data); /** - * Convert result code to error status. - * - * @param resultCode Result code to be converted. - * @return ErrorStatus Resultant error status. + * RequestChannelSender is responsible for serializing the result packet of information, sending it + * on the result channel, and signaling that the data is available. */ -hardware::neuralnetworks::V1_0::ErrorStatus legacyConvertResultCodeToErrorStatus(int resultCode); - -/** - * RequestChannelSender is responsible for serializing the result packet of - * information, sending it on the result channel, and signaling that the data is - * available. - */ -class RequestChannelSender { - using FmqRequestDescriptor = - hardware::MQDescriptorSync; - using FmqRequestChannel = - hardware::MessageQueue; +class RequestChannelSender final : public neuralnetworks::utils::IProtectedCallback { + struct PrivateConstructorTag {}; public: /** * Create the sending end of a request channel. * - * Prefer this call over the constructor. - * * @param channelLength Number of elements in the FMQ. - * @return A pair of ResultChannelReceiver and the FMQ descriptor on - * successful creation, both nullptr otherwise. + * @return A pair of ResultChannelReceiver and the FMQ descriptor on successful creation, + * GeneralError otherwise. */ - static std::pair, const FmqRequestDescriptor*> create( - size_t channelLength); + static nn::GeneralResult, + const MQDescriptorSync*>> + create(size_t channelLength); /** * Send the request to the channel. * * @param request Request object without the pool information. * @param measure Whether to collect timing information for the execution. - * @param memoryIds Slot identifiers corresponding to memory resources for - * the request. - * @return 'true' on successful send, 'false' otherwise. + * @param slots Slot identifiers corresponding to memory resources for the request. + * @return An empty `Result` on successful send, otherwise an error message. */ - bool send(const hardware::neuralnetworks::V1_0::Request& request, - hardware::neuralnetworks::V1_2::MeasureTiming measure, - const std::vector& slots); + nn::Result send(const V1_0::Request& request, MeasureTiming measure, + const std::vector& slots); /** - * Method to mark the channel as invalid, causing all future calls to - * RequestChannelSender::send to immediately return false without attempting - * to send a message across the FMQ. + * Method to mark the channel as invalid, causing all future calls to RequestChannelSender::send + * to immediately return false without attempting to send a message across the FMQ. */ - void invalidate(); + void notifyAsDeadObject() override; // prefer calling RequestChannelSender::send - bool sendPacket(const std::vector& packet); + nn::Result sendPacket(const std::vector& packet); - RequestChannelSender(std::unique_ptr fmqRequestChannel); + RequestChannelSender(PrivateConstructorTag tag, size_t channelLength); private: - const std::unique_ptr mFmqRequestChannel; + MessageQueue mFmqRequestChannel; std::atomic mValid{true}; }; /** - * RequestChannelReceiver is responsible for waiting on the channel until the - * packet is available, extracting the packet from the channel, and - * deserializing the packet. + * RequestChannelReceiver is responsible for waiting on the channel until the packet is available, + * extracting the packet from the channel, and deserializing the packet. * - * Because the receiver can wait on a packet that may never come (e.g., because - * the sending side of the packet has been closed), this object can be - * invalidated, unblocking the receiver. + * Because the receiver can wait on a packet that may never come (e.g., because the sending side of + * the packet has been closed), this object can be invalidated, unblocking the receiver. */ -class RequestChannelReceiver { - using FmqRequestChannel = - hardware::MessageQueue; +class RequestChannelReceiver final { + struct PrivateConstructorTag {}; public: /** * Create the receiving end of a request channel. * - * Prefer this call over the constructor. - * * @param requestChannel Descriptor for the request channel. - * @param pollingTimeWindow How much time (in microseconds) the - * RequestChannelReceiver is allowed to poll the FMQ before waiting on - * the blocking futex. Polling may result in lower latencies at the - * potential cost of more power usage. + * @param pollingTimeWindow How much time (in microseconds) the RequestChannelReceiver is + * allowed to poll the FMQ before waiting on the blocking futex. Polling may result in lower + * latencies at the potential cost of more power usage. * @return RequestChannelReceiver on successful creation, nullptr otherwise. */ - static std::unique_ptr create( - const FmqRequestDescriptor& requestChannel, + static nn::GeneralResult> create( + const MQDescriptorSync& requestChannel, std::chrono::microseconds pollingTimeWindow); /** @@ -200,49 +181,45 @@ class RequestChannelReceiver { * 1) The packet has been retrieved, or * 2) The receiver has been invalidated * - * @return Request object if successfully received, std::nullopt if error or - * if the receiver object was invalidated. + * @return Request object if successfully received, an appropriate message if error or if the + * receiver object was invalidated. */ - std::optional, - hardware::neuralnetworks::V1_2::MeasureTiming>> - getBlocking(); + nn::Result, MeasureTiming>> getBlocking(); /** - * Method to mark the channel as invalid, unblocking any current or future - * calls to RequestChannelReceiver::getBlocking. + * Method to mark the channel as invalid, unblocking any current or future calls to + * RequestChannelReceiver::getBlocking. */ void invalidate(); - RequestChannelReceiver(std::unique_ptr fmqRequestChannel, + RequestChannelReceiver(PrivateConstructorTag tag, + const MQDescriptorSync& requestChannel, std::chrono::microseconds pollingTimeWindow); private: - std::optional> getPacketBlocking(); + nn::Result> getPacketBlocking(); - const std::unique_ptr mFmqRequestChannel; + MessageQueue mFmqRequestChannel; std::atomic mTeardown{false}; const std::chrono::microseconds kPollingTimeWindow; }; /** - * ResultChannelSender is responsible for serializing the result packet of - * information, sending it on the result channel, and signaling that the data is - * available. + * ResultChannelSender is responsible for serializing the result packet of information, sending it + * on the result channel, and signaling that the data is available. */ -class ResultChannelSender { - using FmqResultChannel = hardware::MessageQueue; +class ResultChannelSender final { + struct PrivateConstructorTag {}; public: /** * Create the sending end of a result channel. * - * Prefer this call over the constructor. - * * @param resultChannel Descriptor for the result channel. * @return ResultChannelSender on successful creation, nullptr otherwise. */ - static std::unique_ptr create(const FmqResultDescriptor& resultChannel); + static nn::GeneralResult> create( + const MQDescriptorSync& resultChannel); /** * Send the result to the channel. @@ -250,52 +227,44 @@ class ResultChannelSender { * @param errorStatus Status of the execution. * @param outputShapes Dynamic shapes of the output tensors. * @param timing Timing information of the execution. - * @return 'true' on successful send, 'false' otherwise. */ - bool send(hardware::neuralnetworks::V1_0::ErrorStatus errorStatus, - const std::vector& outputShapes, - hardware::neuralnetworks::V1_2::Timing timing); + void send(V1_0::ErrorStatus errorStatus, const std::vector& outputShapes, + Timing timing); // prefer calling ResultChannelSender::send - bool sendPacket(const std::vector& packet); + void sendPacket(const std::vector& packet); - ResultChannelSender(std::unique_ptr fmqResultChannel); + ResultChannelSender(PrivateConstructorTag tag, + const MQDescriptorSync& resultChannel); private: - const std::unique_ptr mFmqResultChannel; + MessageQueue mFmqResultChannel; }; /** - * ResultChannelReceiver is responsible for waiting on the channel until the - * packet is available, extracting the packet from the channel, and - * deserializing the packet. + * ResultChannelReceiver is responsible for waiting on the channel until the packet is available, + * extracting the packet from the channel, and deserializing the packet. * - * Because the receiver can wait on a packet that may never come (e.g., because - * the sending side of the packet has been closed), this object can be - * invalidated, unblocking the receiver. + * Because the receiver can wait on a packet that may never come (e.g., because the sending side of + * the packet has been closed), this object can be invalidated, unblocking the receiver. */ -class ResultChannelReceiver { - using FmqResultDescriptor = - hardware::MQDescriptorSync; - using FmqResultChannel = hardware::MessageQueue; +class ResultChannelReceiver final : public neuralnetworks::utils::IProtectedCallback { + struct PrivateConstructorTag {}; public: /** * Create the receiving end of a result channel. * - * Prefer this call over the constructor. - * * @param channelLength Number of elements in the FMQ. - * @param pollingTimeWindow How much time (in microseconds) the - * ResultChannelReceiver is allowed to poll the FMQ before waiting on - * the blocking futex. Polling may result in lower latencies at the - * potential cost of more power usage. - * @return A pair of ResultChannelReceiver and the FMQ descriptor on - * successful creation, both nullptr otherwise. + * @param pollingTimeWindow How much time (in microseconds) the ResultChannelReceiver is allowed + * to poll the FMQ before waiting on the blocking futex. Polling may result in lower + * latencies at the potential cost of more power usage. + * @return A pair of ResultChannelReceiver and the FMQ descriptor on successful creation, or + * GeneralError otherwise. */ - static std::pair, const FmqResultDescriptor*> create( - size_t channelLength, std::chrono::microseconds pollingTimeWindow); + static nn::GeneralResult, + const MQDescriptorSync*>> + create(size_t channelLength, std::chrono::microseconds pollingTimeWindow); /** * Get the result from the channel. @@ -304,28 +273,25 @@ class ResultChannelReceiver { * 1) The packet has been retrieved, or * 2) The receiver has been invalidated * - * @return Result object if successfully received, std::nullopt if error or + * @return Result object if successfully received, otherwise an appropriate message if error or * if the receiver object was invalidated. */ - std::optional, - hardware::neuralnetworks::V1_2::Timing>> - getBlocking(); + nn::Result, Timing>> getBlocking(); /** - * Method to mark the channel as invalid, unblocking any current or future - * calls to ResultChannelReceiver::getBlocking. + * Method to mark the channel as invalid, unblocking any current or future calls to + * ResultChannelReceiver::getBlocking. */ - void invalidate(); + void notifyAsDeadObject() override; // prefer calling ResultChannelReceiver::getBlocking - std::optional> getPacketBlocking(); + nn::Result> getPacketBlocking(); - ResultChannelReceiver(std::unique_ptr fmqResultChannel, + ResultChannelReceiver(PrivateConstructorTag tag, size_t channelLength, std::chrono::microseconds pollingTimeWindow); private: - const std::unique_ptr mFmqResultChannel; + MessageQueue mFmqResultChannel; std::atomic mValid{true}; const std::chrono::microseconds kPollingTimeWindow; }; diff --git a/neuralnetworks/1.2/utils/src/Conversions.cpp b/neuralnetworks/1.2/utils/src/Conversions.cpp index 86a417a352..2c45583d0c 100644 --- a/neuralnetworks/1.2/utils/src/Conversions.cpp +++ b/neuralnetworks/1.2/utils/src/Conversions.cpp @@ -331,6 +331,10 @@ GeneralResult convert(const hal::V1_2::Timing& timing) { return validatedConvert(timing); } +GeneralResult convert(const hardware::hidl_memory& memory) { + return validatedConvert(memory); +} + GeneralResult> convert(const hidl_vec& extensions) { return validatedConvert(extensions); } diff --git a/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp b/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp index 2265861b41..eedf5916bc 100644 --- a/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp +++ b/neuralnetworks/1.2/utils/src/ExecutionBurstController.cpp @@ -17,283 +17,321 @@ #define LOG_TAG "ExecutionBurstController" #include "ExecutionBurstController.h" +#include "ExecutionBurstUtils.h" #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include +#include #include #include #include -#include "ExecutionBurstUtils.h" -#include "HalInterfaces.h" +#include "Callbacks.h" +#include "Conversions.h" #include "Tracing.h" #include "Utils.h" -namespace android::nn { +namespace android::hardware::neuralnetworks::V1_2::utils { namespace { -class BurstContextDeathHandler : public hardware::hidl_death_recipient { - public: - using Callback = std::function; - - BurstContextDeathHandler(const Callback& onDeathCallback) : mOnDeathCallback(onDeathCallback) { - CHECK(onDeathCallback != nullptr); +nn::GeneralResult> executionBurstResultCallback( + V1_0::ErrorStatus status, const sp& burstContext) { + HANDLE_HAL_STATUS(status) << "IPreparedModel::configureExecutionBurst failed with status " + << toString(status); + if (burstContext == nullptr) { + return NN_ERROR(nn::ErrorStatus::GENERAL_FAILURE) + << "IPreparedModel::configureExecutionBurst returned nullptr for burst"; } - - void serviceDied(uint64_t /*cookie*/, const wp& /*who*/) override { - LOG(ERROR) << "BurstContextDeathHandler::serviceDied -- service unexpectedly died!"; - mOnDeathCallback(); - } - - private: - const Callback mOnDeathCallback; -}; - -} // anonymous namespace - -hardware::Return ExecutionBurstController::ExecutionBurstCallback::getMemories( - const hardware::hidl_vec& slots, getMemories_cb cb) { - std::lock_guard guard(mMutex); - - // get all memories - hardware::hidl_vec memories(slots.size()); - std::transform(slots.begin(), slots.end(), memories.begin(), [this](int32_t slot) { - return slot < mMemoryCache.size() ? mMemoryCache[slot] : hardware::hidl_memory{}; - }); - - // ensure all memories are valid - if (!std::all_of(memories.begin(), memories.end(), - [](const hardware::hidl_memory& memory) { return memory.valid(); })) { - cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {}); - return hardware::Void(); - } - - // return successful - cb(V1_0::ErrorStatus::NONE, std::move(memories)); - return hardware::Void(); + return burstContext; } -std::vector ExecutionBurstController::ExecutionBurstCallback::getSlots( - const hardware::hidl_vec& memories, - const std::vector& keys) { - std::lock_guard guard(mMutex); - - // retrieve (or bind) all slots corresponding to memories - std::vector slots; - slots.reserve(memories.size()); - for (size_t i = 0; i < memories.size(); ++i) { - slots.push_back(getSlotLocked(memories[i], keys[i])); +nn::GeneralResult> getMemoriesHelper( + const hidl_vec& slots, + const std::shared_ptr& memoryCache) { + hidl_vec memories(slots.size()); + for (size_t i = 0; i < slots.size(); ++i) { + const int32_t slot = slots[i]; + const auto memory = NN_TRY(memoryCache->getMemory(slot)); + memories[i] = NN_TRY(V1_0::utils::unvalidatedConvert(memory)); + if (!memories[i].valid()) { + return NN_ERROR() << "memory at slot " << slot << " is invalid"; + } } - return slots; + return memories; } -std::pair ExecutionBurstController::ExecutionBurstCallback::freeMemory( - intptr_t key) { - std::lock_guard guard(mMutex); +} // namespace - auto iter = mMemoryIdToSlot.find(key); - if (iter == mMemoryIdToSlot.end()) { - return {false, 0}; - } - const int32_t slot = iter->second; - mMemoryIdToSlot.erase(key); - mMemoryCache[slot] = {}; - mFreeSlots.push(slot); - return {true, slot}; +// MemoryCache methods + +ExecutionBurstController::MemoryCache::MemoryCache() { + constexpr size_t kPreallocatedCount = 1024; + std::vector freeSlotsSpace; + freeSlotsSpace.reserve(kPreallocatedCount); + mFreeSlots = std::stack>(std::move(freeSlotsSpace)); + mMemoryCache.reserve(kPreallocatedCount); + mCacheCleaner.reserve(kPreallocatedCount); } -int32_t ExecutionBurstController::ExecutionBurstCallback::getSlotLocked( - const hardware::hidl_memory& memory, intptr_t key) { - auto iter = mMemoryIdToSlot.find(key); - if (iter == mMemoryIdToSlot.end()) { - const int32_t slot = allocateSlotLocked(); - mMemoryIdToSlot[key] = slot; - mMemoryCache[slot] = memory; - return slot; - } else { +void ExecutionBurstController::MemoryCache::setBurstContext(sp burstContext) { + std::lock_guard guard(mMutex); + mBurstContext = std::move(burstContext); +} + +std::pair +ExecutionBurstController::MemoryCache::cacheMemory(const nn::SharedMemory& memory) { + std::unique_lock lock(mMutex); + base::ScopedLockAssertion lockAssert(mMutex); + + // Use existing cache entry if (1) the Memory object is in the cache and (2) the cache entry is + // not currently being freed. + auto iter = mMemoryIdToSlot.find(memory); + while (iter != mMemoryIdToSlot.end()) { const int32_t slot = iter->second; - return slot; + if (auto cleaner = mCacheCleaner.at(slot).lock()) { + return std::make_pair(slot, std::move(cleaner)); + } + + // If the code reaches this point, the Memory object was in the cache, but is currently + // being destroyed. This code waits until the cache entry has been freed, then loops to + // ensure the cache entry has been freed or has been made present by another thread. + mCond.wait(lock); + iter = mMemoryIdToSlot.find(memory); } + + // Allocate a new cache entry. + const int32_t slot = allocateSlotLocked(); + mMemoryIdToSlot[memory] = slot; + mMemoryCache[slot] = memory; + + // Create reference-counted self-cleaning cache object. + auto self = weak_from_this(); + Task cleanup = [memory, memoryCache = std::move(self)] { + if (const auto lock = memoryCache.lock()) { + lock->freeMemory(memory); + } + }; + auto cleaner = std::make_shared(std::move(cleanup)); + mCacheCleaner[slot] = cleaner; + + return std::make_pair(slot, std::move(cleaner)); } -int32_t ExecutionBurstController::ExecutionBurstCallback::allocateSlotLocked() { +nn::GeneralResult ExecutionBurstController::MemoryCache::getMemory(int32_t slot) { + std::lock_guard guard(mMutex); + if (slot < 0 || static_cast(slot) >= mMemoryCache.size()) { + return NN_ERROR() << "Invalid slot: " << slot << " vs " << mMemoryCache.size(); + } + return mMemoryCache[slot]; +} + +void ExecutionBurstController::MemoryCache::freeMemory(const nn::SharedMemory& memory) { + { + std::lock_guard guard(mMutex); + const int32_t slot = mMemoryIdToSlot.at(memory); + if (mBurstContext) { + mBurstContext->freeMemory(slot); + } + mMemoryIdToSlot.erase(memory); + mMemoryCache[slot] = {}; + mCacheCleaner[slot].reset(); + mFreeSlots.push(slot); + } + mCond.notify_all(); +} + +int32_t ExecutionBurstController::MemoryCache::allocateSlotLocked() { constexpr size_t kMaxNumberOfSlots = std::numeric_limits::max(); - // if there is a free slot, use it - if (mFreeSlots.size() > 0) { + // If there is a free slot, use it. + if (!mFreeSlots.empty()) { const int32_t slot = mFreeSlots.top(); mFreeSlots.pop(); return slot; } - // otherwise use a slot for the first time - CHECK(mMemoryCache.size() < kMaxNumberOfSlots) << "Exceeded maximum number of slots!"; + // Use a slot for the first time. + CHECK_LT(mMemoryCache.size(), kMaxNumberOfSlots) << "Exceeded maximum number of slots!"; const int32_t slot = static_cast(mMemoryCache.size()); mMemoryCache.emplace_back(); + mCacheCleaner.emplace_back(); return slot; } -std::unique_ptr ExecutionBurstController::create( - const sp& preparedModel, +// ExecutionBurstCallback methods + +ExecutionBurstController::ExecutionBurstCallback::ExecutionBurstCallback( + const std::shared_ptr& memoryCache) + : kMemoryCache(memoryCache) { + CHECK(memoryCache != nullptr); +} + +Return ExecutionBurstController::ExecutionBurstCallback::getMemories( + const hidl_vec& slots, getMemories_cb cb) { + const auto memoryCache = kMemoryCache.lock(); + if (memoryCache == nullptr) { + LOG(ERROR) << "ExecutionBurstController::ExecutionBurstCallback::getMemories called after " + "the MemoryCache has been freed"; + cb(V1_0::ErrorStatus::GENERAL_FAILURE, {}); + return Void(); + } + + const auto maybeMemories = getMemoriesHelper(slots, memoryCache); + if (!maybeMemories.has_value()) { + const auto& [message, code] = maybeMemories.error(); + LOG(ERROR) << "ExecutionBurstController::ExecutionBurstCallback::getMemories failed with " + << code << ": " << message; + cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {}); + return Void(); + } + + cb(V1_0::ErrorStatus::NONE, maybeMemories.value()); + return Void(); +} + +// ExecutionBurstController methods + +nn::GeneralResult> ExecutionBurstController::create( + const sp& preparedModel, FallbackFunction fallback, std::chrono::microseconds pollingTimeWindow) { // check inputs if (preparedModel == nullptr) { - LOG(ERROR) << "ExecutionBurstController::create passed a nullptr"; - return nullptr; + return NN_ERROR() << "ExecutionBurstController::create passed a nullptr"; } - // create callback object - sp callback = new ExecutionBurstCallback(); - // create FMQ objects - auto [requestChannelSenderTemp, requestChannelDescriptor] = - RequestChannelSender::create(kExecutionBurstChannelLength); - auto [resultChannelReceiverTemp, resultChannelDescriptor] = - ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow); - std::shared_ptr requestChannelSender = - std::move(requestChannelSenderTemp); - std::shared_ptr resultChannelReceiver = - std::move(resultChannelReceiverTemp); + auto [requestChannelSender, requestChannelDescriptor] = + NN_TRY(RequestChannelSender::create(kExecutionBurstChannelLength)); + auto [resultChannelReceiver, resultChannelDescriptor] = + NN_TRY(ResultChannelReceiver::create(kExecutionBurstChannelLength, pollingTimeWindow)); // check FMQ objects - if (!requestChannelSender || !resultChannelReceiver || !requestChannelDescriptor || - !resultChannelDescriptor) { - LOG(ERROR) << "ExecutionBurstController::create failed to create FastMessageQueue"; - return nullptr; - } + CHECK(requestChannelSender != nullptr); + CHECK(requestChannelDescriptor != nullptr); + CHECK(resultChannelReceiver != nullptr); + CHECK(resultChannelDescriptor != nullptr); + + // create memory cache + auto memoryCache = std::make_shared(); + + // create callback object + auto burstCallback = sp::make(memoryCache); + auto cb = hal::utils::CallbackValue(executionBurstResultCallback); // configure burst - V1_0::ErrorStatus errorStatus; - sp burstContext; - const hardware::Return ret = preparedModel->configureExecutionBurst( - callback, *requestChannelDescriptor, *resultChannelDescriptor, - [&errorStatus, &burstContext](V1_0::ErrorStatus status, - const sp& context) { - errorStatus = status; - burstContext = context; - }); + const Return ret = preparedModel->configureExecutionBurst( + burstCallback, *requestChannelDescriptor, *resultChannelDescriptor, cb); + HANDLE_TRANSPORT_FAILURE(ret); - // check burst - if (!ret.isOk()) { - LOG(ERROR) << "IPreparedModel::configureExecutionBurst failed with description " - << ret.description(); - return nullptr; - } - if (errorStatus != V1_0::ErrorStatus::NONE) { - LOG(ERROR) << "IPreparedModel::configureExecutionBurst failed with status " - << toString(errorStatus); - return nullptr; - } - if (burstContext == nullptr) { - LOG(ERROR) << "IPreparedModel::configureExecutionBurst returned nullptr for burst"; - return nullptr; - } + auto burstContext = NN_TRY(cb.take()); + memoryCache->setBurstContext(burstContext); // create death handler object - BurstContextDeathHandler::Callback onDeathCallback = [requestChannelSender, - resultChannelReceiver] { - requestChannelSender->invalidate(); - resultChannelReceiver->invalidate(); - }; - const sp deathHandler = new BurstContextDeathHandler(onDeathCallback); - - // linkToDeath registers a callback that will be invoked on service death to - // proactively handle service crashes. If the linkToDeath call fails, - // asynchronous calls are susceptible to hangs if the service crashes before - // providing the response. - const hardware::Return deathHandlerRet = burstContext->linkToDeath(deathHandler, 0); - if (!deathHandlerRet.isOk() || deathHandlerRet != true) { - LOG(ERROR) << "ExecutionBurstController::create -- Failed to register a death recipient " - "for the IBurstContext object."; - return nullptr; - } + auto deathHandler = NN_TRY(neuralnetworks::utils::DeathHandler::create(burstContext)); + deathHandler.protectCallbackForLifetimeOfDeathHandler(requestChannelSender.get()); + deathHandler.protectCallbackForLifetimeOfDeathHandler(resultChannelReceiver.get()); // make and return controller - return std::make_unique(requestChannelSender, resultChannelReceiver, - burstContext, callback, deathHandler); + return std::make_shared( + PrivateConstructorTag{}, std::move(fallback), std::move(requestChannelSender), + std::move(resultChannelReceiver), std::move(burstCallback), std::move(burstContext), + std::move(memoryCache), std::move(deathHandler)); } ExecutionBurstController::ExecutionBurstController( - const std::shared_ptr& requestChannelSender, - const std::shared_ptr& resultChannelReceiver, - const sp& burstContext, const sp& callback, - const sp& deathHandler) - : mRequestChannelSender(requestChannelSender), - mResultChannelReceiver(resultChannelReceiver), - mBurstContext(burstContext), - mMemoryCache(callback), - mDeathHandler(deathHandler) {} + PrivateConstructorTag /*tag*/, FallbackFunction fallback, + std::unique_ptr requestChannelSender, + std::unique_ptr resultChannelReceiver, + sp callback, sp burstContext, + std::shared_ptr memoryCache, neuralnetworks::utils::DeathHandler deathHandler) + : kFallback(std::move(fallback)), + mRequestChannelSender(std::move(requestChannelSender)), + mResultChannelReceiver(std::move(resultChannelReceiver)), + mBurstCallback(std::move(callback)), + mBurstContext(std::move(burstContext)), + mMemoryCache(std::move(memoryCache)), + kDeathHandler(std::move(deathHandler)) {} -ExecutionBurstController::~ExecutionBurstController() { - // It is safe to ignore any errors resulting from this unlinkToDeath call - // because the ExecutionBurstController object is already being destroyed - // and its underlying IBurstContext object is no longer being used by the NN - // runtime. - if (mDeathHandler) { - mBurstContext->unlinkToDeath(mDeathHandler).isOk(); +ExecutionBurstController::OptionalCacheHold ExecutionBurstController::cacheMemory( + const nn::SharedMemory& memory) const { + auto [slot, hold] = mMemoryCache->cacheMemory(memory); + return hold; +} + +nn::ExecutionResult, nn::Timing>> +ExecutionBurstController::execute(const nn::Request& request, nn::MeasureTiming measure) const { + // This is the first point when we know an execution is occurring, so begin to collect + // systraces. Note that the first point we can begin collecting systraces in + // ExecutionBurstServer is when the RequestChannelReceiver realizes there is data in the FMQ, so + // ExecutionBurstServer collects systraces at different points in the code. + NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::execute"); + + // if the request is valid but of a higher version than what's supported in burst execution, + // fall back to another execution path + if (const auto version = NN_TRY(hal::utils::makeExecutionFailure(nn::validate(request))); + version > nn::Version::ANDROID_Q) { + // fallback to another execution path if the packet could not be sent + if (kFallback) { + return kFallback(request, measure); + } + return NN_ERROR() << "Request object has features not supported by IBurst::execute"; } -} -static std::tuple, V1_2::Timing, bool> getExecutionResult( - V1_0::ErrorStatus status, std::vector outputShapes, V1_2::Timing timing, - bool fallback) { - auto [n, checkedOutputShapes, checkedTiming] = - getExecutionResult(convertToV1_3(status), std::move(outputShapes), timing); - return {n, convertToV1_2(checkedOutputShapes), convertToV1_2(checkedTiming), fallback}; -} + // clear pools field of request, as they will be provided via slots + const auto requestWithoutPools = + nn::Request{.inputs = request.inputs, .outputs = request.outputs, .pools = {}}; + auto hidlRequest = NN_TRY( + hal::utils::makeExecutionFailure(V1_0::utils::unvalidatedConvert(requestWithoutPools))); + const auto hidlMeasure = NN_TRY(hal::utils::makeExecutionFailure(convert(measure))); -std::tuple, V1_2::Timing, bool> -ExecutionBurstController::compute(const V1_0::Request& request, V1_2::MeasureTiming measure, - const std::vector& memoryIds) { - // This is the first point when we know an execution is occurring, so begin - // to collect systraces. Note that the first point we can begin collecting - // systraces in ExecutionBurstServer is when the RequestChannelReceiver - // realizes there is data in the FMQ, so ExecutionBurstServer collects - // systraces at different points in the code. - NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstController::compute"); + // Ensure that at most one execution is in flight at any given time. + const bool alreadyInFlight = mExecutionInFlight.test_and_set(); + if (alreadyInFlight) { + return NN_ERROR() << "IBurst already has an execution in flight"; + } + const auto guard = base::make_scope_guard([this] { mExecutionInFlight.clear(); }); - std::lock_guard guard(mMutex); + std::vector slots; + std::vector holds; + slots.reserve(request.pools.size()); + holds.reserve(request.pools.size()); + for (const auto& memoryPool : request.pools) { + auto [slot, hold] = mMemoryCache->cacheMemory(std::get(memoryPool)); + slots.push_back(slot); + holds.push_back(std::move(hold)); + } // send request packet - const std::vector slots = mMemoryCache->getSlots(request.pools, memoryIds); - const bool success = mRequestChannelSender->send(request, measure, slots); - if (!success) { - LOG(ERROR) << "Error sending FMQ packet"; - // only use fallback execution path if the packet could not be sent - return getExecutionResult(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming12, - /*fallback=*/true); + const auto sendStatus = mRequestChannelSender->send(hidlRequest, hidlMeasure, slots); + if (!sendStatus.ok()) { + // fallback to another execution path if the packet could not be sent + if (kFallback) { + return kFallback(request, measure); + } + return NN_ERROR() << "Error sending FMQ packet: " << sendStatus.error(); } // get result packet - const auto result = mResultChannelReceiver->getBlocking(); - if (!result) { - LOG(ERROR) << "Error retrieving FMQ packet"; - // only use fallback execution path if the packet could not be sent - return getExecutionResult(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming12, - /*fallback=*/false); - } - - // unpack results and return (only use fallback execution path if the - // packet could not be sent) - auto [status, outputShapes, timing] = std::move(*result); - return getExecutionResult(status, std::move(outputShapes), timing, /*fallback=*/false); + const auto [status, outputShapes, timing] = + NN_TRY(hal::utils::makeExecutionFailure(mResultChannelReceiver->getBlocking())); + return executionCallback(status, outputShapes, timing); } -void ExecutionBurstController::freeMemory(intptr_t key) { - std::lock_guard guard(mMutex); - - bool valid; - int32_t slot; - std::tie(valid, slot) = mMemoryCache->freeMemory(key); - if (valid) { - mBurstContext->freeMemory(slot).isOk(); - } -} - -} // namespace android::nn +} // namespace android::hardware::neuralnetworks::V1_2::utils diff --git a/neuralnetworks/1.2/utils/src/ExecutionBurstServer.cpp b/neuralnetworks/1.2/utils/src/ExecutionBurstServer.cpp index 022548dcd4..50af881d23 100644 --- a/neuralnetworks/1.2/utils/src/ExecutionBurstServer.cpp +++ b/neuralnetworks/1.2/utils/src/ExecutionBurstServer.cpp @@ -17,8 +17,19 @@ #define LOG_TAG "ExecutionBurstServer" #include "ExecutionBurstServer.h" +#include "Conversions.h" +#include "ExecutionBurstUtils.h" #include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -29,134 +40,146 @@ #include #include -#include "ExecutionBurstUtils.h" -#include "HalInterfaces.h" #include "Tracing.h" -namespace android::nn { +namespace android::hardware::neuralnetworks::V1_2::utils { namespace { -// DefaultBurstExecutorWithCache adapts an IPreparedModel so that it can be -// used as an IBurstExecutorWithCache. Specifically, the cache simply stores the -// hidl_memory object, and the execution forwards calls to the provided -// IPreparedModel's "executeSynchronously" method. With this class, hidl_memory -// must be mapped and unmapped for each execution. -class DefaultBurstExecutorWithCache : public ExecutionBurstServer::IBurstExecutorWithCache { - public: - DefaultBurstExecutorWithCache(V1_2::IPreparedModel* preparedModel) - : mpPreparedModel(preparedModel) {} +using neuralnetworks::utils::makeExecutionFailure; - bool isCacheEntryPresent(int32_t slot) const override { - const auto it = mMemoryCache.find(slot); - return (it != mMemoryCache.end()) && it->second.valid(); +constexpr V1_2::Timing kNoTiming = {std::numeric_limits::max(), + std::numeric_limits::max()}; + +nn::GeneralResult> getMemoriesCallback( + V1_0::ErrorStatus status, const hidl_vec& memories) { + HANDLE_HAL_STATUS(status) << "getting burst memories failed with " << toString(status); + std::vector canonicalMemories; + canonicalMemories.reserve(memories.size()); + for (const auto& memory : memories) { + canonicalMemories.push_back(NN_TRY(nn::convert(memory))); } - - void addCacheEntry(const hardware::hidl_memory& memory, int32_t slot) override { - mMemoryCache[slot] = memory; - } - - void removeCacheEntry(int32_t slot) override { mMemoryCache.erase(slot); } - - std::tuple, V1_2::Timing> execute( - const V1_0::Request& request, const std::vector& slots, - V1_2::MeasureTiming measure) override { - // convert slots to pools - hardware::hidl_vec pools(slots.size()); - std::transform(slots.begin(), slots.end(), pools.begin(), - [this](int32_t slot) { return mMemoryCache[slot]; }); - - // create full request - V1_0::Request fullRequest = request; - fullRequest.pools = std::move(pools); - - // setup execution - V1_0::ErrorStatus returnedStatus = V1_0::ErrorStatus::GENERAL_FAILURE; - hardware::hidl_vec returnedOutputShapes; - V1_2::Timing returnedTiming; - auto cb = [&returnedStatus, &returnedOutputShapes, &returnedTiming]( - V1_0::ErrorStatus status, - const hardware::hidl_vec& outputShapes, - const V1_2::Timing& timing) { - returnedStatus = status; - returnedOutputShapes = outputShapes; - returnedTiming = timing; - }; - - // execute - const hardware::Return ret = - mpPreparedModel->executeSynchronously(fullRequest, measure, cb); - if (!ret.isOk() || returnedStatus != V1_0::ErrorStatus::NONE) { - LOG(ERROR) << "IPreparedModelAdapter::execute -- Error executing"; - return {returnedStatus, std::move(returnedOutputShapes), kNoTiming}; - } - - return std::make_tuple(returnedStatus, std::move(returnedOutputShapes), returnedTiming); - } - - private: - V1_2::IPreparedModel* const mpPreparedModel; - std::map mMemoryCache; -}; + return canonicalMemories; +} } // anonymous namespace +ExecutionBurstServer::MemoryCache::MemoryCache(nn::SharedBurst burstExecutor, + sp burstCallback) + : kBurstExecutor(std::move(burstExecutor)), kBurstCallback(std::move(burstCallback)) { + CHECK(kBurstExecutor != nullptr); + CHECK(kBurstCallback != nullptr); +} + +nn::GeneralResult>> +ExecutionBurstServer::MemoryCache::getCacheEntries(const std::vector& slots) { + std::lock_guard guard(mMutex); + NN_TRY(ensureCacheEntriesArePresentLocked(slots)); + + std::vector> results; + results.reserve(slots.size()); + for (int32_t slot : slots) { + results.push_back(NN_TRY(getCacheEntryLocked(slot))); + } + + return results; +} + +nn::GeneralResult ExecutionBurstServer::MemoryCache::ensureCacheEntriesArePresentLocked( + const std::vector& slots) { + const auto slotIsKnown = [this](int32_t slot) + REQUIRES(mMutex) { return mCache.count(slot) > 0; }; + + // find unique unknown slots + std::vector unknownSlots = slots; + std::sort(unknownSlots.begin(), unknownSlots.end()); + auto unknownSlotsEnd = std::unique(unknownSlots.begin(), unknownSlots.end()); + unknownSlotsEnd = std::remove_if(unknownSlots.begin(), unknownSlotsEnd, slotIsKnown); + unknownSlots.erase(unknownSlotsEnd, unknownSlots.end()); + + // quick-exit if all slots are known + if (unknownSlots.empty()) { + return {}; + } + + auto cb = neuralnetworks::utils::CallbackValue(getMemoriesCallback); + + const auto ret = kBurstCallback->getMemories(unknownSlots, cb); + HANDLE_TRANSPORT_FAILURE(ret); + + auto returnedMemories = NN_TRY(cb.take()); + + if (returnedMemories.size() != unknownSlots.size()) { + return NN_ERROR() + << "ExecutionBurstServer::MemoryCache::ensureCacheEntriesArePresentLocked: Error " + "retrieving memories -- count mismatch between requested memories (" + << unknownSlots.size() << ") and returned memories (" << returnedMemories.size() + << ")"; + } + + // add memories to unknown slots + for (size_t i = 0; i < unknownSlots.size(); ++i) { + addCacheEntryLocked(unknownSlots[i], std::move(returnedMemories[i])); + } + + return {}; +} + +nn::GeneralResult> +ExecutionBurstServer::MemoryCache::getCacheEntryLocked(int32_t slot) { + if (const auto iter = mCache.find(slot); iter != mCache.end()) { + return iter->second; + } + return NN_ERROR() + << "ExecutionBurstServer::MemoryCache::getCacheEntryLocked failed because slot " << slot + << " is not present in the cache"; +} + +void ExecutionBurstServer::MemoryCache::addCacheEntryLocked(int32_t slot, nn::SharedMemory memory) { + auto hold = kBurstExecutor->cacheMemory(memory); + mCache.emplace(slot, std::make_pair(std::move(memory), std::move(hold))); +} + +void ExecutionBurstServer::MemoryCache::removeCacheEntry(int32_t slot) { + std::lock_guard guard(mMutex); + mCache.erase(slot); +} + // ExecutionBurstServer methods -sp ExecutionBurstServer::create( +nn::GeneralResult> ExecutionBurstServer::create( const sp& callback, const MQDescriptorSync& requestChannel, - const MQDescriptorSync& resultChannel, - std::shared_ptr executorWithCache, + const MQDescriptorSync& resultChannel, nn::SharedBurst burstExecutor, std::chrono::microseconds pollingTimeWindow) { // check inputs - if (callback == nullptr || executorWithCache == nullptr) { - LOG(ERROR) << "ExecutionBurstServer::create passed a nullptr"; - return nullptr; + if (callback == nullptr || burstExecutor == nullptr) { + return NN_ERROR() << "ExecutionBurstServer::create passed a nullptr"; } // create FMQ objects - std::unique_ptr requestChannelReceiver = - RequestChannelReceiver::create(requestChannel, pollingTimeWindow); - std::unique_ptr resultChannelSender = - ResultChannelSender::create(resultChannel); + auto requestChannelReceiver = + NN_TRY(RequestChannelReceiver::create(requestChannel, pollingTimeWindow)); + auto resultChannelSender = NN_TRY(ResultChannelSender::create(resultChannel)); // check FMQ objects - if (!requestChannelReceiver || !resultChannelSender) { - LOG(ERROR) << "ExecutionBurstServer::create failed to create FastMessageQueue"; - return nullptr; - } + CHECK(requestChannelReceiver != nullptr); + CHECK(resultChannelSender != nullptr); // make and return context - return new ExecutionBurstServer(callback, std::move(requestChannelReceiver), - std::move(resultChannelSender), std::move(executorWithCache)); + return sp::make(PrivateConstructorTag{}, callback, + std::move(requestChannelReceiver), + std::move(resultChannelSender), std::move(burstExecutor)); } -sp ExecutionBurstServer::create( - const sp& callback, const MQDescriptorSync& requestChannel, - const MQDescriptorSync& resultChannel, V1_2::IPreparedModel* preparedModel, - std::chrono::microseconds pollingTimeWindow) { - // check relevant input - if (preparedModel == nullptr) { - LOG(ERROR) << "ExecutionBurstServer::create passed a nullptr"; - return nullptr; - } - - // adapt IPreparedModel to have caching - const std::shared_ptr preparedModelAdapter = - std::make_shared(preparedModel); - - // make and return context - return ExecutionBurstServer::create(callback, requestChannel, resultChannel, - preparedModelAdapter, pollingTimeWindow); -} - -ExecutionBurstServer::ExecutionBurstServer( - const sp& callback, std::unique_ptr requestChannel, - std::unique_ptr resultChannel, - std::shared_ptr executorWithCache) +ExecutionBurstServer::ExecutionBurstServer(PrivateConstructorTag /*tag*/, + const sp& callback, + std::unique_ptr requestChannel, + std::unique_ptr resultChannel, + nn::SharedBurst burstExecutor) : mCallback(callback), mRequestChannelReceiver(std::move(requestChannel)), mResultChannelSender(std::move(resultChannel)), - mExecutorWithCache(std::move(executorWithCache)) { + mBurstExecutor(std::move(burstExecutor)), + mMemoryCache(mBurstExecutor, mCallback) { // TODO: highly document the threading behavior of this class mWorker = std::thread([this] { task(); }); } @@ -170,51 +193,9 @@ ExecutionBurstServer::~ExecutionBurstServer() { mWorker.join(); } -hardware::Return ExecutionBurstServer::freeMemory(int32_t slot) { - std::lock_guard hold(mMutex); - mExecutorWithCache->removeCacheEntry(slot); - return hardware::Void(); -} - -void ExecutionBurstServer::ensureCacheEntriesArePresentLocked(const std::vector& slots) { - const auto slotIsKnown = [this](int32_t slot) { - return mExecutorWithCache->isCacheEntryPresent(slot); - }; - - // find unique unknown slots - std::vector unknownSlots = slots; - auto unknownSlotsEnd = unknownSlots.end(); - std::sort(unknownSlots.begin(), unknownSlotsEnd); - unknownSlotsEnd = std::unique(unknownSlots.begin(), unknownSlotsEnd); - unknownSlotsEnd = std::remove_if(unknownSlots.begin(), unknownSlotsEnd, slotIsKnown); - unknownSlots.erase(unknownSlotsEnd, unknownSlots.end()); - - // quick-exit if all slots are known - if (unknownSlots.empty()) { - return; - } - - V1_0::ErrorStatus errorStatus = V1_0::ErrorStatus::GENERAL_FAILURE; - std::vector returnedMemories; - auto cb = [&errorStatus, &returnedMemories]( - V1_0::ErrorStatus status, - const hardware::hidl_vec& memories) { - errorStatus = status; - returnedMemories = memories; - }; - - const hardware::Return ret = mCallback->getMemories(unknownSlots, cb); - - if (!ret.isOk() || errorStatus != V1_0::ErrorStatus::NONE || - returnedMemories.size() != unknownSlots.size()) { - LOG(ERROR) << "Error retrieving memories"; - return; - } - - // add memories to unknown slots - for (size_t i = 0; i < unknownSlots.size(); ++i) { - mExecutorWithCache->addCacheEntry(returnedMemories[i], unknownSlots[i]); - } +Return ExecutionBurstServer::freeMemory(int32_t slot) { + mMemoryCache.removeCacheEntry(slot); + return Void(); } void ExecutionBurstServer::task() { @@ -223,38 +204,65 @@ void ExecutionBurstServer::task() { // receive request auto arguments = mRequestChannelReceiver->getBlocking(); - // if the request packet was not properly received, return a generic - // error and skip the execution + // if the request packet was not properly received, return a generic error and skip the + // execution // - // if the burst is being torn down, skip the execution so the "task" - // function can end - if (!arguments) { + // if the burst is being torn down, skip the execution so the "task" function can end + if (!arguments.has_value()) { if (!mTeardown) { mResultChannelSender->send(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming); } continue; } - // otherwise begin tracing execution - NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, - "ExecutionBurstServer getting memory, executing, and returning results"); + // unpack the arguments; types are Request, std::vector, and MeasureTiming, + // respectively + const auto [requestWithoutPools, slotsOfPools, measure] = std::move(arguments).value(); - // unpack the arguments; types are Request, std::vector, and - // MeasureTiming, respectively - const auto [requestWithoutPools, slotsOfPools, measure] = std::move(*arguments); - - // ensure executor with cache has required memory - std::lock_guard hold(mMutex); - ensureCacheEntriesArePresentLocked(slotsOfPools); - - // perform computation; types are ErrorStatus, hidl_vec, - // and Timing, respectively - const auto [errorStatus, outputShapes, returnedTiming] = - mExecutorWithCache->execute(requestWithoutPools, slotsOfPools, measure); + auto result = execute(requestWithoutPools, slotsOfPools, measure); // return result - mResultChannelSender->send(errorStatus, outputShapes, returnedTiming); + if (result.has_value()) { + const auto& [outputShapes, timing] = result.value(); + mResultChannelSender->send(V1_0::ErrorStatus::NONE, outputShapes, timing); + } else { + const auto& [message, code, outputShapes] = result.error(); + LOG(ERROR) << "IBurst::execute failed with " << code << ": " << message; + mResultChannelSender->send(convert(code).value(), convert(outputShapes).value(), + kNoTiming); + } } } -} // namespace android::nn +nn::ExecutionResult, Timing>> ExecutionBurstServer::execute( + const V1_0::Request& requestWithoutPools, const std::vector& slotsOfPools, + MeasureTiming measure) { + NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, + "ExecutionBurstServer getting memory, executing, and returning results"); + + // ensure executor with cache has required memory + const auto cacheEntries = + NN_TRY(makeExecutionFailure(mMemoryCache.getCacheEntries(slotsOfPools))); + + // convert request, populating its pools + // This code performs an unvalidated convert because the request object without its pools is + // invalid because it is incomplete. Instead, the validation is performed after the memory pools + // have been added to the request. + auto canonicalRequest = + NN_TRY(makeExecutionFailure(nn::unvalidatedConvert(requestWithoutPools))); + CHECK(canonicalRequest.pools.empty()); + std::transform(cacheEntries.begin(), cacheEntries.end(), + std::back_inserter(canonicalRequest.pools), + [](const auto& cacheEntry) { return cacheEntry.first; }); + NN_TRY(makeExecutionFailure(validate(canonicalRequest))); + + nn::MeasureTiming canonicalMeasure = NN_TRY(makeExecutionFailure(nn::convert(measure))); + + const auto [outputShapes, timing] = + NN_TRY(mBurstExecutor->execute(canonicalRequest, canonicalMeasure)); + + return std::make_pair(NN_TRY(makeExecutionFailure(convert(outputShapes))), + NN_TRY(makeExecutionFailure(convert(timing)))); +} + +} // namespace android::hardware::neuralnetworks::V1_2::utils diff --git a/neuralnetworks/1.2/utils/src/ExecutionBurstUtils.cpp b/neuralnetworks/1.2/utils/src/ExecutionBurstUtils.cpp index f0275f933a..ca3a52c17b 100644 --- a/neuralnetworks/1.2/utils/src/ExecutionBurstUtils.cpp +++ b/neuralnetworks/1.2/utils/src/ExecutionBurstUtils.cpp @@ -19,11 +19,15 @@ #include "ExecutionBurstUtils.h" #include +#include #include #include #include #include #include +#include +#include +#include #include #include @@ -39,84 +43,97 @@ namespace { constexpr V1_2::Timing kNoTiming = {std::numeric_limits::max(), std::numeric_limits::max()}; +std::chrono::microseconds getPollingTimeWindow(const std::string& property) { + constexpr int32_t kDefaultPollingTimeWindow = 0; +#ifdef NN_DEBUGGABLE + constexpr int32_t kMinPollingTimeWindow = 0; + const int32_t selectedPollingTimeWindow = + base::GetIntProperty(property, kDefaultPollingTimeWindow, kMinPollingTimeWindow); + return std::chrono::microseconds(selectedPollingTimeWindow); +#else + (void)property; + return std::chrono::microseconds(kDefaultPollingTimeWindow); +#endif // NN_DEBUGGABLE +} + +} // namespace + +std::chrono::microseconds getBurstControllerPollingTimeWindow() { + return getPollingTimeWindow("debug.nn.burst-controller-polling-window"); +} + +std::chrono::microseconds getBurstServerPollingTimeWindow() { + return getPollingTimeWindow("debug.nn.burst-server-polling-window"); } // serialize a request into a packet std::vector serialize(const V1_0::Request& request, V1_2::MeasureTiming measure, const std::vector& slots) { // count how many elements need to be sent for a request - size_t count = 2 + request.inputs.size() + request.outputs.size() + request.pools.size(); + size_t count = 2 + request.inputs.size() + request.outputs.size() + slots.size(); for (const auto& input : request.inputs) { count += input.dimensions.size(); } for (const auto& output : request.outputs) { count += output.dimensions.size(); } + CHECK_LE(count, std::numeric_limits::max()); // create buffer to temporarily store elements std::vector data; data.reserve(count); // package packetInfo - { - FmqRequestDatum datum; - datum.packetInformation( - {/*.packetSize=*/static_cast(count), - /*.numberOfInputOperands=*/static_cast(request.inputs.size()), - /*.numberOfOutputOperands=*/static_cast(request.outputs.size()), - /*.numberOfPools=*/static_cast(request.pools.size())}); - data.push_back(datum); - } + data.emplace_back(); + data.back().packetInformation( + {.packetSize = static_cast(count), + .numberOfInputOperands = static_cast(request.inputs.size()), + .numberOfOutputOperands = static_cast(request.outputs.size()), + .numberOfPools = static_cast(slots.size())}); // package input data for (const auto& input : request.inputs) { // package operand information - FmqRequestDatum datum; - datum.inputOperandInformation( - {/*.hasNoValue=*/input.hasNoValue, - /*.location=*/input.location, - /*.numberOfDimensions=*/static_cast(input.dimensions.size())}); - data.push_back(datum); + data.emplace_back(); + data.back().inputOperandInformation( + {.hasNoValue = input.hasNoValue, + .location = input.location, + .numberOfDimensions = static_cast(input.dimensions.size())}); // package operand dimensions for (uint32_t dimension : input.dimensions) { - FmqRequestDatum datum; - datum.inputOperandDimensionValue(dimension); - data.push_back(datum); + data.emplace_back(); + data.back().inputOperandDimensionValue(dimension); } } // package output data for (const auto& output : request.outputs) { // package operand information - FmqRequestDatum datum; - datum.outputOperandInformation( - {/*.hasNoValue=*/output.hasNoValue, - /*.location=*/output.location, - /*.numberOfDimensions=*/static_cast(output.dimensions.size())}); - data.push_back(datum); + data.emplace_back(); + data.back().outputOperandInformation( + {.hasNoValue = output.hasNoValue, + .location = output.location, + .numberOfDimensions = static_cast(output.dimensions.size())}); // package operand dimensions for (uint32_t dimension : output.dimensions) { - FmqRequestDatum datum; - datum.outputOperandDimensionValue(dimension); - data.push_back(datum); + data.emplace_back(); + data.back().outputOperandDimensionValue(dimension); } } // package pool identifier for (int32_t slot : slots) { - FmqRequestDatum datum; - datum.poolIdentifier(slot); - data.push_back(datum); + data.emplace_back(); + data.back().poolIdentifier(slot); } // package measureTiming - { - FmqRequestDatum datum; - datum.measureTiming(measure); - data.push_back(datum); - } + data.emplace_back(); + data.back().measureTiming(measure); + + CHECK_EQ(data.size(), count); // return packet return data; @@ -137,46 +154,38 @@ std::vector serialize(V1_0::ErrorStatus errorStatus, data.reserve(count); // package packetInfo - { - FmqResultDatum datum; - datum.packetInformation({/*.packetSize=*/static_cast(count), - /*.errorStatus=*/errorStatus, - /*.numberOfOperands=*/static_cast(outputShapes.size())}); - data.push_back(datum); - } + data.emplace_back(); + data.back().packetInformation({.packetSize = static_cast(count), + .errorStatus = errorStatus, + .numberOfOperands = static_cast(outputShapes.size())}); // package output shape data for (const auto& operand : outputShapes) { // package operand information - FmqResultDatum::OperandInformation info{}; - info.isSufficient = operand.isSufficient; - info.numberOfDimensions = static_cast(operand.dimensions.size()); - - FmqResultDatum datum; - datum.operandInformation(info); - data.push_back(datum); + data.emplace_back(); + data.back().operandInformation( + {.isSufficient = operand.isSufficient, + .numberOfDimensions = static_cast(operand.dimensions.size())}); // package operand dimensions for (uint32_t dimension : operand.dimensions) { - FmqResultDatum datum; - datum.operandDimensionValue(dimension); - data.push_back(datum); + data.emplace_back(); + data.back().operandDimensionValue(dimension); } } // package executionTiming - { - FmqResultDatum datum; - datum.executionTiming(timing); - data.push_back(datum); - } + data.emplace_back(); + data.back().executionTiming(timing); + + CHECK_EQ(data.size(), count); // return result return data; } // deserialize request -std::optional, V1_2::MeasureTiming>> deserialize( +nn::Result, V1_2::MeasureTiming>> deserialize( const std::vector& data) { using discriminator = FmqRequestDatum::hidl_discriminator; @@ -184,8 +193,7 @@ std::optional, V1_2::MeasureTimin // validate packet information if (data.size() == 0 || data[index].getDiscriminator() != discriminator::packetInformation) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage packet information @@ -198,8 +206,7 @@ std::optional, V1_2::MeasureTimin // verify packet size if (data.size() != packetSize) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage input operands @@ -208,8 +215,7 @@ std::optional, V1_2::MeasureTimin for (size_t operand = 0; operand < numberOfInputOperands; ++operand) { // validate input operand information if (data[index].getDiscriminator() != discriminator::inputOperandInformation) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage operand information @@ -226,8 +232,7 @@ std::optional, V1_2::MeasureTimin for (size_t i = 0; i < numberOfDimensions; ++i) { // validate dimension if (data[index].getDiscriminator() != discriminator::inputOperandDimensionValue) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage dimension @@ -240,7 +245,7 @@ std::optional, V1_2::MeasureTimin // store result inputs.push_back( - {/*.hasNoValue=*/hasNoValue, /*.location=*/location, /*.dimensions=*/dimensions}); + {.hasNoValue = hasNoValue, .location = location, .dimensions = dimensions}); } // unpackage output operands @@ -249,8 +254,7 @@ std::optional, V1_2::MeasureTimin for (size_t operand = 0; operand < numberOfOutputOperands; ++operand) { // validate output operand information if (data[index].getDiscriminator() != discriminator::outputOperandInformation) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage operand information @@ -267,8 +271,7 @@ std::optional, V1_2::MeasureTimin for (size_t i = 0; i < numberOfDimensions; ++i) { // validate dimension if (data[index].getDiscriminator() != discriminator::outputOperandDimensionValue) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage dimension @@ -281,7 +284,7 @@ std::optional, V1_2::MeasureTimin // store result outputs.push_back( - {/*.hasNoValue=*/hasNoValue, /*.location=*/location, /*.dimensions=*/dimensions}); + {.hasNoValue = hasNoValue, .location = location, .dimensions = dimensions}); } // unpackage pools @@ -290,8 +293,7 @@ std::optional, V1_2::MeasureTimin for (size_t pool = 0; pool < numberOfPools; ++pool) { // validate input operand information if (data[index].getDiscriminator() != discriminator::poolIdentifier) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage operand information @@ -304,8 +306,7 @@ std::optional, V1_2::MeasureTimin // validate measureTiming if (data[index].getDiscriminator() != discriminator::measureTiming) { - LOG(ERROR) << "FMQ Request packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Request packet ill-formed"; } // unpackage measureTiming @@ -314,27 +315,23 @@ std::optional, V1_2::MeasureTimin // validate packet information if (index != packetSize) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // return request - V1_0::Request request = {/*.inputs=*/inputs, /*.outputs=*/outputs, /*.pools=*/{}}; + V1_0::Request request = {.inputs = inputs, .outputs = outputs, .pools = {}}; return std::make_tuple(std::move(request), std::move(slots), measure); } // deserialize a packet into the result -std::optional, V1_2::Timing>> -deserialize(const std::vector& data) { +nn::Result, V1_2::Timing>> deserialize( + const std::vector& data) { using discriminator = FmqResultDatum::hidl_discriminator; - - std::vector outputShapes; size_t index = 0; // validate packet information if (data.size() == 0 || data[index].getDiscriminator() != discriminator::packetInformation) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // unpackage packet information @@ -346,16 +343,16 @@ deserialize(const std::vector& data) { // verify packet size if (data.size() != packetSize) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // unpackage operands + std::vector outputShapes; + outputShapes.reserve(numberOfOperands); for (size_t operand = 0; operand < numberOfOperands; ++operand) { // validate operand information if (data[index].getDiscriminator() != discriminator::operandInformation) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // unpackage operand information @@ -370,8 +367,7 @@ deserialize(const std::vector& data) { for (size_t i = 0; i < numberOfDimensions; ++i) { // validate dimension if (data[index].getDiscriminator() != discriminator::operandDimensionValue) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // unpackage dimension @@ -383,13 +379,12 @@ deserialize(const std::vector& data) { } // store result - outputShapes.push_back({/*.dimensions=*/dimensions, /*.isSufficient=*/isSufficient}); + outputShapes.push_back({.dimensions = dimensions, .isSufficient = isSufficient}); } // validate execution timing if (data[index].getDiscriminator() != discriminator::executionTiming) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // unpackage execution timing @@ -398,123 +393,113 @@ deserialize(const std::vector& data) { // validate packet information if (index != packetSize) { - LOG(ERROR) << "FMQ Result packet ill-formed"; - return std::nullopt; + return NN_ERROR() << "FMQ Result packet ill-formed"; } // return result return std::make_tuple(errorStatus, std::move(outputShapes), timing); } -V1_0::ErrorStatus legacyConvertResultCodeToErrorStatus(int resultCode) { - return convertToV1_0(convertResultCodeToErrorStatus(resultCode)); -} - // RequestChannelSender methods -std::pair, const FmqRequestDescriptor*> +nn::GeneralResult< + std::pair, const MQDescriptorSync*>> RequestChannelSender::create(size_t channelLength) { - std::unique_ptr fmqRequestChannel = - std::make_unique(channelLength, /*confEventFlag=*/true); - if (!fmqRequestChannel->isValid()) { - LOG(ERROR) << "Unable to create RequestChannelSender"; - return {nullptr, nullptr}; + auto requestChannelSender = + std::make_unique(PrivateConstructorTag{}, channelLength); + if (!requestChannelSender->mFmqRequestChannel.isValid()) { + return NN_ERROR() << "Unable to create RequestChannelSender"; } - const FmqRequestDescriptor* descriptor = fmqRequestChannel->getDesc(); - return std::make_pair(std::make_unique(std::move(fmqRequestChannel)), - descriptor); + const MQDescriptorSync* descriptor = + requestChannelSender->mFmqRequestChannel.getDesc(); + return std::make_pair(std::move(requestChannelSender), descriptor); } -RequestChannelSender::RequestChannelSender(std::unique_ptr fmqRequestChannel) - : mFmqRequestChannel(std::move(fmqRequestChannel)) {} +RequestChannelSender::RequestChannelSender(PrivateConstructorTag /*tag*/, size_t channelLength) + : mFmqRequestChannel(channelLength, /*configureEventFlagWord=*/true) {} -bool RequestChannelSender::send(const V1_0::Request& request, V1_2::MeasureTiming measure, - const std::vector& slots) { +nn::Result RequestChannelSender::send(const V1_0::Request& request, + V1_2::MeasureTiming measure, + const std::vector& slots) { const std::vector serialized = serialize(request, measure, slots); return sendPacket(serialized); } -bool RequestChannelSender::sendPacket(const std::vector& packet) { +nn::Result RequestChannelSender::sendPacket(const std::vector& packet) { if (!mValid) { - return false; + return NN_ERROR() << "FMQ object is invalid"; } - if (packet.size() > mFmqRequestChannel->availableToWrite()) { - LOG(ERROR) - << "RequestChannelSender::sendPacket -- packet size exceeds size available in FMQ"; - return false; + if (packet.size() > mFmqRequestChannel.availableToWrite()) { + return NN_ERROR() + << "RequestChannelSender::sendPacket -- packet size exceeds size available in FMQ"; } - // Always send the packet with "blocking" because this signals the futex and - // unblocks the consumer if it is waiting on the futex. - return mFmqRequestChannel->writeBlocking(packet.data(), packet.size()); + // Always send the packet with "blocking" because this signals the futex and unblocks the + // consumer if it is waiting on the futex. + const bool success = mFmqRequestChannel.writeBlocking(packet.data(), packet.size()); + if (!success) { + return NN_ERROR() + << "RequestChannelSender::sendPacket -- FMQ's writeBlocking returned an error"; + } + + return {}; } -void RequestChannelSender::invalidate() { +void RequestChannelSender::notifyAsDeadObject() { mValid = false; } // RequestChannelReceiver methods -std::unique_ptr RequestChannelReceiver::create( - const FmqRequestDescriptor& requestChannel, std::chrono::microseconds pollingTimeWindow) { - std::unique_ptr fmqRequestChannel = - std::make_unique(requestChannel); +nn::GeneralResult> RequestChannelReceiver::create( + const MQDescriptorSync& requestChannel, + std::chrono::microseconds pollingTimeWindow) { + auto requestChannelReceiver = std::make_unique( + PrivateConstructorTag{}, requestChannel, pollingTimeWindow); - if (!fmqRequestChannel->isValid()) { - LOG(ERROR) << "Unable to create RequestChannelReceiver"; - return nullptr; + if (!requestChannelReceiver->mFmqRequestChannel.isValid()) { + return NN_ERROR() << "Unable to create RequestChannelReceiver"; } - if (fmqRequestChannel->getEventFlagWord() == nullptr) { - LOG(ERROR) - << "RequestChannelReceiver::create was passed an MQDescriptor without an EventFlag"; - return nullptr; + if (requestChannelReceiver->mFmqRequestChannel.getEventFlagWord() == nullptr) { + return NN_ERROR() + << "RequestChannelReceiver::create was passed an MQDescriptor without an EventFlag"; } - return std::make_unique(std::move(fmqRequestChannel), - pollingTimeWindow); + return requestChannelReceiver; } -RequestChannelReceiver::RequestChannelReceiver(std::unique_ptr fmqRequestChannel, - std::chrono::microseconds pollingTimeWindow) - : mFmqRequestChannel(std::move(fmqRequestChannel)), kPollingTimeWindow(pollingTimeWindow) {} +RequestChannelReceiver::RequestChannelReceiver( + PrivateConstructorTag /*tag*/, const MQDescriptorSync& requestChannel, + std::chrono::microseconds pollingTimeWindow) + : mFmqRequestChannel(requestChannel), kPollingTimeWindow(pollingTimeWindow) {} -std::optional, V1_2::MeasureTiming>> +nn::Result, V1_2::MeasureTiming>> RequestChannelReceiver::getBlocking() { - const auto packet = getPacketBlocking(); - if (!packet) { - return std::nullopt; - } - - return deserialize(*packet); + const auto packet = NN_TRY(getPacketBlocking()); + return deserialize(packet); } void RequestChannelReceiver::invalidate() { mTeardown = true; // force unblock - // ExecutionBurstServer is by default waiting on a request packet. If the - // client process destroys its burst object, the server may still be waiting - // on the futex. This force unblock wakes up any thread waiting on the - // futex. - // TODO: look for a different/better way to signal/notify the futex to wake - // up any thread waiting on it - FmqRequestDatum datum; - datum.packetInformation({/*.packetSize=*/0, /*.numberOfInputOperands=*/0, - /*.numberOfOutputOperands=*/0, /*.numberOfPools=*/0}); - mFmqRequestChannel->writeBlocking(&datum, 1); + // ExecutionBurstServer is by default waiting on a request packet. If the client process + // destroys its burst object, the server may still be waiting on the futex. This force unblock + // wakes up any thread waiting on the futex. + const auto data = serialize(V1_0::Request{}, V1_2::MeasureTiming::NO, {}); + mFmqRequestChannel.writeBlocking(data.data(), data.size()); } -std::optional> RequestChannelReceiver::getPacketBlocking() { +nn::Result> RequestChannelReceiver::getPacketBlocking() { if (mTeardown) { - return std::nullopt; + return NN_ERROR() << "FMQ object is being torn down"; } - // First spend time polling if results are available in FMQ instead of - // waiting on the futex. Polling is more responsive (yielding lower - // latencies), but can take up more power, so only poll for a limited period - // of time. + // First spend time polling if results are available in FMQ instead of waiting on the futex. + // Polling is more responsive (yielding lower latencies), but can take up more power, so only + // poll for a limited period of time. auto& getCurrentTime = std::chrono::high_resolution_clock::now; const auto timeToStopPolling = getCurrentTime() + kPollingTimeWindow; @@ -522,173 +507,144 @@ std::optional> RequestChannelReceiver::getPacketBlo while (getCurrentTime() < timeToStopPolling) { // if class is being torn down, immediately return if (mTeardown.load(std::memory_order_relaxed)) { - return std::nullopt; + return NN_ERROR() << "FMQ object is being torn down"; } - // Check if data is available. If it is, immediately retrieve it and - // return. - const size_t available = mFmqRequestChannel->availableToRead(); + // Check if data is available. If it is, immediately retrieve it and return. + const size_t available = mFmqRequestChannel.availableToRead(); if (available > 0) { - // This is the first point when we know an execution is occurring, - // so begin to collect systraces. Note that a similar systrace does - // not exist at the corresponding point in - // ResultChannelReceiver::getPacketBlocking because the execution is - // already in flight. - NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, - "ExecutionBurstServer getting packet"); std::vector packet(available); - const bool success = mFmqRequestChannel->read(packet.data(), available); + const bool success = mFmqRequestChannel.readBlocking(packet.data(), available); if (!success) { - LOG(ERROR) << "Error receiving packet"; - return std::nullopt; + return NN_ERROR() << "Error receiving packet"; } - return std::make_optional(std::move(packet)); + return packet; } } - // If we get to this point, we either stopped polling because it was taking - // too long or polling was not allowed. Instead, perform a blocking call - // which uses a futex to save power. + // If we get to this point, we either stopped polling because it was taking too long or polling + // was not allowed. Instead, perform a blocking call which uses a futex to save power. // wait for request packet and read first element of request packet FmqRequestDatum datum; - bool success = mFmqRequestChannel->readBlocking(&datum, 1); - - // This is the first point when we know an execution is occurring, so begin - // to collect systraces. Note that a similar systrace does not exist at the - // corresponding point in ResultChannelReceiver::getPacketBlocking because - // the execution is already in flight. - NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION, "ExecutionBurstServer getting packet"); + bool success = mFmqRequestChannel.readBlocking(&datum, 1); // retrieve remaining elements - // NOTE: all of the data is already available at this point, so there's no - // need to do a blocking wait to wait for more data. This is known because - // in FMQ, all writes are published (made available) atomically. Currently, - // the producer always publishes the entire packet in one function call, so - // if the first element of the packet is available, the remaining elements - // are also available. - const size_t count = mFmqRequestChannel->availableToRead(); + // NOTE: all of the data is already available at this point, so there's no need to do a blocking + // wait to wait for more data. This is known because in FMQ, all writes are published (made + // available) atomically. Currently, the producer always publishes the entire packet in one + // function call, so if the first element of the packet is available, the remaining elements are + // also available. + const size_t count = mFmqRequestChannel.availableToRead(); std::vector packet(count + 1); std::memcpy(&packet.front(), &datum, sizeof(datum)); - success &= mFmqRequestChannel->read(packet.data() + 1, count); + success &= mFmqRequestChannel.read(packet.data() + 1, count); // terminate loop if (mTeardown) { - return std::nullopt; + return NN_ERROR() << "FMQ object is being torn down"; } // ensure packet was successfully received if (!success) { - LOG(ERROR) << "Error receiving packet"; - return std::nullopt; + return NN_ERROR() << "Error receiving packet"; } - return std::make_optional(std::move(packet)); + return packet; } // ResultChannelSender methods -std::unique_ptr ResultChannelSender::create( - const FmqResultDescriptor& resultChannel) { - std::unique_ptr fmqResultChannel = - std::make_unique(resultChannel); +nn::GeneralResult> ResultChannelSender::create( + const MQDescriptorSync& resultChannel) { + auto resultChannelSender = + std::make_unique(PrivateConstructorTag{}, resultChannel); - if (!fmqResultChannel->isValid()) { - LOG(ERROR) << "Unable to create RequestChannelSender"; - return nullptr; + if (!resultChannelSender->mFmqResultChannel.isValid()) { + return NN_ERROR() << "Unable to create RequestChannelSender"; } - if (fmqResultChannel->getEventFlagWord() == nullptr) { - LOG(ERROR) << "ResultChannelSender::create was passed an MQDescriptor without an EventFlag"; - return nullptr; + if (resultChannelSender->mFmqResultChannel.getEventFlagWord() == nullptr) { + return NN_ERROR() + << "ResultChannelSender::create was passed an MQDescriptor without an EventFlag"; } - return std::make_unique(std::move(fmqResultChannel)); + return resultChannelSender; } -ResultChannelSender::ResultChannelSender(std::unique_ptr fmqResultChannel) - : mFmqResultChannel(std::move(fmqResultChannel)) {} +ResultChannelSender::ResultChannelSender(PrivateConstructorTag /*tag*/, + const MQDescriptorSync& resultChannel) + : mFmqResultChannel(resultChannel) {} -bool ResultChannelSender::send(V1_0::ErrorStatus errorStatus, +void ResultChannelSender::send(V1_0::ErrorStatus errorStatus, const std::vector& outputShapes, V1_2::Timing timing) { const std::vector serialized = serialize(errorStatus, outputShapes, timing); - return sendPacket(serialized); + sendPacket(serialized); } -bool ResultChannelSender::sendPacket(const std::vector& packet) { - if (packet.size() > mFmqResultChannel->availableToWrite()) { +void ResultChannelSender::sendPacket(const std::vector& packet) { + if (packet.size() > mFmqResultChannel.availableToWrite()) { LOG(ERROR) << "ResultChannelSender::sendPacket -- packet size exceeds size available in FMQ"; const std::vector errorPacket = serialize(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming); - // Always send the packet with "blocking" because this signals the futex - // and unblocks the consumer if it is waiting on the futex. - return mFmqResultChannel->writeBlocking(errorPacket.data(), errorPacket.size()); + // Always send the packet with "blocking" because this signals the futex and unblocks the + // consumer if it is waiting on the futex. + mFmqResultChannel.writeBlocking(errorPacket.data(), errorPacket.size()); + } else { + // Always send the packet with "blocking" because this signals the futex and unblocks the + // consumer if it is waiting on the futex. + mFmqResultChannel.writeBlocking(packet.data(), packet.size()); } - - // Always send the packet with "blocking" because this signals the futex and - // unblocks the consumer if it is waiting on the futex. - return mFmqResultChannel->writeBlocking(packet.data(), packet.size()); } // ResultChannelReceiver methods -std::pair, const FmqResultDescriptor*> +nn::GeneralResult< + std::pair, const MQDescriptorSync*>> ResultChannelReceiver::create(size_t channelLength, std::chrono::microseconds pollingTimeWindow) { - std::unique_ptr fmqResultChannel = - std::make_unique(channelLength, /*confEventFlag=*/true); - if (!fmqResultChannel->isValid()) { - LOG(ERROR) << "Unable to create ResultChannelReceiver"; - return {nullptr, nullptr}; + auto resultChannelReceiver = std::make_unique( + PrivateConstructorTag{}, channelLength, pollingTimeWindow); + if (!resultChannelReceiver->mFmqResultChannel.isValid()) { + return NN_ERROR() << "Unable to create ResultChannelReceiver"; } - const FmqResultDescriptor* descriptor = fmqResultChannel->getDesc(); - return std::make_pair( - std::make_unique(std::move(fmqResultChannel), pollingTimeWindow), - descriptor); + const MQDescriptorSync* descriptor = + resultChannelReceiver->mFmqResultChannel.getDesc(); + return std::make_pair(std::move(resultChannelReceiver), descriptor); } -ResultChannelReceiver::ResultChannelReceiver(std::unique_ptr fmqResultChannel, +ResultChannelReceiver::ResultChannelReceiver(PrivateConstructorTag /*tag*/, size_t channelLength, std::chrono::microseconds pollingTimeWindow) - : mFmqResultChannel(std::move(fmqResultChannel)), kPollingTimeWindow(pollingTimeWindow) {} + : mFmqResultChannel(channelLength, /*configureEventFlagWord=*/true), + kPollingTimeWindow(pollingTimeWindow) {} -std::optional, V1_2::Timing>> +nn::Result, V1_2::Timing>> ResultChannelReceiver::getBlocking() { - const auto packet = getPacketBlocking(); - if (!packet) { - return std::nullopt; - } - - return deserialize(*packet); + const auto packet = NN_TRY(getPacketBlocking()); + return deserialize(packet); } -void ResultChannelReceiver::invalidate() { +void ResultChannelReceiver::notifyAsDeadObject() { mValid = false; // force unblock - // ExecutionBurstController waits on a result packet after sending a - // request. If the driver containing ExecutionBurstServer crashes, the - // controller may be waiting on the futex. This force unblock wakes up any - // thread waiting on the futex. - // TODO: look for a different/better way to signal/notify the futex to - // wake up any thread waiting on it - FmqResultDatum datum; - datum.packetInformation({/*.packetSize=*/0, - /*.errorStatus=*/V1_0::ErrorStatus::GENERAL_FAILURE, - /*.numberOfOperands=*/0}); - mFmqResultChannel->writeBlocking(&datum, 1); + // ExecutionBurstController waits on a result packet after sending a request. If the driver + // containing ExecutionBurstServer crashes, the controller may be waiting on the futex. This + // force unblock wakes up any thread waiting on the futex. + const auto data = serialize(V1_0::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming); + mFmqResultChannel.writeBlocking(data.data(), data.size()); } -std::optional> ResultChannelReceiver::getPacketBlocking() { +nn::Result> ResultChannelReceiver::getPacketBlocking() { if (!mValid) { - return std::nullopt; + return NN_ERROR() << "FMQ object is invalid"; } - // First spend time polling if results are available in FMQ instead of - // waiting on the futex. Polling is more responsive (yielding lower - // latencies), but can take up more power, so only poll for a limited period - // of time. + // First spend time polling if results are available in FMQ instead of waiting on the futex. + // Polling is more responsive (yielding lower latencies), but can take up more power, so only + // poll for a limited period of time. auto& getCurrentTime = std::chrono::high_resolution_clock::now; const auto timeToStopPolling = getCurrentTime() + kPollingTimeWindow; @@ -696,54 +652,49 @@ std::optional> ResultChannelReceiver::getPacketBlock while (getCurrentTime() < timeToStopPolling) { // if class is being torn down, immediately return if (!mValid.load(std::memory_order_relaxed)) { - return std::nullopt; + return NN_ERROR() << "FMQ object is invalid"; } - // Check if data is available. If it is, immediately retrieve it and - // return. - const size_t available = mFmqResultChannel->availableToRead(); + // Check if data is available. If it is, immediately retrieve it and return. + const size_t available = mFmqResultChannel.availableToRead(); if (available > 0) { std::vector packet(available); - const bool success = mFmqResultChannel->read(packet.data(), available); + const bool success = mFmqResultChannel.readBlocking(packet.data(), available); if (!success) { - LOG(ERROR) << "Error receiving packet"; - return std::nullopt; + return NN_ERROR() << "Error receiving packet"; } - return std::make_optional(std::move(packet)); + return packet; } } - // If we get to this point, we either stopped polling because it was taking - // too long or polling was not allowed. Instead, perform a blocking call - // which uses a futex to save power. + // If we get to this point, we either stopped polling because it was taking too long or polling + // was not allowed. Instead, perform a blocking call which uses a futex to save power. // wait for result packet and read first element of result packet FmqResultDatum datum; - bool success = mFmqResultChannel->readBlocking(&datum, 1); + bool success = mFmqResultChannel.readBlocking(&datum, 1); // retrieve remaining elements - // NOTE: all of the data is already available at this point, so there's no - // need to do a blocking wait to wait for more data. This is known because - // in FMQ, all writes are published (made available) atomically. Currently, - // the producer always publishes the entire packet in one function call, so - // if the first element of the packet is available, the remaining elements - // are also available. - const size_t count = mFmqResultChannel->availableToRead(); + // NOTE: all of the data is already available at this point, so there's no need to do a blocking + // wait to wait for more data. This is known because in FMQ, all writes are published (made + // available) atomically. Currently, the producer always publishes the entire packet in one + // function call, so if the first element of the packet is available, the remaining elements are + // also available. + const size_t count = mFmqResultChannel.availableToRead(); std::vector packet(count + 1); std::memcpy(&packet.front(), &datum, sizeof(datum)); - success &= mFmqResultChannel->read(packet.data() + 1, count); + success &= mFmqResultChannel.read(packet.data() + 1, count); if (!mValid) { - return std::nullopt; + return NN_ERROR() << "FMQ object is invalid"; } // ensure packet was successfully received if (!success) { - LOG(ERROR) << "Error receiving packet"; - return std::nullopt; + return NN_ERROR() << "Error receiving packet"; } - return std::make_optional(std::move(packet)); + return packet; } } // namespace android::hardware::neuralnetworks::V1_2::utils diff --git a/neuralnetworks/1.2/utils/src/PreparedModel.cpp b/neuralnetworks/1.2/utils/src/PreparedModel.cpp index 6841c5e007..71a4ea872b 100644 --- a/neuralnetworks/1.2/utils/src/PreparedModel.cpp +++ b/neuralnetworks/1.2/utils/src/PreparedModel.cpp @@ -18,6 +18,8 @@ #include "Callbacks.h" #include "Conversions.h" +#include "ExecutionBurstController.h" +#include "ExecutionBurstUtils.h" #include "Utils.h" #include @@ -27,12 +29,12 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -119,7 +121,14 @@ PreparedModel::executeFenced(const nn::Request& /*request*/, } nn::GeneralResult PreparedModel::configureExecutionBurst() const { - return V1_0::utils::Burst::create(shared_from_this()); + auto self = shared_from_this(); + auto fallback = [preparedModel = std::move(self)](const nn::Request& request, + nn::MeasureTiming measure) + -> nn::ExecutionResult, nn::Timing>> { + return preparedModel->execute(request, measure, {}, {}); + }; + const auto pollingTimeWindow = getBurstControllerPollingTimeWindow(); + return ExecutionBurstController::create(kPreparedModel, std::move(fallback), pollingTimeWindow); } std::any PreparedModel::getUnderlyingResource() const { diff --git a/neuralnetworks/1.3/utils/Android.bp b/neuralnetworks/1.3/utils/Android.bp index 2b1dcc40bb..28c036a8ea 100644 --- a/neuralnetworks/1.3/utils/Android.bp +++ b/neuralnetworks/1.3/utils/Android.bp @@ -42,6 +42,7 @@ cc_library_static { "android.hardware.neuralnetworks@1.1", "android.hardware.neuralnetworks@1.2", "android.hardware.neuralnetworks@1.3", + "libfmq", ], export_static_lib_headers: [ "neuralnetworks_utils_hal_common", diff --git a/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Conversions.h b/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Conversions.h index 8e1cdb82c9..b677c62505 100644 --- a/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Conversions.h +++ b/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Conversions.h @@ -59,7 +59,6 @@ GeneralResult convert( GeneralResult convert(const hal::V1_3::ErrorStatus& errorStatus); GeneralResult convert(const hardware::hidl_handle& handle); -GeneralResult convert(const hardware::hidl_memory& memory); GeneralResult> convert( const hardware::hidl_vec& bufferRoles); diff --git a/neuralnetworks/1.3/utils/src/Conversions.cpp b/neuralnetworks/1.3/utils/src/Conversions.cpp index 320c74c2c6..9788fe1b9d 100644 --- a/neuralnetworks/1.3/utils/src/Conversions.cpp +++ b/neuralnetworks/1.3/utils/src/Conversions.cpp @@ -352,10 +352,6 @@ GeneralResult convert(const hardware::hidl_handle& handle) { return validatedConvert(handle); } -GeneralResult convert(const hardware::hidl_memory& memory) { - return validatedConvert(memory); -} - GeneralResult> convert( const hardware::hidl_vec& bufferRoles) { return validatedConvert(bufferRoles); diff --git a/neuralnetworks/1.3/utils/src/PreparedModel.cpp b/neuralnetworks/1.3/utils/src/PreparedModel.cpp index 725e4f546a..64275a3729 100644 --- a/neuralnetworks/1.3/utils/src/PreparedModel.cpp +++ b/neuralnetworks/1.3/utils/src/PreparedModel.cpp @@ -29,8 +29,9 @@ #include #include #include -#include #include +#include +#include #include #include #include @@ -199,7 +200,15 @@ PreparedModel::executeFenced(const nn::Request& request, const std::vector PreparedModel::configureExecutionBurst() const { - return V1_0::utils::Burst::create(shared_from_this()); + auto self = shared_from_this(); + auto fallback = [preparedModel = std::move(self)](const nn::Request& request, + nn::MeasureTiming measure) + -> nn::ExecutionResult, nn::Timing>> { + return preparedModel->execute(request, measure, {}, {}); + }; + const auto pollingTimeWindow = V1_2::utils::getBurstControllerPollingTimeWindow(); + return V1_2::utils::ExecutionBurstController::create(kPreparedModel, std::move(fallback), + pollingTimeWindow); } std::any PreparedModel::getUnderlyingResource() const { diff --git a/neuralnetworks/utils/common/include/nnapi/hal/ProtectCallback.h b/neuralnetworks/utils/common/include/nnapi/hal/ProtectCallback.h index c9218857ac..05110bc364 100644 --- a/neuralnetworks/utils/common/include/nnapi/hal/ProtectCallback.h +++ b/neuralnetworks/utils/common/include/nnapi/hal/ProtectCallback.h @@ -56,7 +56,7 @@ class IProtectedCallback { // Thread safe class class DeathRecipient final : public hidl_death_recipient { public: - void serviceDied(uint64_t /*cookie*/, const wp& /*who*/) override; + void serviceDied(uint64_t cookie, const wp& who) override; // Precondition: `killable` must be non-null. void add(IProtectedCallback* killable) const; // Precondition: `killable` must be non-null. @@ -64,6 +64,7 @@ class DeathRecipient final : public hidl_death_recipient { private: mutable std::mutex mMutex; + mutable bool mIsDeadObject GUARDED_BY(mMutex) = false; mutable std::vector mObjects GUARDED_BY(mMutex); }; @@ -78,14 +79,21 @@ class DeathHandler final { ~DeathHandler(); using Cleanup = std::function; + using Hold = base::ScopeGuard; + // Precondition: `killable` must be non-null. - [[nodiscard]] base::ScopeGuard protectCallback(IProtectedCallback* killable) const; + // `killable` must outlive the return value `Hold`. + [[nodiscard]] Hold protectCallback(IProtectedCallback* killable) const; + + // Precondition: `killable` must be non-null. + // `killable` must outlive the `DeathHandler`. + void protectCallbackForLifetimeOfDeathHandler(IProtectedCallback* killable) const; private: DeathHandler(sp object, sp deathRecipient); - sp kObject; - sp kDeathRecipient; + sp mObject; + sp mDeathRecipient; }; } // namespace android::hardware::neuralnetworks::utils diff --git a/neuralnetworks/utils/common/src/ProtectCallback.cpp b/neuralnetworks/utils/common/src/ProtectCallback.cpp index abe4cb675e..18e1f3bf0b 100644 --- a/neuralnetworks/utils/common/src/ProtectCallback.cpp +++ b/neuralnetworks/utils/common/src/ProtectCallback.cpp @@ -35,19 +35,25 @@ void DeathRecipient::serviceDied(uint64_t /*cookie*/, const wpnotifyAsDeadObject(); }); + mObjects.clear(); + mIsDeadObject = true; } void DeathRecipient::add(IProtectedCallback* killable) const { CHECK(killable != nullptr); std::lock_guard guard(mMutex); - mObjects.push_back(killable); + if (mIsDeadObject) { + killable->notifyAsDeadObject(); + } else { + mObjects.push_back(killable); + } } void DeathRecipient::remove(IProtectedCallback* killable) const { CHECK(killable != nullptr); std::lock_guard guard(mMutex); - const auto removedIter = std::remove(mObjects.begin(), mObjects.end(), killable); - mObjects.erase(removedIter); + const auto newEnd = std::remove(mObjects.begin(), mObjects.end(), killable); + mObjects.erase(newEnd, mObjects.end()); } nn::GeneralResult DeathHandler::create(sp object) { @@ -67,19 +73,16 @@ nn::GeneralResult DeathHandler::create(sp } DeathHandler::DeathHandler(sp object, sp deathRecipient) - : kObject(std::move(object)), kDeathRecipient(std::move(deathRecipient)) { - CHECK(kObject != nullptr); - CHECK(kDeathRecipient != nullptr); + : mObject(std::move(object)), mDeathRecipient(std::move(deathRecipient)) { + CHECK(mObject != nullptr); + CHECK(mDeathRecipient != nullptr); } DeathHandler::~DeathHandler() { - if (kObject != nullptr && kDeathRecipient != nullptr) { - const auto ret = kObject->unlinkToDeath(kDeathRecipient); - const auto maybeSuccess = handleTransportError(ret); - if (!maybeSuccess.has_value()) { - LOG(ERROR) << maybeSuccess.error().message; - } else if (!maybeSuccess.value()) { - LOG(ERROR) << "IBase::linkToDeath returned false"; + if (mObject != nullptr && mDeathRecipient != nullptr) { + const auto successful = mObject->unlinkToDeath(mDeathRecipient).isOk(); + if (!successful) { + LOG(ERROR) << "IBase::linkToDeath failed"; } } } @@ -87,9 +90,14 @@ DeathHandler::~DeathHandler() { [[nodiscard]] base::ScopeGuard DeathHandler::protectCallback( IProtectedCallback* killable) const { CHECK(killable != nullptr); - kDeathRecipient->add(killable); + mDeathRecipient->add(killable); return base::make_scope_guard( - [deathRecipient = kDeathRecipient, killable] { deathRecipient->remove(killable); }); + [deathRecipient = mDeathRecipient, killable] { deathRecipient->remove(killable); }); +} + +void DeathHandler::protectCallbackForLifetimeOfDeathHandler(IProtectedCallback* killable) const { + CHECK(killable != nullptr); + mDeathRecipient->add(killable); } } // namespace android::hardware::neuralnetworks::utils