diff --git a/compatibility_matrices/compatibility_matrix.current.xml b/compatibility_matrices/compatibility_matrix.current.xml
index fd231762f0..f1078cd44f 100644
--- a/compatibility_matrices/compatibility_matrix.current.xml
+++ b/compatibility_matrices/compatibility_matrix.current.xml
@@ -378,6 +378,13 @@
             <regex-instance>.*</regex-instance>
         </interface>
     </hal>
+    <hal format="aidl" optional="true">
+        <name>android.hardware.neuralnetworks</name>
+        <interface>
+            <name>IDevice</name>
+            <regex-instance>.*</regex-instance>  <!-- pattern matches every instance name -->
+        </interface>
+    </hal>
     <hal format="hidl" optional="true">
         <name>android.hardware.nfc</name>
         <version>1.2</version>
diff --git a/neuralnetworks/1.0/utils/include/nnapi/hal/1.0/Utils.h b/neuralnetworks/1.0/utils/include/nnapi/hal/1.0/Utils.h
index 4cec545cf0..b695f48550 100644
--- a/neuralnetworks/1.0/utils/include/nnapi/hal/1.0/Utils.h
+++ b/neuralnetworks/1.0/utils/include/nnapi/hal/1.0/Utils.h
@@ -44,6 +44,12 @@ bool valid(const Type& halObject) {
     return result.has_value();
 }
 
+template <typename Type>  // Type: whatever nn::convert() accepts as input
+auto convertFromNonCanonical(const Type& nonCanonicalObject)
+        -> decltype(convert(nn::convert(nonCanonicalObject).value())) {
+    return convert(NN_TRY(nn::convert(nonCanonicalObject)));  // two hops: nn::convert, convert
+}
+
 }  // namespace android::hardware::neuralnetworks::V1_0::utils
 
 #endif  // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_0_UTILS_H
diff --git a/neuralnetworks/1.1/utils/include/nnapi/hal/1.1/Utils.h b/neuralnetworks/1.1/utils/include/nnapi/hal/1.1/Utils.h
index 052d88e922..09597a31f8 100644
--- a/neuralnetworks/1.1/utils/include/nnapi/hal/1.1/Utils.h
+++ b/neuralnetworks/1.1/utils/include/nnapi/hal/1.1/Utils.h
@@ -47,6 +47,12 @@ bool valid(const Type& halObject) {
     return result.has_value();
 }
 
+template <typename Type>  // Type: whatever nn::convert() accepts as input
+auto convertFromNonCanonical(const Type& nonCanonicalObject)
+        -> decltype(convert(nn::convert(nonCanonicalObject).value())) {
+    return convert(NN_TRY(nn::convert(nonCanonicalObject)));  // two hops: nn::convert, convert
+}
+
 }  // namespace android::hardware::neuralnetworks::V1_1::utils
 
 #endif  // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_1_UTILS_H
diff --git a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Utils.h b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Utils.h
index c289fc89ab..323311439f 100644
--- a/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Utils.h
+++ b/neuralnetworks/1.2/utils/include/nnapi/hal/1.2/Utils.h
@@ -54,6 +54,12 @@ bool valid(const Type& halObject) {
     return result.has_value();
 }
 
+template <typename Type>  // Type: whatever nn::convert() accepts as input
+auto convertFromNonCanonical(const Type& nonCanonicalObject)
+        -> decltype(convert(nn::convert(nonCanonicalObject).value())) {
+    return convert(NN_TRY(nn::convert(nonCanonicalObject)));  // two hops: nn::convert, convert
+}
+
 }  // namespace android::hardware::neuralnetworks::V1_2::utils
 
 #endif  // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_H
diff --git a/neuralnetworks/1.2/utils/src/Callbacks.cpp b/neuralnetworks/1.2/utils/src/Callbacks.cpp
index fefa122101..9f54bb12dd 100644
--- a/neuralnetworks/1.2/utils/src/Callbacks.cpp
+++ b/neuralnetworks/1.2/utils/src/Callbacks.cpp
@@ -43,6 +43,15 @@
 namespace android::hardware::neuralnetworks::V1_2::utils {
 namespace {
 
+nn::GeneralResult<nn::SharedPreparedModel> prepareModelCallback(
+        V1_0::ErrorStatus status, const sp<V1_0::IPreparedModel>& preparedModel) {
+    const auto asV1_2 = V1_2::IPreparedModel::castFrom(preparedModel).withDefault(nullptr);
+    if (asV1_2 == nullptr) {  // no 1.2 interface obtained: take the 1.0 callback path
+        return V1_0::utils::prepareModelCallback(status, preparedModel);
+    }
+    return V1_2::utils::prepareModelCallback(status, asV1_2);  // 1.2 interface obtained
+}
+
 nn::GeneralResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>
 convertExecutionGeneralResultsHelper(const hidl_vec<OutputShape>& outputShapes,
                                      const Timing& timing) {
@@ -72,7 +81,7 @@ nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> executi
 
 Return<void> PreparedModelCallback::notify(V1_0::ErrorStatus status,
                                            const sp<V1_0::IPreparedModel>& preparedModel) {
-    mData.put(V1_0::utils::prepareModelCallback(status, preparedModel));
+    mData.put(prepareModelCallback(status, preparedModel));  // file-local version-aware helper
     return Void();
 }
 
diff --git a/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Utils.h b/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Utils.h
index 29b0c806ff..3ce412cde6 100644
--- a/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Utils.h
+++ b/neuralnetworks/1.3/utils/include/nnapi/hal/1.3/Utils.h
@@ -49,6 +49,12 @@ bool valid(const Type& halObject) {
     return result.has_value();
 }
 
+template <typename Type>  // Type: whatever nn::convert() accepts as input
+auto convertFromNonCanonical(const Type& nonCanonicalObject)
+        -> decltype(convert(nn::convert(nonCanonicalObject).value())) {
+    return convert(NN_TRY(nn::convert(nonCanonicalObject)));  // two hops: nn::convert, convert
+}
+
 }  // namespace android::hardware::neuralnetworks::V1_3::utils
 
 #endif  // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_3_UTILS_H
diff --git a/neuralnetworks/1.3/utils/src/Callbacks.cpp b/neuralnetworks/1.3/utils/src/Callbacks.cpp
index af76e6a87e..8e9fb833b1 100644
--- a/neuralnetworks/1.3/utils/src/Callbacks.cpp
+++ b/neuralnetworks/1.3/utils/src/Callbacks.cpp
@@ -28,6 +28,7 @@
 #include <nnapi/IPreparedModel.h>
 #include <nnapi/Result.h>
 #include <nnapi/Types.h>
+#include <nnapi/hal/1.0/Callbacks.h>
 #include <nnapi/hal/1.0/Conversions.h>
 #include <nnapi/hal/1.0/PreparedModel.h>
 #include <nnapi/hal/1.2/Callbacks.h>
@@ -46,6 +47,20 @@
 namespace android::hardware::neuralnetworks::V1_3::utils {
 namespace {
 
+nn::GeneralResult<nn::SharedPreparedModel> prepareModelCallback(
+        V1_0::ErrorStatus status, const sp<V1_0::IPreparedModel>& preparedModel) {
+    const auto asV1_3 = V1_3::IPreparedModel::castFrom(preparedModel).withDefault(nullptr);
+    if (asV1_3 != nullptr) {  // 1.3 is probed first; status is converted for the 1.3 callback
+        const auto statusV1_3 = NN_TRY(convertFromNonCanonical(status));
+        return V1_3::utils::prepareModelCallback(statusV1_3, asV1_3);
+    }
+    const auto asV1_2 = V1_2::IPreparedModel::castFrom(preparedModel).withDefault(nullptr);
+    if (asV1_2 != nullptr) {  // then 1.2; status is passed through unchanged
+        return V1_2::utils::prepareModelCallback(status, asV1_2);
+    }
+    return V1_0::utils::prepareModelCallback(status, preparedModel);  // otherwise the 1.0 path
+}
+
 nn::GeneralResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>>
 convertExecutionGeneralResultsHelper(const hidl_vec<V1_2::OutputShape>& outputShapes,
                                      const V1_2::Timing& timing) {
@@ -82,13 +97,13 @@ nn::ExecutionResult<std::pair<std::vector<nn::OutputShape>, nn::Timing>> executi
 
 Return<void> PreparedModelCallback::notify(V1_0::ErrorStatus status,
                                            const sp<V1_0::IPreparedModel>& preparedModel) {
-    mData.put(V1_0::utils::prepareModelCallback(status, preparedModel));
+    mData.put(prepareModelCallback(status, preparedModel));  // file-local version-aware helper
     return Void();
 }
 
 Return<void> PreparedModelCallback::notify_1_2(V1_0::ErrorStatus status,
                                                const sp<V1_2::IPreparedModel>& preparedModel) {
-    mData.put(V1_2::utils::prepareModelCallback(status, preparedModel));
+    mData.put(prepareModelCallback(status, preparedModel));  // NOTE(review): file-local? confirm
     return Void();
 }
 
diff --git a/neuralnetworks/1.3/vts/functional/Android.bp b/neuralnetworks/1.3/vts/functional/Android.bp
index b17d44559b..ee753bb951 100644
--- a/neuralnetworks/1.3/vts/functional/Android.bp
+++ b/neuralnetworks/1.3/vts/functional/Android.bp
@@ -57,6 +57,7 @@ cc_test {
         "VtsHalNeuralNetworksV1_0_utils",
         "VtsHalNeuralNetworksV1_2_utils",
         "VtsHalNeuralNetworksV1_3_utils",
+        "android.hardware.neuralnetworks-V1-ndk_platform",
         "android.hardware.neuralnetworks@1.0",
         "android.hardware.neuralnetworks@1.1",
         "android.hardware.neuralnetworks@1.2",
diff --git a/neuralnetworks/aidl/Android.bp b/neuralnetworks/aidl/Android.bp
new file mode 100644
index 0000000000..0557e43a5a
--- /dev/null
+++ b/neuralnetworks/aidl/Android.bp
@@ -0,0 +1,27 @@
+aidl_interface {
+    name: "android.hardware.neuralnetworks",
+    vendor_available: true,
+    srcs: [
+        "android/hardware/neuralnetworks/*.aidl",  // glob: every .aidl file in that directory
+    ],
+    stability: "vintf",
+    imports: [
+        "android.hardware.common",
+    ],
+    backend: {  // java and cpp are switched off below; ndk carries extra settings
+        java: {
+            enabled: false,
+        },
+        cpp: {
+            enabled: false,
+        },
+        ndk: {
+            apex_available: [
+                "//apex_available:platform",
+                "com.android.neuralnetworks",
+                "test_com.android.neuralnetworks",
+            ],
+            min_sdk_version: "30",
+        },
+    },
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/BufferDesc.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/BufferDesc.aidl
new file mode 100644
index 0000000000..2074a2ad4d
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/BufferDesc.aidl
@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable BufferDesc {
+  int[] dimensions;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/BufferRole.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/BufferRole.aidl
new file mode 100644
index 0000000000..97f748bcf8
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/BufferRole.aidl
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable BufferRole {
+  int modelIndex;
+  int ioIndex;
+  float frequency;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Capabilities.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Capabilities.aidl
new file mode 100644
index 0000000000..31afafc7df
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Capabilities.aidl
@@ -0,0 +1,26 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable Capabilities {
+  android.hardware.neuralnetworks.PerformanceInfo relaxedFloat32toFloat16PerformanceScalar;
+  android.hardware.neuralnetworks.PerformanceInfo relaxedFloat32toFloat16PerformanceTensor;
+  android.hardware.neuralnetworks.OperandPerformance[] operandPerformance;
+  android.hardware.neuralnetworks.PerformanceInfo ifPerformance;
+  android.hardware.neuralnetworks.PerformanceInfo whilePerformance;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/DataLocation.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/DataLocation.aidl
new file mode 100644
index 0000000000..5b03ba038e
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/DataLocation.aidl
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable DataLocation {
+  int poolIndex;
+  long offset;
+  long length;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/DeviceBuffer.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/DeviceBuffer.aidl
new file mode 100644
index 0000000000..9cff6db999
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/DeviceBuffer.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable DeviceBuffer {
+  android.hardware.neuralnetworks.IBuffer buffer;
+  int token;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/DeviceType.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/DeviceType.aidl
new file mode 100644
index 0000000000..dd4dae7d0e
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/DeviceType.aidl
@@ -0,0 +1,25 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@Backing(type="int") @VintfStability
+enum DeviceType {
+  OTHER = 1,
+  CPU = 2,
+  GPU = 3,
+  ACCELERATOR = 4,
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ErrorStatus.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ErrorStatus.aidl
new file mode 100644
index 0000000000..ba18c3801e
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ErrorStatus.aidl
@@ -0,0 +1,30 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@Backing(type="int") @VintfStability
+enum ErrorStatus {
+  NONE = 0,
+  DEVICE_UNAVAILABLE = 1,
+  GENERAL_FAILURE = 2,
+  OUTPUT_INSUFFICIENT_SIZE = 3,
+  INVALID_ARGUMENT = 4,
+  MISSED_DEADLINE_TRANSIENT = 5,
+  MISSED_DEADLINE_PERSISTENT = 6,
+  RESOURCE_EXHAUSTED_TRANSIENT = 7,
+  RESOURCE_EXHAUSTED_PERSISTENT = 8,
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExecutionPreference.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExecutionPreference.aidl
new file mode 100644
index 0000000000..cccae5403d
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExecutionPreference.aidl
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@Backing(type="int") @VintfStability
+enum ExecutionPreference {
+  LOW_POWER = 0,
+  FAST_SINGLE_ANSWER = 1,
+  SUSTAINED_SPEED = 2,
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExecutionResult.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExecutionResult.aidl
new file mode 100644
index 0000000000..c17ddb9116
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExecutionResult.aidl
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable ExecutionResult {
+  boolean outputSufficientSize;
+  android.hardware.neuralnetworks.OutputShape[] outputShapes;
+  android.hardware.neuralnetworks.Timing timing;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Extension.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Extension.aidl
new file mode 100644
index 0000000000..9eb8896af7
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Extension.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable Extension {
+  String name;
+  android.hardware.neuralnetworks.ExtensionOperandTypeInformation[] operandTypes;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExtensionNameAndPrefix.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExtensionNameAndPrefix.aidl
new file mode 100644
index 0000000000..a271a63128
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExtensionNameAndPrefix.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable ExtensionNameAndPrefix {
+  String name;
+  char prefix;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExtensionOperandTypeInformation.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExtensionOperandTypeInformation.aidl
new file mode 100644
index 0000000000..d1c3f099b0
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/ExtensionOperandTypeInformation.aidl
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable ExtensionOperandTypeInformation {
+  char type;
+  boolean isTensor;
+  int byteSize;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/FusedActivationFunc.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/FusedActivationFunc.aidl
new file mode 100644
index 0000000000..ddd3c2abd7
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/FusedActivationFunc.aidl
@@ -0,0 +1,25 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@Backing(type="int") @VintfStability
+enum FusedActivationFunc {
+  NONE = 0,
+  RELU = 1,
+  RELU1 = 2,
+  RELU6 = 3,
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IBuffer.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IBuffer.aidl
new file mode 100644
index 0000000000..a297a6bb31
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IBuffer.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+interface IBuffer {  // VINTF-stable interface declaring two void methods, each taking a Memory parcelable.
+  void copyFrom(in android.hardware.neuralnetworks.Memory src, in int[] dimensions);  // Both arguments are input-only (`in`).
+  void copyTo(in android.hardware.neuralnetworks.Memory dst);  // dst is passed `in`; NOTE(review): presumably the copy lands in the memory dst refers to - confirm.
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IDevice.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IDevice.aidl
new file mode 100644
index 0000000000..38fda16b56
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IDevice.aidl
@@ -0,0 +1,36 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+interface IDevice {  // VINTF-stable interface: seven methods that return a value, two void prepare methods that take a callback, and six int constants.
+  android.hardware.neuralnetworks.DeviceBuffer allocate(in android.hardware.neuralnetworks.BufferDesc desc, in android.hardware.neuralnetworks.IPreparedModelParcel[] preparedModels, in android.hardware.neuralnetworks.BufferRole[] inputRoles, in android.hardware.neuralnetworks.BufferRole[] outputRoles);  // All four arguments are input-only; returns a DeviceBuffer.
+  android.hardware.neuralnetworks.Capabilities getCapabilities();
+  android.hardware.neuralnetworks.NumberOfCacheFiles getNumberOfCacheFilesNeeded();
+  android.hardware.neuralnetworks.Extension[] getSupportedExtensions();
+  boolean[] getSupportedOperations(in android.hardware.neuralnetworks.Model model);  // NOTE(review): presumably one boolean per operation in the model - confirm.
+  android.hardware.neuralnetworks.DeviceType getType();
+  String getVersionString();
+  void prepareModel(in android.hardware.neuralnetworks.Model model, in android.hardware.neuralnetworks.ExecutionPreference preference, in android.hardware.neuralnetworks.Priority priority, in long deadline, in ParcelFileDescriptor[] modelCache, in ParcelFileDescriptor[] dataCache, in byte[] token, in android.hardware.neuralnetworks.IPreparedModelCallback callback);  // Returns void; NOTE(review): presumably the outcome is delivered through callback.notify - confirm.
+  void prepareModelFromCache(in long deadline, in ParcelFileDescriptor[] modelCache, in ParcelFileDescriptor[] dataCache, in byte[] token, in android.hardware.neuralnetworks.IPreparedModelCallback callback);  // Same trailing arguments as prepareModel, without model/preference/priority.
+  const int BYTE_SIZE_OF_CACHE_TOKEN = 32;  // NOTE(review): presumably the required length of the byte[] token arguments above - confirm.
+  const int MAX_NUMBER_OF_CACHE_FILES = 32;  // NOTE(review): presumably an upper bound on the counts in NumberOfCacheFiles - confirm.
+  const int EXTENSION_TYPE_HIGH_BITS_PREFIX = 15;  // NOTE(review): presumably a bit width (15 high bits + 16 low bits below) - confirm.
+  const int EXTENSION_TYPE_LOW_BITS_TYPE = 16;
+  const int OPERAND_TYPE_BASE_MAX = 65535;  // 65535 == 2^16 - 1.
+  const int OPERATION_TYPE_BASE_MAX = 65535;  // 65535 == 2^16 - 1.
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IFencedExecutionCallback.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IFencedExecutionCallback.aidl
new file mode 100644
index 0000000000..a7cf90690e
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IFencedExecutionCallback.aidl
@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+interface IFencedExecutionCallback {  // Single-method interface; IPreparedModel.executeFenced returns an instance of it.
+  android.hardware.neuralnetworks.ErrorStatus getExecutionInfo(out android.hardware.neuralnetworks.Timing timingLaunched, out android.hardware.neuralnetworks.Timing timingFenced);  // Returns an ErrorStatus; both Timing arguments are `out` parameters written by the callee.
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IPreparedModel.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IPreparedModel.aidl
new file mode 100644
index 0000000000..87677122e9
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IPreparedModel.aidl
@@ -0,0 +1,25 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+interface IPreparedModel {  // Two execute methods plus two long constants named for the loop timeout.
+  android.hardware.neuralnetworks.ExecutionResult executeSynchronously(in android.hardware.neuralnetworks.Request request, in boolean measureTiming, in long deadline, in long loopTimeoutDuration);  // Returns the ExecutionResult directly.
+  android.hardware.neuralnetworks.IFencedExecutionCallback executeFenced(in android.hardware.neuralnetworks.Request request, in ParcelFileDescriptor[] waitFor, in boolean measureTiming, in long deadline, in long loopTimeoutDuration, in long duration, out @nullable ParcelFileDescriptor syncFence);  // Returns an IFencedExecutionCallback; syncFence is an `out` parameter and may be null.
+  const long DEFAULT_LOOP_TIMEOUT_DURATION_NS = 2000000000;  // 2 * 10^9; NOTE(review): 2 seconds if the _NS suffix means nanoseconds - confirm.
+  const long MAXIMUM_LOOP_TIMEOUT_DURATION_NS = 15000000000;  // 15 * 10^9, which exceeds the 32-bit int maximum (2147483647).
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IPreparedModelCallback.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IPreparedModelCallback.aidl
new file mode 100644
index 0000000000..d1ae2eb72b
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IPreparedModelCallback.aidl
@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+interface IPreparedModelCallback {  // Single-method callback interface; IDevice.prepareModel and prepareModelFromCache take it as their last argument.
+  void notify(in android.hardware.neuralnetworks.ErrorStatus status, in android.hardware.neuralnetworks.IPreparedModel preparedModel);  // Both arguments are input-only (`in`).
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IPreparedModelParcel.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IPreparedModelParcel.aidl
new file mode 100644
index 0000000000..048251a361
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/IPreparedModelParcel.aidl
@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable IPreparedModelParcel {  // Parcelable wrapping a single IPreparedModel; IDevice.allocate takes an array of these.
+  android.hardware.neuralnetworks.IPreparedModel preparedModel;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Memory.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Memory.aidl
new file mode 100644
index 0000000000..aa735c02d0
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Memory.aidl
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable Memory {  // Three fields: a NativeHandle, a long size, and a String name.
+  android.hardware.common.NativeHandle handle;  // Type comes from android.hardware.common, not from this package.
+  long size;  // NOTE(review): presumably a size in bytes - confirm.
+  String name;  // NOTE(review): presumably identifies the kind of memory behind handle - confirm.
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Model.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Model.aidl
new file mode 100644
index 0000000000..944bd7f5ed
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Model.aidl
@@ -0,0 +1,27 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable Model {  // Aggregates a main Subgraph, further Subgraphs, a byte[] blob, a Memory[] array, a boolean flag, and an ExtensionNameAndPrefix[] array.
+  android.hardware.neuralnetworks.Subgraph main;
+  android.hardware.neuralnetworks.Subgraph[] referenced;  // NOTE(review): presumably the targets of OperandLifeTime.SUBGRAPH operands - confirm.
+  byte[] operandValues;  // NOTE(review): presumably the backing bytes for OperandLifeTime.CONSTANT_COPY operands - confirm.
+  android.hardware.neuralnetworks.Memory[] pools;  // NOTE(review): presumably the backing memory for OperandLifeTime.CONSTANT_POOL operands - confirm.
+  boolean relaxComputationFloat32toFloat16;
+  android.hardware.neuralnetworks.ExtensionNameAndPrefix[] extensionNameToPrefix;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/NumberOfCacheFiles.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/NumberOfCacheFiles.aidl
new file mode 100644
index 0000000000..ca5f917578
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/NumberOfCacheFiles.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable NumberOfCacheFiles {  // Pair of int counts; the return type of IDevice.getNumberOfCacheFilesNeeded.
+  int numModelCache;  // NOTE(review): presumably the expected length of the modelCache arrays passed to IDevice - confirm.
+  int numDataCache;  // NOTE(review): presumably the expected length of the dataCache arrays passed to IDevice - confirm.
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Operand.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Operand.aidl
new file mode 100644
index 0000000000..6615b9b42c
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Operand.aidl
@@ -0,0 +1,28 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable Operand {  // Seven fields describing one operand; only extraParams is marked @nullable.
+  android.hardware.neuralnetworks.OperandType type;
+  int[] dimensions;
+  float scale;  // NOTE(review): scale and zeroPoint presumably matter only for quantized OperandType values - confirm.
+  int zeroPoint;
+  android.hardware.neuralnetworks.OperandLifeTime lifetime;
+  android.hardware.neuralnetworks.DataLocation location;
+  @nullable android.hardware.neuralnetworks.OperandExtraParams extraParams;  // May be null; when present it is a union of channelQuant or extension.
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandExtraParams.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandExtraParams.aidl
new file mode 100644
index 0000000000..20317c7016
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandExtraParams.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+union OperandExtraParams {  // AIDL union: an instance holds exactly one of the two members below.
+  android.hardware.neuralnetworks.SymmPerChannelQuantParams channelQuant;  // NOTE(review): presumably used with OperandType.TENSOR_QUANT8_SYMM_PER_CHANNEL - confirm.
+  byte[] extension;  // Raw bytes; NOTE(review): presumably opaque data for extension operand types - confirm.
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandLifeTime.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandLifeTime.aidl
new file mode 100644
index 0000000000..1082f9ee1f
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandLifeTime.aidl
@@ -0,0 +1,28 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@Backing(type="int") @VintfStability  // int-backed, VINTF-stable enum.
+enum OperandLifeTime {  // Seven enumerators with explicit, contiguous values 0-6.
+  TEMPORARY_VARIABLE = 0,
+  SUBGRAPH_INPUT = 1,
+  SUBGRAPH_OUTPUT = 2,
+  CONSTANT_COPY = 3,
+  CONSTANT_POOL = 4,
+  NO_VALUE = 5,
+  SUBGRAPH = 6,  // OperandType also declares SUBGRAPH (value 15 there); the two enums are distinct types.
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandPerformance.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandPerformance.aidl
new file mode 100644
index 0000000000..9232b4c70e
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandPerformance.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable OperandPerformance {  // Pairs an OperandType with a PerformanceInfo.
+  android.hardware.neuralnetworks.OperandType type;
+  android.hardware.neuralnetworks.PerformanceInfo info;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandType.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandType.aidl
new file mode 100644
index 0000000000..bd95fab52a
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperandType.aidl
@@ -0,0 +1,37 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@Backing(type="int") @VintfStability  // int-backed, VINTF-stable enum.
+enum OperandType {  // Sixteen enumerators with explicit, contiguous values 0-15.
+  FLOAT32 = 0,  // NOTE(review): names without the TENSOR_ prefix are presumably scalar types - confirm.
+  INT32 = 1,
+  UINT32 = 2,
+  TENSOR_FLOAT32 = 3,
+  TENSOR_INT32 = 4,
+  TENSOR_QUANT8_ASYMM = 5,
+  BOOL = 6,
+  TENSOR_QUANT16_SYMM = 7,
+  TENSOR_FLOAT16 = 8,
+  TENSOR_BOOL8 = 9,
+  FLOAT16 = 10,
+  TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
+  TENSOR_QUANT16_ASYMM = 12,
+  TENSOR_QUANT8_SYMM = 13,
+  TENSOR_QUANT8_ASYMM_SIGNED = 14,
+  SUBGRAPH = 15,  // OperandLifeTime also declares SUBGRAPH (value 6 there); the two enums are distinct types.
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Operation.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Operation.aidl
new file mode 100644
index 0000000000..383eba4a15
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Operation.aidl
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable Operation {  // An OperationType plus two int arrays.
+  android.hardware.neuralnetworks.OperationType type;
+  int[] inputs;  // NOTE(review): presumably indexes into the enclosing subgraph's operand list - confirm.
+  int[] outputs;  // NOTE(review): presumably indexes into the enclosing subgraph's operand list - confirm.
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperationType.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperationType.aidl
new file mode 100644
index 0000000000..f786829eb9
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OperationType.aidl
@@ -0,0 +1,123 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@Backing(type="int") @VintfStability  // int-backed, VINTF-stable enum.
+enum OperationType {  // 102 enumerators with explicit, contiguous values from 0 (ADD) through 101 (RANK).
+  ADD = 0,
+  AVERAGE_POOL_2D = 1,
+  CONCATENATION = 2,
+  CONV_2D = 3,
+  DEPTHWISE_CONV_2D = 4,
+  DEPTH_TO_SPACE = 5,
+  DEQUANTIZE = 6,
+  EMBEDDING_LOOKUP = 7,
+  FLOOR = 8,
+  FULLY_CONNECTED = 9,
+  HASHTABLE_LOOKUP = 10,
+  L2_NORMALIZATION = 11,
+  L2_POOL_2D = 12,
+  LOCAL_RESPONSE_NORMALIZATION = 13,
+  LOGISTIC = 14,
+  LSH_PROJECTION = 15,
+  LSTM = 16,
+  MAX_POOL_2D = 17,
+  MUL = 18,
+  RELU = 19,  // FusedActivationFunc also declares RELU/RELU1/RELU6, with different values (1/2/3); the two enums are distinct types.
+  RELU1 = 20,
+  RELU6 = 21,
+  RESHAPE = 22,
+  RESIZE_BILINEAR = 23,
+  RNN = 24,
+  SOFTMAX = 25,
+  SPACE_TO_DEPTH = 26,
+  SVDF = 27,
+  TANH = 28,
+  BATCH_TO_SPACE_ND = 29,
+  DIV = 30,
+  MEAN = 31,
+  PAD = 32,
+  SPACE_TO_BATCH_ND = 33,
+  SQUEEZE = 34,
+  STRIDED_SLICE = 35,
+  SUB = 36,
+  TRANSPOSE = 37,
+  ABS = 38,
+  ARGMAX = 39,
+  ARGMIN = 40,
+  AXIS_ALIGNED_BBOX_TRANSFORM = 41,
+  BIDIRECTIONAL_SEQUENCE_LSTM = 42,
+  BIDIRECTIONAL_SEQUENCE_RNN = 43,
+  BOX_WITH_NMS_LIMIT = 44,
+  CAST = 45,
+  CHANNEL_SHUFFLE = 46,
+  DETECTION_POSTPROCESSING = 47,
+  EQUAL = 48,
+  EXP = 49,
+  EXPAND_DIMS = 50,
+  GATHER = 51,
+  GENERATE_PROPOSALS = 52,
+  GREATER = 53,
+  GREATER_EQUAL = 54,
+  GROUPED_CONV_2D = 55,
+  HEATMAP_MAX_KEYPOINT = 56,
+  INSTANCE_NORMALIZATION = 57,
+  LESS = 58,
+  LESS_EQUAL = 59,
+  LOG = 60,
+  LOGICAL_AND = 61,
+  LOGICAL_NOT = 62,
+  LOGICAL_OR = 63,
+  LOG_SOFTMAX = 64,
+  MAXIMUM = 65,
+  MINIMUM = 66,
+  NEG = 67,
+  NOT_EQUAL = 68,
+  PAD_V2 = 69,
+  POW = 70,
+  PRELU = 71,
+  QUANTIZE = 72,
+  QUANTIZED_16BIT_LSTM = 73,
+  RANDOM_MULTINOMIAL = 74,
+  REDUCE_ALL = 75,
+  REDUCE_ANY = 76,
+  REDUCE_MAX = 77,
+  REDUCE_MIN = 78,
+  REDUCE_PROD = 79,
+  REDUCE_SUM = 80,
+  ROI_ALIGN = 81,
+  ROI_POOLING = 82,
+  RSQRT = 83,
+  SELECT = 84,
+  SIN = 85,
+  SLICE = 86,
+  SPLIT = 87,
+  SQRT = 88,
+  TILE = 89,
+  TOPK_V2 = 90,
+  TRANSPOSE_CONV_2D = 91,
+  UNIDIRECTIONAL_SEQUENCE_LSTM = 92,
+  UNIDIRECTIONAL_SEQUENCE_RNN = 93,
+  RESIZE_NEAREST_NEIGHBOR = 94,
+  QUANTIZED_LSTM = 95,
+  IF = 96,  // NOTE(review): IF and WHILE presumably take OperandType.SUBGRAPH operands that refer to Model.referenced - confirm.
+  WHILE = 97,
+  ELU = 98,
+  HARD_SWISH = 99,
+  FILL = 100,
+  RANK = 101,  // Highest value declared here; IDevice.OPERATION_TYPE_BASE_MAX is 65535.
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OutputShape.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OutputShape.aidl
new file mode 100644
index 0000000000..1300c49b7a
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/OutputShape.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable OutputShape {
+  int[] dimensions;
+  boolean isSufficient;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/PerformanceInfo.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/PerformanceInfo.aidl
new file mode 100644
index 0000000000..b5dc179943
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/PerformanceInfo.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable PerformanceInfo {
+  float execTime;
+  float powerUsage;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Priority.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Priority.aidl
new file mode 100644
index 0000000000..980bee328f
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Priority.aidl
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@Backing(type="int") @VintfStability
+enum Priority {
+  LOW = 0,
+  MEDIUM = 1,
+  HIGH = 2,
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Request.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Request.aidl
new file mode 100644
index 0000000000..6f77066fa7
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Request.aidl
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable Request {
+  android.hardware.neuralnetworks.RequestArgument[] inputs;
+  android.hardware.neuralnetworks.RequestArgument[] outputs;
+  android.hardware.neuralnetworks.RequestMemoryPool[] pools;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/RequestArgument.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/RequestArgument.aidl
new file mode 100644
index 0000000000..c9560efe4b
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/RequestArgument.aidl
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable RequestArgument {
+  boolean hasNoValue;
+  android.hardware.neuralnetworks.DataLocation location;
+  int[] dimensions;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/RequestMemoryPool.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/RequestMemoryPool.aidl
new file mode 100644
index 0000000000..123e4b0af4
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/RequestMemoryPool.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+union RequestMemoryPool {
+  android.hardware.neuralnetworks.Memory pool;
+  int token;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Subgraph.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Subgraph.aidl
new file mode 100644
index 0000000000..771d15a21d
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Subgraph.aidl
@@ -0,0 +1,25 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable Subgraph {
+  android.hardware.neuralnetworks.Operand[] operands;
+  android.hardware.neuralnetworks.Operation[] operations;
+  int[] inputIndexes;
+  int[] outputIndexes;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/SymmPerChannelQuantParams.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/SymmPerChannelQuantParams.aidl
new file mode 100644
index 0000000000..2282febed2
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/SymmPerChannelQuantParams.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable SymmPerChannelQuantParams {
+  float[] scales;
+  int channelDim;
+}
diff --git a/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Timing.aidl b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Timing.aidl
new file mode 100644
index 0000000000..b08d34acc1
--- /dev/null
+++ b/neuralnetworks/aidl/aidl_api/android.hardware.neuralnetworks/current/android/hardware/neuralnetworks/Timing.aidl
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+// THIS FILE IS IMMUTABLE. DO NOT EDIT IN ANY CASE.                          //
+///////////////////////////////////////////////////////////////////////////////
+
+// This file is a snapshot of an AIDL interface (or parcelable). Do not try to
+// edit this file. It looks like you are doing that because you have modified
+// an AIDL interface in a backward-incompatible way, e.g., deleting a function
+// from an interface or a field from a parcelable and it broke the build. That
+// breakage is intended.
+//
+// You must not make a backward incompatible changes to the AIDL files built
+// with the aidl_interface module type with versions property set. The module
+// type is used to build AIDL files in a way that they can be used across
+// independently updatable components of the system. If a device is shipped
+// with such a backward incompatible change, it has a high risk of breaking
+// later when a module using the interface is updated, e.g., Mainline modules.
+
+package android.hardware.neuralnetworks;
+@VintfStability
+parcelable Timing {
+  long timeOnDevice;
+  long timeInDriver;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/BufferDesc.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/BufferDesc.aidl
new file mode 100644
index 0000000000..1b92ebc988
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/BufferDesc.aidl
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * A buffer descriptor. Describes the properties of a buffer; currently only its dimensions.
+ */
+@VintfStability
+parcelable BufferDesc {
+    /**
+     * Dimensions of the buffer. Individual dimensions, or the rank itself, may be unspecified:
+     * each unspecified dimension is set to 0 (for example, {2, 0} leaves the second dimension
+     * unspecified), and an unspecified rank is represented by an empty dimensions array.
+     */
+    int[] dimensions;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/BufferRole.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/BufferRole.aidl
new file mode 100644
index 0000000000..7877bc0180
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/BufferRole.aidl
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Describes a role of an input or output to a prepared model.
+ */
+@VintfStability
+parcelable BufferRole {
+    /**
+     * The index of the IPreparedModel within the "preparedModel" argument passed in
+     * IDevice::allocate. NOTE(review): confirm the argument name against IDevice.aidl.
+     */
+    int modelIndex;
+    /**
+     * The index of the input or output operand, presumably within the model at modelIndex.
+     */
+    int ioIndex;
+    /**
+     * A floating-point value in the range (0.0, 1.0], i.e. greater than 0.0 and at most 1.0.
+     * Describes how likely the buffer is to be used in the specified role. It is a hint used to
+     * optimize the case when multiple roles prefer different buffer locations or data layouts.
+     */
+    float frequency;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/Capabilities.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/Capabilities.aidl
new file mode 100644
index 0000000000..5ce78ee96f
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/Capabilities.aidl
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.OperandPerformance;
+import android.hardware.neuralnetworks.PerformanceInfo;
+
+/**
+ * The capabilities of a driver.
+ *
+ * This represents the performance of non-extension operations.
+ *
+ * Performance of an operation other than {@link OperationType::IF} and {@link OperationType::WHILE}
+ * comes from the type of its first operand.
+ */
+@VintfStability
+parcelable Capabilities {
+    /**
+     * Driver performance when operating on float32 data but performing calculations with range
+     * and/or precision as low as that of the IEEE 754 16-bit floating-point format.
+     */
+    PerformanceInfo relaxedFloat32toFloat16PerformanceScalar;  // scalar operands (assumed)
+    PerformanceInfo relaxedFloat32toFloat16PerformanceTensor;  // undocumented; tensor case?
+    /**
+     * Performance by operand type. Must be sorted by OperandType.
+     *
+     * If a particular {@link OperandType} is not present in operandPerformance, its performance is
+     * treated as { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
+     *
+     * Performance does not apply to {@link OperandType::SUBGRAPH}, and a driver must not report
+     * operand performance for {@link OperandType::SUBGRAPH}.
+     */
+    OperandPerformance[] operandPerformance;
+    /**
+     * Performance of an {@link OperationType::IF} operation is the sum of
+     * {@link Capabilities::ifPerformance} and the mean of performance for the two branch subgraphs,
+     * where performance for a subgraph is the sum of the performance of all operations within the
+     * subgraph.
+     */
+    PerformanceInfo ifPerformance;
+    /**
+     * Performance of a {@link OperationType::WHILE} operation is the sum of
+     * {@link Capabilities::whilePerformance}, performance for the condition subgraph and
+     * performance for the body subgraph, where performance for a subgraph is the sum of the
+     * performance of all operations within the subgraph.
+     */
+    PerformanceInfo whilePerformance;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/DataLocation.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/DataLocation.aidl
new file mode 100644
index 0000000000..57e3f4ade6
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/DataLocation.aidl
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Describes the location of a data object as a byte range (offset, length) in a memory pool.
+ */
+@VintfStability
+parcelable DataLocation {
+    /**
+     * The index of the memory pool where this location is found.
+     */
+    int poolIndex;
+    /**
+     * Offset in bytes from the start of the pool identified by poolIndex.
+     */
+    long offset;
+    /**
+     * The length of the data in bytes, starting at offset.
+     */
+    long length;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/DeviceBuffer.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/DeviceBuffer.aidl
new file mode 100644
index 0000000000..d51e1b2881
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/DeviceBuffer.aidl
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.IBuffer;
+
+/**
+ * A type that is used to represent a driver-allocated buffer and the token that corresponds to it.
+ */
+@VintfStability
+parcelable DeviceBuffer {
+    /**
+     * An IBuffer object used to interact with the device-allocated buffer.
+     */
+    IBuffer buffer;
+    /**
+     * A positive token identifying the allocated buffer. The token is provided when referencing the
+     * buffer as one of the memory pools in the request of an execution. The token must not collide
+     * with the tokens of other IBuffer objects that are currently alive in the same driver service.
+     */
+    int token;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/DeviceType.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/DeviceType.aidl
new file mode 100644
index 0000000000..8399d504ce
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/DeviceType.aidl
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Device types.
+ *
+ * The type of NNAPI device. Values start at 1; no enumerator is defined for 0.
+ */
+@VintfStability
+@Backing(type="int")
+enum DeviceType {
+    /**
+     * The device does not fall into any category below.
+     */
+    OTHER = 1,
+    /**
+     * The device runs NNAPI models on single or multi-core CPU.
+     */
+    CPU = 2,
+    /**
+     * The device can run NNAPI models and also accelerate graphics APIs such as OpenGL ES and
+     * Vulkan.
+     */
+    GPU = 3,
+    /**
+     * Dedicated accelerator for Machine Learning workloads.
+     */
+    ACCELERATOR = 4,
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/ErrorStatus.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/ErrorStatus.aidl
new file mode 100644
index 0000000000..860f86a156
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/ErrorStatus.aidl
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Calls to neural networks AIDL interfaces may return a ServiceSpecificException with the following
+ * error codes.
+ */
+@VintfStability
+@Backing(type="int")
+enum ErrorStatus {  // No explicit values: AIDL numbers them from 0 in declaration order.
+    NONE,  // 0
+    DEVICE_UNAVAILABLE,  // 1
+    GENERAL_FAILURE,  // 2
+    OUTPUT_INSUFFICIENT_SIZE,  // 3
+    INVALID_ARGUMENT,  // 4
+    /**
+     * Failure because a deadline could not be met for a task, but future deadlines may still be met
+     * for the same task after a short delay.
+     */
+    MISSED_DEADLINE_TRANSIENT,  // 5
+    /**
+     * Failure because a deadline could not be met for a task, and future deadlines will likely also
+     * not be met for the same task even after a short delay.
+     */
+    MISSED_DEADLINE_PERSISTENT,  // 6
+    /**
+     * Failure because of a resource limitation within the driver, but future calls for the same
+     * task may still succeed after a short delay.
+     */
+    RESOURCE_EXHAUSTED_TRANSIENT,  // 7
+    /**
+     * Failure because of a resource limitation within the driver, and future calls for the same
+     * task will likely also fail even after a short delay.
+     */
+    RESOURCE_EXHAUSTED_PERSISTENT,  // 8
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/ExecutionPreference.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/ExecutionPreference.aidl
new file mode 100644
index 0000000000..901cb384c9
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/ExecutionPreference.aidl
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Execution preferences.
+ */
+@VintfStability
+@Backing(type="int")
+enum ExecutionPreference {  // No explicit values: AIDL numbers them from 0 in declaration order.
+    /**
+     * Prefer executing in a way that minimizes battery drain. This is desirable for compilations
+     * that will be executed often.
+     */
+    LOW_POWER,  // 0
+    /**
+     * Prefer returning a single answer as fast as possible, even if this causes more power
+     * consumption.
+     */
+    FAST_SINGLE_ANSWER,  // 1
+    /**
+     * Prefer maximizing the throughput of successive frames, for example when processing successive
+     * frames coming from the camera.
+     */
+    SUSTAINED_SPEED,  // 2
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/ExecutionResult.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/ExecutionResult.aidl
new file mode 100644
index 0000000000..403fe097ee
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/ExecutionResult.aidl
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.ErrorStatus;
+import android.hardware.neuralnetworks.OutputShape;
+import android.hardware.neuralnetworks.Timing;
+
+/**
+ * A result from running a synchronous execution of a prepared model.
+ */
+@VintfStability
+parcelable ExecutionResult {
+    /**
+     * A value of "true" indicates that the execution was successful. A value of "false" indicates
+     * the execution failed because at least one output operand buffer was not large enough to store
+     * the corresponding output.
+     */
+    boolean outputSufficientSize;
+    /**
+     * Shape information for the model output operands. The entry at a given index of "outputShapes"
+     * describes the output operand at the same index of the Request outputs array.
+     */
+    OutputShape[] outputShapes;
+    /**
+     * Duration of execution. Unless "measure" (presumably the flag on the execution call; confirm)
+     * is true and the execution is successful, all times must be reported as -1. A driver may
+     * choose to report any time as -1, indicating that measurement is not available.
+     */
+    Timing timing;
+}
+
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/Extension.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/Extension.aidl
new file mode 100644
index 0000000000..159e3c15aa
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/Extension.aidl
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.ExtensionOperandTypeInformation;
+
+/**
+ * Describes one extension: its name and the operand types it defines.
+ */
+@VintfStability
+parcelable Extension {
+    /**
+     * The extension name.
+     *
+     * The name must consist of lowercase Latin letters, numbers, periods, and underscores, and
+     * must contain at least one period.
+     *
+     * The name must start with the reverse domain name of the vendor.
+     *
+     * Example: com.google.test_extension
+     */
+    String name;
+    /**
+     * Information about each operand type defined by the extension.
+     */
+    ExtensionOperandTypeInformation[] operandTypes;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/ExtensionNameAndPrefix.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/ExtensionNameAndPrefix.aidl
new file mode 100644
index 0000000000..76074bf416
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/ExtensionNameAndPrefix.aidl
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * One entry of the mapping between extension names and operand/operation type value prefixes.
+ *
+ * An operand or operation whose numeric type value is above {@link IDevice::OPERAND_TYPE_BASE_MAX}
+ * or {@link IDevice::OPERATION_TYPE_BASE_MAX} respectively should be interpreted as an extension
+ * operand/operation. The low {@link IDevice::EXTENSION_TYPE_LOW_BITS_TYPE} bits of the value
+ * correspond to the type ID within the extension and the high
+ * {@link IDevice::EXTENSION_TYPE_HIGH_BITS_PREFIX} bits encode the "prefix", which maps uniquely to
+ * the extension name. The sign bit is always 0.
+ *
+ * For example, if a model contains an operation whose value is 0x7AAABBBB and
+ * {@link Model::extensionNameToPrefix} contains an entry with prefix=0x7AAA and
+ * name="vendor.test.test_extension", it should be read as operation 0xBBBB of that extension.
+ *
+ * This is a one-to-one correspondence. That is, there must be at most one prefix corresponding to
+ * each extension name and at most one extension name corresponding to each prefix.
+ */
+@VintfStability
+parcelable ExtensionNameAndPrefix {
+    /**
+     * The extension name.
+     *
+     * See {@link Extension::name} for the format specification.
+     */
+    String name;
+    /**
+     * The extension prefix. Only the lowest 15 bits are used, so the value must be less than 32768.
+     */
+    char prefix;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/ExtensionOperandTypeInformation.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/ExtensionOperandTypeInformation.aidl
new file mode 100644
index 0000000000..d7f93c10b0
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/ExtensionOperandTypeInformation.aidl
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Describes a single operand type defined by an extension.
+ */
+@VintfStability
+parcelable ExtensionOperandTypeInformation {
+    /**
+     * The extension operand type.
+     */
+    char type;
+    /**
+     * True if the extension operand type represents a tensor, false if it represents a scalar.
+     */
+    boolean isTensor;
+    /**
+     * The size in bytes of the operand (if scalar) or of a single element (if tensor).
+     */
+    int byteSize;
+}
+
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/FusedActivationFunc.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/FusedActivationFunc.aidl
new file mode 100644
index 0000000000..40f1053f41
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/FusedActivationFunc.aidl
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Fused activation function types. Values are implicit, counting from 0 in declaration order.
+ */
+@VintfStability
+@Backing(type="int")
+enum FusedActivationFunc {
+    NONE,
+    RELU,
+    RELU1,
+    RELU6,
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/IBuffer.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/IBuffer.aidl
new file mode 100644
index 0000000000..eb3dec6e4f
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/IBuffer.aidl
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.Memory;
+
+/**
+ * A device memory buffer whose content can be copied from and to shared memory regions.
+ */
+@VintfStability
+interface IBuffer {
+    /**
+     * Sets the content of this buffer from a shared memory region.
+     *
+     * @param src The source shared memory region.
+     * @param dimensions Updated dimensional information. If the dimensions of the IBuffer object
+     *                   are not fully specified, then the dimensions must be fully specified here.
+     *                   If the dimensions of the IBuffer object are fully specified, then the
+     *                   dimensions may be empty here. If dimensions.size() > 0, then all dimensions
+     *                   must be specified here, and any dimension that was specified in the IBuffer
+     *                   object must have the same value here.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if the driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     *     - INVALID_ARGUMENT if the provided memory is invalid, or if the dimensions are invalid
+     */
+    void copyFrom(in Memory src, in int[] dimensions);
+
+    /**
+     * Copies the content of this buffer into a shared memory region.
+     *
+     * The IBuffer object must have been initialized before the call to IBuffer::copyTo. For more
+     * information on the state of the IBuffer object, refer to IDevice::allocate.
+     *
+     * @param dst The destination shared memory region.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if the driver is offline or busy
+     *     - GENERAL_FAILURE if the IBuffer object is uninitialized, or there is an unspecified
+     *       error
+     *     - INVALID_ARGUMENT if the provided memory is invalid
+     */
+    void copyTo(in Memory dst);
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/IDevice.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/IDevice.aidl
new file mode 100644
index 0000000000..0c4954c1b8
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/IDevice.aidl
@@ -0,0 +1,431 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.BufferDesc;
+import android.hardware.neuralnetworks.BufferRole;
+import android.hardware.neuralnetworks.Capabilities;
+import android.hardware.neuralnetworks.DeviceBuffer;
+import android.hardware.neuralnetworks.DeviceType;
+import android.hardware.neuralnetworks.ExecutionPreference;
+import android.hardware.neuralnetworks.Extension;
+import android.hardware.neuralnetworks.IPreparedModel;
+import android.hardware.neuralnetworks.IPreparedModelCallback;
+import android.hardware.neuralnetworks.IPreparedModelParcel;
+import android.hardware.neuralnetworks.Model;
+import android.hardware.neuralnetworks.NumberOfCacheFiles;
+import android.hardware.neuralnetworks.Priority;
+
+/**
+ * This interface represents a device driver.
+ */
+@VintfStability
+interface IDevice {
+    /**
+     * The byte size of the cache token.
+     */
+    const int BYTE_SIZE_OF_CACHE_TOKEN = 32;
+    /**
+     * The maximum number of files for each type of cache in compilation caching.
+     */
+    const int MAX_NUMBER_OF_CACHE_FILES = 32;
+
+    /**
+     * Numeric values of extension operand and operation types have the following structure:
+     * - The sign bit is always 0.
+     * - 15 high bits represent the "prefix", which corresponds uniquely to the extension name.
+     * - 16 low bits represent the type ID within the extension.
+     */
+    const int EXTENSION_TYPE_HIGH_BITS_PREFIX = 15;
+    const int EXTENSION_TYPE_LOW_BITS_TYPE = 16;
+    /**
+     * OperandType with any value above {@link IDevice::OPERAND_TYPE_BASE_MAX} must be interpreted
+     * as an extension type according to {@link Model::extensionNameToPrefix}.
+     */
+    const int OPERAND_TYPE_BASE_MAX = 0xFFFF;
+    /**
+     * OperationType with any value above {@link IDevice::OPERATION_TYPE_BASE_MAX} must be
+     * interpreted as an extension type according to {@link Model::extensionNameToPrefix}.
+     */
+    const int OPERATION_TYPE_BASE_MAX = 0xFFFF;
+
+    /**
+     * Allocates a driver-managed buffer with the properties specified by the buffer descriptor as
+     * well as the input and output roles.
+     *
+     * The allocate function must verify its inputs are correct. If there is an error, or if a
+     * certain role or property is not supported by the driver, the allocate function must return a
+     * service specific exception with an appropriate ErrorStatus. If the allocation is successful,
+     * this method must return a DeviceBuffer object with the produced IBuffer and a positive token
+     * identifying the allocated buffer. A successful allocation must accommodate all of the
+     * specified roles and buffer properties.
+     *
+     * The buffer is allocated to an uninitialized state. An uninitialized buffer may only be used
+     * in ways that are specified by outputRoles. A buffer is initialized after it is used as an
+     * output in a successful execution, or after a successful invocation of IBuffer::copyFrom on
+     * the buffer. An initialized buffer may be used according to all roles specified in inputRoles
+     * and outputRoles. A buffer will return to the uninitialized state if it is used as an output
+     * in a failed execution, or after a failed invocation of IBuffer::copyFrom on the buffer.
+     *
+     * The dimensions of the buffer can be deduced from the buffer descriptor as well as the
+     * dimensions of the corresponding model operands of the input and output roles. The dimensions
+     * or rank of the buffer may be unknown at this stage. As such, some driver services may only
+     * create a placeholder and defer the actual allocation until execution time. Note that the same
+     * buffer may be used for different shapes of outputs on different executions. When the buffer
+     * is used as an input, the input shape must be the same as the output shape from the last
+     * execution using this buffer as an output.
+     *
+     * The driver must apply proper validation upon every usage of the buffer, and must fail the
+     * execution immediately if the usage is illegal.
+     *
+     * @param desc A buffer descriptor specifying the properties of the buffer to allocate.
+     * @param preparedModels A vector of IPreparedModel objects. Must only contain IPreparedModel
+     *                       objects from the same IDevice as this method is being invoked on.
+     * @param inputRoles A vector of roles with each specifying an input to a prepared model.
+     * @param outputRoles A vector of roles with each specifying an output to a prepared model. Each
+     *                    role specified in inputRoles and outputRoles must be unique. The
+     *                    corresponding model operands of the roles must have the same OperandType,
+     *                    scale, zero point, and ExtraParams. The dimensions of the operands and the
+     *                    dimensions specified in the buffer descriptor must be compatible with each
+     *                    other. Two dimensions are incompatible if there is at least one axis that
+     *                    is fully specified in both but has different values.
+     * @return DeviceBuffer object containing the allocated IBuffer object and a positive token that
+     *     can be used to reference the buffer as one of the memory pools.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if a certain buffer property or a certain role is not supported,
+     *       or if there is an unspecified error
+     *     - INVALID_ARGUMENT if one of the input arguments is invalid
+     *     - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
+     */
+    DeviceBuffer allocate(in BufferDesc desc, in IPreparedModelParcel[] preparedModels,
+        in BufferRole[] inputRoles, in BufferRole[] outputRoles);
+
+    /**
+     * Gets the capabilities of a driver.
+     *
+     * @return Capabilities of the driver.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     */
+    Capabilities getCapabilities();
+
+    /**
+     * Gets the caching requirements of the driver implementation.
+     *
+     * There are two types of cache file descriptors provided to the driver: model cache and data
+     * cache.
+     *
+     * The data cache is for caching constant data, possibly including preprocessed and transformed
+     * tensor buffers. Any modification to the data cache should have no worse effect than
+     * generating bad output values at execution time.
+     *
+     * The model cache is for caching security-sensitive data such as compiled executable machine
+     * code in the device's native binary format. A modification to the model cache may affect the
+     * driver's execution behavior, and a malicious client could make use of this to execute beyond
+     * the granted permission. Thus, the driver must always check whether the model cache is
+     * corrupted before preparing the model from cache.
+     *
+     * getNumberOfCacheFilesNeeded returns how many of each type of cache files the driver
+     * implementation needs to cache a single prepared model. Returning 0 for both types indicates
+     * compilation caching is not supported by this driver. The driver may still choose not to cache
+     * certain compiled models even if it reports that caching is supported.
+     *
+     * If the device reports that caching is not supported, the user may avoid calling
+     * IDevice::prepareModelFromCache or providing cache file descriptors to
+     * IDevice::prepareModel.
+     *
+     * @return NumberOfCacheFiles structure indicating how many files for model and data cache the
+     *     driver needs to cache a single prepared model. It must be less than or equal to
+     *     MAX_NUMBER_OF_CACHE_FILES.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     */
+    NumberOfCacheFiles getNumberOfCacheFilesNeeded();
+
+    /**
+     * Gets information about extensions supported by the driver implementation.
+     *
+     * All extension operations and operands must be fully supported for the extension to appear in
+     * the list of supported extensions.
+     *
+     * @return A list of supported extensions.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     */
+    Extension[] getSupportedExtensions();
+
+    /**
+     * Gets the supported operations in a model.
+     *
+     * getSupportedOperations indicates which operations of the top-level subgraph are fully
+     * supported by the vendor driver. If an operation may not be supported for any reason,
+     * getSupportedOperations must return false for that operation.
+     *
+     * The {@link OperationType::IF} and {@link OperationType::WHILE} operations may only be fully
+     * supported if the vendor driver fully supports all operations in the referenced subgraphs.
+     *
+     * @param model A model whose operations -- and their corresponding operands -- are to be
+     *              verified by the driver.
+     * @return A list of supported operations, where true indicates the operation is supported and
+     *     false indicates the operation is not supported. The index of "supported" corresponds with
+     *     the index of the operation it is describing in the main subgraph.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     *     - INVALID_ARGUMENT if provided model is invalid
+     */
+    boolean[] getSupportedOperations(in Model model);
+
+    /**
+     * Get the type of a given device.
+     *
+     * The device type can be used to help application developers to distribute Machine Learning
+     * workloads and other workloads such as graphical rendering. E.g., for an app which renders AR
+     * scenes based on real time object detection results, the developer could choose an ACCELERATOR
+     * type device for ML workloads, and reserve GPU for graphical rendering.
+     *
+     * @return The DeviceType of the device. Please note, this is not a bitfield of DeviceTypes.
+     *     Each device must only be of a single DeviceType.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if the query resulted in an unspecified error
+     */
+    DeviceType getType();
+
+    /**
+     * Get the version string of the driver implementation.
+     *
+     * The version string must be a unique token among the set of version strings of drivers of a
+     * specific device. The token identifies the device driver's implementation. The token must not
+     * be confused with the feature level which is solely defined by the interface version. This API
+     * is opaque to the Android framework, but the Android framework may use the information for
+     * debugging or to pass on to NNAPI applications.
+     *
+     * Application developers sometimes have specific requirements to ensure good user experiences,
+     * and they need more information to make intelligent decisions when the Android framework
+     * cannot. For example, combined with the device name and other information, the token can help
+     * NNAPI applications filter devices based on their needs:
+     *     - An application demands a certain level of performance, but a specific version of the
+     *       driver cannot meet that requirement because of a performance regression.
+     *       The application can disallow the driver based on the version provided.
+     *     - An application has a minimum precision requirement, but certain versions of
+     *       the driver cannot meet that requirement because of bugs or certain optimizations.
+     *       The application can filter out versions of these drivers.
+     *
+     * @return The version string of the device implementation. Must have nonzero length.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if the query resulted in an unspecified error
+     */
+    String getVersionString();
+
+    /**
+     * Asynchronously creates a prepared model for execution and optionally saves it into cache
+     * files.
+     *
+     * prepareModel is used to make any necessary transformations to, or alternative representations
+     * of, a model for execution, possibly including transformations on the constant data,
+     * optimization on the model's graph, or compilation into the device's native binary format. The
+     * model itself is not changed.
+     *
+     * Optionally, caching information may be provided for the driver to save the prepared model to
+     * cache files for faster model compilation time when the same model preparation is requested in
+     * the future. There are two types of cache file descriptors provided to the driver: model cache
+     * and data cache. For more information on the two types of cache, refer to
+     * getNumberOfCacheFilesNeeded.
+     *
+     * The file descriptors must be opened with read and write permission. A file may have any size,
+     * and the corresponding file descriptor may have any offset. The driver must truncate a file to
+     * zero size before writing to that file. The file descriptors may be closed by the client once
+     * the asynchronous preparation has finished. The driver must dup a file descriptor if it wants
+     * to get access to the cache file later.
+     *
+     * The model is prepared asynchronously with respect to the caller. The prepareModel function
+     * must verify the inputs to the prepareModel function related to preparing the model (as
+     * opposed to saving the prepared model to cache) are correct. If there is an error,
+     * prepareModel must immediately invoke the callback with the appropriate ErrorStatus value and
+     * nullptr for the IPreparedModel, then return a status with a service specific exception with
+     * the same ErrorStatus. If the inputs to the prepareModel function that are related to
+     * preparing the model are valid and there is no error, prepareModel must launch an asynchronous
+     * task to prepare the model in the background, and immediately return from prepareModel. If the
+     * asynchronous task fails to launch, prepareModel must immediately invoke the callback with
+     * ErrorStatus::GENERAL_FAILURE and nullptr for the IPreparedModel, then return a service
+     * specific exception with ErrorStatus::GENERAL_FAILURE.
+     *
+     * When the asynchronous task has finished preparing the model, it must immediately invoke the
+     * callback function provided as an input to prepareModel. If the model was prepared
+     * successfully, the callback object must be invoked with an error status of ErrorStatus::NONE
+     * and the produced IPreparedModel object. If an error occurred preparing the model, the
+     * callback object must be invoked with the appropriate ErrorStatus value and nullptr for the
+     * IPreparedModel.
+     *
+     * The model is prepared with a priority. This priority is relative to other prepared models
+     * owned by the same client. Higher priority executions may use more compute resources than
+     * lower priority executions, and may preempt or starve lower priority executions.
+     *
+     * prepareModel can be called with an optional deadline. If the model is not able to be prepared
+     * before the provided deadline, the model preparation may be aborted, and either
+     * {@link ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
+     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} may be returned. The error due to an abort must be
+     * sent the same way as other errors, described above.
+     *
+     * Optionally, the driver may save the prepared model to cache during the asynchronous
+     * preparation. Any error that occurs when saving to cache must not affect the status of
+     * preparing the model. Even if the input arguments related to the cache may be invalid, or the
+     * driver may fail to save to cache, the prepareModel function must finish preparing the model.
+     * The driver may choose not to save to cache even if the caching information is provided and
+     * valid.
+     *
+     * The only information that may be unknown to the model at this stage is the shape of the
+     * tensors, which may only be known at execution time. As such, some driver services may return
+     * partially prepared models, where the prepared model may only be finished when it is paired
+     * with a set of inputs to the model. Note that the same prepared model object may be used with
+     * different shapes of inputs on different (possibly concurrent) executions.
+     *
+     * Multiple threads may call prepareModel on the same model concurrently.
+     *
+     * @param model The model to be prepared for execution.
+     * @param preference Indicates the intended execution behavior of a prepared model.
+     * @param priority The priority of the prepared model relative to other prepared models owned by
+     *                 the client.
+     * @param deadline The time by which the model is expected to be prepared. The time is measured
+     *                 in nanoseconds since epoch of the steady clock (as from
+     *                 std::chrono::steady_clock). If the model cannot be prepared by the deadline,
+     *                 the preparation may be aborted. Passing -1 means the deadline is omitted.
+     *                 Other negative values are invalid.
+     * @param modelCache A vector of file descriptors for the security-sensitive cache. The length
+     *                   of the vector must either be 0 indicating that caching information is not
+     *                   provided, or match the numModelCache returned from
+     *                   getNumberOfCacheFilesNeeded. The cache file descriptors will be provided in
+     *                   the same order when retrieving the preparedModel from cache files with
+     *                   prepareModelFromCache.
+     * @param dataCache A vector of file descriptors for the constants' cache. The length of the
+     *                  vector must either be 0 indicating that caching information is not provided,
+     *                  or match the numDataCache returned from getNumberOfCacheFilesNeeded. The
+     *                  cache file descriptors will be provided in the same order when retrieving
+     *                  the preparedModel from cache files with prepareModelFromCache.
+     * @param token A caching token of length BYTE_SIZE_OF_CACHE_TOKEN identifying the prepared
+     *              model. The same token will be provided when retrieving the prepared model from
+     *              the cache files with prepareModelFromCache.  Tokens should be chosen to have a
+     *              low rate of collision for a particular application. The driver cannot detect a
+     *              collision; a collision will result in a failed execution or in a successful
+     *              execution that produces incorrect output values. If both modelCache and
+     *              dataCache are empty indicating that caching information is not provided, this
+     *              token must be ignored.
+     * @param callback A callback object used to return the error status of preparing the model for
+     *                 execution and the prepared model if successful, nullptr otherwise. The
+     *                 callback object's notify function must be called exactly once, even if the
+     *                 model could not be prepared.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     *     - INVALID_ARGUMENT if one of the input arguments related to preparing the model is
+     *       invalid
+     *     - MISSED_DEADLINE_* if the preparation is aborted because the model cannot be prepared by
+     *       the deadline
+     *     - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
+     */
+    void prepareModel(in Model model, in ExecutionPreference preference, in Priority priority,
+        in long deadline, in ParcelFileDescriptor[] modelCache, in ParcelFileDescriptor[] dataCache,
+        in byte[] token, in IPreparedModelCallback callback);
+
+    /**
+     * Creates a prepared model from cache files for execution.
+     *
+     * prepareModelFromCache is used to retrieve a prepared model directly from cache files to avoid
+     * slow model compilation time. There are two types of cache file descriptors provided to the
+     * driver: model cache and data cache. For more information on the two types of cache files,
+     * refer to getNumberOfCacheFilesNeeded.
+     *
+     * The file descriptors must be opened with read and write permission. A file may have any size,
+     * and the corresponding file descriptor may have any offset. The driver must truncate a file to
+     * zero size before writing to that file. The file descriptors may be closed by the client once
+     * the asynchronous preparation has finished. The driver must dup a file descriptor if it wants
+     * to get access to the cache file later.
+     *
+     * The model is prepared asynchronously with respect to the caller. The prepareModelFromCache
+     * function must verify the inputs to the prepareModelFromCache function are correct, and that
+     * the security-sensitive cache has not been modified since it was last written by the driver.
+     * If there is an error, or if compilation caching is not supported, or if the
+     * security-sensitive cache has been modified, prepareModelFromCache must immediately invoke the
+     * callback with the appropriate ErrorStatus value and nullptr for the IPreparedModel, then
+     * return a status with a service specific exception with the same ErrorStatus. If the inputs to
+     * the prepareModelFromCache function are valid, the security-sensitive cache is not modified,
+     * and there is no error, prepareModelFromCache must launch an asynchronous task to prepare the
+     * model in the background, and immediately return from prepareModelFromCache. If the
+     * asynchronous task fails to launch, prepareModelFromCache must immediately invoke the callback
+     * with ErrorStatus::GENERAL_FAILURE and nullptr for the IPreparedModel, then return a service
+     * specific exception with ErrorStatus::GENERAL_FAILURE.
+     *
+     * When the asynchronous task has finished preparing the model, it must immediately invoke the
+     * callback function provided as an input to prepareModelFromCache. If the model was prepared
+     * successfully, the callback object must be invoked with an error status of ErrorStatus::NONE
+     * and the produced IPreparedModel object. If an error occurred preparing the model, the
+     * callback object must be invoked with the appropriate ErrorStatus value and nullptr for the
+     * IPreparedModel.
+     *
+     * prepareModelFromCache can be called with an optional deadline. If the model is not able to
+     * be prepared before the provided deadline, the model preparation may be aborted, and either
+     * {@link ErrorStatus::MISSED_DEADLINE_TRANSIENT} or
+     * {@link ErrorStatus::MISSED_DEADLINE_PERSISTENT} may be returned. The error due to an abort
+     * must be sent the same way as other errors, described above.
+     *
+     * The only information that may be unknown to the model at this stage is the shape of the
+     * tensors, which may only be known at execution time. As such, some driver services may return
+     * partially prepared models, where the prepared model may only be finished when it is paired
+     * with a set of inputs to the model. Note that the same prepared model object may be used with
+     * different shapes of inputs on different (possibly concurrent) executions.
+     *
+     * @param deadline The time by which the model is expected to be prepared. The time is measured
+     *                 in nanoseconds since epoch of the steady clock (as from
+     *                 std::chrono::steady_clock). If the model cannot be prepared by the deadline,
+     *                 the preparation may be aborted. Passing -1 means the deadline is omitted.
+     *                 Other negative values are invalid.
+     * @param modelCache A vector of file descriptors for the security-sensitive cache. The length
+     *                   of the vector must match the numModelCache returned from
+     *                   getNumberOfCacheFilesNeeded. The cache file descriptors will be provided in
+     *                   the same order as with prepareModel.
+     * @param dataCache A vector of file descriptors for the constants' cache. The length of the
+     *                  vector must match the numDataCache returned from
+     *                  getNumberOfCacheFilesNeeded. The cache file descriptors will be provided in
+     *                  the same order as with prepareModel.
+     * @param token A caching token of length BYTE_SIZE_OF_CACHE_TOKEN identifying the prepared
+     *              model. It is the same token provided when saving the cache files with
+     *              prepareModel. Tokens should be chosen to have a low rate of collision for a
+     *              particular application. The driver cannot detect a collision; a collision will
+     *              result in a failed execution or in a successful execution that produces
+     *              incorrect output values.
+     * @param callback A callback object used to return the error status of preparing the model for
+     *                 execution and the prepared model if successful, nullptr otherwise. The
+     *                 callback object's notify function must be called exactly once, even if the
+     *                 model could not be prepared.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if caching is not supported or if there is an unspecified error
+     *     - INVALID_ARGUMENT if one of the input arguments is invalid
+     *     - MISSED_DEADLINE_* if the preparation is aborted because the model cannot be prepared by
+     *       the deadline
+     *     - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
+     */
+    void prepareModelFromCache(in long deadline, in ParcelFileDescriptor[] modelCache,
+        in ParcelFileDescriptor[] dataCache, in byte[] token, in IPreparedModelCallback callback);
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/IFencedExecutionCallback.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/IFencedExecutionCallback.aidl
new file mode 100644
index 0000000000..47e5916665
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/IFencedExecutionCallback.aidl
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.ErrorStatus;
+import android.hardware.neuralnetworks.Timing;
+
+/**
+ * IFencedExecutionCallback can be used to query the error status result and duration information
+ * from an IPreparedModel::executeFenced call.
+ */
+@VintfStability
+interface IFencedExecutionCallback {
+    /**
+     * The getExecutionInfo method is used by the clients to query error status result and duration
+     * information. The method must only be called after the actual evaluation has finished or
+     * resulted in a runtime error, as indicated by the status of the sync fence returned by the
+     * IPreparedModel::executeFenced call, otherwise GENERAL_FAILURE must be returned.
+     *
+     * @param out timingLaunched The duration starts when executeFenced is called and ends when
+     *                           executeFenced signals the returned syncFence. Unless measureTiming
+     *                           was set to true when launching the execution and status is NONE,
+     *                           all times must be reported as -1. A driver may choose to report any
+     *                           time as -1, indicating that particular measurement is not
+     *                           available.
+     * @param out timingFenced The duration starts when all waitFor sync fences have been signaled
+     *                         and ends when executeFenced signals the returned syncFence. Unless
+     *                         measureTiming was set to true when launching the execution and status
+     *                         is NONE, all times must be reported as -1. A driver may choose to
+     *                         report any time as -1, indicating that particular measurement is not
+     *                         available.
+     * @return Error status returned from the asynchronously dispatched execution must be:
+     *     - NONE if the asynchronous execution was successful
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if the asynchronous task resulted in an unspecified error
+     *     - MISSED_DEADLINE_* if the execution is aborted because it cannot be completed by the
+     *       deadline
+     *     - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
+     */
+    ErrorStatus getExecutionInfo(out Timing timingLaunched, out Timing timingFenced);
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/IPreparedModel.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/IPreparedModel.aidl
new file mode 100644
index 0000000000..c1b2992010
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/IPreparedModel.aidl
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.common.NativeHandle;
+import android.hardware.neuralnetworks.ErrorStatus;
+import android.hardware.neuralnetworks.ExecutionResult;
+import android.hardware.neuralnetworks.IFencedExecutionCallback;
+import android.hardware.neuralnetworks.Request;
+
+/**
+ * IPreparedModel describes a model that has been prepared for execution and is used to launch
+ * executions.
+ */
+@VintfStability
+interface IPreparedModel {
+    /**
+     * Each {@link OperationType::WHILE} operation in the model has an implicit execution timeout
+     * duration associated with it ("loop timeout duration"). This duration is configurable on a
+     * per-execution basis and must not exceed 15 seconds. The default value is 2 seconds. The units
+     * are nanoseconds.
+     */
+    const long DEFAULT_LOOP_TIMEOUT_DURATION_NS = 2000000000;
+    const long MAXIMUM_LOOP_TIMEOUT_DURATION_NS = 15000000000;
+
+    /**
+     * Performs a synchronous execution on a prepared model.
+     *
+     * The execution is performed synchronously with respect to the caller. executeSynchronously
+     * must verify the inputs to the function are correct, and the usages of memory pools allocated
+     * by IDevice::allocate are valid. If there is an error, executeSynchronously must immediately
+     * return a service specific exception with the appropriate ErrorStatus value. If the inputs to
+     * the function are valid and there is no error, executeSynchronously must perform the
+     * execution, and must not return until the execution is complete.
+     *
+     * The caller must not change the content of any data object referenced by 'request' (described
+     * by the {@link DataLocation} of a {@link RequestArgument}) until executeSynchronously returns.
+     * executeSynchronously must not change the content of any of the data objects corresponding to
+     * 'request' inputs.
+     *
+     * If the prepared model was prepared from a model wherein all tensor operands have fully
+     * specified dimensions, and the inputs to the function are valid, and at execution time every
+     * operation's input operands have legal values, then the execution should complete
+     * successfully: there must be no failure unless the device itself is in a bad state.
+     *
+     * executeSynchronously may be called with an optional deadline. If the execution is not able to
+     * be completed before the provided deadline, the execution may be aborted, and either
+     * {@link ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
+     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} may be returned. The error due to an abort must be
+     * sent the same way as other errors, described above.
+     *
+     * Any number of calls to the execute* functions, in any combination, may be made concurrently,
+     * even on the same IPreparedModel object.
+     *
+     * @param request The input and output information on which the prepared model is to be
+     *                executed.
+     * @param measureTiming Specifies whether or not to measure duration of the execution. The
+     *                      duration runs from the time the driver sees the executeSynchronously
+     *                      call to the time the driver returns from the function.
+     * @param deadline The time by which the execution is expected to complete. The time is measured
+     *                 in nanoseconds since epoch of the steady clock (as from
+     *                 std::chrono::steady_clock). If the execution cannot be finished by the
+     *                 deadline, the execution may be aborted. Passing -1 means the deadline is
+     *                 omitted. Other negative values are invalid.
+     * @param loopTimeoutDuration The maximum amount of time in nanoseconds that should be spent
+     *                            executing a {@link OperationType::WHILE} operation. If a loop
+     *                            condition model does not output false within this duration, the
+     *                            execution must be aborted. If -1 is provided, the maximum amount
+     *                            of time is {@link DEFAULT_LOOP_TIMEOUT_DURATION_NS}. Other
+     *                            negative values are invalid. When provided, the duration must not
+     *                            exceed {@link MAXIMUM_LOOP_TIMEOUT_DURATION_NS}.
+     * @return ExecutionResult parcelable, containing the status of the execution, output shapes and
+     *     timing information.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     *     - INVALID_ARGUMENT if one of the input arguments is invalid
+     *     - MISSED_DEADLINE_* if the execution is aborted because it cannot be completed by the
+     *       deadline
+     *     - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
+     */
+    ExecutionResult executeSynchronously(in Request request, in boolean measureTiming,
+        in long deadline, in long loopTimeoutDuration);
+
+    /**
+     * Launch a fenced asynchronous execution on a prepared model.
+     *
+     * The execution is performed asynchronously with respect to the caller. executeFenced must
+     * verify the inputs to the function are correct, and the usages of memory pools allocated by
+     * IDevice::allocate are valid. If there is an error, executeFenced must immediately return a
+     * service specific exception with the corresponding ErrorStatus. If the inputs to the function
+     * are valid and there is no error, executeFenced must dispatch an asynchronous task to perform
+     * the execution in the background, assign a sync fence that will be signaled once the execution
+     * is completed and immediately return a callback that can be used by the client to query the
+     * duration and runtime error status. If the task has finished before the call returns,
+     * syncFence file descriptor may be set to -1. The execution must wait for all the sync fences
+     * (if any) in waitFor to be signaled before starting the actual execution.
+     *
+     * When the asynchronous task has finished its execution, it must immediately signal the
+     * syncFence returned from the executeFenced call. After the syncFence is signaled, the task
+     * must not modify the content of any data object referenced by 'request' (described by the
+     * {@link DataLocation} of a {@link RequestArgument}).
+     *
+     * executeFenced may be called with an optional deadline and an optional duration. If the
+     * execution is not able to be completed before the provided deadline or within the timeout
+     * duration (measured from when all sync fences in waitFor are signaled), whichever comes
+     * earlier, the execution may be aborted, and either
+     * {@link ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
+     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} may be returned. The error due to an abort must be
+     * sent the same way as other errors, described above.
+     *
+     * If any of the sync fences in waitFor changes to error status after the executeFenced call
+     * succeeds, or the execution is aborted because it cannot finish before the deadline has been
+     * reached or the duration has elapsed, the driver must immediately set the returned syncFence
+     * to error status.
+     *
+     * Any number of calls to the execute* functions, in any combination, may be made concurrently,
+     * even on the same IPreparedModel object.
+     *
+     * @param request The input and output information on which the prepared model is to be
+     *                executed. The outputs in the request must have fully specified dimensions.
+     * @param waitFor A vector of sync fence file descriptors. Execution must not start until all
+     *                sync fences have been signaled.
+     * @param measureTiming Specifies whether or not to measure duration of the execution.
+     * @param deadline The time by which the execution is expected to complete. The time is measured
+     *                 in nanoseconds since epoch of the steady clock (as from
+     *                 std::chrono::steady_clock). If the execution cannot be finished by the
+     *                 deadline, the execution may be aborted. Passing -1 means the deadline is
+     *                 omitted. Other negative values are invalid.
+     * @param loopTimeoutDuration The maximum amount of time in nanoseconds that should be spent
+     *                            executing a {@link OperationType::WHILE} operation. If a loop
+     *                            condition model does not output false within this duration, the
+     *                            execution must be aborted. If -1 is provided, the maximum amount
+     *                            of time is {@link DEFAULT_LOOP_TIMEOUT_DURATION_NS}. Other
+     *                            negative values are invalid. When provided, the duration must not
+     *                            exceed {@link MAXIMUM_LOOP_TIMEOUT_DURATION_NS}.
+     * @param duration The length of time in nanoseconds within which the execution is expected to
+     *                 complete after all sync fences in waitFor are signaled. If the execution
+     *                 cannot be finished within the duration, the execution may be aborted. Passing
+     *                 -1 means the duration is omitted. Other negative values are invalid.
+     * @param out syncFence The sync fence that will be signaled when the task is completed. The
+     *                      sync fence will be set to error if a critical error, e.g. hardware
+     *                      failure or kernel panic, occurs when doing execution.
+     * @return The IFencedExecutionCallback can be used to query information like duration and error
+     *     status when the execution is completed.
+     * @throws ServiceSpecificException with one of the following ErrorStatus values:
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     *     - INVALID_ARGUMENT if one of the input arguments is invalid, including fences in error
+     *       states.
+     *     - MISSED_DEADLINE_* if the execution is aborted because it cannot be completed by the
+     *       deadline
+     *     - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
+     */
+    IFencedExecutionCallback executeFenced(in Request request, in ParcelFileDescriptor[] waitFor,
+        in boolean measureTiming, in long deadline, in long loopTimeoutDuration, in long duration,
+        out @nullable ParcelFileDescriptor syncFence);
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/IPreparedModelCallback.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/IPreparedModelCallback.aidl
new file mode 100644
index 0000000000..adb421830c
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/IPreparedModelCallback.aidl
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.ErrorStatus;
+import android.hardware.neuralnetworks.IPreparedModel;
+
+/**
+ * IPreparedModelCallback must be used to return a prepared model produced by an asynchronous task
+ * launched from IDevice::prepareModel*.
+ */
+@VintfStability
+interface IPreparedModelCallback {
+    /**
+     * Notify must be invoked immediately after the asynchronous task holding this callback has
+     * finished preparing the model. If the model was successfully prepared, the method must be
+     * invoked with ErrorStatus::NONE and the prepared model. If the model was not able to be
+     * successfully prepared, the method must be invoked with the appropriate ErrorStatus and
+     * nullptr as the IPreparedModel. If the asynchronous task holding this callback fails to launch
+     * or if the model provided to IDevice::prepareModel is invalid, the notify method must be
+     * invoked with the appropriate error as well as nullptr for the IPreparedModel.
+     *
+     * @param status Error status returned from the asynchronous model preparation task; must be:
+     *               - NONE if the asynchronous task successfully prepared the model
+     *               - DEVICE_UNAVAILABLE if driver is offline or busy
+     *               - GENERAL_FAILURE if the asynchronous task resulted in an unspecified error
+     *               - INVALID_ARGUMENT if one of the input arguments to prepareModel is invalid
+     *               - MISSED_DEADLINE_* if the preparation is aborted because the model cannot be
+     *                 prepared by the deadline
+     *               - RESOURCE_EXHAUSTED_* if the task was aborted by the driver
+     * @param preparedModel A model that has been asynchronously prepared for execution. If the
+     *                      model was unable to be prepared due to an error, nullptr must be passed
+     *                      in place of the IPreparedModel object.
+     */
+    void notify(in ErrorStatus status, in IPreparedModel preparedModel);
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/IPreparedModelParcel.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/IPreparedModelParcel.aidl
new file mode 100644
index 0000000000..f198c3f056
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/IPreparedModelParcel.aidl
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.IPreparedModel;
+
+/**
+ * A parcelable wrapping a single IPreparedModel, for passing a vector of IPreparedModel objects.
+ */
+@VintfStability
+parcelable IPreparedModelParcel {
+    IPreparedModel preparedModel;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/Memory.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/Memory.aidl
new file mode 100644
index 0000000000..8ecb067f30
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/Memory.aidl
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks;
+import android.hardware.common.NativeHandle;
+
+import android.os.ParcelFileDescriptor;
+
+/**
+ * A type that is used to pass pieces of shared memory between processes.
+ * The type structure mimics the hidl_memory type from HIDL (handle, size, and name).
+ */
+@VintfStability
+parcelable Memory {
+    NativeHandle handle;
+    long size;
+    String name;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/Model.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/Model.aidl
new file mode 100644
index 0000000000..3bb73185f4
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/Model.aidl
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.ExtensionNameAndPrefix;
+import android.hardware.neuralnetworks.Subgraph;
+import android.hardware.neuralnetworks.Memory;
+
+/**
+ * A Neural Network Model.
+ *
+ * This includes not only the execution graph, but also constant data such as weights or scalars
+ * added at construction time. The only information that may not be known is the shape of the input
+ * tensors.
+ */
+@VintfStability
+parcelable Model {
+    /**
+     * The top-level subgraph.
+     */
+    Subgraph main;
+    /**
+     * Referenced subgraphs.
+     *
+     * Each subgraph is referenced by the main subgraph or at least one other referenced subgraph.
+     *
+     * There must be no reference cycles.
+     */
+    Subgraph[] referenced;
+    /**
+     * A byte buffer containing operand data that were copied into the model.
+     *
+     * An operand's value must be located here if and only if Operand::lifetime equals
+     * OperandLifeTime::CONSTANT_COPY.
+     */
+    byte[] operandValues;
+    /**
+     * A collection of shared memory pools containing operand values.
+     *
+     * An operand's value must be located here if and only if Operand::lifetime equals
+     * OperandLifeTime::CONSTANT_POOL.
+     */
+    Memory[] pools;
+    /**
+     * 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or precision as low as that
+     * of the IEEE 754 16-bit floating-point format.
+     * 'false' indicates TENSOR_FLOAT32 must be calculated using at least the range and precision of
+     * the IEEE 754 32-bit floating-point format.
+     */
+    boolean relaxComputationFloat32toFloat16;
+    /**
+     * The mapping between extension names and prefixes of operand and operation type values.
+     */
+    ExtensionNameAndPrefix[] extensionNameToPrefix;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/NumberOfCacheFiles.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/NumberOfCacheFiles.aidl
new file mode 100644
index 0000000000..1ca2676646
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/NumberOfCacheFiles.aidl
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Structure indicating how many files for model and data cache the driver needs to cache a
+ * single prepared model.
+ */
+@VintfStability
+parcelable NumberOfCacheFiles {
+    int numModelCache;
+    int numDataCache;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/Operand.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/Operand.aidl
new file mode 100644
index 0000000000..243a89d719
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/Operand.aidl
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.DataLocation;
+import android.hardware.neuralnetworks.OperandExtraParams;
+import android.hardware.neuralnetworks.OperandLifeTime;
+import android.hardware.neuralnetworks.OperandType;
+
+/**
+ * Describes one operand of the model's graph.
+ */
+@VintfStability
+parcelable Operand {
+    /**
+     * The data type.
+     *
+     * Besides the values listed in {@link OperandType}, any value above
+     * {@link IDevice::OPERAND_TYPE_BASE_MAX} is possible and should be interpreted as an extension
+     * type according to {@link Model::extensionNameToPrefix}.
+     */
+    OperandType type;
+    /**
+     * Dimensions of the operand.
+     *
+     * For a scalar operand, dimensions must be empty (length 0).
+     *
+     * A tensor operand with all dimensions specified has "fully specified" dimensions. Whenever
+     * possible (i.e., whenever the dimensions are known at model construction time), a tensor
+     * operand should have (but is not required to have) fully specified dimensions, in order to
+     * enable the best possible performance.
+     *
+     * If a tensor operand's dimensions are not fully specified, the dimensions of the operand are
+     * deduced from the operand dimensions and values of the operation for which that operand is an
+     * output or from the corresponding {@link OperationType::IF} or {@link OperationType::WHILE}
+     * operation input operand dimensions in the case of referenced subgraph input operands.
+     *
+     * In the following situations, a tensor operand's dimensions must be fully specified:
+     *
+     *     - The operand has lifetime CONSTANT_COPY or CONSTANT_POOL.
+     *
+     *     - The operand has lifetime SUBGRAPH_INPUT and belongs to the main subgraph. Fully
+     *       specified dimensions must either be present in the Operand or they must be provided in
+     *       the corresponding RequestArgument.
+     *       EXCEPTION: If the input is optional and omitted (by setting the hasNoValue field of the
+     *       corresponding RequestArgument to true) then it need not have fully specified
+     *       dimensions.
+     *
+     * A tensor operand with some number of unspecified dimensions is represented by setting each
+     * unspecified dimension to 0.
+     *
+     * A tensor operand with unspecified rank is represented by providing an empty dimensions
+     * vector.
+     */
+    int[] dimensions;
+    /**
+     * Quantized scale of the operand.
+     *
+     * Must be 0 when not applicable to an operand type.
+     *
+     * See {@link OperandType}.
+     */
+    float scale;
+    /**
+     * Quantized zero-point offset of the operand.
+     *
+     * Must be 0 when not applicable to an operand type.
+     *
+     * See {@link OperandType}.
+     */
+    int zeroPoint;
+    /**
+     * How the operand is used.
+     */
+    OperandLifeTime lifetime;
+    /**
+     * Where to find the data for this operand.
+     * If the lifetime is TEMPORARY_VARIABLE, SUBGRAPH_INPUT, SUBGRAPH_OUTPUT, or NO_VALUE:
+     * - All the fields must be 0.
+     * If the lifetime is CONSTANT_COPY:
+     * - location.poolIndex is 0.
+     * - location.offset is the offset in bytes into {@link Model::operandValues}.
+     * - location.length is set.
+     * If the lifetime is CONSTANT_POOL:
+     * - location.poolIndex is set.
+     * - location.offset is the offset in bytes into the specified pool.
+     * - location.length is set.
+     * If the lifetime is SUBGRAPH:
+     * - location.poolIndex is 0.
+     * - location.offset is the index of the referenced subgraph in {@link Model::referenced}.
+     * - location.length is 0.
+     */
+    DataLocation location;
+    /**
+     * Additional parameters specific to a particular operand type, or null if there are none.
+     */
+    @nullable OperandExtraParams extraParams;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandExtraParams.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandExtraParams.aidl
new file mode 100644
index 0000000000..b0112aea0c
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandExtraParams.aidl
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.SymmPerChannelQuantParams;
+
+/**
+ * Parameters specific to a particular operand type. Being a union, exactly one member is set.
+ */
+@VintfStability
+union OperandExtraParams {
+    /**
+     * Symmetric per-channel quantization parameters.
+     *
+     * Only applicable to operands of type TENSOR_QUANT8_SYMM_PER_CHANNEL.
+     */
+    SymmPerChannelQuantParams channelQuant;
+    /**
+     * Extension operand parameters.
+     *
+     * The framework treats this as an opaque data blob.
+     * The format is up to individual extensions.
+     */
+    byte[] extension;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandLifeTime.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandLifeTime.aidl
new file mode 100644
index 0000000000..63d1971717
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandLifeTime.aidl
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * How an operand is used. Values are explicit: they are part of the stable interface.
+ */
+@VintfStability
+@Backing(type="int")
+enum OperandLifeTime {
+    /**
+     * The operand is internal to the model. It's created by an operation and consumed by other
+     * operations. It must be an output operand of exactly one operation.
+     */
+    TEMPORARY_VARIABLE = 0,
+    /**
+     * The operand is an input of a subgraph. It must not be an output operand of any operation.
+     *
+     * An operand can't be both input and output of a subgraph.
+     */
+    SUBGRAPH_INPUT = 1,
+    /**
+     * The operand is an output of a subgraph. It must be an output operand of exactly one
+     * operation.
+     *
+     * An operand can't be both input and output of a subgraph.
+     */
+    SUBGRAPH_OUTPUT = 2,
+    /**
+     * The operand is a constant found in Model.operandValues. It must not be an output operand of
+     * any operation.
+     */
+    CONSTANT_COPY = 3,
+    /**
+     * The operand is a constant that was specified via a Memory object. It must not be an output
+     * operand of any operation.
+     */
+    CONSTANT_POOL = 4,
+    /**
+     * The operand does not have a value. This is valid only for optional arguments of operations.
+     */
+    NO_VALUE = 5,
+    /**
+     * The operand is a reference to a subgraph. It must be an input to one or more
+     * {@link OperationType::IF} or {@link OperationType::WHILE} operations.
+     */
+    SUBGRAPH = 6,
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandPerformance.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandPerformance.aidl
new file mode 100644
index 0000000000..9a8c2cca23
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandPerformance.aidl
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.OperandType;
+import android.hardware.neuralnetworks.PerformanceInfo;
+
+/**
+ * Driver performance (info) when operating on a particular operand data type (type). In the case
+ * of float32 data, this is used when the calculations are not relaxed.
+ */
+@VintfStability
+parcelable OperandPerformance {
+    OperandType type;
+    PerformanceInfo info;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandType.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandType.aidl
new file mode 100644
index 0000000000..9274b6f97e
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/OperandType.aidl
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Operand types.
+ *
+ * The type of an operand in a model.
+ *
+ * Types prefaced with TENSOR_* must be used for tensor data (i.e., tensors
+ * with at least one dimension). Types not prefaced by TENSOR_* represent
+ * scalar values and must have no dimensions.
+ */
+@VintfStability
+@Backing(type="int")
+enum OperandType {
+    /**
+     * A 32 bit floating point scalar value.
+     */
+    FLOAT32 = 0,
+    /**
+     * A signed 32 bit integer scalar value.
+     */
+    INT32 = 1,
+    /**
+     * An unsigned 32 bit integer scalar value.
+     */
+    UINT32 = 2,
+    /**
+     * A tensor of 32 bit floating point values.
+     */
+    TENSOR_FLOAT32 = 3,
+    /**
+     * A tensor of signed 32 bit integer values.
+     */
+    TENSOR_INT32 = 4,
+    /**
+     * A tensor of 8 bit unsigned integers that represent real numbers.
+     *
+     * Attached to this tensor are two numbers that can be used to convert the 8 bit integer to the
+     * real value and vice versa. These two numbers are:
+     * - scale: a 32 bit floating point value greater than zero.
+     * - zeroPoint: a 32 bit integer, in range [0, 255].
+     *
+     * The formula is:
+     * realValue = (integerValue - zeroPoint) * scale.
+     */
+    TENSOR_QUANT8_ASYMM = 5,
+    /**
+     * An 8 bit boolean scalar value.
+     *
+     * Values of this operand type are either true or false. A zero value represents false; any
+     * other value represents true.
+     */
+    BOOL = 6,
+    /**
+     * A tensor of 16 bit signed integers that represent real numbers.
+     *
+     * Attached to this tensor is a number representing real value scale that is used to convert the
+     * 16 bit number to a real value in the following way:
+     * realValue = integerValue * scale.
+     *
+     * scale is a 32 bit floating point with value greater than zero.
+     */
+    TENSOR_QUANT16_SYMM = 7,
+    /**
+     * A tensor of IEEE 754 16 bit floating point values.
+     */
+    TENSOR_FLOAT16 = 8,
+    /**
+     * A tensor of 8 bit boolean values.
+     *
+     * Values of this operand type are either true or false. A zero value represents false; any
+     * other value represents true.
+     */
+    TENSOR_BOOL8 = 9,
+    /**
+     * An IEEE 754 16 bit floating point scalar value.
+     */
+    FLOAT16 = 10,
+    /**
+     * A tensor of 8 bit signed integers that represent real numbers.
+     *
+     * This tensor is associated with additional fields that can be used to convert the 8 bit signed
+     * integer to the real value and vice versa. These fields are:
+     * - channelDim: a 32 bit unsigned integer indicating channel dimension.
+     * - scales: an array of positive 32 bit floating point values.
+     * The size of the scales array must be equal to dimensions[channelDim].
+     *
+     * {@link SymmPerChannelQuantParams} must hold the parameters for an Operand of this type.
+     * The channel dimension of this tensor must not be unknown (dimensions[channelDim] != 0).
+     *
+     * The formula is:
+     * realValue[..., C, ...] =
+     *     integerValue[..., C, ...] * scales[C]
+     * where C is an index in the Channel dimension.
+     */
+    TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
+    /**
+     * A tensor of 16 bit unsigned integers that represent real numbers.
+     *
+     * Attached to this tensor are two numbers that can be used to convert the 16 bit integer to the
+     * real value and vice versa. These two numbers are:
+     * - scale: a 32 bit floating point value greater than zero.
+     * - zeroPoint: a 32 bit integer, in range [0, 65535].
+     *
+     * The formula is:
+     * realValue = (integerValue - zeroPoint) * scale.
+     */
+    TENSOR_QUANT16_ASYMM = 12,
+    /**
+     * A tensor of 8 bit signed integers that represent real numbers.
+     *
+     * Attached to this tensor is a number representing real value scale that is used to convert the
+     * 8 bit number to a real value in the following way:
+     * realValue = integerValue * scale.
+     *
+     * scale is a 32 bit floating point with value greater than zero.
+     */
+    TENSOR_QUANT8_SYMM = 13,
+    /**
+     * A tensor of 8 bit signed integers that represent real numbers.
+     *
+     * Attached to this tensor are two numbers that can be used to convert the 8 bit integer to the
+     * real value and vice versa. These two numbers are:
+     * - scale: a 32 bit floating point value greater than zero.
+     * - zeroPoint: a 32 bit integer, in range [-128, 127].
+     *
+     * The formula is:
+     * realValue = (integerValue - zeroPoint) * scale.
+     */
+    TENSOR_QUANT8_ASYMM_SIGNED = 14,
+    /**
+     * A reference to a subgraph.
+     *
+     * Must have the lifetime {@link OperandLifeTime::SUBGRAPH}.
+     */
+    SUBGRAPH = 15,
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/Operation.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/Operation.aidl
new file mode 100644
index 0000000000..acfb4b779f
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/Operation.aidl
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.OperationType;
+
+/**
+ * Describes one operation of the model's graph: its type and its input and output operand indexes.
+ */
+@VintfStability
+parcelable Operation {
+    /**
+     * The operation type.
+     *
+     * Besides the values listed in {@link OperationType}, any value above
+     * {@link IDevice::OPERATION_TYPE_BASE_MAX} is possible and should be interpreted as an
+     * extension type according to {@link Model::extensionNameToPrefix}.
+     */
+    OperationType type;
+    /**
+     * Describes the table that contains the indexes of the inputs of the operation. The offset is
+     * the index in the operandIndexes table.
+     */
+    int[] inputs;
+    /**
+     * Describes the table that contains the indexes of the outputs of the operation. The offset is
+     * the index in the operandIndexes table.
+     */
+    int[] outputs;
+}
+
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/OperationType.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/OperationType.aidl
new file mode 100644
index 0000000000..fd9da67bce
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/OperationType.aidl
@@ -0,0 +1,5132 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Operation types.
+ *
+ * The type of an operation in a model.
+ */
+@VintfStability
+@Backing(type="int")
+enum OperationType {
+    /**
+     * Adds two tensors, element-wise.
+     *
+     * Takes two input tensors of identical {@link OperandType} and compatible
+     * dimensions. The output is the sum of both input tensors, optionally
+     * modified by an activation function.
+     *
+     * Two dimensions are compatible when:
+     *     1. they are equal, or
+     *     2. one of them is 1
+     *
+     * The size of the output is the maximum size along each dimension of the
+     * input operands. It starts with the trailing dimensions, and works its
+     * way forward.
+     *
+     * Example:
+     *
+     *     input1.dimension = {4, 1, 2}
+     *     input2.dimension = {5, 4, 3, 1}
+     *     output.dimension = {5, 4, 3, 2}
+     *
+     * Since HAL version 1.2, generic zero-sized input tensor is supported. Zero
+     * dimension is only compatible with 0 or 1. The size of the output
+     * dimension is zero if either of corresponding input dimension is zero.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     * * {@link OperandType::TENSOR_INT32} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: A tensor.
+     * * 1: A tensor of the same {@link OperandType}, and compatible dimensions
+     *      as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scales and zeroPoint can be different from input0 scale and zeroPoint.
+     * * 2: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     *      For a {@link OperandType::TENSOR_INT32} tensor,
+     *      the {@link FusedActivationFunc} must be "NONE".
+     *
+     * Outputs:
+     * * 0: The sum, a tensor of the same {@link OperandType} as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint can be different from inputs' scale and zeroPoint.
+     */
+    ADD = 0,
+    /**
+     * Performs a 2-D average pooling operation.
+     *
+     * The output dimensions are functions of the filter dimensions, stride, and
+     * padding.
+     *
+     * The values in the output tensor are computed as:
+     *
+     *     output[b, i, j, channel] =
+     *         sum_{di, dj}(
+     *             input[b, strides[1] * i + di, strides[2] * j + dj, channel]
+     *         ) / sum(1)
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     * NCHW is supported since HAL version 1.2.
+     *
+     * Both explicit padding and implicit padding are supported.
+     *
+     * Inputs (explicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
+     *      the input.
+     *      Since HAL version 1.2, zero batches is supported for this tensor.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the left, in the ‘width’ dimension.
+     * * 2: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the right, in the ‘width’ dimension.
+     * * 3: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the top, in the ‘height’ dimension.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the bottom, in the ‘height’ dimension.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 6: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 7: An {@link OperandType::INT32} scalar, specifying the filter
+     *      width.
+     * * 8: An {@link OperandType::INT32} scalar, specifying the filter
+     *      height.
+     * * 9: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 10: An optional {@link OperandType::BOOL} scalar, default to false.
+     *       Set to true to specify NCHW data layout for input0 and output0.
+     *       Available since HAL version 1.2.
+     *
+     * Inputs (implicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
+     *      the input.
+     *      Since HAL version 1.2, zero batches is supported for this tensor.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the implicit
+     *      padding scheme, has to be one of the
+     *      following values: {0 (NONE), 1 (SAME), 2 (VALID)}.
+     * * 2: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 3: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the filter
+     *      width.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the filter
+     *      height.
+     * * 6: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 7: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: The output 4-D tensor, of shape
+     *      [batches, out_height, out_width, depth].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    AVERAGE_POOL_2D = 1,
+    /**
+     * Concatenates the input tensors along the given dimension.
+     *
+     * The input tensors must have identical {@link OperandType} and the same
+     * dimensions except the dimension along the concatenation axis.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *   (full support since HAL version 1.2, see the input section)
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0 ~ n-1: The list of n input tensors, of shape
+     *            [D0, D1, ..., Daxis(i), ..., Dm].
+     *            Before HAL version 1.2, all input tensors of
+     *            {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *            must have the same scale and zeroPoint as the output tensor.
+     *            Input tensors of
+     *            {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *            are allowed to have different scale and zeroPoint.
+     *            Since HAL version 1.2, zero-sized tensors are supported.
+     * * n: An {@link OperandType::INT32} scalar, specifying the
+     *      concatenation axis.
+     *
+     * Outputs:
+     * * 0: The output, a tensor of the same {@link OperandType} as the input
+     *      tensors. The output shape is [D0, D1, ..., sum(Daxis(i)), ..., Dm].
+     *      Since HAL version 1.2, for a {@link OperandType::TENSOR_QUANT8_ASYMM} tensor,
+     *      the scale and zeroPoint values can be different from
+     *      input tensors. Before HAL version 1.2 they have to be the same as for the input tensors.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint values can be different from input tensors.
+     */
+    CONCATENATION = 2,
+    /**
+     * Performs a 2-D convolution operation.
+     *
+     * The CONV_2D op sweeps a 2-D filter that can mix channels together over a
+     * batch of images, applying the filter to each window of each image of the
+     * appropriate size.
+     *
+     * The output dimensions are functions of the filter dimensions, stride, and
+     * padding.
+     *
+     * The values in the output tensor are computed as:
+     *
+     *     output[b, i, j, channel] =
+     *         sum_{di, dj, k} (
+     *             input[b, strides[1] * i + di, strides[2] * j + dj, k] *
+     *             filter[channel, di, dj, k]
+     *         ) + bias[channel]
+     *
+     * Supported tensor {@link OperandType} configurations:
+     * * 32 bit floating point:
+     * * * {@link OperandType::TENSOR_FLOAT32} for input, filter, output, and bias.
+     *
+     * * Quantized:
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM} for input, filter, and output.
+     * * * {@link OperandType::TENSOR_INT32} for bias (with scale set to
+     * * * input.scale * filter.scale).
+     *
+     * Available since HAL version 1.2:
+     * * 16 bit floating point:
+     * * * {@link OperandType::TENSOR_FLOAT16} for input, filter, output, and bias.
+     *
+     * * Quantized with symmetric per channel quantization for the filter:
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM} for input, and output.
+     * * * {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+     * * * {@link OperandType::TENSOR_INT32} for bias (scale set to 0.0,
+     * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+     *
+     * Available since HAL version 1.3:
+     * * Quantized signed (since HAL version 1.3):
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+     * * * {@link OperandType::TENSOR_INT32} for bias (with scale set to
+     * * * input.scale * filter.scale).
+     *
+     * * Quantized signed with filter symmetric per channel quantization (since HAL version 1.3):
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} for input, and output.
+     * * * {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+     * * * {@link OperandType::TENSOR_INT32} for bias (scale set to 0.0,
+     * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     * NCHW is supported since HAL version 1.2.
+     *
+     * Both explicit padding and implicit padding are supported.
+     *
+     * Inputs (explicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
+     *      specifying the input.
+     *      Since HAL version 1.2, zero batches is supported for this tensor.
+     * * 1: A 4-D tensor, of shape
+     *      [depth_out, filter_height, filter_width, depth_in], specifying the
+     *      filter.
+     *      For tensor of type {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL}
+     *      the channel dimension (SymmPerChannelQuantParams::channelDim)
+     *      must be set to 0.
+     * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
+     *      tensor of type {@link OperandType::TENSOR_FLOAT32}
+     *      or {@link OperandType::TENSOR_FLOAT16} the bias must be of the same type.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the bias should be of {@link OperandType::TENSOR_INT32}, with zeroPoint
+     *      of 0 and bias_scale == input_scale * filter_scale.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL},
+     *      the bias should be of {@link OperandType::TENSOR_INT32}, with zeroPoint of 0
+     *      and bias_scale of 0. The actual scale of each value 'i' is equal to
+     *      bias_scale[i] = input_scale * filter_scale[i].
+     * * 3: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the left, in the ‘width’ dimension.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the right, in the ‘width’ dimension.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the top, in the ‘height’ dimension.
+     * * 6: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the bottom, in the ‘height’ dimension.
+     * * 7: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 8: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 9: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 10: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     *      Available since HAL version 1.2.
+     * * 11: An optional {@link OperandType::INT32} scalar, specifying the dilation
+     *      factor for width. Defaults to 1. If set to k > 1, there will be k-1 skipped
+     *      cells between each filter element on width dimension. If this input is set,
+     *      input 12 (dilation factor for height) must be specified as well.
+     *      Available since HAL version 1.2.
+     * * 12: An optional {@link OperandType::INT32} scalar, specifying the dilation
+     *      factor for height. Defaults to 1. If set to k > 1, there will be k-1 skipped
+     *      cells between each filter element on height dimension. If this input is set,
+     *      input 11 (dilation factor for width) must be specified as well.
+     *      Available since HAL version 1.2.
+     *
+     * Inputs (implicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
+     *      specifying the input.
+     *      Since HAL version 1.2, zero batches is supported for this tensor.
+     * * 1: A 4-D tensor, of shape
+     *      [depth_out, filter_height, filter_width, depth_in], specifying the
+     *      filter.
+     *      For tensor of type {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL}
+     *      the channel dimension (SymmPerChannelQuantParams::channelDim)
+     *      must be set to 0.
+     * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
+     *      tensor of type {@link OperandType::TENSOR_FLOAT32}
+     *      or {@link OperandType::TENSOR_FLOAT16} the bias must be of the same
+     *      type.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the bias should be of {@link OperandType::TENSOR_INT32}, with zeroPoint
+     *      of 0 and bias_scale == input_scale * filter_scale.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL},
+     *      the bias should be of {@link OperandType::TENSOR_INT32}, with zeroPoint of 0
+     *      and bias_scale of 0. The actual scale of each value 'i' is equal to
+     *      bias_scale[i] = input_scale * filter_scale[i].
+     * * 3: An {@link OperandType::INT32} scalar, specifying the implicit
+     *      padding scheme, has to be one of the
+     *      following values: {0 (NONE), 1 (SAME), 2 (VALID)}.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 6: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 7: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     *      Available since HAL version 1.2.
+     * * 8: An optional {@link OperandType::INT32} scalar, specifying the dilation
+     *      factor for width. Defaults to 1. If set to k > 1, there will be k-1 skipped
+     *      cells between each filter element on width dimension. If this input is set,
+     *      input 9 (dilation factor for height) must be specified as well.
+     *      Available since HAL version 1.2.
+     * * 9: An optional {@link OperandType::INT32} scalar, specifying the dilation
+     *      factor for height. Defaults to 1. If set to k > 1, there will be k-1 skipped
+     *      cells between each filter element on height dimension. If this input is set,
+     *      input 8 (dilation factor for width) must be specified as well.
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: The output 4-D tensor, of shape
+     *      [batches, out_height, out_width, depth_out].
+     *      Before HAL version 1.2, for output tensor of {@link OperandType::TENSOR_QUANT8_ASYMM},
+     *      the following condition must be satisfied: output_scale > input_scale * filter_scale
+     */
+    CONV_2D = 3,
+    /**
+     * Performs a depthwise 2-D convolution operation.
+     *
+     * Given an input tensor of shape [batches, height, width, depth_in] and a
+     * filter tensor of shape [1, filter_height, filter_width, depth_out]
+     * containing depth_out convolutional filters of depth 1, DEPTHWISE_CONV
+     * applies a different filter to each input channel (expanding from 1
+     * channel to channel_multiplier channels for each), then concatenates the
+     * results together.
+     *
+     * The output has depth_out = depth_in * channel_multiplier channels.
+     * The output dimensions are functions of the filter dimensions, stride, and
+     * padding.
+     *
+     * The values in the output tensor are computed as:
+     *
+     *     output[b, i, j, k * channel_multiplier + q] =
+     *         sum_{di, dj} (
+     *             input[b, strides[1] * i + di, strides[2] * j + dj, k] *
+     *             filter[0, di, dj, k * channel_multiplier + q]
+     *         ) + bias[k * channel_multiplier + q]
+     *
+     * Supported tensor {@link OperandType} configurations:
+     * * 32 bit floating point:
+     * * * {@link OperandType::TENSOR_FLOAT32} for input, filter, output, and bias.
+     *
+     * * Quantized:
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM} for input, filter, and output.
+     * * * {@link OperandType::TENSOR_INT32} for bias (with scale set to
+     * * * input.scale * filter.scale).
+     *
+     * Available since HAL version 1.2:
+     * * 16 bit floating point:
+     * * * {@link OperandType::TENSOR_FLOAT16} for input, filter, output, and bias.
+     *
+     * * Quantized with symmetric per channel quantization for the filter:
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM} for input, and output.
+     * * * {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+     * * * {@link OperandType::TENSOR_INT32} for bias (scale set to 0.0,
+     * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+     *
+     * Available since HAL version 1.3:
+     * * Quantized signed (since HAL version 1.3):
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+     * * * {@link OperandType::TENSOR_INT32} for bias (with scale set to
+     * * * input.scale * filter.scale).
+     *
+     * * Quantized signed with filter symmetric per channel quantization (since HAL version 1.3):
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} for input, and output.
+     * * * {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+     * * * {@link OperandType::TENSOR_INT32} for bias (scale set to 0.0,
+     * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     * NCHW is supported since HAL version 1.2.
+     *
+     * Both explicit padding and implicit padding are supported.
+     *
+     * Inputs (explicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
+     *      specifying the input.
+     * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
+     *      specifying the filter.
+     *      For tensor of type {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL}
+     *      the channel dimension (SymmPerChannelQuantParams::channelDim)
+     *      must be set to 3.
+     * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
+     *      tensor of type {@link OperandType::TENSOR_FLOAT32}
+     *      or {@link OperandType::TENSOR_FLOAT16} the bias must be of the same type.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the bias should be of {@link OperandType::TENSOR_INT32}, with zeroPoint
+     *      of 0 and bias_scale == input_scale * filter_scale.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL},
+     *      the bias should be of {@link OperandType::TENSOR_INT32}, with zeroPoint of 0
+     *      and bias_scale of 0. The actual scale of each value 'i' is equal to
+     *      bias_scale[i] = input_scale * filter_scale[i].
+     * * 3: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the left, in the ‘width’ dimension.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the right, in the ‘width’ dimension.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the top, in the ‘height’ dimension.
+     * * 6: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the bottom, in the ‘height’ dimension.
+     * * 7: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 8: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 9: An {@link OperandType::INT32} scalar, specifying the depthwise
+     *      multiplier.
+     * * 10: An {@link OperandType::INT32} scalar, and has to be one of the
+     *       {@link FusedActivationFunc} values. Specifies the activation to
+     *       invoke on the result.
+     * * 11: An optional {@link OperandType::BOOL} scalar, default to false.
+     *       Set to true to specify NCHW data layout for input0 and output0.
+     *       Available since HAL version 1.2.
+     * * 12: An optional {@link OperandType::INT32} scalar, specifying the dilation
+     *      factor for width. Defaults to 1. If set to k > 1, there will be k-1 skipped
+     *      cells between each filter element on width dimension. If this input is set,
+     *      input 13 (dilation factor for height) must be specified as well.
+     *      Available since HAL version 1.2.
+     * * 13: An optional {@link OperandType::INT32} scalar, specifying the dilation
+     *      factor for height. Defaults to 1. If set to k > 1, there will be k-1 skipped
+     *      cells between each filter element on height dimension. If this input is set,
+     *      input 12 (dilation factor for width) must be specified as well.
+     *      Available since HAL version 1.2.
+     *
+     * Inputs (implicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
+     *      specifying the input.
+     * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
+     *      specifying the filter.
+     * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
+     *      tensor of type {@link OperandType::TENSOR_FLOAT32}
+     *      or {@link OperandType::TENSOR_FLOAT16} the bias must be of the same type.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the bias should be of {@link OperandType::TENSOR_INT32}, with zeroPoint
+     *      of 0 and bias_scale == input_scale * filter_scale.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL},
+     *      the bias should be of {@link OperandType::TENSOR_INT32}, with zeroPoint of 0
+     *      and bias_scale of 0. The actual scale of each value 'i' is equal to
+     *      bias_scale[i] = input_scale * filter_scale[i].
+     * * 3: An {@link OperandType::INT32} scalar, specifying the implicit
+     *      padding scheme, has to be one of the
+     *      following values: {0 (NONE), 1 (SAME), 2 (VALID)}.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 6: An {@link OperandType::INT32} scalar, specifying the depthwise
+     *      multiplier.
+     * * 7: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 8: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     *      Available since HAL version 1.2.
+     * * 9: An optional {@link OperandType::INT32} scalar, specifying the dilation
+     *      factor for width. Defaults to 1. If set to k > 1, there will be k-1 skipped
+     *      cells between each filter element on width dimension. If this input is set,
+     *      input 10 (dilation factor for height) must be specified as well.
+     *      Available since HAL version 1.2.
+     * * 10: An optional {@link OperandType::INT32} scalar, specifying the dilation
+     *      factor for height. Defaults to 1. If set to k > 1, there will be k-1 skipped
+     *      cells between each filter element on height dimension. If this input is set,
+     *      input 9 (dilation factor for width) must be specified as well.
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: The output 4-D tensor, of shape
+     *      [batches, out_height, out_width, depth_out]. Before HAL version 1.2, for
+     *      output tensor of {@link OperandType::TENSOR_QUANT8_ASYMM},
+     *      the following condition must be satisfied:
+     *      output_scale > input_scale * filter_scale
+     */
+    DEPTHWISE_CONV_2D = 4,
+    /**
+     * Rearranges data from depth into blocks of spatial data.
+     *
+     * More specifically, this op outputs a copy of the input tensor where
+     * values from the depth dimension are moved in spatial blocks to the height
+     * and width dimensions. The value block_size indicates the input block size
+     * and how the data is moved.
+     *
+     * Chunks of data of size block_size * block_size from depth are rearranged
+     * into non-overlapping blocks of size block_size x block_size.
+     *
+     * The width of the output tensor is input_width * block_size, whereas the
+     * height is input_height * block_size. The depth of the input tensor must
+     * be divisible by block_size * block_size.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     * NCHW is supported since HAL version 1.2.
+     *
+     * Inputs:
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
+     *      specifying the input.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the block_size.
+     *      block_size must be >=1 and block_size * block_size must be a divisor
+     *      of the input depth.
+     * * 2: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: The output 4-D tensor, of shape [batch, height*block_size,
+     *      width*block_size, depth/(block_size*block_size)].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    DEPTH_TO_SPACE = 5,
+    /**
+     * Dequantizes the input tensor.
+     *
+     * The formula is:
+     *
+     *     output = (input - zeroPoint) * scale.
+     *
+     * Supported input tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_SYMM} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported output tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}.
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: A tensor.
+     *      Since HAL version 1.2, this tensor may be zero-sized.
+     *
+     * Outputs:
+     * * 0: A tensor with the same shape as input0.
+     */
+    DEQUANTIZE = 6,
+    /**
+     * Looks up sub-tensors in the input tensor.
+     *
+     * This operator takes for input a tensor of values (Values) and
+     * a one-dimensional tensor of selection indices (Lookups).
+     * The output tensor is the concatenation of sub-tensors of Values as
+     * selected by Lookups.
+     *
+     * Think of Values as being sliced along its first dimension:
+     * The entries in Lookups select which slices are concatenated together
+     * to create the output tensor.
+     *
+     * For example, if Values has shape of [40, 200, 300] and
+     * Lookups has shape of [3], all three values found in Lookups are
+     * expected to be between 0 and 39. The resulting tensor must
+     * have shape of [3, 200, 300].
+     *
+     * If a value in Lookups is out of bounds, the operation must fail
+     * and an error must be reported.
+     *
+     * Supported value tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.3)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported value tensor rank: from 2
+     *
+     * Inputs:
+     * * 0: Lookups. A 1-D tensor of {@link OperandType::TENSOR_INT32}.
+     *      The values are indices into the first dimension of Values.
+     * * 1: Values. An n-D tensor, where n >= 2, from which sub-tensors are
+     *      extracted.
+     *
+     * Outputs:
+     * * 0: An n-D tensor with the same rank and shape as the Values
+     *      tensor, except for the first dimension which has the same size
+     *      as Lookups' only dimension.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input1.
+     */
+    EMBEDDING_LOOKUP = 7,
+    /**
+     * Computes element-wise floor() on the input tensor.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: A tensor.
+     *
+     * Outputs:
+     * * 0: The output tensor, of the same {@link OperandType} and dimensions as
+     *      the input tensor.
+     */
+    FLOOR = 8,
+    /**
+     * Denotes a fully (densely) connected layer, which connects all elements
+     * in the input tensor with each element in the output tensor.
+     *
+     * This layer implements the operation:
+     *
+     *     outputs = activation(inputs * weights’ + bias)
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4.
+     *
+     * Inputs:
+     * * 0: A tensor of at least rank 2, specifying the input. If rank is
+     *      greater than 2, then it gets flattened to a 2-D Tensor. The
+     *      (flattened) 2-D Tensor is reshaped (if necessary) to
+     *      [batch_size, input_size], where "input_size" corresponds to the
+     *      number of inputs to the layer, matching the second dimension of
+     *      weights, and "batch_size" is calculated by dividing the number of
+     *      elements by "input_size".
+     *      Since HAL version 1.2, zero batch_size is supported for this tensor.
+     * * 1: A 2-D tensor, specifying the weights, of shape
+     *      [num_units, input_size], where "num_units" corresponds to the number
+     *      of output nodes.
+     * * 2: A 1-D tensor, of shape [num_units], specifying the bias. For input
+     *      tensor of {@link OperandType::TENSOR_FLOAT32}, the bias should
+     *      also be of {@link OperandType::TENSOR_FLOAT32}.
+     *      For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the bias should be of {@link OperandType::TENSOR_INT32},
+     *      with zeroPoint of 0 and bias_scale == input_scale * filter_scale.
+     * * 3: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     *
+     * Outputs:
+     * * 0: The output tensor, of shape [batch_size, num_units]. Before HAL version 1.2, for
+     *      output tensor of {@link OperandType::TENSOR_QUANT8_ASYMM}, the following
+     *      condition must be satisfied: output_scale > input_scale * filter_scale.
+     */
+    FULLY_CONNECTED = 9,
+    /**
+     * Looks up sub-tensors in the input tensor using a key-value map.
+     *
+     * This operator takes for input a tensor of values (Values),
+     * a one-dimensional tensor of selection values (Lookups) and
+     * a one-dimensional tensor (Keys) that maps these values to Values
+     * indexes. The output tensor is the concatenation of sub-tensors of
+     * Values as selected by Lookups via Keys.
+     *
+     * Think of Values as being sliced along its outer-most dimension.
+     * The output is a concatenation of selected slices, with one slice
+     * for each entry of Lookups. The slice selected is the one at the
+     * same index as the Keys entry that matches the value in Lookups.
+     *
+     * For a hit, the corresponding sub-tensor of Values is included
+     * in the Output tensor. For a miss, the corresponding sub-tensor in
+     * Output must have zero values.
+     *
+     * For example, if Values has shape of [40, 200, 300],
+     * Keys should have a shape of [40]. If Lookups tensor has shape
+     * of [3], three slices are being concatenated, so the resulting tensor
+     * must have the shape of [3, 200, 300]. If the first entry in Lookups
+     * has the value 123456, that value must be located in Keys tensor.
+     * If the sixth entry of Keys contains 123456, the sixth slice of Values
+     * must be selected. If no entry in Keys has 123456, a slice of zeroes
+     * must be concatenated.
+     *
+     * Supported value tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *
+     * Supported value tensor rank: from 2
+     *
+     * Inputs:
+     * * 0: Lookups. A 1-D {@link OperandType::TENSOR_INT32} tensor with
+     *      shape [ k ].
+     * * 1: Keys. A 1-D {@link OperandType::TENSOR_INT32} tensor with shape
+     *      [ n ]; Keys and Values pair represent a map, i.e., the ith element
+     *      in Keys (Keys[i]) is the key to select the ith sub-tensor in Values
+     *      (Values[i]), where 0 <= i <= n-1. Keys tensor *MUST* be sorted in
+     *      ascending order.
+     * * 2: Values. A tensor with shape of [ n, … ]; i.e., the first dimension
+     *      must be n.
+     *
+     * Outputs:
+     * * 0: Output. A tensor with shape [ k …].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} tensor,
+     *      the scale and zeroPoint must be the same as input2.
+     * * 1: Hits. A boolean tensor with shape [ k ] indicating whether the lookup
+     *      hits (True) or not (False).
+     *      Stored as {@link OperandType::TENSOR_QUANT8_ASYMM} with offset 0
+     *      and scale 1.0f.
+     *      A non-zero byte represents True, a hit. A zero indicates otherwise.
+     */
+    HASHTABLE_LOOKUP = 10,
+    /**
+     * Applies L2 normalization along the axis dimension.
+     *
+     * The values in the output tensor are computed as:
+     *
+     *     output[batch, row, col, channel] =
+     *         input[batch, row, col, channel] /
+     *         sqrt(sum_{c} pow(input[batch, row, col, c], 2))
+     *
+     * By default the axis dimension is the last dimension of the input tensor.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     * Tensors with rank less than 4 are only supported since HAL version 1.2.
+     *
+     * Inputs:
+     * * 0: An n-D tensor, specifying the tensor to be normalized.
+     * * 1: An optional {@link OperandType::INT32} scalar, default to -1,
+     *      specifying the dimension normalization would be performed on.
+     *      Negative index is used to specify axis from the end (e.g. -1 for
+     *      the last axis). Must be in the range [-n, n).
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} and same shape as input0.
+     *      For {@link OperandType::TENSOR_QUANT8_ASYMM},
+     *      the scale must be 1.f / 128 and the zeroPoint must be 128.
+     *      For {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the scale must be 1.f / 128 and the zeroPoint must be 0.
+     *
+     *      NOTE: Before HAL version 1.3, if the elements along an axis are all zeros,
+     *      the result is undefined. Since HAL version 1.3, if the elements along an axis
+     *      are all zeros, the result is logical zero.
+     */
+    L2_NORMALIZATION = 11,
+    /**
+     * Performs a 2-D L2 pooling operation.
+     *
+     * The output dimensions are functions of the filter dimensions, stride, and
+     * padding.
+     *
+     * The values in the output tensor are computed as:
+     *
+     *     output[b, i, j, c] =
+     *         sqrt(sum_{di, dj} pow(input[b, strides[1] * i + di, strides[2] * j + dj, c], 2) /
+     *              sum(1))
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     * NCHW is supported since HAL version 1.2.
+     *
+     * Both explicit padding and implicit padding are supported.
+     *
+     * Inputs (explicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
+     *      the input.
+     *      Since HAL version 1.2, zero batches is supported for this tensor.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the left, in the ‘width’ dimension.
+     * * 2: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the right, in the ‘width’ dimension.
+     * * 3: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the top, in the ‘height’ dimension.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the bottom, in the ‘height’ dimension.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 6: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 7: An {@link OperandType::INT32} scalar, specifying the filter
+     *      width.
+     * * 8: An {@link OperandType::INT32} scalar, specifying the filter
+     *      height.
+     * * 9: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 10: An optional {@link OperandType::BOOL} scalar, default to false.
+     *       Set to true to specify NCHW data layout for input0 and output0.
+     *       Available since HAL version 1.2.
+     *
+     * Inputs (implicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
+     *      the input.
+     *      Since HAL version 1.2, zero batches is supported for this tensor.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the implicit
+     *      padding scheme, has to be one of the
+     *      following values: {0 (NONE), 1 (SAME), 2 (VALID)}.
+     * * 2: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 3: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the filter
+     *      width.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the filter
+     *      height.
+     * * 6: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 7: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: The output 4-D tensor, of shape
+     *      [batches, out_height, out_width, depth].
+     */
+    L2_POOL_2D = 12,
+    /**
+     * Applies Local Response Normalization along the depth dimension.
+     *
+     * The 4-D input tensor is treated as a 3-D array of 1-D vectors (along the
+     * last dimension), and each vector is normalized independently. Within a
+     * given vector, each component is divided by the weighted, squared sum of
+     * inputs within depth_radius.
+     *
+     * The output is calculated using this formula:
+     *
+     *     sqr_sum[a, b, c, d] = sum(
+     *         pow(input[a, b, c, d - depth_radius : d + depth_radius + 1], 2))
+     *     output = input / pow((bias + alpha * sqr_sum), beta)
+     *
+     * For input tensor with rank less than 4, independently normalizes each
+     * 1-D slice along specified dimension.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: up to 4
+     * Tensors with rank less than 4 are only supported since HAL version 1.2.
+     *
+     * Inputs:
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
+     *      the input.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the radius of
+     *      the normalization window.
+     * * 2: A scalar, specifying the bias, must not be zero.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT16}, the bias
+     *      value must be of {@link OperandType::FLOAT16}.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT32}, the bias
+     *      value must be of {@link OperandType::FLOAT32}.
+     * * 3: A scalar, specifying the scale factor, alpha.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT16}, the
+     *      alpha value must be of {@link OperandType::FLOAT16}.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT32}, the
+     *      alpha value must be of {@link OperandType::FLOAT32}.
+     * * 4: A scalar, specifying the exponent, beta.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT16}, the beta
+     *      value must be of {@link OperandType::FLOAT16}.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT32}, the beta
+     *      value must be of {@link OperandType::FLOAT32}.
+     * * 5: An optional {@link OperandType::INT32} scalar, default to -1,
+     *      specifying the dimension normalization would be performed on.
+     *      Negative index is used to specify axis from the end (e.g. -1 for
+     *      the last axis). Must be in the range [-n, n).
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     */
+    LOCAL_RESPONSE_NORMALIZATION = 13,
+    /**
+     * Computes sigmoid activation on the input tensor element-wise.
+     *
+     * The output is calculated using this formula:
+     *
+     *     output = 1 / (1 + exp(-input))
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4.
+     *
+     * Inputs:
+     * * 0: A tensor, specifying the input.
+     *      Since HAL version 1.2, this tensor may be zero-sized.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     *      For {@link OperandType::TENSOR_QUANT8_ASYMM},
+     *      the scale must be 1.f / 256 and the zeroPoint must be 0.
+     *      For {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the scale must be 1.f / 256 and the zeroPoint must be -128.
+     */
+    LOGISTIC = 14,
+    /**
+     * Projects an input to a bit vector via locality sensitive hashing.
+     *
+     * Supported input tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *
+     * Supported input tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: Hash functions. Dim.size == 2, DataType: Float.
+     *      Tensor[0].Dim[0]: Number of hash functions.
+     *      Tensor[0].Dim[1]: Number of projected output bits generated by each
+     *      hash function.
+     *      If the projection type is Sparse:
+     *      Tensor[0].Dim[1] + ceil(log2(Tensor[0].Dim[0])) <= 32
+     *
+     * * 1: Input. Dim.size >= 1, no restriction on DataType.
+     * * 2: Weight. Optional. Dim.size == 1, DataType: Float.
+     *      If not set, each input element is considered to have the same weight
+     *      of 1.0.
+     *      Tensor[1].Dim[0] == Tensor[2].Dim[0]
+     * * 3: Type:
+     *        Sparse:
+     *          Value LSHProjectionType_SPARSE(=3) (since HAL version 1.2).
+     *          Computed bit vector is considered to be sparse.
+     *          Each output element is an int32 made up of multiple bits
+     *          computed from hash functions.
+     *
+     *          NOTE: To avoid collisions across hash functions, an offset value
+     *          of k * (1 << Tensor[0].Dim[1]) will be added to each signature,
+     *          where k is the index of the hash function.
+     *
+     *          Value LSHProjectionType_SPARSE_DEPRECATED(=1).
+     *          Legacy behavior that does not include the offset value.
+     *
+     *        Dense:
+     *          Value LSHProjectionType_DENSE(=2).
+     *          Computed bit vector is considered to be dense. Each output
+     *          element represents a bit and can take the value of either
+     *          0 or 1.
+     *
+     * Outputs:
+     * * 0: If the projection type is Sparse:
+     *      Output.Dim == { Tensor[0].Dim[0] }
+     *      A tensor of int32 that represents hash signatures.
+     *
+     *      If the projection type is Dense:
+     *      Output.Dim == { Tensor[0].Dim[0] * Tensor[0].Dim[1] }
+     *      A flattened tensor that represents projected bit vectors.
+     * The offset value for sparse projections was added in HAL version 1.2.
+     */
+    LSH_PROJECTION = 15,
+    /**
+     * Performs a single time step in a Long Short-Term Memory (LSTM) layer.
+     *
+     * The LSTM operation is described by the following equations.
+     *
+     * \f{eqnarray*}{
+     * i_t =& \sigma(W_{xi}x_t+W_{hi}h_{t-1}+W_{ci}C_{t-1}+b_i) & \\
+     * f_t =& \sigma(W_{xf}x_t+W_{hf}h_{t-1}+W_{cf}C_{t-1}+b_f) & \\
+     * C_t =& clip(f_t \odot C_{t-1} + i_t \odot
+     *        g(W_{xc}x_t+W_{hc}h_{t-1}+b_c),\ t_{cell}) & \\
+     * o_t =& \sigma(W_{xo}x_t+W_{ho}h_{t-1}+W_{co}C_t+b_o) & \\
+     *      & & \\
+     *      & clip(W_{proj}(o_t \odot g(C_t))+b_{proj},\ t_{proj})
+     *      & if\ there\ is\ a\ projection; \\
+     * h_t =& & \\
+     *      & o_t \odot g(C_t) & otherwise. \\
+     * \f}
+     * Where:
+     * * \f$x_t\f$ is the input,
+     * * \f$i_t\f$ is the input gate,
+     * * \f$f_t\f$ is the forget gate,
+     * * \f$C_t\f$ is the cell state,
+     * * \f$o_t\f$ is the output,
+     * * \f$h_t\f$ is the output state,
+     * * \f$\sigma\f$ is the logistic sigmoid function,
+     * * \f$g\f$ is the cell input and cell output activation function, usually
+     *   \f$tanh\f$,
+     * * \f$W_{xi}\f$ is the input-to-input weight matrix,
+     * * \f$W_{hi}\f$ is the recurrent-to-input weight matrix,
+     * * \f$W_{ci}\f$ is the cell-to-input weight matrix,
+     * * \f$b_i\f$ is the input gate bias,
+     * * \f$W_{xf}\f$ is the input-to-forget weight matrix,
+     * * \f$W_{hf}\f$ is the recurrent-to-forget weight matrix,
+     * * \f$W_{cf}\f$ is the cell-to-forget weight matrix,
+     * * \f$b_f\f$ is the forget gate bias,
+     * * \f$W_{xc}\f$ is the input-to-cell weight matrix,
+     * * \f$W_{hc}\f$ is the recurrent-to-cell weight matrix,
+     * * \f$b_c\f$ is the cell bias,
+     * * \f$W_{xo}\f$ is the input-to-output weight matrix,
+     * * \f$W_{ho}\f$ is the recurrent-to-output weight matrix,
+     * * \f$W_{co}\f$ is the cell-to-output weight matrix,
+     * * \f$b_o\f$ is the output gate bias,
+     * * \f$W_{proj}\f$ is the projection weight matrix,
+     * * \f$b_{proj}\f$ is the projection bias,
+     * * \f$t_{cell}\f$ is the threshold for clipping the cell state, and
+     * * \f$t_{proj}\f$ is the threshold for clipping the projected output.
+     * * \f$\odot\f$ is the
+     *   <a href="https://en.wikipedia.org/wiki/Hadamard_product_(matrices)">
+     *   Hadamard product</a> that takes two matrices and produces another
+     *   matrix, each element of which is the product of the corresponding
+     *   elements of the input matrices.
+     *
+     * Since HAL version 1.2 LSTM supports layer normalization.
+     * In case layer normalization is used, the inputs to internal activation
+     * functions (sigmoid and \f$g\f$) are normalized, rescaled and recentered
+     * following an approach from section 3.1 from
+     * https://arxiv.org/pdf/1607.06450.pdf
+     *
+     * The operation has the following independently optional inputs:
+     * * The cell-to-input weights (\f$W_{ci}\f$), cell-to-forget weights
+     *   (\f$W_{cf}\f$) and cell-to-output weights (\f$W_{co}\f$) either all
+     *   have values or none of them have values (i.e., all set to null). If
+     *   they have values, the peephole optimization is used.
+     * * The input-to-input weights (\f$W_{xi}\f$), recurrent-to-input weights
+     *   (\f$W_{hi}\f$) and input gate bias (\f$b_i\f$) either all have values,
+     *   or none of them have values. If they have no values, coupling of input
+     *   and forget gates (CIFG) is used, in which case the input gate
+     *   (\f$i_t\f$) is calculated using the following equation instead.
+     *   \f{eqnarray*}{
+     *   i_t = 1 - f_t
+     *   \f}
+     *   In case peephole optimization is used and CIFG is not used
+     *   cell-to-input (\f$W_{ci}\f$) weights must be present. Otherwise, the
+     *   cell-to-input weights must have no value.
+     * * The projection weights (\f$W_{proj}\f$) are required only for the
+     *   recurrent projection layer, and should otherwise have no value.
+     * * The projection bias (\f$b_{proj}\f$) may (but is not required to) have a
+     *   value if the recurrent projection layer exists, and should otherwise
+     *   have no value.
+     * * (HAL version 1.2 or later) The four layer normalization weights either all have
+     *   values or none of them have values. Additionally, if CIFG is used,
+     *   input layer normalization weights tensor is omitted and the other layer
+     *   normalization weights either all have values or none of them have
+     *   values. Layer normalization is used when the values of all the layer
+     *   normalization weights are present.
+     *
+     * References:
+     *
+     * The default non-peephole non-CIFG implementation is based on:
+     * http://www.bioinf.jku.at/publications/older/2604.pdf
+     * S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural
+     * Computation, 9(8):1735-1780, 1997.
+     *
+     * The peephole implementation and projection layer is based on:
+     * https://research.google.com/pubs/archive/43905.pdf
+     * Hasim Sak, Andrew Senior, and Francoise Beaufays. "Long short-term memory
+     * recurrent neural network architectures for large scale acoustic
+     * modeling." INTERSPEECH, 2014.
+     * (However, the concept of peephole optimization was introduced in work
+     * prior to this paper.)
+     *
+     * The coupling of input and forget gate (CIFG) is based on:
+     * http://arxiv.org/pdf/1503.04069.pdf
+     * Greff et al. "LSTM: A Search Space Odyssey"
+     *
+     * The layer normalization is based on:
+     * https://arxiv.org/pdf/1607.06450.pdf
+     * Jimmy Ba et al. "Layer Normalization"
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * All input and output tensors must be of the same type.
+     *
+     * Inputs:
+     * * 0: The input (\f$x_t\f$).
+     *      A 2-D tensor of shape [batch_size, input_size], where “batch_size”
+     *      corresponds to the batching dimension, and “input_size” is the size
+     *      of the input.
+     * * 1: The input-to-input weights (\f$W_{xi}\f$). Optional.
+     *      A 2-D tensor of shape [num_units, input_size], where “num_units”
+     *      corresponds to the number of cell units.
+     * * 2: The input-to-forget weights (\f$W_{xf}\f$).
+     *      A 2-D tensor of shape [num_units, input_size].
+     * * 3: The input-to-cell weights (\f$W_{xc}\f$).
+     *      A 2-D tensor of shape [num_units, input_size].
+     * * 4: The input-to-output weights (\f$W_{xo}\f$).
+     *      A 2-D tensor of shape [num_units, input_size].
+     * * 5: The recurrent-to-input weights (\f$W_{hi}\f$). Optional.
+     *      A 2-D tensor of shape [num_units, output_size], where “output_size”
+     *      corresponds to either the number of cell units (i.e., “num_units”),
+     *      or the second dimension of the “projection_weights”, if defined.
+     * * 6: The recurrent-to-forget weights (\f$W_{hf}\f$).
+     *      A 2-D tensor of shape [num_units, output_size].
+     * * 7: The recurrent-to-cell weights (\f$W_{hc}\f$).
+     *      A 2-D tensor of shape [num_units, output_size].
+     * * 8: The recurrent-to-output weights (\f$W_{ho}\f$).
+     *      A 2-D tensor of shape [num_units, output_size].
+     * * 9: The cell-to-input weights (\f$W_{ci}\f$). Optional.
+     *      A 1-D tensor of shape [num_units].
+     * * 10:The cell-to-forget weights (\f$W_{cf}\f$). Optional.
+     *      A 1-D tensor of shape [num_units].
+     * * 11:The cell-to-output weights (\f$W_{co}\f$). Optional.
+     *      A 1-D tensor of shape [num_units].
+     * * 12:The input gate bias (\f$b_i\f$). Optional.
+     *      A 1-D tensor of shape [num_units].
+     * * 13:The forget gate bias (\f$b_f\f$).
+     *      A 1-D tensor of shape [num_units].
+     * * 14:The cell bias (\f$b_c\f$).
+     *      A 1-D tensor of shape [num_units].
+     * * 15:The output gate bias (\f$b_o\f$).
+     *      A 1-D tensor of shape [num_units].
+     * * 16:The projection weights (\f$W_{proj}\f$). Optional.
+     *      A 2-D tensor of shape [output_size, num_units].
+     * * 17:The projection bias (\f$b_{proj}\f$). Optional.
+     *      A 1-D tensor of shape [output_size].
+     * * 18:The output state (in) (\f$h_{t-1}\f$).
+     *      A 2-D tensor of shape [batch_size, output_size].
+     * * 19:The cell state (in) (\f$C_{t-1}\f$).
+     *      A 2-D tensor of shape [batch_size, num_units].
+     * * 20:The activation function (\f$g\f$).
+     *      A value indicating the activation function:
+     *      <ul>
+     *      <li>0: None;
+     *      <li>1: Relu;
+     *      <li>3: Relu6;
+     *      <li>4: Tanh;
+     *      <li>6: Sigmoid.
+     *      </ul>
+     * * 21:The clipping threshold (\f$t_{cell}\f$) for the cell state, such
+     *      that values are bound within [-cell_clip, cell_clip]. If set to 0.0
+     *      then clipping is disabled.
+     *      Until HAL version 1.2 this scalar must be of type {@link
+     *      OperandType::FLOAT32}. Since HAL version 1.2, if all the input
+     *      tensors have type {@link OperandType::TENSOR_FLOAT32}, this
+     *      scalar must be of the type {@link OperandType::FLOAT32},
+     *      otherwise if all the input tensors have the type {@link
+     *      OperandType::TENSOR_FLOAT16}, this scalar must be of type {@link
+     *      OperandType::FLOAT16}.
+     * * 22:The clipping threshold (\f$t_{proj}\f$) for the output from the
+     *      projection layer, such that values are bound within
+     *      [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+     *      Until HAL version 1.2 this scalar must be of type {@link
+     *      OperandType::FLOAT32}. Since HAL version 1.2, if all the input
+     *      tensors have type {@link OperandType::TENSOR_FLOAT32}, this
+     *      scalar must be of the type {@link OperandType::FLOAT32},
+     *      otherwise if all the input tensors have the type {@link
+     *      OperandType::TENSOR_FLOAT16}, this scalar must be of type {@link
+     *      OperandType::FLOAT16}.
+     * Since HAL version 1.2 there are additional inputs to this op:
+     * * 23:The input layer normalization weights.
+     *      A 1-D tensor of shape [num_units]. Used to rescale normalized inputs
+     *      to activation at input gate.
+     * * 24:The forget layer normalization weights.
+     *      A 1-D tensor of shape [num_units]. Used to rescale normalized inputs
+     *      to activation at forget gate.
+     * * 25:The cell layer normalization weights.
+     *      A 1-D tensor of shape [num_units]. Used to rescale normalized inputs
+     *      to activation at cell gate.
+     * * 26:The output layer normalization weights.
+     *      A 1-D tensor of shape [num_units]. Used to rescale normalized inputs
+     *      to activation at output gate.
+     *
+     * Outputs:
+     * * 0: The scratch buffer.
+     *      A 2-D tensor of shape [batch_size, num_units * 3] with CIFG, or
+     *      [batch_size, num_units * 4] without CIFG.
+     * * 1: The output state (out) (\f$h_t\f$).
+     *      A 2-D tensor of shape [batch_size, output_size].
+     * * 2: The cell state (out) (\f$C_t\f$).
+     *      A 2-D tensor of shape [batch_size, num_units].
+     * * 3: The output (\f$o_t\f$).
+     *      A 2-D tensor of shape [batch_size, output_size]. This is effectively
+     *      the same as the current “output state (out)” value.
+     */
+    LSTM = 16,
+    /**
+     * Performs a 2-D max pooling operation.
+     *
+     * The output dimensions are functions of the filter dimensions, stride, and
+     * padding.
+     *
+     * The values in the output tensor are computed as:
+     *
+     *     output[b, i, j, channel] =
+     *         max_{di, dj} (
+     *             input[b, strides[1] * i + di, strides[2] * j + dj, channel]
+     *         )
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     * NCHW is supported since HAL version 1.2.
+     *
+     * Both explicit padding and implicit padding are supported.
+     *
+     * Inputs (explicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
+     *      the input.
+     *      Since HAL version 1.2, zero batches is supported for this tensor.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the left, in the ‘width’ dimension.
+     * * 2: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the right, in the ‘width’ dimension.
+     * * 3: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the top, in the ‘height’ dimension.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the bottom, in the ‘height’ dimension.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 6: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 7: An {@link OperandType::INT32} scalar, specifying the filter
+     *      width.
+     * * 8: An {@link OperandType::INT32} scalar, specifying the filter
+     *      height.
+     * * 9: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 10: An optional {@link OperandType::BOOL} scalar, default to false.
+     *       Set to true to specify NCHW data layout for input0 and output0.
+     *       Available since HAL version 1.2.
+     *
+     * Inputs (implicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
+     *      the input.
+     *      Since HAL version 1.2, zero batches is supported for this tensor.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the implicit
+     *      padding scheme, has to be one of the
+     *      following values: {0 (NONE), 1 (SAME), 2 (VALID)}.
+     * * 2: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 3: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the filter
+     *      width.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the filter
+     *      height.
+     * * 6: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 7: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: The output 4-D tensor, of shape
+     *      [batches, out_height, out_width, depth].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    MAX_POOL_2D = 17,
+    /**
+     * Multiplies two tensors, element-wise.
+     *
+     * Takes two input tensors of identical {@link OperandType} and compatible
+     * dimensions. The output is the product of both input tensors, optionally
+     * modified by an activation function.
+     *
+     * Two dimensions are compatible when:
+     *     1. they are equal, or
+     *     2. one of them is 1
+     *
+     * The size of the resulting output is the maximum size along each dimension
+     * of the input operands. It starts with the trailing dimensions, and works
+     * its way forward.
+     *
+     * Since HAL version 1.2, generic zero-sized input tensor is supported. Zero
+     * dimension is only compatible with 0 or 1. The size of the output
+     * dimension is zero if either of the corresponding input dimensions is zero.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     * * {@link OperandType::TENSOR_INT32} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: A tensor.
+     * * 1: A tensor of the same {@link OperandType}, and compatible dimensions
+     *      as input0.
+     * * 2: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     *      For a {@link OperandType::TENSOR_INT32} tensor,
+     *      the {@link FusedActivationFunc} must be "NONE".
+     *
+     * Outputs:
+     * * 0: The product, a tensor of the same {@link OperandType} as input0.
+     *      For output tensor of {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the following condition must be satisfied:
+     *      output_scale > input0_scale * input1_scale.
+     */
+    MUL = 18,
+    /**
+     * Computes rectified linear activation on the input tensor element-wise.
+     *
+     * The output is calculated using this formula:
+     *
+     *     output = max(0, input)
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4.
+     *
+     * Inputs:
+     * * 0: A tensor, specifying the input.
+     *      Since HAL version 1.2, this tensor may be zero-sized.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    RELU = 19,
+    /**
+     * Computes rectified linear 1 activation on the input tensor element-wise.
+     *
+     * The output is calculated using this formula:
+     *
+     *     output = min(1.f, max(-1.f, input))
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4.
+     *
+     * Inputs:
+     * * 0: A tensor, specifying the input.
+     *      Since HAL version 1.2, this tensor may be zero-sized.
+     *
+     * Outputs:
+     * * 0: The output tensor of the same shape as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    RELU1 = 20,
+    /**
+     * Computes rectified linear 6 activation on the input tensor element-wise.
+     *
+     * The output is calculated using this formula:
+     *
+     *     output = min(6, max(0, input))
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4.
+     *
+     * Inputs:
+     * * 0: A tensor, specifying the input.
+     *      Since HAL version 1.2, this tensor may be zero-sized.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    RELU6 = 21,
+    /**
+     * Reshapes a tensor.
+     *
+     * Given tensor, this operation returns a tensor that has the same values as
+     * tensor, but with a newly specified shape.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4.
+     *
+     * Inputs:
+     * * 0: A tensor, specifying the tensor to be reshaped.
+     * * 1: A 1-D tensor of {@link OperandType::TENSOR_INT32}, defining the
+     *      shape of the output tensor. The number of elements implied by shape
+     *      must be the same as the number of elements in the input tensor.
+     *
+     *      If one component of shape is the special value -1, the size of that
+     *      dimension is computed so that the total size remains constant. In
+     *      particular, a shape of [-1] flattens into 1-D. At most one component
+     *      of shape can be -1.
+     *
+     * Outputs:
+     * * 0: The output tensor, of shape specified by the input shape.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    RESHAPE = 22,
+    /**
+     * Resizes images to given size using the bilinear interpolation.
+     *
+     * Resized images will be distorted if their output aspect ratio is not the
+     * same as input aspect ratio. The corner pixels of output may not be the
+     * same as corner pixels of input.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     * NCHW is supported since HAL version 1.2.
+     *
+     * Both resizing by shape and resizing by scale are supported.
+     *
+     * Inputs (resizing by shape):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
+     *      the input.
+     *      Since HAL version 1.2, zero batches is supported for this tensor.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the output
+     *      width of the output tensor.
+     * * 2: An {@link OperandType::INT32} scalar, specifying the output
+     *      height of the output tensor.
+     * * 3: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     *      Available since HAL version 1.2.
+     * * 4: Align corners. An optional {@link OperandType::BOOL}
+     *      scalar, default to false.  If True, the centers of the 4 corner
+     *      pixels of the input and output tensors are aligned, preserving the
+     *      values at the corner pixels.
+     *      Available since HAL version 1.3.
+     * * 5: Half pixel centers. An optional {@link OperandType::BOOL}
+     *      scalar, default to false. If True, the pixel centers are assumed to
+     *      be at (0.5, 0.5). This is the default behavior of image.resize in
+     *      TF 2.0. If this parameter is True, then align_corners parameter
+     *      must be False.
+     *      Available since HAL version 1.3.
+     *
+     * Inputs (resizing by scale, since HAL version 1.2):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
+     *      the input. Zero batches is supported for this tensor.
+     * * 1: A scalar, specifying width_scale, the scaling factor of the width
+     *      dimension from the input tensor to the output tensor. The output
+     *      width is calculated as new_width = floor(width * width_scale).
+     *      The scalar must be of {@link OperandType::FLOAT16} if input0 is
+     *      of {@link OperandType::TENSOR_FLOAT16} and of
+     *      {@link OperandType::FLOAT32} otherwise.
+     * * 2: A scalar, specifying height_scale, the scaling factor of the height
+     *      dimension from the input tensor to the output tensor. The output
+     *      height is calculated as new_height = floor(height * height_scale).
+     *      The scalar must be of {@link OperandType::FLOAT16} if input0 is
+     *      of {@link OperandType::TENSOR_FLOAT16} and of
+     *      {@link OperandType::FLOAT32} otherwise.
+     * * 3: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     * * 4: Align corners. An optional {@link OperandType::BOOL}
+     *      scalar, default to false.  If True, the centers of the 4 corner
+     *      pixels of the input and output tensors are aligned, preserving the
+     *      values at the corner pixels.
+     *      Available since HAL version 1.3.
+     * * 5: Half pixel centers. An optional {@link OperandType::BOOL}
+     *      scalar, default to false. If True, the pixel centers are assumed to
+     *      be at (0.5, 0.5). This is the default behavior of image.resize in
+     *      TF 2.0. If this parameter is True, then align_corners parameter
+     *      must be False.
+     *      Available since HAL version 1.3.
+     *
+     * Outputs:
+     * * 0: The output 4-D tensor, of shape
+     *      [batches, new_height, new_width, depth].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    RESIZE_BILINEAR = 23,
+    /**
+     * A basic recurrent neural network layer.
+     *
+     * This layer implements the operation:
+     * outputs = state = activation(inputs * input_weights +
+     *                              state * recurrent_weights + bias)
+     *
+     * Where:
+     * * “input_weights” is a weight matrix that multiplies the inputs;
+     * * “recurrent_weights” is a weight matrix that multiplies the current
+     *    “state” which itself is the output from the previous time step
+     *    computation;
+     * * “bias” is a bias vector (added to each output vector in the batch);
+     * * “activation” is the function passed as the “fused_activation_function”
+     *   argument (if not “NONE”).
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * The input tensors must all be the same type.
+     *
+     * Inputs:
+     * * 0: input.
+     *      A 2-D tensor of shape [batch_size, input_size], where “batch_size”
+     *      corresponds to the batching dimension, and “input_size” is the size
+     *      of the input.
+     * * 1: weights.
+     *      A 2-D tensor of shape [num_units, input_size], where “num_units”
+     *      corresponds to the number of units.
+     * * 2: recurrent_weights.
+     *      A 2-D tensor of shape [num_units, num_units], with columns
+     *      corresponding to the weights from each unit.
+     * * 3: bias.
+     *      A 1-D tensor of shape [num_units].
+     * * 4: hidden state (in).
+     *      A 2-D tensor of shape [batch_size, num_units].
+     * * 5: fused_activation_function.
+     *      An optional {@link FusedActivationFunc} value indicating the
+     *      activation function. If “NONE” is specified then it results in a
+     *      linear activation.
+     *
+     * Outputs:
+     * * 0: hidden state (out).
+     *      A 2-D tensor of shape [batch_size, num_units].
+     *
+     * * 1: output.
+     *      A 2-D tensor of shape [batch_size, num_units]. This is effectively
+     *      the same as the current state value.
+     */
+    RNN = 24,
+    /**
+     * Computes the softmax activation on the input tensor element-wise, per
+     * batch, by normalizing the input vector so the maximum coefficient is
+     * zero.
+     *
+     * The output is calculated using this formula:
+     *
+     *     output[batch, i] =
+     *         exp((input[batch, i] - max(input[batch, :])) * beta) /
+     *         sum_{k}{exp((input[batch, k] - max(input[batch, :])) * beta)}
+     *
+     * For input tensor with rank other than 2, the activation will be applied
+     * independently on each 1-D slice along specified dimension.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4.
+     * Tensors with rank other than 2 or 4 are only supported since HAL version 1.2.
+     *
+     * Inputs:
+     * * 0: A 2-D or 4-D tensor, specifying the input.
+     *      Since HAL version 1.2, this tensor may be zero-sized.
+     * * 1: A scalar, specifying the positive scaling factor for the exponent,
+     *      beta. If input0 is of {@link OperandType::TENSOR_FLOAT32},
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM} or
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}, the scalar
+     *      must be of {@link OperandType::FLOAT32}.
+     *      If input0 is of {@link OperandType::TENSOR_FLOAT16}, then the
+     *      scalar must be of {@link OperandType::FLOAT16}.
+     * * 2: An optional {@link OperandType::INT32} scalar, default to -1,
+     *      specifying the dimension the activation would be performed on.
+     *      Negative index is used to specify axis from the end (e.g. -1 for
+     *      the last axis). Must be in the range [-n, n).
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     *      For {@link OperandType::TENSOR_QUANT8_ASYMM},
+     *      the scale must be 1.f / 256 and the zeroPoint must be 0.
+     *      For {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the scale must be 1.f / 256 and the zeroPoint must be -128.
+     */
+    SOFTMAX = 25,
+    /**
+     * Rearranges blocks of spatial data, into depth.
+     *
+     * More specifically, this op outputs a copy of the input tensor where
+     * values from the height and width dimensions are moved to the depth
+     * dimension. The value block_size indicates the input block size and how
+     * the data is moved.
+     *
+     * Non-overlapping blocks of size block_size x block_size from the height
+     * and width dimensions are rearranged into depth at each location.
+     *
+     * The depth of the output tensor is input_depth * block_size * block_size.
+     * The input tensor's height and width must be divisible by block_size.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     * NCHW is supported since HAL version 1.2.
+     *
+     * Inputs:
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
+     *      specifying the input.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the block_size.
+     *      block_size must be >=1 and block_size must be a divisor of both the
+     *      input height and width.
+     * * 2: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: The output 4-D tensor, of shape [batches, height/block_size,
+     *      width/block_size, depth_in*block_size*block_size].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    SPACE_TO_DEPTH = 26,
+    /**
+     * SVDF op is a kind of stateful layer derived from the notion that a
+     * densely connected layer that's processing a sequence of input frames can
+     * be approximated by using a singular value decomposition of each of its
+     * nodes. The implementation is based on:
+     *
+     * https://research.google.com/pubs/archive/43813.pdf
+     *
+     * P. Nakkiran, R. Alvarez, R. Prabhavalkar, C. Parada.
+     * “Compressing Deep Neural Networks using a Rank-Constrained Topology”.
+     * INTERSPEECH, 2015.
+     *
+     * It processes the incoming input using a 2-stage filtering mechanism:
+     * * stage 1 performs filtering on the "features" dimension, whose outputs
+     *   get pushed into a memory of fixed-size memory_size.
+     * * stage 2 performs filtering on the "time" dimension of the memory_size
+     *   memoized outputs of stage 1.
+     *
+     * Specifically, for rank 1, this layer implements the operation:
+     *
+     *     memory = push(conv1d(inputs, weights_feature, feature_dim,
+     *                          "PADDING_VALID"));
+     *     outputs = activation(memory * weights_time + bias);
+     *
+     * Where:
+     * * “weights_feature” is a weights matrix that processes the inputs (by
+     *   convolving the input with every “feature filter”), and whose outputs
+     *   get pushed, stacked in order, into the fixed-size “memory” (the oldest
+     *   entry gets dropped);
+     * * “weights_time” is a weights matrix that processes the “memory” (by a
+     *   batched matrix multiplication on the num_units);
+     * * “bias” is an optional bias vector (added to each output vector in the
+     *   batch); and
+     * * “activation” is the function passed as the “fused_activation_function”
+     *   argument (if not “NONE”).
+     *
+     * Each rank adds a dimension to the weights matrices by means of stacking
+     * the filters.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * All input tensors must be the same type.
+     *
+     * Inputs:
+     * * 0: input.
+     *      A 2-D tensor of shape [batch_size, input_size], where “batch_size”
+     *      corresponds to the batching dimension, and “input_size” is the size
+     *      of the input.
+     * * 1: weights_feature.
+     *      A 2-D tensor of shape [num_units, input_size], where “num_units”
+     *      corresponds to the number of units.
+     * * 2: weights_time.
+     *      A 2-D tensor of shape [num_units, memory_size], where “memory_size”
+     *      corresponds to the fixed-size of the memory.
+     * * 3: bias.
+     *      An optional 1-D tensor of shape [num_units].
+     * * 4: state (in).
+     *      A 2-D tensor of shape [batch_size, (memory_size - 1) * num_units * rank].
+     * * 5: rank.
+     *      The rank of the SVD approximation.
+     * * 6: fused_activation_function.
+     *      An optional {@link FusedActivationFunc} value indicating the
+     *      activation function. If “NONE” is specified then it results in a
+     *      linear activation.
+     *
+     * Outputs:
+     * * 0: state (out).
+     *      A 2-D tensor of the same {@link OperandType} as the inputs, with shape
+     *      [batch_size, (memory_size - 1) * num_units * rank].
+     * * 1: output.
+     *      A 2-D tensor of the same {@link OperandType} as the inputs, with shape
+     *      [batch_size, num_units].
+     */
+    SVDF = 27,
+    /**
+     * Computes hyperbolic tangent of input tensor element-wise.
+     *
+     * The output is calculated using this formula:
+     *
+     *     output = tanh(input)
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4.
+     *
+     * Inputs:
+     * * 0: A tensor, specifying the input.
+     *      Since HAL version 1.2, this tensor may be zero-sized.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     *      For {@link OperandType::TENSOR_QUANT8_ASYMM},
+     *      the scale must be 1.f / 128 and the zeroPoint must be 128.
+     *      For {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the scale must be 1.f / 128 and the zeroPoint must be 0.
+     */
+    TANH = 28,
+    /**
+     * BatchToSpace for N-dimensional tensors.
+     *
+     * This operation reshapes the batch dimension (dimension 0) into M + 1
+     * dimensions of shape block_shape + [batch], interleaves these blocks back
+     * into the grid defined by the spatial dimensions [1, ..., M], to obtain a
+     * result with the same rank as the input.
+     *
+     * This is the reverse of SpaceToBatch.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     * NCHW is supported since HAL version 1.2.
+     *
+     * Inputs:
+     * * 0: An n-D tensor, specifying the tensor to be reshaped.
+     * * 1: A 1-D Tensor of {@link OperandType::TENSOR_INT32}, the block
+     *      sizes for each spatial dimension of the input tensor. All values
+     *      must be >= 1.
+     * * 2: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    BATCH_TO_SPACE_ND = 29,
+    /**
+     * Element-wise division of two tensors.
+     *
+     * Takes two input tensors of identical {@link OperandType} and compatible
+     * dimensions. The output is the result of dividing the first input tensor
+     * by the second, optionally modified by an activation function.
+     *
+     * For inputs of {@link OperandType::TENSOR_INT32}, performs
+     * "floor division" ("//" in Python). For example,
+     *     5 // 2 = 2
+     *    -5 // 2 = -3
+     *
+     * Two dimensions are compatible when:
+     *     1. they are equal, or
+     *     2. one of them is 1
+     *
+     * The size of the output is the maximum size along each dimension of the
+     * input operands. It starts with the trailing dimensions, and works its way
+     * forward.
+     *
+     * Example:
+     *     input1.dimension =    {4, 1, 2}
+     *     input2.dimension = {5, 4, 3, 1}
+     *     output.dimension = {5, 4, 3, 2}
+     *
+     * Since HAL version 1.2, generic zero-sized input tensor is supported. Zero
+     * dimension is only compatible with 0 or 1. The size of the output
+     * dimension is zero if either of corresponding input dimension is zero.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor, specifying the first input.
+     * * 1: A tensor of the same {@link OperandType}, and compatible dimensions
+     *      as input0.
+     * * 2: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     *      For a {@link OperandType::TENSOR_INT32} tensor,
+     *      the {@link FusedActivationFunc} must be "NONE".
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     */
+    DIV = 30,
+    /**
+     * Computes the mean of elements across dimensions of a tensor.
+     *
+     * Reduces the input tensor along the given dimensions to reduce. Unless
+     * keep_dims is true, the rank of the tensor is reduced by 1 for each entry
+     * in axis. If keep_dims is true, the reduced dimensions are retained with
+     * length 1.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: A tensor, specifying the input.
+     * * 1: A 1-D Tensor of {@link OperandType::TENSOR_INT32}. The dimensions
+     *      to reduce. Must be in the range
+     *      [-rank(input_tensor), rank(input_tensor)).
+     *
+     *      NOTE: When the operation was introduced, the documentation
+     *      incorrectly stated that if dimensions were empty, the operation
+     *      would reduce across all dimensions. This behavior was never
+     *      implemented.
+     *
+     * * 2: An {@link OperandType::INT32} scalar, keep_dims. If positive,
+     *      retains reduced dimensions with length 1.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     *      If all dimensions are reduced and keep_dims is false, the output
+     *      shape is [1].
+     */
+    MEAN = 31,
+    /**
+     * Pads a tensor.
+     *
+     * This operation pads a tensor according to the specified paddings.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *   (full support since HAL version 1.2, see the output section)
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor, specifying the tensor to be padded.
+     * * 1: A 2-D Tensor of {@link OperandType::TENSOR_INT32}, the paddings
+     *      for each spatial dimension of the input tensor. The shape of the
+     *      tensor must be {rank(input0), 2}.
+     *      padding[i, 0] specifies the number of elements to be padded in the
+     *      front of dimension i.
+     *      padding[i, 1] specifies the number of elements to be padded after the
+     *      end of dimension i.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0. The
+     *      output tensor has the same rank as input0, and each
+     *      dimension of the output tensor has the same size as the
+     *      corresponding dimension of the input tensor plus the size
+     *      of the padding:
+     *          output0.dimension[i] =
+     *              padding[i, 0] + input0.dimension[i] + padding[i, 1]
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     *
+     *      NOTE: Before HAL version 1.2, the pad value for
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM} is undefined.
+     *      Since HAL version 1.2, the pad value is always the logical zero.
+     */
+    PAD = 32,
+    /**
+     * SpaceToBatch for N-Dimensional tensors.
+     *
+     * This operation divides "spatial" dimensions [1, ..., M] of the input into
+     * a grid of blocks of shape block_shape, and interleaves these blocks with
+     * the "batch" dimension (0) such that in the output, the spatial dimensions
+     * [1, ..., M] correspond to the position within the grid, and the batch
+     * dimension combines both the position within a spatial block and the
+     * original batch position. Prior to division into blocks, the spatial
+     * dimensions of the input are optionally zero padded according to paddings.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *   (full support since HAL version 1.2, see the output section)
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     * NCHW is supported since HAL version 1.2.
+     *
+     * Inputs:
+     * * 0: An n-D tensor, specifying the input.
+     * * 1: A 1-D Tensor of {@link OperandType::TENSOR_INT32}, the block
+     *      sizes for each spatial dimension of the input tensor. All values
+     *      must be >= 1.
+     * * 2: A 2-D Tensor of {@link OperandType::TENSOR_INT32}, the paddings
+     *      for each spatial dimension of the input tensor. All values must be
+     *      >= 0. The shape of the tensor must be {M, 2}, where M is the number
+     *      of spatial dimensions.
+     *      padding[i, 0] specifies the number of elements to be padded in the
+     *      front of dimension i.
+     *      padding[i, 1] specifies the number of elements to be padded after the
+     *      end of dimension i.
+     * * 3: An optional {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     *      Available since HAL version 1.2.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     *
+     *      NOTE: Before HAL version 1.2, the pad value for
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM} is undefined.
+     *      Since HAL version 1.2, the pad value is always the logical zero.
+     */
+    SPACE_TO_BATCH_ND = 33,
+    /**
+     * Removes dimensions of size 1 from the shape of a tensor.
+     *
+     * Given a tensor input, this operation returns a tensor of the same
+     * {@link OperandType} with all dimensions of size 1 removed. If you don't
+     * want to remove all size 1 dimensions, you can remove specific size 1
+     * dimensions by specifying the axes (input1).
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor, the tensor to be squeezed.
+     * * 1: An optional 1-D tensor of {@link OperandType::TENSOR_INT32}. The
+     *      dimensions to squeeze. If specified only squeezes the dimensions
+     *      listed. Otherwise, squeezes all dimensions. The dimension index
+     *      starts at 0. An error must be reported if squeezing a dimension that
+     *      is not 1.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0. Contains the
+     *      same data as input, but has one or more dimensions of size 1
+     *      removed.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     *      If all input dimensions are equal to 1 and are to be squeezed, the
+     *      output shape is [1].
+     */
+    SQUEEZE = 34,
+    /**
+     * Extracts a strided slice of a tensor.
+     *
+     * Roughly speaking, this op extracts a slice of size (end - begin) / stride
+     * from the given input tensor. Starting at the location specified by begin
+     * the slice continues by adding stride to the index until all dimensions
+     * are not less than end. Note that a stride can be negative, which causes a
+     * reverse slice.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor, specifying the tensor to be sliced.
+     * * 1: begin, a 1-D tensor of {@link OperandType::TENSOR_INT32}. The
+     *      starts of the dimensions of the input tensor to be sliced. The
+     *      length must be of rank(input0).
+     * * 2: end, a 1-D tensor of {@link OperandType::TENSOR_INT32}. The
+     *      ends of the dimensions of the input tensor to be sliced. The length
+     *      must be of rank(input0).
+     * * 3: strides, a 1-D tensor of {@link OperandType::TENSOR_INT32}. The
+     *      strides of the dimensions of the input tensor to be sliced. The
+     *      length must be of rank(input0). The entries must be non-zero.
+     * * 4: begin_mask, an {@link OperandType::INT32} scalar. If the ith bit
+     *      of begin_mask is set, begin[i] is ignored and the fullest possible
+     *      range in that dimension is used instead.
+     * * 5: end_mask, an {@link OperandType::INT32} scalar. If the ith bit of
+     *      end_mask is set, end[i] is ignored and the fullest possible range in
+     *      that dimension is used instead.
+     * * 6: shrink_axis_mask, an {@link OperandType::INT32} scalar. If the
+     *      ith bit of shrink_axis_mask is set, the ith dimension specification
+     *      shrinks the dimensionality by 1, taking on the value at index
+     *      begin[i]. In this case, the ith specification must define a
+     *      slice of size 1, e.g. begin[i] = x, end[i] = x + 1.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0 and rank (n - k),
+     *      where k is the number of bits set in shrink_axis_mask.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     *      If shrink_axis_mask is true for all input dimensions, the output
+     *      shape is [1].
+     */
+    STRIDED_SLICE = 35,
+    /**
+     * Element-wise subtraction of two tensors.
+     *
+     * Takes two input tensors of identical {@link OperandType} and compatible
+     * dimensions. The output is the result of subtracting the second input
+     * tensor from the first one, optionally modified by an activation function.
+     *
+     * Two dimensions are compatible when:
+     *     1. they are equal, or
+     *     2. one of them is 1
+     *
+     * The size of the output is the maximum size along each dimension of the
+     * input operands. It starts with the trailing dimensions, and works its way
+     * forward.
+     *
+     * Example:
+     *     input1.dimension =    {4, 1, 2}
+     *     input2.dimension = {5, 4, 3, 1}
+     *     output.dimension = {5, 4, 3, 2}
+     *
+     * Since HAL version 1.2, generic zero-sized input tensor is supported. Zero
+     * dimension is only compatible with 0 or 1. The size of the output
+     * dimension is zero if either of corresponding input dimension is zero.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     * * {@link OperandType::TENSOR_INT32} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor, specifying the first input.
+     * * 1: A tensor of the same {@link OperandType}, and compatible dimensions
+     *      as input0.
+     * * 2: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     *      For a {@link OperandType::TENSOR_INT32} tensor,
+     *      the {@link FusedActivationFunc} must be "NONE".
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint can be different from inputs' scale and zeroPoint.
+     */
+    SUB = 36,
+    /**
+     * Transposes the input tensor, permuting the dimensions according to the
+     * perm tensor.
+     *
+     * The returned tensor's dimension i corresponds to the input dimension
+     * perm[i]. If perm is not given, it is set to (n-1...0), where n is the
+     * rank of the input tensor. Hence by default, this operation performs a
+     * regular matrix transpose on 2-D input Tensors.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16} (since HAL version 1.2)
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor, specifying the tensor to be transposed.
+     *      Since HAL version 1.2, this tensor may be zero-sized.
+     * * 1: An optional 1-D Tensor of {@link OperandType::TENSOR_INT32},
+     *      the permutation of the dimensions of the input tensor.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    TRANSPOSE = 37,
+    /**
+     * Computes the absolute value of a tensor, element-wise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     */
+    ABS = 38,
+    /**
+     * Returns the index of the largest element along an axis.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: An n-D tensor specifying the input. Must be non-empty.
+     * * 1: An {@link OperandType::INT32} scalar specifying the axis to
+     *      reduce across. Negative index is used to specify axis from the
+     *      end (e.g. -1 for the last axis). Must be in the range [-n, n).
+     *
+     * Outputs:
+     * * 0: An (n - 1)-D {@link OperandType::TENSOR_INT32} tensor.
+     *      If input is 1-dimensional, the output shape is [1].
+     */
+    ARGMAX = 39,
+    /**
+     * Returns the index of the smallest element along an axis.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: An n-D tensor specifying the input. Must be non-empty.
+     * * 1: An {@link OperandType::INT32} scalar specifying the axis to
+     *      reduce across. Negative index is used to specify axis from the
+     *      end (e.g. -1 for the last axis). Must be in the range [-n, n).
+     *
+     * Outputs:
+     * * 0: An (n - 1)-D {@link OperandType::TENSOR_INT32} tensor.
+     *      If input is 1-dimensional, the output shape is [1].
+     */
+    ARGMIN = 40,
+    /**
+     * Transform axis-aligned bounding box proposals using bounding box deltas.
+     *
+     * Given the positions of bounding box proposals and the corresponding
+     * bounding box deltas for each class, return the refined bounding box
+     * regions. The resulting bounding boxes are clipped against the edges of
+     * the image.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT16_ASYMM}
+     *
+     * Inputs:
+     * * 0: A 2-D Tensor of shape [num_rois, 4], specifying the locations of the
+     *      bounding box proposals, each line with format [x1, y1, x2, y2].
+     *      For tensor of type {@link OperandType::TENSOR_QUANT16_ASYMM},
+     *      the zeroPoint must be 0 and the scale must be 0.125. Zero num_rois
+     *      is supported for this tensor.
+     * * 1: A 2-D Tensor of shape [num_rois, num_classes * 4], specifying the
+     *      bounding box delta for each region of interest and each class. The
+     *      bounding box deltas are organized in the following order
+     *      [dx, dy, dw, dh], where dx and dy are the relative correction factors
+     *      for the center position of the bounding box with respect to the width
+     *      and height, and dw and dh are the log-scale relative correction factors
+     *      for the width and height. For input0 of type
+     *      {@link OperandType::TENSOR_QUANT16_ASYMM}, this tensor should be
+     *      of {@link OperandType::TENSOR_QUANT8_ASYMM} or
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}. Zero num_rois is
+     *      supported for this tensor.
+     * * 2: A 1-D {@link OperandType::TENSOR_INT32} tensor, of shape
+     *      [num_rois], specifying the batch index of each box. Boxes with
+     *      the same batch index are grouped together. Zero num_rois is
+     *      supported for this tensor.
+     * * 3: A 2-D Tensor of shape [batches, 2], specifying the information of
+     *      each image in the batch, each line with format
+     *      [image_height, image_width].
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0, with shape
+     *      [num_rois, num_classes * 4], specifying the coordinates of each
+     *      output bounding box for each class, with format [x1, y1, x2, y2].
+     *      For type of {@link OperandType::TENSOR_QUANT16_ASYMM}, the
+     *      scale must be 0.125 and the zero point must be 0.
+     */
+    AXIS_ALIGNED_BBOX_TRANSFORM = 41,
+    /**
+     * A recurrent neural network layer that applies an LSTM cell to a
+     * sequence of inputs in forward and backward directions.
+     *
+     * The op supports cross-linking via an auxiliary input. Regular cell feeds
+     * one input into the two RNN cells in the following way:
+     *
+     *       INPUT  (INPUT_REVERSED)
+     *         |         |
+     *    ---------------------
+     *    | FW_LSTM   BW_LSTM |
+     *    ---------------------
+     *         |         |
+     *      FW_OUT     BW_OUT
+     *
+     * An op with cross-linking takes two inputs and feeds them into the RNN
+     * cells in the following way:
+     *
+     *       AUX_INPUT   (AUX_INPUT_REVERSED)
+     *           |             |
+     *     INPUT | (INPUT_R'D.)|
+     *       |   |       |     |
+     *    -----------------------
+     *    |  \  /        \    / |
+     *    | FW_LSTM     BW_LSTM |
+     *    -----------------------
+     *         |           |
+     *      FW_OUT      BW_OUT
+     *
+     * The cross-linking mode is enabled iff auxiliary input and auxiliary
+     * weights are present. While stacking this op on top of itself, this
+     * allows to connect both forward and backward outputs from previous cell
+     * to the next cell's input.
+     *
+     * Since HAL version 1.3 parallel linking mode is supported. The mode is
+     * enabled if auxiliary input is present but auxiliary weights are omitted.
+     * In this case, the cell feeds inputs into the RNN in the following way:
+     *
+     *       INPUT (AUX_INPUT_REVERSED)
+     *         |         |
+     *    ---------------------
+     *    | FW_LSTM   BW_LSTM |
+     *    ---------------------
+     *         |         |
+     *      FW_OUT     BW_OUT
+     *
+     * While stacking this op on top of itself, this allows to connect both
+     * forward and backward outputs from previous cell to the next cell's
+     * corresponding inputs.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: 3, either time-major or batch-major.
+     *
+     * All input and output tensors must be of the same type.
+     *
+     * Inputs:
+     * * 0: The input.
+     *      A 3-D tensor of shape:
+     *        If time-major: [max_time, batch_size, input_size]
+     *        If batch-major: [batch_size, max_time, input_size]
+     *      where "max_time" is the number of timesteps (sequence length),
+     *      "batch_size" corresponds to the batching dimension, and
+     *      "input_size" is the size of the input.
+     * * 1: The forward input-to-input weights. Optional.
+     *      A 2-D tensor of shape [fw_num_units, input_size], where “fw_num_units”
+     *      corresponds to the number of forward cell units.
+     * * 2: The forward input-to-forget weights.
+     *      A 2-D tensor of shape [fw_num_units, input_size].
+     * * 3: The forward input-to-cell weights.
+     *      A 2-D tensor of shape [fw_num_units, input_size].
+     * * 4: The forward input-to-output weights.
+     *      A 2-D tensor of shape [fw_num_units, input_size].
+     * * 5: The forward recurrent-to-input weights. Optional.
+     *      A 2-D tensor of shape [fw_num_units, fw_output_size], where “fw_output_size”
+     *      corresponds to either the number of cell units (i.e., fw_num_units),
+     *      or the second dimension of the “fw_projection_weights”, if defined.
+     * * 6: The forward recurrent-to-forget weights.
+     *      A 2-D tensor of shape [fw_num_units, fw_output_size].
+     * * 7: The forward recurrent-to-cell weights.
+     *      A 2-D tensor of shape [fw_num_units, fw_output_size].
+     * * 8: The forward recurrent-to-output weights.
+     *      A 2-D tensor of shape [fw_num_units, fw_output_size].
+     * * 9: The forward cell-to-input weights. Optional.
+     *      A 1-D tensor of shape [fw_num_units].
+     * * 10: The forward cell-to-forget weights. Optional.
+     *       A 1-D tensor of shape [fw_num_units].
+     * * 11: The forward cell-to-output weights. Optional.
+     *       A 1-D tensor of shape [fw_num_units].
+     * * 12: The forward input gate bias. Optional.
+     *       A 1-D tensor of shape [fw_num_units].
+     * * 13: The forward forget gate bias.
+     *       A 1-D tensor of shape [fw_num_units].
+     * * 14: The forward cell gate bias.
+     *       A 1-D tensor of shape [fw_num_units].
+     * * 15: The forward output gate bias.
+     *       A 1-D tensor of shape [fw_num_units].
+     * * 16: The forward projection weights. Optional.
+     *       A 2-D tensor of shape [fw_output_size, fw_num_units].
+     * * 17: The forward projection bias. Optional.
+     *       A 1-D tensor of shape [fw_output_size].
+     * * 18: The backward input-to-input weights. Optional.
+     *       A 2-D tensor of shape [bw_num_units, input_size], where “bw_num_units”
+     *       corresponds to the number of backward cell units.
+     * * 19: The backward input-to-forget weights.
+     *       A 2-D tensor of shape [bw_num_units, input_size].
+     * * 20: The backward input-to-cell weights.
+     *       A 2-D tensor of shape [bw_num_units, input_size].
+     * * 21: The backward input-to-output weights.
+     *       A 2-D tensor of shape [bw_num_units, input_size].
+     * * 22: The backward recurrent-to-input weights. Optional.
+     *       A 2-D tensor of shape [bw_num_units, bw_output_size], where “bw_output_size”
+     *       corresponds to either the number of cell units (i.e., “bw_num_units”),
+     *       or the second dimension of the “bw_projection_weights”, if defined.
+     * * 23: The backward recurrent-to-forget weights.
+     *       A 2-D tensor of shape [bw_num_units, bw_output_size].
+     * * 24: The backward recurrent-to-cell weights.
+     *       A 2-D tensor of shape [bw_num_units, bw_output_size].
+     * * 25: The backward recurrent-to-output weights.
+     *       A 2-D tensor of shape [bw_num_units, bw_output_size].
+     * * 26: The backward cell-to-input weights. Optional.
+     *       A 1-D tensor of shape [bw_num_units].
+     * * 27: The backward cell-to-forget weights. Optional.
+     *       A 1-D tensor of shape [bw_num_units].
+     * * 28: The backward cell-to-output weights. Optional.
+     *       A 1-D tensor of shape [bw_num_units].
+     * * 29: The backward input gate bias. Optional.
+     *       A 1-D tensor of shape [bw_num_units].
+     * * 30: The backward forget gate bias.
+     *       A 1-D tensor of shape [bw_num_units].
+     * * 31: The backward cell gate bias.
+     *       A 1-D tensor of shape [bw_num_units].
+     * * 32: The backward output gate bias.
+     *       A 1-D tensor of shape [bw_num_units].
+     * * 33: The backward projection weights. Optional.
+     *       A 2-D tensor of shape [bw_output_size, bw_num_units].
+     * * 34: The backward projection bias. Optional.
+     *       A 1-D tensor of shape [bw_output_size].
+     * * 35: The forward input activation state.
+     *       A 2-D tensor of shape [batch_size, fw_output_size].
+     * * 36: The forward input cell state.
+     *       A 2-D tensor of shape [batch_size, fw_num_units].
+     * * 37: The backward input activation state.
+     *       A 2-D tensor of shape [batch_size, bw_output_size].
+     * * 38: The backward input cell state.
+     *       A 2-D tensor of shape [batch_size, bw_num_units].
+     * * 39: The auxiliary input. Optional.
+     *       A 3-D tensor of shape [max_time, batch_size, aux_input_size],
+     *       where “batch_size” corresponds to the batching dimension, and
+     *       “aux_input_size” is the size of the auxiliary input. See
+     *       the docs above for the usage modes explanation.
+     * * 40: The forward auxiliary input-to-input weights.
+     *       Optional. See the docs above for the usage modes explanation.
+     *       A 2-D tensor of shape [fw_num_units, aux_input_size].
+     * * 41: The forward auxiliary input-to-forget weights.
+     *       Optional. See the docs above for the usage modes explanation.
+     *       A 2-D tensor of shape [fw_num_units, aux_input_size].
+     * * 42: The forward auxiliary input-to-cell weights.
+     *       Optional. See the docs above for the usage modes explanation.
+     *       A 2-D tensor of shape [fw_num_units, aux_input_size].
+     * * 43: The forward auxiliary input-to-output weights.
+     *       Optional. See the docs above for the usage modes explanation.
+     *       A 2-D tensor of shape [fw_num_units, aux_input_size].
+     * * 44: The backward auxiliary input-to-input weights.
+     *       Optional. See the docs above for the usage modes explanation.
+     *       A 2-D tensor of shape [bw_num_units, aux_input_size].
+     * * 45: The backward auxiliary input-to-forget weights.
+     *       Optional. See the docs above for the usage modes explanation.
+     *       A 2-D tensor of shape [bw_num_units, aux_input_size].
+     * * 46: The backward auxiliary input-to-cell weights.
+     *       Optional. See the docs above for the usage modes explanation.
+     *       A 2-D tensor of shape [bw_num_units, aux_input_size].
+     * * 47: The backward auxiliary input-to-output weights.
+     *       Optional. See the docs above for the usage modes explanation.
+     *       A 2-D tensor of shape [bw_num_units, aux_input_size].
+     * * 48: The activation function.
+     *       A value indicating the activation function:
+     *       <ul>
+     *       <li>0: None;
+     *       <li>1: Relu;
+     *       <li>3: Relu6;
+     *       <li>4: Tanh;
+     *       <li>6: Sigmoid.
+     *       </ul>
+     * * 49: The clipping threshold for the cell state, such
+     *       that values are bound within [-cell_clip, cell_clip]. If set to 0.0
+     *       then clipping is disabled.
+     *       If all the input tensors have type {@link OperandType::TENSOR_FLOAT32},
+     *       this scalar must be of the type {@link OperandType::FLOAT32},
+     *       otherwise if all the input tensors have the type
+     *       {@link OperandType::TENSOR_FLOAT16}, this scalar must be
+     *       of type {@link OperandType::FLOAT16}.
+     * * 50: The clipping threshold for the output from the
+     *       projection layer, such that values are bound within
+     *       [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+     *       If all the input tensors have type {@link OperandType::TENSOR_FLOAT32},
+     *       this scalar must be of the type {@link OperandType::FLOAT32},
+     *       otherwise if all the input tensors have the type
+     *       {@link OperandType::TENSOR_FLOAT16}, this scalar must be
+     *       of type {@link OperandType::FLOAT16}.
+     * * 51: merge_outputs
+     *       An {@link OperandType::BOOL} scalar specifying if the outputs
+     *       from forward and backward cells should be merged.
+     * * 52: time_major
+     *       An {@link OperandType::BOOL} scalar specifying the shape format
+     *       of input and output tensors.
+     * * 53: The forward input layer normalization weights. Optional.
+     *       A 1-D tensor of shape [fw_num_units]. Used to rescale normalized inputs
+     *       to activation at input gate.
+     * * 54: The forward forget layer normalization weights. Optional.
+     *       A 1-D tensor of shape [fw_num_units]. Used to rescale normalized inputs
+     *       to activation at forget gate.
+     * * 55: The forward cell layer normalization weights. Optional.
+     *       A 1-D tensor of shape [fw_num_units]. Used to rescale normalized inputs
+     *       to activation at cell gate.
+     * * 56: The forward output layer normalization weights. Optional.
+     *       A 1-D tensor of shape [fw_num_units]. Used to rescale normalized inputs
+     *       to activation at output gate.
+     * * 57: The backward input layer normalization weights. Optional.
+     *       A 1-D tensor of shape [bw_num_units]. Used to rescale normalized inputs
+     *       to activation at input gate.
+     * * 58: The backward forget layer normalization weights. Optional.
+     *       A 1-D tensor of shape [bw_num_units]. Used to rescale normalized inputs
+     *       to activation at forget gate.
+     * * 59: The backward cell layer normalization weights. Optional.
+     *       A 1-D tensor of shape [bw_num_units]. Used to rescale normalized inputs
+     *       to activation at cell gate.
+     * * 60: The backward output layer normalization weights. Optional.
+     *       A 1-D tensor of shape [bw_num_units]. Used to rescale normalized inputs
+     *       to activation at output gate.
+     *
+     * Outputs:
+     * * 0: The forward output.
+     *      A 3-D tensor of shape:
+     *        If time-major and not merge_outputs:
+     *          [max_time, batch_size, fw_output_size]
+     *        If time-major and merge_outputs:
+     *          [max_time, batch_size, fw_output_size + bw_output_size]
+     *        If batch-major and not merge_outputs:
+     *          [batch_size, max_time, fw_output_size]
+     *        If batch-major and merge_outputs:
+     *          [batch_size, max_time, fw_output_size + bw_output_size]
+     * * 1: The backward output.  Unused if merge_outputs is true.
+     *      A 3-D tensor of shape:
+     *        If time-major: [max_time, batch_size, bw_output_size]
+     *        If batch-major: [batch_size, max_time, bw_output_size]
+     * * 2: The forward activation state output.
+     *      A 2-D tensor of shape [batch_size, fw_output_size] containing an
+     *      activation state from the last time step in the sequence. This
+     *      output is optional and can be omitted. If this output is present
+     *      then outputs 3-5 must be present as well.
+     *      Available since HAL version 1.3.
+     * * 3: The forward cell state output.
+     *      A 2-D tensor of shape [batch_size, fw_num_units] containing a cell state
+     *      from the last time step in the sequence. This output is optional
+     *      and can be omitted. If this output is present
+     *      then outputs 2, 4, 5 must be present as well.
+     *      Available since HAL version 1.3.
+     * * 4: The backward activation state output.
+     *      A 2-D tensor of shape [batch_size, bw_output_size] containing an
+     *      activation state from the last time step in the sequence. This
+     *      output is optional and can be omitted. If this output is present
+     *      then outputs 2, 3, 5 must be present as well.
+     *      Available since HAL version 1.3.
+     * * 5: The backward cell state output.
+     *      A 2-D tensor of shape [batch_size, bw_num_units] containing a cell state
+     *      from the last time step in the sequence. This output is optional
+     *      and can be omitted. If this output is present
+     *      then outputs 2-4 must be present as well.
+     *      Available since HAL version 1.3.
+     */
+    BIDIRECTIONAL_SEQUENCE_LSTM = 42,
+    /**
+     * A recurrent neural network layer that applies a basic RNN cell to a
+     * sequence of inputs in forward and backward directions.
+     *
+     * This Op unrolls the input along the sequence dimension, and implements
+     * the following operation for each element in the sequence s =
+     * 1...sequence_length:
+     *   fw_outputs[s] = fw_state = activation(inputs[s] * fw_input_weights’ +
+     *          fw_state * fw_recurrent_weights’ + fw_bias)
+     *
+     * And for each element in sequence t = sequence_length : 1
+     *   bw_outputs[t] = bw_state = activation(inputs[t] * bw_input_weights’ +
+     *          bw_state * bw_recurrent_weights’ + bw_bias)
+     *
+     * Where:
+     * * “{fw,bw}_input_weights” is a weight matrix that multiplies the inputs;
+     * * “{fw,bw}_recurrent_weights” is a weight matrix that multiplies the
+     *    current “state” which itself is the output from the previous time step
+     *    computation;
+     * * “{fw,bw}_bias” is a bias vector (added to each output vector in the
+     *    batch);
+     * * “activation” is the function passed as the “fused_activation_function”
+     *   argument (if not “NONE”).
+     *
+     * The op supports cross-linking via an auxiliary input. Regular cell feeds
+     * one input into the two RNN cells in the following way:
+     *
+     *       INPUT  (INPUT_REVERSED)
+     *         |         |
+     *    ---------------------
+     *    | FW_RNN     BW_RNN |
+     *    ---------------------
+     *         |         |
+     *      FW_OUT     BW_OUT
+     *
+     * An op with cross-linking takes two inputs and feeds them into the RNN
+     * cells in the following way:
+     *
+     *       AUX_INPUT   (AUX_INPUT_REVERSED)
+     *           |             |
+     *     INPUT | (INPUT_R'D.)|
+     *       |   |       |     |
+     *    -----------------------
+     *    |  \  /        \    / |
+     *    | FW_RNN       BW_RNN |
+     *    -----------------------
+     *         |           |
+     *      FW_OUT      BW_OUT
+     *
+     * The cross-linking mode is enabled iff auxiliary input and auxiliary
+     * weights are present. While stacking this op on top of itself, this
+     * allows to connect both forward and backward outputs from previous cell
+     * to the next cell's input.
+     *
+     * Since HAL version 1.3 parallel linking mode is supported. The mode is
+     * enabled if auxiliary input is present but auxiliary weights are omitted.
+     * In this case, the cell feeds inputs into the RNN in the following way:
+     *
+     *       INPUT (AUX_INPUT_REVERSED)
+     *         |         |
+     *    ---------------------
+     *    | FW_RNN     BW_RNN |
+     *    ---------------------
+     *         |         |
+     *      FW_OUT     BW_OUT
+     *
+     * While stacking this op on top of itself, this allows to connect both
+     * forward and backward outputs from previous cell to the next cell's
+     * corresponding inputs.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * The input tensors must all be the same type.
+     *
+     * Inputs:
+     * * 0: input.
+     *      A 3-D tensor. The shape is defined by the input 13 (timeMajor). If
+     *      it is set to true, then the input has a shape [maxTime, batchSize,
+     *      inputSize], otherwise the input has a shape [batchSize, maxTime,
+     *      inputSize].
+     * * 1: fwWeights.
+     *      A 2-D tensor of shape [fwNumUnits, inputSize].
+     * * 2: fwRecurrentWeights.
+     *      A 2-D tensor of shape [fwNumUnits, fwNumUnits].
+     * * 3: fwBias.
+     *      A 1-D tensor of shape [fwNumUnits].
+     * * 4: fwHiddenState.
+     *      A 2-D tensor of shape [batchSize, fwNumUnits]. Specifies a hidden
+     *      state input for the first time step of the computation.
+     * * 5: bwWeights.
+     *      A 2-D tensor of shape [bwNumUnits, inputSize].
+     * * 6: bwRecurrentWeights.
+     *      A 2-D tensor of shape [bwNumUnits, bwNumUnits].
+     * * 7: bwBias.
+     *      A 1-D tensor of shape [bwNumUnits].
+     * * 8: bwHiddenState
+     *      A 2-D tensor of shape [batchSize, bwNumUnits]. Specifies a hidden
+     *      state input for the first time step of the computation.
+     * * 9: auxInput.
+     *      A 3-D tensor. The shape is defined by the input 13 (timeMajor). If
+     *      it is set to true, then the input has a shape [maxTime, batchSize,
+     *      auxInputSize], otherwise the input has a shape [batchSize, maxTime,
+     *      auxInputSize]. Can be omitted. See the docs above for the usage
+     *      modes explanation.
+     * * 10:fwAuxWeights.
+     *      A 2-D tensor of shape [fwNumUnits, auxInputSize]. Can be omitted.
+     *      See the docs above for the usage modes explanation.
+     * * 11:bwAuxWeights.
+     *      A 2-D tensor of shape [bwNumUnits, auxInputSize]. Can be omitted.
+     *      See the docs above for the usage modes explanation.
+     * * 12:fusedActivationFunction.
+     *      A {@link FusedActivationFunc} value indicating the activation function. If
+     *      “NONE” is specified then it results in a linear activation.
+     * * 13:timeMajor
+     *      An {@link OperandType::BOOL} scalar specifying the shape format
+     *      of input and output tensors.
+     * * 14:mergeOutputs
+     *      An {@link OperandType::BOOL} scalar specifying if the outputs
+     *      from forward and backward cells are separate (if set to false) or
+     *      concatenated (if set to true).
+     * Outputs:
+     * * 0: fwOutput.
+     *      A 3-D tensor. The first two dimensions of the shape are defined by
+     *      the input 13 (timeMajor) and the third dimension is defined by the
+     *      input 14 (mergeOutputs). If timeMajor is set to true, then the first
+     *      two dimensions are [maxTime, batchSize], otherwise they are set to
+     *      [batchSize, maxTime]. If mergeOutputs is set to true, then the third
+     *      dimension is equal to (fwNumUnits + bwNumUnits), otherwise it is set
+     *      to fwNumUnits.
+     * * 1: bwOutput.
+     *      A 3-D tensor. If the input 14 (mergeOutputs) is set to true, then
+     *      this tensor is not produced. The shape is defined by the input 13
+     *      (timeMajor). If it is set to true, then the shape is set to
+     *      [maxTime, batchSize, bwNumUnits], otherwise the shape is set to
+     *      [batchSize, maxTime, bwNumUnits].
+     * * 2: The forward hidden state output.
+     *      A 2-D tensor of shape [batchSize, fwNumUnits] containing a hidden
+     *      state from the last time step in the sequence. This output is
+     *      optional and can be omitted. If this output is present then output
+     *      3 must be present as well.
+     *      Available since HAL version 1.3.
+     * * 3: The backward hidden state output.
+     *      A 2-D tensor of shape [batchSize, bwNumUnits] containing a hidden
+     *      state from the last time step in the sequence. This output is
+     *      optional and can be omitted. If this output is present then output
+     *      2 must be present as well.
+     *      Available since HAL version 1.3.
+     */
+    BIDIRECTIONAL_SEQUENCE_RNN = 43,
+    /**
+     * Greedily selects a subset of bounding boxes in descending order of score.
+     *
+     * This op applies NMS algorithm to each class. In each loop of execution,
+     * the box with maximum score gets selected and removed from the pending set.
+     * The scores of the rest of boxes are lowered according to the
+     * intersection-over-union (IOU) overlapping with the previously selected
+     * boxes and a specified NMS kernel method. Any boxes with score less
+     * than a threshold are removed from the pending set.
+     *
+     * Three NMS kernels are supported:
+     * * Hard:     score_new = score_old * (1 if IoU < threshold else 0)
+     * * Linear:   score_new = score_old * (1 if IoU < threshold else 1 - IoU)
+     * * Gaussian: score_new = score_old * exp(- IoU^2 / sigma)
+     *
+     * An axis-aligned bounding box is represented by its upper-left corner
+     * coordinate (x1,y1) and lower-right corner coordinate (x2,y2). A valid
+     * bounding box should satisfy x1 <= x2 and y1 <= y2.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Inputs:
+     * * 0: A 2-D Tensor of shape [num_rois, num_classes], specifying the score
+     *      of each bounding box proposal. The boxes are grouped by batches in the
+     *      first dimension. Zero num_rois is supported for this tensor.
+     * * 1: A 2-D Tensor specifying the bounding boxes of shape
+     *      [num_rois, num_classes * 4], organized in the order [x1, y1, x2, y2].
+     *      The boxes are grouped by batches in the first dimension. The sequential
+     *      order of the boxes corresponds with input0. For input0 of type
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM}, this tensor should be of
+     *      {@link OperandType::TENSOR_QUANT16_ASYMM}, with zeroPoint of 0 and
+     *      scale of 0.125.
+     *      For input0 of type {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      this tensor should be of {@link OperandType::TENSOR_QUANT16_ASYMM},
+     *      with zeroPoint of -128 and scale of 0.125.
+     *      Zero num_rois is supported for this tensor.
+     * * 2: A 1-D {@link OperandType::TENSOR_INT32} tensor, of shape
+     *      [num_rois], specifying the batch index of each box. Boxes with
+     *      the same batch index are grouped together.
+     * * 3: An {@link OperandType::FLOAT32} scalar, score_threshold. Boxes
+     *      with scores lower than the threshold are filtered before sending
+     *      to the NMS algorithm.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the maximum
+     *      number of selected bounding boxes for each image. Set to a negative
+     *      value for unlimited number of output bounding boxes.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the NMS
+     *      kernel method, options are 0:hard, 1:linear, 2:gaussian.
+     * * 6: An {@link OperandType::FLOAT32} scalar, specifying the IoU
+     *      threshold in hard and linear NMS kernel. This field is ignored if
+     *      gaussian kernel is selected.
+     * * 7: An {@link OperandType::FLOAT32} scalar, specifying the sigma in
+     *      gaussian NMS kernel. This field is ignored if gaussian kernel is
+     *      not selected.
+     * * 8: An {@link OperandType::FLOAT32} scalar, nms_score_threshold.
+     *      Boxes with scores lower than the threshold are dropped during the
+     *      score updating phase in soft NMS.
+     *
+     * Outputs:
+     * * 0: A 1-D Tensor of the same {@link OperandType} as input0, with shape
+     *      [num_output_rois], specifying the score of each output box. The boxes
+     *      are grouped by batches, but the sequential order in each batch is not
+     *      guaranteed. For type of
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      or {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the scale and zero point must be the same as input0.
+     * * 1: A 2-D Tensor of the same {@link OperandType} as input1, with shape
+     *      [num_output_rois, 4], specifying the coordinates of each
+     *      output bounding box with the same format as input1. The sequential
+     *      order of the boxes corresponds with output0. For type of
+     *      {@link OperandType::TENSOR_QUANT16_ASYMM}, the scale must be
+     *      0.125 and the zero point must be 0.
+     * * 2: A 1-D {@link OperandType::TENSOR_INT32} tensor, of shape
+     *      [num_output_rois], specifying the class of each output box. The
+     *      sequential order of the boxes corresponds with output0.
+     * * 3: A 1-D {@link OperandType::TENSOR_INT32} tensor, of shape
+     *      [num_output_rois], specifying the batch index of each box. Boxes
+     *      with the same batch index are grouped together.
+     */
+    BOX_WITH_NMS_LIMIT = 44,
+    /**
+     * Casts a tensor to a type.
+     *
+     * This operation ignores the scale and zeroPoint of quantized tensors,
+     * e.g. it treats a {@link OperandType::TENSOR_QUANT8_ASYMM} input
+     * as a tensor of uint8 values.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * Since HAL version 1.3, casting tensors of the following
+     * {@link OperandType} to the same {@link OperandType} is supported:
+     * * {@link OperandType::TENSOR_BOOL8}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT16_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT16_SYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     * * {@link OperandType::TENSOR_QUANT8_SYMM}
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: A tensor.
+     *
+     * Outputs:
+     * * 0: A tensor with the same shape as input0.
+     */
+    CAST = 45,
+    /**
+     * Shuffle the channels of the input tensor.
+     *
+     * Given an input tensor and an integer value of num_groups, CHANNEL_SHUFFLE
+     * divides the channel dimension into num_groups groups, and reorganizes the
+     * channels by grouping channels with the same index in each group.
+     *
+     * Along the channel dimension, the output is calculated using this formula:
+     *
+     *     output_channel[k * num_groups + g] = input_channel[g * group_size + k]
+     *
+     * where group_size = num_channels / num_groups
+     *
+     * The number of channels must be divisible by num_groups.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor, specifying the tensor to be shuffled.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the number of
+     *      groups.
+     * * 2: An {@link OperandType::INT32} scalar, specifying the dimension
+     *      channel shuffle would be performed on. Negative index is used to
+     *      specify axis from the end (e.g. -1 for the last axis). Must be in
+     *      the range [-n, n).
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} and same shape as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    CHANNEL_SHUFFLE = 46,
+    /**
+     * Apply postprocessing steps to bounding box detections.
+     *
+     * Bounding box detections are generated by applying transformation on a set
+     * of predefined anchors with the bounding box deltas from bounding box
+     * regression. A final step of hard NMS is applied to limit the number of
+     * returned boxes.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Inputs:
+     * * 0: A 3-D Tensor of shape [batches, num_anchors, num_classes], specifying
+     *      the score of each anchor with each class. Class 0 for each
+     *      [batches, num_anchors, 0] is background and will be ignored.
+     * * 1: A 3-D Tensor of shape [batches, num_anchors, length_box_encoding], with
+     *      the first four values in length_box_encoding specifying the bounding
+     *      box deltas. The box deltas are encoded in the order of [dy, dx, dh, dw],
+     *      where dy and dx are the linear-scale relative correction factors for the
+     *      center position of the bounding box with respect to the width and height,
+     *      dh and dw are the log-scale relative correction factors for the width and
+     *      height. All the entries in length_box_encoding beyond the first four
+     *      values are ignored in this operation.
+     * * 2: A 2-D Tensor of shape [num_anchors, 4], specifying the shape of each
+     *      predefined anchor, with format [ctr_y, ctr_x, h, w], where ctr_y and
+     *      ctr_x are the center position of the box, and h and w are the height
+     *      and the width.
+     * * 3: An {@link OperandType::FLOAT32} scalar, specifying the scaling
+     *      factor for dy in bounding box deltas.
+     * * 4: An {@link OperandType::FLOAT32} scalar, specifying the scaling
+     *      factor for dx in bounding box deltas.
+     * * 5: An {@link OperandType::FLOAT32} scalar, specifying the scaling
+     *      factor for dh in bounding box deltas.
+     * * 6: An {@link OperandType::FLOAT32} scalar, specifying the scaling
+     *      factor for dw in bounding box deltas.
+     * * 7: An {@link OperandType::BOOL} scalar, set to true to use regular
+     *      multi-class NMS algorithm that does NMS separately for each class,
+     *      set to false for a faster algorithm that only does one single NMS
+     *      using the highest class score.
+     * * 8: An {@link OperandType::INT32} scalar, max_num_detections, specifying
+     *      the maximum number of boxes for the output. Boxes with the lowest
+     *      scores are discarded to meet the limit.
+     * * 9: An {@link OperandType::INT32} scalar, only used when input7 is
+     *      set to false, specifying the maximum number of classes per detection.
+     * * 10: An {@link OperandType::INT32} scalar, only used when input7 is
+     *       set to true, specifying the maximum number of detections when
+     *       applying NMS algorithm for each single class.
+     * * 11: A scalar, score_threshold. Boxes with scores lower than the
+     *       threshold are filtered before sending to the NMS algorithm. The
+     *       scalar must be of {@link OperandType::FLOAT16} if input0 is of
+     *       {@link OperandType::TENSOR_FLOAT16} and of
+     *       {@link OperandType::FLOAT32} if input0 is of
+     *       {@link OperandType::TENSOR_FLOAT32}.
+     * * 12: A scalar, specifying the IoU threshold for hard NMS. The scalar
+     *       must be of {@link OperandType::FLOAT16} if input0 is of
+     *       {@link OperandType::TENSOR_FLOAT16} and of
+     *       {@link OperandType::FLOAT32} if input0 is of
+     *       {@link OperandType::TENSOR_FLOAT32}.
+     * * 13: An {@link OperandType::BOOL} scalar, set to true to include
+     *       background class in the list of label map for the output, set
+     *       to false to not include the background. When the background
+     *       class is included, it has label 0 and the output classes start
+     *       at 1 in the label map, otherwise, the output classes start at 0.
+     *
+     * Outputs:
+     * * 0: A 2-D tensor of the same {@link OperandType} as input0, with shape
+     *      [batches, max_num_detections], specifying the score of each output
+     *      detection.
+     * * 1: A 3-D tensor of shape [batches, max_num_detections, 4], specifying the
+     *      coordinates of each output bounding box, with format
+     *      [y1, x1, y2, x2].
+     * * 2: A 2-D {@link OperandType::TENSOR_INT32} tensor, of shape
+     *      [batches, max_num_detections], specifying the class label for each
+     *      output detection.
+     * * 3: A 1-D {@link OperandType::TENSOR_INT32} tensor, of shape [batches],
+     *      specifying the number of valid output detections for each batch.
+     */
+    DETECTION_POSTPROCESSING = 47,
+    /**
+     * For input tensors x and y, computes x == y elementwise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_BOOL8}
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * This operation supports broadcasting.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     * * 1: A tensor of the same {@link OperandType} and dimensions compatible
+     *      with input0.
+     *
+     * Outputs:
+     * * 0: A tensor of {@link OperandType::TENSOR_BOOL8}.
+     */
+    EQUAL = 48,
+    /**
+     * Computes exponential of x element-wise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     */
+    EXP = 49,
+    /**
+     * Inserts a dimension of 1 into a tensor's shape.
+     *
+     * Given a tensor input, this operation inserts a dimension of 1 at the
+     * given dimension index of input's shape. The dimension index starts at
+     * zero; if you specify a negative dimension index, it is counted backward
+     * from the end.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: An n-D tensor.
+     * * 1: An {@link OperandType::INT32} scalar specifying the dimension
+     *      index to expand. Must be in the range [-(n + 1), (n + 1)).
+     *
+     * Outputs:
+     * * 0: An (n + 1)-D tensor with the same {@link OperandType} and data as
+     *      input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    EXPAND_DIMS = 50,
+    /**
+     * Gathers values along an axis.
+     *
+     * Produces an output tensor with shape
+     *     input0.dimension[:axis] + indices.dimension + input0.dimension[axis + 1:]
+     * where:
+     *     # Vector indices (output is rank(input0)).
+     *     output[a_0, ..., a_n, i, b_0, ..., b_n] =
+     *       input0[a_0, ..., a_n, indices[i], b_0, ..., b_n]
+     *
+     *     # Higher rank indices (output is rank(input0) + rank(indices) - 1).
+     *     output[a_0, ..., a_n, i, ..., j, b_0, ... b_n] =
+     *       input0[a_0, ..., a_n, indices[i, ..., j], b_0, ..., b_n]
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: An n-D tensor from which to gather values.
+     * * 1: An {@link OperandType::INT32} scalar specifying the axis.
+     *      Negative index is used to specify axis from the end
+     *      (e.g. -1 for the last axis). Must be in the range [-n, n).
+     * * 2: A k-D tensor {@link OperandType::TENSOR_INT32} of indices.
+     *      The values must be in the bounds of the corresponding dimensions
+     *      of input0.
+     *
+     * Outputs:
+     * * 0: An (n + k - 1)-D tensor with the same {@link OperandType} as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    GATHER = 51,
+    /**
+     * Generate axis-aligned bounding box proposals.
+     *
+     * Bounding box proposals are generated by applying transformation on a set
+     * of predefined anchors with the bounding box deltas from bounding box
+     * regression. A final step of hard NMS is applied to limit the number of
+     * returned boxes.
+     *
+     * Each axis-aligned bounding box is represented by its upper-left corner
+     * coordinate (x1,y1) and lower-right corner coordinate (x2,y2). A valid
+     * bounding box should satisfy x1 <= x2 and y1 <= y2.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Inputs:
+     * * 0: A 4-D Tensor specifying the score of each anchor at each
+     *      location. With "NHWC" data layout, the tensor shape is
+     *      [batches, height, width, num_anchors]. With "NCHW" data layout,
+     *      the tensor shape is [batches, num_anchors, height, width].
+     * * 1: A 4-D Tensor specifying the bounding box deltas. With "NHWC" data
+     *      layout, the tensor shape is [batches, height, width, num_anchors * 4].
+     *      With "NCHW" data layout, the tensor shape is
+     *      [batches, num_anchors * 4, height, width]. The box deltas are encoded
+     *      in the order of [dx, dy, dw, dh], where dx and dy are the linear-scale
+     *      relative correction factors for the center position of the bounding box
+     *      with respect to the width and height, dw and dh are the log-scale
+     *      relative correction factors for the width and height. The last
+     *      dimension is the channel dimension.
+     * * 2: A 2-D Tensor of shape [num_anchors, 4], specifying the shape of each
+     *      predefined anchor, with format [x1, y1, x2, y2]. For input0 of type
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM} or
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}, this tensor should be of
+     *      {@link OperandType::TENSOR_QUANT16_SYMM}, with scale of 0.125.
+     * * 3: A 2-D Tensor of shape [batches, 2], specifying the size of
+     *      each image in the batch, with format [image_height, image_width].
+     *      For input0 of type {@link OperandType::TENSOR_QUANT8_ASYMM} or
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}, this
+     *      tensor should be of {@link OperandType::TENSOR_QUANT16_SYMM}, with
+     *      scale of 0.125.
+     * * 4: An {@link OperandType::FLOAT32} scalar, specifying the ratio
+     *      from the height of original image to the height of feature map.
+     * * 5: An {@link OperandType::FLOAT32} scalar, specifying the ratio
+     *      from the width of original image to the width of feature map.
+     * * 6: An {@link OperandType::INT32} scalar, specifying the maximum
+     *      number of boxes before going into the hard NMS algorithm. Boxes
+     *      with the lowest scores are discarded to meet the limit. Set to
+     *      a non-positive value for unlimited number.
+     * * 7: An {@link OperandType::INT32} scalar, specifying the maximum
+     *      number of boxes returning from the hard NMS algorithm. Boxes
+     *      with the lowest scores are discarded to meet the limit. Set to
+     *      a non-positive value for unlimited number.
+     * * 8: An {@link OperandType::FLOAT32} scalar, specifying the IoU
+     *      threshold for hard NMS.
+     * * 9: An {@link OperandType::FLOAT32} scalar, min_size. Boxes with
+     *      height or width lower than the absolute threshold are filtered out.
+     * * 10: An {@link OperandType::BOOL} scalar, set to true to specify
+     *       NCHW data layout for input0 and input1. Set to false for NHWC.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0, of shape
+     *      [num_output_rois], specifying the score of each output box.
+     *      The boxes are grouped by batches, but the sequential order in
+     *      each batch is not guaranteed. For type of
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM} or
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}, the scale and zero
+     *      point must be the same as input0.
+     * * 1: A tensor of the same {@link OperandType} as input3, of shape
+     *      [num_output_rois, 4], specifying the coordinates of each output
+     *      bounding box for each class, with format [x1, y1, x2, y2].
+     *      The sequential order of the boxes corresponds with output0.
+     *      For type of {@link OperandType::TENSOR_QUANT16_ASYMM}, the
+     *      scale must be 0.125 and the zero point must be 0.
+     * * 2: A 1-D {@link OperandType::TENSOR_INT32} tensor, of shape
+     *      [num_output_rois], specifying the batch index of each box. Boxes
+     *      with the same batch index are grouped together.
+     */
+    GENERATE_PROPOSALS = 52,
+    /**
+     * For input tensors x and y, computes x > y elementwise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_BOOL8}
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * This operation supports broadcasting.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     * * 1: A tensor of the same {@link OperandType} and dimensions compatible
+     *      with input0.
+     *
+     * Outputs:
+     * * 0: A tensor of {@link OperandType::TENSOR_BOOL8}.
+     */
+    GREATER = 53,
+    /**
+     * For input tensors x and y, computes x >= y elementwise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_BOOL8}
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * This operation supports broadcasting.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     * * 1: A tensor of the same {@link OperandType} and dimensions compatible
+     *      with input0.
+     *
+     * Outputs:
+     * * 0: A tensor of {@link OperandType::TENSOR_BOOL8}.
+     */
+    GREATER_EQUAL = 54,
+    /**
+     * Performs a grouped 2-D convolution operation.
+     *
+     * Given an input tensor of shape [batches, height, width, depth_in] and a
+     * filter tensor of shape [depth_out, filter_height, filter_width, depth_group]
+     * containing depth_out convolutional filters of depth depth_group, GROUPED_CONV
+     * applies a group of different filters to each input channel group, then
+     * concatenates the results together.
+     *
+     * Specifically, the input channels are divided into num_groups groups, each with
+     * depth depth_group, i.e. depth_in = num_groups * depth_group. The convolutional
+     * filters are also divided into num_groups groups, i.e. depth_out is divisible
+     * by num_groups. GROUPED_CONV applies each group of filters to the corresponding
+     * input channel group, and the results are concatenated together.
+     *
+     * The output dimensions are functions of the filter dimensions, stride, and
+     * padding.
+     *
+     * The values in the output tensor are computed as:
+     *
+     *     output[b, i, j, g * channel_multiplier + q] =
+     *         sum_{di, dj, dk} (
+     *             input[b, strides[1] * i + di, strides[2] * j + dj,
+     *                   g * depth_group + dk] *
+     *             filter[g * channel_multiplier + q, di, dj, dk]
+     *         ) + bias[channel]
+     *
+     * where channel_multiplier = depth_out / num_groups
+     *
+     * Supported tensor {@link OperandType} configurations:
+     * * 16 bit floating point:
+     * * * {@link OperandType::TENSOR_FLOAT16} for input, filter, output, and bias.
+     *
+     * * 32 bit floating point:
+     * * * {@link OperandType::TENSOR_FLOAT32} for input, filter, output, and bias.
+     *
+     * * Quantized:
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM} for input, filter, and output.
+     * * * {@link OperandType::TENSOR_INT32} for bias (with scale set to
+     * * * input.scale * filter.scale).
+     *
+     * * Quantized signed (since HAL version 1.3):
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+     * * * {@link OperandType::TENSOR_INT32} for bias (with scale set to
+     * * * input.scale * filter.scale).
+     *
+     * * Quantized with symmetric per channel quantization for the filter:
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM} for input and output.
+     * * * {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+     * * * {@link OperandType::TENSOR_INT32} for bias (scale set to 0.0,
+     * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+     *
+     * * Quantized signed with filter symmetric per channel quantization (since HAL version 1.3):
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} for input and output.
+     * * * {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+     * * * {@link OperandType::TENSOR_INT32} for bias (scale set to 0.0,
+     * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     *
+     * Both explicit padding and implicit padding are supported.
+     *
+     * Inputs (explicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
+     *      specifying the input, where depth_in = num_groups * depth_group.
+     * * 1: A 4-D tensor, of shape
+     *      [depth_out, filter_height, filter_width, depth_group], specifying
+     *      the filter, where depth_out must be divisible by num_groups.  For
+     *      tensor of type {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL}
+     *      the channel dimension (channelDim at
+     *      {@link SymmPerChannelQuantParams}) must be set to 0.
+     * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
+     *      tensor of type {@link OperandType::TENSOR_FLOAT32} or
+     *      {@link OperandType::TENSOR_FLOAT16}, the bias must be of the same type.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *      the bias should be of {@link OperandType::TENSOR_INT32}, with zeroPoint
+     *      of 0 and bias_scale == input_scale * filter_scale. For filter tensor
+     *      of {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
+     *      should be of {@link OperandType::TENSOR_INT32}, with zeroPoint of
+     *      0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+     *      bias_scale[i] = input_scale * filter_scale[i].
+     * * 3: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the left, in the ‘width’ dimension.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the right, in the ‘width’ dimension.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the top, in the ‘height’ dimension.
+     * * 6: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the bottom, in the ‘height’ dimension.
+     * * 7: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 8: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 9: An {@link OperandType::INT32} scalar, specifying the number of
+     *      groups.
+     * * 10: An {@link OperandType::INT32} scalar, and has to be one of the
+     *       {@link FusedActivationFunc} values. Specifies the activation to
+     *       invoke on the result.
+     * * 11: An {@link OperandType::BOOL} scalar, set to true to specify
+     *       NCHW data layout for input0 and output0. Set to false for NHWC.
+     *
+     * Inputs (implicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
+     *      specifying the input, where depth_in = num_groups * depth_group.
+     * * 1: A 4-D tensor, of shape
+     *      [depth_out, filter_height, filter_width, depth_group], specifying
+     *      the filter, where depth_out must be divisible by num_groups.  For
+     *      tensor of type {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL}
+     *      the channel dimension (SymmPerChannelQuantParams::channelDim)
+     *      must be set to 0.
+     * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
+     *      tensor of type {@link OperandType::TENSOR_FLOAT32} or
+     *      {@link OperandType::TENSOR_FLOAT16}, the bias must be of the same
+     *      type.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *      the bias should be of {@link OperandType::TENSOR_INT32}, with zeroPoint
+     *      of 0 and bias_scale == input_scale * filter_scale. For filter tensor
+     *      of {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
+     *      should be of {@link OperandType::TENSOR_INT32}, with zeroPoint of
+     *      0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+     *      bias_scale[i] = input_scale * filter_scale[i].
+     * * 3: An {@link OperandType::INT32} scalar, specifying the implicit
+     *      padding scheme, has to be one of the
+     *      following values: {0 (NONE), 1 (SAME), 2 (VALID)}.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 6: An {@link OperandType::INT32} scalar, specifying the number of
+     *      groups.
+     * * 7: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 8: An {@link OperandType::BOOL} scalar, set to true to specify
+     *      NCHW data layout for input0 and output0. Set to false for NHWC.
+     *
+     * Outputs:
+     * * 0: The output 4-D tensor, of shape
+     *      [batches, out_height, out_width, depth_out].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint can be different from inputs' scale and zeroPoint.
+     */
+    GROUPED_CONV_2D = 55,
+    /**
+     * Localize the maximum keypoints from heatmaps.
+     *
+     * This operation approximates the accurate maximum keypoint scores and
+     * indices after bicubic upscaling by using Taylor expansion up to the
+     * quadratic term.
+     *
+     * The bounding box is represented by its upper-left corner coordinate
+     * (x1,y1) and lower-right corner coordinate (x2,y2) in the original image.
+     * A valid bounding box should satisfy x1 <= x2 and y1 <= y2.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     *
+     * Inputs:
+     * * 0: A 4-D Tensor of shape
+     *      [num_boxes, heatmap_size, heatmap_size, num_keypoints],
+     *      specifying the heatmaps, the height and width of heatmaps should
+     *      be the same, and must be greater than or equal to 2.
+     * * 1: A 2-D Tensor of shape [num_boxes, 4], specifying the bounding boxes,
+     *      each with format [x1, y1, x2, y2]. For input0 of type
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM}, this tensor should
+     *      be of {@link OperandType::TENSOR_QUANT16_ASYMM}, with zeroPoint
+     *      of 0 and scale of 0.125.
+     *      For input0 of type
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}, this tensor
+     *      should be of {@link OperandType::TENSOR_QUANT16_ASYMM}, with
+     *      zeroPoint of -128 and scale of 0.125.
+     * * 2: An {@link OperandType::BOOL} scalar, set to true to specify
+     *      NCHW data layout for input0. Set to false for NHWC.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0, with shape
+     *      [num_boxes, num_keypoints], specifying score of the keypoints.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} or
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint can be different from input0 scale and zeroPoint.
+     * * 1: A tensor of the same {@link OperandType} as input1, with shape
+     *      [num_boxes, num_keypoints, 2], specifying the location of
+     *      the keypoints, the last dimension is organized as
+     *      [keypoint_x, keypoint_y].
+     *      For type of {@link OperandType::TENSOR_QUANT16_ASYMM}, the
+     *      scale must be 0.125 and the zero point must be 0.
+     */
+    HEATMAP_MAX_KEYPOINT = 56,
+    /**
+     * Applies instance normalization to the input tensor.
+     *
+     * The values in the output tensor are computed as:
+     *
+     *     output[b, h, w, c] =
+     *         (input[b, h, w, c] - mean[b, c]) * gamma /
+     *         sqrt(var[b, c] + epsilon) + beta
+     *
+     * Where the mean and variance are computed across the spatial dimensions:
+     *
+     *     mean[b, c] =
+     *         sum_{h, w}(input[b, h, w, c]) / sum(1)
+     *
+     *     var[b, c] =
+     *         sum_{h, w}(pow(input[b, h, w, c] - mean[b, c], 2)) / sum(1)
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     *
+     * Inputs:
+     * * 0: An n-D tensor, specifying the tensor to be normalized.
+     * * 1: A scalar, specifying gamma, the scale applied to the normalized
+     *      tensor. The scalar must be of {@link OperandType::FLOAT16} if
+     *      input0 is of {@link OperandType::TENSOR_FLOAT16} and of
+     *      {@link OperandType::FLOAT32} if input0 is of
+     *      {@link OperandType::TENSOR_FLOAT32}.
+     * * 2: A scalar, specifying beta, the offset applied to the normalized
+     *      tensor. The scalar must be of {@link OperandType::FLOAT16} if
+     *      input0 is of {@link OperandType::TENSOR_FLOAT16} and of
+     *      {@link OperandType::FLOAT32} if input0 is of
+     *      {@link OperandType::TENSOR_FLOAT32}.
+     * * 3: A scalar, specifying epsilon, the small value added to variance to
+     *      avoid dividing by zero. The scalar must be of {@link OperandType::FLOAT16} if
+     *      input0 is of {@link OperandType::TENSOR_FLOAT16} and of
+     *      {@link OperandType::FLOAT32} if input0 is of
+     *      {@link OperandType::TENSOR_FLOAT32}.
+     * * 4: An {@link OperandType::BOOL} scalar, set to true to specify
+     *      NCHW data layout for input0 and output0. Set to false for NHWC.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} and same shape as input0.
+     */
+    INSTANCE_NORMALIZATION = 57,
+    /**
+     * For input tensors x and y, computes x < y elementwise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_BOOL8}
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * This operation supports broadcasting.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     * * 1: A tensor of the same {@link OperandType} and dimensions compatible
+     *      with input0.
+     *
+     * Outputs:
+     * * 0: A tensor of {@link OperandType::TENSOR_BOOL8}.
+     */
+    LESS = 58,
+    /**
+     * For input tensors x and y, computes x <= y elementwise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_BOOL8}
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * This operation supports broadcasting.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     * * 1: A tensor of the same {@link OperandType} and dimensions compatible
+     *      with input0.
+     *
+     * Outputs:
+     * * 0: A tensor of {@link OperandType::TENSOR_BOOL8}.
+     */
+    LESS_EQUAL = 59,
+    /**
+     * Computes natural logarithm of x element-wise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     */
+    LOG = 60,
+    /**
+     * Returns the truth value of x AND y element-wise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_BOOL8}
+     *
+     * Supported tensor rank: from 1
+     *
+     * This operation supports broadcasting.
+     *
+     * Inputs:
+     * * 0: A tensor of {@link OperandType::TENSOR_BOOL8}.
+     * * 1: A tensor of {@link OperandType::TENSOR_BOOL8} and dimensions
+     *      compatible with input0.
+     *
+     * Outputs:
+     * * 0: A tensor of {@link OperandType::TENSOR_BOOL8}.
+     */
+    LOGICAL_AND = 61,
+    /**
+     * Computes the truth value of NOT x element-wise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_BOOL8}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     */
+    LOGICAL_NOT = 62,
+    /**
+     * Returns the truth value of x OR y element-wise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_BOOL8}
+     *
+     * Supported tensor rank: from 1
+     *
+     * This operation supports broadcasting.
+     *
+     * Inputs:
+     * * 0: A tensor of {@link OperandType::TENSOR_BOOL8}.
+     * * 1: A tensor of {@link OperandType::TENSOR_BOOL8} and dimensions
+     *      compatible with input0.
+     *
+     * Outputs:
+     * * 0: A tensor of {@link OperandType::TENSOR_BOOL8}.
+     */
+    LOGICAL_OR = 63,
+    /**
+     * Computes the log softmax activations given logits.
+     *
+     * The output is calculated using this formula:
+     *
+     *     output = logits * beta - log(reduce_sum(exp(logits * beta), axis))
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor specifying the input logits.
+     * * 1: A scalar, specifying the positive scaling factor for the exponent,
+     *      beta.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT16}, the beta
+     *      value must be of {@link OperandType::FLOAT16}.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT32}, the beta
+     *      value must be of {@link OperandType::FLOAT32}.
+     * * 2: An {@link OperandType::INT32} scalar specifying the axis to
+     *      reduce across. Negative index is used to specify axis from the
+     *      end (e.g. -1 for the last axis). Must be in the range [-n, n).
+     *
+     * Outputs:
+     * * 0: The output tensor of the same {@link OperandType} and shape as
+     *      input0.
+     */
+    LOG_SOFTMAX = 64,
+    /**
+     * Returns the element-wise maximum of two tensors.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     * * 1: A tensor of the same {@link OperandType} and compatible dimensions
+     *      with input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} tensor,
+     *      the scales and zeroPoint can be different from input0 scale and zeroPoint.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint can be different from inputs' scale and zeroPoint.
+     */
+    MAXIMUM = 65,
+    /**
+     * Returns the element-wise minimum of two tensors.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     * * 1: A tensor of the same {@link OperandType} and compatible dimensions
+     *      with input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} tensor,
+     *      the scales and zeroPoint can be different from input0 scale and zeroPoint.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint can be different from inputs' scale and zeroPoint.
+     */
+    MINIMUM = 66,
+    /**
+     * Computes numerical negative value element-wise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     */
+    NEG = 67,
+    /**
+     * For input tensors x and y, computes x != y elementwise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_BOOL8}
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * This operation supports broadcasting.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     * * 1: A tensor of the same {@link OperandType} and dimensions compatible
+     *      with input0.
+     *
+     * Outputs:
+     * * 0: A tensor of {@link OperandType::TENSOR_BOOL8}.
+     */
+    NOT_EQUAL = 68,
+    /**
+     * Pads a tensor with the given constant value according to the specified
+     * paddings.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor, specifying the tensor to be padded.
+     * * 1: A 2-D Tensor of {@link OperandType::TENSOR_INT32}, the paddings
+     *      for each spatial dimension of the input tensor. The shape of the
+     *      tensor must be {rank(input0), 2}.
+     *      padding[i, 0] specifies the number of elements to be padded in the
+     *      front of dimension i.
+     *      padding[i, 1] specifies the number of elements to be padded after
+     *      the end of dimension i.
+     * * 2: A scalar specifying the value to use for padding input0.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT16}, the
+     *      pad value must be of {@link OperandType::FLOAT16}.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT32}, the
+     *      pad value must be of {@link OperandType::FLOAT32}.
+     *      For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the pad value must be of {@link OperandType::INT32}. The
+     *      scale and zeroPoint are assumed to be the same as in input0.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0. The
+     *      output tensor has the same rank as input0, and each
+     *      dimension of the output tensor has the same size as the
+     *      corresponding dimension of the input tensor plus the size
+     *      of the padding:
+     *          output0.dimension[i] =
+     *              padding[i, 0] + input0.dimension[i] + padding[i, 1]
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    PAD_V2 = 69,
+    /**
+     * Computes the power of one value to another.
+     *
+     * Given a tensor base and a tensor exponent, this operation computes
+     * base^exponent elementwise.
+     *
+     * This operation supports broadcasting. The size of the output is the
+     * maximum size along each dimension of the input operands. It starts with
+     * the trailing dimensions, and works its way forward.
+     *
+     * For example:
+     *     base.dimension     =    {4, 1, 2}
+     *     exponent.dimension = {5, 4, 3, 1}
+     *     output.dimension   = {5, 4, 3, 2}
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: A tensor specifying the base.
+     * * 1: A tensor specifying the exponent.
+     *
+     * Outputs:
+     * * 0: An output tensor.
+     */
+    POW = 70,
+    /**
+     * Parametric Rectified Linear Unit.
+     *
+     * It follows: f(x) = alpha * x for x < 0, f(x) = x for x >= 0, where alpha
+     * is a learned array with the same {@link OperandType} and compatible
+     * dimensions as input x.
+     *
+     * Two dimensions are compatible when:
+     *     1. they are equal, or
+     *     2. one of them is 1
+     *
+     * The size of the output is the maximum size along each dimension of the
+     * input operands. It starts with the trailing dimensions, and works its way
+     * forward.
+     *
+     * Example:
+     *     input.dimension  =    {4, 1, 2}
+     *     alpha.dimension  = {5, 4, 3, 1}
+     *     output.dimension = {5, 4, 3, 2}
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: A tensor, specifying the input.
+     * * 1: A tensor of the same {@link OperandType}, and compatible dimensions
+     *      as input0, specifying the alpha.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scales and zeroPoint can be different from input0 scale and zeroPoint.
+     */
+    PRELU = 71,
+    /**
+     * Quantizes the input tensor.
+     *
+     * The formula for {@link OperandType::TENSOR_QUANT8_ASYMM} output tensor is:
+     *
+     *     output = max(0, min(255, round(input / scale) + zeroPoint))
+     *
+     * The formula for {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} output
+     * tensor is:
+     *
+     *     output = max(-128, min(127, round(input / scale) + zeroPoint))
+     *
+     * Supported input tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported output tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: A tensor, may be zero-sized.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0, but with
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM} or
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}.
+     */
+    QUANTIZE = 72,
+    /**
+     * A version of quantized LSTM, using 16 bit quantization for internal
+     * state.
+     *
+     * There is no projection layer, so cell state size is equal to the output
+     * size.
+     *
+     * Inputs:
+     * * 0: A 2-D tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and shape [numBatches, inputSize] specifying the input to the LSTM
+     *      cell. Tensor is quantized with a fixed quantization range of
+     *      [-1, 127/128] (scale = 1/128, zeroPoint = 128).
+     * * 1: The input-to-input weights.
+     *      A 2-D tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and shape [outputSize, inputSize] specifying input-to-input part of
+     *      weights for fully-connected layer inside the LSTM cell.
+     *      Quantization zero point and scale must be the same across all the
+     *      weights.
+     * * 2: The input-to-forget weights.
+     *      A 2-D tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and shape [outputSize, inputSize] specifying input-to-forget part of
+     *      weights for fully-connected layer inside the LSTM cell.
+     *      Quantization zero point and scale must be the same across all the
+     *      weights.
+     * * 3: The input-to-cell weights.
+     *      A 2-D tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and shape [outputSize, inputSize] specifying input-to-cell part of
+     *      weights for fully-connected layer inside the LSTM cell.
+     *      Quantization zero point and scale must be the same across all the
+     *      weights.
+     * * 4: The input-to-output weights.
+     *      A 2-D tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and shape [outputSize, inputSize] specifying input-to-output part of
+     *      weights for fully-connected layer inside the LSTM cell.
+     *      Quantization zero point and scale must be the same across all the
+     *      weights.
+     * * 5: The recurrent-to-input weights.
+     *      A 2-D tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and shape [outputSize, outputSize] specifying recurrent-to-input part
+     *      of weights for fully-connected layer inside the LSTM cell.
+     *      Quantization zero point and scale must be the same across all the
+     *      weights.
+     * * 6: The recurrent-to-forget weights.
+     *      A 2-D tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and shape [outputSize, outputSize] specifying recurrent-to-forget
+     *      part of weights for fully-connected layer inside the LSTM cell.
+     *      Quantization zero point and scale must be the same across all the
+     *      weights.
+     * * 7: The recurrent-to-cell weights.
+     *      A 2-D tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and shape [outputSize, outputSize] specifying recurrent-to-cell part
+     *      of weights for fully-connected layer inside the LSTM cell.
+     *      Quantization zero point and scale must be the same across all the
+     *      weights.
+     * * 8: The recurrent-to-output weights.
+     *      A 2-D tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and shape [outputSize, outputSize] specifying recurrent-to-output
+     *      part of weights for fully-connected layer inside the LSTM cell.
+     *      Quantization zero point and scale must be the same across all the
+     *      weights.
+     * * 9: The input gate bias.
+     *      A 1-D tensor of type {@link OperandType::TENSOR_INT32} and shape
+     *      [outputSize] specifying the bias for the fully-connected layer
+     *      inside the LSTM cell. Bias is quantized with scale being a product
+     *      of input and weights scales and zeroPoint equal to 0.
+     * * 10:The forget gate bias.
+     *      A 1-D tensor of type {@link OperandType::TENSOR_INT32} and shape
+     *      [outputSize] specifying the bias for the fully-connected layer
+     *      inside the LSTM cell. Bias is quantized with scale being a product
+     *      of input and weights scales and zeroPoint equal to 0.
+     * * 11:The cell bias.
+     *      A 1-D tensor of type {@link OperandType::TENSOR_INT32} and shape
+     *      [outputSize] specifying the bias for the fully-connected layer
+     *      inside the LSTM cell. Bias is quantized with scale being a product
+     *      of input and weights scales and zeroPoint equal to 0.
+     * * 12:The output gate bias.
+     *      A 1-D tensor of type {@link OperandType::TENSOR_INT32} and shape
+     *      [outputSize] specifying the bias for the fully-connected layer
+     *      inside the LSTM cell. Bias is quantized with scale being a product
+     *      of input and weights scales and zeroPoint equal to 0.
+     * * 13: A 2-D tensor of type {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       and shape [numBatches, outputSize] specifying the cell state from the
+     *       previous time step of the LSTM cell. It is quantized using a
+     *       quantization range of [-2^4, 2^4 * 32767/32768] (scale = 2^4 /
+     *       32768, zeroPoint = 0).
+     * * 14: A 2-D tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *       and shape [numBatches, outputSize] specifying the output of the LSTM
+     *       cell from previous time-step. Tensor is quantized with a fixed
+     *       quantization range of [-1, 127/128] (scale = 1/128, zeroPoint =
+     *       128).
+     *
+     *
+     * Outputs:
+     * * 0: A 2-D tensor of type {@link OperandType::TENSOR_QUANT16_SYMM}
+     *      and shape [numBatches, outputSize] which contains a cell state from
+     *      the current time step. Tensor is quantized using a quantization
+     *      range of [-2^4, 2^4 * 32767/32768] (scale = 2^4 / 32768, zeroPoint =
+     *      0).
+     * * 1: A 2-D tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and shape [numBatches, outputSize] which contains the output value.
+     *      Tensor is quantized with a fixed quantization range of [-1, 127/128]
+     *      (scale = 1/128, zeroPoint = 128).
+     */
+    QUANTIZED_16BIT_LSTM = 73,
+    /**
+     * Draws samples from a multinomial distribution.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Inputs:
+     * * 0: A 2-D tensor with shape [batches, classes], specifying the
+     *      unnormalized log-probabilities for all classes.
+     * * 1: A scalar {@link OperandType::INT32}, specifying the number of
+     *      independent samples to draw for each row slice.
+     * * 2: A 1-D {@link OperandType::TENSOR_INT32} tensor with shape [2],
+     *      specifying seeds used to initialize the random distribution. If both
+     *      provided seeds are 0, both will be randomly generated.
+     * Outputs:
+     * * 0: A 2-D {@link OperandType::TENSOR_INT32} tensor with shape
+     *      [batches, samples], containing the drawn samples.
+     */
+    RANDOM_MULTINOMIAL = 74,
+    /**
+     * Reduces a tensor by computing the "logical and" of elements along given
+     * dimensions.
+     *
+     * If keep_dims is true, the reduced dimensions are
+     * retained with length 1. Otherwise, the rank of the tensor is reduced by
+     * 1 for each entry in dimensions.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_BOOL8}
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor.
+     * * 1: A 1-D tensor of {@link OperandType::TENSOR_INT32}. The dimensions
+     *      to reduce. Dimension values must be in the range [-n, n).
+     * * 2: An {@link OperandType::BOOL} scalar, keep_dims. If true,
+     *      retains reduced dimensions with length 1.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      If all dimensions are reduced and keep_dims is false, the output
+     *      shape is [1].
+     */
+    REDUCE_ALL = 75,
+    /**
+     * Reduces a tensor by computing the "logical or" of elements along given
+     * dimensions.
+     *
+     * If keep_dims is true, the reduced dimensions are
+     * retained with length 1. Otherwise, the rank of the tensor is reduced by
+     * 1 for each entry in dimensions.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_BOOL8}
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor.
+     * * 1: A 1-D tensor of {@link OperandType::TENSOR_INT32}. The dimensions
+     *      to reduce. Dimension values must be in the range [-n, n).
+     * * 2: An {@link OperandType::BOOL} scalar, keep_dims. If true,
+     *      retains reduced dimensions with length 1.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      If all dimensions are reduced and keep_dims is false, the output
+     *      shape is [1].
+     */
+    REDUCE_ANY = 76,
+    /**
+     * Reduces a tensor by computing the maximum of elements along given
+     * dimensions.
+     *
+     * If keep_dims is true, the reduced dimensions are
+     * retained with length 1. Otherwise, the rank of the tensor is reduced by
+     * 1 for each entry in dimensions.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor.
+     * * 1: A 1-D tensor of {@link OperandType::TENSOR_INT32}. The dimensions
+     *      to reduce. Dimension values must be in the range [-n, n).
+     * * 2: An {@link OperandType::BOOL} scalar, keep_dims. If true,
+     *      retains reduced dimensions with length 1.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      If all dimensions are reduced and keep_dims is false, the output
+     *      shape is [1].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    REDUCE_MAX = 77,
+    /**
+     * Reduces a tensor by computing the minimum of elements along given
+     * dimensions.
+     *
+     * If keep_dims is true, the reduced dimensions are
+     * retained with length 1. Otherwise, the rank of the tensor is reduced by
+     * 1 for each entry in dimensions.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor.
+     * * 1: A 1-D tensor of {@link OperandType::TENSOR_INT32}. The dimensions
+     *      to reduce. Dimension values must be in the range [-n, n).
+     * * 2: An {@link OperandType::BOOL} scalar, keep_dims. If true,
+     *      retains reduced dimensions with length 1.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      If all dimensions are reduced and keep_dims is false, the output
+     *      shape is [1].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    REDUCE_MIN = 78,
+    /**
+     * Reduces a tensor by multiplying elements along given dimensions.
+     *
+     * If keep_dims is true, the reduced dimensions are
+     * retained with length 1. Otherwise, the rank of the tensor is reduced by
+     * 1 for each entry in dimensions.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor.
+     * * 1: A 1-D tensor of {@link OperandType::TENSOR_INT32}. The dimensions
+     *      to reduce. Dimension values must be in the range [-n, n).
+     * * 2: An {@link OperandType::BOOL} scalar, keep_dims. If true,
+     *      retains reduced dimensions with length 1.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      If all dimensions are reduced and keep_dims is false, the output
+     *      shape is [1].
+     */
+    REDUCE_PROD = 79,
+    /**
+     * Reduces a tensor by summing elements along given dimensions.
+     *
+     * If keep_dims is true, the reduced dimensions are
+     * retained with length 1. Otherwise, the rank of the tensor is reduced by
+     * 1 for each entry in dimensions.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: up to 4
+     *
+     * Inputs:
+     * * 0: An n-D tensor.
+     * * 1: A 1-D tensor of {@link OperandType::TENSOR_INT32}. The dimensions
+     *      to reduce. Dimension values must be in the range [-n, n).
+     * * 2: An {@link OperandType::BOOL} scalar, keep_dims. If true,
+     *      retains reduced dimensions with length 1.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0.
+     *      If all dimensions are reduced and keep_dims is false, the output
+     *      shape is [1].
+     */
+    REDUCE_SUM = 80,
+    /**
+     * Select and scale the feature map of each region of interest to a unified
+     * output size by average pooling sampling points from bilinear interpolation.
+     *
+     * The region of interest is represented by its upper-left corner coordinate
+     * (x1,y1) and lower-right corner coordinate (x2,y2) in the original image.
+     * A spatial scaling factor is applied to map into feature map coordinate.
+     * A valid region of interest should satisfy x1 <= x2 and y1 <= y2.
+     *
+     * No rounding is applied in this operation. The sampling points are uniformly
+     * distributed in the pooling bin and their values are calculated by bilinear
+     * interpolation.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     *
+     * Inputs:
+     * * 0: A 4-D tensor, specifying the feature map.
+     * * 1: A 2-D Tensor of shape [num_rois, 4], specifying the locations of
+     *      the regions of interest, each line with format [x1, y1, x2, y2].
+     *      For input0 of type {@link OperandType::TENSOR_QUANT8_ASYMM},
+     *      this tensor should be of {@link OperandType::TENSOR_QUANT16_ASYMM},
+     *      with zeroPoint of 0 and scale of 0.125. Zero num_rois is
+     *      supported for this tensor.
+     * * 2: A 1-D {@link OperandType::TENSOR_INT32} tensor, of shape
+     *      [num_rois], specifying the batch index of each box. Boxes with
+     *      the same batch index are grouped together. Zero num_rois is
+     *      supported for this tensor.
+     * * 3: An {@link OperandType::INT32} scalar, specifying the output
+     *      height of the output tensor.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the output
+     *      width of the output tensor.
+     * * 5: An {@link OperandType::FLOAT32} scalar, specifying the ratio
+     *      from the height of original image to the height of feature map.
+     * * 6: An {@link OperandType::FLOAT32} scalar, specifying the ratio
+     *      from the width of original image to the width of feature map.
+     * * 7: An {@link OperandType::INT32} scalar, specifying the number of
+     *      sampling points in height dimension used to compute the output.
+     *      Set to 0 for adaptive value of ceil(roi_height/out_height).
+     * * 8: An {@link OperandType::INT32} scalar, specifying the number of
+     *      sampling points in width dimension used to compute the output.
+     *      Set to 0 for adaptive value of ceil(roi_width/out_width).
+     * * 9: An {@link OperandType::BOOL} scalar, set to true to specify
+     *      NCHW data layout for input0 and output0. Set to false for NHWC.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0. The output
+     *      shape is [num_rois, out_height, out_width, depth].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint can be different from the input0 scale and zeroPoint.
+     */
+    ROI_ALIGN = 81,
+    /**
+     * Select and scale the feature map of each region of interest to a unified
+     * output size by max-pooling.
+     *
+     * The region of interest is represented by its upper-left corner coordinate
+     * (x1,y1) and lower-right corner coordinate (x2,y2) in the original image.
+     * A spatial scaling factor is applied to map into feature map coordinate.
+     * A valid region of interest should satisfy x1 <= x2 and y1 <= y2.
+     *
+     * Rounding is applied in this operation to ensure integer boundary for
+     * regions of interest and pooling bins.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     *
+     * Inputs:
+     * * 0: A 4-D tensor, specifying the feature map.
+     * * 1: A 2-D Tensor of shape [num_rois, 4], specifying the locations of
+     *      the regions of interest, each line with format [x1, y1, x2, y2].
+     *      For input0 of type {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      this tensor should be of {@link OperandType::TENSOR_QUANT16_ASYMM},
+     *      with zeroPoint of 0 and scale of 0.125.
+     * * 2: A 1-D {@link OperandType::TENSOR_INT32} tensor, of shape
+     *      [num_rois], specifying the batch index of each box. Boxes with
+     *      the same batch index are grouped together.
+     * * 3: An {@link OperandType::INT32} scalar, specifying the output
+     *      height of the output tensor.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the output
+     *      width of the output tensor.
+     * * 5: An {@link OperandType::FLOAT32} scalar, specifying the ratio
+     *      from the height of original image to the height of feature map.
+     * * 6: An {@link OperandType::FLOAT32} scalar, specifying the ratio
+     *      from the width of original image to the width of feature map.
+     * * 7: An {@link OperandType::BOOL} scalar, set to true to specify
+     *      NCHW data layout for input0 and output0. Set to false for NHWC.
+     *
+     * Outputs:
+     * * 0: A tensor of the same {@link OperandType} as input0. The output
+     *      shape is [num_rois, out_height, out_width, depth].
+     *      For input0 of type {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    ROI_POOLING = 82,
+    /**
+     * Computes reciprocal of square root of x element-wise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     */
+    RSQRT = 83,
+    /**
+     * Using a tensor of booleans c and input tensors x and y select values
+     * elementwise from both input tensors:
+     *
+     * O[i] = C[i] ? x[i] : y[i].
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: A tensor of type {@link OperandType::TENSOR_BOOL8} acting as a
+     *      mask that chooses, based on the value at each element, whether the
+     *      corresponding element in the output should be taken from input1 (if
+     *      true) or input2 (if false).
+     * * 1: An input tensor of the same shape as input0.
+     * * 2: An input tensor of the same shape and type as input1.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint can be different from input1 scale and zeroPoint.
+     *
+     * Outputs:
+     * * 0: A tensor of the same type and shape as input1 and input2.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} tensor,
+     *      the scale and zeroPoint can be different from inputs' scale and zeroPoint.
+     */
+    SELECT = 84,
+    /**
+     * Computes sin of x element-wise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     */
+    SIN = 85,
+    /**
+     * Extracts a slice of specified size from the input tensor starting at a
+     * specified location.
+     *
+     * The starting location is specified as a 1-D tensor containing offsets
+     * for each dimension. The size is specified as a 1-D tensor containing
+     * either size of a slice along corresponding dimension or -1. In the latter
+     * case, all the remaining elements in dimension are included in the slice.
+     *
+     * A sum of begin offset and a size of a slice must not exceed size of a
+     * corresponding dimension.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: An n-D tensor to take slice from, may be zero-sized.
+     * * 1: A 1-D tensor of type {@link OperandType::TENSOR_INT32} specifying
+     *      the beginning indices of the slice in each dimension.
+     * * 2: A 1-D tensor of type {@link OperandType::TENSOR_INT32} specifying
+     *      the size of the slice in each dimension.
+     *
+     * Outputs:
+     * * 0: An n-D tensor of the same type as the input containing the slice.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      its scale and zeroPoint have to be the same as the input0 scale and zeroPoint.
+     */
+    SLICE = 86,
+    /**
+     * Splits a tensor along a given axis into num_splits subtensors.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: An n-D tensor to split.
+     * * 1: An {@link OperandType::INT32} scalar specifying the axis along
+     *      which to split.
+     * * 2: An {@link OperandType::INT32} scalar indicating the number of
+     *      splits along given axis. Must evenly divide axis size.
+     *
+     * Outputs:
+     * * 0 ~ (num_splits - 1): Resulting subtensors.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    SPLIT = 87,
+    /**
+     * Computes square root of x element-wise.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape as input0.
+     */
+    SQRT = 88,
+    /**
+     * Constructs a tensor by tiling a given tensor.
+     *
+     * This operation creates a new tensor by replicating `input` `multiples`
+     * times. The output tensor's i-th dimension has `input.dims(i) * multiples[i]`
+     * elements, and the values of `input` are replicated `multiples[i]` times
+     * along the i-th dimension.
+     * For example, tiling `[a b c d]` by `[2]` produces `[a b c d a b c d]`.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: input, an n-D tensor specifying the input.
+     * * 1: multiples, a 1-D tensor of {@link OperandType::TENSOR_INT32}.
+     *      The length of multiples must be n.
+     *
+     * Outputs:
+     * * 0: A tiled tensor of the same {@link OperandType} and rank as `input`.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    TILE = 89,
+    /**
+     * Finds values and indices of the k largest entries for the last dimension.
+     *
+     * Resulting values in each dimension are sorted in descending order. If
+     * two values are equal, the one with larger index appears first.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: from 1
+     *
+     * Inputs:
+     * * 0: input, an n-D tensor specifying the input.
+     * * 1: k, an {@link OperandType::INT32} scalar, specifying the number of
+     *      top elements to look for along the last dimension.
+     *
+     * Outputs:
+     * * 0: An n-D tensor of the same type as the input, containing the k
+     *      largest elements along each last dimensional slice.
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     * * 1: An n-D tensor of type {@link OperandType::TENSOR_INT32}
+     *      containing the indices of values within the last dimension of input.
+     */
+    TOPK_V2 = 90,
+    /**
+     * Performs the transpose of 2-D convolution operation.
+     *
+     * This operation is sometimes called "deconvolution" after Deconvolutional
+     * Networks, but is actually the transpose (gradient) of
+     * {@link OperationType::CONV_2D} rather than an actual deconvolution.
+     *
+     * The output dimensions are functions of the filter dimensions, stride, and
+     * padding.
+     *
+     * Supported tensor {@link OperandType} configurations:
+     * * 16 bit floating point:
+     * * * {@link OperandType::TENSOR_FLOAT16} for input, filter, output, and bias.
+     *
+     * * 32 bit floating point:
+     * * * {@link OperandType::TENSOR_FLOAT32} for input, filter, output, and bias.
+     *
+     * * Quantized:
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM} for input, filter, and output.
+     * * * {@link OperandType::TENSOR_INT32} for bias (with scale set to
+     * * * input.scale * filter.scale).
+     *
+     * * Quantized with symmetric per channel quantization for the filter:
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM} for input, and output.
+     * * * {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+     * * * {@link OperandType::TENSOR_INT32} for bias (scale set to 0.0,
+     * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+     *
+     * Available since HAL version 1.3:
+     * * Quantized signed (since HAL version 1.3):
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} for input, filter, and output.
+     * * * {@link OperandType::TENSOR_INT32} for bias (with scale set to
+     * * * input.scale * filter.scale).
+     *
+     * * Quantized signed with filter symmetric per channel quantization (since HAL version 1.3):
+     * * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} for input, and output.
+     * * * {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+     * * * {@link OperandType::TENSOR_INT32} for bias (scale set to 0.0,
+     * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     *
+     * Both explicit padding and implicit padding are supported.
+     *
+     * Inputs (explicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
+     *      specifying the input.
+     * * 1: A 4-D tensor, of shape
+     *      [depth_out, filter_height, filter_width, depth_in], specifying the
+     *      filter. For tensor of type
+     *      {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
+     *      dimension (SymmPerChannelQuantParams::channelDim) must be set to 0.
+     * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
+     *      tensor of type {@link OperandType::TENSOR_FLOAT32} or
+     *      {@link OperandType::TENSOR_FLOAT16}, the bias must be of the
+     *      same type.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the bias should be of {@link OperandType::TENSOR_INT32},
+     *      with zeroPoint of 0 and bias_scale == input_scale * filter_scale.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL},
+     *      the bias must be of {@link OperandType::TENSOR_INT32}, with zeroPoint of 0
+     *      and bias_scale of 0. The actual scale of each value 'i' is equal to
+     *      bias_scale[i] = input_scale * filter_scale[i].
+     * * 3: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the left, in the ‘width’ dimension.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the right, in the ‘width’ dimension.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the top, in the ‘height’ dimension.
+     * * 6: An {@link OperandType::INT32} scalar, specifying the padding on
+     *      the bottom, in the ‘height’ dimension.
+     * * 7: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 8: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 9: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 10: An {@link OperandType::BOOL} scalar, set to true to specify
+     *       NCHW data layout for input0 and output0. Set to false for NHWC.
+     *
+     * Inputs (implicit padding):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
+     *      specifying the input.
+     * * 1: A 4-D tensor, of shape
+     *      [depth_out, filter_height, filter_width, depth_in], specifying the
+     *      filter. For tensor of type
+     *      {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
+     *      dimension (SymmPerChannelQuantParams::channelDim) must be set to 0.
+     * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
+     *      tensor of type {@link OperandType::TENSOR_FLOAT32} or
+     *      {@link OperandType::TENSOR_FLOAT16}, the bias should be of the
+     *      same type.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_ASYMM}
+     *      and {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+     *      the bias should be of {@link OperandType::TENSOR_INT32},
+     *      with zeroPoint of 0 and bias_scale == input_scale * filter_scale.
+     *      For filter tensor of {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL},
+     *      the bias must be of {@link OperandType::TENSOR_INT32}, with zeroPoint of 0
+     *      and bias_scale of 0. The actual scale of each value 'i' is equal to
+     *      bias_scale[i] = input_scale * filter_scale[i].
+     * * 3: An {@link OperandType::TENSOR_INT32} tensor, specifying the output
+     *      tensor shape.
+     * * 4: An {@link OperandType::INT32} scalar, specifying the implicit
+     *      padding scheme, has to be one of the
+     *      following values: {0 (NONE), 1 (SAME), 2 (VALID)}.
+     * * 5: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘width’ dimension.
+     * * 6: An {@link OperandType::INT32} scalar, specifying the stride when
+     *      walking through input in the ‘height’ dimension.
+     * * 7: An {@link OperandType::INT32} scalar, and has to be one of the
+     *      {@link FusedActivationFunc} values. Specifies the activation to
+     *      invoke on the result.
+     * * 8: An {@link OperandType::BOOL} scalar, set to true to specify
+     *      NCHW data layout for input0 and output0. Set to false for NHWC.
+     *
+     * Outputs:
+     * * 0: The output 4-D tensor, of shape
+     *      [batches, out_height, out_width, depth_out].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint can be different from inputs' scale and zeroPoint.
+     */
+    TRANSPOSE_CONV_2D = 91,
+    /**
+     * A recurrent neural network specified by an LSTM cell.
+     *
+     * Performs (fully) dynamic unrolling of input.
+     *
+     * This Op unrolls the input along the time dimension, and implements the
+     * following operation for each element in the sequence
+     * s = 1...sequence_length:
+     *   outputs[s] = projection(state = activation(LSTMOp(inputs[s])))
+     *
+     * Where LSTMOp is the LSTM op as in {@link OperationType::LSTM},
+     * the "projection" is an optional projection layer from state and output
+     * and the “activation” is the function passed as the
+     * “fused_activation_function” argument (if not “NONE”).
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: 3, either time-major or batch-major.
+     *
+     * All input and output tensors must be of the same type.
+     *
+     * Inputs:
+     * * 0: The input (\f$x_t\f$).
+     *      A 3-D tensor of shape:
+     *        If time-major: [max_time, batch_size, input_size]
+     *        If batch-major: [batch_size, max_time, input_size]
+     *      where “max_time” is the number of timesteps (sequence length),
+     *      “batch_size” corresponds to the batching dimension, and
+     *      “input_size” is the size of the input.
+     * * 1: The input-to-input weights (\f$W_{xi}\f$). Optional.
+     *      A 2-D tensor of shape [num_units, input_size], where “num_units”
+     *      corresponds to the number of cell units.
+     * * 2: The input-to-forget weights (\f$W_{xf}\f$).
+     *      A 2-D tensor of shape [num_units, input_size].
+     * * 3: The input-to-cell weights (\f$W_{xc}\f$).
+     *      A 2-D tensor of shape [num_units, input_size].
+     * * 4: The input-to-output weights (\f$W_{xo}\f$).
+     *      A 2-D tensor of shape [num_units, input_size].
+     * * 5: The recurrent-to-input weights (\f$W_{hi}\f$). Optional.
+     *      A 2-D tensor of shape [num_units, output_size], where “output_size”
+     *      corresponds to either the number of cell units (i.e., “num_units”),
+     *      or the second dimension of the “projection_weights”, if defined.
+     * * 6: The recurrent-to-forget weights (\f$W_{hf}\f$).
+     *      A 2-D tensor of shape [num_units, output_size].
+     * * 7: The recurrent-to-cell weights (\f$W_{hc}\f$).
+     *      A 2-D tensor of shape [num_units, output_size].
+     * * 8: The recurrent-to-output weights (\f$W_{ho}\f$).
+     *      A 2-D tensor of shape [num_units, output_size].
+     * * 9: The cell-to-input weights (\f$W_{ci}\f$). Optional.
+     *      A 1-D tensor of shape [num_units].
+     * * 10:The cell-to-forget weights (\f$W_{cf}\f$). Optional.
+     *      A 1-D tensor of shape [num_units].
+     * * 11:The cell-to-output weights (\f$W_{co}\f$). Optional.
+     *      A 1-D tensor of shape [num_units].
+     * * 12:The input gate bias (\f$b_i\f$). Optional.
+     *      A 1-D tensor of shape [num_units].
+     * * 13:The forget gate bias (\f$b_f\f$).
+     *      A 1-D tensor of shape [num_units].
+     * * 14:The cell bias (\f$b_c\f$).
+     *      A 1-D tensor of shape [num_units].
+     * * 15:The output gate bias (\f$b_o\f$).
+     *      A 1-D tensor of shape [num_units].
+     * * 16:The projection weights (\f$W_{proj}\f$). Optional.
+     *      A 2-D tensor of shape [output_size, num_units].
+     * * 17:The projection bias (\f$b_{proj}\f$). Optional.
+     *      A 1-D tensor of shape [output_size].
+     * * 18:The output state (in) (\f$h_{t-1}\f$).
+     *      A 2-D tensor of shape [batch_size, output_size].
+     * * 19:The cell state (in) (\f$C_{t-1}\f$).
+     *      A 2-D tensor of shape [batch_size, num_units].
+     * * 20:The activation function (\f$g\f$).
+     *      A value indicating the activation function:
+     *      <ul>
+     *      <li>0: None;
+     *      <li>1: Relu;
+     *      <li>3: Relu6;
+     *      <li>4: Tanh;
+     *      <li>6: Sigmoid.
+     *      </ul>
+     * * 21:The clipping threshold (\f$t_{cell}\f$) for the cell state, such
+     *      that values are bound within [-cell_clip, cell_clip]. If set to 0.0
+     *      then clipping is disabled.
+     * * 22:The clipping threshold (\f$t_{proj}\f$) for the output from the
+     *      projection layer, such that values are bound within
+     *      [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+     * * 23:Time-major if true, batch-major if false.
+     * * 24:The input layer normalization weights. Optional.
+     *      A 1-D tensor of shape [num_units]. Used to rescale normalized inputs
+     *      to activation at input gate.
+     * * 25:The forget layer normalization weights. Optional.
+     *      A 1-D tensor of shape [num_units]. Used to rescale normalized inputs
+     *      to activation at forget gate.
+     * * 26:The cell layer normalization weights. Optional.
+     *      A 1-D tensor of shape [num_units]. Used to rescale normalized inputs
+     *      to activation at cell gate.
+     * * 27:The output layer normalization weights. Optional.
+     *      A 1-D tensor of shape [num_units]. Used to rescale normalized inputs
+     *      to activation at output gate.
+     *
+     * Outputs:
+     * * 0: The output (\f$o_t\f$).
+     *      A 3-D tensor of shape:
+     *        If time-major: [max_time, batch_size, output_size]
+     *        If batch-major: [batch_size, max_time, output_size]
+     * * 1: A tensor of shape [batch_size, output_size] containing a hidden
+     *      state from the last time step in the sequence. This output is
+     *      optional and can be omitted. If this output is present then
+     *      output #2 must be present as well.
+     *      Available since HAL version 1.3.
+     * * 2: A tensor of shape [batch_size, num_units] containing a cell state
+     *      from the last time step in the sequence. This output is optional
+     *      and can be omitted.
+     *      Available since HAL version 1.3.
+     */
+    UNIDIRECTIONAL_SEQUENCE_LSTM = 92,
+    /**
+     * A recurrent neural network layer that applies a basic RNN cell to a
+     * sequence of inputs.
+     *
+     * This layer unrolls the input along the sequence dimension, and implements
+     * the following operation
+     * for each element in the sequence s = 1...sequence_length:
+     *   outputs[s] = state = activation(inputs[s] * input_weights’ + state *
+     *   recurrent_weights’ + bias)
+     *
+     * Where:
+     * * “input_weights” is a weight matrix that multiplies the inputs;
+     * * “recurrent_weights” is a weight matrix that multiplies the current
+     *    “state” which itself is the output from the previous time step
+     *    computation;
+     * * “bias” is a bias vector (added to each output vector in the batch);
+     * * “activation” is the function passed as the “fused_activation_function”
+     *   argument (if not “NONE”).
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * The input tensors must all be the same type.
+     *
+     * Inputs:
+     * * 0: input.
+     *      A 3-D tensor. The shape is defined by the input 6 (timeMajor). If
+     *      it is set to 1, then the input has a shape [maxTime, batchSize,
+     *      inputSize], otherwise the input has a shape [batchSize, maxTime,
+     *      inputSize].
+     * * 1: weights.
+     *      A 2-D tensor of shape [numUnits, inputSize].
+     * * 2: recurrent_weights.
+     *      A 2-D tensor of shape [numUnits, numUnits].
+     * * 3: bias.
+     *      A 1-D tensor of shape [numUnits].
+     * * 4: hidden state
+     *      A 2-D tensor of shape [batchSize, numUnits]. Specifies a hidden
+     *      state input for the first time step of the computation.
+     * * 5: fusedActivationFunction.
+     *      A {@link FusedActivationFunc} value indicating the activation function. If
+     *      “NONE” is specified then it results in a linear activation.
+     * * 6: timeMajor
+     *      An {@link OperandType::INT32} scalar specifying the shape format
+     *      of input and output tensors. Must be set to either 0 or 1.
+     * Outputs:
+     * * 0: output.
+     *      A 3-D tensor. The shape is defined by the input 6 (timeMajor). If
+     *      it is set to 1, then the output has a shape [maxTime, batchSize,
+     *      numUnits], otherwise the output has a shape [batchSize, maxTime,
+     *      numUnits].
+     * * 1: A tensor of shape [batchSize, numUnits] containing hidden state
+     *      from the last time step in the sequence. This output is optional
+     *      and can be omitted.
+     *      Available since HAL version 1.3.
+     */
+    UNIDIRECTIONAL_SEQUENCE_RNN = 93,
+    /**
+     * Resizes images to given size using the nearest neighbor interpolation.
+     *
+     * Resized images will be distorted if their output aspect ratio is not the
+     * same as input aspect ratio. The corner pixels of output may not be the
+     * same as corner pixels of input.
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} (since HAL version 1.3)
+     *
+     * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
+     * With the default data layout NHWC, the data is stored in the order of:
+     * [batch, height, width, channels]. Alternatively, the data layout could
+     * be NCHW, the data storage order of: [batch, channels, height, width].
+     *
+     * Both resizing by shape and resizing by scale are supported.
+     *
+     * Inputs (resizing by shape):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
+     *      the input. Zero batches is supported for this tensor.
+     * * 1: An {@link OperandType::INT32} scalar, specifying the output
+     *      width of the output tensor.
+     * * 2: An {@link OperandType::INT32} scalar, specifying the output
+     *      height of the output tensor.
+     * * 3: An {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     * * 4: Align corners. An optional {@link OperandType::BOOL}
+     *      scalar, default to false.  If True, the centers of the 4 corner
+     *      pixels of the input and output tensors are aligned, preserving the
+     *      values at the corner pixels.
+     *      Available since HAL version 1.3.
+     * * 5: Half pixel centers. An optional {@link OperandType::BOOL}
+     *      scalar, default to false. If True, the pixel centers are assumed to
+     *      be at (0.5, 0.5). This is the default behavior of image.resize in
+     *      TF 2.0. If this parameter is True, then align_corners parameter
+     *      must be False.
+     *      Available since HAL version 1.3.
+     *
+     * Inputs (resizing by scale):
+     * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying
+     *      the input. Zero batches is supported for this tensor.
+     * * 1: A scalar, specifying width_scale, the scaling factor of the width
+     *      dimension from the input tensor to the output tensor. The output
+     *      width is calculated as new_width = floor(width * width_scale).
+     *      The scalar must be of {@link OperandType::FLOAT16} if input0 is
+     *      of {@link OperandType::TENSOR_FLOAT16} and of
+     *      {@link OperandType::FLOAT32} otherwise.
+     * * 2: A scalar, specifying height_scale, the scaling factor of the height
+     *      dimension from the input tensor to the output tensor. The output
+     *      height is calculated as new_height = floor(height * height_scale).
+     *      The scalar must be of {@link OperandType::FLOAT16} if input0 is
+     *      of {@link OperandType::TENSOR_FLOAT16} and of
+     *      {@link OperandType::FLOAT32} otherwise.
+     * * 3: An {@link OperandType::BOOL} scalar, default to false.
+     *      Set to true to specify NCHW data layout for input0 and output0.
+     * * 4: Align corners. An optional {@link OperandType::BOOL}
+     *      scalar, default to false.  If True, the centers of the 4 corner
+     *      pixels of the input and output tensors are aligned, preserving the
+     *      values at the corner pixels.
+     *      Available since HAL version 1.3.
+     * * 5: Half pixel centers. An optional {@link OperandType::BOOL}
+     *      scalar, default to false. If True, the pixel centers are assumed to
+     *      be at (0.5, 0.5). This is the default behavior of image.resize in
+     *      TF 2.0. If this parameter is True, then align_corners parameter
+     *      must be False.
+     *      Available since HAL version 1.3.
+     *
+     * Outputs:
+     * * 0: The output 4-D tensor, of shape
+     *      [batches, new_height, new_width, depth].
+     *      For a {@link OperandType::TENSOR_QUANT8_ASYMM} and
+     *      {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED} tensor,
+     *      the scale and zeroPoint must be the same as input0.
+     */
+    RESIZE_NEAREST_NEIGHBOR = 94,
+    /**
+     * Quantized version of {@link OperationType::LSTM}.
+     *
+     * The input and the output use asymmetric quantized types, while the rest
+     * use symmetric ones.
+     *
+     * Inputs:
+     * * 0: The input to the LSTM cell.
+     *      Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *      Shape: [batchSize, inputSize]
+     * * 1: The input-to-input weights. Optional.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 2: The input-to-forget weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 3: The input-to-cell weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 4: The input-to-output weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 5: The recurrent-to-input weights. Optional.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 6: The recurrent-to-forget weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 7: The recurrent-to-cell weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 8: The recurrent-to-output weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 9: The cell-to-input weights (for peephole). Optional.
+     *      Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *      Shape: [numUnits]
+     * * 10: The cell-to-forget weights (for peephole). Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 11: The cell-to-output weights (for peephole). Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 12: The input gate bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Optional.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 13: The forget gate bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 14: The cell bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 15: The output gate bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 16: The projection weights. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *       Shape: [outputSize, numUnits]
+     * * 17: The projection bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Optional.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [outputSize]
+     * * 18: The output from the previous time step.
+     *       Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *       Shape: [batchSize, outputSize]
+     * * 19: The cell state from the previous time step.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [batchSize, numUnits]
+     * * 20: The input layer normalization weights. Used to rescale
+     *       normalized inputs to activation at input gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 21: The forget layer normalization weights. Used to
+     *       rescale normalized inputs to activation at forget gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 22: The cell layer normalization weights. Used to rescale
+     *       normalized inputs to activation at cell gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 23: The output layer normalization weights. Used to
+     *       rescale normalized inputs to activation at output gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 24: The cell clip. If provided the cell state is clipped
+     *       by this value prior to the cell output activation. Optional.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 25: The projection clip. If provided and projection is enabled,
+     *       this is used for clipping the projected values. Optional.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 26: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at input gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 27: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at forget gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 28: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at cell gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 29: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at output gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 30: The zero point of the hidden state, i.e. input to
+     *       projection.
+     *       Type: {@link OperandType::INT32}.
+     * * 31: The scale of the hidden state, i.e. input to
+     *       projection.
+     *       Type: {@link OperandType::FLOAT32}.
+     *
+     * Outputs:
+     * * 0: The output state (out).
+     *      Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *      Shape: [batchSize, outputSize]
+     * * 1: The cell state (out).
+     *      Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *      Shape: [batchSize, numUnits]
+     * * 2: The output. This is effectively the same as the current
+     *      "output state (out)" value.
+     *      Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *      Shape: [batchSize, outputSize]
+     */
+    QUANTIZED_LSTM = 95,
+    /**
+     * Executes one of the two referenced subgraphs as determined by a boolean
+     * value.
+     *
+     * The inputs and outputs of the two referenced subgraphs must agree with the
+     * signature of this operation. That is, if the operation has (3 + n) inputs
+     * and m outputs, both subgraphs must have n inputs and m outputs with the same
+     * types, ranks, dimensions, scales,
+     * zeroPoints, and extraParams as the corresponding operation
+     * inputs and outputs.
+     * All of the operands mentioned must have fully specified dimensions.
+     *
+     * Inputs:
+     * * 0: A value of type {@link OperandType::TENSOR_BOOL8} and shape [1]
+     *      that determines which of the two referenced subgraphs to execute.
+     *      The operand must have fully specified dimensions.
+     * * 1: A {@link OperandType::SUBGRAPH} reference to the subgraph to be
+     *      executed if the condition is true.
+     * * 2: A {@link OperandType::SUBGRAPH} reference to the subgraph to be
+     *      executed if the condition is false.
+     * * 3 ~ (n + 2): Inputs to be passed to the subgraph selected for execution.
+     *
+     * Outputs:
+     * * 0 ~ (m - 1): Outputs produced by the selected subgraph.
+     */
+    IF = 96,
+    /**
+     * Executes the body subgraph until the condition subgraph outputs false.
+     *
+     * The inputs to this operation are the condition subgraph, the body subgraph,
+     * and operand values for the first iteration of the loop. The values are
+     * implicitly split into three groups of input-output, state-only, and
+     * input-only values, as described below.
+     *
+     * The outputs of this operation are the final values of input-output
+     * operands.
+     *
+     * Both the condition and body subgraph receive (m + k + n) inputs.
+     * * The first m (m >= 1) inputs are input-output operands. For the first
+     *   iteration, these are initialized from the corresponding inputs of the
+     *   WHILE operation. In subsequent iterations, their values come from the
+     *   corresponding outputs of the body subgraph produced during the previous
+     *   iteration.
+     * * The next k (k >= 0) inputs are state-only operands. They are similar to
+     *   the input-output operands, except that their values are no longer
+     *   available after the loop terminates.
+     * * The last n (n >= 0) inputs are input-only operands. Their values come
+     *   from the corresponding inputs of the WHILE operation.
+     *
+     * The body subgraph produces (m + k) outputs.
+     * * The first m outputs are input-output operands. They become the outputs
+     *   of the WHILE operation when a termination condition is reached.
+     * * The last k outputs are state-only operands. Their values are no longer
+     *   available after the loop terminates.
+     *
+     * The numbers m, k, and n are inferred by the driver as follows:
+     *     m = (WHILE operation output count)
+     *     k = (body subgraph output count) - m
+     *     n = (body subgraph input count) - m - k
+     *
+     * The pseudo-code below illustrates the flow of a WHILE operation with
+     * inputs condition, body, initial_input_output, initial_state, input_only
+     * (m = 1, k = 1, n = 1):
+     *
+     *     input_output = initial_input_output
+     *     state = initial_state
+     *     while condition(input_output, state, input_only):
+     *         input_output, state = body(input_output, state, input_only)
+     *     return input_output
+     *
+     * Inputs:
+     * * 0: A {@link OperandType::SUBGRAPH} reference to the condition
+     *      subgraph. The subgraph must have (m + k + n) inputs with
+     *      the same types, ranks, dimensions,
+     *      scales, zeroPoints, and extraParams as the
+     *      corresponding inputs of the WHILE operation and exactly one output
+     *      of {@link OperandType::TENSOR_BOOL8} and shape [1].
+     *      All of the operands mentioned must have fully specified dimensions.
+     * * 1: A {@link OperandType::SUBGRAPH} reference to the body subgraph.
+     *      The subgraph must have (m + k + n) inputs and (m + k) outputs with
+     *      the same types, ranks, dimensions,
+     *      scales, zeroPoints, and extraParams as the
+     *      corresponding inputs and outputs of the WHILE operation.
+     *      All of the operands mentioned must have fully specified dimensions.
+     * * (m inputs): Initial values for input-output operands.
+     * * (k inputs): Initial values for state-only operands.
+     * * (n inputs): Values for input-only operands.
+     *
+     * Outputs:
+     * * 0 ~ (m - 1): Outputs produced by the loop.
+     */
+    WHILE = 97,
+    /**
+     * Computes exponential linear activation on the input tensor element-wise.
+     *
+     * The output is calculated using the following formula:
+     *
+     *     ELU(x) = max(0, x) + min(0, alpha * (exp(x) - 1))
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor, specifying the input. May be zero-sized.
+     * * 1: A scalar, specifying the alpha parameter.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT16},
+     *      the alpha value must be of {@link OperandType::FLOAT16}.
+     *      For input tensor of {@link OperandType::TENSOR_FLOAT32},
+     *      the alpha value must be of {@link OperandType::FLOAT32}.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape and type as input0.
+     */
+    ELU = 98,
+    /**
+     * Computes hard-swish activation on the input tensor element-wise.
+     *
+     * Hard swish activation is introduced in
+     * https://arxiv.org/pdf/1905.02244.pdf
+     *
+     * The output is calculated using the following formula:
+     *
+     *     h-swish(x) = x * max(0, min(6, (x + 3))) / 6
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A tensor, specifying the input. May be zero-sized.
+     *
+     * Outputs:
+     * * 0: The output tensor of same shape and type as input0.
+     *      Scale and zero point of this tensor may be different from the input
+     *      tensor's parameters.
+     */
+    HARD_SWISH = 99,
+    /**
+     * Creates a tensor filled with a scalar value.
+     *
+     * Supported output tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: A 1-D tensor, specifying the desired output tensor shape.
+     * * 1: A scalar, specifying the value to fill the output tensors with.
+     *      For output tensor of {@link OperandType::TENSOR_FLOAT16},
+     *      the scalar must be of {@link OperandType::FLOAT16}.
+     *      For output tensor of {@link OperandType::TENSOR_FLOAT32},
+     *      the scalar must be of {@link OperandType::FLOAT32}.
+     *      For output tensor of {@link OperandType::TENSOR_INT32},
+     *      the scalar must be of {@link OperandType::INT32}.
+     *
+     * Outputs:
+     * * 0: The output tensor.
+     */
+    FILL = 100,
+    /**
+     * Returns the rank of a tensor.
+     *
+     * The rank of a tensor is the number of dimensions in it. Also known as
+     * "order", "degree", "ndims".
+     *
+     * Supported tensor {@link OperandType}:
+     * * {@link OperandType::TENSOR_FLOAT16}
+     * * {@link OperandType::TENSOR_FLOAT32}
+     * * {@link OperandType::TENSOR_INT32}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT16_SYMM}
+     * * {@link OperandType::TENSOR_BOOL8}
+     * * {@link OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL}
+     * * {@link OperandType::TENSOR_QUANT16_ASYMM}
+     * * {@link OperandType::TENSOR_QUANT8_SYMM}
+     * * {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *
+     * Supported tensor rank: from 1.
+     *
+     * Inputs:
+     * * 0: The input tensor.
+     *
+     * Outputs:
+     * * 0: A scalar of {@link OperandType::INT32}, specifying the rank
+     *      of the input tensor.
+     */
+    RANK = 101,
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/OutputShape.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/OutputShape.aidl
new file mode 100644
index 0000000000..d206a2559c
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/OutputShape.aidl
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Describes the shape information of an output operand after execution.
+ */
+@VintfStability
+parcelable OutputShape {
+    /**
+     * Dimensions of the output operand.
+     */
+    int[] dimensions;
+    /**
+     * Whether the provided buffer size is sufficient for the output.
+     */
+    boolean isSufficient;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/PerformanceInfo.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/PerformanceInfo.aidl
new file mode 100644
index 0000000000..6ee29c2502
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/PerformanceInfo.aidl
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Performance information for the reference workload.
+ *
+ * Used by a driver to report its performance characteristics relative to the CPU.
+ */
+@VintfStability
+parcelable PerformanceInfo {
+    /**
+     * Ratio of the time taken by the driver to execute the workload compared to the time the CPU
+     * would take for the same workload. A lower number is better.
+     */
+    float execTime;
+    /**
+     * Ratio of the energy used by the driver compared to what the CPU would use for doing the same
+     * workload. A lower number is better.
+     */
+    float powerUsage;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/Priority.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/Priority.aidl
new file mode 100644
index 0000000000..fe87598829
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/Priority.aidl
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Priority given to a prepared model for execution, listed from lowest to highest.
+ */
+@VintfStability
+@Backing(type="int")
+enum Priority {
+    LOW,
+    MEDIUM,
+    HIGH,
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/Request.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/Request.aidl
new file mode 100644
index 0000000000..396ff30758
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/Request.aidl
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.RequestArgument;
+import android.hardware.neuralnetworks.RequestMemoryPool;
+
+/**
+ * Inputs to be sent to and outputs to be retrieved from a prepared model.
+ *
+ * A Request serves two primary tasks:
+ * 1) Provides the input and output data to be used when executing the model.
+ * 2) Specifies any updates to the input operand metadata that were left unspecified at model
+ *    preparation time.
+ *
+ * An output must not overlap with any other output, with an input, or with an operand of lifetime
+ * CONSTANT_POOL.
+ */
+@VintfStability
+parcelable Request {
+    /**
+     * Input data and information to be used in the execution of a prepared model.
+     *
+     * The index of the input corresponds to the index in Model.main.inputIndexes.
+     *   E.g., inputs[i] corresponds to Model.main.inputIndexes[i].
+     */
+    RequestArgument[] inputs;
+    /**
+     * Output data and information to be used in the execution of a prepared model.
+     *
+     * The index of the output corresponds to the index in Model.main.outputIndexes.
+     *   E.g., outputs[i] corresponds to Model.main.outputIndexes[i].
+     */
+    RequestArgument[] outputs;
+    /**
+     * A collection of memory pools containing operand data for both the inputs and the outputs to a
+     * model.
+     */
+    RequestMemoryPool[] pools;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/RequestArgument.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/RequestArgument.aidl
new file mode 100644
index 0000000000..e615fa62b1
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/RequestArgument.aidl
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.DataLocation;
+
+/**
+ * Metadata information specifying the location of the input or output data and any updates to the
+ * input or output operand.
+ */
+@VintfStability
+parcelable RequestArgument {
+    /**
+     * If true, the argument does not have a value. This can be used for operations that take
+     * optional arguments. If true, the fields of location are set to 0 and the dimensions array is
+     * left empty.
+     */
+    boolean hasNoValue;
+    /**
+     * The location within one of the memory pools passed in the Request.
+     */
+    DataLocation location;
+    /**
+     * Updated dimension information.
+     *
+     * If dimensions.size() > 0, dimension information was provided along with the argument. This
+     * can be the case for models that accept inputs of varying size. This can't change the rank,
+     * just the value of the dimensions that were unspecified in the model. If dimensions.size() >
+     * 0, then all dimensions must be specified here; and any dimension that was specified in the
+     * model must have the same value here.
+     *
+     * If the dimensions in the model are not fully specified, then they must be fully specified
+     * here, unless hasNoValue is set to true. If the dimensions in the model are fully specified,
+     * then either dimensions.size() may be 0, or the dimensions in the model must be identical to
+     * the dimensions here.
+     */
+    int[] dimensions;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/RequestMemoryPool.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/RequestMemoryPool.aidl
new file mode 100644
index 0000000000..166746d388
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/RequestMemoryPool.aidl
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.Memory;
+
+/**
+ * A memory pool: either a client-managed shared memory pool or a driver-managed buffer token.
+ */
+@VintfStability
+union RequestMemoryPool {
+    /**
+     * Specifies a client-managed shared memory pool.
+     */
+    Memory pool;
+    /**
+     * Specifies a driver-managed buffer. It is the token returned from IDevice::allocate, and is
+     * specific to the IDevice object.
+     */
+    int token;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/Subgraph.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/Subgraph.aidl
new file mode 100644
index 0000000000..0a76285fca
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/Subgraph.aidl
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+import android.hardware.neuralnetworks.Operand;
+import android.hardware.neuralnetworks.Operation;
+
+/**
+ * An excerpt of the execution graph.
+ */
+@VintfStability
+parcelable Subgraph {
+    /**
+     * All operands included in the subgraph.
+     */
+    Operand[] operands;
+    /**
+     * All operations included in the subgraph.
+     *
+     * The operations are sorted into execution order. Every operand with lifetime SUBGRAPH_OUTPUT
+     * or TEMPORARY_VARIABLE must be written before it is read.
+     */
+    Operation[] operations;
+    /**
+     * Input indexes of the subgraph. There must be at least one.
+     *
+     * Each value is the index of an operand in "operands".
+     */
+    int[] inputIndexes;
+    /**
+     * Output indexes of the subgraph. There must be at least one.
+     *
+     * Each value is the index of an operand in "operands".
+     */
+    int[] outputIndexes;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/SymmPerChannelQuantParams.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/SymmPerChannelQuantParams.aidl
new file mode 100644
index 0000000000..8ae41a4d3e
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/SymmPerChannelQuantParams.aidl
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Parameters for TENSOR_QUANT8_SYMM_PER_CHANNEL operand.
+ */
+@VintfStability
+parcelable SymmPerChannelQuantParams {
+    /**
+     * Array of scaling values for each channel. Each value must be greater than zero.
+     */
+    float[] scales;
+    /**
+     * Index of the channel dimension.
+     */
+    int channelDim;
+}
diff --git a/neuralnetworks/aidl/android/hardware/neuralnetworks/Timing.aidl b/neuralnetworks/aidl/android/hardware/neuralnetworks/Timing.aidl
new file mode 100644
index 0000000000..b04f74e4ee
--- /dev/null
+++ b/neuralnetworks/aidl/android/hardware/neuralnetworks/Timing.aidl
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package android.hardware.neuralnetworks;
+
+/**
+ * Timing information measured during execution. Each time is a duration from the beginning of some
+ * task to the end of that task, including time when that task is not active (for example, preempted
+ * by some other task, or waiting for some resource to become available).
+ *
+ * Times are measured in nanoseconds. When a time is not available, it must be reported as -1.
+ */
+@VintfStability
+parcelable Timing {
+    /**
+     * Execution time on device (not driver, which runs on host processor), in nanoseconds.
+     */
+    long timeOnDevice;
+    /**
+     * Execution time in driver (including time on device), in nanoseconds.
+     */
+    long timeInDriver;
+}
diff --git a/neuralnetworks/aidl/utils/Android.bp b/neuralnetworks/aidl/utils/Android.bp
new file mode 100644
index 0000000000..56017da52d
--- /dev/null
+++ b/neuralnetworks/aidl/utils/Android.bp
@@ -0,0 +1,32 @@
+//
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+cc_library_static {
+    name: "neuralnetworks_utils_hal_aidl",
+    defaults: ["neuralnetworks_utils_defaults"],
+    srcs: ["src/*"],
+    local_include_dirs: ["include/nnapi/hal/aidl/"],
+    export_include_dirs: ["include"],
+    static_libs: [
+        "neuralnetworks_types",
+        "neuralnetworks_utils_hal_common",
+    ],
+    shared_libs: [
+        "libhidlbase",
+        "android.hardware.neuralnetworks-V1-ndk_platform",
+        "libbinder_ndk",
+    ],
+}
diff --git a/neuralnetworks/aidl/utils/OWNERS b/neuralnetworks/aidl/utils/OWNERS
new file mode 100644
index 0000000000..e4feee3496
--- /dev/null
+++ b/neuralnetworks/aidl/utils/OWNERS
@@ -0,0 +1,11 @@
+# Neuralnetworks team
+butlermichael@google.com
+dgross@google.com
+galarragas@google.com
+jeanluc@google.com
+levp@google.com
+miaowang@google.com
+pszczepaniak@google.com
+slavash@google.com
+vddang@google.com
+xusongw@google.com
diff --git a/neuralnetworks/aidl/utils/include/nnapi/hal/aidl/Conversions.h b/neuralnetworks/aidl/utils/include/nnapi/hal/aidl/Conversions.h
new file mode 100644
index 0000000000..35de5befd0
--- /dev/null
+++ b/neuralnetworks/aidl/utils/include/nnapi/hal/aidl/Conversions.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_AIDL_CONVERSIONS_H
+#define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_AIDL_CONVERSIONS_H
+
+#include <aidl/android/hardware/neuralnetworks/BufferDesc.h>
+#include <aidl/android/hardware/neuralnetworks/BufferRole.h>
+#include <aidl/android/hardware/neuralnetworks/Capabilities.h>
+#include <aidl/android/hardware/neuralnetworks/DataLocation.h>
+#include <aidl/android/hardware/neuralnetworks/DeviceType.h>
+#include <aidl/android/hardware/neuralnetworks/ErrorStatus.h>
+#include <aidl/android/hardware/neuralnetworks/ExecutionPreference.h>
+#include <aidl/android/hardware/neuralnetworks/Extension.h>
+#include <aidl/android/hardware/neuralnetworks/ExtensionNameAndPrefix.h>
+#include <aidl/android/hardware/neuralnetworks/ExtensionOperandTypeInformation.h>
+#include <aidl/android/hardware/neuralnetworks/Memory.h>
+#include <aidl/android/hardware/neuralnetworks/Model.h>
+#include <aidl/android/hardware/neuralnetworks/Operand.h>
+#include <aidl/android/hardware/neuralnetworks/OperandExtraParams.h>
+#include <aidl/android/hardware/neuralnetworks/OperandLifeTime.h>
+#include <aidl/android/hardware/neuralnetworks/OperandPerformance.h>
+#include <aidl/android/hardware/neuralnetworks/OperandType.h>
+#include <aidl/android/hardware/neuralnetworks/Operation.h>
+#include <aidl/android/hardware/neuralnetworks/OperationType.h>
+#include <aidl/android/hardware/neuralnetworks/OutputShape.h>
+#include <aidl/android/hardware/neuralnetworks/PerformanceInfo.h>
+#include <aidl/android/hardware/neuralnetworks/Priority.h>
+#include <aidl/android/hardware/neuralnetworks/Request.h>
+#include <aidl/android/hardware/neuralnetworks/RequestArgument.h>
+#include <aidl/android/hardware/neuralnetworks/RequestMemoryPool.h>
+#include <aidl/android/hardware/neuralnetworks/Subgraph.h>
+#include <aidl/android/hardware/neuralnetworks/SymmPerChannelQuantParams.h>
+#include <aidl/android/hardware/neuralnetworks/Timing.h>
+
+#include <nnapi/Result.h>
+#include <nnapi/Types.h>
+#include <nnapi/hal/CommonUtils.h>
+
+#include <vector>
+
+namespace android::nn {
+// unvalidatedConvert: AIDL HAL object -> canonical object, without a canonical validate() pass.
+GeneralResult<OperandType> unvalidatedConvert(const aidl_hal::OperandType& operandType);
+GeneralResult<OperationType> unvalidatedConvert(const aidl_hal::OperationType& operationType);
+GeneralResult<DeviceType> unvalidatedConvert(const aidl_hal::DeviceType& deviceType);
+GeneralResult<Priority> unvalidatedConvert(const aidl_hal::Priority& priority);
+GeneralResult<Capabilities> unvalidatedConvert(const aidl_hal::Capabilities& capabilities);
+GeneralResult<Capabilities::OperandPerformance> unvalidatedConvert(
+        const aidl_hal::OperandPerformance& operandPerformance);
+GeneralResult<Capabilities::PerformanceInfo> unvalidatedConvert(
+        const aidl_hal::PerformanceInfo& performanceInfo);
+GeneralResult<DataLocation> unvalidatedConvert(const aidl_hal::DataLocation& location);
+GeneralResult<Operand> unvalidatedConvert(const aidl_hal::Operand& operand);
+GeneralResult<Operand::ExtraParams> unvalidatedConvert(
+        const std::optional<aidl_hal::OperandExtraParams>& optionalExtraParams);
+GeneralResult<Operand::LifeTime> unvalidatedConvert(
+        const aidl_hal::OperandLifeTime& operandLifeTime);
+GeneralResult<Operand::SymmPerChannelQuantParams> unvalidatedConvert(
+        const aidl_hal::SymmPerChannelQuantParams& symmPerChannelQuantParams);
+GeneralResult<Operation> unvalidatedConvert(const aidl_hal::Operation& operation);
+GeneralResult<Model> unvalidatedConvert(const aidl_hal::Model& model);
+GeneralResult<Model::ExtensionNameAndPrefix> unvalidatedConvert(
+        const aidl_hal::ExtensionNameAndPrefix& extensionNameAndPrefix);
+GeneralResult<Model::OperandValues> unvalidatedConvert(const std::vector<uint8_t>& operandValues);
+GeneralResult<Model::Subgraph> unvalidatedConvert(const aidl_hal::Subgraph& subgraph);
+GeneralResult<OutputShape> unvalidatedConvert(const aidl_hal::OutputShape& outputShape);
+GeneralResult<MeasureTiming> unvalidatedConvert(bool measureTiming);
+GeneralResult<Memory> unvalidatedConvert(const aidl_hal::Memory& memory);
+GeneralResult<Timing> unvalidatedConvert(const aidl_hal::Timing& timing);
+GeneralResult<BufferDesc> unvalidatedConvert(const aidl_hal::BufferDesc& bufferDesc);
+GeneralResult<BufferRole> unvalidatedConvert(const aidl_hal::BufferRole& bufferRole);
+GeneralResult<Request> unvalidatedConvert(const aidl_hal::Request& request);
+GeneralResult<Request::Argument> unvalidatedConvert(
+        const aidl_hal::RequestArgument& requestArgument);
+GeneralResult<Request::MemoryPool> unvalidatedConvert(
+        const aidl_hal::RequestMemoryPool& memoryPool);
+GeneralResult<ErrorStatus> unvalidatedConvert(const aidl_hal::ErrorStatus& errorStatus);
+GeneralResult<ExecutionPreference> unvalidatedConvert(
+        const aidl_hal::ExecutionPreference& executionPreference);
+GeneralResult<Extension> unvalidatedConvert(const aidl_hal::Extension& extension);
+GeneralResult<Extension::OperandTypeInformation> unvalidatedConvert(
+        const aidl_hal::ExtensionOperandTypeInformation& operandTypeInformation);
+GeneralResult<SharedHandle> unvalidatedConvert(
+        const ::aidl::android::hardware::common::NativeHandle& handle);
+// convert: NOTE(review): presumably the validated counterparts of the overloads above - confirm.
+GeneralResult<ExecutionPreference> convert(
+        const aidl_hal::ExecutionPreference& executionPreference);
+GeneralResult<Memory> convert(const aidl_hal::Memory& memory);
+GeneralResult<Model> convert(const aidl_hal::Model& model);
+GeneralResult<Operand> convert(const aidl_hal::Operand& operand);
+GeneralResult<OperandType> convert(const aidl_hal::OperandType& operandType);
+GeneralResult<Priority> convert(const aidl_hal::Priority& priority);
+GeneralResult<Request::MemoryPool> convert(const aidl_hal::RequestMemoryPool& memoryPool);
+GeneralResult<Request> convert(const aidl_hal::Request& request);
+
+GeneralResult<std::vector<Operation>> convert(const std::vector<aidl_hal::Operation>& operations);
+GeneralResult<std::vector<Memory>> convert(const std::vector<aidl_hal::Memory>& memories);
+// toUnsigned: int32_t vector -> uint32_t vector; NOTE(review): failure conditions not shown here.
+GeneralResult<std::vector<uint32_t>> toUnsigned(const std::vector<int32_t>& vec);
+
+}  // namespace android::nn
+
+namespace aidl::android::hardware::neuralnetworks::utils {
+
+namespace nn = ::android::nn;
+// Reverse direction: canonical (nn::) objects are turned into their AIDL HAL representation.
+nn::GeneralResult<Memory> unvalidatedConvert(const nn::Memory& memory);
+nn::GeneralResult<OutputShape> unvalidatedConvert(const nn::OutputShape& outputShape);
+nn::GeneralResult<ErrorStatus> unvalidatedConvert(const nn::ErrorStatus& errorStatus);
+
+nn::GeneralResult<Memory> convert(const nn::Memory& memory);
+nn::GeneralResult<ErrorStatus> convert(const nn::ErrorStatus& errorStatus);
+nn::GeneralResult<std::vector<OutputShape>> convert(
+        const std::vector<nn::OutputShape>& outputShapes);
+// Counterpart of nn::toUnsigned; NOTE(review): presumably rejects values above INT32_MAX.
+nn::GeneralResult<std::vector<int32_t>> toSigned(const std::vector<uint32_t>& vec);
+
+}  // namespace aidl::android::hardware::neuralnetworks::utils
+
+#endif  // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_AIDL_CONVERSIONS_H
diff --git a/neuralnetworks/aidl/utils/include/nnapi/hal/aidl/Utils.h b/neuralnetworks/aidl/utils/include/nnapi/hal/aidl/Utils.h
new file mode 100644
index 0000000000..79b511dc56
--- /dev/null
+++ b/neuralnetworks/aidl/utils/include/nnapi/hal/aidl/Utils.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_AIDL_UTILS_H
+#define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_AIDL_UTILS_H
+
+#include "nnapi/hal/aidl/Conversions.h"
+
+#include <android-base/logging.h>
+#include <nnapi/Result.h>
+#include <nnapi/Types.h>
+#include <nnapi/Validation.h>
+
+namespace aidl::android::hardware::neuralnetworks::utils {
+
+constexpr auto kDefaultPriority = Priority::MEDIUM;
+constexpr auto kVersion = nn::Version::ANDROID_S;
+// Succeeds iff nn::convert accepts `halObject`; a failure carries the conversion's message.
+template <typename Type>
+nn::Result<void> validate(const Type& halObject) {
+    if (const auto converted = nn::convert(halObject); !converted.has_value()) {
+        return nn::error() << converted.error().message;
+    }
+    // The canonical object was only needed for the check, so it is dropped here.
+    return {};
+}
+// Boolean form of validate(): a failure is logged at ERROR severity and reported as false.
+template <typename Type>
+bool valid(const Type& halObject) {
+    if (const auto outcome = utils::validate(halObject); !outcome.has_value()) {
+        LOG(ERROR) << outcome.error();
+        return false;
+    }
+    return true;
+}
+// NOTE(review): presumably produce copies of the given AIDL objects (defined elsewhere) - confirm.
+nn::GeneralResult<Memory> clone(const Memory& memory);
+nn::GeneralResult<Request> clone(const Request& request);
+nn::GeneralResult<RequestMemoryPool> clone(const RequestMemoryPool& requestPool);
+nn::GeneralResult<Model> clone(const Model& model);
+
+}  // namespace aidl::android::hardware::neuralnetworks::utils
+
+#endif  // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_AIDL_UTILS_H
diff --git a/neuralnetworks/aidl/utils/src/Assertions.cpp b/neuralnetworks/aidl/utils/src/Assertions.cpp
new file mode 100644
index 0000000000..0e88091cfb
--- /dev/null
+++ b/neuralnetworks/aidl/utils/src/Assertions.cpp
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <aidl/android/hardware/neuralnetworks/DeviceType.h>
+#include <aidl/android/hardware/neuralnetworks/ErrorStatus.h>
+#include <aidl/android/hardware/neuralnetworks/ExecutionPreference.h>
+#include <aidl/android/hardware/neuralnetworks/FusedActivationFunc.h>
+#include <aidl/android/hardware/neuralnetworks/IDevice.h>
+#include <aidl/android/hardware/neuralnetworks/OperandLifeTime.h>
+#include <aidl/android/hardware/neuralnetworks/OperandType.h>
+#include <aidl/android/hardware/neuralnetworks/OperationType.h>
+#include <aidl/android/hardware/neuralnetworks/Priority.h>
+
+#include <ControlFlow.h>
+#include <nnapi/OperandTypes.h>
+#include <nnapi/OperationTypes.h>
+#include <nnapi/Types.h>
+#include <type_traits>
+
+namespace {
+
+#define COMPARE_ENUMS_TYPES(lhsType, rhsType)                                                   \
+    static_assert(                                                                              \
+            std::is_same_v<                                                                     \
+                    std::underlying_type_t<::aidl::android::hardware::neuralnetworks::lhsType>, \
+                    std::underlying_type_t<::android::nn::rhsType>>,                            \
+            "::aidl::android::hardware::neuralnetworks::" #lhsType                              \
+            " does not have the same underlying type as ::android::nn::" #rhsType)
+// Each pair below must share an underlying integer type; otherwise compilation fails.
+COMPARE_ENUMS_TYPES(OperandType, OperandType);
+COMPARE_ENUMS_TYPES(OperationType, OperationType);
+COMPARE_ENUMS_TYPES(Priority, Priority);
+COMPARE_ENUMS_TYPES(OperandLifeTime, Operand::LifeTime);
+COMPARE_ENUMS_TYPES(ErrorStatus, ErrorStatus);
+
+#undef COMPARE_ENUMS_TYPES
+
+#define COMPARE_ENUMS_FULL(lhsSymbol, rhsSymbol, lhsType, rhsType)                               \
+    static_assert(                                                                               \
+            static_cast<                                                                         \
+                    std::underlying_type_t<::aidl::android::hardware::neuralnetworks::lhsType>>( \
+                    ::aidl::android::hardware::neuralnetworks::lhsType::lhsSymbol) ==            \
+                    static_cast<std::underlying_type_t<::android::nn::rhsType>>(                 \
+                            ::android::nn::rhsType::rhsSymbol),                                  \
+            "::aidl::android::hardware::neuralnetworks::" #lhsType "::" #lhsSymbol               \
+            " does not match ::android::nn::" #rhsType "::" #rhsSymbol)
+// COMPARE_ENUMS_FULL: one AIDL enumerator must equal one canonical enumerator numerically.
+#define COMPARE_ENUMS(symbol) COMPARE_ENUMS_FULL(symbol, symbol, OperandType, OperandType)
+
+COMPARE_ENUMS(FLOAT32);
+COMPARE_ENUMS(INT32);
+COMPARE_ENUMS(UINT32);
+COMPARE_ENUMS(TENSOR_FLOAT32);
+COMPARE_ENUMS(TENSOR_INT32);
+COMPARE_ENUMS(TENSOR_QUANT8_ASYMM);
+COMPARE_ENUMS(BOOL);
+COMPARE_ENUMS(TENSOR_QUANT16_SYMM);
+COMPARE_ENUMS(TENSOR_FLOAT16);
+COMPARE_ENUMS(TENSOR_BOOL8);
+COMPARE_ENUMS(FLOAT16);
+COMPARE_ENUMS(TENSOR_QUANT8_SYMM_PER_CHANNEL);
+COMPARE_ENUMS(TENSOR_QUANT16_ASYMM);
+COMPARE_ENUMS(TENSOR_QUANT8_SYMM);
+COMPARE_ENUMS(TENSOR_QUANT8_ASYMM_SIGNED);
+COMPARE_ENUMS(SUBGRAPH);
+
+#undef COMPARE_ENUMS
+
+#define COMPARE_ENUMS(symbol) COMPARE_ENUMS_FULL(symbol, symbol, OperationType, OperationType)
+// Same-named OperationType enumerators must carry the same value in the AIDL and canonical enums.
+COMPARE_ENUMS(ADD);
+COMPARE_ENUMS(AVERAGE_POOL_2D);
+COMPARE_ENUMS(CONCATENATION);
+COMPARE_ENUMS(CONV_2D);
+COMPARE_ENUMS(DEPTHWISE_CONV_2D);
+COMPARE_ENUMS(DEPTH_TO_SPACE);
+COMPARE_ENUMS(DEQUANTIZE);
+COMPARE_ENUMS(EMBEDDING_LOOKUP);
+COMPARE_ENUMS(FLOOR);
+COMPARE_ENUMS(FULLY_CONNECTED);
+COMPARE_ENUMS(HASHTABLE_LOOKUP);
+COMPARE_ENUMS(L2_NORMALIZATION);
+COMPARE_ENUMS(L2_POOL_2D);
+COMPARE_ENUMS(LOCAL_RESPONSE_NORMALIZATION);
+COMPARE_ENUMS(LOGISTIC);
+COMPARE_ENUMS(LSH_PROJECTION);
+COMPARE_ENUMS(LSTM);
+COMPARE_ENUMS(MAX_POOL_2D);
+COMPARE_ENUMS(MUL);
+COMPARE_ENUMS(RELU);
+COMPARE_ENUMS(RELU1);
+COMPARE_ENUMS(RELU6);
+COMPARE_ENUMS(RESHAPE);
+COMPARE_ENUMS(RESIZE_BILINEAR);
+COMPARE_ENUMS(RNN);
+COMPARE_ENUMS(SOFTMAX);
+COMPARE_ENUMS(SPACE_TO_DEPTH);
+COMPARE_ENUMS(SVDF);
+COMPARE_ENUMS(TANH);
+COMPARE_ENUMS(BATCH_TO_SPACE_ND);
+COMPARE_ENUMS(DIV);
+COMPARE_ENUMS(MEAN);
+COMPARE_ENUMS(PAD);
+COMPARE_ENUMS(SPACE_TO_BATCH_ND);
+COMPARE_ENUMS(SQUEEZE);
+COMPARE_ENUMS(STRIDED_SLICE);
+COMPARE_ENUMS(SUB);
+COMPARE_ENUMS(TRANSPOSE);
+COMPARE_ENUMS(ABS);
+COMPARE_ENUMS(ARGMAX);
+COMPARE_ENUMS(ARGMIN);
+COMPARE_ENUMS(AXIS_ALIGNED_BBOX_TRANSFORM);
+COMPARE_ENUMS(BIDIRECTIONAL_SEQUENCE_LSTM);
+COMPARE_ENUMS(BIDIRECTIONAL_SEQUENCE_RNN);
+COMPARE_ENUMS(BOX_WITH_NMS_LIMIT);
+COMPARE_ENUMS(CAST);
+COMPARE_ENUMS(CHANNEL_SHUFFLE);
+COMPARE_ENUMS(DETECTION_POSTPROCESSING);
+COMPARE_ENUMS(EQUAL);
+COMPARE_ENUMS(EXP);
+COMPARE_ENUMS(EXPAND_DIMS);
+COMPARE_ENUMS(GATHER);
+COMPARE_ENUMS(GENERATE_PROPOSALS);
+COMPARE_ENUMS(GREATER);
+COMPARE_ENUMS(GREATER_EQUAL);
+COMPARE_ENUMS(GROUPED_CONV_2D);
+COMPARE_ENUMS(HEATMAP_MAX_KEYPOINT);
+COMPARE_ENUMS(INSTANCE_NORMALIZATION);
+COMPARE_ENUMS(LESS);
+COMPARE_ENUMS(LESS_EQUAL);
+COMPARE_ENUMS(LOG);
+COMPARE_ENUMS(LOGICAL_AND);
+COMPARE_ENUMS(LOGICAL_NOT);
+COMPARE_ENUMS(LOGICAL_OR);
+COMPARE_ENUMS(LOG_SOFTMAX);
+COMPARE_ENUMS(MAXIMUM);
+COMPARE_ENUMS(MINIMUM);
+COMPARE_ENUMS(NEG);
+COMPARE_ENUMS(NOT_EQUAL);
+COMPARE_ENUMS(PAD_V2);
+COMPARE_ENUMS(POW);
+COMPARE_ENUMS(PRELU);
+COMPARE_ENUMS(QUANTIZE);
+COMPARE_ENUMS(QUANTIZED_16BIT_LSTM);
+COMPARE_ENUMS(RANDOM_MULTINOMIAL);
+COMPARE_ENUMS(REDUCE_ALL);
+COMPARE_ENUMS(REDUCE_ANY);
+COMPARE_ENUMS(REDUCE_MAX);
+COMPARE_ENUMS(REDUCE_MIN);
+COMPARE_ENUMS(REDUCE_PROD);
+COMPARE_ENUMS(REDUCE_SUM);
+COMPARE_ENUMS(ROI_ALIGN);
+COMPARE_ENUMS(ROI_POOLING);
+COMPARE_ENUMS(RSQRT);
+COMPARE_ENUMS(SELECT);
+COMPARE_ENUMS(SIN);
+COMPARE_ENUMS(SLICE);
+COMPARE_ENUMS(SPLIT);
+COMPARE_ENUMS(SQRT);
+COMPARE_ENUMS(TILE);
+COMPARE_ENUMS(TOPK_V2);
+COMPARE_ENUMS(TRANSPOSE_CONV_2D);
+COMPARE_ENUMS(UNIDIRECTIONAL_SEQUENCE_LSTM);
+COMPARE_ENUMS(UNIDIRECTIONAL_SEQUENCE_RNN);
+COMPARE_ENUMS(RESIZE_NEAREST_NEIGHBOR);
+COMPARE_ENUMS(QUANTIZED_LSTM);
+COMPARE_ENUMS(IF);
+COMPARE_ENUMS(WHILE);
+COMPARE_ENUMS(ELU);
+COMPARE_ENUMS(HARD_SWISH);
+COMPARE_ENUMS(FILL);
+COMPARE_ENUMS(RANK);
+
+#undef COMPARE_ENUMS
+
+#define COMPARE_ENUMS(symbol) COMPARE_ENUMS_FULL(symbol, symbol, Priority, Priority)
+// Priority levels must agree numerically between the AIDL and canonical enums.
+COMPARE_ENUMS(LOW);
+COMPARE_ENUMS(MEDIUM);
+COMPARE_ENUMS(HIGH);
+
+#undef COMPARE_ENUMS
+
+#define COMPARE_ENUMS(lhsSymbol, rhsSymbol) \
+    COMPARE_ENUMS_FULL(lhsSymbol, rhsSymbol, OperandLifeTime, Operand::LifeTime)
+// Two-symbol form: AIDL CONSTANT_POOL is paired with the canonical CONSTANT_REFERENCE below.
+COMPARE_ENUMS(TEMPORARY_VARIABLE, TEMPORARY_VARIABLE);
+COMPARE_ENUMS(SUBGRAPH_INPUT, SUBGRAPH_INPUT);
+COMPARE_ENUMS(SUBGRAPH_OUTPUT, SUBGRAPH_OUTPUT);
+COMPARE_ENUMS(CONSTANT_COPY, CONSTANT_COPY);
+COMPARE_ENUMS(CONSTANT_POOL, CONSTANT_REFERENCE);
+COMPARE_ENUMS(NO_VALUE, NO_VALUE);
+COMPARE_ENUMS(SUBGRAPH, SUBGRAPH);
+
+#undef COMPARE_ENUMS
+
+#define COMPARE_ENUMS(symbol) COMPARE_ENUMS_FULL(symbol, symbol, ErrorStatus, ErrorStatus)
+// Error codes must agree numerically between the AIDL and canonical enums.
+COMPARE_ENUMS(NONE);
+COMPARE_ENUMS(DEVICE_UNAVAILABLE);
+COMPARE_ENUMS(GENERAL_FAILURE);
+COMPARE_ENUMS(OUTPUT_INSUFFICIENT_SIZE);
+COMPARE_ENUMS(INVALID_ARGUMENT);
+COMPARE_ENUMS(MISSED_DEADLINE_TRANSIENT);
+COMPARE_ENUMS(MISSED_DEADLINE_PERSISTENT);
+COMPARE_ENUMS(RESOURCE_EXHAUSTED_TRANSIENT);
+COMPARE_ENUMS(RESOURCE_EXHAUSTED_PERSISTENT);
+
+#undef COMPARE_ENUMS
+
+#define COMPARE_ENUMS(symbol) \
+    COMPARE_ENUMS_FULL(symbol, symbol, ExecutionPreference, ExecutionPreference)
+// Execution preferences must agree numerically between the AIDL and canonical enums.
+COMPARE_ENUMS(LOW_POWER);
+COMPARE_ENUMS(FAST_SINGLE_ANSWER);
+COMPARE_ENUMS(SUSTAINED_SPEED);
+
+#undef COMPARE_ENUMS
+
+#define COMPARE_ENUMS(symbol) COMPARE_ENUMS_FULL(symbol, symbol, DeviceType, DeviceType)
+// Device types must agree numerically between the AIDL and canonical enums.
+COMPARE_ENUMS(OTHER);
+COMPARE_ENUMS(CPU);
+COMPARE_ENUMS(GPU);
+COMPARE_ENUMS(ACCELERATOR);
+
+#undef COMPARE_ENUMS
+
+#define COMPARE_ENUMS(symbol) \
+    COMPARE_ENUMS_FULL(symbol, symbol, FusedActivationFunc, FusedActivationFunc)
+// Fused activation codes must agree numerically between the AIDL and canonical enums.
+COMPARE_ENUMS(NONE);
+COMPARE_ENUMS(RELU);
+COMPARE_ENUMS(RELU1);
+COMPARE_ENUMS(RELU6);
+
+#undef COMPARE_ENUMS
+
+#undef COMPARE_ENUMS_FULL
+
+#define COMPARE_CONSTANTS(halSymbol, canonicalSymbol)                     \
+    static_assert(::aidl::android::hardware::neuralnetworks::halSymbol == \
+                  ::android::nn::canonicalSymbol)
+// Asserts HAL constant == canonical expression; each use adds the ';' as for COMPARE_ENUMS_*.
+COMPARE_CONSTANTS(IDevice::BYTE_SIZE_OF_CACHE_TOKEN, kByteSizeOfCacheToken);
+COMPARE_CONSTANTS(IDevice::MAX_NUMBER_OF_CACHE_FILES, kMaxNumberOfCacheFiles);
+COMPARE_CONSTANTS(IDevice::EXTENSION_TYPE_HIGH_BITS_PREFIX, kExtensionPrefixBits - 1);
+COMPARE_CONSTANTS(IDevice::EXTENSION_TYPE_LOW_BITS_TYPE, kExtensionTypeBits);
+COMPARE_CONSTANTS(IPreparedModel::DEFAULT_LOOP_TIMEOUT_DURATION_NS,
+                  operation_while::kTimeoutNsDefault);
+COMPARE_CONSTANTS(IPreparedModel::MAXIMUM_LOOP_TIMEOUT_DURATION_NS,
+                  operation_while::kTimeoutNsMaximum);
+
+#undef COMPARE_CONSTANTS
+
+}  // anonymous namespace
diff --git a/neuralnetworks/aidl/utils/src/Conversions.cpp b/neuralnetworks/aidl/utils/src/Conversions.cpp
new file mode 100644
index 0000000000..0e93b02a1e
--- /dev/null
+++ b/neuralnetworks/aidl/utils/src/Conversions.cpp
@@ -0,0 +1,582 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Conversions.h"
+
+#include <aidl/android/hardware/common/NativeHandle.h>
+#include <android-base/logging.h>
+#include <nnapi/OperandTypes.h>
+#include <nnapi/OperationTypes.h>
+#include <nnapi/Result.h>
+#include <nnapi/SharedMemory.h>
+#include <nnapi/TypeUtils.h>
+#include <nnapi/Types.h>
+#include <nnapi/Validation.h>
+#include <nnapi/hal/CommonUtils.h>
+#include <nnapi/hal/HandleError.h>
+
+#include <algorithm>
+#include <chrono>
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <type_traits>
+#include <utility>
+
+#define VERIFY_NON_NEGATIVE(value) \
+    while (UNLIKELY((value) < 0)) return NN_ERROR()  // caller appends the message with <<
+
+namespace {
+
+template <typename Type>
+constexpr std::underlying_type_t<Type> underlyingType(Type value) {  // enumerator -> integer
+    using Underlying = std::underlying_type_t<Type>;
+    return static_cast<Underlying>(value);
+}
+
+constexpr auto kVersion = android::nn::Version::ANDROID_S;  // ceiling used by validatedConvert
+}  // namespace
+
+namespace android::nn {
+namespace {
+
+constexpr auto validOperandType(nn::OperandType operandType) {  // every return is a bool
+    switch (operandType) {
+        case nn::OperandType::FLOAT32:
+        case nn::OperandType::INT32:
+        case nn::OperandType::UINT32:
+        case nn::OperandType::TENSOR_FLOAT32:
+        case nn::OperandType::TENSOR_INT32:
+        case nn::OperandType::TENSOR_QUANT8_ASYMM:
+        case nn::OperandType::BOOL:
+        case nn::OperandType::TENSOR_QUANT16_SYMM:
+        case nn::OperandType::TENSOR_FLOAT16:
+        case nn::OperandType::TENSOR_BOOL8:
+        case nn::OperandType::FLOAT16:
+        case nn::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+        case nn::OperandType::TENSOR_QUANT16_ASYMM:
+        case nn::OperandType::TENSOR_QUANT8_SYMM:
+        case nn::OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
+        case nn::OperandType::SUBGRAPH:
+            return true;  // the operand types also checked against the AIDL enum in Assertions.cpp
+        case nn::OperandType::OEM:
+        case nn::OperandType::TENSOR_OEM_BYTE:
+            return false;  // OEM types are rejected
+    }
+    return nn::isExtension(operandType);  // no named case matched: valid only as an extension
+}
+
+template <typename Input>
+using UnvalidatedConvertOutput =  // value type held by the result of unvalidatedConvert(Input)
+        std::decay_t<decltype(unvalidatedConvert(std::declval<Input>()).value())>;
+
+template <typename Type>
+GeneralResult<std::vector<UnvalidatedConvertOutput<Type>>> unvalidatedConvertVec(
+        const std::vector<Type>& arguments) {
+    std::vector<UnvalidatedConvertOutput<Type>> converted;
+    converted.reserve(arguments.size());
+    for (const Type& element : arguments) {  // the first failing element fails the whole vector
+        converted.push_back(NN_TRY(nn::unvalidatedConvert(element)));
+    }
+    return converted;
+}
+
+template <typename Type>
+GeneralResult<std::vector<UnvalidatedConvertOutput<Type>>> unvalidatedConvert(
+        const std::vector<Type>& arguments) {
+    return unvalidatedConvertVec(arguments);  // lets vector fields use the common overload name
+}
+
+template <typename Type>
+GeneralResult<UnvalidatedConvertOutput<Type>> validatedConvert(const Type& halObject) {
+    // Convert first, then require the canonical object to validate at or below kVersion.
+    auto converted = NN_TRY(nn::unvalidatedConvert(halObject));
+    const auto validation = validate(converted);
+    if (!validation.has_value()) {
+        return error() << validation.error();
+    }
+    if (const auto version = validation.value(); version > kVersion) {
+        return NN_ERROR() << "Insufficient version: " << version << " vs required " << kVersion;
+    }
+    return converted;
+}
+
+template <typename Type>
+GeneralResult<std::vector<UnvalidatedConvertOutput<Type>>> validatedConvert(
+        const std::vector<Type>& arguments) {
+    std::vector<UnvalidatedConvertOutput<Type>> validated;
+    validated.reserve(arguments.size());
+    for (const Type& element : arguments) {  // each element is converted and validated alone
+        validated.push_back(NN_TRY(validatedConvert(element)));
+    }
+    return validated;
+}
+
+}  // anonymous namespace
+
+GeneralResult<OperandType> unvalidatedConvert(const aidl_hal::OperandType& operandType) {
+    VERIFY_NON_NEGATIVE(underlyingType(operandType)) << "Negative operand types are not allowed.";
+    return static_cast<OperandType>(operandType);  // numeric value is carried over unchanged
+}
+
+GeneralResult<OperationType> unvalidatedConvert(const aidl_hal::OperationType& operationType) {
+    VERIFY_NON_NEGATIVE(underlyingType(operationType))
+            << "Negative operation types are not allowed.";
+    return static_cast<OperationType>(operationType);  // numeric value is carried over unchanged
+}
+
+GeneralResult<DeviceType> unvalidatedConvert(const aidl_hal::DeviceType& deviceType) {
+    return static_cast<DeviceType>(deviceType);  // plain cast; no range check is done here
+}
+
+GeneralResult<Priority> unvalidatedConvert(const aidl_hal::Priority& priority) {
+    return static_cast<Priority>(priority);  // plain cast; no range check is done here
+}
+
+GeneralResult<Capabilities> unvalidatedConvert(const aidl_hal::Capabilities& capabilities) {
+    // Reject everything if any entry has a type that is negative, OEM, or otherwise invalid.
+    const auto hasValidType = [](const aidl_hal::OperandPerformance& entry) {
+        const auto maybeType = unvalidatedConvert(entry.type);
+        return maybeType.has_value() && validOperandType(maybeType.value());
+    };
+    const auto& halEntries = capabilities.operandPerformance;
+    if (!std::all_of(halEntries.begin(), halEntries.end(), hasValidType)) {
+        return NN_ERROR() << "Invalid OperandType when unvalidatedConverting OperandPerformance in "
+                             "Capabilities";
+    }
+
+    // A failure of OperandPerformanceTable::create is passed on tagged as GENERAL_FAILURE.
+    auto entries = NN_TRY(unvalidatedConvert(halEntries));
+    auto table = NN_TRY(hal::utils::makeGeneralFailure(
+            Capabilities::OperandPerformanceTable::create(std::move(entries)),
+            nn::ErrorStatus::GENERAL_FAILURE));
+
+    // The remaining members are converted one by one inside the initializer.
+    return Capabilities{
+            .relaxedFloat32toFloat16PerformanceScalar = NN_TRY(
+                    unvalidatedConvert(capabilities.relaxedFloat32toFloat16PerformanceScalar)),
+            .relaxedFloat32toFloat16PerformanceTensor = NN_TRY(
+                    unvalidatedConvert(capabilities.relaxedFloat32toFloat16PerformanceTensor)),
+            .operandPerformance = std::move(table),
+            .ifPerformance = NN_TRY(unvalidatedConvert(capabilities.ifPerformance)),
+            .whilePerformance = NN_TRY(unvalidatedConvert(capabilities.whilePerformance)),
+    };
+}
+
+GeneralResult<Capabilities::OperandPerformance> unvalidatedConvert(
+        const aidl_hal::OperandPerformance& operandPerformance) {
+    // Convert the operand type first, then its performance figures.
+    auto type = NN_TRY(unvalidatedConvert(operandPerformance.type));
+    auto info = NN_TRY(unvalidatedConvert(operandPerformance.info));
+    return Capabilities::OperandPerformance{.type = type, .info = info};
+}
+
+GeneralResult<Capabilities::PerformanceInfo> unvalidatedConvert(
+        const aidl_hal::PerformanceInfo& performanceInfo) {
+    // Plain field copy: no check is applied to either value.
+    const auto execTime = performanceInfo.execTime;
+    const auto powerUsage = performanceInfo.powerUsage;
+    return Capabilities::PerformanceInfo{.execTime = execTime, .powerUsage = powerUsage};
+}
+
+GeneralResult<DataLocation> unvalidatedConvert(const aidl_hal::DataLocation& location) {
+    // Every field is cast to uint32_t below, so negative or too-large values are refused first.
+    constexpr auto kUint32Max = std::numeric_limits<uint32_t>::max();
+    VERIFY_NON_NEGATIVE(location.poolIndex) << "DataLocation: pool index must not be negative";
+    VERIFY_NON_NEGATIVE(location.offset) << "DataLocation: offset must not be negative";
+    VERIFY_NON_NEGATIVE(location.length) << "DataLocation: length must not be negative";
+    if (location.offset > kUint32Max) {
+        return NN_ERROR() << "DataLocation: offset must be <= std::numeric_limits<uint32_t>::max()";
+    }
+    if (location.length > kUint32Max) {
+        return NN_ERROR() << "DataLocation: length must be <= std::numeric_limits<uint32_t>::max()";
+    }
+    return DataLocation{.poolIndex = static_cast<uint32_t>(location.poolIndex),
+                        .offset = static_cast<uint32_t>(location.offset),
+                        .length = static_cast<uint32_t>(location.length)};
+}
+
+GeneralResult<Operation> unvalidatedConvert(const aidl_hal::Operation& operation) {
+    // Fields are converted in declaration order; the first failure is returned to the caller.
+    auto type = NN_TRY(unvalidatedConvert(operation.type));
+    auto inputs = NN_TRY(toUnsigned(operation.inputs));
+    auto outputs = NN_TRY(toUnsigned(operation.outputs));
+    return Operation{.type = type, .inputs = std::move(inputs), .outputs = std::move(outputs)};
+}
+
+GeneralResult<Operand::LifeTime> unvalidatedConvert(
+        const aidl_hal::OperandLifeTime& operandLifeTime) {
+    return static_cast<Operand::LifeTime>(operandLifeTime);  // plain cast; no range check here
+}
+
+GeneralResult<Operand> unvalidatedConvert(const aidl_hal::Operand& operand) {
+    return Operand{  // any NN_TRY field conversion that fails ends the whole conversion
+            .type = NN_TRY(unvalidatedConvert(operand.type)),
+            .dimensions = NN_TRY(toUnsigned(operand.dimensions)),
+            .scale = operand.scale,  // copied unchanged
+            .zeroPoint = operand.zeroPoint,  // copied unchanged
+            .lifetime = NN_TRY(unvalidatedConvert(operand.lifetime)),
+            .location = NN_TRY(unvalidatedConvert(operand.location)),
+            .extraParams = NN_TRY(unvalidatedConvert(operand.extraParams)),
+    };
+}
+
+GeneralResult<Operand::ExtraParams> unvalidatedConvert(
+        const std::optional<aidl_hal::OperandExtraParams>& optionalExtraParams) {
+    if (!optionalExtraParams.has_value()) {
+        return Operand::NoParams{};  // an absent AIDL optional maps to NoParams
+    }
+    const auto& extraParams = optionalExtraParams.value();
+    using Tag = aidl_hal::OperandExtraParams::Tag;
+    switch (extraParams.getTag()) {
+        case Tag::channelQuant:
+            return unvalidatedConvert(extraParams.get<Tag::channelQuant>());
+        case Tag::extension:
+            return extraParams.get<Tag::extension>();  // extension payload is returned as-is
+    }
+    return NN_ERROR() << "Unrecognized Operand::ExtraParams tag: "  // neither case above matched
+                      << underlyingType(extraParams.getTag());
+}
+
+GeneralResult<Operand::SymmPerChannelQuantParams> unvalidatedConvert(
+        const aidl_hal::SymmPerChannelQuantParams& symmPerChannelQuantParams) {
+    const auto& params = symmPerChannelQuantParams;
+    VERIFY_NON_NEGATIVE(params.channelDim)
+            << "Per-channel quantization channel dimension must not be negative.";
+    // After the sign check the cast cannot wrap a negative value; scales are copied as-is.
+    return Operand::SymmPerChannelQuantParams{
+            .scales = params.scales, .channelDim = static_cast<uint32_t>(params.channelDim)};
+}
+
+GeneralResult<Model> unvalidatedConvert(const aidl_hal::Model& model) {
+    return Model{  // any NN_TRY member conversion that fails ends the whole conversion
+            .main = NN_TRY(unvalidatedConvert(model.main)),
+            .referenced = NN_TRY(unvalidatedConvert(model.referenced)),
+            .operandValues = NN_TRY(unvalidatedConvert(model.operandValues)),
+            .pools = NN_TRY(unvalidatedConvert(model.pools)),
+            .relaxComputationFloat32toFloat16 = model.relaxComputationFloat32toFloat16,
+            .extensionNameToPrefix = NN_TRY(unvalidatedConvert(model.extensionNameToPrefix)),
+    };
+}
+
+GeneralResult<Model::Subgraph> unvalidatedConvert(const aidl_hal::Subgraph& subgraph) {
+    return Model::Subgraph{  // index lists go through toUnsigned, element lists per element
+            .operands = NN_TRY(unvalidatedConvert(subgraph.operands)),
+            .operations = NN_TRY(unvalidatedConvert(subgraph.operations)),
+            .inputIndexes = NN_TRY(toUnsigned(subgraph.inputIndexes)),
+            .outputIndexes = NN_TRY(toUnsigned(subgraph.outputIndexes)),
+    };
+}
+
+GeneralResult<Model::ExtensionNameAndPrefix> unvalidatedConvert(
+        const aidl_hal::ExtensionNameAndPrefix& extensionNameAndPrefix) {
+    // Plain field copy: neither the name nor the prefix is checked here.
+    const auto& name = extensionNameAndPrefix.name;
+    const auto prefix = extensionNameAndPrefix.prefix;
+    return Model::ExtensionNameAndPrefix{.name = name, .prefix = prefix};
+}
+
+GeneralResult<Extension> unvalidatedConvert(const aidl_hal::Extension& extension) {
+    // The name is copied verbatim; a failing operand-type entry fails the whole extension.
+    auto operandTypes = NN_TRY(unvalidatedConvert(extension.operandTypes));
+    return Extension{.name = extension.name,
+                     .operandTypes = std::move(operandTypes)};
+}
+
+GeneralResult<Extension::OperandTypeInformation> unvalidatedConvert(
+        const aidl_hal::ExtensionOperandTypeInformation& operandTypeInformation) {
+    VERIFY_NON_NEGATIVE(operandTypeInformation.byteSize)
+            << "Extension operand type byte size must not be negative";
+    return Extension::OperandTypeInformation{
+            .type = operandTypeInformation.type,
+            .isTensor = operandTypeInformation.isTensor,
+            .byteSize = static_cast<uint32_t>(operandTypeInformation.byteSize),
+    };
+}
+
+GeneralResult<OutputShape> unvalidatedConvert(const aidl_hal::OutputShape& outputShape) {
+    return OutputShape{
+            .dimensions = NN_TRY(toUnsigned(outputShape.dimensions)),
+            .isSufficient = outputShape.isSufficient,
+    };
+}
+
+GeneralResult<MeasureTiming> unvalidatedConvert(bool measureTiming) {
+    return measureTiming ? MeasureTiming::YES : MeasureTiming::NO;
+}
+
+GeneralResult<Memory> unvalidatedConvert(const aidl_hal::Memory& memory) {
+    VERIFY_NON_NEGATIVE(memory.size) << "Memory size must not be negative";
+    return Memory{
+            .handle = NN_TRY(unvalidatedConvert(memory.handle)),
+            // Cast to the canonical field's own type instead of a hard-coded uint32_t.
+            .size = static_cast<decltype(Memory::size)>(memory.size),
+            .name = memory.name};
+}
+
+GeneralResult<Model::OperandValues> unvalidatedConvert(const std::vector<uint8_t>& operandValues) {
+    return Model::OperandValues(operandValues.data(), operandValues.size());
+}
+
+GeneralResult<BufferDesc> unvalidatedConvert(const aidl_hal::BufferDesc& bufferDesc) {
+    return BufferDesc{.dimensions = NN_TRY(toUnsigned(bufferDesc.dimensions))};
+}
+
+GeneralResult<BufferRole> unvalidatedConvert(const aidl_hal::BufferRole& bufferRole) {
+    VERIFY_NON_NEGATIVE(bufferRole.modelIndex) << "BufferRole: modelIndex must not be negative";
+    VERIFY_NON_NEGATIVE(bufferRole.ioIndex) << "BufferRole: ioIndex must not be negative";
+    return BufferRole{
+            .modelIndex = static_cast<uint32_t>(bufferRole.modelIndex),
+            .ioIndex = static_cast<uint32_t>(bufferRole.ioIndex),
+            .frequency = bufferRole.frequency,
+    };
+}
+
+GeneralResult<Request> unvalidatedConvert(const aidl_hal::Request& request) {
+    return Request{
+            .inputs = NN_TRY(unvalidatedConvert(request.inputs)),
+            .outputs = NN_TRY(unvalidatedConvert(request.outputs)),
+            .pools = NN_TRY(unvalidatedConvert(request.pools)),
+    };
+}
+
+GeneralResult<Request::Argument> unvalidatedConvert(const aidl_hal::RequestArgument& argument) {
+    const auto lifetime = argument.hasNoValue ? Request::Argument::LifeTime::NO_VALUE
+                                              : Request::Argument::LifeTime::POOL;
+    return Request::Argument{
+            .lifetime = lifetime,
+            .location = NN_TRY(unvalidatedConvert(argument.location)),
+            .dimensions = NN_TRY(toUnsigned(argument.dimensions)),
+    };
+}
+
+GeneralResult<Request::MemoryPool> unvalidatedConvert(
+        const aidl_hal::RequestMemoryPool& memoryPool) {
+    using Tag = aidl_hal::RequestMemoryPool::Tag;
+    switch (memoryPool.getTag()) {
+        case Tag::pool:
+            return unvalidatedConvert(memoryPool.get<Tag::pool>());
+        case Tag::token: {
+            const auto token = memoryPool.get<Tag::token>();
+            VERIFY_NON_NEGATIVE(token) << "Memory pool token must not be negative";
+            return static_cast<Request::MemoryDomainToken>(token);
+        }
+    }
+    return NN_ERROR() << "Invalid Request::MemoryPool tag " << underlyingType(memoryPool.getTag());
+}
+
+GeneralResult<ErrorStatus> unvalidatedConvert(const aidl_hal::ErrorStatus& status) {
+    switch (status) {
+        case aidl_hal::ErrorStatus::NONE:
+        case aidl_hal::ErrorStatus::DEVICE_UNAVAILABLE:
+        case aidl_hal::ErrorStatus::GENERAL_FAILURE:
+        case aidl_hal::ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
+        case aidl_hal::ErrorStatus::INVALID_ARGUMENT:
+        case aidl_hal::ErrorStatus::MISSED_DEADLINE_TRANSIENT:
+        case aidl_hal::ErrorStatus::MISSED_DEADLINE_PERSISTENT:
+        case aidl_hal::ErrorStatus::RESOURCE_EXHAUSTED_TRANSIENT:
+        case aidl_hal::ErrorStatus::RESOURCE_EXHAUSTED_PERSISTENT:
+            return static_cast<ErrorStatus>(status);
+    }
+    return NN_ERROR() << "Invalid ErrorStatus " << underlyingType(status);
+}
+
+GeneralResult<ExecutionPreference> unvalidatedConvert(
+        const aidl_hal::ExecutionPreference& executionPreference) {
+    return static_cast<ExecutionPreference>(executionPreference);
+}
+
+GeneralResult<SharedHandle> unvalidatedConvert(
+        const ::aidl::android::hardware::common::NativeHandle& aidlNativeHandle) {
+    std::vector<base::unique_fd> duplicatedFds;
+    duplicatedFds.reserve(aidlNativeHandle.fds.size());
+    for (const auto& sourceFd : aidlNativeHandle.fds) {
+        const int fdCopy = dup(sourceFd.get());
+        if (fdCopy == -1) {
+            // TODO(b/120417090): is ANEURALNETWORKS_UNEXPECTED_NULL the correct error to return
+            // here?
+            return NN_ERROR() << "Failed to dup the fd";
+        }
+        duplicatedFds.emplace_back(fdCopy);
+    }
+    // The canonical handle receives the duplicated descriptors; the ints are copied as-is.
+    return std::make_shared<const Handle>(Handle{
+            .fds = std::move(duplicatedFds),
+            .ints = aidlNativeHandle.ints,
+    });
+}
+
+GeneralResult<ExecutionPreference> convert(
+        const aidl_hal::ExecutionPreference& executionPreference) {
+    return validatedConvert(executionPreference);
+}
+
+GeneralResult<Memory> convert(const aidl_hal::Memory& operand) {
+    return validatedConvert(operand);
+}
+
+GeneralResult<Model> convert(const aidl_hal::Model& model) {
+    return validatedConvert(model);
+}
+
+GeneralResult<Operand> convert(const aidl_hal::Operand& operand) {
+    return unvalidatedConvert(operand);
+}
+
+GeneralResult<OperandType> convert(const aidl_hal::OperandType& operandType) {
+    return unvalidatedConvert(operandType);
+}
+
+GeneralResult<Priority> convert(const aidl_hal::Priority& priority) {
+    return validatedConvert(priority);
+}
+
+GeneralResult<Request::MemoryPool> convert(const aidl_hal::RequestMemoryPool& memoryPool) {
+    return unvalidatedConvert(memoryPool);
+}
+
+GeneralResult<Request> convert(const aidl_hal::Request& request) {
+    return validatedConvert(request);
+}
+
+GeneralResult<std::vector<Operation>> convert(const std::vector<aidl_hal::Operation>& operations) {
+    return unvalidatedConvert(operations);
+}
+
+GeneralResult<std::vector<Memory>> convert(const std::vector<aidl_hal::Memory>& memories) {
+    return validatedConvert(memories);
+}
+
+GeneralResult<std::vector<uint32_t>> toUnsigned(const std::vector<int32_t>& vec) {
+    if (std::any_of(vec.cbegin(), vec.cend(), [](int32_t element) { return element < 0; })) {
+        return NN_ERROR() << "Negative value passed to conversion from signed to unsigned";
+    }
+    return std::vector<uint32_t>(vec.cbegin(), vec.cend());  // every element is >= 0 here
+}
+
+}  // namespace android::nn
+
+namespace aidl::android::hardware::neuralnetworks::utils {
+namespace {
+
+template <typename Input>
+using UnvalidatedConvertOutput =
+        std::decay_t<decltype(unvalidatedConvert(std::declval<Input>()).value())>;
+
+template <typename Type>
+nn::GeneralResult<std::vector<UnvalidatedConvertOutput<Type>>> unvalidatedConvertVec(
+        const std::vector<Type>& arguments) {
+    std::vector<UnvalidatedConvertOutput<Type>> halObject(arguments.size());
+    for (size_t i = 0; i < arguments.size(); ++i) {
+        halObject[i] = NN_TRY(unvalidatedConvert(arguments[i]));
+    }
+    return halObject;
+}
+
+template <typename Type>
+nn::GeneralResult<UnvalidatedConvertOutput<Type>> validatedConvert(const Type& canonical) {
+    const auto maybeVersion = nn::validate(canonical);
+    if (!maybeVersion.has_value()) {
+        return nn::error() << maybeVersion.error();
+    }
+    const auto version = maybeVersion.value();
+    if (version > kVersion) {
+        return NN_ERROR() << "Insufficient version: " << version << " vs required " << kVersion;
+    }
+    return utils::unvalidatedConvert(canonical);
+}
+
+template <typename Type>
+nn::GeneralResult<std::vector<UnvalidatedConvertOutput<Type>>> validatedConvert(
+        const std::vector<Type>& arguments) {
+    std::vector<UnvalidatedConvertOutput<Type>> halObject(arguments.size());
+    for (size_t i = 0; i < arguments.size(); ++i) {
+        halObject[i] = NN_TRY(validatedConvert(arguments[i]));
+    }
+    return halObject;
+}
+
+}  // namespace
+
+nn::GeneralResult<common::NativeHandle> unvalidatedConvert(const nn::SharedHandle& sharedHandle) {
+    common::NativeHandle aidlNativeHandle;
+    aidlNativeHandle.fds.reserve(sharedHandle->fds.size());
+    for (const auto& fd : sharedHandle->fds) {
+        int dupFd = dup(fd.get());
+        if (dupFd == -1) {
+            // TODO(b/120417090): is ANEURALNETWORKS_UNEXPECTED_NULL the correct error to return
+            // here?
+            return NN_ERROR() << "Failed to dup the fd";
+        }
+        aidlNativeHandle.fds.emplace_back(dupFd);
+    }
+    aidlNativeHandle.ints = sharedHandle->ints;
+    return aidlNativeHandle;
+}
+
+nn::GeneralResult<Memory> unvalidatedConvert(const nn::Memory& memory) {
+    if (memory.size > std::numeric_limits<int64_t>::max()) {
+        return NN_ERROR() << "Memory size doesn't fit into int64_t.";
+    }
+    return Memory{
+            .handle = NN_TRY(unvalidatedConvert(memory.handle)),
+            .size = static_cast<int64_t>(memory.size),
+            .name = memory.name,
+    };
+}
+
+nn::GeneralResult<ErrorStatus> unvalidatedConvert(const nn::ErrorStatus& errorStatus) {
+    switch (errorStatus) {
+        case nn::ErrorStatus::NONE:
+        case nn::ErrorStatus::DEVICE_UNAVAILABLE:
+        case nn::ErrorStatus::GENERAL_FAILURE:
+        case nn::ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
+        case nn::ErrorStatus::INVALID_ARGUMENT:
+        case nn::ErrorStatus::MISSED_DEADLINE_TRANSIENT:
+        case nn::ErrorStatus::MISSED_DEADLINE_PERSISTENT:
+        case nn::ErrorStatus::RESOURCE_EXHAUSTED_TRANSIENT:
+        case nn::ErrorStatus::RESOURCE_EXHAUSTED_PERSISTENT:
+            return static_cast<ErrorStatus>(errorStatus);
+        default:
+            return ErrorStatus::GENERAL_FAILURE;
+    }
+}
+
+nn::GeneralResult<OutputShape> unvalidatedConvert(const nn::OutputShape& outputShape) {
+    return OutputShape{.dimensions = NN_TRY(toSigned(outputShape.dimensions)),
+                       .isSufficient = outputShape.isSufficient};
+}
+
+nn::GeneralResult<Memory> convert(const nn::Memory& memory) {
+    return validatedConvert(memory);
+}
+
+nn::GeneralResult<ErrorStatus> convert(const nn::ErrorStatus& errorStatus) {
+    return validatedConvert(errorStatus);
+}
+
+nn::GeneralResult<std::vector<OutputShape>> convert(
+        const std::vector<nn::OutputShape>& outputShapes) {
+    return validatedConvert(outputShapes);
+}
+
+nn::GeneralResult<std::vector<int32_t>> toSigned(const std::vector<uint32_t>& vec) {
+    constexpr uint32_t kLargestSigned = std::numeric_limits<int32_t>::max();
+    if (std::any_of(vec.cbegin(), vec.cend(), [](uint32_t x) { return x > kLargestSigned; })) {
+        return NN_ERROR() << "Vector contains a value that doesn't fit into int32_t.";
+    }
+    return std::vector<int32_t>(vec.cbegin(), vec.cend());
+}
+
+}  // namespace aidl::android::hardware::neuralnetworks::utils
diff --git a/neuralnetworks/aidl/utils/src/Utils.cpp b/neuralnetworks/aidl/utils/src/Utils.cpp
new file mode 100644
index 0000000000..8d00e5926a
--- /dev/null
+++ b/neuralnetworks/aidl/utils/src/Utils.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Utils.h"
+
+#include <nnapi/Result.h>
+
+namespace aidl::android::hardware::neuralnetworks::utils {
+namespace {
+
+using ::android::nn::GeneralResult;
+
+template <typename Type>
+nn::GeneralResult<std::vector<Type>> cloneVec(const std::vector<Type>& arguments) {
+    std::vector<Type> clonedObjects;
+    clonedObjects.reserve(arguments.size());
+    for (const auto& argument : arguments) {
+        clonedObjects.push_back(NN_TRY(clone(argument)));
+    }
+    return clonedObjects;
+}
+
+template <typename Type>
+GeneralResult<std::vector<Type>> clone(const std::vector<Type>& arguments) {
+    return cloneVec(arguments);
+}
+
+}  // namespace
+
+// Copies `memory`, duplicating each file descriptor in its handle with dup().
+GeneralResult<Memory> clone(const Memory& memory) {
+    common::NativeHandle duplicate;
+    duplicate.fds.reserve(memory.handle.fds.size());
+    for (const auto& sourceFd : memory.handle.fds) {
+        const int fdCopy = dup(sourceFd.get());
+        if (fdCopy < 0) {
+            return NN_ERROR() << "Couldn't dup a file descriptor";
+        }
+        duplicate.fds.emplace_back(fdCopy);
+    }
+    duplicate.ints = memory.handle.ints;
+    return Memory{
+            .handle = std::move(duplicate),
+            .size = memory.size,
+            .name = memory.name};
+}
+
+GeneralResult<RequestMemoryPool> clone(const RequestMemoryPool& requestPool) {
+    using Tag = RequestMemoryPool::Tag;
+    switch (requestPool.getTag()) {
+        case Tag::pool:
+            return RequestMemoryPool::make<Tag::pool>(NN_TRY(clone(requestPool.get<Tag::pool>())));
+        case Tag::token:
+            return RequestMemoryPool::make<Tag::token>(requestPool.get<Tag::token>());
+    }
+    // Using explicit type conversion because std::variant inside the RequestMemoryPool confuses the
+    // compiler.
+    return (NN_ERROR() << "Unrecognized request pool tag: " << requestPool.getTag())
+            .
+            operator GeneralResult<RequestMemoryPool>();
+}
+
+GeneralResult<Request> clone(const Request& request) {
+    return Request{
+            .inputs = request.inputs,
+            .outputs = request.outputs,
+            .pools = NN_TRY(clone(request.pools)),
+    };
+}
+
+GeneralResult<Model> clone(const Model& model) {
+    return Model{
+            .main = model.main,
+            .referenced = model.referenced,
+            .operandValues = model.operandValues,
+            .pools = NN_TRY(clone(model.pools)),
+            .relaxComputationFloat32toFloat16 = model.relaxComputationFloat32toFloat16,
+            .extensionNameToPrefix = model.extensionNameToPrefix,
+    };
+}
+
+}  // namespace aidl::android::hardware::neuralnetworks::utils
diff --git a/neuralnetworks/aidl/vts/OWNERS b/neuralnetworks/aidl/vts/OWNERS
new file mode 100644
index 0000000000..6719a5b3a2
--- /dev/null
+++ b/neuralnetworks/aidl/vts/OWNERS
@@ -0,0 +1,12 @@
+# Neuralnetworks team
+butlermichael@google.com
+dgross@google.com
+jeanluc@google.com
+levp@google.com
+miaowang@google.com
+mikie@google.com
+mks@google.com
+pszczepaniak@google.com
+slavash@google.com
+vddang@google.com
+xusongw@google.com
diff --git a/neuralnetworks/aidl/vts/functional/Android.bp b/neuralnetworks/aidl/vts/functional/Android.bp
new file mode 100644
index 0000000000..aa7afbf6a7
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/Android.bp
@@ -0,0 +1,68 @@
+//
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+cc_test {
+    name: "VtsHalNeuralnetworksTargetTest",
+    defaults: [
+        "neuralnetworks_vts_functional_defaults",
+        "use_libaidlvintf_gtest_helper_static",
+    ],
+    srcs: [
+        "BasicTests.cpp",
+        "Callbacks.cpp",
+        "CompilationCachingTests.cpp",
+        "GeneratedTestHarness.cpp",
+        "MemoryDomainTests.cpp",
+        "QualityOfServiceTests.cpp",
+        "TestAssertions.cpp",
+        "TestMain.cpp",
+        "Utils.cpp",
+        "ValidateModel.cpp",
+        "ValidateRequest.cpp",
+        "VtsHalNeuralnetworks.cpp",
+    ],
+    shared_libs: [
+        "libbinder_ndk",
+        "libnativewindow",
+        "libvndksupport",
+    ],
+    static_libs: [
+        "android.hardware.common-V2-ndk_platform",
+        "android.hardware.neuralnetworks-V1-ndk_platform",
+        "android.hidl.allocator@1.0",
+        "android.hidl.memory@1.0",
+        "libgmock",
+        "libhidlmemory",
+        "libneuralnetworks_generated_test_harness",
+        "libneuralnetworks_utils",
+        "libsync",
+        "neuralnetworks_utils_hal_aidl",
+    ],
+    whole_static_libs: [
+        "neuralnetworks_generated_V1_0_example",
+        "neuralnetworks_generated_V1_1_example",
+        "neuralnetworks_generated_V1_2_example",
+        "neuralnetworks_generated_V1_3_example",
+    ],
+    header_libs: [
+        "libbase_headers",
+        "libneuralnetworks_headers",
+    ],
+    test_suites: [
+        "general-tests",
+        "vts",
+    ],
+}
diff --git a/neuralnetworks/aidl/vts/functional/AndroidTest.xml b/neuralnetworks/aidl/vts/functional/AndroidTest.xml
new file mode 100644
index 0000000000..384d42078f
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/AndroidTest.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright (C) 2020 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+<configuration description="Runs VtsHalNeuralnetworksTargetTest.">
+    <option name="test-suite-tag" value="apct" />
+    <option name="test-suite-tag" value="apct-native" />
+
+    <target_preparer class="com.android.tradefed.targetprep.RootTargetPreparer">
+    </target_preparer>
+
+    <target_preparer class="com.android.tradefed.targetprep.PushFilePreparer">
+        <option name="cleanup" value="true" />
+        <option name="push" value="VtsHalNeuralnetworksTargetTest->/data/local/tmp/VtsHalNeuralnetworksTargetTest" />
+    </target_preparer>
+
+    <test class="com.android.tradefed.testtype.GTest" >
+        <option name="native-test-device-path" value="/data/local/tmp" />
+        <option name="module-name" value="VtsHalNeuralnetworksTargetTest" />
+        <option name="native-test-timeout" value="20m" />
+    </test>
+</configuration>
diff --git a/neuralnetworks/aidl/vts/functional/BasicTests.cpp b/neuralnetworks/aidl/vts/functional/BasicTests.cpp
new file mode 100644
index 0000000000..b2f4507c22
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/BasicTests.cpp
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "neuralnetworks_aidl_hal_test"
+
+#include <aidl/android/hardware/neuralnetworks/Capabilities.h>
+#include <aidl/android/hardware/neuralnetworks/IDevice.h>
+#include <aidl/android/hardware/neuralnetworks/Operand.h>
+#include <aidl/android/hardware/neuralnetworks/OperandType.h>
+#include <aidl/android/hardware/neuralnetworks/Priority.h>
+#include <android/binder_interface_utils.h>
+
+#include "Utils.h"
+#include "VtsHalNeuralnetworks.h"
+
+namespace aidl::android::hardware::neuralnetworks::vts::functional {
+
+using implementation::PreparedModelCallback;
+
+// create device test
+TEST_P(NeuralNetworksAidlTest, CreateDevice) {}
+
+// initialization
+TEST_P(NeuralNetworksAidlTest, GetCapabilitiesTest) {
+    Capabilities capabilities;
+    const auto retStatus = kDevice->getCapabilities(&capabilities);
+    ASSERT_TRUE(retStatus.isOk());
+
+    auto isPositive = [](const PerformanceInfo& perf) {
+        return perf.execTime > 0.0f && perf.powerUsage > 0.0f;
+    };
+
+    EXPECT_TRUE(isPositive(capabilities.relaxedFloat32toFloat16PerformanceScalar));
+    EXPECT_TRUE(isPositive(capabilities.relaxedFloat32toFloat16PerformanceTensor));
+    const auto& opPerf = capabilities.operandPerformance;
+    EXPECT_TRUE(
+            std::all_of(opPerf.begin(), opPerf.end(),
+                        [isPositive](const OperandPerformance& a) { return isPositive(a.info); }));
+    EXPECT_TRUE(std::is_sorted(opPerf.begin(), opPerf.end(),
+                               [](const OperandPerformance& a, const OperandPerformance& b) {
+                                   return a.type < b.type;
+                               }));
+    EXPECT_TRUE(std::all_of(opPerf.begin(), opPerf.end(), [](const OperandPerformance& a) {
+        return a.type != OperandType::SUBGRAPH;
+    }));
+    EXPECT_TRUE(isPositive(capabilities.ifPerformance));
+    EXPECT_TRUE(isPositive(capabilities.whilePerformance));
+}
+
+// detect cycle
+TEST_P(NeuralNetworksAidlTest, CycleTest) {
+    // opnd0 = TENSOR_FLOAT32            // model input
+    // opnd1 = TENSOR_FLOAT32            // model input
+    // opnd2 = INT32                     // model input
+    // opnd3 = ADD(opnd0, opnd4, opnd2)
+    // opnd4 = ADD(opnd1, opnd3, opnd2)
+    // opnd5 = ADD(opnd4, opnd0, opnd2)  // model output
+    //
+    //            +-----+
+    //            |     |
+    //            v     |
+    // 3 = ADD(0, 4, 2) |
+    // |                |
+    // +----------+     |
+    //            |     |
+    //            v     |
+    // 4 = ADD(1, 3, 2) |
+    // |                |
+    // +----------------+
+    // |
+    // |
+    // +-------+
+    //         |
+    //         v
+    // 5 = ADD(4, 0, 2)
+
+    const std::vector<Operand> operands = {
+            {
+                    // operands[0]
+                    .type = OperandType::TENSOR_FLOAT32,
+                    .dimensions = {1},
+                    .scale = 0.0f,
+                    .zeroPoint = 0,
+                    .lifetime = OperandLifeTime::SUBGRAPH_INPUT,
+                    .location = {.poolIndex = 0, .offset = 0, .length = 0},
+            },
+            {
+                    // operands[1]
+                    .type = OperandType::TENSOR_FLOAT32,
+                    .dimensions = {1},
+                    .scale = 0.0f,
+                    .zeroPoint = 0,
+                    .lifetime = OperandLifeTime::SUBGRAPH_INPUT,
+                    .location = {.poolIndex = 0, .offset = 0, .length = 0},
+            },
+            {
+                    // operands[2]
+                    .type = OperandType::INT32,
+                    .dimensions = {},
+                    .scale = 0.0f,
+                    .zeroPoint = 0,
+                    .lifetime = OperandLifeTime::SUBGRAPH_INPUT,
+                    .location = {.poolIndex = 0, .offset = 0, .length = 0},
+            },
+            {
+                    // operands[3]
+                    .type = OperandType::TENSOR_FLOAT32,
+                    .dimensions = {1},
+                    .scale = 0.0f,
+                    .zeroPoint = 0,
+                    .lifetime = OperandLifeTime::TEMPORARY_VARIABLE,
+                    .location = {.poolIndex = 0, .offset = 0, .length = 0},
+            },
+            {
+                    // operands[4]
+                    .type = OperandType::TENSOR_FLOAT32,
+                    .dimensions = {1},
+                    .scale = 0.0f,
+                    .zeroPoint = 0,
+                    .lifetime = OperandLifeTime::TEMPORARY_VARIABLE,
+                    .location = {.poolIndex = 0, .offset = 0, .length = 0},
+            },
+            {
+                    // operands[5]
+                    .type = OperandType::TENSOR_FLOAT32,
+                    .dimensions = {1},
+                    .scale = 0.0f,
+                    .zeroPoint = 0,
+                    .lifetime = OperandLifeTime::SUBGRAPH_OUTPUT,
+                    .location = {.poolIndex = 0, .offset = 0, .length = 0},
+            },
+    };
+
+    const std::vector<Operation> operations = {
+            {.type = OperationType::ADD, .inputs = {0, 4, 2}, .outputs = {3}},
+            {.type = OperationType::ADD, .inputs = {1, 3, 2}, .outputs = {4}},
+            {.type = OperationType::ADD, .inputs = {4, 0, 2}, .outputs = {5}},
+    };
+
+    Subgraph subgraph = {
+            .operands = operands,
+            .operations = operations,
+            .inputIndexes = {0, 1, 2},
+            .outputIndexes = {5},
+    };
+    const Model model = {
+            .main = std::move(subgraph),
+            .referenced = {},
+            .operandValues = {},
+            .pools = {},
+    };
+
+    // ensure that getSupportedOperations() checks model validity
+    std::vector<bool> supportedOps;
+    const auto supportedOpsStatus = kDevice->getSupportedOperations(model, &supportedOps);
+    ASSERT_FALSE(supportedOpsStatus.isOk());
+    ASSERT_EQ(supportedOpsStatus.getExceptionCode(), EX_SERVICE_SPECIFIC);
+    ASSERT_EQ(static_cast<ErrorStatus>(supportedOpsStatus.getServiceSpecificError()),
+              ErrorStatus::INVALID_ARGUMENT);
+
+    // ensure that prepareModel() checks model validity
+    auto preparedModelCallback = ndk::SharedRefBase::make<PreparedModelCallback>();
+    auto prepareLaunchStatus =
+            kDevice->prepareModel(model, ExecutionPreference::FAST_SINGLE_ANSWER, kDefaultPriority,
+                                  kNoDeadline, {}, {}, kEmptyCacheToken, preparedModelCallback);
+    //     Note that preparation can fail for reasons other than an
+    //     invalid model (invalid model should result in
+    //     INVALID_ARGUMENT) -- for example, perhaps not all
+    //     operations are supported, or perhaps the device hit some
+    //     kind of capacity limit.
+    ASSERT_FALSE(prepareLaunchStatus.isOk());
+    EXPECT_EQ(prepareLaunchStatus.getExceptionCode(), EX_SERVICE_SPECIFIC);
+    EXPECT_NE(static_cast<ErrorStatus>(prepareLaunchStatus.getServiceSpecificError()),
+              ErrorStatus::NONE);
+
+    EXPECT_NE(preparedModelCallback->getStatus(), ErrorStatus::NONE);
+    EXPECT_EQ(preparedModelCallback->getPreparedModel(), nullptr);
+}
+
+}  // namespace aidl::android::hardware::neuralnetworks::vts::functional
diff --git a/neuralnetworks/aidl/vts/functional/Callbacks.cpp b/neuralnetworks/aidl/vts/functional/Callbacks.cpp
new file mode 100644
index 0000000000..ca2bb48a3e
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/Callbacks.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "Callbacks"
+
+#include "Callbacks.h"
+
+#include <android-base/logging.h>
+#include <android/binder_auto_utils.h>
+#include <limits>
+
+namespace aidl::android::hardware::neuralnetworks::implementation {
+
+ndk::ScopedAStatus PreparedModelCallback::notify(
+        ErrorStatus errorStatus, const std::shared_ptr<IPreparedModel>& preparedModel) {
+    {
+        std::lock_guard<std::mutex> hold(mMutex);
+        // quick-return if object has already been notified
+        if (mNotified) {
+            return ndk::ScopedAStatus::ok();
+        }
+        // store results and mark as notified
+        mErrorStatus = errorStatus;
+        mPreparedModel = preparedModel;
+        mNotified = true;
+    }
+    mCondition.notify_all();
+    return ndk::ScopedAStatus::ok();
+}
+
+void PreparedModelCallback::wait() const {
+    std::unique_lock<std::mutex> lock(mMutex);
+    mCondition.wait(lock, [this] { return mNotified; });
+}
+
+ErrorStatus PreparedModelCallback::getStatus() const {
+    wait();
+    return mErrorStatus;
+}
+
+std::shared_ptr<IPreparedModel> PreparedModelCallback::getPreparedModel() const {
+    wait();
+    return mPreparedModel;
+}
+
+}  // namespace aidl::android::hardware::neuralnetworks::implementation
diff --git a/neuralnetworks/aidl/vts/functional/Callbacks.h b/neuralnetworks/aidl/vts/functional/Callbacks.h
new file mode 100644
index 0000000000..0eb4d5f4a6
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/Callbacks.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_HARDWARE_NEURALNETWORKS_AIDL_CALLBACKS_H
+#define ANDROID_HARDWARE_NEURALNETWORKS_AIDL_CALLBACKS_H
+
+#include <android-base/thread_annotations.h>
+#include <condition_variable>
+#include <mutex>
+
+#include <aidl/android/hardware/neuralnetworks/BnPreparedModelCallback.h>
+#include <aidl/android/hardware/neuralnetworks/ErrorStatus.h>
+#include <aidl/android/hardware/neuralnetworks/IPreparedModel.h>
+
+/*
+ * The Callback classes are used internally by the NeuralNetworks runtime to
+ * synchronize between different threads. An asynchronous task is launched
+ * paired with a callback object. When a client thread requires the output being
+ * generated by the asynchronous task, the client thread can wait for the result
+ * and be blocked until it has completed. Any wait may safely be called
+ * concurrently, even on the same callback object. When the asynchronous task
+ * has finished its workload, it must immediately call "notify". If the
+ * asynchronous task has failed to launch, the function that tried to launch the
+ * asynchronous task must immediately call "notify". This "notify" call
+ * awakens any client threads waiting on the callback object.
+ *
+ * These classes exist to enable synchronization across AIDL. When
+ * synchronization is only required in the same process, consider using
+ * std::future, std::mutex, std::condition_variable, or std::experimental::latch
+ * instead.
+ */
+
+namespace aidl::android::hardware::neuralnetworks::implementation {
+
+/**
+ * The PreparedModelCallback class is used to receive the error status of
+ * preparing a model as well as the prepared model from a task executing
+ * asynchronously with respect to the runtime. If a calling thread calls wait
+ * or get* on a PreparedModelCallback object and the corresponding asynchronous
+ * task has not finished preparing the model, the calling thread will block
+ * until the asynchronous task has called notify.
+ *
+ * If the callback object is notified more than once, only the results of the
+ * first call to notify are used, and the results from subsequent calls are
+ * discarded.
+ *
+ * This callback object is passed as an argument to IDevice::prepareModel*.
+ */
+class PreparedModelCallback : public BnPreparedModelCallback {
+  public:
+    /**
+     * IPreparedModelCallback::notify marks the callback object with the return
+     * status of the asynchronous model preparation along with the prepared
+     * model, and allows all prior and future wait calls on the
+     * PreparedModelCallback object to proceed.
+     *
+     * notify must be called on every PreparedModelCallback object that is
+     * waited on: wait and the get* accessors block until it has been called.
+     *
+     * If the callback object is notified more than once, only the results of
+     * the first call to notify are used, and the results from subsequent calls
+     * are discarded.
+     *
+     * @param status Error status returned from asynchronously preparing the
+     *     model; will be:
+     *     - NONE if the asynchronous preparation was successful
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     *     - INVALID_ARGUMENT if the input model is invalid
+     * @param preparedModel Returned model that has been prepared for execution,
+     *     nullptr if the model was unable to be prepared.
+     * @return An OK status; the call itself does not report errors.
+     */
+    ndk::ScopedAStatus notify(ErrorStatus status,
+                              const std::shared_ptr<IPreparedModel>& preparedModel) override;
+
+    /**
+     * PreparedModelCallback::wait blocks until notify has been called on the
+     * callback object. It returns immediately if notify has already been
+     * called.
+     */
+    void wait() const;
+
+    /**
+     * Retrieves the error status returned from the asynchronous task launched
+     * by IDevice::prepareModel*. If IDevice::prepareModel* has not finished
+     * asynchronously preparing the model, this call will block until the
+     * asynchronous task notifies the object.
+     *
+     * @return status Error status returned from asynchronously preparing the
+     *     model; will be:
+     *     - NONE if the asynchronous preparation was successful
+     *     - DEVICE_UNAVAILABLE if driver is offline or busy
+     *     - GENERAL_FAILURE if there is an unspecified error
+     *     - INVALID_ARGUMENT if the input model is invalid
+     */
+    ErrorStatus getStatus() const;
+
+    /**
+     * Retrieves the model that has been prepared for execution from the
+     * asynchronous task launched by IDevice::prepareModel*. If
+     * IDevice::prepareModel* has not finished asynchronously preparing the
+     * model, this call will block until the asynchronous task notifies the
+     * object.
+     *
+     * @return preparedModel Returned model that has been prepared for
+     *     execution, nullptr if the model was unable to be prepared.
+     */
+    std::shared_ptr<IPreparedModel> getPreparedModel() const;
+
+  private:
+    // Protects mNotified. Declared mutable so that the const wait/get* methods can lock it.
+    mutable std::mutex mMutex;
+    // Signalled by notify; waited on by wait.
+    mutable std::condition_variable mCondition;
+    // Set to true by the first call to notify and never reset.
+    bool mNotified GUARDED_BY(mMutex) = false;
+    // Result of the first notify call. Holds GENERAL_FAILURE until notify stores a status.
+    ErrorStatus mErrorStatus = ErrorStatus::GENERAL_FAILURE;
+    // Model passed to the first notify call. Empty until then.
+    std::shared_ptr<IPreparedModel> mPreparedModel;
+};
+
+}  // namespace aidl::android::hardware::neuralnetworks::implementation
+
+#endif  // ANDROID_HARDWARE_NEURALNETWORKS_AIDL_CALLBACKS_H
diff --git a/neuralnetworks/aidl/vts/functional/CompilationCachingTests.cpp b/neuralnetworks/aidl/vts/functional/CompilationCachingTests.cpp
new file mode 100644
index 0000000000..e0b529f280
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/CompilationCachingTests.cpp
@@ -0,0 +1,1177 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "neuralnetworks_aidl_hal_test"
+
+#include <android-base/logging.h>
+#include <android/binder_auto_utils.h>
+#include <android/binder_interface_utils.h>
+#include <android/binder_status.h>
+#include <fcntl.h>
+#include <ftw.h>
+#include <gtest/gtest.h>
+#include <hidlmemory/mapping.h>
+#include <unistd.h>
+
+#include <cstdio>
+#include <cstdlib>
+#include <iterator>
+#include <random>
+#include <thread>
+
+#include "Callbacks.h"
+#include "GeneratedTestHarness.h"
+#include "MemoryUtils.h"
+#include "TestHarness.h"
+#include "Utils.h"
+#include "VtsHalNeuralnetworks.h"
+
+// Forward declaration of the mobilenet generated test models in
+// frameworks/ml/nn/runtime/test/generated/.
+// Only the accessor functions are declared here; their definitions are not part of this file.
+namespace generated_tests::mobilenet_224_gender_basic_fixed {
+// Accessor aliased as float32_model::get_test_model in this file.
+const test_helper::TestModel& get_test_model();
+}  // namespace generated_tests::mobilenet_224_gender_basic_fixed
+
+namespace generated_tests::mobilenet_quantized {
+// Accessor aliased as quant8_model::get_test_model in this file.
+const test_helper::TestModel& get_test_model();
+}  // namespace generated_tests::mobilenet_quantized
+
+namespace aidl::android::hardware::neuralnetworks::vts::functional {
+
+using namespace test_helper;
+using implementation::PreparedModelCallback;
+
+// Model used by the fixture when the operand type under test is TENSOR_FLOAT32.
+namespace float32_model {
+
+// Function pointer to the generated mobilenet_224_gender_basic_fixed model accessor.
+constexpr auto get_test_model = generated_tests::mobilenet_224_gender_basic_fixed::get_test_model;
+
+}  // namespace float32_model
+
+// Model used by the fixture for every operand type other than TENSOR_FLOAT32.
+namespace quant8_model {
+
+// Function pointer to the generated mobilenet_quantized model accessor.
+constexpr auto get_test_model = generated_tests::mobilenet_quantized::get_test_model;
+
+}  // namespace quant8_model
+
+namespace {
+
+// How createCacheFds opens a cache file: READ_ONLY maps to O_RDONLY, WRITE_ONLY to
+// O_WRONLY | O_CREAT, and READ_WRITE to O_RDWR | O_CREAT.
+enum class AccessMode { READ_WRITE, READ_ONLY, WRITE_ONLY };
+
+// Opens one file descriptor per cache file.
+//
+// files: paths of the cache files to open.
+// mode:  access mode for each file; mode[i] applies to files[i], so both vectors must have the
+//        same size.
+// fds:   output; cleared first, then filled with one descriptor per file, in the same order as
+//        `files`.
+//
+// READ_ONLY opens the file as is, while WRITE_ONLY and READ_WRITE also create it (owner
+// read/write permission) if it does not exist. Any problem is reported through a fatal gtest
+// failure, in which case `fds` holds only the descriptors opened so far.
+void createCacheFds(const std::vector<std::string>& files, const std::vector<AccessMode>& mode,
+                    std::vector<ndk::ScopedFileDescriptor>* fds) {
+    // A shorter `mode` vector would otherwise be indexed out of bounds in the loop below.
+    ASSERT_EQ(files.size(), mode.size());
+    fds->clear();
+    fds->reserve(files.size());
+    for (size_t i = 0; i < files.size(); i++) {
+        const auto& file = files[i];
+        int fd = -1;
+        if (mode[i] == AccessMode::READ_ONLY) {
+            fd = open(file.c_str(), O_RDONLY);
+        } else if (mode[i] == AccessMode::WRITE_ONLY) {
+            fd = open(file.c_str(), O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR);
+        } else if (mode[i] == AccessMode::READ_WRITE) {
+            fd = open(file.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+        } else {
+            FAIL() << "Unknown access mode for cache file " << file;
+        }
+        ASSERT_GE(fd, 0) << "Failed to open cache file " << file;
+        fds->emplace_back(fd);
+    }
+}
+
+// Convenience overload: opens every file in `files` with the same access mode.
+void createCacheFds(const std::vector<std::string>& files, AccessMode mode,
+                    std::vector<ndk::ScopedFileDescriptor>* fds) {
+    const std::vector<AccessMode> modePerFile(files.size(), mode);
+    createCacheFds(files, modePerFile, fds);
+}
+
+// Create a chain of broadcast operations. The second operand is always constant tensor [1].
+// For simplicity, activation scalar is shared. The second operand is not shared
+// in the model to let driver maintain a non-trivial size of constant data and the corresponding
+// data locations in cache.
+//
+//                --------- activation --------
+//                ↓      ↓      ↓             ↓
+// E.g. input -> ADD -> ADD -> ADD -> ... -> ADD -> output
+//                ↑      ↑      ↑             ↑
+//               [1]    [1]    [1]           [1]
+//
+// This function assumes the operation is either ADD or MUL.
+//
+// Template parameters:
+//   CppType      element type used to fill the operand buffers.
+//   operandType  operand type given to every tensor in the chain.
+// Parameters:
+//   op   operation repeated along the chain.
+//   len  number of operations in the chain.
+// Returns a non-relaxed TestModel whose main subgraph has operand 1 as its only input and
+// operand len * 2 + 1 as its only output.
+//
+// Operand layout (len * 2 + 2 operands in total):
+//   index 0          shared activation scalar
+//   index 2 * i + 1  first input of operation i (the model input for i == 0)
+//   index 2 * i + 2  constant second input of operation i
+//   index 2 * i + 3  output of operation i, i.e. the first input of operation i + 1
+template <typename CppType, TestOperandType operandType>
+TestModel createLargeTestModelImpl(TestOperationType op, uint32_t len) {
+    // Non-fatal check: an unexpected operation type is reported but the model is still built.
+    EXPECT_TRUE(op == TestOperationType::ADD || op == TestOperationType::MUL);
+
+    // Model operations and operands.
+    std::vector<TestOperation> operations(len);
+    std::vector<TestOperand> operands(len * 2 + 2);
+
+    // The activation scalar, value = 0.
+    operands[0] = {
+            .type = TestOperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = len,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = TestOperandLifeTime::CONSTANT_COPY,
+            .data = TestBuffer::createFromVector<int32_t>({0}),
+    };
+
+    // The buffer value of the constant second operand. The logical value is always 1.0f.
+    CppType bufferValue;
+    // The scale of the first and second operand.
+    float scale1, scale2;
+    if (operandType == TestOperandType::TENSOR_FLOAT32) {
+        bufferValue = 1.0f;
+        scale1 = 0.0f;
+        scale2 = 0.0f;
+    } else if (op == TestOperationType::ADD) {
+        bufferValue = 1;
+        scale1 = 1.0f;
+        scale2 = 1.0f;
+    } else {
+        // To satisfy the constraint on quant8 MUL: input0.scale * input1.scale < output.scale,
+        // set input1 to have scale = 0.5f and bufferValue = 2, i.e. 1.0f in floating point.
+        bufferValue = 2;
+        scale1 = 1.0f;
+        scale2 = 0.5f;
+    }
+
+    for (uint32_t i = 0; i < len; i++) {
+        const uint32_t firstInputIndex = i * 2 + 1;
+        const uint32_t secondInputIndex = firstInputIndex + 1;
+        const uint32_t outputIndex = secondInputIndex + 1;
+
+        // The first operation input.
+        // Only the head of the chain is a model input with data; the rest are temporaries.
+        operands[firstInputIndex] = {
+                .type = operandType,
+                .dimensions = {1},
+                .numberOfConsumers = 1,
+                .scale = scale1,
+                .zeroPoint = 0,
+                .lifetime = (i == 0 ? TestOperandLifeTime::MODEL_INPUT
+                                    : TestOperandLifeTime::TEMPORARY_VARIABLE),
+                .data = (i == 0 ? TestBuffer::createFromVector<CppType>({1}) : TestBuffer()),
+        };
+
+        // The second operation input, value = 1.
+        operands[secondInputIndex] = {
+                .type = operandType,
+                .dimensions = {1},
+                .numberOfConsumers = 1,
+                .scale = scale2,
+                .zeroPoint = 0,
+                .lifetime = TestOperandLifeTime::CONSTANT_COPY,
+                .data = TestBuffer::createFromVector<CppType>({bufferValue}),
+        };
+
+        // The operation. All operations share the same activation scalar.
+        // The output operand is created as an input in the next iteration of the loop, in the case
+        // of all but the last member of the chain; and after the loop as a model output, in the
+        // case of the last member of the chain.
+        operations[i] = {
+                .type = op,
+                .inputs = {firstInputIndex, secondInputIndex, /*activation scalar*/ 0},
+                .outputs = {outputIndex},
+        };
+    }
+
+    // For TestOperationType::ADD, output = 1 + 1 * len = len + 1
+    // For TestOperationType::MUL, output = 1 * 1 ^ len = 1
+    CppType outputResult = static_cast<CppType>(op == TestOperationType::ADD ? len + 1u : 1u);
+
+    // The model output.
+    // This is the last operand, index len * 2 + 1, which the final operation writes to.
+    operands.back() = {
+            .type = operandType,
+            .dimensions = {1},
+            .numberOfConsumers = 0,
+            .scale = scale1,
+            .zeroPoint = 0,
+            .lifetime = TestOperandLifeTime::MODEL_OUTPUT,
+            .data = TestBuffer::createFromVector<CppType>({outputResult}),
+    };
+
+    return {
+            .main = {.operands = std::move(operands),
+                     .operations = std::move(operations),
+                     .inputIndexes = {1},
+                     .outputIndexes = {len * 2 + 1}},
+            .isRelaxed = false,
+    };
+}
+
+}  // namespace
+
+// Base fixture for the compilation caching tests.
+//
+// SetUp creates a fresh temporary cache directory, asks the device how many model and data cache
+// files it needs, and creates that many empty files plus one extra temporary file. TearDown
+// removes the directory again unless the test failed. The helper methods wrap the asynchronous
+// prepareModel / prepareModelFromCache calls and wait for their callbacks.
+class CompilationCachingTestBase : public testing::Test {
+  protected:
+    CompilationCachingTestBase(std::shared_ptr<IDevice> device, OperandType type)
+        : kDevice(std::move(device)), kOperandType(type) {}
+
+    void SetUp() override {
+        testing::Test::SetUp();
+        ASSERT_NE(kDevice.get(), nullptr);
+
+        // Create cache directory. The cache directory and a temporary cache file is always created
+        // to test the behavior of prepareModelFromCache, even when caching is not supported.
+        char cacheDirTemp[] = "/data/local/tmp/TestCompilationCachingXXXXXX";
+        char* cacheDir = mkdtemp(cacheDirTemp);
+        ASSERT_NE(cacheDir, nullptr);
+        mCacheDir = cacheDir;
+        mCacheDir.push_back('/');
+
+        NumberOfCacheFiles numCacheFiles;
+        const auto ret = kDevice->getNumberOfCacheFilesNeeded(&numCacheFiles);
+        ASSERT_TRUE(ret.isOk());
+
+        // Validate the counts as reported by the device before they are stored in the unsigned
+        // members: once converted to uint32_t a negative count becomes a large positive number
+        // and a ">= 0" check can no longer fail.
+        // NOTE(review): assumes the NumberOfCacheFiles fields are signed integers - confirm.
+        ASSERT_GE(numCacheFiles.numModelCache, 0)
+                << "Invalid numModelCache: " << numCacheFiles.numModelCache;
+        ASSERT_GE(numCacheFiles.numDataCache, 0)
+                << "Invalid numDataCache: " << numCacheFiles.numDataCache;
+        mNumModelCache = static_cast<uint32_t>(numCacheFiles.numModelCache);
+        mNumDataCache = static_cast<uint32_t>(numCacheFiles.numDataCache);
+        mIsCachingSupported = mNumModelCache > 0 || mNumDataCache > 0;
+
+        // Create empty cache files.
+        mTmpCache = mCacheDir + "tmp";
+        for (uint32_t i = 0; i < mNumModelCache; i++) {
+            mModelCache.push_back({mCacheDir + "model" + std::to_string(i)});
+        }
+        for (uint32_t i = 0; i < mNumDataCache; i++) {
+            mDataCache.push_back({mCacheDir + "data" + std::to_string(i)});
+        }
+        // Placeholder handles, use AccessMode::WRITE_ONLY for createCacheFds to create files.
+        std::vector<ndk::ScopedFileDescriptor> modelHandle, dataHandle, tmpHandle;
+        createCacheFds(mModelCache, AccessMode::WRITE_ONLY, &modelHandle);
+        createCacheFds(mDataCache, AccessMode::WRITE_ONLY, &dataHandle);
+        createCacheFds({mTmpCache}, AccessMode::WRITE_ONLY, &tmpHandle);
+
+        if (!mIsCachingSupported) {
+            LOG(INFO) << "NN VTS: Early termination of test because vendor service does not "
+                         "support compilation caching.";
+            std::cout << "[          ]   Early termination of test because vendor service does not "
+                         "support compilation caching."
+                      << std::endl;
+        }
+    }
+
+    void TearDown() override {
+        // If the test passes, remove the tmp directory.  Otherwise, keep it for debugging purposes.
+        if (!testing::Test::HasFailure()) {
+            // Recursively remove the cache directory specified by mCacheDir.
+            // FTW_DEPTH visits the contents of a directory before the directory itself, so every
+            // directory is already empty by the time remove() is called on it.
+            auto callback = [](const char* entry, const struct stat*, int, struct FTW*) {
+                return remove(entry);
+            };
+            nftw(mCacheDir.c_str(), callback, 128, FTW_DEPTH | FTW_MOUNT | FTW_PHYS);
+        }
+        testing::Test::TearDown();
+    }
+
+    // Model and examples creators. According to kOperandType, the following methods will return
+    // either float32 model/examples or the quant8 variant.
+    TestModel createTestModel() {
+        if (kOperandType == OperandType::TENSOR_FLOAT32) {
+            return float32_model::get_test_model();
+        } else {
+            return quant8_model::get_test_model();
+        }
+    }
+
+    // Builds a chain of `len` operations of type `op` (see createLargeTestModelImpl), using float
+    // tensors when kOperandType is TENSOR_FLOAT32 and quant8 asymmetric tensors otherwise.
+    TestModel createLargeTestModel(OperationType op, uint32_t len) {
+        if (kOperandType == OperandType::TENSOR_FLOAT32) {
+            return createLargeTestModelImpl<float, TestOperandType::TENSOR_FLOAT32>(
+                    static_cast<TestOperationType>(op), len);
+        } else {
+            return createLargeTestModelImpl<uint8_t, TestOperandType::TENSOR_QUANT8_ASYMM>(
+                    static_cast<TestOperationType>(op), len);
+        }
+    }
+
+    // See if the service can handle the model.
+    // Returns true only if getSupportedOperations succeeds, reports one entry per operation of the
+    // main subgraph, and every entry is true. A failed call or a size mismatch is also recorded as
+    // a non-fatal test failure.
+    bool isModelFullySupported(const Model& model) {
+        std::vector<bool> supportedOps;
+        const auto supportedCall = kDevice->getSupportedOperations(model, &supportedOps);
+        EXPECT_TRUE(supportedCall.isOk());
+        EXPECT_EQ(supportedOps.size(), model.main.operations.size());
+        if (!supportedCall.isOk() || supportedOps.size() != model.main.operations.size()) {
+            return false;
+        }
+        return std::all_of(supportedOps.begin(), supportedOps.end(),
+                           [](bool valid) { return valid; });
+    }
+
+    // Prepares `model` while passing the given cache handles and mToken to the device, and
+    // asserts that both the launch and the asynchronous preparation succeed.
+    // If `preparedModel` is not null it is reset to nullptr first and receives the prepared model
+    // on success.
+    void saveModelToCache(const Model& model,
+                          const std::vector<ndk::ScopedFileDescriptor>& modelCache,
+                          const std::vector<ndk::ScopedFileDescriptor>& dataCache,
+                          std::shared_ptr<IPreparedModel>* preparedModel = nullptr) {
+        if (preparedModel != nullptr) *preparedModel = nullptr;
+
+        // Launch prepare model.
+        std::shared_ptr<PreparedModelCallback> preparedModelCallback =
+                ndk::SharedRefBase::make<PreparedModelCallback>();
+        std::vector<uint8_t> cacheToken(std::begin(mToken), std::end(mToken));
+        const auto prepareLaunchStatus = kDevice->prepareModel(
+                model, ExecutionPreference::FAST_SINGLE_ANSWER, kDefaultPriority, kNoDeadline,
+                modelCache, dataCache, cacheToken, preparedModelCallback);
+        ASSERT_TRUE(prepareLaunchStatus.isOk());
+
+        // Retrieve prepared model.
+        preparedModelCallback->wait();
+        ASSERT_EQ(preparedModelCallback->getStatus(), ErrorStatus::NONE);
+        if (preparedModel != nullptr) {
+            *preparedModel = preparedModelCallback->getPreparedModel();
+        }
+    }
+
+    // Returns true (after logging the reason) if `status` is GENERAL_FAILURE, in which case the
+    // calling test is expected to stop early.
+    bool checkEarlyTermination(ErrorStatus status) {
+        if (status == ErrorStatus::GENERAL_FAILURE) {
+            LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
+                         "save the prepared model that it does not support.";
+            std::cout << "[          ]   Early termination of test because vendor service cannot "
+                         "save the prepared model that it does not support."
+                      << std::endl;
+            return true;
+        }
+        return false;
+    }
+
+    // Returns true (after logging the reason) if the device does not fully support `model`, in
+    // which case the calling test is expected to stop early.
+    bool checkEarlyTermination(const Model& model) {
+        if (!isModelFullySupported(model)) {
+            LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
+                         "prepare model that it does not support.";
+            std::cout << "[          ]   Early termination of test because vendor service cannot "
+                         "prepare model that it does not support."
+                      << std::endl;
+            return true;
+        }
+        return false;
+    }
+
+    // Asks the device to prepare a model from the given cache handles and mToken.
+    // On return `*status` holds the service-specific error of a failed launch or the status
+    // delivered to the callback, and `*preparedModel` holds the model delivered to the callback
+    // (nullptr if the launch failed). A launch failure that is not service specific is reported
+    // as a fatal test failure; in that case the outputs keep the defaults assigned on entry.
+    void prepareModelFromCache(const std::vector<ndk::ScopedFileDescriptor>& modelCache,
+                               const std::vector<ndk::ScopedFileDescriptor>& dataCache,
+                               std::shared_ptr<IPreparedModel>* preparedModel,
+                               ErrorStatus* status) {
+        // Give the outputs defined values first: a fatal assertion below returns from this
+        // function only, and the calling test may still read them afterwards.
+        *preparedModel = nullptr;
+        *status = ErrorStatus::GENERAL_FAILURE;
+
+        // Launch prepare model from cache.
+        std::shared_ptr<PreparedModelCallback> preparedModelCallback =
+                ndk::SharedRefBase::make<PreparedModelCallback>();
+        std::vector<uint8_t> cacheToken(std::begin(mToken), std::end(mToken));
+        const auto prepareLaunchStatus = kDevice->prepareModelFromCache(
+                kNoDeadline, modelCache, dataCache, cacheToken, preparedModelCallback);
+        ASSERT_TRUE(prepareLaunchStatus.isOk() ||
+                    prepareLaunchStatus.getExceptionCode() == EX_SERVICE_SPECIFIC)
+                << "prepareLaunchStatus: " << prepareLaunchStatus.getDescription();
+        if (!prepareLaunchStatus.isOk()) {
+            *status = static_cast<ErrorStatus>(prepareLaunchStatus.getServiceSpecificError());
+            return;
+        }
+
+        // Retrieve prepared model.
+        preparedModelCallback->wait();
+        *status = preparedModelCallback->getStatus();
+        *preparedModel = preparedModelCallback->getPreparedModel();
+    }
+
+    // Absolute path to the temporary cache directory.
+    std::string mCacheDir;
+
+    // File paths for model and data cache in the tmp cache directory, initialized with
+    // size = mNum{Model|Data}Cache. Each entry is the path of one cache file.
+    std::vector<std::string> mModelCache;
+    std::vector<std::string> mDataCache;
+
+    // A separate temporary file path in the tmp cache directory.
+    std::string mTmpCache;
+
+    // Cache token passed to prepareModel and prepareModelFromCache; zero-initialized.
+    uint8_t mToken[static_cast<uint32_t>(IDevice::BYTE_SIZE_OF_CACHE_TOKEN)] = {};
+    // Number of model / data cache files requested by the device, set in SetUp.
+    uint32_t mNumModelCache = 0;
+    uint32_t mNumDataCache = 0;
+    // True if the device requested at least one cache file of either kind.
+    bool mIsCachingSupported = false;
+
+    const std::shared_ptr<IDevice> kDevice;
+    // The primary data type of the testModel.
+    const OperandType kOperandType;
+};
+
+// Test parameter: the device under test together with the operand type of the test model.
+using CompilationCachingTestParam = std::tuple<NamedDevice, OperandType>;
+
+// A parameterized fixture of CompilationCachingTestBase. Every test will run twice, with the first
+// pass running with float32 models and the second pass running with quant8 models.
+class CompilationCachingTest : public CompilationCachingTestBase,
+                               public testing::WithParamInterface<CompilationCachingTestParam> {
+  protected:
+    // Passes the result of getData on the NamedDevice parameter, and the OperandType parameter,
+    // on to the base fixture.
+    CompilationCachingTest()
+        : CompilationCachingTestBase(getData(std::get<NamedDevice>(GetParam())),
+                                     std::get<OperandType>(GetParam())) {}
+};
+
+// Saves a compilation to the cache files, prepares the model again from those files, and checks
+// that the model prepared from cache produces the expected results.
+TEST_P(CompilationCachingTest, CacheSavingAndRetrieval) {
+    // Create test AIDL model and compile.
+    const TestModel& testModel = createTestModel();
+    const Model model = createModel(testModel);
+    if (checkEarlyTermination(model)) return;
+    std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+
+    // A fatal assertion inside a helper only returns from that helper, so every helper call is
+    // wrapped in ASSERT_NO_FATAL_FAILURE to stop the test as soon as one of them fails.
+
+    // Save the compilation to cache.
+    {
+        std::vector<ndk::ScopedFileDescriptor> modelCache, dataCache;
+        ASSERT_NO_FATAL_FAILURE(createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelCache));
+        ASSERT_NO_FATAL_FAILURE(createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataCache));
+        ASSERT_NO_FATAL_FAILURE(saveModelToCache(model, modelCache, dataCache));
+    }
+
+    // Retrieve preparedModel from cache.
+    {
+        preparedModel = nullptr;
+        ErrorStatus status = ErrorStatus::GENERAL_FAILURE;
+        std::vector<ndk::ScopedFileDescriptor> modelCache, dataCache;
+        ASSERT_NO_FATAL_FAILURE(createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelCache));
+        ASSERT_NO_FATAL_FAILURE(createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataCache));
+        ASSERT_NO_FATAL_FAILURE(
+                prepareModelFromCache(modelCache, dataCache, &preparedModel, &status));
+        if (!mIsCachingSupported) {
+            // Without caching support, preparing from cache must fail.
+            ASSERT_EQ(status, ErrorStatus::GENERAL_FAILURE);
+            ASSERT_EQ(preparedModel, nullptr);
+            return;
+        } else if (checkEarlyTermination(status)) {
+            ASSERT_EQ(preparedModel, nullptr);
+            return;
+        } else {
+            ASSERT_EQ(status, ErrorStatus::NONE);
+            ASSERT_NE(preparedModel, nullptr);
+        }
+    }
+
+    // Execute and verify results.
+    EvaluatePreparedModel(kDevice, preparedModel, testModel, /*testKind=*/TestKind::GENERAL);
+}
+
+// Same as CacheSavingAndRetrieval, except that the cache files are non-empty and the file
+// descriptors are positioned at a non-zero offset when they are handed to the driver.
+TEST_P(CompilationCachingTest, CacheSavingAndRetrievalNonZeroOffset) {
+    // Create test AIDL model and compile.
+    const TestModel& testModel = createTestModel();
+    const Model model = createModel(testModel);
+    if (checkEarlyTermination(model)) return;
+    std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+
+    // A fatal assertion inside a helper only returns from that helper, so every helper call is
+    // wrapped in ASSERT_NO_FATAL_FAILURE to stop the test as soon as one of them fails.
+
+    // Save the compilation to cache.
+    {
+        std::vector<ndk::ScopedFileDescriptor> modelCache, dataCache;
+        ASSERT_NO_FATAL_FAILURE(createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelCache));
+        ASSERT_NO_FATAL_FAILURE(createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataCache));
+        const uint8_t placeholderBytes[] = {0, 0};
+        // write() returns a signed count; compare against a signed expected value.
+        const auto expectedWritten = static_cast<ssize_t>(sizeof(placeholderBytes));
+        // Write two placeholder bytes to each cache file.
+        // The driver should be able to handle non-empty cache and non-zero fd offset.
+        for (const auto& fd : modelCache) {
+            ASSERT_EQ(write(fd.get(), placeholderBytes, sizeof(placeholderBytes)),
+                      expectedWritten);
+        }
+        for (const auto& fd : dataCache) {
+            ASSERT_EQ(write(fd.get(), placeholderBytes, sizeof(placeholderBytes)),
+                      expectedWritten);
+        }
+        ASSERT_NO_FATAL_FAILURE(saveModelToCache(model, modelCache, dataCache));
+    }
+
+    // Retrieve preparedModel from cache.
+    {
+        preparedModel = nullptr;
+        ErrorStatus status = ErrorStatus::GENERAL_FAILURE;
+        std::vector<ndk::ScopedFileDescriptor> modelCache, dataCache;
+        ASSERT_NO_FATAL_FAILURE(createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelCache));
+        ASSERT_NO_FATAL_FAILURE(createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataCache));
+        uint8_t placeholderByte = 0;
+        // Advance the offset of each handle by one byte.
+        // The driver should be able to handle non-zero fd offset.
+        for (const auto& fd : modelCache) {
+            ASSERT_GE(read(fd.get(), &placeholderByte, 1), 0);
+        }
+        for (const auto& fd : dataCache) {
+            ASSERT_GE(read(fd.get(), &placeholderByte, 1), 0);
+        }
+        ASSERT_NO_FATAL_FAILURE(
+                prepareModelFromCache(modelCache, dataCache, &preparedModel, &status));
+        if (!mIsCachingSupported) {
+            // Without caching support, preparing from cache must fail.
+            ASSERT_EQ(status, ErrorStatus::GENERAL_FAILURE);
+            ASSERT_EQ(preparedModel, nullptr);
+            return;
+        } else if (checkEarlyTermination(status)) {
+            ASSERT_EQ(preparedModel, nullptr);
+            return;
+        } else {
+            ASSERT_EQ(status, ErrorStatus::NONE);
+            ASSERT_NE(preparedModel, nullptr);
+        }
+    }
+
+    // Execute and verify results.
+    EvaluatePreparedModel(kDevice, preparedModel, testModel, /*testKind=*/TestKind::GENERAL);
+}
+
+// Prepares the model while passing the wrong number of model or data cache files. Preparation
+// itself must still succeed and produce a working model, but preparing from the same cache
+// handles afterwards must fail with INVALID_ARGUMENT or GENERAL_FAILURE.
+TEST_P(CompilationCachingTest, SaveToCacheInvalidNumCache) {
+    // Create test AIDL model and compile.
+    const TestModel& testModel = createTestModel();
+    const Model model = createModel(testModel);
+    if (checkEarlyTermination(model)) return;
+
+    // Runs one scenario: opens the given model and data cache files, prepares the model with
+    // them, verifies the prepared model, and then checks that prepareModelFromCache fails when
+    // given the same handles.
+    const auto checkInvalidNumCache = [&](const std::vector<std::string>& modelFiles,
+                                          const std::vector<std::string>& dataFiles) {
+        std::vector<ndk::ScopedFileDescriptor> modelCache, dataCache;
+        ASSERT_NO_FATAL_FAILURE(createCacheFds(modelFiles, AccessMode::READ_WRITE, &modelCache));
+        ASSERT_NO_FATAL_FAILURE(createCacheFds(dataFiles, AccessMode::READ_WRITE, &dataCache));
+        std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+        ASSERT_NO_FATAL_FAILURE(saveModelToCache(model, modelCache, dataCache, &preparedModel));
+        ASSERT_NE(preparedModel, nullptr);
+        // Execute and verify results.
+        EvaluatePreparedModel(kDevice, preparedModel, testModel, /*testKind=*/TestKind::GENERAL);
+        // Check if prepareModelFromCache fails.
+        preparedModel = nullptr;
+        ErrorStatus status = ErrorStatus::GENERAL_FAILURE;
+        ASSERT_NO_FATAL_FAILURE(
+                prepareModelFromCache(modelCache, dataCache, &preparedModel, &status));
+        if (status != ErrorStatus::INVALID_ARGUMENT) {
+            ASSERT_EQ(status, ErrorStatus::GENERAL_FAILURE);
+        }
+        ASSERT_EQ(preparedModel, nullptr);
+    };
+
+    // Each scenario works on a local copy of the file lists, so the fixture's lists stay intact.
+    // ASSERT_NO_FATAL_FAILURE stops the test at the first scenario that fails fatally.
+
+    // Test with number of model cache files greater than mNumModelCache.
+    {
+        // Pass an additional cache file for model cache.
+        std::vector<std::string> modelFiles = mModelCache;
+        modelFiles.push_back(mTmpCache);
+        ASSERT_NO_FATAL_FAILURE(checkInvalidNumCache(modelFiles, mDataCache));
+    }
+
+    // Test with number of model cache files smaller than mNumModelCache.
+    if (!mModelCache.empty()) {
+        // Leave out the last cache file.
+        const std::vector<std::string> modelFiles(mModelCache.begin(),
+                                                  std::prev(mModelCache.end()));
+        ASSERT_NO_FATAL_FAILURE(checkInvalidNumCache(modelFiles, mDataCache));
+    }
+
+    // Test with number of data cache files greater than mNumDataCache.
+    {
+        // Pass an additional cache file for data cache.
+        std::vector<std::string> dataFiles = mDataCache;
+        dataFiles.push_back(mTmpCache);
+        ASSERT_NO_FATAL_FAILURE(checkInvalidNumCache(mModelCache, dataFiles));
+    }
+
+    // Test with number of data cache files smaller than mNumDataCache.
+    if (!mDataCache.empty()) {
+        // Leave out the last cache file.
+        const std::vector<std::string> dataFiles(mDataCache.begin(), std::prev(mDataCache.end()));
+        ASSERT_NO_FATAL_FAILURE(checkInvalidNumCache(mModelCache, dataFiles));
+    }
+}
+
+TEST_P(CompilationCachingTest, PrepareModelFromCacheInvalidNumCache) {
+    // Build the model under test; return early if checkEarlyTermination() requests it.
+    const TestModel& testModel = createTestModel();
+    const Model model = createModel(testModel);
+    if (checkEarlyTermination(model)) return;
+
+    // Populate the cache through the fixture's regular model/data cache file lists.
+    {
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+        createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+        saveModelToCache(model, modelFds, dataFds);
+    }
+
+    // Too many model cache files: one more than mNumModelCache.
+    {
+        std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+        ErrorStatus status;
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        mModelCache.push_back({mTmpCache});
+        createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+        createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+        mModelCache.pop_back();  // Undo the temporary extra entry.
+        prepareModelFromCache(modelFds, dataFds, &preparedModel, &status);
+        if (status != ErrorStatus::GENERAL_FAILURE) {
+            ASSERT_EQ(status, ErrorStatus::INVALID_ARGUMENT);
+        }
+        ASSERT_EQ(preparedModel, nullptr);
+    }
+
+    // Too few model cache files: one less than mNumModelCache.
+    if (!mModelCache.empty()) {
+        std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+        ErrorStatus status;
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        const auto removedEntry = mModelCache.back();
+        mModelCache.pop_back();
+        createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+        createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+        mModelCache.push_back(removedEntry);  // Restore the fixture's list.
+        prepareModelFromCache(modelFds, dataFds, &preparedModel, &status);
+        if (status != ErrorStatus::GENERAL_FAILURE) {
+            ASSERT_EQ(status, ErrorStatus::INVALID_ARGUMENT);
+        }
+        ASSERT_EQ(preparedModel, nullptr);
+    }
+
+    // Too many data cache files: one more than mNumDataCache.
+    {
+        std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+        ErrorStatus status;
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        mDataCache.push_back({mTmpCache});
+        createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+        createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+        mDataCache.pop_back();  // Undo the temporary extra entry.
+        prepareModelFromCache(modelFds, dataFds, &preparedModel, &status);
+        if (status != ErrorStatus::GENERAL_FAILURE) {
+            ASSERT_EQ(status, ErrorStatus::INVALID_ARGUMENT);
+        }
+        ASSERT_EQ(preparedModel, nullptr);
+    }
+
+    // Too few data cache files: one less than mNumDataCache.
+    if (!mDataCache.empty()) {
+        std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+        ErrorStatus status;
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        const auto removedEntry = mDataCache.back();
+        mDataCache.pop_back();
+        createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+        createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+        mDataCache.push_back(removedEntry);  // Restore the fixture's list.
+        prepareModelFromCache(modelFds, dataFds, &preparedModel, &status);
+        if (status != ErrorStatus::GENERAL_FAILURE) {
+            ASSERT_EQ(status, ErrorStatus::INVALID_ARGUMENT);
+        }
+        ASSERT_EQ(preparedModel, nullptr);
+    }
+}
+
+TEST_P(CompilationCachingTest, SaveToCacheInvalidAccessMode) {
+    // Build the model under test; return early if checkEarlyTermination() requests it.
+    const TestModel& testModel = createTestModel();
+    const Model model = createModel(testModel);
+    if (checkEarlyTermination(model)) return;
+    std::vector<AccessMode> modelModes(mNumModelCache, AccessMode::READ_WRITE);
+    std::vector<AccessMode> dataModes(mNumDataCache, AccessMode::READ_WRITE);
+
+    // Open one model cache file at a time as READ_ONLY; every other file stays READ_WRITE.
+    for (uint32_t idx = 0; idx < mNumModelCache; ++idx) {
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        modelModes[idx] = AccessMode::READ_ONLY;
+        createCacheFds(mModelCache, modelModes, &modelFds);
+        createCacheFds(mDataCache, dataModes, &dataFds);
+        modelModes[idx] = AccessMode::READ_WRITE;  // Reset for the next iteration.
+        std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+        saveModelToCache(model, modelFds, dataFds, &preparedModel);
+        ASSERT_NE(preparedModel, nullptr);
+        // Compilation itself must still succeed and produce correct results.
+        EvaluatePreparedModel(kDevice, preparedModel, testModel, /*testKind=*/TestKind::GENERAL);
+        // Preparing from the same descriptors must fail with one of the two accepted statuses.
+        preparedModel = nullptr;
+        ErrorStatus status;
+        prepareModelFromCache(modelFds, dataFds, &preparedModel, &status);
+        if (status != ErrorStatus::INVALID_ARGUMENT) {
+            ASSERT_EQ(status, ErrorStatus::GENERAL_FAILURE);
+        }
+        ASSERT_EQ(preparedModel, nullptr);
+    }
+
+    // Same procedure for the data cache: one READ_ONLY file per iteration.
+    for (uint32_t idx = 0; idx < mNumDataCache; ++idx) {
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        dataModes[idx] = AccessMode::READ_ONLY;
+        createCacheFds(mModelCache, modelModes, &modelFds);
+        createCacheFds(mDataCache, dataModes, &dataFds);
+        dataModes[idx] = AccessMode::READ_WRITE;  // Reset for the next iteration.
+        std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+        saveModelToCache(model, modelFds, dataFds, &preparedModel);
+        ASSERT_NE(preparedModel, nullptr);
+        // Compilation itself must still succeed and produce correct results.
+        EvaluatePreparedModel(kDevice, preparedModel, testModel, /*testKind=*/TestKind::GENERAL);
+        // Preparing from the same descriptors must fail with one of the two accepted statuses.
+        preparedModel = nullptr;
+        ErrorStatus status;
+        prepareModelFromCache(modelFds, dataFds, &preparedModel, &status);
+        if (status != ErrorStatus::INVALID_ARGUMENT) {
+            ASSERT_EQ(status, ErrorStatus::GENERAL_FAILURE);
+        }
+        ASSERT_EQ(preparedModel, nullptr);
+    }
+}
+
+TEST_P(CompilationCachingTest, PrepareModelFromCacheInvalidAccessMode) {
+    // Build the model under test; return early if checkEarlyTermination() requests it.
+    const TestModel& testModel = createTestModel();
+    const Model model = createModel(testModel);
+    if (checkEarlyTermination(model)) return;
+    std::vector<AccessMode> modelModes(mNumModelCache, AccessMode::READ_WRITE);
+    std::vector<AccessMode> dataModes(mNumDataCache, AccessMode::READ_WRITE);
+
+    // Populate the cache with every file opened READ_WRITE.
+    {
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+        createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+        saveModelToCache(model, modelFds, dataFds);
+    }
+
+    // Open one model cache file at a time as WRITE_ONLY; preparing from cache must fail.
+    for (uint32_t idx = 0; idx < mNumModelCache; ++idx) {
+        std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+        ErrorStatus status;
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        modelModes[idx] = AccessMode::WRITE_ONLY;
+        createCacheFds(mModelCache, modelModes, &modelFds);
+        createCacheFds(mDataCache, dataModes, &dataFds);
+        modelModes[idx] = AccessMode::READ_WRITE;  // Reset for the next iteration.
+        prepareModelFromCache(modelFds, dataFds, &preparedModel, &status);
+        ASSERT_EQ(status, ErrorStatus::GENERAL_FAILURE);
+        ASSERT_EQ(preparedModel, nullptr);
+    }
+
+    // Same procedure for the data cache: one WRITE_ONLY file per iteration.
+    for (uint32_t idx = 0; idx < mNumDataCache; ++idx) {
+        std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+        ErrorStatus status;
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        dataModes[idx] = AccessMode::WRITE_ONLY;
+        createCacheFds(mModelCache, modelModes, &modelFds);
+        createCacheFds(mDataCache, dataModes, &dataFds);
+        dataModes[idx] = AccessMode::READ_WRITE;  // Reset for the next iteration.
+        prepareModelFromCache(modelFds, dataFds, &preparedModel, &status);
+        ASSERT_EQ(status, ErrorStatus::GENERAL_FAILURE);
+        ASSERT_EQ(preparedModel, nullptr);
+    }
+}
+
+// Copy file contents from from[i] to to[i]; the vector sizes must match. Destinations are
+// created if missing and written from offset 0 without truncation (no O_TRUNC is passed).
+static void copyCacheFiles(const std::vector<std::string>& from,
+                           const std::vector<std::string>& to) {
+    struct ScopedFd {  // Closed on scope exit, so an early-returning ASSERT_* cannot leak it.
+        const int fd;
+        ~ScopedFd() { if (fd >= 0) close(fd); }
+    };
+    // Heap-allocated: this also runs on std::thread workers, where 1 MB of stack is risky.
+    constexpr size_t kBufferSize = 1000000;
+    std::vector<uint8_t> buffer(kBufferSize);
+    ASSERT_EQ(from.size(), to.size());
+    for (size_t i = 0; i < from.size(); i++) {
+        const ScopedFd fromFd{open(from[i].c_str(), O_RDONLY)};
+        const ScopedFd toFd{open(to[i].c_str(), O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR)};
+        ASSERT_GE(fromFd.fd, 0);
+        ASSERT_GE(toFd.fd, 0);
+        ssize_t readBytes;
+        while ((readBytes = read(fromFd.fd, buffer.data(), buffer.size())) > 0) {
+            ASSERT_EQ(write(toFd.fd, buffer.data(), readBytes), readBytes);
+        }
+        ASSERT_GE(readBytes, 0);
+    }
+}
+
+// Number of operations in the large test model.
+constexpr uint32_t kLargeModelSize = 100;
+constexpr uint32_t kNumIterationsTOCTOU = 100;  // Rounds run by each probabilistic TOCTOU test.
+
+TEST_P(CompilationCachingTest, SaveToCache_TOCTOU) {
+    if (!mIsCachingSupported) return;
+
+    // Two large models (MUL and ADD); return early if checkEarlyTermination() rejects either.
+    const TestModel testModelMul = createLargeTestModel(OperationType::MUL, kLargeModelSize);
+    const Model modelMul = createModel(testModelMul);
+    if (checkEarlyTermination(modelMul)) return;
+    const TestModel testModelAdd = createLargeTestModel(OperationType::ADD, kLargeModelSize);
+    const Model modelAdd = createModel(testModelAdd);
+    if (checkEarlyTermination(modelAdd)) return;
+
+    // Cache the modelMul compilation under "<model cache path>_mul" file names.
+    auto mulCachePaths = mModelCache;
+    for (auto& path : mulCachePaths) {
+        path += "_mul";
+    }
+    {
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        createCacheFds(mulCachePaths, AccessMode::READ_WRITE, &modelFds);
+        createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+        saveModelToCache(modelMul, modelFds, dataFds);
+    }
+
+    // Use a different token for modelAdd.
+    ++mToken[0];
+
+    // The race is probabilistic, so the save/overwrite/prepare cycle is repeated.
+    for (uint32_t round = 0; round < kNumIterationsTOCTOU; ++round) {
+        // Save the modelAdd compilation to cache.
+        {
+            std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+            createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+            createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+
+            // Overwrite the model cache files with modelMul's content while the save is running.
+            std::thread copier(copyCacheFiles, std::cref(mulCachePaths), std::cref(mModelCache));
+            saveModelToCache(modelAdd, modelFds, dataFds);
+            copier.join();
+        }
+
+        // Retrieve preparedModel from cache.
+        {
+            std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+            ErrorStatus status;
+            std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+            createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+            createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+            prepareModelFromCache(modelFds, dataFds, &preparedModel, &status);
+
+            // Either outcome is acceptable as long as nothing crashes. On success the prepared
+            // model must execute correctly as modelAdd; on failure no model may be returned.
+            if (status == ErrorStatus::NONE) {
+                ASSERT_NE(preparedModel, nullptr);
+                EvaluatePreparedModel(kDevice, preparedModel, testModelAdd,
+                                      /*testKind=*/TestKind::GENERAL);
+            } else {
+                ASSERT_EQ(preparedModel, nullptr);
+            }
+        }
+    }
+}
+
+TEST_P(CompilationCachingTest, PrepareFromCache_TOCTOU) {
+    if (!mIsCachingSupported) return;
+
+    // Two large models (MUL and ADD); return early if checkEarlyTermination() rejects either.
+    const TestModel testModelMul = createLargeTestModel(OperationType::MUL, kLargeModelSize);
+    const Model modelMul = createModel(testModelMul);
+    if (checkEarlyTermination(modelMul)) return;
+    const TestModel testModelAdd = createLargeTestModel(OperationType::ADD, kLargeModelSize);
+    const Model modelAdd = createModel(testModelAdd);
+    if (checkEarlyTermination(modelAdd)) return;
+
+    // Cache the modelMul compilation under "<model cache path>_mul" file names.
+    auto mulCachePaths = mModelCache;
+    for (auto& path : mulCachePaths) {
+        path += "_mul";
+    }
+    {
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        createCacheFds(mulCachePaths, AccessMode::READ_WRITE, &modelFds);
+        createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+        saveModelToCache(modelMul, modelFds, dataFds);
+    }
+
+    // Use a different token for modelAdd.
+    ++mToken[0];
+
+    // The race is probabilistic, so the save/prepare/overwrite cycle is repeated.
+    for (uint32_t round = 0; round < kNumIterationsTOCTOU; ++round) {
+        // Save the modelAdd compilation to cache.
+        {
+            std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+            createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+            createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+            saveModelToCache(modelAdd, modelFds, dataFds);
+        }
+
+        // Retrieve preparedModel from cache.
+        {
+            std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+            ErrorStatus status;
+            std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+            createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+            createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+
+            // Overwrite the model cache files with modelMul's content while preparing from cache.
+            std::thread copier(copyCacheFiles, std::cref(mulCachePaths), std::cref(mModelCache));
+            prepareModelFromCache(modelFds, dataFds, &preparedModel, &status);
+            copier.join();
+
+            // Either outcome is acceptable as long as nothing crashes. On success the prepared
+            // model must execute correctly as modelAdd; on failure no model may be returned.
+            if (status == ErrorStatus::NONE) {
+                ASSERT_NE(preparedModel, nullptr);
+                EvaluatePreparedModel(kDevice, preparedModel, testModelAdd,
+                                      /*testKind=*/TestKind::GENERAL);
+            } else {
+                ASSERT_EQ(preparedModel, nullptr);
+            }
+        }
+    }
+}
+
+TEST_P(CompilationCachingTest, ReplaceSecuritySensitiveCache) {
+    if (!mIsCachingSupported) return;
+
+    // Two large models (MUL and ADD); return early if checkEarlyTermination() rejects either.
+    const TestModel testModelMul = createLargeTestModel(OperationType::MUL, kLargeModelSize);
+    const Model modelMul = createModel(testModelMul);
+    if (checkEarlyTermination(modelMul)) return;
+    const TestModel testModelAdd = createLargeTestModel(OperationType::ADD, kLargeModelSize);
+    const Model modelAdd = createModel(testModelAdd);
+    if (checkEarlyTermination(modelAdd)) return;
+
+    // Cache the modelMul compilation under "<model cache path>_mul" file names.
+    auto mulCachePaths = mModelCache;
+    for (auto& path : mulCachePaths) {
+        path += "_mul";
+    }
+    {
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        createCacheFds(mulCachePaths, AccessMode::READ_WRITE, &modelFds);
+        createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+        saveModelToCache(modelMul, modelFds, dataFds);
+    }
+
+    // Use a different token for modelAdd.
+    ++mToken[0];
+
+    // Cache the modelAdd compilation in the regular model cache files.
+    {
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+        createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+        saveModelToCache(modelAdd, modelFds, dataFds);
+    }
+
+    // Swap in modelMul's model cache content underneath modelAdd's token.
+    copyCacheFiles(mulCachePaths, mModelCache);
+
+    // Preparing from the replaced cache must be rejected with GENERAL_FAILURE.
+    {
+        std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+        ErrorStatus status;
+        std::vector<ndk::ScopedFileDescriptor> modelFds, dataFds;
+        createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelFds);
+        createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataFds);
+        prepareModelFromCache(modelFds, dataFds, &preparedModel, &status);
+        ASSERT_EQ(status, ErrorStatus::GENERAL_FAILURE);
+        ASSERT_EQ(preparedModel, nullptr);
+    }
+}
+
+// TODO(b/179270601): restore kNamedDeviceChoices.
+static const auto kOperandTypeChoices =  // Operand types the caching suites run with.
+        testing::Values(OperandType::TENSOR_FLOAT32, OperandType::TENSOR_QUANT8_ASYMM);
+
+std::string printCompilationCachingTest(
+        const testing::TestParamInfo<CompilationCachingTestParam>& info) {
+    const auto& [device, operandType] = info.param;  // (named device, operand type)
+    const char* const suffix = operandType == OperandType::TENSOR_FLOAT32 ? "_float32" : "_quant8";
+    return gtestCompliantName(getName(device) + suffix);  // "<name>_float32" or "<name>_quant8"
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CompilationCachingTest);
+INSTANTIATE_TEST_SUITE_P(TestCompilationCaching, CompilationCachingTest,
+                         testing::Combine(testing::ValuesIn(getNamedDevices()),
+                                          kOperandTypeChoices),  // every device x operand type
+                         printCompilationCachingTest);  // supplies each instance's name suffix
+
+using CompilationCachingSecurityTestParam = std::tuple<NamedDevice, OperandType, uint32_t>;
+// Fixture whose helpers corrupt cache files; the uint32_t test parameter seeds its RNG.
+class CompilationCachingSecurityTest
+    : public CompilationCachingTestBase,
+      public testing::WithParamInterface<CompilationCachingSecurityTestParam> {
+  protected:
+    CompilationCachingSecurityTest()
+        : CompilationCachingTestBase(getData(std::get<NamedDevice>(GetParam())),
+                                     std::get<OperandType>(GetParam())) {}
+
+    void SetUp() override {
+        CompilationCachingTestBase::SetUp();
+        generator.seed(kSeed);
+    }
+
+    // Get a random integer within a closed range [lower, upper].
+    template <typename T>
+    T getRandomInt(T lower, T upper) {
+        std::uniform_int_distribution<T> dis(lower, upper);
+        return dis(generator);
+    }
+
+    // Randomly flip one single bit of the cache entry. Sets *skip (leaving the file untouched)
+    // when the file is empty. The stream is closed on every exit path, failed ASSERTs included.
+    void flipOneBitOfCache(const std::string& filename, bool* skip) {
+        std::unique_ptr<FILE, decltype(&fclose)> file(fopen(filename.c_str(), "r+"), &fclose);
+        ASSERT_NE(file.get(), nullptr);
+        ASSERT_EQ(fseek(file.get(), 0, SEEK_END), 0);
+        const long int fileSize = ftell(file.get());
+        ASSERT_GE(fileSize, 0);
+        *skip = (fileSize == 0);
+        if (*skip) return;
+        ASSERT_EQ(fseek(file.get(), getRandomInt(0l, fileSize - 1), SEEK_SET), 0);
+        const int readByte = fgetc(file.get());
+        ASSERT_NE(readByte, EOF);
+        // Step back over the byte just read so that the write lands on the same offset.
+        ASSERT_EQ(fseek(file.get(), -1, SEEK_CUR), 0);
+        const int flipped = static_cast<uint8_t>(readByte) ^ (1U << getRandomInt(0, 7));
+        ASSERT_NE(fputc(flipped, file.get()), EOF);
+    }
+
+    // Append between 1 and 256 random bytes to the cache entry; never requests a skip.
+    void appendBytesToCache(const std::string& filename, bool* skip) {
+        std::unique_ptr<FILE, decltype(&fclose)> file(fopen(filename.c_str(), "a"), &fclose);
+        ASSERT_NE(file.get(), nullptr);
+        *skip = false;
+        const uint32_t appendLength = getRandomInt(1, 256);
+        for (uint32_t i = 0; i < appendLength; i++) {
+            // T deduces to int: uint8_t is not a valid uniform_int_distribution result type.
+            ASSERT_NE(fputc(getRandomInt(0, 255), file.get()), EOF);
+        }
+    }
+
+    enum class ExpectedResult { GENERAL_FAILURE, NOT_CRASH };
+
+    // Test if the driver behaves as expected when given corrupted cache or token. The modifier
+    // runs after save to cache but before prepare from cache; through its "skip" out-argument
+    // it reports whether the rest of the test should be skipped.
+    void testCorruptedCache(ExpectedResult expected, std::function<void(bool*)> modifier) {
+        const TestModel& testModel = createTestModel();
+        const Model model = createModel(testModel);
+        if (checkEarlyTermination(model)) return;
+
+        // Save the compilation to cache.
+        {
+            std::vector<ndk::ScopedFileDescriptor> modelCache, dataCache;
+            createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelCache);
+            createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataCache);
+            saveModelToCache(model, modelCache, dataCache);
+        }
+
+        bool skip = false;
+        modifier(&skip);
+        if (skip) return;
+
+        // Retrieve preparedModel from cache.
+        {
+            std::shared_ptr<IPreparedModel> preparedModel = nullptr;
+            ErrorStatus status;
+            std::vector<ndk::ScopedFileDescriptor> modelCache, dataCache;
+            createCacheFds(mModelCache, AccessMode::READ_WRITE, &modelCache);
+            createCacheFds(mDataCache, AccessMode::READ_WRITE, &dataCache);
+            prepareModelFromCache(modelCache, dataCache, &preparedModel, &status);
+
+            switch (expected) {
+                case ExpectedResult::GENERAL_FAILURE:
+                    ASSERT_EQ(status, ErrorStatus::GENERAL_FAILURE);
+                    ASSERT_EQ(preparedModel, nullptr);
+                    break;
+                case ExpectedResult::NOT_CRASH:
+                    ASSERT_EQ(preparedModel == nullptr, status != ErrorStatus::NONE);
+                    break;
+                default:
+                    FAIL();
+            }
+        }
+    }
+
+    const uint32_t kSeed = std::get<uint32_t>(GetParam());
+    std::mt19937 generator;
+};
+
+TEST_P(CompilationCachingSecurityTest, CorruptedModelCache) {
+    if (!mIsCachingSupported) return;
+    for (uint32_t idx = 0; idx < mNumModelCache; ++idx) {  // Flip a bit in one file per pass.
+        testCorruptedCache(ExpectedResult::GENERAL_FAILURE,
+                           [this, idx](bool* skip) { flipOneBitOfCache(mModelCache[idx], skip); });
+    }
+}
+
+TEST_P(CompilationCachingSecurityTest, WrongLengthModelCache) {
+    if (!mIsCachingSupported) return;
+    for (uint32_t idx = 0; idx < mNumModelCache; ++idx) {  // Lengthen one file per pass.
+        testCorruptedCache(ExpectedResult::GENERAL_FAILURE,
+                           [this, idx](bool* skip) { appendBytesToCache(mModelCache[idx], skip); });
+    }
+}
+
+TEST_P(CompilationCachingSecurityTest, CorruptedDataCache) {
+    if (!mIsCachingSupported) return;
+    for (uint32_t idx = 0; idx < mNumDataCache; ++idx) {  // Flip a bit in one file per pass.
+        testCorruptedCache(ExpectedResult::NOT_CRASH,
+                           [this, idx](bool* skip) { flipOneBitOfCache(mDataCache[idx], skip); });
+    }
+}
+
+TEST_P(CompilationCachingSecurityTest, WrongLengthDataCache) {
+    if (!mIsCachingSupported) return;
+    for (uint32_t idx = 0; idx < mNumDataCache; ++idx) {  // Lengthen one file per pass.
+        testCorruptedCache(ExpectedResult::NOT_CRASH,
+                           [this, idx](bool* skip) { appendBytesToCache(mDataCache[idx], skip); });
+    }
+}
+
+TEST_P(CompilationCachingSecurityTest, WrongToken) {
+    if (!mIsCachingSupported) return;
+    testCorruptedCache(ExpectedResult::GENERAL_FAILURE, [this](bool* skip) {
+        // Corrupt the token instead of a file: flip one random bit of one random token byte.
+        const auto tokenSize = static_cast<uint32_t>(IDevice::BYTE_SIZE_OF_CACHE_TOKEN);
+        const uint32_t byteIndex = getRandomInt(0u, tokenSize - 1);
+        mToken[byteIndex] ^= (1U << getRandomInt(0, 7));
+        *skip = false;
+    });
+}
+
+std::string printCompilationCachingSecurityTest(
+        const testing::TestParamInfo<CompilationCachingSecurityTestParam>& info) {
+    const auto& [device, operandType, seed] = info.param;  // (named device, operand type, seed)
+    const char* const type = operandType == OperandType::TENSOR_FLOAT32 ? "_float32_" : "_quant8_";
+    return gtestCompliantName(getName(device) + type + std::to_string(seed));
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CompilationCachingSecurityTest);
+INSTANTIATE_TEST_SUITE_P(TestCompilationCaching, CompilationCachingSecurityTest,
+                         testing::Combine(testing::ValuesIn(getNamedDevices()), kOperandTypeChoices,
+                                          testing::Range(0U, 10U)),  // RNG seeds 0 through 9
+                         printCompilationCachingSecurityTest);  // supplies the name suffix
+
+}  // namespace aidl::android::hardware::neuralnetworks::vts::functional
diff --git a/neuralnetworks/aidl/vts/functional/GeneratedTestHarness.cpp b/neuralnetworks/aidl/vts/functional/GeneratedTestHarness.cpp
new file mode 100644
index 0000000000..86d5f3f8d3
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/GeneratedTestHarness.cpp
@@ -0,0 +1,925 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GeneratedTestHarness.h"
+
+#include <aidl/android/hardware/neuralnetworks/ErrorStatus.h>
+#include <android-base/logging.h>
+#include <android/binder_auto_utils.h>
+#include <android/sync.h>
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <chrono>
+#include <iostream>
+#include <iterator>
+#include <numeric>
+#include <vector>
+
+#include <MemoryUtils.h>
+#include <android/binder_status.h>
+#include <nnapi/Result.h>
+#include <nnapi/SharedMemory.h>
+#include <nnapi/Types.h>
+#include <nnapi/hal/aidl/Conversions.h>
+#include <nnapi/hal/aidl/Utils.h>
+
+#include "Callbacks.h"
+#include "TestHarness.h"
+#include "Utils.h"
+#include "VtsHalNeuralnetworks.h"
+
+namespace aidl::android::hardware::neuralnetworks::vts::functional {
+
+namespace nn = ::android::nn;
+using namespace test_helper;
+using implementation::PreparedModelCallback;
+
+namespace {
+
+// Output-handling variant selected through TestConfig::outputType.
+enum class OutputType { FULLY_SPECIFIED, UNSPECIFIED, INSUFFICIENT, MISSED_DEADLINE };
+
+// Bundle of settings describing a single test configuration.
+struct TestConfig {
+    Executor executor;
+    bool measureTiming;
+    OutputType outputType;
+    MemoryType memoryType;
+    // `reportSkipping` indicates if a test should print an info message in case it is
+    // skipped. It is true unless passed explicitly; quantization coupling tests set it to
+    // false to suppress that message.
+    bool reportSkipping;
+
+    // Four-argument form: identical to the five-argument form with reportSkipping == true.
+    TestConfig(Executor executor, bool measureTiming, OutputType outputType, MemoryType memoryType)
+        : TestConfig(executor, measureTiming, outputType, memoryType, /*reportSkipping=*/true) {}
+    TestConfig(Executor executor, bool measureTiming, OutputType outputType, MemoryType memoryType,
+               bool reportSkipping)
+        : executor(executor),
+          measureTiming(measureTiming),
+          outputType(outputType),
+          memoryType(memoryType),
+          reportSkipping(reportSkipping) {}
+};
+
+// Selects whether DeviceMemoryAllocator targets a model input or a model output.
+enum class IOType { INPUT, OUTPUT };
+
+// Allocates device memory via IDevice::allocate() for operands of a single prepared model.
+class DeviceMemoryAllocator {
+  public:
+    DeviceMemoryAllocator(const std::shared_ptr<IDevice>& device,
+                          const std::shared_ptr<IPreparedModel>& preparedModel,
+                          const TestModel& testModel)
+        : kDevice(device), kPreparedModel(preparedModel), kTestModel(testModel) {}
+
+    // Allocate device memory for the input/output operand at `index`.
+    // Returns {IBuffer object, token} on success and {nullptr, 0} when the driver reports
+    // GENERAL_FAILURE (device memory not supported) or when a test assertion failed.
+    template <IOType ioType>
+    std::pair<std::shared_ptr<IBuffer>, int32_t> allocate(uint32_t index) {
+        std::pair<std::shared_ptr<IBuffer>, int32_t> allocation;
+        allocateInternal<ioType>(index, &allocation);
+        return allocation;
+    }
+
+  private:
+    // Worker behind allocate(). It reports through `result` because gtest's fatal ASSERT_*
+    // macros may only be used in functions returning void.
+    template <IOType ioType>
+    void allocateInternal(int32_t index, std::pair<std::shared_ptr<IBuffer>, int32_t>* result) {
+        ASSERT_NE(result, nullptr);
+
+        // Describe the single operand this buffer will serve.
+        const BufferRole operandRole = {.modelIndex = 0, .ioIndex = index, .frequency = 1.0f};
+        std::vector<BufferRole> inputRoles, outputRoles;
+        (ioType == IOType::INPUT ? inputRoles : outputRoles).push_back(operandRole);
+
+        // Allocate device memory.
+        DeviceBuffer buffer;
+        IPreparedModelParcel modelParcel;
+        modelParcel.preparedModel = kPreparedModel;
+        const auto ret = kDevice->allocate({}, {modelParcel}, inputRoles, outputRoles, &buffer);
+
+        // Only success or a service-specific GENERAL_FAILURE is accepted from the driver.
+        if (!ret.isOk()) {
+            ASSERT_EQ(ret.getExceptionCode(), EX_SERVICE_SPECIFIC);
+            ASSERT_EQ(static_cast<ErrorStatus>(ret.getServiceSpecificError()),
+                      ErrorStatus::GENERAL_FAILURE);
+            buffer.buffer = nullptr;
+            buffer.token = 0;
+        } else {
+            ASSERT_NE(buffer.buffer, nullptr);
+            ASSERT_GT(buffer.token, 0);
+        }
+
+        // Inputs only: stage the operand's test data in shared memory, then copy it in.
+        if constexpr (ioType == IOType::INPUT) {
+            if (buffer.buffer != nullptr) {
+                // TestBuffer -> Shared memory.
+                const auto& testBuffer =
+                        kTestModel.main.operands[kTestModel.main.inputIndexes[index]].data;
+                ASSERT_GT(testBuffer.size(), 0);
+                const auto staging = nn::createSharedMemory(testBuffer.size()).value();
+                const auto aidlMemory = utils::convert(staging).value();
+                const auto mapped = nn::map(staging).value();
+                uint8_t* inputPtr = static_cast<uint8_t*>(std::get<void*>(mapped.pointer));
+                ASSERT_NE(inputPtr, nullptr);
+                // Copy the raw test bytes into the mapping.
+                std::copy_n(testBuffer.get<uint8_t>(), testBuffer.size(), inputPtr);
+
+                // Shared memory -> IBuffer.
+                auto ret = buffer.buffer->copyFrom(aidlMemory, {});
+                ASSERT_TRUE(ret.isOk());
+            }
+        }
+        *result = {std::move(buffer.buffer), buffer.token};
+    }
+
+    const std::shared_ptr<IDevice> kDevice;
+    const std::shared_ptr<IPreparedModel> kPreparedModel;
+    // Held by reference: the TestModel passed to the constructor must outlive this object.
+    const TestModel& kTestModel;
+};
+
+Subgraph createSubgraph(const TestSubgraph& testSubgraph, uint32_t* constCopySize,
+                        std::vector<const TestBuffer*>* constCopies, uint32_t* constRefSize,
+                        std::vector<const TestBuffer*>* constReferences) {
+    CHECK(constCopySize != nullptr);
+    CHECK(constCopies != nullptr);
+    CHECK(constRefSize != nullptr);
+    CHECK(constReferences != nullptr);
+    // Converts a TestSubgraph to an AIDL Subgraph. The size counters and buffer lists are
+    // in/out: constant operands are appended to them and the counters advance. Operands first.
+    std::vector<Operand> operands(testSubgraph.operands.size());
+    for (uint32_t i = 0; i < testSubgraph.operands.size(); i++) {
+        const auto& op = testSubgraph.operands[i];
+        // Constant data is only recorded here; each running size grows by the aligned size.
+        DataLocation loc = {};
+        if (op.lifetime == TestOperandLifeTime::CONSTANT_COPY) {
+            loc = {
+                    .poolIndex = 0,
+                    .offset = *constCopySize,
+                    .length = static_cast<int64_t>(op.data.size()),
+            };
+            constCopies->push_back(&op.data);
+            *constCopySize += op.data.alignedSize();
+        } else if (op.lifetime == TestOperandLifeTime::CONSTANT_REFERENCE) {
+            loc = {
+                    .poolIndex = 0,
+                    .offset = *constRefSize,
+                    .length = static_cast<int64_t>(op.data.size()),
+            };
+            constReferences->push_back(&op.data);
+            *constRefSize += op.data.alignedSize();
+        } else if (op.lifetime == TestOperandLifeTime::SUBGRAPH) {
+            loc = {
+                    .poolIndex = 0,
+                    .offset = *op.data.get<uint32_t>(),
+                    .length = 0,
+            };
+        }
+        // Only TENSOR_QUANT8_SYMM_PER_CHANNEL operands carry extra parameters.
+        std::optional<OperandExtraParams> extraParams;
+        if (op.type == TestOperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
+            using Tag = OperandExtraParams::Tag;
+            extraParams = OperandExtraParams::make<Tag::channelQuant>(SymmPerChannelQuantParams{
+                    .scales = op.channelQuant.scales,
+                    .channelDim = static_cast<int32_t>(op.channelQuant.channelDim)});
+        }
+
+        operands[i] = {.type = static_cast<OperandType>(op.type),
+                       .dimensions = utils::toSigned(op.dimensions).value(),
+                       .scale = op.scale,
+                       .zeroPoint = op.zeroPoint,
+                       .lifetime = static_cast<OperandLifeTime>(op.lifetime),
+                       .location = loc,
+                       .extraParams = std::move(extraParams)};
+    }
+
+    // Operations: cast the operation type and convert the index lists to signed values.
+    std::vector<Operation> operations(testSubgraph.operations.size());
+    std::transform(testSubgraph.operations.begin(), testSubgraph.operations.end(),
+                   operations.begin(), [](const TestOperation& op) -> Operation {
+                       return {.type = static_cast<OperationType>(op.type),
+                               .inputs = utils::toSigned(op.inputs).value(),
+                               .outputs = utils::toSigned(op.outputs).value()};
+                   });
+
+    return {.operands = std::move(operands),
+            .operations = std::move(operations),
+            .inputIndexes = utils::toSigned(testSubgraph.inputIndexes).value(),
+            .outputIndexes = utils::toSigned(testSubgraph.outputIndexes).value()};
+}
+
+void copyTestBuffers(const std::vector<const TestBuffer*>& buffers, uint8_t* output) {
+    // Write each buffer at the cursor; the cursor then advances by the buffer's aligned size.
+    uint32_t cursor = 0;
+    for (const TestBuffer* source : buffers) {
+        const uint8_t* bytes = source->get<uint8_t>();
+        std::copy_n(bytes, source->size(), output + cursor);
+        cursor += source->alignedSize();
+    }
+}
+
+}  // namespace
+
+void waitForSyncFence(int syncFd) {
+    // The fd must be positive; wait on it with no timeout and fail on a negative result.
+    ASSERT_GT(syncFd, 0);
+    const int waitStatus = sync_wait(syncFd, /*timeout=*/-1);
+    ASSERT_GE(waitStatus, 0);
+}
+
+Model createModel(const TestModel& testModel) {
+    uint32_t constCopySize = 0;
+    uint32_t constRefSize = 0;
+    std::vector<const TestBuffer*> constCopies;
+    std::vector<const TestBuffer*> constReferences;
+    // Convert the main subgraph, then the referenced ones; all share the running sizes and lists.
+    Subgraph mainSubgraph = createSubgraph(testModel.main, &constCopySize, &constCopies,
+                                           &constRefSize, &constReferences);
+    std::vector<Subgraph> refSubgraphs(testModel.referenced.size());
+    std::transform(testModel.referenced.begin(), testModel.referenced.end(), refSubgraphs.begin(),
+                   [&constCopySize, &constCopies, &constRefSize,
+                    &constReferences](const TestSubgraph& testSubgraph) {
+                       return createSubgraph(testSubgraph, &constCopySize, &constCopies,
+                                             &constRefSize, &constReferences);
+                   });
+
+    // Constant copies: packed into operandValues in the order they were recorded above.
+    std::vector<uint8_t> operandValues(constCopySize);
+    copyTestBuffers(constCopies, operandValues.data());
+
+    // Shared memory: a single pool for all constant references, created only when needed.
+    std::vector<nn::Memory> pools = {};
+    if (constRefSize > 0) {
+        const auto pool = nn::createSharedMemory(constRefSize).value();
+        pools.push_back(pool);
+
+        // Map the pool and copy the constant-reference data into it.
+        const auto mappedMemory = nn::map(pool).value();
+        uint8_t* mappedPtr = static_cast<uint8_t*>(std::get<void*>(mappedMemory.pointer));
+        CHECK(mappedPtr != nullptr);
+
+        copyTestBuffers(constReferences, mappedPtr);
+    }
+    // Convert the canonical pools to AIDL Memory objects.
+    std::vector<Memory> aidlPools;
+    aidlPools.reserve(pools.size());
+    for (auto& pool : pools) {
+        auto aidlPool = utils::convert(pool).value();
+        aidlPools.push_back(std::move(aidlPool));
+    }
+
+    return {.main = std::move(mainSubgraph),
+            .referenced = std::move(refSubgraphs),
+            .operandValues = std::move(operandValues),
+            .pools = std::move(aidlPools),
+            .relaxComputationFloat32toFloat16 = testModel.isRelaxed};
+}
+
+static bool isOutputSizeGreaterThanOne(const TestModel& testModel, uint32_t index) {
+    const auto& outputOperand = testModel.main.operands[testModel.main.outputIndexes[index]];
+    return outputOperand.data.size() > 1u;  // Expected output data is larger than one byte.
+}
+
+static void makeOutputInsufficientSize(uint32_t outputIndex, Request* request) {
+    auto& outputLocation = request->outputs[outputIndex].location;
+    ASSERT_GT(outputLocation.length, 1u);
+    --outputLocation.length;  // Drop one byte from the declared output length.
+}
+
+static void makeOutputDimensionsUnspecified(Model* model) {
+    for (const auto operandIndex : model->main.outputIndexes) {
+        auto& outputDims = model->main.operands[operandIndex].dimensions;
+        outputDims.assign(outputDims.size(), 0);  // Rank is kept; every dimension becomes 0.
+    }
+}
+
+// Manages the lifetime of memory resources (shared pools and device buffers) used in an execution.
+class ExecutionContext {
+  public:
+    ExecutionContext(std::shared_ptr<IDevice> device, std::shared_ptr<IPreparedModel> preparedModel)
+        : kDevice(std::move(device)), kPreparedModel(std::move(preparedModel)) {}
+    // Returns std::nullopt if memoryType is DEVICE and no device buffer could be allocated.
+    std::optional<Request> createRequest(const TestModel& testModel, MemoryType memoryType);
+    std::vector<TestBuffer> getOutputBuffers(const TestModel& testModel,
+                                             const Request& request) const;
+
+  private:
+    // Get a TestBuffer with data copied from an IBuffer object.
+    void getBuffer(const std::shared_ptr<IBuffer>& buffer, size_t size,
+                   TestBuffer* testBuffer) const;
+    // Positions within Request::pools; device memories start at kDeviceMemoryBeginIndex.
+    static constexpr uint32_t kInputPoolIndex = 0;
+    static constexpr uint32_t kOutputPoolIndex = 1;
+    static constexpr uint32_t kDeviceMemoryBeginIndex = 2;
+
+    const std::shared_ptr<IDevice> kDevice;
+    const std::shared_ptr<IPreparedModel> kPreparedModel;
+    std::unique_ptr<TestMemoryBase> mInputMemory, mOutputMemory;  // Shared pools 0 and 1.
+    std::vector<std::shared_ptr<IBuffer>> mBuffers;  // Device buffers, in pool order from 2.
+};
+
+std::optional<Request> ExecutionContext::createRequest(const TestModel& testModel,
+                                                       MemoryType memoryType) {
+    // Builds the request for testModel. Memory pools are organized as:
+    // - 0: Input shared memory pool
+    // - 1: Output shared memory pool
+    // - [2, 2+i): Input device memories
+    // - [2+i, 2+i+o): Output device memories
+    DeviceMemoryAllocator allocator(kDevice, kPreparedModel, testModel);
+    std::vector<int32_t> tokens;
+    mBuffers.clear();
+
+    // Model inputs.
+    std::vector<RequestArgument> inputs(testModel.main.inputIndexes.size());
+    size_t inputSize = 0;
+    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
+        if (op.data.size() == 0) {
+            // Omitted input.
+            inputs[i] = {.hasNoValue = true};
+            continue;
+        } else if (memoryType == MemoryType::DEVICE) {
+            SCOPED_TRACE("Input index = " + std::to_string(i));
+            auto [buffer, token] = allocator.allocate<IOType::INPUT>(i);
+            if (buffer != nullptr) {
+                DataLocation loc = {.poolIndex = static_cast<int32_t>(mBuffers.size() +
+                                                                      kDeviceMemoryBeginIndex)};
+                mBuffers.push_back(std::move(buffer));
+                tokens.push_back(token);
+                inputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
+                continue;
+            }
+        }
+
+        // Reserve shared memory for input (also the fallback when no device buffer was returned).
+        DataLocation loc = {.poolIndex = kInputPoolIndex,
+                            .offset = static_cast<int64_t>(inputSize),
+                            .length = static_cast<int64_t>(op.data.size())};
+        inputSize += op.data.alignedSize();
+        inputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
+    }
+
+    // Model outputs.
+    std::vector<RequestArgument> outputs(testModel.main.outputIndexes.size());
+    size_t outputSize = 0;
+    for (uint32_t i = 0; i < testModel.main.outputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.outputIndexes[i]];
+        if (memoryType == MemoryType::DEVICE) {
+            SCOPED_TRACE("Output index = " + std::to_string(i));
+            auto [buffer, token] = allocator.allocate<IOType::OUTPUT>(i);
+            if (buffer != nullptr) {
+                DataLocation loc = {.poolIndex = static_cast<int32_t>(mBuffers.size() +
+                                                                      kDeviceMemoryBeginIndex)};
+                mBuffers.push_back(std::move(buffer));
+                tokens.push_back(token);
+                outputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
+                continue;
+            }
+        }
+
+        // In the case of zero-sized output, we should at least provide a one-byte buffer.
+        // This is because zero-sized tensors are only supported internally to the driver, or
+        // reported in output shapes. It is illegal for the client to pre-specify a zero-sized
+        // tensor as model output. Otherwise, we will have two semantic conflicts:
+        // - "Zero dimension" conflicts with "unspecified dimension".
+        // - "Omitted operand buffer" conflicts with "zero-sized operand buffer".
+        size_t bufferSize = std::max<size_t>(op.data.size(), 1);
+
+        // Reserve shared memory for output (also the fallback when no device buffer was returned).
+        DataLocation loc = {.poolIndex = kOutputPoolIndex,
+                            .offset = static_cast<int64_t>(outputSize),
+                            .length = static_cast<int64_t>(bufferSize)};
+        outputSize += op.data.size() == 0 ? TestBuffer::kAlignment : op.data.alignedSize();
+        outputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
+    }
+    // For DEVICE memory there is nothing to test if no device buffer was allocated at all.
+    if (memoryType == MemoryType::DEVICE && mBuffers.empty()) {
+        return std::nullopt;
+    }
+
+    // Memory pools: each is at least one byte, even when no input/output uses shared memory.
+    if (memoryType == MemoryType::BLOB_AHWB) {
+        mInputMemory = TestBlobAHWB::create(std::max<size_t>(inputSize, 1));
+        mOutputMemory = TestBlobAHWB::create(std::max<size_t>(outputSize, 1));
+    } else {
+        mInputMemory = TestAshmem::create(std::max<size_t>(inputSize, 1));
+        mOutputMemory = TestAshmem::create(std::max<size_t>(outputSize, 1));
+    }
+    CHECK_NE(mInputMemory, nullptr);
+    CHECK_NE(mOutputMemory, nullptr);
+    std::vector<RequestMemoryPool> pools;
+    pools.reserve(kDeviceMemoryBeginIndex + mBuffers.size());
+
+    auto copiedInputMemory = utils::clone(*mInputMemory->getAidlMemory());
+    CHECK(copiedInputMemory.has_value()) << copiedInputMemory.error().message;
+    auto copiedOutputMemory = utils::clone(*mOutputMemory->getAidlMemory());
+    CHECK(copiedOutputMemory.has_value()) << copiedOutputMemory.error().message;
+
+    pools.push_back(RequestMemoryPool::make<RequestMemoryPool::Tag::pool>(
+            std::move(copiedInputMemory).value()));
+    pools.push_back(RequestMemoryPool::make<RequestMemoryPool::Tag::pool>(
+            std::move(copiedOutputMemory).value()));
+    for (const auto& token : tokens) {
+        pools.push_back(RequestMemoryPool::make<RequestMemoryPool::Tag::token>(token));
+    }
+
+    // Copy input data to the input shared memory pool.
+    uint8_t* inputPtr = mInputMemory->getPointer();
+    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
+        if (!inputs[i].hasNoValue && inputs[i].location.poolIndex == kInputPoolIndex) {
+            const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
+            const uint8_t* begin = op.data.get<uint8_t>();
+            const uint8_t* end = begin + op.data.size();
+            std::copy(begin, end, inputPtr + inputs[i].location.offset);
+        }
+    }
+    return Request{
+            .inputs = std::move(inputs), .outputs = std::move(outputs), .pools = std::move(pools)};
+}
+
+std::vector<TestBuffer> ExecutionContext::getOutputBuffers(const TestModel& testModel,
+                                                           const Request& request) const {
+    uint8_t* sharedOutputBase = mOutputMemory->getPointer();
+    std::vector<TestBuffer> results;
+    for (uint32_t outIdx = 0; outIdx < request.outputs.size(); outIdx++) {
+        const auto& location = request.outputs[outIdx].location;
+        if (location.poolIndex == kOutputPoolIndex) {
+            results.emplace_back(location.length, sharedOutputBase + location.offset);
+            continue;  // Served straight from the shared output pool.
+        }
+        const auto& operand = testModel.main.operands[testModel.main.outputIndexes[outIdx]];
+        if (operand.data.size() == 0) {
+            results.emplace_back(0, nullptr);
+            continue;  // No expected data: record an empty buffer without touching the device.
+        }
+        // Otherwise the output lives in a device buffer and is copied out via getBuffer().
+        SCOPED_TRACE("Output index = " + std::to_string(outIdx));
+        const uint32_t deviceSlot = location.poolIndex - kDeviceMemoryBeginIndex;
+        TestBuffer copied;
+        getBuffer(mBuffers[deviceSlot], operand.data.size(), &copied);
+        results.push_back(std::move(copied));
+    }
+    return results;
+}
+
+// Reads `size` bytes out of an IBuffer into *testBuffer, staging them through shared memory.
+void ExecutionContext::getBuffer(const std::shared_ptr<IBuffer>& buffer, size_t size,
+                                 TestBuffer* testBuffer) const {
+    // Step 1: have the IBuffer copy its contents into freshly created shared memory.
+    auto stagingMemory = nn::createSharedMemory(size).value();
+    auto stagingAidl = utils::convert(stagingMemory).value();
+    const auto copyStatus = buffer->copyTo(stagingAidl);
+    ASSERT_TRUE(copyStatus.isOk());
+
+    // Step 2: map the shared memory and build the TestBuffer from its bytes.
+    const auto mapping = nn::map(stagingMemory).value();
+    const auto asBytes = [](auto* ptr) { return static_cast<const uint8_t*>(ptr); };
+    const uint8_t* mappedBytes = std::visit(asBytes, mapping.pointer);
+    ASSERT_NE(mappedBytes, nullptr);
+    ASSERT_NE(testBuffer, nullptr);
+    *testBuffer = TestBuffer(size, mappedBytes);
+}
+
+static bool hasZeroSizedOutput(const TestModel& testModel) {
+    for (const uint32_t operandIndex : testModel.main.outputIndexes) {
+        if (testModel.main.operands[operandIndex].data.size() == 0) return true;
+    }
+    return false;  // Every main-subgraph output has non-empty expected data.
+}
+
+void EvaluatePreparedModel(const std::shared_ptr<IDevice>& device,
+                           const std::shared_ptr<IPreparedModel>& preparedModel,
+                           const TestModel& testModel, const TestConfig& testConfig,
+                           bool* skipped = nullptr) {
+    if (skipped != nullptr) {
+        *skipped = false;
+    }
+    // If output0 does not have size larger than one byte, we can not test with insufficient buffer.
+    if (testConfig.outputType == OutputType::INSUFFICIENT &&
+        !isOutputSizeGreaterThanOne(testModel, 0)) {
+        return;
+    }
+    // Build the request; `context` owns the memory that backs it until the outputs are read.
+    ExecutionContext context(device, preparedModel);
+    auto maybeRequest = context.createRequest(testModel, testConfig.memoryType);
+    // Skip if testing memory domain but no device memory has been allocated.
+    if (!maybeRequest.has_value()) {
+        return;
+    }
+
+    Request request = std::move(maybeRequest).value();
+
+    constexpr uint32_t kInsufficientOutputIndex = 0;
+    if (testConfig.outputType == OutputType::INSUFFICIENT) {
+        makeOutputInsufficientSize(kInsufficientOutputIndex, &request);
+    }
+
+    int64_t loopTimeoutDuration = kOmittedTimeoutDuration;
+    // OutputType::MISSED_DEADLINE is only used by
+    // TestKind::INTINITE_LOOP_TIMEOUT tests to verify that an infinite loop is
+    // aborted after a timeout.
+    if (testConfig.outputType == OutputType::MISSED_DEADLINE) {
+        // Override the default loop timeout duration with a small value to
+        // speed up test execution.
+        constexpr int64_t kMillisecond = 1'000'000;
+        loopTimeoutDuration = 1 * kMillisecond;
+    }
+
+    ErrorStatus executionStatus;
+    std::vector<OutputShape> outputShapes;
+    Timing timing = kNoTiming;
+    switch (testConfig.executor) {
+        case Executor::SYNC: {
+            SCOPED_TRACE("synchronous");
+
+            ExecutionResult executionResult;
+            // execute
+            const auto ret = preparedModel->executeSynchronously(request, testConfig.measureTiming,
+                                                                 kNoDeadline, loopTimeoutDuration,
+                                                                 &executionResult);
+            ASSERT_TRUE(ret.isOk() || ret.getExceptionCode() == EX_SERVICE_SPECIFIC)
+                    << ret.getDescription();
+            if (ret.isOk()) {
+                executionStatus = executionResult.outputSufficientSize
+                                          ? ErrorStatus::NONE
+                                          : ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
+                outputShapes = std::move(executionResult.outputShapes);
+                timing = executionResult.timing;
+            } else {
+                executionStatus = static_cast<ErrorStatus>(ret.getServiceSpecificError());
+            }
+            break;
+        }
+        case Executor::FENCED: {
+            SCOPED_TRACE("fenced");
+            ErrorStatus result = ErrorStatus::NONE;
+            ndk::ScopedFileDescriptor syncFenceFd;
+            std::shared_ptr<IFencedExecutionCallback> fencedCallback;
+            auto ret = preparedModel->executeFenced(request, {}, testConfig.measureTiming,
+                                                    kNoDeadline, loopTimeoutDuration, kNoDuration,
+                                                    &syncFenceFd, &fencedCallback);
+            ASSERT_TRUE(ret.isOk() || ret.getExceptionCode() == EX_SERVICE_SPECIFIC)
+                    << ret.getDescription();
+            if (!ret.isOk()) {
+                result = static_cast<ErrorStatus>(ret.getServiceSpecificError());
+                executionStatus = result;
+            } else if (syncFenceFd.get() != -1) {
+                std::vector<ndk::ScopedFileDescriptor> waitFor;
+                auto dupFd = dup(syncFenceFd.get());
+                ASSERT_NE(dupFd, -1);
+                waitFor.emplace_back(dupFd);
+                // If a sync fence is returned, try start another run waiting for the sync fence.
+                ret = preparedModel->executeFenced(request, waitFor, testConfig.measureTiming,
+                                                   kNoDeadline, loopTimeoutDuration, kNoDuration,
+                                                   &syncFenceFd, &fencedCallback);
+                ASSERT_TRUE(ret.isOk());
+                waitForSyncFence(syncFenceFd.get());
+            }
+            if (result == ErrorStatus::NONE) {
+                ASSERT_NE(fencedCallback, nullptr);
+                Timing timingFenced;
+                auto ret =
+                        fencedCallback->getExecutionInfo(&timing, &timingFenced, &executionStatus);
+                ASSERT_TRUE(ret.isOk());
+            }
+            break;
+        }
+        default: {
+            FAIL() << "Unsupported execution mode for AIDL interface.";
+        }
+    }
+    // GENERAL_FAILURE with outputs that are not fully specified counts as "not supported".
+    if (testConfig.outputType != OutputType::FULLY_SPECIFIED &&
+        executionStatus == ErrorStatus::GENERAL_FAILURE) {
+        if (skipped != nullptr) {
+            *skipped = true;
+        }
+        if (!testConfig.reportSkipping) {
+            return;
+        }
+        LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
+                     "execute model that it does not support.";
+        std::cout << "[          ]   Early termination of test because vendor service cannot "
+                     "execute model that it does not support."
+                  << std::endl;
+        GTEST_SKIP();
+    }
+    if (!testConfig.measureTiming) {
+        EXPECT_EQ(timing, kNoTiming);
+    } else {
+        if (timing.timeOnDevice != -1 && timing.timeInDriver != -1) {
+            EXPECT_LE(timing.timeOnDevice, timing.timeInDriver);
+        }
+    }
+    // Validate the execution status and output shapes against the requested output type.
+    switch (testConfig.outputType) {
+        case OutputType::FULLY_SPECIFIED:
+            if (testConfig.executor == Executor::FENCED && hasZeroSizedOutput(testModel)) {
+                // Executor::FENCED does not support zero-sized output.
+                ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, executionStatus);
+                return;
+            }
+            // If the model output operands are fully specified, outputShapes must be
+            // either empty, or have the same number of elements as the number of outputs.
+            ASSERT_EQ(ErrorStatus::NONE, executionStatus);
+            ASSERT_TRUE(outputShapes.size() == 0 ||
+                        outputShapes.size() == testModel.main.outputIndexes.size());
+            break;
+        case OutputType::UNSPECIFIED:
+            if (testConfig.executor == Executor::FENCED) {
+                // For Executor::FENCED, the output shape must be fully specified.
+                ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, executionStatus);
+                return;
+            }
+            // If the model output operands are not fully specified, outputShapes must have
+            // the same number of elements as the number of outputs.
+            ASSERT_EQ(ErrorStatus::NONE, executionStatus);
+            ASSERT_EQ(outputShapes.size(), testModel.main.outputIndexes.size());
+            break;
+        case OutputType::INSUFFICIENT:
+            if (testConfig.executor == Executor::FENCED) {
+                // For Executor::FENCED, the output shape must be fully specified.
+                ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, executionStatus);
+                return;
+            }
+            ASSERT_EQ(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, executionStatus);
+            ASSERT_EQ(outputShapes.size(), testModel.main.outputIndexes.size());
+            // Check that all returned output dimensions are at least as fully specified as the
+            // union of the information about the corresponding operand in the model and in the
+            // request. In this test, all model outputs have known rank with all dimensions
+            // unspecified, and no dimensional information is provided in the request.
+            for (uint32_t i = 0; i < outputShapes.size(); i++) {
+                ASSERT_EQ(outputShapes[i].isSufficient, i != kInsufficientOutputIndex);
+                const auto& actual = outputShapes[i].dimensions;
+                const auto& golden =
+                        testModel.main.operands[testModel.main.outputIndexes[i]].dimensions;
+                ASSERT_EQ(actual.size(), golden.size());
+                for (uint32_t j = 0; j < actual.size(); j++) {
+                    if (actual[j] == 0) continue;
+                    EXPECT_EQ(actual[j], golden[j]) << "index: " << j;
+                }
+            }
+            return;
+        case OutputType::MISSED_DEADLINE:
+            ASSERT_TRUE(executionStatus == ErrorStatus::MISSED_DEADLINE_TRANSIENT ||
+                        executionStatus == ErrorStatus::MISSED_DEADLINE_PERSISTENT)
+                    << "executionStatus = " << executionStatus;
+            return;
+    }
+
+    // Go through all outputs, check returned output shapes.
+    for (uint32_t i = 0; i < outputShapes.size(); i++) {
+        EXPECT_TRUE(outputShapes[i].isSufficient);
+        const auto& expect = testModel.main.operands[testModel.main.outputIndexes[i]].dimensions;
+        const auto unsignedActual = nn::toUnsigned(outputShapes[i].dimensions);
+        ASSERT_TRUE(unsignedActual.has_value());
+        const std::vector<uint32_t>& actual = unsignedActual.value();
+        EXPECT_EQ(expect, actual);
+    }
+
+    // Retrieve execution results.
+    const std::vector<TestBuffer> outputs = context.getOutputBuffers(testModel, request);
+
+    // We want "close-enough" results.
+    checkResults(testModel, outputs);
+}
+
+void EvaluatePreparedModel(const std::shared_ptr<IDevice>& device,
+                           const std::shared_ptr<IPreparedModel>& preparedModel,
+                           const TestModel& testModel, TestKind testKind) {
+    std::vector<OutputType> outputTypes;
+    std::vector<bool> timingFlags;
+    std::vector<Executor> executors;
+    std::vector<MemoryType> memoryTypes;
+    // Each test kind selects the option lists whose full cross product is executed below.
+    switch (testKind) {
+        case TestKind::GENERAL:
+            outputTypes = {OutputType::FULLY_SPECIFIED};
+            timingFlags = {false, true};
+            executors = {Executor::SYNC};
+            memoryTypes = {MemoryType::ASHMEM};
+            break;
+        case TestKind::DYNAMIC_SHAPE:
+            outputTypes = {OutputType::UNSPECIFIED, OutputType::INSUFFICIENT};
+            timingFlags = {false, true};
+            executors = {Executor::SYNC, Executor::FENCED};
+            memoryTypes = {MemoryType::ASHMEM};
+            break;
+        case TestKind::MEMORY_DOMAIN:
+            outputTypes = {OutputType::FULLY_SPECIFIED};
+            timingFlags = {false};
+            executors = {Executor::SYNC, Executor::FENCED};
+            memoryTypes = {MemoryType::BLOB_AHWB, MemoryType::DEVICE};
+            break;
+        case TestKind::FENCED_COMPUTE:
+            outputTypes = {OutputType::FULLY_SPECIFIED};
+            timingFlags = {false, true};
+            executors = {Executor::FENCED};
+            memoryTypes = {MemoryType::ASHMEM};
+            break;
+        case TestKind::QUANTIZATION_COUPLING:
+            // Coupled models are evaluated by EvaluatePreparedCoupledModels, never through here.
+            LOG(FATAL) << "Wrong TestKind for EvaluatePreparedModel";
+            return;
+        case TestKind::INTINITE_LOOP_TIMEOUT:
+            outputTypes = {OutputType::MISSED_DEADLINE};
+            timingFlags = {false, true};
+            executors = {Executor::SYNC, Executor::FENCED};
+            memoryTypes = {MemoryType::ASHMEM};
+            break;
+    }
+    // Run every (outputType, measureTiming, executor, memoryType) combination, in that nesting.
+    for (const OutputType outputType : outputTypes) {
+        for (const bool measureTiming : timingFlags) {
+            for (const Executor executor : executors) {
+                for (const MemoryType memoryType : memoryTypes) {
+                    const TestConfig config(executor, measureTiming, outputType, memoryType);
+                    EvaluatePreparedModel(device, preparedModel, testModel, config);
+                }
+            }
+        }
+    }
+}
+
+void EvaluatePreparedCoupledModels(const std::shared_ptr<IDevice>& device,
+                                   const std::shared_ptr<IPreparedModel>& preparedModel,
+                                   const TestModel& testModel,
+                                   const std::shared_ptr<IPreparedModel>& preparedCoupledModel,
+                                   const TestModel& coupledModel) {
+    const std::vector<OutputType> outputTypes = {OutputType::FULLY_SPECIFIED};
+    const std::vector<bool> timingFlags = {false, true};
+    const std::vector<Executor> executors = {Executor::SYNC, Executor::FENCED};
+    // Both models run under the same ASHMEM configuration and must agree on being skipped.
+    for (const OutputType outputType : outputTypes) {
+        for (const bool measureTiming : timingFlags) {
+            for (const Executor executor : executors) {
+                const TestConfig config(executor, measureTiming, outputType, MemoryType::ASHMEM,
+                                        /*reportSkipping=*/false);
+                bool skippedBase = false;
+                EvaluatePreparedModel(device, preparedModel, testModel, config, &skippedBase);
+                bool skippedCoupled = false;
+                EvaluatePreparedModel(device, preparedCoupledModel, coupledModel, config,
+                                      &skippedCoupled);
+                ASSERT_EQ(skippedBase, skippedCoupled);
+                if (!skippedBase) continue;
+                // Both evaluations were skipped: report it once and skip the whole test.
+                LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
+                             "execute model that it does not support.";
+                std::cout << "[          ]   Early termination of test because vendor service "
+                             "cannot "
+                             "execute model that it does not support."
+                          << std::endl;
+                GTEST_SKIP();
+            }
+        }
+    }
+}
+
+void Execute(const std::shared_ptr<IDevice>& device, const TestModel& testModel,
+             TestKind testKind) {
+    Model model = createModel(testModel);
+    if (testKind == TestKind::DYNAMIC_SHAPE) {
+        makeOutputDimensionsUnspecified(&model);
+    }
+    // QUANTIZATION_COUPLING prepares two models; every other kind prepares and evaluates one.
+    std::shared_ptr<IPreparedModel> preparedModel;
+    switch (testKind) {
+        case TestKind::GENERAL:
+        case TestKind::DYNAMIC_SHAPE:
+        case TestKind::MEMORY_DOMAIN:
+        case TestKind::FENCED_COMPUTE:
+        case TestKind::INTINITE_LOOP_TIMEOUT: {
+            createPreparedModel(device, model, &preparedModel);
+            if (preparedModel == nullptr) return;
+            EvaluatePreparedModel(device, preparedModel, testModel, testKind);
+        } break;
+        case TestKind::QUANTIZATION_COUPLING: {
+            ASSERT_TRUE(testModel.hasQuant8CoupledOperands());
+            createPreparedModel(device, model, &preparedModel,
+                                /*reportSkipping*/ false);
+            TestModel signedQuantizedModel = convertQuant8AsymmOperandsToSigned(testModel);
+            std::shared_ptr<IPreparedModel> preparedCoupledModel;
+            createPreparedModel(device, createModel(signedQuantizedModel), &preparedCoupledModel,
+                                /*reportSkipping*/ false);
+            // If we couldn't prepare a model with unsigned quantization, we must
+            // fail to prepare a model with signed quantization as well.
+            if (preparedModel == nullptr) {
+                ASSERT_EQ(preparedCoupledModel, nullptr);
+                // If we failed to prepare both of the models, we can safely skip
+                // the test.
+                LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot "
+                             "prepare model that it does not support.";
+                std::cout
+                        << "[          ]   Early termination of test because vendor service cannot "
+                           "prepare model that it does not support."
+                        << std::endl;
+                GTEST_SKIP();
+            }
+            ASSERT_NE(preparedCoupledModel, nullptr);
+            EvaluatePreparedCoupledModels(device, preparedModel, testModel, preparedCoupledModel,
+                                          signedQuantizedModel);
+        } break;
+    }
+}
+
+void GeneratedTestBase::SetUp() {
+    testing::TestWithParam<GeneratedTestParam>::SetUp();
+    ASSERT_NE(kDevice, nullptr);
+}
+
+std::vector<NamedModel> getNamedModels(const FilterFn& filter) {
+    return TestModelManager::get().getTestModels(filter);
+}
+
+std::vector<NamedModel> getNamedModels(const FilterNameFn& filter) {
+    return TestModelManager::get().getTestModels(filter);
+}
+
+std::string printGeneratedTest(const testing::TestParamInfo<GeneratedTestParam>& info) {
+    const auto& [namedDevice, namedModel] = info.param;
+    return gtestCompliantName(getName(namedDevice) + "_" + getName(namedModel));
+}
+
+// Tag for the generated tests
+class GeneratedTest : public GeneratedTestBase {};
+
+// Tag for the dynamic output shape tests
+class DynamicOutputShapeTest : public GeneratedTest {};
+
+// Tag for the memory domain tests
+class MemoryDomainTest : public GeneratedTest {};
+
+// Tag for the fenced compute tests
+class FencedComputeTest : public GeneratedTest {};
+
+// Tag for the quantization coupling tests
+class QuantizationCouplingTest : public GeneratedTest {};
+
+// Tag for the loop timeout tests
+class InfiniteLoopTimeoutTest : public GeneratedTest {};
+
+TEST_P(GeneratedTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::GENERAL);
+}
+
+TEST_P(DynamicOutputShapeTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::DYNAMIC_SHAPE);
+}
+
+TEST_P(MemoryDomainTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::MEMORY_DOMAIN);
+}
+
+TEST_P(FencedComputeTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::FENCED_COMPUTE);
+}
+
+TEST_P(QuantizationCouplingTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::QUANTIZATION_COUPLING);
+}
+
+TEST_P(InfiniteLoopTimeoutTest, Test) {
+    Execute(kDevice, kTestModel, TestKind::INTINITE_LOOP_TIMEOUT);
+}
+
+INSTANTIATE_GENERATED_TEST(GeneratedTest,
+                           [](const TestModel& testModel) { return !testModel.expectFailure; });
+
+INSTANTIATE_GENERATED_TEST(DynamicOutputShapeTest, [](const TestModel& testModel) {
+    return !testModel.expectFailure && !testModel.hasScalarOutputs();
+});
+
+INSTANTIATE_GENERATED_TEST(MemoryDomainTest,
+                           [](const TestModel& testModel) { return !testModel.expectFailure; });
+
+INSTANTIATE_GENERATED_TEST(FencedComputeTest,
+                           [](const TestModel& testModel) { return !testModel.expectFailure; });
+
+INSTANTIATE_GENERATED_TEST(QuantizationCouplingTest, [](const TestModel& testModel) {
+    return !testModel.expectFailure && testModel.hasQuant8CoupledOperands() &&
+           testModel.main.operations.size() == 1;
+});
+
+INSTANTIATE_GENERATED_TEST(InfiniteLoopTimeoutTest, [](const TestModel& testModel) {
+    return testModel.isInfiniteLoopTimeoutTest();
+});
+
+}  // namespace aidl::android::hardware::neuralnetworks::vts::functional
diff --git a/neuralnetworks/aidl/vts/functional/GeneratedTestHarness.h b/neuralnetworks/aidl/vts/functional/GeneratedTestHarness.h
new file mode 100644
index 0000000000..ad40f06874
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/GeneratedTestHarness.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_HARDWARE_NEURALNETWORKS_AIDL_GENERATED_TEST_HARNESS_H
+#define ANDROID_HARDWARE_NEURALNETWORKS_AIDL_GENERATED_TEST_HARNESS_H
+
+#include <functional>
+#include <vector>
+
+#include <TestHarness.h>
+#include "Utils.h"
+#include "VtsHalNeuralnetworks.h"
+
+namespace aidl::android::hardware::neuralnetworks::vts::functional {
+
+using NamedModel = Named<const test_helper::TestModel*>;
+using GeneratedTestParam = std::tuple<NamedDevice, NamedModel>;
+
+class GeneratedTestBase : public testing::TestWithParam<GeneratedTestParam> {
+  protected:
+    void SetUp() override;
+    const std::shared_ptr<IDevice> kDevice = getData(std::get<NamedDevice>(GetParam()));
+    const test_helper::TestModel& kTestModel = *getData(std::get<NamedModel>(GetParam()));
+};
+
+using FilterFn = std::function<bool(const test_helper::TestModel&)>;
+std::vector<NamedModel> getNamedModels(const FilterFn& filter);
+
+using FilterNameFn = std::function<bool(const std::string&)>;
+std::vector<NamedModel> getNamedModels(const FilterNameFn& filter);
+
+std::string printGeneratedTest(const testing::TestParamInfo<GeneratedTestParam>& info);
+
+#define INSTANTIATE_GENERATED_TEST(TestSuite, filter)                                     \
+    GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(TestSuite);                             \
+    INSTANTIATE_TEST_SUITE_P(TestGenerated, TestSuite,                                    \
+                             testing::Combine(testing::ValuesIn(getNamedDevices()),       \
+                                              testing::ValuesIn(getNamedModels(filter))), \
+                             printGeneratedTest)
+
+// Tag for the validation tests, instantiated in VtsHalNeuralnetworks.cpp.
+// TODO: Clean up the hierarchy for ValidationTest.
+class ValidationTest : public GeneratedTestBase {};
+
+Model createModel(const test_helper::TestModel& testModel);
+
+void PrepareModel(const std::shared_ptr<IDevice>& device, const Model& model,
+                  std::shared_ptr<IPreparedModel>* preparedModel);
+
+enum class TestKind {
+    // Runs a test model and compares the results to golden data
+    GENERAL,
+    // Same as GENERAL but sets dimensions for the output tensors to zeros
+    DYNAMIC_SHAPE,
+    // Same as GENERAL but use device memories for inputs and outputs
+    MEMORY_DOMAIN,
+    // Same as GENERAL but use executeFenced for execution
+    FENCED_COMPUTE,
+    // Tests if quantized model with TENSOR_QUANT8_ASYMM produces the same result
+    // (OK/SKIPPED/FAILED) as the model with all such tensors converted to
+    // TENSOR_QUANT8_ASYMM_SIGNED.
+    QUANTIZATION_COUPLING,
+    // Runs a test model and verifies that MISSED_DEADLINE_* is returned.
+    INTINITE_LOOP_TIMEOUT
+};
+
+void EvaluatePreparedModel(const std::shared_ptr<IDevice>& device,
+                           const std::shared_ptr<IPreparedModel>& preparedModel,
+                           const test_helper::TestModel& testModel, TestKind testKind);
+
+void waitForSyncFence(int syncFd);
+
+}  // namespace aidl::android::hardware::neuralnetworks::vts::functional
+
+#endif  // ANDROID_HARDWARE_NEURALNETWORKS_AIDL_GENERATED_TEST_HARNESS_H
diff --git a/neuralnetworks/aidl/vts/functional/LogTestCaseToLogcat.h b/neuralnetworks/aidl/vts/functional/LogTestCaseToLogcat.h
new file mode 100644
index 0000000000..c9fd432a43
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/LogTestCaseToLogcat.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_HARDWARE_NEURALNETWORKS_AIDL_LOG_TEST_CASE_TO_LOGCAT_H
+#define ANDROID_HARDWARE_NEURALNETWORKS_AIDL_LOG_TEST_CASE_TO_LOGCAT_H
+
+#include <android-base/logging.h>
+#include <gtest/gtest.h>
+
+namespace aidl::android::hardware::neuralnetworks {
+
+class LogTestCaseToLogcat : public ::testing::EmptyTestEventListener {
+  public:
+    void OnTestStart(const ::testing::TestInfo& test_info) override {
+        LOG(INFO) << "[Test Case] " << test_info.test_suite_name() << "." << test_info.name()
+                  << " BEGIN";
+    }
+
+    void OnTestEnd(const ::testing::TestInfo& test_info) override {
+        LOG(INFO) << "[Test Case] " << test_info.test_suite_name() << "." << test_info.name()
+                  << " END";
+    }
+};
+
+}  // namespace aidl::android::hardware::neuralnetworks
+
+#endif  // ANDROID_HARDWARE_NEURALNETWORKS_AIDL_LOG_TEST_CASE_TO_LOGCAT_H
diff --git a/neuralnetworks/aidl/vts/functional/MemoryDomainTests.cpp b/neuralnetworks/aidl/vts/functional/MemoryDomainTests.cpp
new file mode 100644
index 0000000000..a37a0caa29
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/MemoryDomainTests.cpp
@@ -0,0 +1,1176 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "neuralnetworks_aidl_hal_test"
+
+#include <android-base/logging.h>
+#include <android/binder_auto_utils.h>
+#include <android/binder_interface_utils.h>
+#include <android/binder_status.h>
+#include <gtest/gtest.h>
+
+#include <LegacyUtils.h>
+#include <TestHarness.h>
+#include <Utils.h>
+#include <nnapi/SharedMemory.h>
+#include <nnapi/hal/aidl/Conversions.h>
+#include <nnapi/hal/aidl/Utils.h>
+
+#include "AidlHalInterfaces.h"
+#include "Callbacks.h"
+#include "GeneratedTestHarness.h"
+#include "MemoryUtils.h"
+#include "Utils.h"
+#include "VtsHalNeuralnetworks.h"
+
+namespace aidl::android::hardware::neuralnetworks::vts::functional {
+
+using namespace test_helper;
+using implementation::PreparedModelCallback;
+
+namespace {
+
+// An AIDL driver is likely to support at least one of the following operand types.
+const std::vector<TestOperandType> kTestOperandTypeChoicesVector = {
+        TestOperandType::TENSOR_FLOAT32,
+        TestOperandType::TENSOR_FLOAT16,
+        TestOperandType::TENSOR_QUANT8_ASYMM,
+        TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED,
+};
+const auto kTestOperandTypeChoices = testing::ValuesIn(kTestOperandTypeChoicesVector);
+// TODO(b/179270601): restore kNamedDeviceChoices
+
+bool isInChoices(TestOperandType type) {
+    return std::count(kTestOperandTypeChoicesVector.begin(), kTestOperandTypeChoicesVector.end(),
+                      type) > 0;
+}
+
+bool isFloat(TestOperandType type) {
+    CHECK(isInChoices(type));
+    return type == TestOperandType::TENSOR_FLOAT32 || type == TestOperandType::TENSOR_FLOAT16;
+}
+
+// Create placeholder buffers for model constants as well as inputs and outputs.
+// We only care about the size here because we will not check accuracy in validation tests.
+void createDummyData(TestModel* testModel) {
+    for (auto& operand : testModel->main.operands) {
+        if (operand.data != nullptr) continue;
+        switch (operand.lifetime) {
+            case TestOperandLifeTime::SUBGRAPH_INPUT:
+            case TestOperandLifeTime::SUBGRAPH_OUTPUT:
+            case TestOperandLifeTime::CONSTANT_COPY:
+            case TestOperandLifeTime::CONSTANT_REFERENCE: {
+                const uint32_t size = nn::nonExtensionOperandSizeOfData(
+                        static_cast<nn::OperandType>(operand.type), operand.dimensions);
+                operand.data = TestBuffer(size);
+            } break;
+            default:
+                break;
+        }
+    }
+}
+
+TestOperand createInt32Scalar(int32_t value) {
+    return {
+            .type = TestOperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = TestOperandLifeTime::CONSTANT_COPY,
+            .data = TestBuffer::createFromVector<int32_t>({value}),
+    };
+}
+
+// Construct a test model with multiple CONV_2D operations with the given operand as inputs.
+// The dimensions of the filters are chosen to ensure outputs have the same dimensions as inputs.
+// We choose CONV_2D operation because it is commonly supported by most drivers.
+TestModel createConvModel(const TestOperand& operand, uint32_t numOperations) {
+    CHECK(isInChoices(operand.type));
+
+    TestOperand weight = {.type = operand.type,
+                          .dimensions = {operand.dimensions[3], 3, 3, operand.dimensions[3]},
+                          .numberOfConsumers = 1,
+                          .scale = isFloat(operand.type) ? 0.0f : 1.0f,
+                          .zeroPoint = 0,
+                          .lifetime = TestOperandLifeTime::CONSTANT_COPY};
+
+    TestOperand bias = {
+            .type = isFloat(operand.type) ? operand.type : TestOperandType::TENSOR_INT32,
+            .dimensions = {operand.dimensions[3]},
+            .numberOfConsumers = 1,
+            .scale = operand.scale * weight.scale,
+            .zeroPoint = 0,
+            .lifetime = TestOperandLifeTime::CONSTANT_COPY};
+
+    TestOperand output = operand;
+    output.numberOfConsumers = 0;
+    output.lifetime = TestOperandLifeTime::SUBGRAPH_OUTPUT;
+
+    const std::vector<TestOperand> operands = {
+            operand,
+            std::move(weight),
+            std::move(bias),
+            createInt32Scalar(1),  // same padding
+            createInt32Scalar(1),  // width stride
+            createInt32Scalar(1),  // height stride
+            createInt32Scalar(0),  // activation = NONE
+            std::move(output),
+    };
+
+    TestModel model;
+    for (uint32_t i = 0; i < numOperations; i++) {
+        model.main.operands.insert(model.main.operands.end(), operands.begin(), operands.end());
+        const uint32_t inputIndex = operands.size() * i;
+        const uint32_t outputIndex = inputIndex + operands.size() - 1;
+        std::vector<uint32_t> inputs(operands.size() - 1);
+        std::iota(inputs.begin(), inputs.end(), inputIndex);
+        model.main.operations.push_back({.type = TestOperationType::CONV_2D,
+                                         .inputs = std::move(inputs),
+                                         .outputs = {outputIndex}});
+        model.main.inputIndexes.push_back(inputIndex);
+        model.main.outputIndexes.push_back(outputIndex);
+    }
+    createDummyData(&model);
+    return model;
+}
+
+// Construct a test model with a single ADD operation with the given operand as input0 and input1.
+// This is to cover additional cases that the CONV_2D model does not support, e.g. arbitrary input
+// operand rank, scalar input operand. We choose ADD operation because it is commonly supported by
+// most drivers.
+TestModel createSingleAddModel(const TestOperand& operand) {
+    CHECK(isInChoices(operand.type));
+
+    TestOperand act = {
+            .type = TestOperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = TestOperandLifeTime::SUBGRAPH_INPUT,
+    };
+
+    TestOperand output = operand;
+    output.numberOfConsumers = 0;
+    output.lifetime = TestOperandLifeTime::SUBGRAPH_OUTPUT;
+
+    TestModel model = {
+            .main =
+                    {
+                            .operands =
+                                    {
+                                            operand,
+                                            operand,
+                                            std::move(act),
+                                            output,
+                                    },
+                            .operations = {{.type = TestOperationType::ADD,
+                                            .inputs = {0, 1, 2},
+                                            .outputs = {3}}},
+                            .inputIndexes = {0, 1, 2},
+                            .outputIndexes = {3},
+                    },
+    };
+    createDummyData(&model);
+    return model;
+}
+
+// A placeholder invalid IPreparedModel class for MemoryDomainAllocateTest.InvalidPreparedModel
+class InvalidPreparedModel : public BnPreparedModel {
+  public:
+    ndk::ScopedAStatus executeSynchronously(const Request&, bool, int64_t, int64_t,
+                                            ExecutionResult*) override {
+        return ndk::ScopedAStatus::fromServiceSpecificError(
+                static_cast<int32_t>(ErrorStatus::GENERAL_FAILURE));
+    }
+    ndk::ScopedAStatus executeFenced(const Request&, const std::vector<ndk::ScopedFileDescriptor>&,
+                                     bool, int64_t, int64_t, int64_t, ndk::ScopedFileDescriptor*,
+                                     std::shared_ptr<IFencedExecutionCallback>*) override {
+        return ndk::ScopedAStatus::fromServiceSpecificError(
+                static_cast<int32_t>(ErrorStatus::GENERAL_FAILURE));
+    }
+};
+
+// Returns a vector holding a clone of every argument, in order (clone results are not checked).
+template <typename... Args>
+std::vector<RequestMemoryPool> createRequestMemoryPools(const Args&... pools) {
+    std::vector<RequestMemoryPool> memoryPools;
+    memoryPools.reserve(sizeof...(Args));
+    (memoryPools.push_back(utils::clone(pools).value()), ...);  // fold over all arguments
+    return memoryPools;
+}
+
+}  // namespace
+
+class MemoryDomainTestBase : public testing::Test {
+  protected:
+    MemoryDomainTestBase(std::shared_ptr<IDevice> device, TestOperandType type)
+        : kDevice(std::move(device)),
+          kTestOperandType(type),
+          kTestOperand(kTestOperandMap.at(type)),
+          kTestOperandDataSize(nn::nonExtensionOperandSizeOfData(static_cast<nn::OperandType>(type),
+                                                                 kTestOperand.dimensions)) {}
+
+    void SetUp() override {
+        testing::Test::SetUp();
+        ASSERT_NE(kDevice, nullptr);
+    }
+
+    std::shared_ptr<IPreparedModel> createConvPreparedModel(const TestOperand& testOperand,
+                                                            uint32_t numOperations = 1) {
+        const TestModel testModel = createConvModel(testOperand, numOperations);
+        const Model model = createModel(testModel);
+        std::shared_ptr<IPreparedModel> preparedModel;
+        createPreparedModel(kDevice, model, &preparedModel, /*reportSkipping=*/false);
+        return preparedModel;
+    }
+
+    std::shared_ptr<IPreparedModel> createAddPreparedModel(const TestOperand& testOperand) {
+        const TestModel testModel = createSingleAddModel(testOperand);
+        const Model model = createModel(testModel);
+        std::shared_ptr<IPreparedModel> preparedModel;
+        createPreparedModel(kDevice, model, &preparedModel, /*reportSkipping=*/false);
+        return preparedModel;
+    }
+
+    static const std::map<TestOperandType, TestOperand> kTestOperandMap;
+
+    const std::shared_ptr<IDevice> kDevice;
+    const TestOperandType kTestOperandType;
+    const TestOperand& kTestOperand;
+    const uint32_t kTestOperandDataSize;
+};
+
+const std::map<TestOperandType, TestOperand> MemoryDomainTestBase::kTestOperandMap = {
+        {TestOperandType::TENSOR_FLOAT32,
+         {
+                 .type = TestOperandType::TENSOR_FLOAT32,
+                 .dimensions = {1, 32, 32, 8},
+                 .numberOfConsumers = 1,
+                 .scale = 0.0f,
+                 .zeroPoint = 0,
+                 .lifetime = TestOperandLifeTime::SUBGRAPH_INPUT,
+         }},
+        {TestOperandType::TENSOR_FLOAT16,
+         {
+                 .type = TestOperandType::TENSOR_FLOAT16,
+                 .dimensions = {1, 32, 32, 8},
+                 .numberOfConsumers = 1,
+                 .scale = 0.0f,
+                 .zeroPoint = 0,
+                 .lifetime = TestOperandLifeTime::SUBGRAPH_INPUT,
+         }},
+        {TestOperandType::TENSOR_QUANT8_ASYMM,
+         {
+                 .type = TestOperandType::TENSOR_QUANT8_ASYMM,
+                 .dimensions = {1, 32, 32, 8},
+                 .numberOfConsumers = 1,
+                 .scale = 0.5f,
+                 .zeroPoint = 0,
+                 .lifetime = TestOperandLifeTime::SUBGRAPH_INPUT,
+         }},
+        {TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED,
+         {
+                 .type = TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED,
+                 .dimensions = {1, 32, 32, 8},
+                 .numberOfConsumers = 1,
+                 .scale = 0.5f,
+                 .zeroPoint = 0,
+                 .lifetime = TestOperandLifeTime::SUBGRAPH_INPUT,
+         }},
+};
+
+using MemoryDomainAllocateTestParam = std::tuple<NamedDevice, TestOperandType>;
+class MemoryDomainAllocateTest : public MemoryDomainTestBase,
+                                 public testing::WithParamInterface<MemoryDomainAllocateTestParam> {
+  protected:
+    MemoryDomainAllocateTest()
+        : MemoryDomainTestBase(getData(std::get<NamedDevice>(GetParam())),
+                               std::get<TestOperandType>(GetParam())) {}
+
+    struct AllocateTestArgs {
+        std::vector<int32_t> dimensions;
+        std::vector<std::shared_ptr<IPreparedModel>> preparedModels;
+        std::vector<BufferRole> inputRoles;
+        std::vector<BufferRole> outputRoles;
+    };
+
+    // Validation test for IDevice::allocate. The driver is expected to fail with INVALID_ARGUMENT,
+    // or GENERAL_FAILURE if memory domain is not supported.
+    void validateAllocate(AllocateTestArgs args) {
+        std::vector<IPreparedModelParcel> preparedModelParcels;
+        preparedModelParcels.reserve(args.preparedModels.size());
+        for (const auto& model : args.preparedModels) {
+            preparedModelParcels.push_back({.preparedModel = model});
+        }
+        DeviceBuffer buffer;
+        const auto ret =
+                kDevice->allocate({.dimensions = std::move(args.dimensions)}, preparedModelParcels,
+                                  args.inputRoles, args.outputRoles, &buffer);
+
+        ASSERT_EQ(ret.getExceptionCode(), EX_SERVICE_SPECIFIC);
+        ASSERT_TRUE(static_cast<ErrorStatus>(ret.getServiceSpecificError()) ==
+                            ErrorStatus::INVALID_ARGUMENT ||
+                    static_cast<ErrorStatus>(ret.getServiceSpecificError()) ==
+                            ErrorStatus::GENERAL_FAILURE);
+    }
+
+    void testConflictOperands(const std::shared_ptr<IPreparedModel>& model1,
+                              const std::shared_ptr<IPreparedModel>& model2) {
+        validateAllocate({
+                .preparedModels = {model1, model2},
+                .inputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f},
+                               {.modelIndex = 1, .ioIndex = 0, .frequency = 1.0f}},
+        });
+        validateAllocate({
+                .preparedModels = {model1, model2},
+                .inputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f}},
+                .outputRoles = {{.modelIndex = 1, .ioIndex = 0, .frequency = 1.0f}},
+        });
+        validateAllocate({
+                .preparedModels = {model1, model2},
+                .outputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f},
+                                {.modelIndex = 1, .ioIndex = 0, .frequency = 1.0f}},
+        });
+    }
+};
+
+TEST_P(MemoryDomainAllocateTest, EmptyRole) {
+    // Test with empty prepared models and roles.
+    validateAllocate({});
+
+    auto preparedModel = createConvPreparedModel(kTestOperand);
+    if (preparedModel == nullptr) return;
+
+    // Test again with non-empty prepared models but empty roles.
+    validateAllocate({
+            .preparedModels = {preparedModel},
+    });
+}
+
+TEST_P(MemoryDomainAllocateTest, NullptrPreparedModel) {
+    // Test with nullptr prepared model as input role.
+    validateAllocate({
+            .preparedModels = {nullptr},
+            .inputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f}},
+    });
+
+    // Test with nullptr prepared model as output role.
+    validateAllocate({
+            .preparedModels = {nullptr},
+            .outputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f}},
+    });
+}
+
+TEST_P(MemoryDomainAllocateTest, InvalidPreparedModel) {
+    std::shared_ptr<InvalidPreparedModel> invalidPreparedModel =
+            ndk::SharedRefBase::make<InvalidPreparedModel>();
+
+    // Test with invalid prepared model as input role.
+    validateAllocate({
+            .preparedModels = {invalidPreparedModel},
+            .inputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f}},
+    });
+
+    // Test with invalid prepared model as output role.
+    validateAllocate({
+            .preparedModels = {invalidPreparedModel},
+            .outputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f}},
+    });
+}
+
+TEST_P(MemoryDomainAllocateTest, InvalidModelIndex) {
+    auto preparedModel = createConvPreparedModel(kTestOperand);
+    if (preparedModel == nullptr) return;
+
+    // This should fail, because the model index is out of bound.
+    validateAllocate({
+            .preparedModels = {preparedModel},
+            .inputRoles = {{.modelIndex = 1, .ioIndex = 0, .frequency = 1.0f}},
+    });
+
+    // This should fail, because the model index is out of bound.
+    validateAllocate({
+            .preparedModels = {preparedModel},
+            .outputRoles = {{.modelIndex = 1, .ioIndex = 0, .frequency = 1.0f}},
+    });
+}
+
+TEST_P(MemoryDomainAllocateTest, InvalidIOIndex) {
+    auto preparedModel = createConvPreparedModel(kTestOperand);
+    if (preparedModel == nullptr) return;
+
+    // This should fail, because the model only has one input.
+    validateAllocate({
+            .preparedModels = {preparedModel},
+            .inputRoles = {{.modelIndex = 0, .ioIndex = 1, .frequency = 1.0f}},
+    });
+
+    // This should fail, because the model only has one output.
+    validateAllocate({
+            .preparedModels = {preparedModel},
+            .outputRoles = {{.modelIndex = 0, .ioIndex = 1, .frequency = 1.0f}},
+    });
+}
+
+TEST_P(MemoryDomainAllocateTest, InvalidFrequency) {
+    auto preparedModel = createConvPreparedModel(kTestOperand);
+    if (preparedModel == nullptr) return;
+
+    for (float invalidFreq : {10.0f, 0.0f, -0.5f}) {
+        // Test with invalid frequency for input roles.
+        validateAllocate({
+                .preparedModels = {preparedModel},
+                .inputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = invalidFreq}},
+        });
+        // Test with invalid frequency for output roles.
+        validateAllocate({
+                .preparedModels = {preparedModel},
+                .outputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = invalidFreq}},
+        });
+    }
+}
+
+TEST_P(MemoryDomainAllocateTest, SameRoleSpecifiedTwice) {
+    auto preparedModel = createConvPreparedModel(kTestOperand);
+    if (preparedModel == nullptr) return;
+
+    // Same role with same model index.
+    validateAllocate({
+            .preparedModels = {preparedModel},
+            .inputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f},
+                           {.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f}},
+    });
+    validateAllocate({
+            .preparedModels = {preparedModel},
+            .outputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f},
+                            {.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f}},
+    });
+
+    // Different model indexes, but logically referring to the same role.
+    validateAllocate({
+            .preparedModels = {preparedModel, preparedModel},
+            .inputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f},
+                           {.modelIndex = 1, .ioIndex = 0, .frequency = 1.0f}},
+    });
+    validateAllocate({
+            .preparedModels = {preparedModel, preparedModel},
+            .outputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f},
+                            {.modelIndex = 1, .ioIndex = 0, .frequency = 1.0f}},
+    });
+}
+
+TEST_P(MemoryDomainAllocateTest, ConflictOperandType) {
+    const std::map<TestOperandType, TestOperandType> conflictTypeMap = {
+            {TestOperandType::TENSOR_FLOAT32, TestOperandType::TENSOR_FLOAT16},
+            {TestOperandType::TENSOR_FLOAT16, TestOperandType::TENSOR_FLOAT32},
+            {TestOperandType::TENSOR_QUANT8_ASYMM, TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED},
+            {TestOperandType::TENSOR_QUANT8_ASYMM_SIGNED, TestOperandType::TENSOR_QUANT8_ASYMM},
+    };
+
+    TestOperand conflictTestOperand = kTestOperand;
+    const auto it = conflictTypeMap.find(kTestOperandType);
+    ASSERT_FALSE(it == conflictTypeMap.end());
+    conflictTestOperand.type = it->second;
+
+    auto preparedModel = createConvPreparedModel(kTestOperand);
+    auto conflictPreparedModel = createConvPreparedModel(conflictTestOperand);
+    if (preparedModel == nullptr || conflictPreparedModel == nullptr) return;
+    testConflictOperands(preparedModel, conflictPreparedModel);
+}
+
+TEST_P(MemoryDomainAllocateTest, ConflictScale) {
+    if (isFloat(kTestOperandType)) return;
+
+    TestOperand conflictTestOperand = kTestOperand;
+    ASSERT_NE(conflictTestOperand.scale, 1.0f);
+    conflictTestOperand.scale = 1.0f;
+
+    auto preparedModel = createConvPreparedModel(kTestOperand);
+    auto conflictPreparedModel = createConvPreparedModel(conflictTestOperand);
+    if (preparedModel == nullptr || conflictPreparedModel == nullptr) return;
+    testConflictOperands(preparedModel, conflictPreparedModel);
+}
+
+TEST_P(MemoryDomainAllocateTest, ConflictZeroPoint) {
+    if (isFloat(kTestOperandType)) return;
+
+    TestOperand conflictTestOperand = kTestOperand;
+    ASSERT_NE(conflictTestOperand.zeroPoint, 10);
+    conflictTestOperand.zeroPoint = 10;
+
+    auto preparedModel = createConvPreparedModel(kTestOperand);
+    auto conflictPreparedModel = createConvPreparedModel(conflictTestOperand);
+    if (preparedModel == nullptr || conflictPreparedModel == nullptr) return;
+    testConflictOperands(preparedModel, conflictPreparedModel);
+}
+
+TEST_P(MemoryDomainAllocateTest, ConflictRankBetweenRoles) {  // Two roles of different rank.
+    TestOperand lowerRankOperand = kTestOperand;
+    lowerRankOperand.dimensions.pop_back();  // Drop the last dimension: rank shrinks by one.
+
+    auto baselineModel = createAddPreparedModel(kTestOperand);
+    auto lowerRankModel = createAddPreparedModel(lowerRankOperand);
+    if (!baselineModel || !lowerRankModel) return;
+    testConflictOperands(baselineModel, lowerRankModel);
+}
+
+TEST_P(MemoryDomainAllocateTest, ConflictDimensionsBetweenRoles) {  // Same rank, new dim 0.
+    TestOperand reshapedOperand = kTestOperand;
+    reshapedOperand.dimensions[0] = 4;  // Only the leading dimension is overridden.
+
+    auto baselineModel = createConvPreparedModel(kTestOperand);
+    auto reshapedModel = createConvPreparedModel(reshapedOperand);
+    if (!baselineModel || !reshapedModel) return;
+    testConflictOperands(baselineModel, reshapedModel);
+}
+
+TEST_P(MemoryDomainAllocateTest, ConflictRankBetweenRoleAndDesc) {  // Desc rank != role rank.
+    auto convModel = createConvPreparedModel(kTestOperand);
+    if (!convModel) return;
+
+    auto truncatedDims = utils::toSigned(kTestOperand.dimensions).value();
+    truncatedDims.pop_back();
+
+    validateAllocate({  // Descriptor paired with input 0 of the model.
+            .dimensions = truncatedDims,
+            .preparedModels = {convModel},
+            .inputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f}},
+    });
+    validateAllocate({  // Same descriptor paired with output 0 of the model.
+            .dimensions = truncatedDims,
+            .preparedModels = {convModel},
+            .outputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f}},
+    });
+}
+
+TEST_P(MemoryDomainAllocateTest, ConflictDimensionsBetweenRoleAndDesc) {  // Desc dim 0 = 4.
+    auto convModel = createConvPreparedModel(kTestOperand);
+    if (!convModel) return;
+
+    auto overriddenDims = utils::toSigned(kTestOperand.dimensions).value();
+    overriddenDims[0] = 4;
+
+    validateAllocate({  // Descriptor paired with input 0 of the model.
+            .dimensions = overriddenDims,
+            .preparedModels = {convModel},
+            .inputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f}},
+    });
+    validateAllocate({  // Same descriptor paired with output 0 of the model.
+            .dimensions = overriddenDims,
+            .preparedModels = {convModel},
+            .outputRoles = {{.modelIndex = 0, .ioIndex = 0, .frequency = 1.0f}},
+    });
+}
+
+TEST_P(MemoryDomainAllocateTest, ConflictRankWithScalarRole) {
+    auto addModel = createAddPreparedModel(kTestOperand);
+    if (!addModel) return;
+
+    // The role below targets a scalar operand, so a descriptor that carries a non-empty
+    // dimension list is expected to be rejected.
+    validateAllocate({
+            .dimensions = {1},
+            .preparedModels = {addModel},
+            .inputRoles = {{.modelIndex = 0, .ioIndex = 2, .frequency = 1.0f}},
+    });
+}
+
+std::string printMemoryDomainAllocateTest(  // Test-name suffix from device and operand type.
+        const testing::TestParamInfo<MemoryDomainAllocateTestParam>& info) {
+    const auto& [device, testOperandType] = info.param;
+    const std::string typeName = toString(static_cast<OperandType>(testOperandType));
+    return gtestCompliantName(getName(device) + "_" + typeName);
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MemoryDomainAllocateTest);  // May have no instances.
+INSTANTIATE_TEST_SUITE_P(TestMemoryDomain, MemoryDomainAllocateTest,
+                         testing::Combine(testing::ValuesIn(getNamedDevices()),  // device x type
+                                          kTestOperandTypeChoices),
+                         printMemoryDomainAllocateTest);
+
+class MemoryDomainCopyTestBase : public MemoryDomainTestBase {
+  protected:
+    MemoryDomainCopyTestBase(std::shared_ptr<IDevice> device, TestOperandType type)
+        : MemoryDomainTestBase(std::move(device), type) {}
+
+    // Requests one device buffer for the listed input/output indexes of a single prepared model.
+    // Yields {nullptr, 0} when the model is null or the driver reports an allocation failure.
+    DeviceBuffer allocateBuffer(const std::shared_ptr<IPreparedModel>& preparedModel,
+                                const std::vector<int32_t>& inputIndexes,
+                                const std::vector<int32_t>& outputIndexes,
+                                const std::vector<int32_t>& dimensions) {
+        // Without a prepared model there is nothing to allocate against.
+        if (!preparedModel) return {.buffer = nullptr, .token = 0};
+
+        // Every role refers to model 0 because exactly one prepared model is passed below.
+        const auto toRole = [](int32_t ioIndex) -> BufferRole {
+            return {.modelIndex = 0, .ioIndex = ioIndex, .frequency = 1.0f};
+        };
+        std::vector<BufferRole> inputRoles(inputIndexes.size()), outputRoles(outputIndexes.size());
+        std::transform(inputIndexes.begin(), inputIndexes.end(), inputRoles.begin(), toRole);
+        std::transform(outputIndexes.begin(), outputIndexes.end(), outputRoles.begin(), toRole);
+
+        IPreparedModelParcel parcel;
+        parcel.preparedModel = preparedModel;
+
+        // Out-parameter handed to kDevice->allocate.
+        DeviceBuffer allocation;
+
+        const auto status = kDevice->allocate({.dimensions = dimensions}, {parcel}, inputRoles,
+                                              outputRoles, &allocation);
+
+        if (!status.isOk()) {
+            // The only failure accepted here is a service-specific GENERAL_FAILURE.
+            EXPECT_EQ(status.getExceptionCode(), EX_SERVICE_SPECIFIC);
+            EXPECT_EQ(static_cast<ErrorStatus>(status.getServiceSpecificError()),
+                      ErrorStatus::GENERAL_FAILURE);
+            return DeviceBuffer{.buffer = nullptr, .token = 0};
+        }
+
+        // On success a non-null buffer and a positive token are expected.
+        EXPECT_NE(allocation.buffer, nullptr);
+        EXPECT_GT(allocation.token, 0);
+
+        return allocation;
+    }
+
+    DeviceBuffer allocateBuffer(const std::shared_ptr<IPreparedModel>& preparedModel,
+                                const std::vector<int32_t>& inputIndexes,
+                                const std::vector<int32_t>& outputIndexes) {
+        return allocateBuffer(preparedModel, inputIndexes, outputIndexes, {});  // Empty dimensions.
+    }
+
+    Memory allocateSharedMemory(uint32_t size) {
+        const auto nnMemory = nn::createSharedMemory(size).value();
+        auto aidlMemory = utils::convert(nnMemory).value();  // Converted form used by the tests.
+        EXPECT_EQ(aidlMemory.size, size);
+        return aidlMemory;
+    }
+
+    void testCopyFrom(const std::shared_ptr<IBuffer>& buffer, const Memory& memory,
+                      const std::vector<int32_t>& dimensions, ErrorStatus expectedStatus) {
+        const auto status = buffer->copyFrom(memory, dimensions);  // Checked against expectation.
+        if (expectedStatus != ErrorStatus::NONE) {
+            ASSERT_EQ(status.getExceptionCode(), EX_SERVICE_SPECIFIC);
+            ASSERT_EQ(expectedStatus, static_cast<ErrorStatus>(status.getServiceSpecificError()));
+        } else {
+            ASSERT_TRUE(status.isOk());
+        }
+    }
+
+    void testCopyTo(const std::shared_ptr<IBuffer>& buffer, const Memory& memory,
+                    ErrorStatus expectedStatus) {
+        const auto status = buffer->copyTo(memory);  // Checked against expectation.
+        if (expectedStatus != ErrorStatus::NONE) {
+            ASSERT_EQ(status.getExceptionCode(), EX_SERVICE_SPECIFIC);
+            ASSERT_EQ(expectedStatus, static_cast<ErrorStatus>(status.getServiceSpecificError()));
+        } else {
+            ASSERT_TRUE(status.isOk());
+        }
+    }
+
+    void initializeDeviceMemory(const std::shared_ptr<IBuffer>& buffer) {
+        Memory source = allocateSharedMemory(kTestOperandDataSize);
+        ASSERT_EQ(source.size, kTestOperandDataSize);
+        testCopyFrom(buffer, source, utils::toSigned(kTestOperand.dimensions).value(),
+                     ErrorStatus::NONE);  // Fills the buffer through a copy that must succeed.
+    }
+};
+
+using MemoryDomainCopyTestParam = std::tuple<NamedDevice, TestOperandType>;
+class MemoryDomainCopyTest : public MemoryDomainCopyTestBase,
+                             public testing::WithParamInterface<MemoryDomainCopyTestParam> {
+  protected:
+    MemoryDomainCopyTest()  // Unpacks the (device, operand type) parameter for the base class.
+        : MemoryDomainCopyTestBase(getData(std::get<0>(GetParam())),
+                                   std::get<1>(GetParam())) {}
+};
+
+TEST_P(MemoryDomainCopyTest, CopyFrom_InvalidMemorySize) {
+    auto convModel = createConvPreparedModel(kTestOperand);
+    auto [deviceBuffer, deviceToken] = allocateBuffer(convModel, {0}, {0});
+    if (!deviceBuffer) return;
+
+    // A source that is half or double the operand size must be rejected.
+    Memory undersized = allocateSharedMemory(kTestOperandDataSize / 2);
+    Memory oversized = allocateSharedMemory(kTestOperandDataSize * 2);
+    testCopyFrom(deviceBuffer, undersized, {}, ErrorStatus::INVALID_ARGUMENT);
+    testCopyFrom(deviceBuffer, oversized, {}, ErrorStatus::INVALID_ARGUMENT);
+}
+
+TEST_P(MemoryDomainCopyTest, CopyFrom_InvalidMemorySize_DynamicShape) {
+    TestOperand dynamicOperand = kTestOperand;
+    dynamicOperand.dimensions[0] = 0;  // Leading dimension zeroed for the dynamic-shape variant.
+    auto convModel = createConvPreparedModel(dynamicOperand);
+    auto [deviceBuffer, deviceToken] = allocateBuffer(convModel, {0}, {0});
+    if (!deviceBuffer) return;
+
+    // Sources of half, double, and exactly the size of the unmodified operand.
+    Memory undersized = allocateSharedMemory(kTestOperandDataSize / 2);
+    Memory oversized = allocateSharedMemory(kTestOperandDataSize * 2);
+    Memory exactSize = allocateSharedMemory(kTestOperandDataSize);
+
+    const auto fullDims = utils::toSigned(kTestOperand.dimensions).value();
+    auto mismatchedDims = fullDims;
+    mismatchedDims[0] = 2;
+
+    testCopyFrom(deviceBuffer, undersized, fullDims, ErrorStatus::INVALID_ARGUMENT);
+    testCopyFrom(deviceBuffer, oversized, fullDims, ErrorStatus::INVALID_ARGUMENT);
+    testCopyFrom(deviceBuffer, exactSize, fullDims, ErrorStatus::NONE);
+    testCopyFrom(deviceBuffer, exactSize, mismatchedDims, ErrorStatus::INVALID_ARGUMENT);
+}
+
+TEST_P(MemoryDomainCopyTest, CopyFrom_InvalidDimensions) {
+    auto convModel = createConvPreparedModel(kTestOperand);
+    auto [deviceBuffer, deviceToken] = allocateBuffer(convModel, {0}, {0});
+    if (!deviceBuffer) return;
+
+    Memory source = allocateSharedMemory(kTestOperandDataSize);
+
+    const auto fullDims = utils::toSigned(kTestOperand.dimensions).value();
+    std::vector<int32_t> wrongDims = fullDims;
+    wrongDims.pop_back();  // One dimension short.
+    testCopyFrom(deviceBuffer, source, wrongDims, ErrorStatus::INVALID_ARGUMENT);
+
+    wrongDims = fullDims;
+    wrongDims[0] = 2;  // Same rank, leading dimension overridden.
+    testCopyFrom(deviceBuffer, source, wrongDims, ErrorStatus::INVALID_ARGUMENT);
+
+    wrongDims = fullDims;
+    wrongDims[0] = 0;  // Same rank, leading dimension zeroed.
+    testCopyFrom(deviceBuffer, source, wrongDims, ErrorStatus::INVALID_ARGUMENT);
+
+    testCopyFrom(deviceBuffer, source, {}, ErrorStatus::NONE);  // Empty dimensions succeed here.
+    testCopyFrom(deviceBuffer, source, fullDims, ErrorStatus::NONE);
+}
+
+TEST_P(MemoryDomainCopyTest, CopyFrom_InvalidDimensions_DynamicShape) {
+    TestOperand dynamicOperand = kTestOperand;
+    dynamicOperand.dimensions[0] = 0;  // Leading dimension zeroed for the dynamic-shape variant.
+    auto convModel = createConvPreparedModel(dynamicOperand);
+    auto [deviceBuffer, deviceToken] = allocateBuffer(convModel, {0}, {0});
+    if (!deviceBuffer) return;
+
+    Memory source = allocateSharedMemory(kTestOperandDataSize);
+
+    const auto fullDims = utils::toSigned(kTestOperand.dimensions).value();
+    std::vector<int32_t> wrongDims = fullDims;
+    wrongDims.pop_back();  // One dimension short.
+    testCopyFrom(deviceBuffer, source, wrongDims, ErrorStatus::INVALID_ARGUMENT);
+
+    wrongDims = fullDims;
+    wrongDims[0] = 2;
+    wrongDims[3] = 4;
+    testCopyFrom(deviceBuffer, source, wrongDims, ErrorStatus::INVALID_ARGUMENT);
+
+    wrongDims = fullDims;
+    wrongDims[0] = 1;
+    wrongDims[3] = 0;
+    testCopyFrom(deviceBuffer, source, wrongDims, ErrorStatus::INVALID_ARGUMENT);
+
+    testCopyFrom(deviceBuffer, source, {}, ErrorStatus::INVALID_ARGUMENT);  // Empty is rejected.
+    testCopyFrom(deviceBuffer, source, fullDims, ErrorStatus::NONE);
+}
+
+TEST_P(MemoryDomainCopyTest, CopyTo_UninitializedMemory) {  // copyTo before any write fails.
+    auto convModel = createConvPreparedModel(kTestOperand);
+    auto [deviceBuffer, deviceToken] = allocateBuffer(convModel, {0}, {0});
+    if (!deviceBuffer) return;
+
+    Memory destination = allocateSharedMemory(kTestOperandDataSize);
+    testCopyTo(deviceBuffer, destination, ErrorStatus::GENERAL_FAILURE);
+}
+
+TEST_P(MemoryDomainCopyTest, CopyTo_InvalidMemorySize) {
+    auto convModel = createConvPreparedModel(kTestOperand);
+    auto [deviceBuffer, deviceToken] = allocateBuffer(convModel, {0}, {0});
+    if (!deviceBuffer) return;
+
+    // Destinations of half, double, and exactly the operand size.
+    Memory undersized = allocateSharedMemory(kTestOperandDataSize / 2);
+    Memory oversized = allocateSharedMemory(kTestOperandDataSize * 2);
+    Memory exactSize = allocateSharedMemory(kTestOperandDataSize);
+
+    initializeDeviceMemory(deviceBuffer);
+    testCopyTo(deviceBuffer, undersized, ErrorStatus::INVALID_ARGUMENT);
+    testCopyTo(deviceBuffer, oversized, ErrorStatus::INVALID_ARGUMENT);
+    testCopyTo(deviceBuffer, exactSize, ErrorStatus::NONE);
+}
+
+TEST_P(MemoryDomainCopyTest, CopyTo_InvalidMemorySize_DynamicShape) {
+    TestOperand dynamicOperand = kTestOperand;
+    dynamicOperand.dimensions[0] = 0;  // Leading dimension zeroed for the dynamic-shape variant.
+    auto convModel = createConvPreparedModel(dynamicOperand);
+    auto [deviceBuffer, deviceToken] = allocateBuffer(convModel, {0}, {0});
+    if (!deviceBuffer) return;
+
+    // Destinations of half, double, and exactly the size of the unmodified operand.
+    Memory undersized = allocateSharedMemory(kTestOperandDataSize / 2);
+    Memory oversized = allocateSharedMemory(kTestOperandDataSize * 2);
+    Memory exactSize = allocateSharedMemory(kTestOperandDataSize);
+
+    initializeDeviceMemory(deviceBuffer);
+    testCopyTo(deviceBuffer, undersized, ErrorStatus::INVALID_ARGUMENT);
+    testCopyTo(deviceBuffer, oversized, ErrorStatus::INVALID_ARGUMENT);
+    testCopyTo(deviceBuffer, exactSize, ErrorStatus::NONE);
+}
+
+std::string printMemoryDomainCopyTest(  // Test-name suffix from device and operand type.
+        const testing::TestParamInfo<MemoryDomainCopyTestParam>& info) {
+    const auto& [device, testOperandType] = info.param;
+    const std::string typeName = toString(static_cast<OperandType>(testOperandType));
+    return gtestCompliantName(getName(device) + "_" + typeName);
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MemoryDomainCopyTest);  // May have no instances.
+INSTANTIATE_TEST_SUITE_P(TestMemoryDomain, MemoryDomainCopyTest,
+                         testing::Combine(testing::ValuesIn(getNamedDevices()),  // device x type
+                                          kTestOperandTypeChoices),
+                         printMemoryDomainCopyTest);
+
+using MemoryDomainExecutionTestParam = std::tuple<NamedDevice, TestOperandType, Executor>;
+class MemoryDomainExecutionTest  // Fixture parameterized by device, operand type and executor.
+    : public MemoryDomainCopyTestBase,
+      public testing::WithParamInterface<MemoryDomainExecutionTestParam> {
+  protected:
+    MemoryDomainExecutionTest()
+        : MemoryDomainCopyTestBase(getData(std::get<NamedDevice>(GetParam())),
+                                   std::get<TestOperandType>(GetParam())) {}
+
+    RequestMemoryPool createSharedMemoryPool(uint32_t size) {  // Pool wrapping shared memory.
+        return RequestMemoryPool(allocateSharedMemory(size));
+    }
+
+    RequestMemoryPool createDeviceMemoryPool(uint32_t token) {  // Pool carrying a buffer token.
+        return RequestMemoryPool(static_cast<int32_t>(token));
+    }
+
+    void testExecution(const std::shared_ptr<IPreparedModel>& preparedModel, const Request& request,
+                       ErrorStatus expectedStatus) {
+        switch (kExecutor) {  // Run the request with the executor chosen by the test parameter.
+            case Executor::SYNC:
+                EXPECT_EQ(executeSync(preparedModel, request), expectedStatus);
+                break;
+            case Executor::FENCED:
+                EXPECT_EQ(executeFenced(preparedModel, request), expectedStatus);
+                break;
+            default:
+                ASSERT_TRUE(false);
+        }
+    }
+
+    ErrorStatus executeSync(const std::shared_ptr<IPreparedModel>& preparedModel,
+                            const Request& request) {
+        ExecutionResult executionResult;
+        const auto ret = preparedModel->executeSynchronously(
+                request, false, kNoDeadline, kOmittedTimeoutDuration, &executionResult);
+        // A failed call must be service-specific; its error code is the status to report.
+        if (!ret.isOk()) {
+            EXPECT_EQ(ret.getExceptionCode(), EX_SERVICE_SPECIFIC);
+            return static_cast<ErrorStatus>(ret.getServiceSpecificError());
+        }
+        const ErrorStatus executionStatus = executionResult.outputSufficientSize
+                                                    ? ErrorStatus::NONE
+                                                    : ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
+        EXPECT_EQ(executionResult.timing, kNoTiming);
+        return executionStatus;
+    }
+
+    // Runs a fenced execution; a driver that returns no callback yields GENERAL_FAILURE.
+    ErrorStatus executeFenced(const std::shared_ptr<IPreparedModel>& preparedModel,
+                              const Request& request) {
+        ndk::ScopedFileDescriptor syncFence;
+        std::shared_ptr<IFencedExecutionCallback> fencedCallback;
+        const auto ret = preparedModel->executeFenced(request, {}, false, kNoDeadline,
+                                                      kOmittedTimeoutDuration, kNoDuration,
+                                                      &syncFence, &fencedCallback);
+        if (!ret.isOk()) {
+            EXPECT_EQ(ret.getExceptionCode(), EX_SERVICE_SPECIFIC);
+            return static_cast<ErrorStatus>(ret.getServiceSpecificError());
+        }
+        if (syncFence.get() != -1) waitForSyncFence(syncFence.get());
+        EXPECT_NE(fencedCallback, nullptr);
+        // EXPECT_NE is non-fatal, so stop here rather than dereference a null callback below.
+        if (fencedCallback == nullptr) return ErrorStatus::GENERAL_FAILURE;
+
+        ErrorStatus executionStatus = ErrorStatus::GENERAL_FAILURE;
+        Timing time = kNoTiming, timeFenced = kNoTiming;
+        const auto retExecutionInfo =
+                fencedCallback->getExecutionInfo(&time, &timeFenced, &executionStatus);
+        EXPECT_TRUE(retExecutionInfo.isOk());
+        EXPECT_EQ(time, kNoTiming);
+        return executionStatus;
+    }
+
+    const Executor kExecutor = std::get<Executor>(GetParam());
+};
+
+TEST_P(MemoryDomainExecutionTest, InvalidToken) {
+    auto convModel = createConvPreparedModel(kTestOperand);
+    if (!convModel) return;
+
+    RequestMemoryPool sharedPool = createSharedMemoryPool(kTestOperandDataSize);
+    RequestMemoryPool zeroTokenPool = createDeviceMemoryPool(0);       // Invalid token.
+    RequestMemoryPool unknownTokenPool = createDeviceMemoryPool(100);  // Unknown token.
+    RequestArgument sharedArg = {
+            .location = {.poolIndex = 0, .offset = 0, .length = kTestOperandDataSize}};
+    RequestArgument deviceArg = {.location = {.poolIndex = 1}};  // Points at the token pool.
+
+    testExecution(convModel,
+                  {.inputs = {deviceArg},
+                   .outputs = {sharedArg},
+                   .pools = createRequestMemoryPools(sharedPool, zeroTokenPool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+    testExecution(convModel,
+                  {.inputs = {deviceArg},
+                   .outputs = {sharedArg},
+                   .pools = createRequestMemoryPools(sharedPool, unknownTokenPool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+    testExecution(convModel,
+                  {.inputs = {sharedArg},
+                   .outputs = {deviceArg},
+                   .pools = createRequestMemoryPools(sharedPool, zeroTokenPool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+    testExecution(convModel,
+                  {.inputs = {sharedArg},
+                   .outputs = {deviceArg},
+                   .pools = createRequestMemoryPools(sharedPool, unknownTokenPool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+}
+
+TEST_P(MemoryDomainExecutionTest, InvalidPreparedModel) {
+    auto ownerModel = createConvPreparedModel(kTestOperand);
+    auto [deviceBuffer, deviceToken] = allocateBuffer(ownerModel, {0}, {0});
+    if (!deviceBuffer) return;
+    auto unrelatedModel = createConvPreparedModel(kTestOperand);
+    if (!unrelatedModel) return;
+
+    RequestMemoryPool sharedPool = createSharedMemoryPool(kTestOperandDataSize);
+    RequestMemoryPool devicePool = createDeviceMemoryPool(deviceToken);
+    RequestArgument sharedArg = {
+            .location = {.poolIndex = 0, .offset = 0, .length = kTestOperandDataSize}};
+    RequestArgument deviceArg = {.location = {.poolIndex = 1}};
+
+    // Both executions must fail: the buffer was allocated for ownerModel, not unrelatedModel.
+    initializeDeviceMemory(deviceBuffer);
+    testExecution(unrelatedModel,
+                  {.inputs = {deviceArg},
+                   .outputs = {sharedArg},
+                   .pools = createRequestMemoryPools(sharedPool, devicePool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+    testExecution(unrelatedModel,
+                  {.inputs = {sharedArg},
+                   .outputs = {deviceArg},
+                   .pools = createRequestMemoryPools(sharedPool, devicePool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+}
+
+TEST_P(MemoryDomainExecutionTest, InvalidIOIndex) {
+    auto convModel = createConvPreparedModel(kTestOperand, 2);
+    auto [deviceBuffer, deviceToken] = allocateBuffer(convModel, {0}, {});
+    if (!deviceBuffer) return;
+
+    RequestMemoryPool sharedPoolA = createSharedMemoryPool(kTestOperandDataSize);
+    RequestMemoryPool sharedPoolB = createSharedMemoryPool(kTestOperandDataSize);
+    RequestMemoryPool sharedPoolC = createSharedMemoryPool(kTestOperandDataSize);
+    RequestMemoryPool devicePool = createDeviceMemoryPool(deviceToken);
+    RequestArgument sharedArgA = {
+            .location = {.poolIndex = 0, .offset = 0, .length = kTestOperandDataSize}};
+    RequestArgument sharedArgB = {
+            .location = {.poolIndex = 1, .offset = 0, .length = kTestOperandDataSize}};
+    RequestArgument sharedArgC = {
+            .location = {.poolIndex = 2, .offset = 0, .length = kTestOperandDataSize}};
+    RequestArgument deviceArg = {.location = {.poolIndex = 3}};
+
+    // The buffer was allocated for input 0 only, so using it as input 1 must fail.
+    initializeDeviceMemory(deviceBuffer);
+    testExecution(convModel,
+                  {.inputs = {sharedArgA, deviceArg},
+                   .outputs = {sharedArgB, sharedArgC},
+                   .pools = createRequestMemoryPools(sharedPoolA, sharedPoolB, sharedPoolC,
+                                                     devicePool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+
+    // The buffer was not allocated for any output, so using it as output 1 must fail.
+    testExecution(convModel,
+                  {.inputs = {sharedArgA, sharedArgB},
+                   .outputs = {sharedArgC, deviceArg},
+                   .pools = createRequestMemoryPools(sharedPoolA, sharedPoolB, sharedPoolC,
+                                                     devicePool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+}
+
+TEST_P(MemoryDomainExecutionTest, InvalidIOType) {
+    auto convModel = createConvPreparedModel(kTestOperand);
+    auto [inputOnlyBuffer, inputOnlyToken] = allocateBuffer(convModel, {0}, {});
+    auto [outputOnlyBuffer, outputOnlyToken] = allocateBuffer(convModel, {}, {0});
+    if (!inputOnlyBuffer || !outputOnlyBuffer) return;
+
+    RequestMemoryPool sharedPool = createSharedMemoryPool(kTestOperandDataSize);
+    RequestMemoryPool devicePool = createDeviceMemoryPool(inputOnlyToken);
+    RequestArgument sharedArg = {
+            .location = {.poolIndex = 0, .offset = 0, .length = kTestOperandDataSize}};
+    RequestArgument deviceArg = {.location = {.poolIndex = 1}};
+
+    // Must fail: the buffer behind devicePool has an input role but is used as an output.
+    testExecution(convModel,
+                  {.inputs = {sharedArg},
+                   .outputs = {deviceArg},
+                   .pools = createRequestMemoryPools(sharedPool, devicePool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+
+    // Must fail: after the token swap the buffer has an output role but is used as an input.
+    devicePool.set<RequestMemoryPool::Tag::token>(outputOnlyToken);
+    initializeDeviceMemory(outputOnlyBuffer);
+    testExecution(convModel,
+                  {.inputs = {deviceArg},
+                   .outputs = {sharedArg},
+                   .pools = createRequestMemoryPools(sharedPool, devicePool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+}
+
+TEST_P(MemoryDomainExecutionTest, UninitializedMemory) {
+    auto convModel = createConvPreparedModel(kTestOperand);
+    auto [deviceBuffer, deviceToken] = allocateBuffer(convModel, {0}, {0});
+    if (!deviceBuffer) return;
+
+    RequestMemoryPool sharedPool = createSharedMemoryPool(kTestOperandDataSize);
+    RequestMemoryPool devicePool = createDeviceMemoryPool(deviceToken);
+    RequestArgument sharedArg = {
+            .location = {.poolIndex = 0, .offset = 0, .length = kTestOperandDataSize}};
+    RequestArgument deviceArg = {.location = {.poolIndex = 1}};
+
+    // Reading the device memory as an input must fail while nothing has been written to it.
+    testExecution(convModel,
+                  {.inputs = {deviceArg},
+                   .outputs = {sharedArg},
+                   .pools = createRequestMemoryPools(sharedPool, devicePool)},
+                  ErrorStatus::GENERAL_FAILURE);
+
+    // Writing to it as an output is allowed and initializes the device memory.
+    testExecution(convModel,
+                  {.inputs = {sharedArg},
+                   .outputs = {deviceArg},
+                   .pools = createRequestMemoryPools(sharedPool, devicePool)},
+                  ErrorStatus::NONE);
+
+    // The same read as in the first step now succeeds.
+    testExecution(convModel,
+                  {.inputs = {deviceArg},
+                   .outputs = {sharedArg},
+                   .pools = createRequestMemoryPools(sharedPool, devicePool)},
+                  ErrorStatus::NONE);
+}
+
+TEST_P(MemoryDomainExecutionTest, SameRequestMultipleRoles) {
+    auto convModel = createConvPreparedModel(kTestOperand, 2);
+    auto [deviceBuffer, deviceToken] = allocateBuffer(convModel, {0, 1}, {0, 1});
+    if (!deviceBuffer) return;
+
+    RequestMemoryPool sharedPoolA = createSharedMemoryPool(kTestOperandDataSize);
+    RequestMemoryPool sharedPoolB = createSharedMemoryPool(kTestOperandDataSize);
+    RequestMemoryPool devicePool = createDeviceMemoryPool(deviceToken);
+    RequestArgument sharedArgA = {
+            .location = {.poolIndex = 0, .offset = 0, .length = kTestOperandDataSize}};
+    RequestArgument sharedArgB = {
+            .location = {.poolIndex = 1, .offset = 0, .length = kTestOperandDataSize}};
+    RequestArgument deviceArg = {.location = {.poolIndex = 2}};
+
+    // Must fail: one device memory may not act as an input and an output of the same request.
+    initializeDeviceMemory(deviceBuffer);
+    testExecution(convModel,
+                  {.inputs = {deviceArg, sharedArgA},
+                   .outputs = {deviceArg, sharedArgB},
+                   .pools = createRequestMemoryPools(sharedPoolA, sharedPoolB, devicePool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+
+    // Must fail: one device memory may not back more than one output.
+    testExecution(convModel,
+                  {.inputs = {sharedArgA, sharedArgB},
+                   .outputs = {deviceArg, deviceArg},
+                   .pools = createRequestMemoryPools(sharedPoolA, sharedPoolB, devicePool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+
+    // Allowed: one device memory may back several inputs.
+    initializeDeviceMemory(deviceBuffer);
+    testExecution(convModel,
+                  {.inputs = {deviceArg, deviceArg},
+                   .outputs = {sharedArgA, sharedArgB},
+                   .pools = createRequestMemoryPools(sharedPoolA, sharedPoolB, devicePool)},
+                  ErrorStatus::NONE);
+}
+
+TEST_P(MemoryDomainExecutionTest, InvalidDimensions) {
+    // Dynamic shapes are not supported by FENCED execution, so that variant exits early.
+    if (kExecutor == Executor::FENCED) return;
+
+    TestOperand dynamicOperand = kTestOperand;
+    dynamicOperand.dimensions[0] = 0;
+    auto convModel = createConvPreparedModel(dynamicOperand);
+    auto allocation = allocateBuffer(convModel, {0}, {0},
+                                     utils::toSigned(kTestOperand.dimensions).value());
+    if (!allocation.buffer) return;
+
+    RequestMemoryPool sharedPool = createSharedMemoryPool(kTestOperandDataSize);
+    RequestMemoryPool devicePool = createDeviceMemoryPool(allocation.token);
+    auto mismatchedDims = utils::toSigned(kTestOperand.dimensions).value();
+    mismatchedDims[0] = 2;
+    RequestArgument sharedArg = {
+            .location = {.poolIndex = 0, .offset = 0, .length = kTestOperandDataSize},
+            .dimensions = mismatchedDims};
+    RequestArgument deviceArg = {.location = {.poolIndex = 1}};
+    RequestArgument deviceArgWithDims = {.location = {.poolIndex = 1},
+                                         .dimensions = mismatchedDims};
+
+    initializeDeviceMemory(allocation.buffer);
+    testExecution(convModel,
+                  {.inputs = {deviceArgWithDims},
+                   .outputs = {sharedArg},
+                   .pools = createRequestMemoryPools(sharedPool, devicePool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+
+    testExecution(convModel,
+                  {.inputs = {sharedArg},
+                   .outputs = {deviceArgWithDims},
+                   .pools = createRequestMemoryPools(sharedPool, devicePool)},
+                  ErrorStatus::INVALID_ARGUMENT);
+
+    testExecution(convModel,
+                  {.inputs = {sharedArg},
+                   .outputs = {deviceArg},
+                   .pools = createRequestMemoryPools(sharedPool, devicePool)},
+                  ErrorStatus::GENERAL_FAILURE);
+}
+
+const auto kExecutorChoices = testing::Values(Executor::SYNC, Executor::FENCED);  // Both paths.
+
+std::string printMemoryDomainExecutionTest(  // Suffix from device, operand type and executor.
+        const testing::TestParamInfo<MemoryDomainExecutionTestParam>& info) {
+    const auto& [device, testOperandType, executorKind] = info.param;
+    const std::string typeName = toString(static_cast<OperandType>(testOperandType));
+    const std::string executorName = toString(executorKind);
+    return gtestCompliantName(getName(device) + "_" + typeName + "_" + executorName);
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MemoryDomainExecutionTest);  // May be empty.
+INSTANTIATE_TEST_SUITE_P(TestMemoryDomain, MemoryDomainExecutionTest,
+                         testing::Combine(testing::ValuesIn(getNamedDevices()),  // device x
+                                          kTestOperandTypeChoices, kExecutorChoices),  // type x exec
+                         printMemoryDomainExecutionTest);
+
+}  // namespace aidl::android::hardware::neuralnetworks::vts::functional
diff --git a/neuralnetworks/aidl/vts/functional/QualityOfServiceTests.cpp b/neuralnetworks/aidl/vts/functional/QualityOfServiceTests.cpp
new file mode 100644
index 0000000000..58db98f374
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/QualityOfServiceTests.cpp
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <android/binder_enums.h>
+#include <android/binder_interface_utils.h>
+#include <android/binder_status.h>
+
+#include <nnapi/hal/aidl/Conversions.h>
+
+#include "Callbacks.h"
+#include "GeneratedTestHarness.h"
+#include "Utils.h"
+
+namespace aidl::android::hardware::neuralnetworks::vts::functional {
+
+using implementation::PreparedModelCallback;
+using test_helper::TestBuffer;
+using test_helper::TestModel;
+
+enum class DeadlineBoundType { NOW, UNLIMITED, SHORT };
+constexpr std::array<DeadlineBoundType, 3> deadlineBounds = {
+        DeadlineBoundType::NOW, DeadlineBoundType::UNLIMITED, DeadlineBoundType::SHORT};
+std::string toString(DeadlineBoundType type) {
+    switch (type) {
+        case DeadlineBoundType::NOW:
+            return "NOW";
+        case DeadlineBoundType::UNLIMITED:
+            return "UNLIMITED";
+        case DeadlineBoundType::SHORT:
+            return "SHORT";
+    }
+    LOG(FATAL) << "Unrecognized DeadlineBoundType: " << static_cast<int>(type);
+    return {};
+}
+
+constexpr auto kShortDuration = std::chrono::milliseconds{5};
+
+using Results = std::tuple<ErrorStatus, std::vector<OutputShape>, Timing>;
+using MaybeResults = std::optional<Results>;
+
+static int64_t makeDeadline(DeadlineBoundType deadlineBoundType) {
+    const auto getNanosecondsSinceEpoch = [](const auto& time) -> int64_t {
+        const auto timeSinceEpoch = time.time_since_epoch();
+        return std::chrono::duration_cast<std::chrono::nanoseconds>(timeSinceEpoch).count();
+    };
+
+    std::chrono::steady_clock::time_point timePoint;
+    switch (deadlineBoundType) {
+        case DeadlineBoundType::NOW:
+            timePoint = std::chrono::steady_clock::now();
+            break;
+        case DeadlineBoundType::UNLIMITED:
+            timePoint = std::chrono::steady_clock::time_point::max();
+            break;
+        case DeadlineBoundType::SHORT:
+            timePoint = std::chrono::steady_clock::now() + kShortDuration;
+            break;
+    }
+
+    return getNanosecondsSinceEpoch(timePoint);
+}
+
+void runPrepareModelTest(const std::shared_ptr<IDevice>& device, const Model& model,
+                         Priority priority, std::optional<DeadlineBoundType> deadlineBound) {
+    int64_t deadline = kNoDeadline;
+    if (deadlineBound.has_value()) {
+        deadline = makeDeadline(deadlineBound.value());
+    }
+
+    // see if service can handle model
+    std::vector<bool> supportedOps;
+    const auto supportedCallStatus = device->getSupportedOperations(model, &supportedOps);
+    ASSERT_TRUE(supportedCallStatus.isOk());
+    ASSERT_NE(0ul, supportedOps.size());
+    const bool fullySupportsModel =
+            std::all_of(supportedOps.begin(), supportedOps.end(), [](bool valid) { return valid; });
+
+    // launch prepare model
+    const std::shared_ptr<PreparedModelCallback> preparedModelCallback =
+            ndk::SharedRefBase::make<PreparedModelCallback>();
+    const auto prepareLaunchStatus =
+            device->prepareModel(model, ExecutionPreference::FAST_SINGLE_ANSWER, priority, deadline,
+                                 {}, {}, kEmptyCacheToken, preparedModelCallback);
+    ASSERT_TRUE(prepareLaunchStatus.isOk())
+            << "prepareLaunchStatus: " << prepareLaunchStatus.getDescription();
+
+    // retrieve prepared model
+    preparedModelCallback->wait();
+    const ErrorStatus prepareReturnStatus = preparedModelCallback->getStatus();
+    const std::shared_ptr<IPreparedModel> preparedModel = preparedModelCallback->getPreparedModel();
+
+    // The getSupportedOperations call returns a list of operations that are guaranteed not to fail
+    // if prepareModel is called, and 'fullySupportsModel' is true i.f.f. the entire model is
+    // guaranteed. If a driver has any doubt that it can prepare an operation, it must return false.
+    // So here, if a driver isn't sure if it can support an operation, but reports that it
+    // successfully prepared the model, the test can continue.
+    if (!fullySupportsModel && prepareReturnStatus != ErrorStatus::NONE) {
+        ASSERT_EQ(nullptr, preparedModel.get());
+        return;
+    }
+
+    // verify return status
+    if (!deadlineBound.has_value()) {
+        EXPECT_EQ(ErrorStatus::NONE, prepareReturnStatus);
+    } else {
+        switch (deadlineBound.value()) {
+            case DeadlineBoundType::NOW:
+            case DeadlineBoundType::SHORT:
+                // Either the driver successfully completed the task or it
+                // aborted and returned MISSED_DEADLINE_*.
+                EXPECT_TRUE(prepareReturnStatus == ErrorStatus::NONE ||
+                            prepareReturnStatus == ErrorStatus::MISSED_DEADLINE_TRANSIENT ||
+                            prepareReturnStatus == ErrorStatus::MISSED_DEADLINE_PERSISTENT);
+                break;
+            case DeadlineBoundType::UNLIMITED:
+                // If an unlimited deadline is supplied, we expect the preparation
+                // to proceed normally, so it is checked the same way as when no
+                // deadline is provided.
+                EXPECT_EQ(ErrorStatus::NONE, prepareReturnStatus);
+                break;
+        }
+    }
+    ASSERT_EQ(prepareReturnStatus == ErrorStatus::NONE, preparedModel.get() != nullptr);
+}
+
+void runPrepareModelTests(const std::shared_ptr<IDevice>& device, const Model& model) {
+    // test priority
+    for (auto priority : ndk::enum_range<Priority>{}) {
+        SCOPED_TRACE("priority: " + toString(priority));
+        if (priority == kDefaultPriority) continue;
+        runPrepareModelTest(device, model, priority, {});
+    }
+
+    // test deadline
+    for (auto deadlineBound : deadlineBounds) {
+        SCOPED_TRACE("deadlineBound: " + toString(deadlineBound));
+        runPrepareModelTest(device, model, kDefaultPriority, deadlineBound);
+    }
+}
+
+static MaybeResults executeSynchronously(const std::shared_ptr<IPreparedModel>& preparedModel,
+                                         const Request& request, int64_t deadline) {
+    SCOPED_TRACE("synchronous");
+    const bool measure = false;
+
+    // run execution
+    ExecutionResult executionResult;
+    const auto ret = preparedModel->executeSynchronously(request, measure, deadline,
+                                                         kOmittedTimeoutDuration, &executionResult);
+    EXPECT_TRUE(ret.isOk() || ret.getExceptionCode() == EX_SERVICE_SPECIFIC)
+            << ret.getDescription();
+    if (!ret.isOk()) {
+        if (ret.getExceptionCode() != EX_SERVICE_SPECIFIC) {
+            return std::nullopt;
+        }
+        return MaybeResults(
+                {static_cast<ErrorStatus>(ret.getServiceSpecificError()), {}, kNoTiming});
+    }
+
+    // return results
+    return MaybeResults({executionResult.outputSufficientSize
+                                 ? ErrorStatus::NONE
+                                 : ErrorStatus::OUTPUT_INSUFFICIENT_SIZE,
+                         std::move(executionResult.outputShapes), executionResult.timing});
+}
+
+void runExecutionTest(const std::shared_ptr<IPreparedModel>& preparedModel,
+                      const TestModel& testModel, const Request& request,
+                      const ExecutionContext& context, DeadlineBoundType deadlineBound) {
+    const auto deadline = makeDeadline(deadlineBound);
+
+    // Perform execution and unpack results.
+    const auto results = executeSynchronously(preparedModel, request, deadline);
+    if (!results.has_value()) return;
+    const auto& [status, outputShapes, timing] = results.value();
+
+    // Verify no timing information was returned
+    EXPECT_EQ(timing, kNoTiming);
+
+    // Validate deadline information if applicable.
+    switch (deadlineBound) {
+        case DeadlineBoundType::NOW:
+        case DeadlineBoundType::SHORT:
+            // Either the driver successfully completed the task or it
+            // aborted and returned MISSED_DEADLINE_*.
+            ASSERT_TRUE(status == ErrorStatus::NONE ||
+                        status == ErrorStatus::MISSED_DEADLINE_TRANSIENT ||
+                        status == ErrorStatus::MISSED_DEADLINE_PERSISTENT);
+            break;
+        case DeadlineBoundType::UNLIMITED:
+            // If an unlimited deadline is supplied, we expect the execution to
+            // proceed normally. In this case, check it normally by breaking out
+            // of the switch statement.
+            ASSERT_EQ(ErrorStatus::NONE, status);
+            break;
+    }
+
+    // If the model output operands are fully specified, outputShapes must be
+    // either empty, or have the same number of elements as the number of outputs.
+    ASSERT_TRUE(outputShapes.size() == 0 ||
+                outputShapes.size() == testModel.main.outputIndexes.size());
+
+    // Go through all outputs, check returned output shapes.
+    for (uint32_t i = 0; i < outputShapes.size(); i++) {
+        EXPECT_TRUE(outputShapes[i].isSufficient);
+        const auto expect =
+                utils::toSigned(testModel.main.operands[testModel.main.outputIndexes[i]].dimensions)
+                        .value();
+        const std::vector<int32_t>& actual = outputShapes[i].dimensions;
+        EXPECT_EQ(expect, actual);
+    }
+
+    // Retrieve execution results.
+    const std::vector<TestBuffer> outputs = context.getOutputBuffers(request);
+
+    // We want "close-enough" results.
+    if (status == ErrorStatus::NONE) {
+        checkResults(testModel, outputs);
+    }
+}
+
+void runExecutionTests(const std::shared_ptr<IPreparedModel>& preparedModel,
+                       const TestModel& testModel, const Request& request,
+                       const ExecutionContext& context) {
+    for (auto deadlineBound : deadlineBounds) {
+        runExecutionTest(preparedModel, testModel, request, context, deadlineBound);
+    }
+}
+
+void runTests(const std::shared_ptr<IDevice>& device, const TestModel& testModel) {
+    // setup
+    const Model model = createModel(testModel);
+
+    // run prepare model tests
+    runPrepareModelTests(device, model);
+
+    // prepare model
+    std::shared_ptr<IPreparedModel> preparedModel;
+    createPreparedModel(device, model, &preparedModel);
+    if (preparedModel == nullptr) return;
+
+    // run execution tests
+    ExecutionContext context;
+    const Request request = context.createRequest(testModel);
+    runExecutionTests(preparedModel, testModel, request, context);
+}
+
+class DeadlineTest : public GeneratedTestBase {};
+
+TEST_P(DeadlineTest, Test) {
+    runTests(kDevice, kTestModel);
+}
+
+INSTANTIATE_GENERATED_TEST(DeadlineTest,
+                           [](const TestModel& testModel) { return !testModel.expectFailure; });
+
+}  // namespace aidl::android::hardware::neuralnetworks::vts::functional
diff --git a/neuralnetworks/aidl/vts/functional/TestAssertions.cpp b/neuralnetworks/aidl/vts/functional/TestAssertions.cpp
new file mode 100644
index 0000000000..a9e945608c
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/TestAssertions.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <aidl/android/hardware/neuralnetworks/IPreparedModel.h>
+#include <aidl/android/hardware/neuralnetworks/OperandType.h>
+#include <aidl/android/hardware/neuralnetworks/OperationType.h>
+
+#include <ControlFlow.h>
+#include <TestHarness.h>
+
+namespace aidl::android::hardware::neuralnetworks {
+
+namespace nn = ::android::nn;
+
+static_assert(static_cast<uint64_t>(IPreparedModel::DEFAULT_LOOP_TIMEOUT_DURATION_NS) ==
+              nn::operation_while::kTimeoutNsDefault);
+static_assert(static_cast<uint64_t>(IPreparedModel::MAXIMUM_LOOP_TIMEOUT_DURATION_NS) ==
+              nn::operation_while::kTimeoutNsMaximum);
+
+// Make sure that the AIDL enums are compatible with the values defined in
+// frameworks/ml/nn/tools/test_generator/test_harness/include/TestHarness.h.
+using namespace test_helper;
+#define CHECK_TEST_ENUM(EnumType, enumValue) \
+    static_assert(static_cast<EnumType>(Test##EnumType::enumValue) == EnumType::enumValue)
+
+CHECK_TEST_ENUM(OperandType, FLOAT32);
+CHECK_TEST_ENUM(OperandType, INT32);
+CHECK_TEST_ENUM(OperandType, UINT32);
+CHECK_TEST_ENUM(OperandType, TENSOR_FLOAT32);
+CHECK_TEST_ENUM(OperandType, TENSOR_INT32);
+CHECK_TEST_ENUM(OperandType, TENSOR_QUANT8_ASYMM);
+CHECK_TEST_ENUM(OperandType, BOOL);
+CHECK_TEST_ENUM(OperandType, TENSOR_QUANT16_SYMM);
+CHECK_TEST_ENUM(OperandType, TENSOR_FLOAT16);
+CHECK_TEST_ENUM(OperandType, TENSOR_BOOL8);
+CHECK_TEST_ENUM(OperandType, FLOAT16);
+CHECK_TEST_ENUM(OperandType, TENSOR_QUANT8_SYMM_PER_CHANNEL);
+CHECK_TEST_ENUM(OperandType, TENSOR_QUANT16_ASYMM);
+CHECK_TEST_ENUM(OperandType, TENSOR_QUANT8_SYMM);
+CHECK_TEST_ENUM(OperandType, TENSOR_QUANT8_ASYMM_SIGNED);
+
+CHECK_TEST_ENUM(OperationType, ADD);
+CHECK_TEST_ENUM(OperationType, AVERAGE_POOL_2D);
+CHECK_TEST_ENUM(OperationType, CONCATENATION);
+CHECK_TEST_ENUM(OperationType, CONV_2D);
+CHECK_TEST_ENUM(OperationType, DEPTHWISE_CONV_2D);
+CHECK_TEST_ENUM(OperationType, DEPTH_TO_SPACE);
+CHECK_TEST_ENUM(OperationType, DEQUANTIZE);
+CHECK_TEST_ENUM(OperationType, EMBEDDING_LOOKUP);
+CHECK_TEST_ENUM(OperationType, FLOOR);
+CHECK_TEST_ENUM(OperationType, FULLY_CONNECTED);
+CHECK_TEST_ENUM(OperationType, HASHTABLE_LOOKUP);
+CHECK_TEST_ENUM(OperationType, L2_NORMALIZATION);
+CHECK_TEST_ENUM(OperationType, L2_POOL_2D);
+CHECK_TEST_ENUM(OperationType, LOCAL_RESPONSE_NORMALIZATION);
+CHECK_TEST_ENUM(OperationType, LOGISTIC);
+CHECK_TEST_ENUM(OperationType, LSH_PROJECTION);
+CHECK_TEST_ENUM(OperationType, LSTM);
+CHECK_TEST_ENUM(OperationType, MAX_POOL_2D);
+CHECK_TEST_ENUM(OperationType, MUL);
+CHECK_TEST_ENUM(OperationType, RELU);
+CHECK_TEST_ENUM(OperationType, RELU1);
+CHECK_TEST_ENUM(OperationType, RELU6);
+CHECK_TEST_ENUM(OperationType, RESHAPE);
+CHECK_TEST_ENUM(OperationType, RESIZE_BILINEAR);
+CHECK_TEST_ENUM(OperationType, RNN);
+CHECK_TEST_ENUM(OperationType, SOFTMAX);
+CHECK_TEST_ENUM(OperationType, SPACE_TO_DEPTH);
+CHECK_TEST_ENUM(OperationType, SVDF);
+CHECK_TEST_ENUM(OperationType, TANH);
+CHECK_TEST_ENUM(OperationType, BATCH_TO_SPACE_ND);
+CHECK_TEST_ENUM(OperationType, DIV);
+CHECK_TEST_ENUM(OperationType, MEAN);
+CHECK_TEST_ENUM(OperationType, PAD);
+CHECK_TEST_ENUM(OperationType, SPACE_TO_BATCH_ND);
+CHECK_TEST_ENUM(OperationType, SQUEEZE);
+CHECK_TEST_ENUM(OperationType, STRIDED_SLICE);
+CHECK_TEST_ENUM(OperationType, SUB);
+CHECK_TEST_ENUM(OperationType, TRANSPOSE);
+CHECK_TEST_ENUM(OperationType, ABS);
+CHECK_TEST_ENUM(OperationType, ARGMAX);
+CHECK_TEST_ENUM(OperationType, ARGMIN);
+CHECK_TEST_ENUM(OperationType, AXIS_ALIGNED_BBOX_TRANSFORM);
+CHECK_TEST_ENUM(OperationType, BIDIRECTIONAL_SEQUENCE_LSTM);
+CHECK_TEST_ENUM(OperationType, BIDIRECTIONAL_SEQUENCE_RNN);
+CHECK_TEST_ENUM(OperationType, BOX_WITH_NMS_LIMIT);
+CHECK_TEST_ENUM(OperationType, CAST);
+CHECK_TEST_ENUM(OperationType, CHANNEL_SHUFFLE);
+CHECK_TEST_ENUM(OperationType, DETECTION_POSTPROCESSING);
+CHECK_TEST_ENUM(OperationType, EQUAL);
+CHECK_TEST_ENUM(OperationType, EXP);
+CHECK_TEST_ENUM(OperationType, EXPAND_DIMS);
+CHECK_TEST_ENUM(OperationType, GATHER);
+CHECK_TEST_ENUM(OperationType, GENERATE_PROPOSALS);
+CHECK_TEST_ENUM(OperationType, GREATER);
+CHECK_TEST_ENUM(OperationType, GREATER_EQUAL);
+CHECK_TEST_ENUM(OperationType, GROUPED_CONV_2D);
+CHECK_TEST_ENUM(OperationType, HEATMAP_MAX_KEYPOINT);
+CHECK_TEST_ENUM(OperationType, INSTANCE_NORMALIZATION);
+CHECK_TEST_ENUM(OperationType, LESS);
+CHECK_TEST_ENUM(OperationType, LESS_EQUAL);
+CHECK_TEST_ENUM(OperationType, LOG);
+CHECK_TEST_ENUM(OperationType, LOGICAL_AND);
+CHECK_TEST_ENUM(OperationType, LOGICAL_NOT);
+CHECK_TEST_ENUM(OperationType, LOGICAL_OR);
+CHECK_TEST_ENUM(OperationType, LOG_SOFTMAX);
+CHECK_TEST_ENUM(OperationType, MAXIMUM);
+CHECK_TEST_ENUM(OperationType, MINIMUM);
+CHECK_TEST_ENUM(OperationType, NEG);
+CHECK_TEST_ENUM(OperationType, NOT_EQUAL);
+CHECK_TEST_ENUM(OperationType, PAD_V2);
+CHECK_TEST_ENUM(OperationType, POW);
+CHECK_TEST_ENUM(OperationType, PRELU);
+CHECK_TEST_ENUM(OperationType, QUANTIZE);
+CHECK_TEST_ENUM(OperationType, QUANTIZED_16BIT_LSTM);
+CHECK_TEST_ENUM(OperationType, RANDOM_MULTINOMIAL);
+CHECK_TEST_ENUM(OperationType, REDUCE_ALL);
+CHECK_TEST_ENUM(OperationType, REDUCE_ANY);
+CHECK_TEST_ENUM(OperationType, REDUCE_MAX);
+CHECK_TEST_ENUM(OperationType, REDUCE_MIN);
+CHECK_TEST_ENUM(OperationType, REDUCE_PROD);
+CHECK_TEST_ENUM(OperationType, REDUCE_SUM);
+CHECK_TEST_ENUM(OperationType, ROI_ALIGN);
+CHECK_TEST_ENUM(OperationType, ROI_POOLING);
+CHECK_TEST_ENUM(OperationType, RSQRT);
+CHECK_TEST_ENUM(OperationType, SELECT);
+CHECK_TEST_ENUM(OperationType, SIN);
+CHECK_TEST_ENUM(OperationType, SLICE);
+CHECK_TEST_ENUM(OperationType, SPLIT);
+CHECK_TEST_ENUM(OperationType, SQRT);
+CHECK_TEST_ENUM(OperationType, TILE);
+CHECK_TEST_ENUM(OperationType, TOPK_V2);
+CHECK_TEST_ENUM(OperationType, TRANSPOSE_CONV_2D);
+CHECK_TEST_ENUM(OperationType, UNIDIRECTIONAL_SEQUENCE_LSTM);
+CHECK_TEST_ENUM(OperationType, UNIDIRECTIONAL_SEQUENCE_RNN);
+CHECK_TEST_ENUM(OperationType, RESIZE_NEAREST_NEIGHBOR);
+
+#undef CHECK_TEST_ENUM
+
+}  // namespace aidl::android::hardware::neuralnetworks
diff --git a/neuralnetworks/aidl/vts/functional/TestMain.cpp b/neuralnetworks/aidl/vts/functional/TestMain.cpp
new file mode 100644
index 0000000000..1d58608fa3
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/TestMain.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <android/binder_process.h>
+#include <gtest/gtest.h>
+#include "LogTestCaseToLogcat.h"
+
+int main(int argc, char** argv) {
+    testing::InitGoogleTest(&argc, argv);
+    testing::UnitTest::GetInstance()->listeners().Append(
+            new aidl::android::hardware::neuralnetworks::LogTestCaseToLogcat());
+    ABinderProcess_startThreadPool();
+    return RUN_ALL_TESTS();
+}
diff --git a/neuralnetworks/aidl/vts/functional/Utils.cpp b/neuralnetworks/aidl/vts/functional/Utils.cpp
new file mode 100644
index 0000000000..14a496a303
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/Utils.cpp
@@ -0,0 +1,252 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Utils.h"
+
+#include <aidl/android/hardware/neuralnetworks/IPreparedModelParcel.h>
+#include <aidl/android/hardware/neuralnetworks/Operand.h>
+#include <aidl/android/hardware/neuralnetworks/OperandType.h>
+#include <android-base/logging.h>
+#include <android/binder_status.h>
+#include <android/hardware_buffer.h>
+
+#include <iostream>
+#include <limits>
+#include <numeric>
+
+#include <MemoryUtils.h>
+#include <nnapi/SharedMemory.h>
+#include <nnapi/hal/aidl/Conversions.h>
+#include <nnapi/hal/aidl/Utils.h>
+
+namespace aidl::android::hardware::neuralnetworks {
+
+using test_helper::TestBuffer;
+using test_helper::TestModel;
+
+uint32_t sizeOfData(OperandType type) {
+    switch (type) {
+        case OperandType::FLOAT32:
+        case OperandType::INT32:
+        case OperandType::UINT32:
+        case OperandType::TENSOR_FLOAT32:
+        case OperandType::TENSOR_INT32:
+            return 4;
+        case OperandType::TENSOR_QUANT16_SYMM:
+        case OperandType::TENSOR_FLOAT16:
+        case OperandType::FLOAT16:
+        case OperandType::TENSOR_QUANT16_ASYMM:
+            return 2;
+        case OperandType::TENSOR_QUANT8_ASYMM:
+        case OperandType::BOOL:
+        case OperandType::TENSOR_BOOL8:
+        case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+        case OperandType::TENSOR_QUANT8_SYMM:
+        case OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
+            return 1;
+        case OperandType::SUBGRAPH:
+            return 0;
+        default:
+            CHECK(false) << "Invalid OperandType " << static_cast<uint32_t>(type);
+            return 0;
+    }
+}
+
+static bool isTensor(OperandType type) {
+    switch (type) {
+        case OperandType::FLOAT32:
+        case OperandType::INT32:
+        case OperandType::UINT32:
+        case OperandType::FLOAT16:
+        case OperandType::BOOL:
+        case OperandType::SUBGRAPH:
+            return false;
+        case OperandType::TENSOR_FLOAT32:
+        case OperandType::TENSOR_INT32:
+        case OperandType::TENSOR_QUANT16_SYMM:
+        case OperandType::TENSOR_FLOAT16:
+        case OperandType::TENSOR_QUANT16_ASYMM:
+        case OperandType::TENSOR_QUANT8_ASYMM:
+        case OperandType::TENSOR_BOOL8:
+        case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+        case OperandType::TENSOR_QUANT8_SYMM:
+        case OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
+            return true;
+        default:
+            CHECK(false) << "Invalid OperandType " << static_cast<uint32_t>(type);
+            return false;
+    }
+}
+
+uint32_t sizeOfData(const Operand& operand) {
+    const uint32_t dataSize = sizeOfData(operand.type);
+    if (isTensor(operand.type) && operand.dimensions.size() == 0) return 0;
+    return std::accumulate(operand.dimensions.begin(), operand.dimensions.end(), dataSize,
+                           std::multiplies<>{});
+}
+
+std::unique_ptr<TestAshmem> TestAshmem::create(uint32_t size) {
+    auto ashmem = std::make_unique<TestAshmem>(size);
+    return ashmem->mIsValid ? std::move(ashmem) : nullptr;
+}
+
+void TestAshmem::initialize(uint32_t size) {
+    mIsValid = false;
+    ASSERT_GT(size, 0);
+    const auto sharedMemory = nn::createSharedMemory(size).value();
+    mMappedMemory = nn::map(sharedMemory).value();
+    mPtr = static_cast<uint8_t*>(std::get<void*>(mMappedMemory.pointer));
+    CHECK_NE(mPtr, nullptr);
+    mAidlMemory = utils::convert(sharedMemory).value();
+    mIsValid = true;
+}
+
+std::unique_ptr<TestBlobAHWB> TestBlobAHWB::create(uint32_t size) {
+    auto ahwb = std::make_unique<TestBlobAHWB>(size);
+    return ahwb->mIsValid ? std::move(ahwb) : nullptr;
+}
+
+void TestBlobAHWB::initialize(uint32_t size) {
+    mIsValid = false;
+    ASSERT_GT(size, 0);
+    const auto usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
+    const AHardwareBuffer_Desc desc = {
+            .width = size,
+            .height = 1,
+            .layers = 1,
+            .format = AHARDWAREBUFFER_FORMAT_BLOB,
+            .usage = usage,
+            .stride = size,
+    };
+
+    ASSERT_EQ(AHardwareBuffer_allocate(&desc, &mAhwb), 0);
+    ASSERT_NE(mAhwb, nullptr);
+
+    const auto sharedMemory = nn::createSharedMemoryFromAHWB(*mAhwb).value();
+    mMapping = nn::map(sharedMemory).value();
+    mPtr = static_cast<uint8_t*>(std::get<void*>(mMapping.pointer));
+    CHECK_NE(mPtr, nullptr);
+    mAidlMemory = utils::convert(sharedMemory).value();
+
+    mIsValid = true;
+}
+
+TestBlobAHWB::~TestBlobAHWB() {
+    if (mAhwb) {
+        AHardwareBuffer_unlock(mAhwb, nullptr);
+        AHardwareBuffer_release(mAhwb);
+    }
+}
+
+std::string gtestCompliantName(std::string name) {
+    // gtest test names must be alphanumeric; unsigned char avoids UB in isalnum on negative char
+    std::replace_if(
+            name.begin(), name.end(), [](unsigned char c) { return !std::isalnum(c); }, '_');
+    return name;
+}
+
+::std::ostream& operator<<(::std::ostream& os, ErrorStatus errorStatus) {
+    return os << toString(errorStatus);
+}
+
+Request ExecutionContext::createRequest(const TestModel& testModel, MemoryType memoryType) {
+    CHECK(memoryType == MemoryType::ASHMEM || memoryType == MemoryType::BLOB_AHWB);
+
+    // Model inputs.
+    std::vector<RequestArgument> inputs(testModel.main.inputIndexes.size());
+    size_t inputSize = 0;
+    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
+        if (op.data.size() == 0) {
+            // Omitted input.
+            inputs[i] = {.hasNoValue = true};
+        } else {
+            DataLocation loc = {.poolIndex = kInputPoolIndex,
+                                .offset = static_cast<int64_t>(inputSize),
+                                .length = static_cast<int64_t>(op.data.size())};
+            inputSize += op.data.alignedSize();
+            inputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
+        }
+    }
+
+    // Model outputs.
+    std::vector<RequestArgument> outputs(testModel.main.outputIndexes.size());
+    size_t outputSize = 0;
+    for (uint32_t i = 0; i < testModel.main.outputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.outputIndexes[i]];
+
+        // In the case of zero-sized output, we should at least provide a one-byte buffer.
+        // This is because zero-sized tensors are only supported internally to the driver, or
+        // reported in output shapes. It is illegal for the client to pre-specify a zero-sized
+        // tensor as model output. Otherwise, we will have two semantic conflicts:
+        // - "Zero dimension" conflicts with "unspecified dimension".
+        // - "Omitted operand buffer" conflicts with "zero-sized operand buffer".
+        size_t bufferSize = std::max<size_t>(op.data.size(), 1);
+
+        DataLocation loc = {.poolIndex = kOutputPoolIndex,
+                            .offset = static_cast<int64_t>(outputSize),
+                            .length = static_cast<int64_t>(bufferSize)};
+        outputSize += op.data.size() == 0 ? TestBuffer::kAlignment : op.data.alignedSize();
+        outputs[i] = {.hasNoValue = false, .location = loc, .dimensions = {}};
+    }
+
+    // Allocate memory pools.
+    if (memoryType == MemoryType::ASHMEM) {
+        mInputMemory = TestAshmem::create(inputSize);
+        mOutputMemory = TestAshmem::create(outputSize);
+    } else {
+        mInputMemory = TestBlobAHWB::create(inputSize);
+        mOutputMemory = TestBlobAHWB::create(outputSize);
+    }
+    CHECK_NE(mInputMemory, nullptr);
+    CHECK_NE(mOutputMemory, nullptr);
+
+    auto copiedInputMemory = utils::clone(*mInputMemory->getAidlMemory());
+    CHECK(copiedInputMemory.has_value()) << copiedInputMemory.error().message;
+    auto copiedOutputMemory = utils::clone(*mOutputMemory->getAidlMemory());
+    CHECK(copiedOutputMemory.has_value()) << copiedOutputMemory.error().message;
+
+    std::vector<RequestMemoryPool> pools;
+    pools.push_back(RequestMemoryPool::make<RequestMemoryPool::Tag::pool>(
+            std::move(copiedInputMemory).value()));
+    pools.push_back(RequestMemoryPool::make<RequestMemoryPool::Tag::pool>(
+            std::move(copiedOutputMemory).value()));
+
+    // Copy input data to the memory pool.
+    uint8_t* inputPtr = mInputMemory->getPointer();
+    for (uint32_t i = 0; i < testModel.main.inputIndexes.size(); i++) {
+        const auto& op = testModel.main.operands[testModel.main.inputIndexes[i]];
+        if (op.data.size() > 0) {
+            const uint8_t* begin = op.data.get<uint8_t>();
+            const uint8_t* end = begin + op.data.size();
+            std::copy(begin, end, inputPtr + inputs[i].location.offset);
+        }
+    }
+
+    return {.inputs = std::move(inputs), .outputs = std::move(outputs), .pools = std::move(pools)};
+}
+
+std::vector<TestBuffer> ExecutionContext::getOutputBuffers(const Request& request) const {
+    // Copy out output results.
+    uint8_t* outputPtr = mOutputMemory->getPointer();
+    std::vector<TestBuffer> outputBuffers;
+    for (const auto& output : request.outputs) {
+        outputBuffers.emplace_back(output.location.length, outputPtr + output.location.offset);
+    }
+    return outputBuffers;
+}
+
+}  // namespace aidl::android::hardware::neuralnetworks
diff --git a/neuralnetworks/aidl/vts/functional/Utils.h b/neuralnetworks/aidl/vts/functional/Utils.h
new file mode 100644
index 0000000000..266301ca97
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/Utils.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_HARDWARE_NEURALNETWORKS_AIDL_UTILS_H
+#define ANDROID_HARDWARE_NEURALNETWORKS_AIDL_UTILS_H
+
+#include <android-base/logging.h>
+#include <android/hardware_buffer.h>
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <iosfwd>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <aidl/android/hardware/neuralnetworks/IDevice.h>
+#include <aidl/android/hardware/neuralnetworks/Memory.h>
+#include <aidl/android/hardware/neuralnetworks/Operand.h>
+#include <aidl/android/hardware/neuralnetworks/OperandType.h>
+#include <aidl/android/hardware/neuralnetworks/Priority.h>
+#include <aidl/android/hardware/neuralnetworks/Request.h>
+
+#include <TestHarness.h>
+#include <nnapi/SharedMemory.h>
+
+namespace aidl::android::hardware::neuralnetworks {
+
+namespace nn = ::android::nn;
+
+inline constexpr Priority kDefaultPriority = Priority::MEDIUM;
+
+inline constexpr Timing kNoTiming = {.timeOnDevice = -1, .timeInDriver = -1};
+inline constexpr int64_t kNoDeadline = -1;
+inline constexpr int64_t kOmittedTimeoutDuration = -1;
+inline constexpr int64_t kNoDuration = -1;
+inline const std::vector<uint8_t> kEmptyCacheToken(IDevice::BYTE_SIZE_OF_CACHE_TOKEN);
+
+// Returns the amount of space needed to store a value of the specified type.
+//
+// Aborts if the specified type is an extension type or OEM type.
+uint32_t sizeOfData(OperandType type);
+
+// Returns the amount of space needed to store a value of the dimensions and
+// type of this operand. For a non-extension, non-OEM tensor with unspecified
+// rank or at least one unspecified dimension, returns zero.
+//
+// Aborts if the specified type is an extension type or OEM type.
+uint32_t sizeOfData(const Operand& operand);
+
+// Convenience class to manage the lifetime of memory resources.
+class TestMemoryBase {
+    DISALLOW_COPY_AND_ASSIGN(TestMemoryBase);
+
+  public:
+    TestMemoryBase() = default;
+    virtual ~TestMemoryBase() = default;
+    uint8_t* getPointer() const { return mPtr; }
+    const Memory* getAidlMemory() const { return &mAidlMemory; }
+
+  protected:
+    uint8_t* mPtr = nullptr;
+    Memory mAidlMemory;
+    bool mIsValid = false;
+};
+
+class TestAshmem : public TestMemoryBase {
+  public:
+    static std::unique_ptr<TestAshmem> create(uint32_t size);
+
+    // Prefer TestAshmem::create.
+    // The constructor only forwards to initialize(), which constructs the memory resources. The
+    // split is a workaround: gtest macros cannot be used directly in a constructor.
+    TestAshmem(uint32_t size) { initialize(size); }
+
+  private:
+    void initialize(uint32_t size);
+    nn::Mapping mMappedMemory;
+};
+
+class TestBlobAHWB : public TestMemoryBase {
+  public:
+    static std::unique_ptr<TestBlobAHWB> create(uint32_t size);
+
+    // Prefer TestBlobAHWB::create.
+    // The constructor only forwards to initialize(), which constructs the memory resources. The
+    // split is a workaround: gtest macros cannot be used directly in a constructor.
+    TestBlobAHWB(uint32_t size) { initialize(size); }
+    ~TestBlobAHWB();
+
+  private:
+    void initialize(uint32_t size);
+    AHardwareBuffer* mAhwb = nullptr;
+    nn::Mapping mMapping;
+};
+
+enum class MemoryType { ASHMEM, BLOB_AHWB, DEVICE };
+
+// Manages the lifetime of memory resources used in an execution.
+class ExecutionContext {
+    DISALLOW_COPY_AND_ASSIGN(ExecutionContext);
+
+  public:
+    static constexpr uint32_t kInputPoolIndex = 0;   // createRequest adds the input pool first,
+    static constexpr uint32_t kOutputPoolIndex = 1;  // then the output pool
+
+    ExecutionContext() = default;
+
+    // Create AIDL Request from the TestModel struct.
+    Request createRequest(const test_helper::TestModel& testModel,
+                          MemoryType memoryType = MemoryType::ASHMEM);
+
+    // After execution, copy out output results from the output memory pool.
+    std::vector<test_helper::TestBuffer> getOutputBuffers(const Request& request) const;
+
+  private:
+    std::unique_ptr<TestMemoryBase> mInputMemory, mOutputMemory;
+};
+
+template <typename Type>
+using Named = std::pair<std::string, Type>;
+
+template <typename Type>
+const std::string& getName(const Named<Type>& namedData) {
+    return namedData.first;
+}
+
+template <typename Type>
+const Type& getData(const Named<Type>& namedData) {
+    return namedData.second;
+}
+
+std::string gtestCompliantName(std::string name);
+
+// pretty-print values for error messages
+::std::ostream& operator<<(::std::ostream& os, ErrorStatus errorStatus);
+
+}  // namespace aidl::android::hardware::neuralnetworks
+
+#endif  // ANDROID_HARDWARE_NEURALNETWORKS_AIDL_UTILS_H
diff --git a/neuralnetworks/aidl/vts/functional/ValidateModel.cpp b/neuralnetworks/aidl/vts/functional/ValidateModel.cpp
new file mode 100644
index 0000000000..b84d981abd
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/ValidateModel.cpp
@@ -0,0 +1,1338 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "neuralnetworks_aidl_hal_test"
+
+#include <aidl/android/hardware/common/NativeHandle.h>
+#include <android/binder_auto_utils.h>
+#include <android/binder_enums.h>
+#include <android/binder_interface_utils.h>
+#include <nnapi/TypeUtils.h>
+#include <nnapi/hal/aidl/Conversions.h>
+#include <nnapi/hal/aidl/Utils.h>
+
+#include <optional>
+#include <type_traits>
+#include <utility>
+
+#include "Callbacks.h"
+#include "GeneratedTestHarness.h"
+#include "Utils.h"
+#include "VtsHalNeuralnetworks.h"
+
+namespace aidl::android::hardware::neuralnetworks::vts::functional {
+
+using common::NativeHandle;
+using implementation::PreparedModelCallback;
+
+using PrepareModelMutation = std::function<void(Model*, ExecutionPreference*, Priority*)>;
+
+///////////////////////// UTILITY FUNCTIONS /////////////////////////
+
+static void validateGetSupportedOperations(const std::shared_ptr<IDevice>& device,
+                                           const std::string& message, const Model& model) {
+    SCOPED_TRACE(message + " [getSupportedOperations]");
+
+    std::vector<bool> supported;
+    const auto retStatus = device->getSupportedOperations(model, &supported);
+
+    ASSERT_FALSE(retStatus.isOk());
+    ASSERT_EQ(retStatus.getExceptionCode(), EX_SERVICE_SPECIFIC);
+    ASSERT_EQ(static_cast<ErrorStatus>(retStatus.getServiceSpecificError()),
+              ErrorStatus::INVALID_ARGUMENT);
+}
+
+static void validatePrepareModel(const std::shared_ptr<IDevice>& device, const std::string& message,
+                                 const Model& model, ExecutionPreference preference,
+                                 Priority priority) {
+    SCOPED_TRACE(message + " [prepareModel]");
+
+    std::shared_ptr<PreparedModelCallback> preparedModelCallback =
+            ndk::SharedRefBase::make<PreparedModelCallback>();
+    const auto prepareLaunchStatus =
+            device->prepareModel(model, preference, priority, kNoDeadline, {}, {}, kEmptyCacheToken,
+                                 preparedModelCallback);
+    ASSERT_FALSE(prepareLaunchStatus.isOk());
+    ASSERT_EQ(prepareLaunchStatus.getExceptionCode(), EX_SERVICE_SPECIFIC);
+    ASSERT_EQ(static_cast<ErrorStatus>(prepareLaunchStatus.getServiceSpecificError()),
+              ErrorStatus::INVALID_ARGUMENT);
+
+    preparedModelCallback->wait();
+    ErrorStatus prepareReturnStatus = preparedModelCallback->getStatus();
+    ASSERT_EQ(ErrorStatus::INVALID_ARGUMENT, prepareReturnStatus);
+    std::shared_ptr<IPreparedModel> preparedModel = preparedModelCallback->getPreparedModel();
+    ASSERT_EQ(nullptr, preparedModel.get());
+}
+
+static bool validExecutionPreference(ExecutionPreference preference) {
+    return preference == ExecutionPreference::LOW_POWER ||
+           preference == ExecutionPreference::FAST_SINGLE_ANSWER ||
+           preference == ExecutionPreference::SUSTAINED_SPEED;
+}
+
+static bool validExecutionPriority(Priority priority) {
+    return priority == Priority::LOW || priority == Priority::MEDIUM || priority == Priority::HIGH;
+}
+
+// Primary validation function. This function will take a valid model, apply a
+// mutation to invalidate the model, the execution preference, or the priority,
+// then check that prepareModel rejects the result. getSupportedOperations is
+// also checked, but only when the preference and priority are still valid.
+static void validate(const std::shared_ptr<IDevice>& device, const std::string& message,
+                     const Model& originalModel, const PrepareModelMutation& mutate) {
+    Model model = utils::clone(originalModel).value();
+    ExecutionPreference preference = ExecutionPreference::FAST_SINGLE_ANSWER;
+    Priority priority = kDefaultPriority;
+    mutate(&model, &preference, &priority);
+
+    if (validExecutionPreference(preference) && validExecutionPriority(priority)) {
+        validateGetSupportedOperations(device, message, model);
+    }
+
+    validatePrepareModel(device, message, model, preference, priority);
+}
+
+static uint32_t addOperand(Model* model) {
+    model->main.operands.push_back({
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::SUBGRAPH_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+    });
+    return model->main.operands.size() - 1;
+}
+
+static uint32_t addOperand(Model* model, OperandLifeTime lifetime) {
+    uint32_t index = addOperand(model);
+    model->main.operands[index].lifetime = lifetime;
+    return index;
+}
+
+// Returns by how much the model would grow if an operand of size
+// operandSize were introduced as CONSTANT_COPY, on the assumption that
+// all of the existing model.operandValues can be (re)used to hold the
+// operand value. Returns 0 when operandValues is already large enough.
+static size_t constantCopyExtraSize(const Model& model, size_t operandSize) {
+    const size_t reusable = model.operandValues.size();
+    return operandSize - std::min(operandSize, reusable);
+}
+
+// Highly specialized utility routine for converting an operand to
+// CONSTANT_COPY lifetime.
+//
+// Expects that:
+// - operand has a known size
+// - operand->lifetime has already been set to CONSTANT_COPY
+// - operand->location has been zeroed out
+//
+// Does the following:
+// - initializes operand->location to point to the beginning of model->operandValues
+// - resizes model->operandValues (if necessary) to be large enough for the operand
+//   value, padding it with zeroes on the end
+//
+// Potential problem:
+// By changing the operand to CONSTANT_COPY lifetime, this function is effectively initializing the
+// operand with unspecified (but deterministic) data. This means that the model may be invalidated
+// in two ways: not only is the lifetime of CONSTANT_COPY invalid, but the operand's value in the
+// graph may also be invalid (e.g., if the operand is used as an activation code and has an invalid
+// value). For now, this should be fine because it just means we're not testing what we think we're
+// testing in certain cases; but we can handwave this and assume we're probabilistically likely to
+// exercise the validation code over the span of the entire test set and operand space.
+//
+// Aborts if the specified operand type is an extension type or OEM type.
+static void becomeConstantCopy(Model* model, Operand* operand) {
+    // sizeOfData will abort if the specified type is an extension type or OEM type.
+    const size_t sizeOfOperand = sizeOfData(*operand);
+    EXPECT_NE(sizeOfOperand, size_t(0));
+    operand->location.poolIndex = 0;
+    operand->location.offset = 0;
+    operand->location.length = sizeOfOperand;
+    if (model->operandValues.size() < sizeOfOperand) {
+        model->operandValues.resize(sizeOfOperand);
+    }
+}
+
+// The sizeForBinder() functions estimate the size of the
+// representation of a value when sent to binder.  It's probably a bit
+// of an under-estimate, because we don't know the size of the
+// metadata in the binder format (e.g., representation of the size of
+// a vector); but at least it adds up "big" things like vector
+// contents.  However, it doesn't treat inter-field or end-of-struct
+// padding in a methodical way -- there's no attempt to be consistent
+// in whether or not padding in the native (C++) representation
+// contributes to the estimated size for the binder representation;
+// and there's no attempt to understand what padding (if any) is
+// needed in the binder representation.
+//
+// This assumes that non-metadata uses a fixed length encoding (e.g.,
+// a uint32_t is always encoded in sizeof(uint32_t) bytes, rather than
+// using an encoding whose length is related to the magnitude of the
+// encoded value).
+
+template <typename Type>
+static size_t sizeForBinder(const Type& val) {  // fallback: the in-memory size of the value
+    static_assert(std::is_trivially_copyable_v<std::remove_reference_t<Type>>,
+                  "expected a trivially copyable type");
+    return sizeof(val);
+}
+
+template <typename Type>
+static size_t sizeForBinder(const std::vector<Type>& vec) {  // sum of the per-element estimates
+    return std::accumulate(vec.begin(), vec.end(), size_t{0},  // size_t seed: no int truncation
+                           [](size_t acc, const Type& x) { return acc + sizeForBinder(x); });
+}
+
+template <>
+size_t sizeForBinder(const SymmPerChannelQuantParams& symmPerChannelQuantParams) {
+    size_t size = 0;
+
+    size += sizeForBinder(symmPerChannelQuantParams.scales);
+    size += sizeForBinder(symmPerChannelQuantParams.channelDim);
+
+    return size;
+}
+
+template <>
+size_t sizeForBinder(const std::optional<OperandExtraParams>& optionalExtraParams) {
+    if (!optionalExtraParams.has_value()) {
+        return 0;
+    }
+    const auto& extraParams = optionalExtraParams.value();
+    using Tag = OperandExtraParams::Tag;
+    switch (extraParams.getTag()) {
+        case Tag::channelQuant:
+            return sizeForBinder(extraParams.get<Tag::channelQuant>());
+        case Tag::extension:
+            return sizeForBinder(extraParams.get<Tag::extension>());
+    }
+    LOG(FATAL) << "Unrecognized extraParams tag: " << static_cast<int>(extraParams.getTag());
+    return 0;
+}
+
+template <>
+size_t sizeForBinder(const Operand& operand) {
+    size_t size = 0;
+
+    size += sizeForBinder(operand.type);
+    size += sizeForBinder(operand.dimensions);
+    size += sizeForBinder(operand.scale);
+    size += sizeForBinder(operand.zeroPoint);
+    size += sizeForBinder(operand.lifetime);
+    size += sizeForBinder(operand.location);
+    size += sizeForBinder(operand.extraParams);
+
+    return size;
+}
+
+template <>
+size_t sizeForBinder(const Operation& operation) {
+    size_t size = 0;
+
+    size += sizeForBinder(operation.type);
+    size += sizeForBinder(operation.inputs);
+    size += sizeForBinder(operation.outputs);
+
+    return size;
+}
+
+template <>
+size_t sizeForBinder(const std::string& name) {
+    return name.size();
+}
+
+template <>
+size_t sizeForBinder(const Memory& memory) {
+    // This is just a guess.
+
+    size_t size = 0;
+    const NativeHandle& handle = memory.handle;
+    size += sizeof(decltype(handle.fds)::value_type) * handle.fds.size();
+    size += sizeof(decltype(handle.ints)::value_type) * handle.ints.size();
+    size += sizeForBinder(memory.name);
+    size += sizeof(memory);
+
+    return size;
+}
+
+template <>
+size_t sizeForBinder(const Subgraph& subgraph) {
+    size_t size = 0;
+
+    size += sizeForBinder(subgraph.operands);
+    size += sizeForBinder(subgraph.operations);
+    size += sizeForBinder(subgraph.inputIndexes);
+    size += sizeForBinder(subgraph.outputIndexes);
+
+    return size;
+}
+
+template <>
+size_t sizeForBinder(const ExtensionNameAndPrefix& extensionNameToPrefix) {
+    size_t size = 0;
+
+    size += sizeForBinder(extensionNameToPrefix.name);
+    size += sizeForBinder(extensionNameToPrefix.prefix);
+
+    return size;
+}
+
+template <>
+size_t sizeForBinder(const Model& model) {
+    size_t size = 0;
+
+    size += sizeForBinder(model.main);
+    size += sizeForBinder(model.referenced);
+    size += sizeForBinder(model.operandValues);
+    size += sizeForBinder(model.pools);
+    size += sizeForBinder(model.relaxComputationFloat32toFloat16);
+    size += sizeForBinder(model.extensionNameToPrefix);
+
+    return size;
+}
+
+// https://developer.android.com/reference/android/os/TransactionTooLargeException.html
+//
+//     "The Binder transaction buffer has a limited fixed size,
+//     currently 1Mb, which is shared by all transactions in progress
+//     for the process."
+//
+// Will our representation fit under this limit?  There are two complications:
+// - Our representation size is just approximate (see sizeForBinder()).
+// - This object may not be the only occupant of the Binder transaction buffer.
+// So we'll be very conservative: We want the representation size to be no
+// larger than half the transaction buffer size.
+//
+// If our representation grows large enough that it still fits within
+// the transaction buffer but combined with other transactions may
+// exceed the buffer size, then we may see intermittent HAL transport
+// errors.
+static bool exceedsBinderSizeLimit(size_t representationSize) {
+    // The buffer size is hard-coded here. ProcessState::self()->getMmapSize()
+    // might look like an alternative, but it has a potential problem: the
+    // binder/mmap size of the current process does not necessarily indicate
+    // the binder/mmap size of the service (i.e., the other process). It would
+    // only be a good indication if both the current process and the service
+    // use the default size.
+    constexpr size_t kBinderBufferSize = 1024 * 1024;
+    constexpr size_t kHalfBufferSize = kBinderBufferSize / 2;
+    return kHalfBufferSize < representationSize;
+}
+
+///////////////////////// VALIDATE EXECUTION ORDER ////////////////////////////
+
+static void mutateExecutionOrderTest(const std::shared_ptr<IDevice>& device, const Model& model,
+                                     const std::vector<uint32_t>& numberOfConsumers) {
+    for (size_t operation = 0; operation < model.main.operations.size(); ++operation) {
+        const Operation& operationObj = model.main.operations[operation];
+        for (uint32_t input : operationObj.inputs) {
+            if (model.main.operands[input].lifetime == OperandLifeTime::TEMPORARY_VARIABLE ||
+                model.main.operands[input].lifetime == OperandLifeTime::SUBGRAPH_OUTPUT) {
+                // This operation reads an operand written by some
+                // other operation.  Move this operation to the
+                // beginning of the sequence, ensuring that it reads
+                // the operand before that operand is written, thereby
+                // violating execution order rules.
+                const std::string message = "mutateExecutionOrderTest: operation " +
+                                            std::to_string(operation) + " is a reader";
+                validate(device, message, model,
+                         [operation](Model* model, ExecutionPreference*, Priority*) {
+                             auto& operations = model->main.operations;
+                             std::rotate(operations.begin(), operations.begin() + operation,
+                                         operations.begin() + operation + 1);
+                         });
+                break;  // only need to do this once per operation
+            }
+        }
+        for (uint32_t output : operationObj.outputs) {
+            if (numberOfConsumers[output] > 0) {
+                // This operation writes an operand read by some other
+                // operation.  Move this operation to the end of the
+                // sequence, ensuring that it writes the operand after
+                // that operand is read, thereby violating execution
+                // order rules.
+                const std::string message = "mutateExecutionOrderTest: operation " +
+                                            std::to_string(operation) + " is a writer";
+                validate(device, message, model,
+                         [operation](Model* model, ExecutionPreference*, Priority*) {
+                             auto& operations = model->main.operations;
+                             std::rotate(operations.begin() + operation,
+                                         operations.begin() + operation + 1, operations.end());
+                         });
+                break;  // only need to do this once per operation
+            }
+        }
+    }
+}
+
+///////////////////////// VALIDATE MODEL OPERAND TYPE /////////////////////////
+
+static const int32_t invalidOperandTypes[] = {
+        -1,  // negative value; mutateOperandTypeTest casts each entry to OperandType
+        static_cast<int32_t>(*(ndk::enum_range<OperandType>().end() - 1)) + 1,  // last listed + 1
+};
+
+static void mutateOperandTypeTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    for (size_t operand = 0; operand < model.main.operands.size(); ++operand) {
+        for (int32_t invalidOperandType : invalidOperandTypes) {
+            const std::string message = "mutateOperandTypeTest: operand " +
+                                        std::to_string(operand) + " set to value " +
+                                        std::to_string(invalidOperandType);
+            validate(device, message, model,
+                     [operand, invalidOperandType](Model* model, ExecutionPreference*, Priority*) {
+                         model->main.operands[operand].type =
+                                 static_cast<OperandType>(invalidOperandType);
+                     });
+        }
+    }
+}
+
+///////////////////////// VALIDATE OPERAND RANK /////////////////////////
+
+static uint32_t getInvalidRank(OperandType type) {
+    switch (type) {
+        case OperandType::FLOAT16:
+        case OperandType::FLOAT32:
+        case OperandType::INT32:
+        case OperandType::UINT32:
+        case OperandType::BOOL:
+            return 1;  // rank that mutateOperandRankTest injects for these non-TENSOR types
+        case OperandType::TENSOR_BOOL8:
+        case OperandType::TENSOR_FLOAT16:
+        case OperandType::TENSOR_FLOAT32:
+        case OperandType::TENSOR_INT32:
+        case OperandType::TENSOR_QUANT8_ASYMM:
+        case OperandType::TENSOR_QUANT8_SYMM:
+        case OperandType::TENSOR_QUANT16_ASYMM:
+        case OperandType::TENSOR_QUANT16_SYMM:
+        case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+            return 0;  // 0 means "nothing to test"; mutateOperandRankTest skips the operand
+        default:
+            return 0;  // likewise for every type not listed above
+    }
+}
+
+static void mutateOperandRankTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    for (size_t operand = 0; operand < model.main.operands.size(); ++operand) {
+        const uint32_t invalidRank = getInvalidRank(model.main.operands[operand].type);
+        if (invalidRank == 0) {
+            continue;
+        }
+        const std::string message = "mutateOperandRankTest: operand " + std::to_string(operand) +
+                                    " has rank of " + std::to_string(invalidRank);
+        validate(device, message, model,
+                 [operand, invalidRank](Model* model, ExecutionPreference*, Priority*) {
+                     model->main.operands[operand].dimensions =
+                             std::vector<int32_t>(invalidRank, 0);
+                 });
+    }
+}
+
+///////////////////////// VALIDATE OPERAND SCALE /////////////////////////
+
+static float getInvalidScale(OperandType type) {
+    switch (type) {
+        case OperandType::FLOAT16:
+        case OperandType::FLOAT32:
+        case OperandType::INT32:
+        case OperandType::UINT32:
+        case OperandType::BOOL:
+        case OperandType::TENSOR_BOOL8:
+        case OperandType::TENSOR_FLOAT16:
+        case OperandType::TENSOR_FLOAT32:
+        case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+        case OperandType::SUBGRAPH:
+            return 1.0f;
+        case OperandType::TENSOR_INT32:
+            return -1.0f;
+        case OperandType::TENSOR_QUANT8_SYMM:
+        case OperandType::TENSOR_QUANT8_ASYMM:
+        case OperandType::TENSOR_QUANT16_ASYMM:
+        case OperandType::TENSOR_QUANT16_SYMM:
+            return 0.0f;
+        default:
+            return 0.0f;
+    }
+}
+
+static void mutateOperandScaleTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    for (size_t operand = 0; operand < model.main.operands.size(); ++operand) {
+        const float invalidScale = getInvalidScale(model.main.operands[operand].type);
+        const std::string message = "mutateOperandScaleTest: operand " + std::to_string(operand) +
+                                    " has scale of " + std::to_string(invalidScale);
+        validate(device, message, model,
+                 [operand, invalidScale](Model* model, ExecutionPreference*, Priority*) {
+                     model->main.operands[operand].scale = invalidScale;
+                 });
+    }
+}
+
+///////////////////////// VALIDATE OPERAND ZERO POINT /////////////////////////
+
+static std::vector<int32_t> getInvalidZeroPoints(OperandType type) {
+    switch (type) {
+        case OperandType::FLOAT16:
+        case OperandType::FLOAT32:
+        case OperandType::INT32:
+        case OperandType::UINT32:
+        case OperandType::BOOL:
+        case OperandType::TENSOR_BOOL8:
+        case OperandType::TENSOR_FLOAT16:
+        case OperandType::TENSOR_FLOAT32:
+        case OperandType::TENSOR_INT32:
+        case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
+        case OperandType::SUBGRAPH:
+            return {1};
+        case OperandType::TENSOR_QUANT8_ASYMM:
+            return {-1, 256};
+        case OperandType::TENSOR_QUANT8_SYMM:
+            return {-129, -1, 1, 128};
+        case OperandType::TENSOR_QUANT16_ASYMM:
+            return {-1, 65536};
+        case OperandType::TENSOR_QUANT16_SYMM:
+            return {-32769, -1, 1, 32768};
+        default:
+            return {};
+    }
+}
+
+static void mutateOperandZeroPointTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    for (size_t operand = 0; operand < model.main.operands.size(); ++operand) {
+        const std::vector<int32_t> invalidZeroPoints =
+                getInvalidZeroPoints(model.main.operands[operand].type);
+        for (int32_t invalidZeroPoint : invalidZeroPoints) {
+            const std::string message = "mutateOperandZeroPointTest: operand " +
+                                        std::to_string(operand) + " has zero point of " +
+                                        std::to_string(invalidZeroPoint);
+            validate(device, message, model,
+                     [operand, invalidZeroPoint](Model* model, ExecutionPreference*, Priority*) {
+                         model->main.operands[operand].zeroPoint = invalidZeroPoint;
+                     });
+        }
+    }
+}
+
+///////////////////////// VALIDATE OPERAND LIFETIME /////////////////////////////////////////////
+
+static std::vector<OperandLifeTime> getInvalidLifeTimes(const Model& model, size_t modelSize,
+                                                        const Operand& operand) {
+    // TODO: Support OperandLifeTime::CONSTANT_REFERENCE as an invalid lifetime
+    // TODO: Support OperandLifeTime::NO_VALUE as an invalid lifetime
+
+    // Ways to get an invalid lifetime:
+    // - change whether a lifetime means an operand should have a writer
+    std::vector<OperandLifeTime> ret;
+    switch (operand.lifetime) {
+        case OperandLifeTime::SUBGRAPH_OUTPUT:
+        case OperandLifeTime::TEMPORARY_VARIABLE:
+            ret = {
+                    OperandLifeTime::SUBGRAPH_INPUT,
+                    OperandLifeTime::CONSTANT_COPY,
+            };
+            break;
+        case OperandLifeTime::CONSTANT_COPY:
+        case OperandLifeTime::CONSTANT_POOL:
+        case OperandLifeTime::SUBGRAPH_INPUT:
+            ret = {
+                    OperandLifeTime::TEMPORARY_VARIABLE,
+                    OperandLifeTime::SUBGRAPH_OUTPUT,
+            };
+            break;
+        case OperandLifeTime::NO_VALUE:
+            // Not enough information to know whether
+            // TEMPORARY_VARIABLE or CONSTANT_COPY would be invalid --
+            // is this operand written (then CONSTANT_COPY would be
+            // invalid) or not (then TEMPORARY_VARIABLE would be
+            // invalid)?
+            break;
+        case OperandLifeTime::SUBGRAPH:
+            break;
+        default:
+            ADD_FAILURE();
+            break;
+    }
+
+    const size_t operandSize = sizeOfData(operand);  // will be zero if shape is unknown
+    if (!operandSize ||
+        exceedsBinderSizeLimit(modelSize + constantCopyExtraSize(model, operandSize))) {
+        // Unknown size or too-large size
+        ret.erase(std::remove(ret.begin(), ret.end(), OperandLifeTime::CONSTANT_COPY), ret.end());
+    }
+
+    return ret;
+}
+
+static void mutateOperandLifeTimeTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    // Gives each operand every lifetime from getInvalidLifeTimes() and expects rejection.
+    const size_t modelSize = sizeForBinder(model);
+    for (size_t operand = 0; operand < model.main.operands.size(); ++operand) {
+        const std::vector<OperandLifeTime> invalidLifeTimes =
+                getInvalidLifeTimes(model, modelSize, model.main.operands[operand]);
+        for (OperandLifeTime invalidLifeTime : invalidLifeTimes) {
+            const std::string message = "mutateOperandLifeTimeTest: operand " +
+                                        std::to_string(operand) + " has lifetime " +
+                                        toString(invalidLifeTime) + " instead of lifetime " +
+                                        toString(model.main.operands[operand].lifetime);
+            validate(device, message, model,
+                     [operand, invalidLifeTime](Model* model, ExecutionPreference*, Priority*) {
+                         static const DataLocation kZeroDataLocation = {};
+                         Operand& operandObj = model->main.operands[operand];
+                         switch (operandObj.lifetime) {  // leave the old index list, if any
+                             case OperandLifeTime::SUBGRAPH_INPUT: {
+                                 auto& inputs = model->main.inputIndexes;
+                                 inputs.erase(std::remove(inputs.begin(), inputs.end(), operand),
+                                              inputs.end());
+                                 break;
+                             }
+                             case OperandLifeTime::SUBGRAPH_OUTPUT: {
+                                 auto& outputs = model->main.outputIndexes;
+                                 outputs.erase(std::remove(outputs.begin(), outputs.end(), operand),
+                                               outputs.end());
+                                 break;
+                             }
+                             default:
+                                 break;
+                         }
+                         operandObj.lifetime = invalidLifeTime;
+                         operandObj.location = kZeroDataLocation;
+                         switch (invalidLifeTime) {  // set up whatever the new lifetime needs
+                             case OperandLifeTime::CONSTANT_COPY:
+                                 becomeConstantCopy(model, &operandObj);
+                                 break;
+                             case OperandLifeTime::SUBGRAPH_INPUT:
+                                 model->main.inputIndexes.push_back(operand);
+                                 break;
+                             case OperandLifeTime::SUBGRAPH_OUTPUT:
+                                 model->main.outputIndexes.push_back(operand);
+                                 break;
+                             default:
+                                 break;
+                         }
+                     });
+        }
+    }
+}
+
+///////////////////////// VALIDATE OPERAND INPUT-or-OUTPUT //////////////////////////////////////
+
+// Chooses a replacement lifetime for `operand` that is invalid with respect to the model's
+// inputIndexes and outputIndexes:
+// - it changes whether the lifetime means the operand is a model input, a model output, or
+//   neither, and
+// - it preserves whether or not the lifetime means the operand should have a writer.
+// Returns std::nullopt when no such replacement is available for the operand.
+static std::optional<OperandLifeTime> getInputOutputLifeTime(const Model& model, size_t modelSize,
+                                                             const Operand& operand) {
+    switch (operand.lifetime) {
+        case OperandLifeTime::CONSTANT_COPY:
+        case OperandLifeTime::CONSTANT_POOL:
+            return OperandLifeTime::SUBGRAPH_INPUT;
+
+        case OperandLifeTime::SUBGRAPH_INPUT: {
+            // sizeOfData() reports zero when the shape is unknown.
+            const size_t dataSize = sizeOfData(operand);
+            if (dataSize == 0) {
+                return std::nullopt;
+            }
+            // Turning the operand into a constant copy must not push the model past the
+            // binder size limit.
+            if (exceedsBinderSizeLimit(modelSize + constantCopyExtraSize(model, dataSize))) {
+                return std::nullopt;
+            }
+            return OperandLifeTime::CONSTANT_COPY;
+        }
+
+        case OperandLifeTime::SUBGRAPH_OUTPUT:
+            return OperandLifeTime::TEMPORARY_VARIABLE;
+        case OperandLifeTime::TEMPORARY_VARIABLE:
+            return OperandLifeTime::SUBGRAPH_OUTPUT;
+
+        case OperandLifeTime::NO_VALUE:
+            // There is not enough information to pick between TEMPORARY_VARIABLE (right if
+            // the operand is written) and CONSTANT_COPY (right if it is not).
+            return std::nullopt;
+        case OperandLifeTime::SUBGRAPH:
+            return std::nullopt;
+
+        default:
+            ADD_FAILURE();
+            return std::nullopt;
+    }
+
+    return std::nullopt;
+}
+
+// For every main-subgraph operand for which getInputOutputLifeTime() offers a replacement,
+// rewrites only that operand's lifetime (and resets its location) -- inputIndexes and
+// outputIndexes are left alone -- and passes the mutated model to validate().
+static void mutateOperandInputOutputTest(const std::shared_ptr<IDevice>& device,
+                                         const Model& model) {
+    const size_t modelSize = sizeForBinder(model);
+    const auto& operands = model.main.operands;
+    for (size_t operand = 0; operand < operands.size(); ++operand) {
+        const std::optional<OperandLifeTime> changedLifeTime =
+                getInputOutputLifeTime(model, modelSize, operands[operand]);
+        if (!changedLifeTime.has_value()) {
+            continue;
+        }
+        const OperandLifeTime newLifeTime = *changedLifeTime;
+        const std::string message = "mutateOperandInputOutputTest: operand " +
+                                    std::to_string(operand) + " has lifetime " +
+                                    toString(newLifeTime) + " instead of lifetime " +
+                                    toString(operands[operand].lifetime);
+        validate(device, message, model,
+                 [operand, newLifeTime](Model* mutated, ExecutionPreference*, Priority*) {
+                     const DataLocation zeroLocation = {};
+                     Operand& target = mutated->main.operands[operand];
+                     target.lifetime = newLifeTime;
+                     target.location = zeroLocation;
+                     if (newLifeTime == OperandLifeTime::CONSTANT_COPY) {
+                         becomeConstantCopy(mutated, &target);
+                     }
+                 });
+    }
+}
+
+///////////////////////// VALIDATE OPERAND NUMBER OF WRITERS ////////////////////////////////////
+
+// For each output of each main-subgraph operation, appends a copy of that operation to the
+// model in which every OTHER output refers to a newly created operand, so exactly one operand
+// ends up being written twice.  The mutated model is passed to validate().
+static void mutateOperandAddWriterTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    const auto& operations = model.main.operations;
+    for (size_t operation = 0; operation < operations.size(); ++operation) {
+        const auto& originalOutputs = operations[operation].outputs;
+        for (size_t badOutputNum = 0; badOutputNum < originalOutputs.size(); ++badOutputNum) {
+            const uint32_t outputOperandIndex = originalOutputs[badOutputNum];
+            const std::string message = "mutateOperandAddWriterTest: operation " +
+                                        std::to_string(operation) + " writes to " +
+                                        std::to_string(outputOperandIndex);
+            validate(device, message, model,
+                     [operation, badOutputNum](Model* mutated, ExecutionPreference*, Priority*) {
+                         auto& operands = mutated->main.operands;
+                         Operation duplicate = mutated->main.operations[operation];
+                         for (size_t outputNum = 0; outputNum < duplicate.outputs.size();
+                              ++outputNum) {
+                             if (outputNum != badOutputNum) {
+                                 // Give this output a brand-new operand so that it does not
+                                 // become a second duplicate write.
+                                 Operand freshOperand = operands[duplicate.outputs[outputNum]];
+                                 if (freshOperand.lifetime == OperandLifeTime::SUBGRAPH_OUTPUT) {
+                                     freshOperand.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
+                                 } else {
+                                     ASSERT_EQ(freshOperand.lifetime,
+                                               OperandLifeTime::TEMPORARY_VARIABLE);
+                                 }
+                                 duplicate.outputs[outputNum] = operands.size();
+                                 operands.push_back(freshOperand);
+                             }
+                         }
+                         // The extra writer has to come after all the writers of its inputs;
+                         // appending it to the end of the operation sequence guarantees that.
+                         mutated->main.operations.push_back(duplicate);
+                     });
+        }
+    }
+}
+
+///////////////////////// VALIDATE EXTRA ??? /////////////////////////
+
+// TODO: Operand::location
+
+///////////////////////// VALIDATE OPERATION OPERAND TYPE /////////////////////////
+
+// Rewrites `*operand` in place so that it has operand type `type`, adjusting dimensions, scale,
+// zeroPoint and (for per-channel quantization) extraParams to values that fit the new type:
+// - scalar types get empty dimensions and zero scale/zeroPoint;
+// - tensor types keep the existing dimensions, or get {1} if there were none;
+// - quantized tensor types keep a non-zero scale, or get 1.0f.
+// Any other `type` only has the type field changed.
+static void mutateOperand(Operand* operand, OperandType type) {
+    // Dimensions for a tensor type: the current ones, or {1} when the operand has none.
+    const auto tensorDimensions = [operand]() {
+        return operand->dimensions.empty() ? std::vector<int32_t>({1}) : operand->dimensions;
+    };
+
+    Operand mutated = *operand;
+    mutated.type = type;
+    switch (type) {
+        case OperandType::FLOAT16:
+        case OperandType::FLOAT32:
+        case OperandType::INT32:
+        case OperandType::UINT32:
+        case OperandType::BOOL:
+            mutated.dimensions = {};
+            mutated.scale = 0.0f;
+            mutated.zeroPoint = 0;
+            break;
+        case OperandType::TENSOR_BOOL8:
+        case OperandType::TENSOR_FLOAT16:
+        case OperandType::TENSOR_FLOAT32:
+            mutated.dimensions = tensorDimensions();
+            mutated.scale = 0.0f;
+            mutated.zeroPoint = 0;
+            break;
+        case OperandType::TENSOR_INT32:
+            mutated.dimensions = tensorDimensions();
+            mutated.zeroPoint = 0;
+            break;
+        case OperandType::TENSOR_QUANT8_ASYMM:
+        case OperandType::TENSOR_QUANT8_SYMM:
+        case OperandType::TENSOR_QUANT16_ASYMM:
+        case OperandType::TENSOR_QUANT16_SYMM:
+            mutated.dimensions = tensorDimensions();
+            mutated.scale = operand->scale != 0.0f ? operand->scale : 1.0f;
+            break;
+        case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: {
+            mutated.dimensions = tensorDimensions();
+            mutated.scale = 0.0f;
+            mutated.zeroPoint = 0;
+
+            // One scale of 1.0f per entry along dimension 0 (none if there are no dimensions).
+            const size_t channelCount =
+                    operand->dimensions.empty() ? 0 : static_cast<size_t>(operand->dimensions[0]);
+            SymmPerChannelQuantParams channelQuant;
+            channelQuant.channelDim = 0;
+            channelQuant.scales = std::vector<float>(channelCount, 1.0f);
+            // NOTE(review): extraParams is dereferenced without a check here; this assumes it
+            // already holds a value -- TODO confirm against the Operand definition.
+            mutated.extraParams->set<OperandExtraParams::Tag::channelQuant>(
+                    std::move(channelQuant));
+            break;
+        }
+        default:
+            break;
+    }
+    *operand = mutated;
+}
+
+// Decides whether mutateOperationOperandTypeTest should skip changing operand `operand` (an
+// index into model.main.operands) to `type`.
+//
+// Returns true when `type` is already the operand's type, or when any operation in the main
+// subgraph matches one of the exemptions listed below; returns false otherwise.
+static bool mutateOperationOperandTypeSkip(size_t operand, OperandType type, const Model& model) {
+    if (type == model.main.operands[operand].type) {
+        return true;
+    }
+    for (const Operation& operation : model.main.operations) {
+        // Skip mutateOperationOperandTypeTest for the following operations.
+        // - LSH_PROJECTION's second argument is allowed to have any type.
+        // - ARGMIN and ARGMAX's first argument can be any of
+        // TENSOR_(FLOAT16|FLOAT32|INT32|QUANT8_ASYMM).
+        // - CAST's argument can be any of TENSOR_(FLOAT16|FLOAT32|INT32|QUANT8_ASYMM).
+        // - RANDOM_MULTINOMIAL's argument can be either TENSOR_FLOAT16 or TENSOR_FLOAT32.
+        // - DEQUANTIZE input can be any of
+        // TENSOR_(QUANT8_ASYMM|QUANT8_ASYMM_SIGNED|QUANT8_SYMM|QUANT8_SYMM_PER_CHANNEL),
+        // output can be of either TENSOR_FLOAT16 or TENSOR_FLOAT32.
+        // - QUANTIZE input can be either TENSOR_FLOAT16 or TENSOR_FLOAT32
+        // - CONV_2D filter type (arg 1) can be QUANT8_ASYMM or QUANT8_SYMM_PER_CHANNEL
+        // - DEPTHWISE_CONV_2D filter type (arg 1) can be QUANT8_ASYMM or QUANT8_SYMM_PER_CHANNEL
+        // - GROUPED_CONV_2D filter type (arg 1) can be QUANT8_ASYMM or QUANT8_SYMM_PER_CHANNEL
+        // - TRANSPOSE_CONV_2D filter type (arg 1) can be QUANT8_ASYMM or QUANT8_SYMM_PER_CHANNEL
+        // - AXIS_ALIGNED_BBOX_TRANSFORM bounding boxes (arg 1) can be of
+        //     TENSOR_QUANT8_ASYMM or TENSOR_QUANT8_ASYMM_SIGNED.
+        // - RANK's input can have any TENSOR_* type.
+        switch (operation.type) {
+            case OperationType::LSH_PROJECTION: {
+                if (operand == operation.inputs[1]) {
+                    return true;
+                }
+            } break;
+            case OperationType::CAST:
+            case OperationType::ARGMAX:
+            case OperationType::ARGMIN: {
+                // NOTE(review): unlike the other cases, this one does not compare `operand`
+                // against any input/output index, so the presence of one of these operations
+                // exempts EVERY operand of the model for the types below.  It also accepts
+                // TENSOR_QUANT8_ASYMM_SIGNED, which the list above does not mention.
+                if (type == OperandType::TENSOR_FLOAT16 || type == OperandType::TENSOR_FLOAT32 ||
+                    type == OperandType::TENSOR_INT32 || type == OperandType::TENSOR_QUANT8_ASYMM ||
+                    type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
+                    return true;
+                }
+            } break;
+            case OperationType::QUANTIZE: {
+                if (operand == operation.inputs[0] &&
+                    (type == OperandType::TENSOR_FLOAT16 || type == OperandType::TENSOR_FLOAT32)) {
+                    return true;
+                }
+                if (operand == operation.outputs[0] &&
+                    (type == OperandType::TENSOR_QUANT8_ASYMM ||
+                     type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED)) {
+                    return true;
+                }
+            } break;
+            case OperationType::RANDOM_MULTINOMIAL: {
+                if (operand == operation.inputs[0] &&
+                    (type == OperandType::TENSOR_FLOAT16 || type == OperandType::TENSOR_FLOAT32)) {
+                    return true;
+                }
+            } break;
+            case OperationType::DEQUANTIZE: {
+                if (operand == operation.inputs[0] &&
+                    (type == OperandType::TENSOR_QUANT8_ASYMM ||
+                     type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED ||
+                     type == OperandType::TENSOR_QUANT8_SYMM ||
+                     type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL)) {
+                    return true;
+                }
+                if (operand == operation.outputs[0] &&
+                    (type == OperandType::TENSOR_FLOAT16 || type == OperandType::TENSOR_FLOAT32)) {
+                    return true;
+                }
+            } break;
+            case OperationType::TRANSPOSE_CONV_2D:
+            case OperationType::GROUPED_CONV_2D:
+            case OperationType::DEPTHWISE_CONV_2D:
+            case OperationType::CONV_2D: {
+                if (operand == operation.inputs[1] &&
+                    (type == OperandType::TENSOR_QUANT8_ASYMM ||
+                     type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL)) {
+                    return true;
+                }
+            } break;
+            case OperationType::AXIS_ALIGNED_BBOX_TRANSFORM: {
+                if (operand == operation.inputs[1] &&
+                    (type == OperandType::TENSOR_QUANT8_ASYMM ||
+                     type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED)) {
+                    return true;
+                }
+            } break;
+            case OperationType::RANK: {
+                if (operand == operation.inputs[0] &&
+                    (type == OperandType::TENSOR_FLOAT16 || type == OperandType::TENSOR_FLOAT32 ||
+                     type == OperandType::TENSOR_INT32 ||
+                     type == OperandType::TENSOR_QUANT8_ASYMM ||
+                     type == OperandType::TENSOR_QUANT16_SYMM ||
+                     type == OperandType::TENSOR_BOOL8 ||
+                     type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL ||
+                     type == OperandType::TENSOR_QUANT16_ASYMM ||
+                     type == OperandType::TENSOR_QUANT8_SYMM ||
+                     type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED)) {
+                    return true;
+                }
+            } break;
+            default:
+                break;
+        }
+    }
+    return false;
+}
+
+// Tries every OperandType on every main-subgraph operand, except the combinations that
+// mutateOperationOperandTypeSkip() exempts, and passes each mutated model to validate().
+static void mutateOperationOperandTypeTest(const std::shared_ptr<IDevice>& device,
+                                           const Model& model) {
+    const size_t operandCount = model.main.operands.size();
+    for (size_t operand = 0; operand < operandCount; ++operand) {
+        for (const OperandType candidateType : ndk::enum_range<OperandType>()) {
+            if (!mutateOperationOperandTypeSkip(operand, candidateType, model)) {
+                const std::string message = "mutateOperationOperandTypeTest: operand " +
+                                            std::to_string(operand) + " set to type " +
+                                            toString(candidateType);
+                validate(device, message, model,
+                         [operand, candidateType](Model* mutated, ExecutionPreference*,
+                                                  Priority*) {
+                             mutateOperand(&mutated->main.operands[operand], candidateType);
+                         });
+            }
+        }
+    }
+}
+
+///////////////////////// VALIDATE MODEL OPERATION TYPE /////////////////////////
+
+// Raw values that mutateOperationTypeTest casts to OperationType: -1, and one more than the
+// last enumerator yielded by ndk::enum_range<OperationType>().
+static const int32_t invalidOperationTypes[] = {
+        -1,
+        static_cast<int32_t>(*(ndk::enum_range<OperationType>().end() - 1)) + 1,
+};
+
+// Overwrites the type of each main-subgraph operation, one at a time, with each value from
+// invalidOperationTypes and passes the mutated model to validate().
+static void mutateOperationTypeTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    const size_t operationCount = model.main.operations.size();
+    for (size_t operation = 0; operation < operationCount; ++operation) {
+        for (const int32_t rawType : invalidOperationTypes) {
+            const std::string message = "mutateOperationTypeTest: operation " +
+                                        std::to_string(operation) + " set to value " +
+                                        std::to_string(rawType);
+            const auto mutation = [operation, rawType](Model* mutated, ExecutionPreference*,
+                                                       Priority*) {
+                mutated->main.operations[operation].type = static_cast<OperationType>(rawType);
+            };
+            validate(device, message, model, mutation);
+        }
+    }
+}
+
+///////////////////////// VALIDATE MODEL OPERATION INPUT OPERAND INDEX /////////////////////////
+
+// Redirects each input of each main-subgraph operation, one at a time, to an operand index
+// equal to the number of operands (i.e. one past the last valid index) and passes the mutated
+// model to validate().
+static void mutateOperationInputOperandIndexTest(const std::shared_ptr<IDevice>& device,
+                                                 const Model& model) {
+    const auto& operations = model.main.operations;
+    const uint32_t invalidOperand = model.main.operands.size();
+    for (size_t operation = 0; operation < operations.size(); ++operation) {
+        const size_t inputCount = operations[operation].inputs.size();
+        for (size_t input = 0; input < inputCount; ++input) {
+            const std::string message = "mutateOperationInputOperandIndexTest: operation " +
+                                        std::to_string(operation) + " input " +
+                                        std::to_string(input);
+            validate(device, message, model,
+                     [operation, input, invalidOperand](Model* mutated, ExecutionPreference*,
+                                                        Priority*) {
+                         auto& inputs = mutated->main.operations[operation].inputs;
+                         inputs[input] = invalidOperand;
+                     });
+        }
+    }
+}
+
+///////////////////////// VALIDATE MODEL OPERATION OUTPUT OPERAND INDEX /////////////////////////
+
+// Redirects each output of each main-subgraph operation, one at a time, to an operand index
+// equal to the number of operands (i.e. one past the last valid index) and passes the mutated
+// model to validate().
+static void mutateOperationOutputOperandIndexTest(const std::shared_ptr<IDevice>& device,
+                                                  const Model& model) {
+    const auto& operations = model.main.operations;
+    const uint32_t invalidOperand = model.main.operands.size();
+    for (size_t operation = 0; operation < operations.size(); ++operation) {
+        const size_t outputCount = operations[operation].outputs.size();
+        for (size_t output = 0; output < outputCount; ++output) {
+            const std::string message = "mutateOperationOutputOperandIndexTest: operation " +
+                                        std::to_string(operation) + " output " +
+                                        std::to_string(output);
+            validate(device, message, model,
+                     [operation, output, invalidOperand](Model* mutated, ExecutionPreference*,
+                                                         Priority*) {
+                         auto& outputs = mutated->main.operations[operation].outputs;
+                         outputs[output] = invalidOperand;
+                     });
+        }
+    }
+}
+
+///////////////////////// VALIDATE MODEL OPERANDS WRITTEN ///////////////////////////////////////
+
+// For each operation output whose operand has at least one consumer (per `numberOfConsumers`,
+// indexed by operand), redirects that output to a newly appended copy of the operand, so the
+// original operand is no longer written by this operation.  The mutated model is passed to
+// validate().
+static void mutateOperationRemoveWriteTest(const std::shared_ptr<IDevice>& device,
+                                           const Model& model,
+                                           const std::vector<uint32_t>& numberOfConsumers) {
+    const auto& operations = model.main.operations;
+    for (size_t operation = 0; operation < operations.size(); ++operation) {
+        const auto& outputs = operations[operation].outputs;
+        for (size_t outputNum = 0; outputNum < outputs.size(); ++outputNum) {
+            const uint32_t outputOperandIndex = outputs[outputNum];
+            if (numberOfConsumers[outputOperandIndex] == 0) {
+                continue;
+            }
+            const std::string message = "mutateOperationRemoveWriteTest: operation " +
+                                        std::to_string(operation) + " writes to " +
+                                        std::to_string(outputOperandIndex);
+            validate(device, message, model,
+                     [operation, outputNum](Model* mutated, ExecutionPreference*, Priority*) {
+                         auto& operands = mutated->main.operands;
+                         int32_t& writtenIndex =
+                                 mutated->main.operations[operation].outputs[outputNum];
+                         Operand replacement = operands[writtenIndex];
+                         if (replacement.lifetime != OperandLifeTime::SUBGRAPH_OUTPUT) {
+                             ASSERT_EQ(replacement.lifetime,
+                                       OperandLifeTime::TEMPORARY_VARIABLE);
+                         } else {
+                             replacement.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
+                         }
+                         writtenIndex = operands.size();
+                         operands.push_back(replacement);
+                     });
+        }
+    }
+}
+
+///////////////////////// REMOVE OPERAND FROM EVERYTHING /////////////////////////
+
+// Erases every occurrence of `value` from `*vec`, then decrements each remaining element that
+// is greater than `value`, so that the remaining indexes stay correct once the operand at
+// index `value` has been removed from the operand list.  A null `vec` is ignored.
+static void removeValueAndDecrementGreaterValues(std::vector<int32_t>* vec, uint32_t value) {
+    if (vec) {
+        // remove elements matching "value"
+        vec->erase(std::remove(vec->begin(), vec->end(), value), vec->end());
+
+        // decrement elements exceeding "value".  This must be "v - 1", not "v--": the
+        // post-decrement expression yields the value from BEFORE the decrement, which left
+        // every element unchanged.
+        std::transform(vec->begin(), vec->end(), vec->begin(),
+                       [value](uint32_t v) { return v > value ? v - 1 : v; });
+    }
+}
+
+// Deletes the operand at `index` from the main subgraph and runs
+// removeValueAndDecrementGreaterValues() over every index list that can refer to it: each
+// operation's inputs and outputs, and the subgraph's inputIndexes and outputIndexes.
+static void removeOperand(Model* model, uint32_t index) {
+    auto& subgraph = model->main;
+    subgraph.operands.erase(subgraph.operands.begin() + index);
+    for (Operation& op : subgraph.operations) {
+        removeValueAndDecrementGreaterValues(&op.inputs, index);
+        removeValueAndDecrementGreaterValues(&op.outputs, index);
+    }
+    removeValueAndDecrementGreaterValues(&subgraph.inputIndexes, index);
+    removeValueAndDecrementGreaterValues(&subgraph.outputIndexes, index);
+}
+
+// Returns true when removeOperandTest should not remove the operand at `operandIndex`:
+// - the operand has no consumers (per `numberOfConsumers`, indexed by operand), or
+// - the operand is an output of one of the operations exempted below.
+static bool removeOperandSkip(size_t operandIndex, const Model& model,
+                              const std::vector<uint32_t>& numberOfConsumers) {
+    // Removing an unused operand has no effect.
+    if (numberOfConsumers[operandIndex] == 0) {
+        return true;
+    }
+
+    const auto writesOperand = [operandIndex](const Operation& op) {
+        for (const size_t index : op.outputs) {
+            if (index == operandIndex) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    for (const Operation& operation : model.main.operations) {
+        switch (operation.type) {
+            // SPLIT's outputs are not checked during prepareModel.
+            case OperationType::SPLIT:
+            // BIDIRECTIONAL_SEQUENCE_LSTM and BIDIRECTIONAL_SEQUENCE_RNN can have either one,
+            // two, three or four outputs depending on their mergeOutputs parameter and if
+            // state outputs are provided.  UNIDIRECTIONAL_SEQUENCE_LSTM and
+            // UNIDIRECTIONAL_SEQUENCE_RNN can have either one or three outputs depending on
+            // whether state outputs are provided.
+            case OperationType::UNIDIRECTIONAL_SEQUENCE_LSTM:
+            case OperationType::UNIDIRECTIONAL_SEQUENCE_RNN:
+            case OperationType::BIDIRECTIONAL_SEQUENCE_LSTM:
+            case OperationType::BIDIRECTIONAL_SEQUENCE_RNN:
+                if (writesOperand(operation)) {
+                    return true;
+                }
+                break;
+            default:
+                break;
+        }
+    }
+    return false;
+}
+
+// Removes each main-subgraph operand in turn, except those removeOperandSkip() exempts, and
+// passes the mutated model to validate().
+static void removeOperandTest(const std::shared_ptr<IDevice>& device, const Model& model,
+                              const std::vector<uint32_t>& numberOfConsumers) {
+    const size_t operandCount = model.main.operands.size();
+    for (size_t operand = 0; operand < operandCount; ++operand) {
+        if (!removeOperandSkip(operand, model, numberOfConsumers)) {
+            const std::string message = "removeOperandTest: operand " + std::to_string(operand);
+            validate(device, message, model,
+                     [operand](Model* mutated, ExecutionPreference*, Priority*) {
+                         removeOperand(mutated, operand);
+                     });
+        }
+    }
+}
+
+///////////////////////// REMOVE OPERATION /////////////////////////
+
+// Erases the operation at position `index` from the main subgraph's operation list.
+static void removeOperation(Model* model, uint32_t index) {
+    model->main.operations.erase(model->main.operations.begin() + index);
+}
+
+// Removes each main-subgraph operation in turn and passes the mutated model to validate().
+static void removeOperationTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    const size_t operationCount = model.main.operations.size();
+    for (size_t operation = 0; operation < operationCount; ++operation) {
+        const std::string message = "removeOperationTest: operation " + std::to_string(operation);
+        const auto mutation = [operation](Model* mutated, ExecutionPreference*, Priority*) {
+            removeOperation(mutated, operation);
+        };
+        validate(device, message, model, mutation);
+    }
+}
+
+///////////////////////// REMOVE OPERATION INPUT /////////////////////////
+
+// Returns true when removeOperationInputTest should NOT remove input number `input` (a position
+// within op.inputs) from `op`; the decision depends on the operation type, the number of inputs
+// and the input position, as listed below.  Returns false for every other combination.
+static bool removeOperationInputSkip(const Operation& op, size_t input) {
+    // Skip removeOperationInputTest for the following operations.
+    // - CONCATENATION has at least 2 inputs, with the last element being INT32.
+    // - CONV_2D, DEPTHWISE_CONV_2D, MAX_POOL_2D, AVERAGE_POOL_2D, L2_POOL_2D, RESIZE_BILINEAR,
+    //   SPACE_TO_DEPTH, DEPTH_TO_SPACE, SPACE_TO_BATCH_ND, BATCH_TO_SPACE_ND can have an optional
+    //   layout parameter.
+    //   RESIZE_BILINEAR and RESIZE_NEAREST_NEIGHBOR can have optional
+    //   align_corners and half_pixel_centers parameters.
+    // - L2_NORMALIZATION, LOCAL_RESPONSE_NORMALIZATION, SOFTMAX can have an optional axis
+    //   parameter.
+    switch (op.type) {
+        case OperationType::CONCATENATION: {
+            // With more than two inputs, every input except the last one is skipped.
+            if (op.inputs.size() > 2 && input != op.inputs.size() - 1) {
+                return true;
+            }
+        } break;
+        case OperationType::DEPTHWISE_CONV_2D: {
+            // Last input of the 12-input and 9-input forms.
+            if ((op.inputs.size() == 12 && input == 11) || (op.inputs.size() == 9 && input == 8)) {
+                return true;
+            }
+        } break;
+        case OperationType::CONV_2D:
+        case OperationType::AVERAGE_POOL_2D:
+        case OperationType::MAX_POOL_2D:
+        case OperationType::L2_POOL_2D: {
+            // Last input of the 11-input and 8-input forms.
+            if ((op.inputs.size() == 11 && input == 10) || (op.inputs.size() == 8 && input == 7)) {
+                return true;
+            }
+        } break;
+        case OperationType::RESIZE_BILINEAR: {
+            if (op.inputs.size() >= 4 && input >= 3) {
+                return true;
+            }
+        } break;
+        case OperationType::RESIZE_NEAREST_NEIGHBOR: {
+            if (op.inputs.size() >= 5 && input >= 3) {
+                return true;
+            }
+        } break;
+        case OperationType::SPACE_TO_DEPTH:
+        case OperationType::DEPTH_TO_SPACE:
+        case OperationType::BATCH_TO_SPACE_ND: {
+            if (op.inputs.size() == 3 && input == 2) {
+                return true;
+            }
+        } break;
+        case OperationType::SPACE_TO_BATCH_ND: {
+            if (op.inputs.size() == 4 && input == 3) {
+                return true;
+            }
+        } break;
+        case OperationType::L2_NORMALIZATION: {
+            if (op.inputs.size() == 2 && input == 1) {
+                return true;
+            }
+        } break;
+        case OperationType::LOCAL_RESPONSE_NORMALIZATION: {
+            if (op.inputs.size() == 6 && input == 5) {
+                return true;
+            }
+        } break;
+        case OperationType::SOFTMAX: {
+            if (op.inputs.size() == 3 && input == 2) {
+                return true;
+            }
+        } break;
+        default:
+            break;
+    }
+    return false;
+}
+
+// Removes each input of each main-subgraph operation in turn, except the inputs that
+// removeOperationInputSkip() exempts, and passes the mutated model to validate().
+static void removeOperationInputTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    const auto& operations = model.main.operations;
+    for (size_t operation = 0; operation < operations.size(); ++operation) {
+        const Operation& op = operations[operation];
+        for (size_t input = 0; input < op.inputs.size(); ++input) {
+            if (!removeOperationInputSkip(op, input)) {
+                const std::string message = "removeOperationInputTest: operation " +
+                                            std::to_string(operation) + ", input " +
+                                            std::to_string(input);
+                validate(device, message, model,
+                         [operation, input](Model* mutated, ExecutionPreference*, Priority*) {
+                             auto& inputs = mutated->main.operations[operation].inputs;
+                             inputs.erase(inputs.begin() + input);
+                         });
+            }
+        }
+    }
+}
+
+///////////////////////// REMOVE OPERATION OUTPUT /////////////////////////
+
+// Removes each output of each main-subgraph operation in turn and passes the mutated model to
+// validate().
+static void removeOperationOutputTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    const auto& operations = model.main.operations;
+    for (size_t operation = 0; operation < operations.size(); ++operation) {
+        const size_t outputCount = operations[operation].outputs.size();
+        for (size_t output = 0; output < outputCount; ++output) {
+            const std::string message = "removeOperationOutputTest: operation " +
+                                        std::to_string(operation) + ", output " +
+                                        std::to_string(output);
+            const auto mutation = [operation, output](Model* mutated, ExecutionPreference*,
+                                                      Priority*) {
+                auto& outputs = mutated->main.operations[operation].outputs;
+                outputs.erase(outputs.begin() + output);
+            };
+            validate(device, message, model, mutation);
+        }
+    }
+}
+
+///////////////////////// MODEL VALIDATION /////////////////////////
+
+// TODO: remove model input
+// TODO: remove model output
+// TODO: add unused operation
+
+///////////////////////// ADD OPERATION INPUT /////////////////////////
+
+// Returns true when addOperationInputTest should not append an input to `op`.
+//
+// Skipped operations:
+// - L2_NORMALIZATION (1 input), LOCAL_RESPONSE_NORMALIZATION (5 inputs) and SOFTMAX (2 inputs),
+//   which can have an optional INT32 axis parameter.
+// - RESIZE_BILINEAR and RESIZE_NEAREST_NEIGHBOR with fewer than 6 inputs.
+static bool addOperationInputSkip(const Operation& op) {
+    const size_t inputCount = op.inputs.size();
+    switch (op.type) {
+        case OperationType::L2_NORMALIZATION:
+            return inputCount == 1;
+        case OperationType::LOCAL_RESPONSE_NORMALIZATION:
+            return inputCount == 5;
+        case OperationType::SOFTMAX:
+            return inputCount == 2;
+        case OperationType::RESIZE_BILINEAR:
+        case OperationType::RESIZE_NEAREST_NEIGHBOR:
+            return inputCount < 6;
+        default:
+            return false;
+    }
+}
+
+// For each main-subgraph operation not exempted by addOperationInputSkip(), creates a new
+// SUBGRAPH_INPUT operand via addOperand(), appends it to both the operation's inputs and the
+// subgraph's inputIndexes, and passes the mutated model to validate().
+static void addOperationInputTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    const auto& operations = model.main.operations;
+    for (size_t operation = 0; operation < operations.size(); ++operation) {
+        if (!addOperationInputSkip(operations[operation])) {
+            const std::string message =
+                    "addOperationInputTest: operation " + std::to_string(operation);
+            validate(device, message, model,
+                     [operation](Model* mutated, ExecutionPreference*, Priority*) {
+                         const uint32_t extraInput =
+                                 addOperand(mutated, OperandLifeTime::SUBGRAPH_INPUT);
+                         mutated->main.operations[operation].inputs.push_back(extraInput);
+                         mutated->main.inputIndexes.push_back(extraInput);
+                     });
+        }
+    }
+}
+
+///////////////////////// ADD OPERATION OUTPUT /////////////////////////
+
+static void addOperationOutputTest(const std::shared_ptr<IDevice>& device, const Model& model) {
+    // For every operation, append a newly added SUBGRAPH_OUTPUT operand to its outputs.
+    const size_t operationCount = model.main.operations.size();
+    for (size_t opIndex = 0; opIndex < operationCount; ++opIndex) {
+        validate(device, "addOperationOutputTest: operation " + std::to_string(opIndex), model,
+                 [opIndex](Model* mutated, ExecutionPreference*, Priority*) {
+                     const uint32_t extra = addOperand(mutated, OperandLifeTime::SUBGRAPH_OUTPUT);
+                     mutated->main.operations[opIndex].outputs.push_back(extra);
+                     mutated->main.outputIndexes.push_back(extra);
+                 });
+    }
+}
+
+///////////////////////// VALIDATE EXECUTION PREFERENCE /////////////////////////
+
+static const int32_t invalidExecutionPreferences[] = {
+        static_cast<int32_t>(ExecutionPreference::LOW_POWER) - 1,        // below lower bound
+        static_cast<int32_t>(ExecutionPreference::SUSTAINED_SPEED) + 1,  // above upper bound
+};
+
+static void mutateExecutionPreferenceTest(const std::shared_ptr<IDevice>& device,
+                                          const Model& model) {
+    // Substitute each entry of invalidExecutionPreferences for the execution preference.
+    for (const int32_t rawPreference : invalidExecutionPreferences) {
+        const std::string value = std::to_string(rawPreference);
+        validate(device, "mutateExecutionPreferenceTest: preference " + value, model,
+                 [rawPreference](Model*, ExecutionPreference* target, Priority*) {
+                     *target = static_cast<ExecutionPreference>(rawPreference);
+                 });
+    }
+}
+
+///////////////////////// VALIDATE PRIORITY /////////////////////////
+
+static const int32_t invalidPriorities[] = {
+        static_cast<int32_t>(Priority::LOW) - 1,   // below lower bound
+        static_cast<int32_t>(Priority::HIGH) + 1,  // above upper bound
+};
+
+static void mutateExecutionPriorityTest(const std::shared_ptr<IDevice>& device,
+                                        const Model& model) {
+    // Substitute each entry of invalidPriorities for the priority passed along with the model.
+    for (const int32_t invalidPriority : invalidPriorities) {
+        const std::string value = std::to_string(invalidPriority);
+        validate(device, "mutateExecutionPriorityTest: priority " + value, model,
+                 [invalidPriority](Model*, ExecutionPreference*, Priority* priority) {
+                     *priority = static_cast<Priority>(invalidPriority);
+                 });
+    }
+}
+
+////////////////////////// ENTRY POINT //////////////////////////////
+
+void validateModel(const std::shared_ptr<IDevice>& device, const Model& model) {
+    const auto numberOfConsumers = nn::countNumberOfConsumers(  // reused by three tests below
+            model.main.operands.size(), nn::convert(model.main.operations).value());
+    mutateExecutionOrderTest(device, model, numberOfConsumers);
+    mutateOperandTypeTest(device, model);  // operand mutations start here
+    mutateOperandRankTest(device, model);
+    mutateOperandScaleTest(device, model);
+    mutateOperandZeroPointTest(device, model);
+    mutateOperandLifeTimeTest(device, model);
+    mutateOperandInputOutputTest(device, model);
+    mutateOperandAddWriterTest(device, model);
+    mutateOperationOperandTypeTest(device, model);  // operation mutations start here
+    mutateOperationTypeTest(device, model);
+    mutateOperationInputOperandIndexTest(device, model);
+    mutateOperationOutputOperandIndexTest(device, model);
+    mutateOperationRemoveWriteTest(device, model, numberOfConsumers);
+    removeOperandTest(device, model, numberOfConsumers);  // removals start here
+    removeOperationTest(device, model);
+    removeOperationInputTest(device, model);
+    removeOperationOutputTest(device, model);
+    addOperationInputTest(device, model);  // additions start here
+    addOperationOutputTest(device, model);
+    mutateExecutionPreferenceTest(device, model);  // invalid preference / priority values
+    mutateExecutionPriorityTest(device, model);
+}
+
+}  // namespace aidl::android::hardware::neuralnetworks::vts::functional
diff --git a/neuralnetworks/aidl/vts/functional/ValidateRequest.cpp b/neuralnetworks/aidl/vts/functional/ValidateRequest.cpp
new file mode 100644
index 0000000000..db8f429f13
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/ValidateRequest.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "neuralnetworks_aidl_hal_test"
+
+#include <android/binder_auto_utils.h>
+
+#include <chrono>
+
+#include <TestHarness.h>
+#include <nnapi/hal/aidl/Utils.h>
+
+#include "Callbacks.h"
+#include "GeneratedTestHarness.h"
+#include "Utils.h"
+#include "VtsHalNeuralnetworks.h"
+
+namespace aidl::android::hardware::neuralnetworks::vts::functional {
+
+using ExecutionMutation = std::function<void(Request*)>;  // edits a Request in place
+
+///////////////////////// UTILITY FUNCTIONS /////////////////////////
+
+// Primary validation function. This function will take a valid request, apply a
+// mutation to it to invalidate the request, then pass it to interface calls
+// that use the request.
+static void validate(const std::shared_ptr<IPreparedModel>& preparedModel,
+                     const std::string& message, const Request& originalRequest,
+                     const ExecutionMutation& mutate) {
+    // The mutation is applied to a clone; the original request itself is never modified.
+    Request request = utils::clone(originalRequest).value();
+    mutate(&request);
+
+    // Both the timed and the untimed path should be exercised, but not by running every test
+    // twice. Instead the low bit of an XOR over the characters of the message decides whether
+    // timing is requested. std::hash is not used because it is not guaranteed stable across
+    // executions.
+    char messageXor = 0;
+    for (const char c : message) messageXor ^= c;
+    const bool measure = (messageXor & 1) != 0;
+
+    // Synchronous path: the call must fail with the service-specific INVALID_ARGUMENT error.
+    {
+        SCOPED_TRACE(message + " [executeSynchronously]");
+        ExecutionResult unusedResult;
+        const auto status = preparedModel->executeSynchronously(
+                request, measure, kNoDeadline, kOmittedTimeoutDuration, &unusedResult);
+        ASSERT_FALSE(status.isOk());
+        ASSERT_EQ(status.getExceptionCode(), EX_SERVICE_SPECIFIC);
+        const auto error = static_cast<ErrorStatus>(status.getServiceSpecificError());
+        ASSERT_EQ(error, ErrorStatus::INVALID_ARGUMENT);
+    }
+
+    // Fenced path: same expectation. Note that the third argument is a hard-coded false here
+    // rather than `measure`.
+    {
+        SCOPED_TRACE(message + " [executeFenced]");
+        ndk::ScopedFileDescriptor unusedFence;
+        std::shared_ptr<IFencedExecutionCallback> unusedCallback;
+        const auto status = preparedModel->executeFenced(
+                request, {}, false, kNoDeadline, kOmittedTimeoutDuration, kNoDuration,
+                &unusedFence, &unusedCallback);
+        ASSERT_FALSE(status.isOk());
+        ASSERT_EQ(status.getExceptionCode(), EX_SERVICE_SPECIFIC);
+        const auto error = static_cast<ErrorStatus>(status.getServiceSpecificError());
+        ASSERT_EQ(error, ErrorStatus::INVALID_ARGUMENT);
+    }
+}
+
+///////////////////////// REMOVE INPUT ////////////////////////////////////
+
+static void removeInputTest(const std::shared_ptr<IPreparedModel>& preparedModel,
+                            const Request& request) {
+    // Erase each input in turn and hand the shortened request to validate().
+    for (size_t index = 0; index < request.inputs.size(); ++index) {
+        const auto drop = [index](Request* r) { r->inputs.erase(r->inputs.begin() + index); };
+        validate(preparedModel, "removeInput: removed input " + std::to_string(index), request,
+                 drop);
+    }
+}
+
+///////////////////////// REMOVE OUTPUT ////////////////////////////////////
+
+static void removeOutputTest(const std::shared_ptr<IPreparedModel>& preparedModel,
+                             const Request& request) {
+    // Erase each output in turn and hand the shortened request to validate().
+    for (size_t index = 0; index < request.outputs.size(); ++index) {
+        const auto drop = [index](Request* r) { r->outputs.erase(r->outputs.begin() + index); };
+        validate(preparedModel, "removeOutput: removed Output " + std::to_string(index), request,
+                 drop);
+    }
+}
+
+///////////////////////////// ENTRY POINT //////////////////////////////////
+
+void validateRequest(const std::shared_ptr<IPreparedModel>& preparedModel, const Request& request) {
+    removeInputTest(preparedModel, request);   // each input removed in turn
+    removeOutputTest(preparedModel, request);  // each output removed in turn
+}
+
+void validateRequestFailure(const std::shared_ptr<IPreparedModel>& preparedModel,
+                            const Request& request) {
+    // One synchronous execution that must fail with a service-specific error other than NONE.
+    SCOPED_TRACE("Expecting request to fail [executeSynchronously]");
+    ExecutionResult unusedResult;
+    const auto status = preparedModel->executeSynchronously(
+            request, false, kNoDeadline, kOmittedTimeoutDuration, &unusedResult);
+    ASSERT_FALSE(status.isOk());
+    ASSERT_EQ(status.getExceptionCode(), EX_SERVICE_SPECIFIC);
+    ASSERT_NE(static_cast<ErrorStatus>(status.getServiceSpecificError()), ErrorStatus::NONE);
+}
+
+}  // namespace aidl::android::hardware::neuralnetworks::vts::functional
diff --git a/neuralnetworks/aidl/vts/functional/VtsHalNeuralnetworks.cpp b/neuralnetworks/aidl/vts/functional/VtsHalNeuralnetworks.cpp
new file mode 100644
index 0000000000..2d91b8edd9
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/VtsHalNeuralnetworks.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "neuralnetworks_aidl_hal_test"
+#include "VtsHalNeuralnetworks.h"
+
+#include <android-base/logging.h>
+#include <android/binder_auto_utils.h>
+#include <android/binder_interface_utils.h>
+#include <android/binder_manager.h>
+#include <android/binder_status.h>
+#include <gtest/gtest.h>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include <TestHarness.h>
+#include <aidl/Vintf.h>
+#include <nnapi/hal/aidl/Conversions.h>
+
+#include "Callbacks.h"
+#include "GeneratedTestHarness.h"
+#include "Utils.h"
+
+namespace aidl::android::hardware::neuralnetworks::vts::functional {
+
+using implementation::PreparedModelCallback;
+
+// internal helper function
+void createPreparedModel(const std::shared_ptr<IDevice>& device, const Model& model,
+                         std::shared_ptr<IPreparedModel>* preparedModel, bool reportSkipping) {
+    ASSERT_NE(nullptr, preparedModel);
+    *preparedModel = nullptr;
+
+    // Ask the service which operations it supports; the model counts as fully supported only
+    // when every reported entry is true.
+    std::vector<bool> supported;
+    const auto supportedStatus = device->getSupportedOperations(model, &supported);
+    ASSERT_TRUE(supportedStatus.isOk());
+    ASSERT_NE(0ul, supported.size());
+    bool fullySupportsModel = true;
+    for (const bool entry : supported) fullySupportsModel = fullySupportsModel && entry;
+
+    // Launch the preparation; its outcome is delivered through the callback object.
+    const auto callback = ndk::SharedRefBase::make<PreparedModelCallback>();
+    const auto launchStatus =
+            device->prepareModel(model, ExecutionPreference::FAST_SINGLE_ANSWER, kDefaultPriority,
+                                 kNoDeadline, {}, {}, kEmptyCacheToken, callback);
+    ASSERT_TRUE(launchStatus.isOk()) << launchStatus.getDescription();
+
+    // Wait for the callback, then pick up the reported status and the prepared model.
+    callback->wait();
+    const ErrorStatus prepareStatus = callback->getStatus();
+    *preparedModel = callback->getPreparedModel();
+
+    // getSupportedOperations reports the operations that are guaranteed not to fail when
+    // prepareModel is called, and 'fullySupportsModel' is true if and only if the entire model
+    // is guaranteed. A driver with any doubt about an operation must report false. So if a
+    // driver was unsure but still reports a successful preparation, the test continues; only a
+    // failed preparation of a model that was not fully supported ends the test early.
+    if (!fullySupportsModel && prepareStatus != ErrorStatus::NONE) {
+        ASSERT_EQ(nullptr, preparedModel->get());
+        if (reportSkipping) {
+            LOG(INFO) << "NN VTS: Early termination of test because vendor service cannot prepare "
+                         "model that it does not support.";
+            std::cout << "[          ]   Early termination of test because vendor service cannot "
+                         "prepare model that it does not support."
+                      << std::endl;
+            GTEST_SKIP();
+        }
+        return;
+    }
+
+    ASSERT_EQ(ErrorStatus::NONE, prepareStatus);
+    ASSERT_NE(nullptr, preparedModel->get());
+}
+
+void NeuralNetworksAidlTest::SetUp() {
+    testing::TestWithParam<NeuralNetworksAidlTestParam>::SetUp();  // base-class setup first
+    ASSERT_NE(kDevice, nullptr);  // fail the test right away when there is no device handle
+}
+
+static NamedDevice makeNamedDevice(const std::string& name) {
+    ndk::SpAIBinder binder(AServiceManager_waitForService(name.c_str()));  // waits for lazy HALs
+    return {name, IDevice::fromBinder(binder)};
+}
+
+static std::vector<NamedDevice> getNamedDevicesImpl() {
+    // Names of all service instances that implement IDevice, including any Lazy HAL instances.
+    const std::vector<std::string> names = ::android::getAidlHalInstanceNames(IDevice::descriptor);
+
+    // Resolve every name to a device handle and keep the two paired together, in order.
+    std::vector<NamedDevice> namedDevices;
+    namedDevices.reserve(names.size());
+    for (const auto& name : names) namedDevices.push_back(makeNamedDevice(name));
+
+    return namedDevices;
+}
+
+const std::vector<NamedDevice>& getNamedDevices() {
+    const static std::vector<NamedDevice> devices = getNamedDevicesImpl();  // built on first call
+    return devices;
+}
+
+std::string printNeuralNetworksAidlTest(
+        const testing::TestParamInfo<NeuralNetworksAidlTestParam>& info) {
+    return gtestCompliantName(getName(info.param));  // test-name suffix built from the name
+}
+
+INSTANTIATE_DEVICE_TEST(NeuralNetworksAidlTest);
+
+// Forward declaration from ValidateModel.cpp
+void validateModel(const std::shared_ptr<IDevice>& device, const Model& model);
+// Forward declaration from ValidateRequest.cpp
+void validateRequest(const std::shared_ptr<IPreparedModel>& preparedModel, const Request& request);
+// Forward declaration from ValidateRequest.cpp
+void validateRequestFailure(const std::shared_ptr<IPreparedModel>& preparedModel,
+                            const Request& request);
+
+void validateEverything(const std::shared_ptr<IDevice>& device, const Model& model,
+                        const Request& request) {
+    // Model mutations come first; they only need the device.
+    validateModel(device, model);
+    // Request mutations need a prepared model, which is null if the model cannot be prepared.
+    std::shared_ptr<IPreparedModel> prepared;
+    createPreparedModel(device, model, &prepared);
+    if (prepared != nullptr) {
+        validateRequest(prepared, request);
+    }
+    // HIDL also had a test that expected executeFenced to fail on a received null fd (-1). This
+    // is not allowed in AIDL and will result in EX_TRANSACTION_FAILED.
+}
+
+void validateFailure(const std::shared_ptr<IDevice>& device, const Model& model,
+                     const Request& request) {
+    // TODO: Should this always succeed?
+    //       What if the invalid input is part of the model (i.e., a parameter).
+    validateModel(device, model);
+
+    // The failing execution is only attempted when a prepared model was actually obtained.
+    std::shared_ptr<IPreparedModel> prepared;
+    createPreparedModel(device, model, &prepared);
+    if (prepared != nullptr) {
+        validateRequestFailure(prepared, request);
+    }
+}
+
+TEST_P(ValidationTest, Test) {
+    // Test models flagged expectFailure go through validateFailure (the unmodified request
+    // must be rejected); all other models go through validateEverything (mutated models and
+    // requests must be rejected).
+    const Model model = createModel(kTestModel);
+    ExecutionContext context;
+    const Request request = context.createRequest(kTestModel);
+    const auto check = kTestModel.expectFailure ? validateFailure : validateEverything;
+    check(kDevice, model, request);
+}
+
+INSTANTIATE_GENERATED_TEST(ValidationTest, [](const std::string& testName) {
+    // Keep every generated test except "inputs_as_internal" and "all_tensors_as_inputs" ones.
+    const bool isExcluded = testName.find("inputs_as_internal") != std::string::npos ||
+                            testName.find("all_tensors_as_inputs") != std::string::npos;
+    return !isExcluded;
+});
+
+std::string toString(Executor executor) {
+    switch (executor) {  // each enumerator maps to its own spelling
+        case Executor::ASYNC:
+            return "ASYNC";
+        case Executor::SYNC:
+            return "SYNC";
+        case Executor::BURST:
+            return "BURST";
+        case Executor::FENCED:
+            return "FENCED";
+        default:  // not one of the four Executor enumerators
+            CHECK(false);
+    }
+}
+
+}  // namespace aidl::android::hardware::neuralnetworks::vts::functional
diff --git a/neuralnetworks/aidl/vts/functional/VtsHalNeuralnetworks.h b/neuralnetworks/aidl/vts/functional/VtsHalNeuralnetworks.h
new file mode 100644
index 0000000000..9b81ee116e
--- /dev/null
+++ b/neuralnetworks/aidl/vts/functional/VtsHalNeuralnetworks.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_HARDWARE_NEURALNETWORKS_AIDL_VTS_HAL_NEURALNETWORKS_H
+#define ANDROID_HARDWARE_NEURALNETWORKS_AIDL_VTS_HAL_NEURALNETWORKS_H
+
+#include <gtest/gtest.h>
+#include <vector>
+
+#include <aidl/android/hardware/neuralnetworks/IDevice.h>
+
+#include "Callbacks.h"
+#include "Utils.h"
+
+namespace aidl::android::hardware::neuralnetworks::vts::functional {
+
+using NamedDevice = Named<std::shared_ptr<IDevice>>;
+using NeuralNetworksAidlTestParam = NamedDevice;
+
+class NeuralNetworksAidlTest : public testing::TestWithParam<NeuralNetworksAidlTestParam> {
+  protected:
+    void SetUp() override;  // defined in VtsHalNeuralnetworks.cpp
+    const std::shared_ptr<IDevice> kDevice = getData(GetParam());  // device from the test param
+};
+
+const std::vector<NamedDevice>& getNamedDevices();
+
+std::string printNeuralNetworksAidlTest(
+        const testing::TestParamInfo<NeuralNetworksAidlTestParam>& info);
+
+#define INSTANTIATE_DEVICE_TEST(TestSuite)                                                 \
+    GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(TestSuite);                              \
+    INSTANTIATE_TEST_SUITE_P(PerInstance, TestSuite, testing::ValuesIn(getNamedDevices()), \
+                             printNeuralNetworksAidlTest)
+
+// Create an IPreparedModel object. If the model cannot be prepared,
+// "preparedModel" will be nullptr instead.
+void createPreparedModel(const std::shared_ptr<IDevice>& device, const Model& model,
+                         std::shared_ptr<IPreparedModel>* preparedModel,
+                         bool reportSkipping = true);
+
+enum class Executor { ASYNC, SYNC, BURST, FENCED };
+
+std::string toString(Executor executor);
+
+}  // namespace aidl::android::hardware::neuralnetworks::vts::functional
+
+#endif  // ANDROID_HARDWARE_NEURALNETWORKS_AIDL_VTS_HAL_NEURALNETWORKS_H
diff --git a/neuralnetworks/utils/common/include/nnapi/hal/CommonUtils.h b/neuralnetworks/utils/common/include/nnapi/hal/CommonUtils.h
index b3989e5878..fef9d9cfb5 100644
--- a/neuralnetworks/utils/common/include/nnapi/hal/CommonUtils.h
+++ b/neuralnetworks/utils/common/include/nnapi/hal/CommonUtils.h
@@ -24,15 +24,21 @@
 #include <functional>
 #include <vector>
 
-// Shorthand
+// Shorthands
 namespace android::hardware::neuralnetworks {
 namespace hal = ::android::hardware::neuralnetworks;
 }  // namespace android::hardware::neuralnetworks
 
-// Shorthand
+// Shorthands
+namespace aidl::android::hardware::neuralnetworks {
+namespace aidl_hal = ::aidl::android::hardware::neuralnetworks;
+}  // namespace aidl::android::hardware::neuralnetworks
+
+// Shorthands
 namespace android::nn {
 namespace hal = ::android::hardware::neuralnetworks;
-}
+namespace aidl_hal = ::aidl::android::hardware::neuralnetworks;
+}  // namespace android::nn
 
 namespace android::hardware::neuralnetworks::utils {