From c8b0cd33b92db195e125171d7a1360af59a3ab8c Mon Sep 17 00:00:00 2001
From: Viet Dang
Date: Tue, 26 Nov 2019 12:23:32 +0000
Subject: [PATCH] Add quantized LSTM op.

Bug: 144841609
Test: NeuralNetworksTest_static
Change-Id: I8aab51695382211e003d8fc442d33bae3006dc13
---
 current.txt                                  |   2 +-
 neuralnetworks/1.3/types.hal                 | 131 +++++++++++++++++-
 .../1.3/vts/functional/ValidateModel.cpp     |   1 -
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/current.txt b/current.txt
index bab1ff551f..5dc27cd3d7 100644
--- a/current.txt
+++ b/current.txt
@@ -648,7 +648,7 @@ a3eddd9bbdc87e8c22764070037dd1154f1cf006e6fba93364c4f85d4c134a19 android.hardwar
 9e59fffceed0dd72a9799e04505db5f777bbbea1af0695ba4107ef6d967c6fda android.hardware.neuralnetworks@1.3::IDevice
 258825966435b3ed08832055bb736d81516013e405f161d9ccde9a90cfcdde83 android.hardware.neuralnetworks@1.3::IPreparedModel
 94e803236398bed1febb11cc21051bc42ec003700139b099d6c479e02a7ca3c3 android.hardware.neuralnetworks@1.3::IPreparedModelCallback
-35668befe89fc7f84d58fc1dab7dd3e4d6067c7eeccbae154fe36cd964dfaef7 android.hardware.neuralnetworks@1.3::types
+618a628f8c94d6f6e4cb401b69fa50ccb8b82191ea434e3a071252289b4f312c android.hardware.neuralnetworks@1.3::types
 3e01d4446cd69fd1c48f8572efd97487bc179564b32bd795800b97bbe10be37b android.hardware.wifi@1.4::IWifi
 7d136c169b62abdee0bb6abafb97638acd792ce2102dfccddaa5df98d4bd3df9 android.hardware.wifi.hostapd@1.2::IHostapd
 11f6448d15336361180391c8ebcdfd2d7cf77b3782d577e594d583aadc9c2877 android.hardware.wifi.hostapd@1.2::types
diff --git a/neuralnetworks/1.3/types.hal b/neuralnetworks/1.3/types.hal
index b70bd1bff4..74c259ca22 100644
--- a/neuralnetworks/1.3/types.hal
+++ b/neuralnetworks/1.3/types.hal
@@ -4746,6 +4746,135 @@ enum OperationType : int32_t {
      */
     RESIZE_NEAREST_NEIGHBOR = @1.2::OperationType:RESIZE_NEAREST_NEIGHBOR,
 
+    /**
+     * Quantized version of {@link OperationType::LSTM}.
+     *
+     * The input and the output use asymmetric quantized types, while the rest
+     * use symmetric ones.
+     *
+     * Inputs:
+     * * 0: The input to the LSTM cell.
+     *      Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *      Shape: [batchSize, inputSize]
+     * * 1: The input-to-input weights. Optional.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 2: The input-to-forget weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 3: The input-to-cell weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 4: The input-to-output weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 5: The recurrent-to-input weights. Optional.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 6: The recurrent-to-forget weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 7: The recurrent-to-cell weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 8: The recurrent-to-output weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 9: The cell-to-input weights (for peephole). Optional.
+     *      Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *      Shape: [numUnits]
+     * * 10: The cell-to-forget weights (for peephole). Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 11: The cell-to-output weights (for peephole). Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 12: The input gate bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Optional.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 13: The forget gate bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 14: The cell bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 15: The output gate bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 16: The projection weights. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *       Shape: [outputSize, numUnits]
+     * * 17: The projection bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Optional.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [outputSize]
+     * * 18: The output from the previous time step.
+     *       Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *       Shape: [batchSize, outputSize]
+     * * 19: The cell state from the previous time step.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [batchSize, numUnits]
+     * * 20: The input layer normalization weights. Used to rescale
+     *       normalized inputs to activation at input gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 21: The forget layer normalization weights. Used to
+     *       rescale normalized inputs to activation at forget gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 22: The cell layer normalization weights. Used to rescale
+     *       normalized inputs to activation at cell gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 23: The output layer normalization weights. Used to
+     *       rescale normalized inputs to activation at output gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 24: The cell clip. If provided, the cell state is clipped
+     *       by this value prior to the cell output activation. Optional.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 25: The projection clip. If provided and projection is enabled,
+     *       this is used for clipping the projected values. Optional.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 26: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at input gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 27: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at forget gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 28: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at cell gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 29: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at output gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 30: The zero point of the hidden state, i.e. input to
+     *       projection.
+     *       Type: {@link OperandType::INT32}.
+     * * 31: The scale of the hidden state, i.e. input to
+     *       projection.
+     *       Type: {@link OperandType::FLOAT32}.
+     *
+     * Outputs:
+     * * 0: The output state (out).
+     *      Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *      Shape: [batchSize, outputSize]
+     * * 1: The cell state (out).
+     *      Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *      Shape: [batchSize, numUnits]
+     * * 2: The output. This is effectively the same as the current
+     *      "output state (out)" value.
+     *      Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *      Shape: [batchSize, outputSize]
+     */
+    QUANTIZED_LSTM = 95,
+
     /**
      * DEPRECATED. Since NNAPI 1.2, extensions are the preferred alternative to
      * OEM operation and data types.
@@ -4768,7 +4897,7 @@ enum OperationType : int32_t {
 enum OperationTypeRange : uint32_t {
     BASE_MIN = 0,
     FUNDAMENTAL_MIN = 0,
-    FUNDAMENTAL_MAX = 94,
+    FUNDAMENTAL_MAX = 95,
     OEM_MIN = 10000,
     OEM_MAX = 10000,
     BASE_MAX = 0xFFFF,
diff --git a/neuralnetworks/1.3/vts/functional/ValidateModel.cpp b/neuralnetworks/1.3/vts/functional/ValidateModel.cpp
index 14ab897c8c..8395111d78 100644
--- a/neuralnetworks/1.3/vts/functional/ValidateModel.cpp
+++ b/neuralnetworks/1.3/vts/functional/ValidateModel.cpp
@@ -27,7 +27,6 @@ using implementation::PreparedModelCallback;
 using V1_0::ErrorStatus;
 using V1_0::OperandLifeTime;
 using V1_1::ExecutionPreference;
-using V1_2::OperationTypeRange;
 using V1_2::SymmPerChannelQuantParams;
 using HidlToken =
         hidl_array<uint8_t, static_cast<uint32_t>(V1_2::Constant::BYTE_SIZE_OF_CACHE_TOKEN)>;
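
For reference, the bias operands above (inputs 12-15 and 17) are TENSOR_INT32 tensors whose
quantization parameters are fully determined by their producers: scale must equal the product of
the input scale and the corresponding weights scale, and zeroPoint must be 0. The C++ sketch
below shows how a model author might quantize float biases to satisfy that constraint. It is a
minimal illustration under those stated rules, not part of this patch, and the quantizeBias
helper name is hypothetical.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>

// Hypothetical helper: quantize float biases for QUANTIZED_LSTM.
// Per the operand docs, the TENSOR_INT32 bias scale is
// inputScale * weightsScale and the zero point is 0, so the
// quantized value is simply round(bias / (inputScale * weightsScale)).
// Assumes both scales are positive, as required for quantized operands.
std::vector<int32_t> quantizeBias(const std::vector<float>& bias,
                                  float inputScale, float weightsScale) {
    const float biasScale = inputScale * weightsScale;
    std::vector<int32_t> quantized;
    quantized.reserve(bias.size());
    for (float b : bias) {
        // Round to nearest, then clamp into the int32 range to guard
        // against overflow when a bias is large relative to the scale.
        const long q = std::lround(b / biasScale);
        quantized.push_back(static_cast<int32_t>(std::clamp<long>(
                q, std::numeric_limits<int32_t>::min(),
                std::numeric_limits<int32_t>::max())));
    }
    return quantized;
}

The clamp is a safety net rather than expected behavior: with well-chosen scales the rounded
values land well inside the int32 range, and no zero-point offset is needed because the bias
zeroPoint is fixed at 0.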