From c8b0cd33b92db195e125171d7a1360af59a3ab8c Mon Sep 17 00:00:00 2001
From: Viet Dang
Date: Tue, 26 Nov 2019 12:23:32 +0000
Subject: [PATCH] Add quantized LSTM op.

Bug: 144841609
Test: NeuralNetworksTest_static
Change-Id: I8aab51695382211e003d8fc442d33bae3006dc13
---
 current.txt                                  |   2 +-
 neuralnetworks/1.3/types.hal                 | 131 +++++++++++++++++-
 .../1.3/vts/functional/ValidateModel.cpp     |   1 -
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/current.txt b/current.txt
index bab1ff551f..5dc27cd3d7 100644
--- a/current.txt
+++ b/current.txt
@@ -648,7 +648,7 @@ a3eddd9bbdc87e8c22764070037dd1154f1cf006e6fba93364c4f85d4c134a19 android.hardwar
 9e59fffceed0dd72a9799e04505db5f777bbbea1af0695ba4107ef6d967c6fda android.hardware.neuralnetworks@1.3::IDevice
 258825966435b3ed08832055bb736d81516013e405f161d9ccde9a90cfcdde83 android.hardware.neuralnetworks@1.3::IPreparedModel
 94e803236398bed1febb11cc21051bc42ec003700139b099d6c479e02a7ca3c3 android.hardware.neuralnetworks@1.3::IPreparedModelCallback
-35668befe89fc7f84d58fc1dab7dd3e4d6067c7eeccbae154fe36cd964dfaef7 android.hardware.neuralnetworks@1.3::types
+618a628f8c94d6f6e4cb401b69fa50ccb8b82191ea434e3a071252289b4f312c android.hardware.neuralnetworks@1.3::types
 3e01d4446cd69fd1c48f8572efd97487bc179564b32bd795800b97bbe10be37b android.hardware.wifi@1.4::IWifi
 7d136c169b62abdee0bb6abafb97638acd792ce2102dfccddaa5df98d4bd3df9 android.hardware.wifi.hostapd@1.2::IHostapd
 11f6448d15336361180391c8ebcdfd2d7cf77b3782d577e594d583aadc9c2877 android.hardware.wifi.hostapd@1.2::types
diff --git a/neuralnetworks/1.3/types.hal b/neuralnetworks/1.3/types.hal
index b70bd1bff4..74c259ca22 100644
--- a/neuralnetworks/1.3/types.hal
+++ b/neuralnetworks/1.3/types.hal
@@ -4746,6 +4746,135 @@ enum OperationType : int32_t {
      */
     RESIZE_NEAREST_NEIGHBOR = @1.2::OperationType:RESIZE_NEAREST_NEIGHBOR,
 
+    /**
+     * Quantized version of {@link OperationType::LSTM}.
+     *
+     * The input and the output use asymmetric quantized types, while the rest
+     * use symmetric ones.
+     *
+     * Inputs:
+     * * 0: The input to the LSTM cell.
+     *      Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *      Shape: [batchSize, inputSize]
+     * * 1: The input-to-input weights. Optional.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 2: The input-to-forget weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 3: The input-to-cell weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 4: The input-to-output weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, inputSize]
+     * * 5: The recurrent-to-input weights. Optional.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 6: The recurrent-to-forget weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 7: The recurrent-to-cell weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 8: The recurrent-to-output weights.
+     *      Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *      Shape: [numUnits, outputSize]
+     * * 9: The cell-to-input weights (for peephole). Optional.
+     *      Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *      Shape: [numUnits]
+     * * 10: The cell-to-forget weights (for peephole). Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 11: The cell-to-output weights (for peephole). Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 12: The input gate bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Optional.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 13: The forget gate bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 14: The cell bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 15: The output gate bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [numUnits]
+     * * 16: The projection weights. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT8_SYMM}
+     *       Shape: [outputSize, numUnits]
+     * * 17: The projection bias. Quantized with scale being the
+     *       product of input and weights scales and zeroPoint equal to 0.
+     *       Optional.
+     *       Type: {@link OperandType::TENSOR_INT32}
+     *       Shape: [outputSize]
+     * * 18: The output from the previous time step.
+     *       Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *       Shape: [batchSize, outputSize]
+     * * 19: The cell state from the previous time step.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [batchSize, numUnits]
+     * * 20: The input layer normalization weights. Used to rescale
+     *       normalized inputs to activation at input gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 21: The forget layer normalization weights. Used to
+     *       rescale normalized inputs to activation at forget gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 22: The cell layer normalization weights. Used to rescale
+     *       normalized inputs to activation at cell gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 23: The output layer normalization weights. Used to
+     *       rescale normalized inputs to activation at output gate. Optional.
+     *       Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *       Shape: [numUnits]
+     * * 24: The cell clip. If provided, the cell state is clipped
+     *       by this value prior to the cell output activation. Optional.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 25: The projection clip. If provided and projection is enabled,
+     *       this is used for clipping the projected values. Optional.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 26: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at input gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 27: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at forget gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 28: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at cell gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 29: The scale of the intermediate result of matmul,
+     *       i.e. input to layer normalization, at output gate.
+     *       Type: {@link OperandType::FLOAT32}.
+     * * 30: The zero point of the hidden state, i.e. input to
+     *       projection.
+     *       Type: {@link OperandType::INT32}.
+     * * 31: The scale of the hidden state, i.e. input to
+     *       projection.
+     *       Type: {@link OperandType::FLOAT32}.
+     *
+     * Outputs:
+     * * 0: The output state (out).
+     *      Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *      Shape: [batchSize, outputSize]
+     * * 1: The cell state (out).
+     *      Type: {@link OperandType::TENSOR_QUANT16_SYMM}
+     *      Shape: [batchSize, numUnits]
+     * * 2: The output. This is effectively the same as the current
+     *      "output state (out)" value.
+     *      Type: {@link OperandType::TENSOR_QUANT8_ASYMM_SIGNED}
+     *      Shape: [batchSize, outputSize]
+     */
+    QUANTIZED_LSTM = 95,
+
     /**
      * DEPRECATED. Since NNAPI 1.2, extensions are the preferred alternative to
      * OEM operation and data types.
@@ -4768,7 +4897,7 @@ enum OperationType : int32_t {
 enum OperationTypeRange : uint32_t {
     BASE_MIN = 0,
     FUNDAMENTAL_MIN = 0,
-    FUNDAMENTAL_MAX = 94,
+    FUNDAMENTAL_MAX = 95,
     OEM_MIN = 10000,
     OEM_MAX = 10000,
     BASE_MAX = 0xFFFF,
diff --git a/neuralnetworks/1.3/vts/functional/ValidateModel.cpp b/neuralnetworks/1.3/vts/functional/ValidateModel.cpp
index 14ab897c8c..8395111d78 100644
--- a/neuralnetworks/1.3/vts/functional/ValidateModel.cpp
+++ b/neuralnetworks/1.3/vts/functional/ValidateModel.cpp
@@ -27,7 +27,6 @@ using implementation::PreparedModelCallback;
 using V1_0::ErrorStatus;
 using V1_0::OperandLifeTime;
 using V1_1::ExecutionPreference;
-using V1_2::OperationTypeRange;
 using V1_2::SymmPerChannelQuantParams;
 using HidlToken =
         hidl_array<uint8_t, static_cast<uint32_t>(V1_2::Constant::BYTE_SIZE_OF_CACHE_TOKEN)>;
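
For reference, the bias operands above (inputs 12-15 and 17) are TENSOR_INT32 tensors whose
quantization parameters are fully determined by their producers: scale must equal the product of
the input scale and the corresponding weights scale, and zeroPoint must be 0. The C++ sketch
below shows how a model author might quantize float biases to satisfy that constraint. It is a
minimal illustration under those stated rules, not part of this patch, and the quantizeBias
helper name is hypothetical.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>

// Hypothetical helper: quantize float biases for QUANTIZED_LSTM.
// Per the operand docs, the TENSOR_INT32 bias scale is
// inputScale * weightsScale and the zero point is 0, so the
// quantized value is simply round(bias / (inputScale * weightsScale)).
// Assumes both scales are positive, as required for quantized operands.
std::vector<int32_t> quantizeBias(const std::vector<float>& bias,
                                  float inputScale, float weightsScale) {
    const float biasScale = inputScale * weightsScale;
    std::vector<int32_t> quantized;
    quantized.reserve(bias.size());
    for (float b : bias) {
        // Round to nearest, then clamp into the int32 range to guard
        // against overflow when a bias is large relative to the scale.
        const long q = std::lround(b / biasScale);
        quantized.push_back(static_cast<int32_t>(std::clamp<long>(
                q, std::numeric_limits<int32_t>::min(),
                std::numeric_limits<int32_t>::max())));
    }
    return quantized;
}

The clamp is a safety net rather than expected behavior: with well-chosen scales the rounded
values land well inside the int32 range, and no zero-point offset is needed because the bias
zeroPoint is fixed at 0.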