diff --git a/current.txt b/current.txt index af44905b47..fe763279db 100644 --- a/current.txt +++ b/current.txt @@ -238,15 +238,16 @@ a432d6d9200248dc2126827bcd6cdea31dd65eff39b939f64585d27d915a5857 android.hardwar 619600109232ed64b827c8a11beed8070b1827ae464547d7aa146cf0473b4bca android.hardware.cas.native@1.0::IDescrambler 0a159f81359cd4f71bbe00972ee8403ea79351fb7c0cd48be72ebb3e424dbaef android.hardware.radio@1.0::types 09342041e17c429fce0034b9096d17849122111436a5f0053e7e59500e1cb89c android.hardware.media.omx@1.0::IOmxStore -246a56d37d57a47224562c9d077b4a2886ce6242b9311bd98a17325944c280d7 android.hardware.neuralnetworks@1.0::types 93eb3757ceaf21590fa4cd1d4a7dfe3b3794af5396100a6d25630879352abce9 android.hardware.neuralnetworks@1.0::IDevice f66f9a38541bf92001d3adcce678cd7e3da2262124befb460b1c9aea9492813b android.hardware.neuralnetworks@1.0::IExecutionCallback 953607822954435874f4b81686440a604e2a88cdd2d9164c6293f3d5772510d7 android.hardware.neuralnetworks@1.0::IPreparedModel 73e03573494ba96f0e711ab7f1956c5b2d54c3da690cd7ecf4d6d0f287447730 android.hardware.neuralnetworks@1.0::IPreparedModelCallback +246a56d37d57a47224562c9d077b4a2886ce6242b9311bd98a17325944c280d7 android.hardware.neuralnetworks@1.0::types f4945e397b5dea41bb64518dfde59be71245d8a125fd1e0acffeb57ac7b08fed android.hardware.thermal@1.1::IThermal c8bc853546dd55584611def2a9fa1d99f657e3366c976d2f60fe6b8aa6d2cb87 android.hardware.thermal@1.1::IThermalCallback # Future changes to HALs 5804ca86611d72e5481f022b3a0c1b334217f2e4988dad25730c42af2d1f4d1c android.hardware.neuralnetworks@1.0::IDevice -1488db5ffb8a7979488d1084761aab8bca2f59bc9a02d75cdefc296afeaf591b android.hardware.neuralnetworks@1.0::types +12e8dca4ab7d8aadd0ef8f1b438021938e2396139e85db2ed65783b08800aa52 android.hardware.neuralnetworks@1.0::IExecutionCallback +702f9a4cd3b7486a4b04f7155b737757ac2ca4b3548976d5782ad3cae9ff9780 android.hardware.neuralnetworks@1.0::types diff --git a/neuralnetworks/1.0/IExecutionCallback.hal 
b/neuralnetworks/1.0/IExecutionCallback.hal index ef0f4549dd..9c0616696d 100644 --- a/neuralnetworks/1.0/IExecutionCallback.hal +++ b/neuralnetworks/1.0/IExecutionCallback.hal @@ -28,7 +28,7 @@ interface IExecutionCallback { * ErrorStatus resulting from the execution. If the asynchronous task * is not launched, notify must be invoked with the appropriate error. * - * @return param Error status returned from launching the asynchronous task + * @param status Error status returned from launching the asynchronous task * (if the launch fails) or from the asynchronous task itself * (if the launch succeeds). Must be: * - NONE if the asynchronous execution was successful diff --git a/neuralnetworks/1.0/types.hal b/neuralnetworks/1.0/types.hal index 5b8f22cf8e..8c07fcc324 100644 --- a/neuralnetworks/1.0/types.hal +++ b/neuralnetworks/1.0/types.hal @@ -24,38 +24,40 @@ package android.hardware.neuralnetworks@1.0; * Types prefaced with TENSOR_* must be used for tensor data (i.e., tensors * with at least one dimension). Types not prefaced by TENSOR_* represent * scalar values and must have no dimensions. + * + * Although many types are defined, most operators accept just a few + * types. Most used are {@link OperandType::TENSOR_FLOAT32}, + * {@link OperandType::TENSOR_QUANT8_ASYMM}, + * and {@link OperandType::INT32}. */ enum OperandType : int32_t { - /** - * The following entries are used to declare scalars. - */ + /** A 32 bit floating point scalar value. */ FLOAT32 = 0, + /** A signed 32 bit integer scalar value. */ INT32 = 1, + /** An unsigned 32 bit integer scalar value. */ UINT32 = 2, - /** - * The following entries are used to declare tensors. - */ + /** A tensor of 32 bit floating point values. */ TENSOR_FLOAT32 = 3, + /** A tensor of 32 bit integer values. */ TENSOR_INT32 = 4, - - /** - * A tensor of 8 bit integers that represent real numbers. + /** A tensor of 8 bit integers that represent real numbers. 
* * Attached to this tensor are two numbers that can be used to convert the * 8 bit integer to the real value and vice versa. These two numbers are: - * - scale: a 32 bit floating point value greater than zero - * - zero_value: a 32 bit integer + * - scale: a 32 bit floating point value greater than zero. + * - zeroPoint: a 32 bit integer, in range [0, 255]. * * The formula is: - * real_value = (integer_value - zero_value) * scale. + * real_value = (integer_value - zeroPoint) * scale. */ TENSOR_QUANT8_ASYMM = 5, - /** - * The following entries are OEM specific operand types. - */ + /** OEM specific scalar value. */ OEM = 10000, + + /** A tensor of OEM specific values. */ TENSOR_OEM_BYTE = 10001, }; @@ -66,9 +68,9 @@ enum OperandType : int32_t { */ enum OperationType : int32_t { /** - * Adds two tensors, elment-wise. + * Adds two tensors, element-wise. * - * Takes two input tensors of identical type and compatible dimensions. The output + * Takes two input tensors of identical type and compatible dimensions. The output * is the sum of both input tensors, optionally modified by an activation function. * * Two dimensions are compatible when: @@ -79,22 +81,25 @@ enum OperationType : int32_t { * It starts with the trailing dimensions, and works its way forward. * * Example: - * input1.dimension = {4, 1, 2} + * + * input1.dimension = {4, 1, 2} * input2.dimension = {5, 4, 3, 1} * output.dimension = {5, 4, 3, 2} * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} - * {@link OperandType::TENSOR_QUANT8_ASYMM} + * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * * {@link OperandType::TENSOR_QUANT8_ASYMM} + * * Supported tensor rank: up to 4 * * Inputs: - * 0: A tensor. - * 1: A tensor of the same type, and compatible dimensions as input0. - * 2: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. - * Specifies the activation to invoke on the result of each addition. + * * 0: A tensor. 
+ * * 1: A tensor of the same type, and compatible dimensions as input0. + * * 2: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. + * Specifies the activation to invoke on the result of each addition. * - * Ouputs: - * 0: The sum, a tensor of the same type as input0. + * Outputs: + * * 0: The sum, a tensor of the same type as input0. */ ADD = 0, @@ -103,29 +108,50 @@ enum OperationType : int32_t { * * The output dimensions are functions of the filter dimensions, stride, and padding. * - * The values in output Tensor is computed as: + * The values in the output tensor are computed as: + * * output[batch, row, col, channel] = * sum_{i, j}(input[batch, row + i, col + j, channel]) / sum(1) * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} - * {@link OperandType::TENSOR_QUANT8_ASYMM} - * Supported tensor rank: 4, with "NHWC" data layout. + * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * * {@link OperandType::TENSOR_QUANT8_ASYMM} * - * Inputs: - * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. - * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. - * 2: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. - * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. - * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. - * 5: An INT32 value, specifying the output stride in the ‘width’ dimension. - * 6: An INT32 value, specifying the output stride in the ‘height’ dimension. - * 7: An INT32 value, specifying the filter width. - * 8: An INT32 value, specifying the filter height. - * 9: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. - * Specifies the activation to invoke on the result of each addition. + * Supported tensor rank: 4, with "NHWC" (i.e., Num_samples, Height, Width, and Channels) + * data layout. 
* - * Ouputs: - * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth]. + * Both explicit padding and implicit padding are supported. + * + * Inputs (explicit padding): + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. + * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. + * * 2: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. + * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. + * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. + * * 5: An INT32 value, specifying the stride when walking through input + * in the ‘width’ dimension. + * * 6: An INT32 value, specifying the stride when walking through input + * in the ‘height’ dimension. + * * 7: An INT32 value, specifying the filter width. + * * 8: An INT32 value, specifying the filter height. + * * 9: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. + * Specifies the activation to invoke on the result of each addition. + * + * Inputs (implicit padding): + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. + * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the + * following values: {0 (NONE), 1 (SAME), 2 (VALID)}. + * * 2: An INT32 value, specifying the stride when walking through input + * in the ‘width’ dimension. + * * 3: An INT32 value, specifying the stride when walking through input + * in the ‘height’ dimension. + * * 4: An INT32 value, specifying the filter width. + * * 5: An INT32 value, specifying the filter height. + * * 6: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth]. 
*/ AVERAGE_POOL_2D = 1, @@ -135,19 +161,21 @@ enum OperationType : int32_t { * The input tensors must have identical type and the same dimensions except the * dimension along the concatenation axis. * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} - * {@link OperandType::TENSOR_QUANT8_ASYMM} + * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * * {@link OperandType::TENSOR_QUANT8_ASYMM} + * * Supported tensor rank: up to 4 * * Inputs: - * 0 ~ n: The list on n input tensors, of shape [D0, D1, ..., Daxis(i), ..., Dm] - * n+1: An INT32 value, specifying the concatenation axis. - * n+2: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. - * Specifies the activation to invoke on the result of each addition. + * * 0 ~ n-1: The list of n input tensors, of shape [D0, D1, ..., Daxis(i), ..., Dm]. + * For inputs of {@link OperandType::TENSOR_QUANT8_ASYMM} type, all + * input tensors must have the same scale and zeroPoint. + * * n: An INT32 value, specifying the concatenation axis. * - * Ouputs: - * 0: The output, a tensor of the same type as the input tensors. - The output shape is [D0, D1, ..., sum(Daxis(i)), ..., Dm]. + * Outputs: + * * 0: The output, a tensor of the same type as the input tensors. + * The output shape is [D0, D1, ..., sum(Daxis(i)), ..., Dm]. */ CONCATENATION = 2, @@ -159,7 +187,8 @@ enum OperationType : int32_t { * * The output dimensions are functions of the filter dimensions, stride, and padding. 
* - * The values in output Tensor is computed as: + * The values in the output tensor are computed as: + * * output[batch, row, col, channel] = * sum_{i, j} ( * input[batch, row + i, col + j, k] * @@ -167,77 +196,135 @@ enum OperationType : int32_t { * bias[channel] * ) * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} - * {@link OperandType::TENSOR_QUANT8_ASYMM} + * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * * {@link OperandType::TENSOR_QUANT8_ASYMM} + * * Supported tensor rank: 4, with "NHWC" data layout. * - * Inputs: - * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. - * 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, depth_in], - * specifying the filter. - * 2: A 1-D tensor, of shape [depth_out], specifying the bias. - * For input tensor of {@link OperandType::TENSOR_FLOAT32} type, the bias should - * also be of {@link OperandType::TENSOR_FLOAT32}. - * For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the bias - * should be of {@link OperandType::TENSOR_INT32}. - * 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. - * 4: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. - * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. - * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. - * 7: An INT32 value, specifying the output stride in the ‘width’ dimension. - * 8: An INT32 value, specifying the output stride in the ‘height’ dimension. - * 9: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. - * Specifies the activation to invoke on the result of each addition. + * Both explicit padding and implicit padding are supported. * - * Ouputs: - * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out]. 
+ * Inputs (explicit padding): + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. + * * 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, depth_in], + * specifying the filter. + * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. + * For input tensor of {@link OperandType::TENSOR_FLOAT32} type, the bias should + * also be of {@link OperandType::TENSOR_FLOAT32}. + * For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the bias + * should be of {@link OperandType::TENSOR_INT32}, with zeroPoint of 0 and + * bias_scale == input_scale * filter_scale. + * * 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. + * * 4: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. + * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. + * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. + * * 7: An INT32 value, specifying the stride when walking through input + * in the ‘width’ dimension. + * * 8: An INT32 value, specifying the stride when walking through input + * in the ‘height’ dimension. + * * 9: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. + * Specifies the activation to invoke on the result of each addition. + * + * Inputs (implicit padding): + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. + * * 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, depth_in], + * specifying the filter. + * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. + * For input tensor of {@link OperandType::TENSOR_FLOAT32} type, the bias should + * also be of {@link OperandType::TENSOR_FLOAT32}. + * For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the bias + * should be of {@link OperandType::TENSOR_INT32}, with zeroPoint of 0 and + * bias_scale == input_scale * filter_scale. 
+ * * 3: An INT32 value, specifying the implicit padding scheme, has to be one of the + * following values: {0 (NONE), 1 (SAME), 2 (VALID)}. + * * 4: An INT32 value, specifying the stride when walking through input + * in the ‘width’ dimension. + * * 5: An INT32 value, specifying the stride when walking through input + * in the ‘height’ dimension. + * * 6: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out]. + * For output tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the following + * condition must be satisfied: output_scale > input_scale * filter_scale. */ CONV_2D = 3, /** - * Performs an depthwise 2-D convolution operation. + * Performs a depthwise 2-D convolution operation. * * Given an input tensor of shape [batches, height, width, depth_in] and a filter - * tensor of shape [depth_out, filter_height, filter_width, depth_in] containing - * in_channels convolutional filters of depth 1, DEPTHWISE_CONV applies a different + * tensor of shape [1, filter_height, filter_width, depth_out] containing + * depth_out convolutional filters of depth 1, DEPTHWISE_CONV applies a different * filter to each input channel (expanding from 1 channel to channel_multiplier channels * for each), then concatenates the results together. * * The output has depth_out = depth_in * depth_multiplier channels. * The output dimensions are functions of the filter dimensions, stride, and padding. 
* - * The values in output Tensor is computed as: + * The values in the output tensor are computed as: + * * output[b, i, j, k * channel_multiplier + q] = * sum_{di, dj} ( * input[b, strides[1] * i + di, strides[2] * j + dj, k] * - * filter[di, dj, k, q] + * filter[1, di, dj, k * channel_multiplier + q] * ) * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} - * {@link OperandType::TENSOR_QUANT8_ASYMM} + * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * * {@link OperandType::TENSOR_QUANT8_ASYMM} + * * Supported tensor rank: 4, with "NHWC" data layout. * - * Inputs: - * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. - * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out], - * specifying the filter. - * 2: A 1-D tensor, of shape [depth_out], specifying the bias. - * For input tensor of {@link OperandType::TENSOR_FLOAT32} type, the bias should - * also be of {@link OperandType::TENSOR_FLOAT32}. - * For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the bias - * should be of {@link OperandType::TENSOR_INT32}. - * 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. - * 4: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. - * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. - * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. - * 7: An INT32 value, specifying the output stride in the ‘width’ dimension. - * 8: An INT32 value, specifying the output stride in the ‘height’ dimension. - * 9: An INT32 value, specifying the depthwise multiplier. - * 10: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. - * Specifies the activation to invoke on the result of each addition. + * Both explicit padding and implicit padding are supported. 
* - * Ouputs: - * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out]. + * Inputs (explicit padding): + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. + * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out], + * specifying the filter. + * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. + * For input tensor of {@link OperandType::TENSOR_FLOAT32} type, the bias should + * also be of {@link OperandType::TENSOR_FLOAT32}. + * For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the bias + * should be of {@link OperandType::TENSOR_INT32}, with zeroPoint of 0 and + * bias_scale == input_scale * filter_scale. + * * 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. + * * 4: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. + * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. + * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. + * * 7: An INT32 value, specifying the stride when walking through input + * in the ‘width’ dimension. + * * 8: An INT32 value, specifying the stride when walking through input + * in the ‘height’ dimension. + * * 9: An INT32 value, specifying the depthwise multiplier. + * * 10: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. + * Specifies the activation to invoke on the result of each addition. + * + * Inputs (implicit padding): + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. + * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out], + * specifying the filter. + * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. + * For input tensor of {@link OperandType::TENSOR_FLOAT32} type, the bias should + * also be of {@link OperandType::TENSOR_FLOAT32}. 
+ * For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the bias + * should be of {@link OperandType::TENSOR_INT32}, with zeroPoint of 0 and + * bias_scale == input_scale * filter_scale. + * * 3: An INT32 value, specifying the implicit padding scheme, has to be one of the + * following values: {0 (NONE), 1 (SAME), 2 (VALID)}. + * * 4: An INT32 value, specifying the stride when walking through input + * in the ‘width’ dimension. + * * 5: An INT32 value, specifying the stride when walking through input + * in the ‘height’ dimension. + * * 6: An INT32 value, specifying the depthwise multiplier. + * * 7: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out]. + * For output tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the following + * condition must be satisfied: output_scale > input_scale * filter_scale. */ DEPTHWISE_CONV_2D = 4, @@ -255,18 +342,20 @@ enum OperationType : int32_t { * input_height * block_size. * The depth of the input tensor must be divisible by block_size * block_size * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} - * {@link OperandType::TENSOR_QUANT8_ASYMM} + * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * * {@link OperandType::TENSOR_QUANT8_ASYMM} + * * Supported tensor rank: 4, with "NHWC" data layout. * * Inputs: - * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. - * 1: An INT32 value, specifying the block_size. block_size must be >=1 and - * block_size * block_size must be a divisor of the input depth. + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. + * * 1: An INT32 value, specifying the block_size. block_size must be >=1 and + * block_size * block_size must be a divisor of the input depth. 
* - * Ouputs: - * 0: The output 4-D tensor, of shape [batch, height*block_size, width*block_size, - * depth/(block_size*block_size)]. + * Outputs: + * * 0: The output 4-D tensor, of shape [batch, height*block_size, width*block_size, + * depth/(block_size*block_size)]. */ DEPTH_TO_SPACE = 5, @@ -274,53 +363,69 @@ enum OperationType : int32_t { * Dequantizes the input tensor. * * The formula is: - * output = (input - zero_value) * scale. * - * Supported tensor types: {@link OperandType::TENSOR_QUANT8_ASYMM} + * output = (input - zeroPoint) * scale. + * + * Supported tensor types: + * * {@link OperandType::TENSOR_QUANT8_ASYMM} + * * Supported tensor rank: up to 4 * * Inputs: - * 0: A tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}. + * * 0: A tensor of type {@link OperandType::TENSOR_QUANT8_ASYMM}. * - * Ouputs: - * 0: The output tensor of same shape as input0, but with type - {@link OperandType::TENSOR_FLOAT32}. + * Outputs: + * * 0: The output tensor of same shape as input0, but with type + * {@link OperandType::TENSOR_FLOAT32}. */ DEQUANTIZE = 6, /** - * Looks up items from a given tensor. + * Looks up sub-tensors in the input tensor. * - * Each item in the output is a raw copy of the corresponding item in - * the input “values”. If the the given “lookup” indices are out of bounds, - * the op will fail and an error will be reported. + * This operator takes for input a tensor of values (Values) and + * a one-dimensional tensor of selection indices (Lookups). + * The output tensor is the concatenation of sub-tensors of Values as + * selected by Lookups. + * + * Think of Values as being sliced along its first dimension: + * The entries in Lookups select which slices are concatenated together + * to create the output tensor. + * + * For example, if Values has shape of [40, 200, 300] and + * Lookups has shape of [3], all three values found in Lookups are + * expected to be between 0 and 39. The resulting tensor must + * have shape of [3, 200, 300]. 
+ * + * If a value in Lookups is out of bounds, the operation must fail + * and an error must be reported. * * Inputs: - * * 0: Values. An n-D tensor of any type X (where n >= 2). E.g., if n is 2, - * then the shape would be [lookup_dimension, values_dimension], where - * “lookup_dimension” corresponds to the indexing dimension in the lookup - * table, and “values_dimension” to the contents. - * * 1: Lookups. An 1-D tensor of type T, of shape [lookup_size], where - * “lookup_size” is the number of elements to look for, and each entry - * corresponds to the first dimension of the “values” tensor. + * * 0: Lookups. A 1-D tensor of {@link OperandType::TENSOR_INT32} type. + * The values are indices into the first dimension of Values. + * * 1: Values. An n-D tensor, where n >= 2, from which sub-tensors are + * extracted. * * Output: - * * 0: A n-D tensor of type X and the same rank and shape as the “values” - * tensor, except for the first dimension which has size “lookup_size”. + * * 0: A n-D tensor with the same rank and shape as the Values + * tensor, except for the first dimension which has the same size + * as Lookups' only dimension. */ EMBEDDING_LOOKUP = 7, /** * Computes element-wise floor() on the input tensor. * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} + * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * * Supported tensor rank: up to 4 * * Inputs: - * 0: A tensor. + * * 0: A tensor. * - * Ouputs: - * 0: The output, a tensor of the same type and dimensions as input0. + * Outputs: + * * 0: The output tensor, of the same type and dimensions as the input tensor. */ FLOOR = 8, @@ -329,66 +434,104 @@ enum OperationType : int32_t { * tensor with each element in the output tensor. 
* * This layer implements the operation: + * * outputs = activation(inputs * weights’ + bias) * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} - * {@link OperandType::TENSOR_QUANT8_ASYMM} + * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * * {@link OperandType::TENSOR_QUANT8_ASYMM} + * * Supported tensor rank: up to 4. * * Inputs: - * 0: A tensor, specifying the input. If rank is greater than 2, then it gets flattened to - * a 2-D Tensor. The 2-D Tensor is handled as if dimensions corresponded to shape - * [batch_size, input_size], where “batch_size” corresponds to the batching dimension, - * and “input_size” is the size of the input. - * 1: A 2-D tensor, specifying the weights, of shape [num_units, input_size], where “num_units” - * corresponds to the number of output nodes. - * 2: A 1-D tensor, of shape [num_units], specifying the bias. - * For input tensor of {@link OperandType::TENSOR_FLOAT32} type, the bias should - * also be of {@link OperandType::TENSOR_FLOAT32}. - * For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the bias - * should be of {@link OperandType::TENSOR_INT32}. - * 3: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. - * Specifies the activation to invoke on the result of each addition. + * * 0: A tensor, specifying the input. If rank is greater than 2, then it gets flattened to + * a 2-D Tensor. The 2-D Tensor is handled as if dimensions corresponded to shape + * [batch_size, input_size], where “batch_size” corresponds to the batching dimension, + * and “input_size” is the size of the input. + * * 1: A 2-D tensor, specifying the weights, of shape [num_units, input_size], where + * "num_units" corresponds to the number of output nodes. + * * 2: A 1-D tensor, of shape [num_units], specifying the bias. + * For input tensor of {@link OperandType::TENSOR_FLOAT32} type, the bias should + * also be of {@link OperandType::TENSOR_FLOAT32}. 
+ * For input tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the bias + * should be of {@link OperandType::TENSOR_INT32}, with zeroPoint of 0 and + * bias_scale == input_scale * filter_scale. + * * 3: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. + * Specifies the activation to invoke on the result of each addition. * - * Ouputs: - * 0: The output tensor, of shape [batch_size, num_units]. + * Outputs: + * * 0: The output tensor, of shape [batch_size, num_units]. + * For output tensor of {@link OperandType::TENSOR_QUANT8_ASYMM} type, the following + * condition must be satisfied: output_scale > input_scale * filter_scale. */ FULLY_CONNECTED = 9, /** - * Looks up values of a hash table with given keys. + * Looks up sub-tensors in the input tensor using a key-value map. + * + * This operator takes for input a tensor of values (Values), + * a one-dimensional tensor of selection values (Lookups) and + * a one-dimensional tensor (Keys) that maps these values to Values + * indexes. The output tensor is the concatenation of sub-tensors of + * Values as selected by Lookups via Keys. + * + * Think of Values as being sliced along its outer-most dimension. + * The output is a concatenation of selected slices, with one slice + * for each entry of Lookups. The slice selected is the one at the + * same index as the Keys entry that matches the value in Lookups. + * + * For a hit, the corresponding sub-tensor of Values is included + * in the Output tensor. For a miss, the corresponding sub-tensor in + * Output must have zero values. + * + * For example, if Values has shape of [40, 200, 300], + * Keys should have a shape of [40]. If Lookups tensor has shape + * of [3], three slices are being concatenated, so the resulting tensor + * must have the shape of [3, 200, 300]. If the first entry in Lookups + * has the value 123456, that value must be located in Keys tensor. 
+ * If the sixth entry of Keys contains 123456, the sixth slice of Values + * must be selected. If no entry in Keys has 123456, a slice of zeroes + * must be concatenated. * * Inputs: - * * 0: Lookups. A 1-D int32 tensor with shape [ k ]. - * * 1: Keys. A 1-D int32 tensor with shape [ n ], *MUST* be sorted in - * ascending order. - * * 2: Values. A tensor with shape [ n … ]. + * * 0: Lookups. A 1-D {@link OperandType::TENSOR_INT32} tensor with shape [ k ]. + * * 1: Keys. A 1-D {@link OperandType::TENSOR_INT32} tensor with shape [ n ]; + * Keys and Values pair represent a map, i.e., the ith element + * in Keys (Keys[i]) is the key to select the ith sub-tensor + * in Values (Values[i]), where 0 <= i <= n-1. + * Keys tensor *MUST* be sorted in ascending order. + * * 2: Values. A tensor with shape of [ n, … ]; i.e., the first dimension must be n. * * Outputs: * * 0: Output. A tensor with shape [ k …]. - * * 1: Hits. A uint8 tensor with shape [ k ] indicates whether the lookup - * hits or not. + * * 1: Hits. A boolean tensor with shape [ k ] indicates whether the lookup + * hits (True) or not (False). + * Stored as {@link OperandType::TENSOR_QUANT8_ASYMM} with offset 0 and scale 1.0f. + * A non-zero byte represents True, a hit. A zero indicates otherwise. */ HASHTABLE_LOOKUP = 10, /** - * Applies L2 normalization along a the depth dimension. + * Applies L2 normalization along the depth dimension. + * + * The values in the output tensor are computed as: * - * The values in output Tensor is computed as: * output[batch, row, col, channel] = * input[batch, row, col, channel] / * sqrt(sum_{c} pow(input[batch, row, col, c], 2)) * - * For x with more dimensions, independently normalizes each 1-D slice along dimension dim. + * For input tensor with more dimensions, independently normalizes each 1-D slice along dimension dim. * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} - * Supported tensor rank: 4, with "NHWC" data layout. 
+ * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * + * Supported tensor rank: 4, with "NHWC" data layout (i.e., Num_samples, Height, Width, and Channels). * * Inputs: - * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. + * * 0: A 4-D tensor, of shape [batches, height, width, depth]. * - * Ouputs: - * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth]. + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, height, width, depth]. */ L2_NORMALIZATION = 11, @@ -397,28 +540,48 @@ enum OperationType : int32_t { * * The output dimensions are functions of the filter dimensions, stride, and padding. * - * The values in output Tensor is computed as: + * The values in the output tensor are computed as: + * * output[batch, row, col, channel] = * sqrt(sum_{i, j} pow(input[batch, row + i, col + j, channel], 2) / sum(1)) * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} + * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * * Supported tensor rank: 4, with "NHWC" data layout. * - * Inputs: - * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. - * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. - * 2: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. - * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. - * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. - * 5: An INT32 value, specifying the output stride in the ‘width’ dimension. - * 6: An INT32 value, specifying the output stride in the ‘height’ dimension. - * 7: An INT32 value, specifying the filter width. - * 8: An INT32 value, specifying the filter height. - * 9: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. - * Specifies the activation to invoke on the result of each addition.
+ * Both explicit padding and implicit padding are supported. * - * Ouputs: - * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth]. + * Inputs (explicit padding): + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. + * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. + * * 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension. + * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. + * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. + * * 5: An INT32 value, specifying the stride when walking through input + * in the ‘width’ dimension. + * * 6: An INT32 value, specifying the stride when walking through input + * in the ‘height’ dimension. + * * 7: An INT32 value, specifying the filter width. + * * 8: An INT32 value, specifying the filter height. + * * 9: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. + * Specifies the activation to invoke on the result of each addition. + * + * Inputs (implicit padding): + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. + * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the + * following values: {0 (NONE), 1 (SAME), 2 (VALID)}. + * * 2: An INT32 value, specifying the stride when walking through input + * in the ‘width’ dimension. + * * 3: An INT32 value, specifying the stride when walking through input + * in the ‘height’ dimension. + * * 4: An INT32 value, specifying the filter width. + * * 5: An INT32 value, specifying the filter height. + * * 6: An INT32 value, and has to be one of the {@link FusedActivationFunc} values. + * Specifies the activation to invoke on the result of each addition. + * + * Outputs: + * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
*/ L2_POOL_2D = 12, @@ -429,41 +592,49 @@ enum OperationType : int32_t { * dimension), and each vector is normalized independently. Within a given vector, * each component is divided by the weighted, squared sum of inputs within depth_radius. * - * In details: + * The output is calculated using this formula: + * * sqr_sum[a, b, c, d] = * sum(pow(input[a, b, c, d - depth_radius : d + depth_radius + 1], 2) * output = input / pow((bias + alpha * sqr_sum), beta) * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} + * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * * Supported tensor rank: 4, with "NHWC" data layout. * * Inputs: - * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. - * 1: An INT32 value, specifying the radius of the normalization window. - * 2: A FLOAT32 value, specifying the bias, must not be zero. - * 3: A FLOAT32 value, specifying the scale factor, alpha. - * 4: A FLOAT32 value, specifying the exponent, beta. + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. + * * 1: An INT32 value, specifying the radius of the normalization window. + * * 2: A FLOAT32 value, specifying the bias, must not be zero. + * * 3: A FLOAT32 value, specifying the scale factor, alpha. + * * 4: A FLOAT32 value, specifying the exponent, beta. * - * Ouputs: - * 0: The output tensor of same shape as input0. + * Outputs: + * * 0: The output tensor of same shape as input0. */ LOCAL_RESPONSE_NORMALIZATION = 13, /** * Computes sigmoid activation on the input tensor element-wise. * - * In details: + * The output is calculated using this formula: + * * output = 1 / (1 + exp(-input)) * - * Supported tensor types: {@link OperandType::TENSOR_FLOAT32} - * {@link OperandType::TENSOR_QUANT8_ASYMM} + * Supported tensor types: + * * {@link OperandType::TENSOR_FLOAT32} + * * {@link OperandType::TENSOR_QUANT8_ASYMM} + * * Supported tensor rank: up to 4. 
* * Inputs: - * 0: A tensor, specifying the input. + * * 0: A tensor, specifying the input. * - * Ouputs: - * 0: The output tensor of same shape as input0. + * Outputs: + * * 0: The output tensor of same shape as input0. + * For {@link OperandType::TENSOR_QUANT8_ASYMM} type, + * the scale must be 1.f / 256 and the zeroPoint must be 0. */ LOGISTIC = 14, @@ -502,102 +673,165 @@ enum OperationType : int32_t { LSH_PROJECTION = 15, /** - * Long short-term memory unit (LSTM) recurrent network layer. + * Performs a single time step in a Long Short-Term Memory (LSTM) layer. * - * The default non-peephole implementation is based on: - * http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf + * The LSTM operation is described by the following equations. + * + * \f{eqnarray*}{ + * i_t =& \sigma(W_{xi}x_t+W_{hi}h_{t-1}+W_{ci}C_{t-1}+b_i) & \\ + * f_t =& \sigma(W_{xf}x_t+W_{hf}h_{t-1}+W_{cf}C_{t-1}+b_f) & \\ + * C_t =& clip(f_t \odot C_{t-1} + i_t \odot g(W_{xc}x_t+W_{hc}h_{t-1}+b_c),\ t_{cell})& \\ + * o_t =& \sigma(W_{xo}x_t+W_{ho}h_{t-1}+W_{co}C_t+b_o)& \\ + * & clip(W_{proj}(o_t \odot g(C_t))+b_{proj},\ t_{proj}) & if\ there\ is\ a\ projection; \\ + * h_t =& & \\ + * & o_t \odot g(C_t) & otherwise.
\\ + * \f} + * Where: + * * \f$x_t\f$ is the input, + * * \f$i_t\f$ is the input gate, + * * \f$f_t\f$ is the forget gate, + * * \f$C_t\f$ is the cell state, + * * \f$o_t\f$ is the output, + * * \f$h_t\f$ is the output state, + * * \f$\sigma\f$ is the logistic sigmoid function, + * * \f$g\f$ is the cell input and cell output activation function, usually \f$tanh\f$, + * * \f$W_{xi}\f$ is the input-to-input weight matrix, + * * \f$W_{hi}\f$ is the recurrent-to-input weight matrix, + * * \f$W_{ci}\f$ is the cell-to-input weight matrix, + * * \f$b_i\f$ is the input gate bias, + * * \f$W_{xf}\f$ is the input-to-forget weight matrix, + * * \f$W_{hf}\f$ is the recurrent-to-forget weight matrix, + * * \f$W_{cf}\f$ is the cell-to-forget weight matrix, + * * \f$b_f\f$ is the forget gate bias, + * * \f$W_{xc}\f$ is the input-to-cell weight matrix, + * * \f$W_{hc}\f$ is the recurrent-to-cell weight matrix, + * * \f$b_c\f$ is the cell bias, + * * \f$W_{xo}\f$ is the input-to-output weight matrix, + * * \f$W_{ho}\f$ is the recurrent-to-output weight matrix, + * * \f$W_{co}\f$ is the cell-to-output weight matrix, + * * \f$b_o\f$ is the output gate bias, + * * \f$W_{proj}\f$ is the projection weight matrix, + * * \f$b_{proj}\f$ is the projection bias, + * * \f$t_{cell}\f$ is the threshold for clipping the cell state, + * * \f$t_{proj}\f$ is the threshold for clipping the projected output, and + * * \f$\odot\f$ is the + * Hadamard product that takes two matrices and produces another + * matrix, each element of which is the product of the corresponding + * elements of the input matrices. + * + * The operation has the following independently optional inputs: + * * The input-to-input weights (\f$W_{xi}\f$), recurrent-to-input weights (\f$W_{hi}\f$), + * cell-to-input weights (\f$W_{ci}\f$), and input gate bias (\f$b_i\f$) either all have values, + * or none of them have values (i.e., all set to null).
If they have no + * values, coupling of input and forget gates (CIFG) is used, in which case + * the input gate (\f$i_t\f$) is calculated using the following equation instead. + * \f{eqnarray*}{ + * i_t = 1 - f_t + * \f} + * * The cell-to-input weights (\f$W_{ci}\f$), cell-to-forget weights (\f$W_{cf}\f$), and cell-to-output + * weights (\f$W_{co}\f$) either all have values or none of them have values. + * If they have values, the peephole optimization is used. + * * The projection weights (\f$W_{proj}\f$) are required only for the recurrent projection + * layer, and should otherwise have no value. + * * The projection bias (\f$b_{proj}\f$) may (but is not required to) have a value if the + * recurrent projection layer exists, and should otherwise have no value. + * + * References: + * + * The default non-peephole non-CIFG implementation is based on: + * http://www.bioinf.jku.at/publications/older/2604.pdf * S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural * Computation, 9(8):1735-1780, 1997. * - * The peephole implementation is based on: + * The peephole implementation and projection layer are based on: * https://research.google.com/pubs/archive/43905.pdf * Hasim Sak, Andrew Senior, and Francoise Beaufays. "Long short-term memory * recurrent neural network architectures for large scale acoustic modeling." * INTERSPEECH, 2014. + * (However, the concept of peephole optimization was introduced in work + * prior to this paper.) * * The coupling of input and forget gate (CIFG) is based on: * http://arxiv.org/pdf/1503.04069.pdf * Greff et al. "LSTM: A Search Space Odyssey" * - * The class has the following independently optional inputs: - * * If input gate (if CIFG): “input_to_forget_weights”, - * “recurrent_to_input_weights”, “cell_to_input_weights”, “input_gate_bias”. - * * If no peephole connections: “cell_to_input_weights”, - * “cell_to_forget_weights”, “cell_to_output_weights”. - * * If no projection layer: “projection_weights” and “projection_bias”. 
- * * If no projection bias: “projection_bias”. - * - * Supported tensor types: + * Supported tensor types (type T): * * {@link OperandType::TENSOR_FLOAT32} * * Inputs: - * * 0: Input. + * * 0: The input (\f$x_t\f$). * A 2-D tensor of type T, of shape [batch_size, input_size], where * “batch_size” corresponds to the batching dimension, and “input_size” * is the size of the input. - * * 1: input_to_input_weights. + * * 1: The input-to-input weights (\f$W_{xi}\f$). Optional. * A 2-D tensor of type T, of shape [num_units, input_size], where * “num_units” corresponds to the number of cell units. - * * 2: input_to_forget_weights. + * * 2: The input-to-forget weights (\f$W_{xf}\f$). * A 2-D tensor of type T, of shape [num_units, input_size]. - * * 3: input_to_cell_weights. + * * 3: The input-to-cell weights (\f$W_{xc}\f$). * A 2-D tensor of type T, of shape [num_units, input_size]. - * * 4: input_to_output_weights. + * * 4: The input-to-output weights (\f$W_{xo}\f$). * A 2-D tensor of type T, of shape [num_units, input_size]. - * * 5: recurrent_to_input_weights. + * * 5: The recurrent-to-input weights (\f$W_{hi}\f$). Optional. * A 2-D tensor of type T, of shape [num_units, output_size], where * “output_size” corresponds to either the number of cell units (i.e., * “num_units”), or the second dimension of the “projection_weights”, if * defined. - * * 6: recurrent_to_forget_weights. + * * 6: The recurrent-to-forget weights (\f$W_{hf}\f$). * A 2-D tensor of type T, of shape [num_units, output_size]. - * * 7: recurrent_to_cell_weights. + * * 7: The recurrent-to-cell weights (\f$W_{hc}\f$). * A 2-D tensor of type T, of shape [num_units, output_size]. - * * 8: recurrent_to_output_weights. + * * 8: The recurrent-to-output weights (\f$W_{ho}\f$). * A 2-D tensor of type T, of shape [num_units, output_size]. - * * 9: cell_to_input_weights. + * * 9: The cell-to-input weights (\f$W_{ci}\f$). Optional. * A 1-D tensor of type T, of shape [num_units]. - * * 10:cell_to_forget_weights. 
+ * * 10:The cell-to-forget weights (\f$W_{cf}\f$). Optional. * A 1-D tensor of type T, of shape [num_units]. - * * 11:cell_to_output_weights. + * * 11:The cell-to-output weights (\f$W_{co}\f$). Optional. * A 1-D tensor of type T, of shape [num_units]. - * * 12:input_gate_bias. + * * 12:The input gate bias (\f$b_i\f$). Optional. * A 1-D tensor of type T, of shape [num_units]. - * * 13:forget_gate_bias. + * * 13:The forget gate bias (\f$b_f\f$). * A 1-D tensor of type T, of shape [num_units]. - * * 14:cell_bias. + * * 14:The cell bias (\f$b_c\f$). * A 1-D tensor of type T, of shape [num_units]. - * * 15:output_gate_bias. + * * 15:The output gate bias (\f$b_o\f$). * A 1-D tensor of type T, of shape [num_units]. - * * 16:projection_weights. + * * 16:The projection weights (\f$W_{proj}\f$). Optional. * A 2-D tensor of type T, of shape [output_size, num_units]. - * * 17:projection_bias. + * * 17:The projection bias (\f$b_{proj}\f$). Optional. * A 1-D tensor of type T, of shape [output_size]. - * - * Parameters: - * * 18:fused_activation_function. - * An (optional) ActivationFunctionType indicating the activation - * function. - * If “NONE” is specified then it results in a linear activation. - * * 19:cell_clip. - * A clipping threshold for the cell state, such that values are bound + * * 18:The output state (in) (\f$h_{t-1}\f$). + * A 2-D tensor of type T, of shape [batch_size, output_size]. + * * 19:The cell state (in) (\f$C_{t-1}\f$). + * A 2-D tensor of type T, of shape [batch_size, num_units]. + * * 20:The activation function (\f$g\f$). + * A value indicating the activation function: + *