
ctc_ops.h 5.6 kB

/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GE_OP_CTC_OPS_H
#define GE_OP_CTC_OPS_H

#include "graph/operator.h"
#include "graph/operator_reg.h"

namespace ge {

/**
*@brief Calculates the CTC Loss (log probability) for each batch entry. \n
Also calculates the gradient.

*@par Inputs:
*@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
*@li labels_indices: The indices of a `SparseTensor<int32, 2>`. \n
`labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for \n
`(batch b, time t)`.
*@li labels_values: The values (labels) associated with the given batch and time.
*@li sequence_length: A vector containing sequence lengths (batch).

*@par Outputs:
*@li loss: A vector (batch) containing log-probabilities.
*@li gradient: The gradient of `loss`. 3-D, shape: `(max_time x \n
batch_size x num_classes)`.

*@par Attributes:
*@li preprocess_collapse_repeated: Scalar. If true, repeated labels are collapsed prior to \n
the CTC calculation. If not specified, defaults to false.
*@li ctc_merge_repeated: Scalar. If set to false, *during* CTC calculation \n
repeated non-blank labels will not be merged and are interpreted as \n
individual labels. This is a simplified version of CTC. \n
If not specified, defaults to true.

*@par Third-party framework compatibility
* Compatible with the TensorFlow CTCLoss operator.
*/
REG_OP(CTCLoss)
    .INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE}))
    .INPUT(labels_indices, TensorType({DT_INT64}))
    .INPUT(labels_values, TensorType({DT_INT32}))
    .INPUT(sequence_length, TensorType({DT_INT32}))
    .OUTPUT(loss, TensorType({DT_FLOAT, DT_DOUBLE}))
    .OUTPUT(gradient, TensorType({DT_FLOAT, DT_DOUBLE}))
    .ATTR(preprocess_collapse_repeated, Bool, false)
    .ATTR(ctc_merge_repeated, Bool, true)
    .ATTR(ignore_longer_outputs_than_inputs, Bool, false)
    .OP_END_FACTORY_REG(CTCLoss)
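
/**
* Illustrative usage sketch (not part of the original header): building a CTCLoss
* node with the set_input_<name> and set_attr_<name> accessors that REG_OP
* generates for the fields above. The operators supplying the logits, label
* indices, label values and sequence lengths are hypothetical placeholders.
*
*   ge::op::CTCLoss ctc_loss("ctc_loss");
*   ctc_loss.set_input_inputs(logits)                     // (max_time, batch_size, num_classes), float or double
*           .set_input_labels_indices(labels_indices)     // int64 SparseTensor indices
*           .set_input_labels_values(labels_values)       // int32 label ids
*           .set_input_sequence_length(sequence_length)   // int32 per-batch lengths
*           .set_attr_preprocess_collapse_repeated(false)
*           .set_attr_ctc_merge_repeated(true);
*   // The "loss" and "gradient" outputs can then be consumed by downstream nodes.
*/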

/**
*@brief Performs greedy decoding on the logits given in inputs.

*@par Inputs:
*@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`.

*@par Attributes:
*@li merge_repeated: If True, merge repeated classes in output.

*@par Outputs:
*@li decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`,\n
of a `SparseTensor<int64, 2>`. The rows store: [batch, time].
*@li decoded_values: Values vector, size: `(total_decoded_outputs)`,\n
of a `SparseTensor<int64, 2>`. The vector stores the decoded classes.
*@li decoded_shape: Shape vector, size `(2)`, of the decoded SparseTensor.\n
Values are: `[batch_size, max_decoded_length]`.
*@li log_probability: Matrix, size `(batch_size x 1)`, containing sequence\n
log-probabilities.

*@par Third-party framework compatibility
* Compatible with the TensorFlow CTCGreedyDecoder operator.
*/
REG_OP(CTCGreedyDecoder)
    .INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE}))
    .INPUT(sequence_length, TensorType({DT_INT32}))
    .ATTR(merge_repeated, Bool, false)
    .OUTPUT(decoded_indices, TensorType({DT_INT64}))
    .OUTPUT(decoded_values, TensorType({DT_INT64}))
    .OUTPUT(decoded_shape, TensorType({DT_INT64}))
    .OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE}))
    .OP_END_FACTORY_REG(CTCGreedyDecoder)
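
/**
* Illustrative usage sketch (not part of the original header): greedy decoding of
* the same logits, again assuming the accessors generated by REG_OP. The logits
* and sequence_length operators are hypothetical placeholders.
*
*   ge::op::CTCGreedyDecoder greedy_decoder("ctc_greedy_decoder");
*   greedy_decoder.set_input_inputs(logits)                    // (max_time, batch_size, num_classes)
*                 .set_input_sequence_length(sequence_length)  // int32 per-batch lengths
*                 .set_attr_merge_repeated(true);              // collapse repeated classes
*   // decoded_indices, decoded_values and decoded_shape describe the decoded
*   // SparseTensor; log_probability holds the per-batch sequence log-probabilities.
*/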

/**
*@brief Performs beam search decoding on the logits given in inputs.

*@par Inputs:
*@li inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits.
*@li sequence_length: A vector containing sequence lengths, size `(batch_size)`.

*@par Attributes:
*@li merge_repeated: If True, merge repeated classes in output.

*@par Outputs:
*@li decoded_indices: A list (length: top_paths) of indices matrices. Matrix j,\n
size `(total_decoded_outputs[j] x 2)`, has indices of a\n
`SparseTensor<int64, 2>`. The rows store: [batch, time].
*@li decoded_values: A list (length: top_paths) of values vectors. Vector j,\n
size `(length total_decoded_outputs[j])`, has the values of a\n
`SparseTensor<int64, 2>`. The vector stores the decoded classes for beam j.
*@li decoded_shape: A list (length: top_paths) of shape vectors. Vector j,\n
size `(2)`, stores the shape of the decoded `SparseTensor[j]`.\n
Its values are: `[batch_size, max_decoded_length[j]]`.
*@li log_probability: A matrix, shaped: `(batch_size x top_paths)`. The\n
sequence log-probabilities.

*@par Third-party framework compatibility
* Compatible with the TensorFlow CTCBeamSearchDecoder operator.
*/
REG_OP(CTCBeamSearchDecoder)
    .INPUT(inputs, TensorType({DT_FLOAT, DT_DOUBLE}))
    .INPUT(sequence_length, TensorType({DT_INT32}))
    .REQUIRED_ATTR(beam_width, Int)
    .REQUIRED_ATTR(top_paths, Int)
    .ATTR(merge_repeated, Bool, true)
    .DYNAMIC_OUTPUT(decoded_indices, TensorType({DT_INT64}))
    .DYNAMIC_OUTPUT(decoded_values, TensorType({DT_INT64}))
    .DYNAMIC_OUTPUT(decoded_shape, TensorType({DT_INT64}))
    .OUTPUT(log_probability, TensorType({DT_FLOAT, DT_DOUBLE}))
    .OP_END_FACTORY_REG(CTCBeamSearchDecoder)
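
/**
* Illustrative usage sketch (not part of the original header): beam-search decoding.
* Besides the set_input_<name> and set_attr_<name> accessors, this assumes the
* create_dynamic_output_<name> helpers generated by DYNAMIC_OUTPUT, which size the
* per-beam output lists (here to top_paths = 2). The logits and sequence_length
* operators are hypothetical placeholders.
*
*   ge::op::CTCBeamSearchDecoder beam_decoder("ctc_beam_search_decoder");
*   beam_decoder.create_dynamic_output_decoded_indices(2);
*   beam_decoder.create_dynamic_output_decoded_values(2);
*   beam_decoder.create_dynamic_output_decoded_shape(2);
*   beam_decoder.set_input_inputs(logits)                    // (max_time, batch_size, num_classes)
*               .set_input_sequence_length(sequence_length)  // int32 per-batch lengths
*               .set_attr_beam_width(10)                     // beams kept during the search
*               .set_attr_top_paths(2)                       // hypotheses returned per batch entry
*               .set_attr_merge_repeated(true);
*   // Each of the top_paths beams yields one (indices, values, shape) triple;
*   // log_probability is a (batch_size x top_paths) matrix.
*/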

}  // namespace ge
#endif  // GE_OP_CTC_OPS_H

The Graph Engine (GE) module is a submodule of MindSpore, implemented in C++. It sits between the front-end module ME and the underlying hardware, acting as the bridge between the two: it takes the graph handed down by ME as input, performs a series of deep graph optimizations, and finally outputs a graph that can run efficiently on the underlying hardware. GE is specifically optimized for the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts, GE API and GE Core; the detailed architecture diagram is shown below.
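
For readers who want to drive GE directly through the GE API rather than letting MindSpore call it, a minimal sketch of that flow might look as follows. This is an assumption-laden illustration: the option maps are left empty, error handling is reduced to status checks, and the exact option keys and overloads should be checked against the GraphEngine headers (ge/ge_api.h).

#include <map>
#include <string>
#include <vector>
#include "ge/ge_api.h"
#include "graph/graph.h"
#include "graph/tensor.h"

// Minimal sketch: initialize GE, register a graph (for example one containing the
// CTC operators declared above), run it once, and shut GE down again.
int RunWithGe(const ge::Graph &graph) {
  std::map<std::string, std::string> options;  // runtime options, left empty for brevity
  if (ge::GEInitialize(options) != ge::SUCCESS) {
    return -1;
  }

  ge::Status ret;
  {
    ge::Session session(options);
    const uint32_t graph_id = 0;
    ret = session.AddGraph(graph_id, graph);
    if (ret == ge::SUCCESS) {
      std::vector<ge::Tensor> inputs;   // feed tensors matching the graph's Data nodes
      std::vector<ge::Tensor> outputs;  // filled by GE with the graph's results
      ret = session.RunGraph(graph_id, inputs, outputs);
    }
  }  // the session is destroyed before GE is finalized

  ge::GEFinalize();
  return ret == ge::SUCCESS ? 0 : -1;
}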