
op_tiling_registry.h

/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INC_REGISTER_OP_TILING_REGISTRY_H_
#define INC_REGISTER_OP_TILING_REGISTRY_H_

#include <functional>
#include <map>
#include <sstream>
#include <string>
#include <tuple>
#include <vector>

#include "external/register/register_types.h"
#include "external/graph/tensor.h"

#define REGISTER_OP_TILING(optype, opfunc) REGISTER_OP_TILING_UNIQ_HELPER(optype, opfunc, __COUNTER__)

#define REGISTER_OP_TILING_FUNC_NEW(optype, opfunc) REGISTER_OP_TILING_UNIQ_HELPER(optype, opfunc, __COUNTER__)

#define REGISTER_OP_TILING_UNIQ_HELPER(optype, opfunc, counter) REGISTER_OP_TILING_UNIQ(optype, opfunc, counter)

#define REGISTER_OP_TILING_UNIQ(optype, opfunc, counter) \
  static OpTilingRegistryInterf g_##optype##TilingRegistryInterf##counter(#optype, opfunc)

namespace optiling {

enum TensorArgType {
  TA_NONE,
  TA_SINGLE,
  TA_LIST,
};

using ByteBuffer = std::stringstream;

struct TeOpTensor {
  std::vector<int64_t> shape;
  std::vector<int64_t> ori_shape;
  std::string format;
  std::string ori_format;
  std::string dtype;
  std::map<std::string, std::string> attrs;
};

struct TeOpTensorArg {
  TensorArgType arg_type;
  std::vector<TeOpTensor> tensor;
};

struct OpRunInfo {
  uint32_t block_dim;
  std::vector<int64_t> workspaces;
  ByteBuffer tiling_data;
  bool clear_atomic;
};

using TeOpAttrArgs = std::vector<std::string>;
using TeConstTensorData = std::tuple<const uint8_t *, size_t, ge::Tensor>;

struct TeOpParas {
  std::vector<TeOpTensorArg> inputs;
  std::vector<TeOpTensorArg> outputs;
  std::map<std::string, TeConstTensorData> const_inputs;
  TeOpAttrArgs attrs;
  std::string op_type;
};

struct OpCompileInfo {
  std::string str;
  std::string key;
};

using OpTilingFunc = std::function<bool(const TeOpParas &, const OpCompileInfo &, OpRunInfo &)>;

using OpTilingFuncPtr = bool (*)(const TeOpParas &, const OpCompileInfo &, OpRunInfo &);

class FMK_FUNC_HOST_VISIBILITY OpTilingRegistryInterf {
 public:
  OpTilingRegistryInterf(std::string op_type, OpTilingFunc func);
  ~OpTilingRegistryInterf() = default;
  static std::map<std::string, OpTilingFunc> &RegisteredOpInterf();
};

template <class T>
ByteBuffer &ByteBufferPut(ByteBuffer &buf, const T &value) {
  buf.write(reinterpret_cast<const char *>(&value), sizeof(value));
  buf.flush();
  return buf;
}

template <class T>
ByteBuffer &ByteBufferGet(ByteBuffer &buf, T &value) {
  buf.read(reinterpret_cast<char *>(&value), sizeof(value));
  return buf;
}

size_t ByteBufferGetAll(ByteBuffer &buf, char *dest, size_t dest_len);

}  // namespace optiling

#endif  // INC_REGISTER_OP_TILING_REGISTRY_H_
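
The snippet below is a minimal usage sketch of this registration interface: it defines a tiling function matching the OpTilingFunc signature, fills in OpRunInfo, serializes one tiling parameter through ByteBufferPut, and registers the function with REGISTER_OP_TILING. The op type MyCustomOp, the include path, and the tiling policy itself are illustrative assumptions, not code from this repository.

// Hypothetical example; not part of op_tiling_registry.h.
#include "register/op_tiling_registry.h"  // assumed include path; depends on the build setup

namespace optiling {

// Tiling callback: derives run-time tiling from the input shapes and the
// compile-time info, then fills OpRunInfo for the kernel launch.
bool MyCustomOpTiling(const TeOpParas &op_paras, const OpCompileInfo &compile_info, OpRunInfo &run_info) {
  if (op_paras.inputs.empty() || op_paras.inputs[0].tensor.empty()) {
    return false;  // nothing to tile
  }
  const std::vector<int64_t> &shape = op_paras.inputs[0].tensor[0].shape;

  // Illustrative policy only: one block per element of the first dimension.
  int64_t dim0 = shape.empty() ? 1 : shape[0];
  run_info.block_dim = static_cast<uint32_t>(dim0);
  run_info.clear_atomic = false;

  // Serialize the tiling parameter into the ByteBuffer consumed by the kernel.
  ByteBufferPut(run_info.tiling_data, dim0);
  return true;
}

// Static registration: makes MyCustomOpTiling retrievable under the key "MyCustomOp".
REGISTER_OP_TILING(MyCustomOp, MyCustomOpTiling);

}  // namespace optiling

A caller can then look the function up by op type in OpTilingRegistryInterf::RegisteredOpInterf() and invoke it; the bytes written into tiling_data can be read back with ByteBufferGet or ByteBufferGetAll.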

The Graph Engine (GE) module is a submodule of MindSpore. Implemented in C++, it sits between the front-end module ME and the underlying hardware and serves as the bridge between them. GE takes the graph delivered by ME as input, applies a series of deep graph optimizations, and outputs a graph that runs efficiently on the underlying hardware. GE is specifically optimized for the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training/inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts, GE API and GE Core; the detailed architecture diagram is shown below.