You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

pcmCore.proto 8.6 kB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. syntax = "proto3"; // this file uses proto3 syntax
  2. package pcmCore; // protobuf package for every message and service below
  3. option go_package = "/pcmCore"; // Go import path used for the generated Go code
  4. message SyncInfoReq { // Request message of the pcmCore.SyncInfo rpc.
  5. int64 participantId = 1; // participant id
  6. repeated HpcInfo HpcInfoList = 2; // HpcInfo entries; NOTE(review): UpperCamel field name, unlike lowerCamel used elsewhere
  7. repeated CloudInfo CloudInfoList = 3; // CloudInfo entries
  8. repeated AiInfo AiInfoList = 4; // AiInfo entries
  9. repeated VmInfo VmInfoList = 5; // VmInfo entries
  10. }
  11. message AiInfo { // Element type of SyncInfoReq.AiInfoList and InfoListResp.AiInfoList.
  12. int64 participantId = 1;
  13. int64 taskId = 2;
  14. string project_id = 3; // NOTE(review): snake_case name among lowerCamel siblings; renaming would change generated code and JSON names
  15. string name = 4;
  16. string status = 5;
  17. string startTime = 6; // carried as a string; the time format is not visible in this file
  18. int64 runningTime = 7; // NOTE(review): unit is not stated in this file; confirm
  19. string result = 8;
  20. string jobId = 9;
  21. string createTime = 10; // carried as a string; the time format is not visible in this file
  22. string imageUrl = 11;
  23. string command = 12;
  24. string flavorId = 13;
  25. string subscriptionId = 14;
  26. string itemVersionId = 15;
  27. }
  28. message CloudInfo { // Element type of SyncInfoReq.CloudInfoList and InfoListResp.CloudInfoList.
  29. int64 participant = 1; // NOTE(review): AiInfo, VmInfo and HpcInfo name their field 1 participantId; confirm whether this is the same id
  30. int64 taskId = 2;
  31. string apiVersion = 3;
  32. string kind = 4;
  33. string namespace = 5;
  34. string name = 6;
  35. string status = 7;
  36. string startTime = 8; // carried as a string; the time format is not visible in this file
  37. int64 runningTime = 9; // NOTE(review): unit is not stated in this file; confirm
  38. string result = 10;
  39. string yamlString = 11; // presumably the resource manifest as YAML text; verify against callers
  40. int64 id = 12;
  41. }
  42. message VmInfo { // Element type of SyncInfoReq.VmInfoList.
  43. int64 participantId = 1;
  44. int64 taskId = 2;
  45. string name = 3;
  46. string flavor_ref = 4; // NOTE(review): fields 4-9 use snake_case, unlike the lowerCamel names in sibling messages
  47. string image_ref = 5;
  48. string network_uuid = 6;
  49. string block_uuid = 7;
  50. string source_type = 8;
  51. bool delete_on_termination = 9;
  52. string state = 10;
  53. }
  54. message HpcInfo { // Element type of SyncInfoReq.HpcInfoList and InfoListResp.HpcInfoList.
  55. int64 participantId = 1;
  56. int64 taskId = 2;
  57. string jobId = 3;
  58. string name = 4;
  59. string status = 5;
  60. string startTime = 6; // carried as a string; the time format is not visible in this file
  61. int64 runningTime = 7; // NOTE(review): unit is not stated in this file; confirm
  62. string result = 8;
  63. string workDir = 9;
  64. string wallTime = 10;
  65. string cmdScript = 11;
  66. string derivedEs = 12;
  67. string cluster = 13;
  68. string blockId = 14;
  69. uint32 allocNodes = 15;
  70. uint32 allocCpu = 16;
  71. string version = 17;
  72. string account = 18;
  73. uint32 exitCode = 19; // unsigned: negative values cannot be represented here
  74. uint32 assocId = 20;
  75. string appType = 21;
  76. string appName = 22;
  77. string queue = 23;
  78. string submitType = 24;
  79. string nNode = 25; // NOTE(review): a string, whereas allocNodes is uint32; confirm the expected content
  80. string stdOutFile = 26;
  81. string stdErrFile = 27;
  82. string stdInput = 28;
  83. string environment = 29;
  84. }
  85. message SyncInfoResp { // Response message of the pcmCore.SyncInfo rpc.
  86. int64 code = 1; // result code; the meaning of individual values is not defined in this file
  87. string msg = 2; // result message
  88. }
  89. message InfoListReq { // Request message of the pcmCore.InfoList rpc.
  90. int64 participantId = 1; // participant id
  91. }
  92. message InfoListResp { // Response message of the pcmCore.InfoList rpc. NOTE(review): carries no VmInfo list, unlike SyncInfoReq; confirm this is intentional
  93. repeated HpcInfo HpcInfoList = 1; // HpcInfo entries
  94. repeated CloudInfo CloudInfoList = 2; // CloudInfo entries
  95. repeated AiInfo AiInfoList = 3; // AiInfo entries
  96. }
  97. // pcmCore is the core PCM service; it exposes the SyncInfo and InfoList rpcs.
  98. service pcmCore {
  99. // SyncInfo takes a SyncInfoReq (participantId plus HPC/Cloud/AI/VM info lists) and returns a code/msg result.
  100. rpc SyncInfo(SyncInfoReq) returns (SyncInfoResp);
  101. // InfoList takes an InfoListReq (participantId) and returns HPC, Cloud and AI info lists.
  102. rpc InfoList(InfoListReq) returns (InfoListResp);
  103. }
  104. // ParticipantTenant carries tenant information; it is the request type of listParticipant, listPhyAvailable and listPhyInformation.
  105. message ParticipantTenant {
  106. string tenantName = 1; // tenant name
  107. }
  108. // ParticipantLabel is a cluster label (key/value pair); used in ParticipantPhyReq.labelInfo.
  109. message ParticipantLabel {
  110. int64 id = 1; // id
  111. string key = 2; // label name
  112. string value = 3; // label value
  113. }
  114. enum MessageStatus { // Status carried in HealthCheckResp.messageStatus.
  115. FAIL = 0; // NOTE(review): 0 is the proto3 default, so an unset messageStatus reads as FAIL
  116. SUCCESS = 1;
  117. UNKNOWN = 2;
  118. }
  119. message HealthCheckResp { // Response message of the participantService.reportHeartbeat rpc.
  120. MessageStatus messageStatus = 1; // FAIL / SUCCESS / UNKNOWN
  121. int64 code = 2; // result code; the meaning of individual values is not defined in this file
  122. string msg = 3; // result message
  123. }
  124. message ParticipantPhyResp { // Response message of the participantService.registerParticipant rpc.
  125. int64 code = 1; // result code; the meaning of individual values is not defined in this file
  126. string msg = 2; // result message
  127. int64 participantId = 3; // unique identifier of the participant
  128. }
  129. // ListParticipantPhyResp returns cluster static information; it is the response of listPhyInformation.
  130. message ListParticipantPhyResp {
  131. int64 code = 1; // result code; the meaning of individual values is not defined in this file
  132. string msg = 2; // result message
  133. repeated ParticipantPhyReq ParticipantPhys = 3; // static-information entries; reuses the registration request type
  134. }
  135. // ParticipantPhyReq holds a participant's static information; it is the request of registerParticipant and the element type of ListParticipantPhyResp.ParticipantPhys.
  136. message ParticipantPhyReq {
  137. string name = 1; // name
  138. string address = 2; // address
  139. string networkType = 4; // cluster network type. NOTE(review): field number 3 is skipped; if a field was removed, consider declaring `reserved 3;`
  140. string networkBandwidth = 5; // cluster network bandwidth
  141. string storageType = 6; // cluster storage type
  142. string storageSpace = 7; // cluster storage space
  143. string storageAvailSpace = 8; // cluster available storage space
  144. string storageBandwidth = 9; // cluster storage bandwidth
  145. string type = 10; // participant type: CLOUD - general computing cluster; AI - intelligent computing cluster; HPC - supercomputing cluster
  146. int64 tenantId = 11; // tenant id
  147. string tenantName = 12; // tenant name
  148. repeated NodePhyInfo nodeInfo = 13; // node information
  149. int64 participantId = 14; // participant id
  150. repeated ParticipantLabel labelInfo = 15; // label information
  151. repeated QueuePhyInfo queueInfo = 16; // queue information
  152. int64 id = 17; // id
  153. string MetricsUrl = 18; // monitoring URL. NOTE(review): UpperCamel name, unlike the lowerCamel siblings
  154. string RpcAddress = 19; // NOTE(review): UpperCamel name, unlike the lowerCamel siblings
  155. }
  156. // NodePhyInfo holds static node information; used in ParticipantPhyReq.nodeInfo.
  157. message NodePhyInfo {
  158. int64 id = 1; // id
  159. string nodeName = 2; // node name
  160. string osName = 3; // operating system name
  161. string osVersion = 4; // operating system version
  162. string archType = 5; // architecture type
  163. string archName = 6; // architecture name
  164. string archFreq = 7; // architecture frequency
  165. }
  166. // QueuePhyInfo holds queue information; used in ParticipantPhyReq.queueInfo. All quantities are carried as strings.
  167. message QueuePhyInfo {
  168. int64 id = 1; // id
  169. string aclHosts = 2; // available nodes; multiple nodes are separated by commas
  170. string queueId = 3; // original comment says "queue name"; NOTE(review): the field name suggests a queue id; confirm
  171. string text = 4 ; // original comment says "queue name"; NOTE(review): same description as queueId and queueName; confirm which one is the name
  172. string queueName = 5; // queue name
  173. string queNodes = 6; // total number of nodes in the queue
  174. string queMinNodect = 7; // minimum node count of the queue
  175. string queMaxNgpus = 8; // maximum number of GPU cards in the queue
  176. string queMaxPpn = 9; // maximum CPU cores for a job using this queue
  177. string queChargeRate = 10; // charge rate
  178. string queMaxNcpus = 11; // maximum number of cores available to a user
  179. string queMaxNdcus = 12; // total number of DCU cards in the queue
  180. string queMinNcpus = 13; // minimum number of CPU cores of the queue
  181. string queFreeNodes = 14; // number of free nodes in the queue
  182. string queMaxNodect = 15; // maximum node count for a job in the queue
  183. string queMaxGpuPN = 16; // maximum GPU cards for a single job in the queue
  184. string queMaxWalltime = 17; // maximum run time of the queue
  185. string queMaxDcuPN = 18; // maximum DCU cards for a single job in the queue
  186. string queNcpus = 19; // number of CPUs in the queue
  187. string queFreeNcpus = 20; // number of free CPUs in the queue
  188. }
  189. // ParticipantHeartbeatReq is the heartbeat request; it is the request of reportHeartbeat.
  190. message ParticipantHeartbeatReq {
  191. int64 participantId = 1; // participant id
  192. string address = 2; // address
  193. }
  194. // ParticipantAvailReq holds a participant's availability information; it is the request of reportAvailable and the element type of ListParticipantAvailResp.ParticipantAvails.
  195. message ParticipantAvailReq {
  196. int64 id = 1; // id
  197. int64 availStorageSpace = 2; // cluster available storage space
  198. int64 userNum = 3; // number of users
  199. int64 pendingJobNum = 4; // number of pending jobs
  200. int64 runningJobNum = 5; // number of running jobs
  201. int64 participantId = 6; // id of the cluster static information
  202. repeated NodeAvailInfo nodeAvailInfo = 7; // node availability information
  203. }
  204. // NodeAvailInfo holds per-node availability information; used in ParticipantAvailReq.nodeAvailInfo.
  205. message NodeAvailInfo {
  206. int64 id = 1; // id
  207. string nodeName = 2; // node name
  208. int64 cpuTotal = 3; // number of CPU cores
  209. double cpuUsable = 4; // CPU availability rate; NOTE(review): the scale (0-1 or 0-100) is not stated in this file
  210. int64 diskTotal = 5; // disk space; NOTE(review): unit is not stated in this file
  211. int64 diskAvail = 6; // available disk space
  212. int64 memTotal = 7; // total memory; NOTE(review): unit is not stated in this file
  213. int64 memAvail = 8; // available memory
  214. int64 gpuTotal = 9; // total number of GPUs
  215. int64 gpuAvail = 10; // number of available GPUs
  216. int64 participantId = 11; // id of the cluster dynamic information
  217. }
  218. // ListParticipantAvailResp returns cluster availability information; it is the response of listPhyAvailable.
  219. message ListParticipantAvailResp {
  220. int64 code = 1; // result code; the meaning of individual values is not defined in this file
  221. string msg = 2; // result message
  222. repeated ParticipantAvailReq ParticipantAvails = 3; // availability entries; reuses the reportAvailable request type
  223. }
  224. message ParticipantResp { // Response message of the participantService.reportAvailable rpc.
  225. int64 code = 1; // result code; the meaning of individual values is not defined in this file
  226. string msg = 2; // result message
  227. }
  228. message ParticipantServiceResp { // Response message of the participantService.listParticipant rpc.
  229. int64 code = 1; // result code; the meaning of individual values is not defined in this file
  230. string msg = 2; // result message
  231. repeated ClientInfo data = 3; // ClientInfo entries
  232. }
  233. message ClientInfo { // Element type of ParticipantServiceResp.data. The field comments below are gotags annotations read by tooling; keep them verbatim.
  234. string address = 1; // @gotags: redis:"address"
  235. int64 participantId = 2; // @gotags: redis:"participantId"
  236. string clientState = 3; // @gotags: redis:"clientState"
  237. int64 lastHeartbeat = 4; // @gotags: redis:"lastHeartbeat"
  238. }
  239. message TenantInfo { // Request message of registerTenant and listTenant; element type of ListTenantResp.tenantInfos.
  240. int64 id = 1; // id
  241. string tenantName = 2; // tenant name
  242. string tenantDesc = 3; // presumably the tenant description; confirm
  243. }
  244. message TenantResp { // Response message of the participantService.registerTenant rpc.
  245. int64 code = 1; // result code; the meaning of individual values is not defined in this file
  246. string msg = 2; // result message
  247. int64 id = 3; // presumably the id of the registered tenant; confirm
  248. }
  249. message ListTenantResp { // Response message of the participantService.listTenant rpc.
  250. int64 code = 1; // result code; the meaning of individual values is not defined in this file
  251. string msg = 2; // result message
  252. repeated TenantInfo tenantInfos = 3; // TenantInfo entries
  253. }
  254. // participantService groups the participant-facing rpcs: registration, heartbeat, availability reporting, and participant/tenant listings.
  255. service participantService {
  256. // registerParticipant: participant registration interface
  257. rpc registerParticipant(ParticipantPhyReq) returns (ParticipantPhyResp) {};
  258. // reportHeartbeat: heartbeat request
  259. rpc reportHeartbeat(ParticipantHeartbeatReq) returns (HealthCheckResp) {};
  260. // reportAvailable: monitoring data report
  261. rpc reportAvailable(ParticipantAvailReq) returns (ParticipantResp) {}
  262. // listParticipant: service list
  263. rpc listParticipant(ParticipantTenant) returns (ParticipantServiceResp) {}
  264. // listPhyAvailable: list of cluster dynamic information
  265. rpc listPhyAvailable(ParticipantTenant) returns (ListParticipantAvailResp) {}
  266. // listPhyInformation: list of cluster static information
  267. rpc listPhyInformation(ParticipantTenant) returns (ListParticipantPhyResp) {};
  268. // registerTenant: register tenant information
  269. rpc registerTenant(TenantInfo) returns (TenantResp) {};
  270. // listTenant: list tenant information
  271. rpc listTenant(TenantInfo) returns (ListTenantResp) {};
  272. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.