You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

pcmCore.proto 9.0 kB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  // Protocol Buffers schema for the pcmCore package, written in proto3 syntax.
  syntax = "proto3";

  package pcmCore;

  // Go import path assigned to the code generated from this file.
  option go_package = "/pcmCore";
  // SyncInfoReq is the request message of pcmCore.SyncInfo.
  // It carries one participant id together with four repeated lists, one for
  // each info record type (HpcInfo, CloudInfo, AiInfo, VmInfo).
  message SyncInfoReq {
    int64 participantId = 1;

    repeated HpcInfo HpcInfoList = 2;
    repeated CloudInfo CloudInfoList = 3;
    repeated AiInfo AiInfoList = 4;
    repeated VmInfo VmInfoList = 5;
  }
  // AiInfo is the record type used by SyncInfoReq.AiInfoList and
  // InfoListResp.AiInfoList.
  // NOTE(review): project_id is snake_case while every other field here is
  // camelCase; renaming would change generated code, so it is left as is.
  message AiInfo {
    int64 participantId = 1;
    int64 taskId = 2;
    string project_id = 3;
    string name = 4;
    string status = 5;

    // Times are plain strings; the expected format is not defined in this
    // file. TODO confirm against the producers.
    string startTime = 6;
    int64 runningTime = 7;
    string result = 8;
    string jobId = 9;
    string createTime = 10;

    string imageUrl = 11;
    string command = 12;
    string flavorId = 13;
    string subscriptionId = 14;
    string itemVersionId = 15;
  }
  // CloudInfo is the record type used by SyncInfoReq.CloudInfoList and
  // InfoListResp.CloudInfoList.
  // NOTE(review): the first field is named `participant`, whereas the sibling
  // info messages use `participantId`; confirm whether that is intentional.
  message CloudInfo {
    int64 participant = 1;
    int64 id = 2;
    int64 taskId = 3;
    string apiVersion = 4;
    string kind = 5;
    string namespace = 6;
    string name = 7;
    // status carries field number 11 although it is declared before 8..10.
    // Field numbers, not declaration order, identify fields on the wire, so
    // the original order is kept.
    string status = 11;
    string startTime = 8;
    int64 runningTime = 9;
    string result = 10;
    string yamlString = 12;
  }
  // VmInfo is the record type used by SyncInfoReq.VmInfoList.
  // No VmInfo list appears in InfoListResp in this file.
  // Apart from participantId and taskId, the fields use snake_case names.
  message VmInfo {
    int64 participantId = 1;
    int64 taskId = 2;
    string name = 3;

    string flavor_ref = 4;
    string image_ref = 5;
    string network_uuid = 6;
    string block_uuid = 7;
    string source_type = 8;
    bool delete_on_termination = 9;

    string state = 10;
  }
  // HpcInfo is the record type used by SyncInfoReq.HpcInfoList and
  // InfoListResp.HpcInfoList.
  message HpcInfo {
    int64 participantId = 1;
    int64 taskId = 2;
    string jobId = 3;
    string name = 4;
    string status = 5;
    string startTime = 6;
    int64 runningTime = 7;
    string result = 8;

    string workDir = 9;
    string wallTime = 10;
    string cmdScript = 11;
    string derivedEs = 12;
    string cluster = 13;
    string blockId = 14;

    // The following counters are unsigned 32-bit integers.
    uint32 allocNodes = 15;
    uint32 allocCpu = 16;
    string version = 17;
    string account = 18;
    uint32 exitCode = 19;
    uint32 assocId = 20;

    string appType = 21;
    string appName = 22;
    string queue = 23;
    string submitType = 24;
    // NOTE(review): nNode is a string, unlike the uint32 allocNodes above.
    string nNode = 25;
    string stdOutFile = 26;
    string stdErrFile = 27;
    string stdInput = 28;
    string environment = 29;
  }
  // SyncInfoResp is the response message of pcmCore.SyncInfo: a numeric code
  // and a text message.
  message SyncInfoResp {
    int64 code = 1;
    string msg = 2;
  }
  // InfoListReq is the request message of pcmCore.InfoList; it holds only a
  // participant id.
  message InfoListReq {
    int64 participantId = 1;
  }
  // InfoListResp is the response message of pcmCore.InfoList.
  // It returns HPC, cloud and AI record lists. Unlike SyncInfoReq, it has no
  // VmInfo list and no code/msg fields.
  message InfoListResp {
    repeated HpcInfo HpcInfoList = 1;
    repeated CloudInfo CloudInfoList = 2;
    repeated AiInfo AiInfoList = 3;
  }
  // pcmCore is the PCM core service.
  service pcmCore {
    // SyncInfo synchronises data information: it takes a SyncInfoReq and
    // returns a SyncInfoResp.
    rpc SyncInfo(SyncInfoReq) returns (SyncInfoResp);

    // InfoList takes an InfoListReq and returns an InfoListResp.
    rpc InfoList(InfoListReq) returns (InfoListResp);
  }
  // ParticipantTenant holds tenant information. It is the request type of the
  // listParticipant, listPhyAvailable and listPhyInformation RPCs.
  message ParticipantTenant {
    // Tenant name.
    string tenantName = 1;
  }
  // ParticipantLabel is a cluster label (key/value pair with an id).
  message ParticipantLabel {
    // Identifier.
    int64 id = 1;
    // Label name.
    string key = 2;
    // Label value.
    string value = 3;
  }
  // MessageStatus is the status enum used by HealthCheckResp.messageStatus.
  // In proto3 the zero value is the default, so an unset MessageStatus field
  // reads as FAIL.
  enum MessageStatus {
    FAIL = 0;
    SUCCESS = 1;
    UNKNOWN = 2;
  }
  // HealthCheckResp is the response message of
  // participantService.reportHeartbeat.
  message HealthCheckResp {
    // Status enum; defaults to FAIL (value 0) when not set.
    MessageStatus messageStatus = 1;
    int64 code = 2;
    string msg = 3;
  }
  // ParticipantPhyResp is the response message of
  // participantService.registerParticipant.
  message ParticipantPhyResp {
    int64 code = 1;
    string msg = 2;
    // Unique identifier of the participant.
    int64 participantId = 3;
  }
  // ListParticipantPhyResp returns cluster static information. It is the
  // response message of participantService.listPhyInformation.
  message ListParticipantPhyResp {
    int64 code = 1;
    string msg = 2;
    // Entries reuse the ParticipantPhyReq message type.
    repeated ParticipantPhyReq ParticipantPhys = 3;
  }
  // ParticipantPhyReq carries the static information of a participant. It is
  // the request type of registerParticipant and the element type of
  // ListParticipantPhyResp.ParticipantPhys.
  // NOTE(review): field number 3 is not used in this message. If a field was
  // removed, consider declaring `reserved 3;` so the number is never reused.
  message ParticipantPhyReq {
    // Name.
    string name = 1;
    // Address.
    string address = 2;

    // Cluster network type.
    string networkType = 4;
    // Cluster network bandwidth.
    string networkBandwidth = 5;

    // Cluster storage type.
    string storageType = 6;
    // Cluster storage space.
    string storageSpace = 7;
    // Cluster available storage space.
    string storageAvailSpace = 8;
    // Cluster storage bandwidth.
    string storageBandwidth = 9;

    // Participant type: CLOUD - general (data) computing cluster;
    // AI - intelligent computing cluster; HPC - supercomputing cluster.
    string type = 10;

    // Tenant id.
    int64 tenantId = 11;
    // Tenant name.
    string tenantName = 12;

    // Node information.
    repeated NodePhyInfo nodeInfo = 13;
    // Participant id.
    int64 participantId = 14;
    // Label information.
    repeated ParticipantLabel labelInfo = 15;
    // Queue information.
    repeated QueuePhyInfo queueInfo = 16;
    // Identifier.
    int64 id = 17;

    // Monitoring URL.
    string MetricsUrl = 18;
    string RpcAddress = 19;
    // Token.
    string Token = 20;
  }
  // NodePhyInfo holds node information; used by ParticipantPhyReq.nodeInfo.
  message NodePhyInfo {
    int64 id = 1;
    // Node name.
    string nodeName = 2;
    // Operating system name.
    string osName = 3;
    // Operating system version.
    string osVersion = 4;
    // Architecture type.
    string archType = 5;
    // Architecture name.
    string archName = 6;
    // Architecture frequency.
    string archFreq = 7;
  }
  // QueuePhyInfo holds queue information; used by ParticipantPhyReq.queueInfo.
  // All fields other than id are declared as strings, including the counts.
  message QueuePhyInfo {
    // Identifier.
    int64 id = 1;
    // Available nodes; multiple nodes are separated by commas.
    string aclHosts = 2;

    // NOTE(review): the original comments describe queueId, text and
    // queueName all as "queue name"; confirm what queueId and text hold.
    string queueId = 3;
    string text = 4;
    // Queue name.
    string queueName = 5;

    // Total number of nodes in the queue.
    string queNodes = 6;
    // Minimum node count of the queue.
    string queMinNodect = 7;
    // Maximum number of GPU cards of the queue.
    string queMaxNgpus = 8;
    // Maximum number of CPU cores for a job using this queue.
    string queMaxPpn = 9;
    // Charge rate.
    string queChargeRate = 10;
    // Maximum number of cores available to a user.
    string queMaxNcpus = 11;
    // Total number of DCU cards in the queue.
    string queMaxNdcus = 12;
    // Minimum number of CPU cores of the queue.
    string queMinNcpus = 13;
    // Number of free nodes in the queue.
    string queFreeNodes = 14;
    // Maximum number of nodes for a job in the queue.
    string queMaxNodect = 15;
    // Maximum number of GPU cards for a single job in the queue.
    string queMaxGpuPN = 16;
    // Maximum run time of the queue.
    string queMaxWalltime = 17;
    // Maximum number of DCU cards for a single job in the queue.
    string queMaxDcuPN = 18;
    // Number of CPUs in the queue.
    string queNcpus = 19;
    // Number of free CPUs in the queue.
    string queFreeNcpus = 20;
  }
  // ParticipantHeartbeatReq is the heartbeat request; it is the request type
  // of participantService.reportHeartbeat.
  message ParticipantHeartbeatReq {
    // Participant id.
    int64 participantId = 1;
    string address = 2;
  }
  // ParticipantAvailReq carries participant availability information. It is
  // the request type of reportAvailable and the element type of
  // ListParticipantAvailResp.ParticipantAvails.
  message ParticipantAvailReq {
    // Identifier.
    int64 id = 1;
    // Cluster available storage space.
    int64 availStorageSpace = 2;
    // Number of users.
    int64 userNum = 3;
    // Number of pending jobs.
    int64 pendingJobNum = 4;
    // Number of running jobs.
    int64 runningJobNum = 5;
    // Id of the cluster static information.
    int64 participantId = 6;
    // Per-node availability information.
    repeated NodeAvailInfo nodeAvailInfo = 7;
  }
  // NodeAvailInfo holds node availability information; used by
  // ParticipantAvailReq.nodeAvailInfo. Units are not defined in this file.
  message NodeAvailInfo {
    // Identifier.
    int64 id = 1;
    // Node name.
    string nodeName = 2;

    // Number of CPU cores.
    int64 cpuTotal = 3;
    // CPU availability rate (the only floating-point field here).
    double cpuUsable = 4;

    // Disk space.
    int64 diskTotal = 5;
    // Available disk space.
    int64 diskAvail = 6;

    // Total memory.
    int64 memTotal = 7;
    // Available memory.
    int64 memAvail = 8;

    // Total number of GPUs.
    int64 gpuTotal = 9;
    // Number of available GPUs.
    int64 gpuAvail = 10;

    // Id of the cluster dynamic information.
    int64 participantId = 11;
  }
  // ListParticipantAvailResp returns cluster availability information. It is
  // the response message of participantService.listPhyAvailable.
  message ListParticipantAvailResp {
    int64 code = 1;
    string msg = 2;
    // Entries reuse the ParticipantAvailReq message type.
    repeated ParticipantAvailReq ParticipantAvails = 3;
  }
  // ParticipantResp is the response message of
  // participantService.reportAvailable: a numeric code and a text message.
  message ParticipantResp {
    int64 code = 1;
    string msg = 2;
  }
  // ParticipantServiceResp is the response message of
  // participantService.listParticipant.
  message ParticipantServiceResp {
    int64 code = 1;
    string msg = 2;
    // Returned client entries.
    repeated ClientInfo data = 3;
  }
  // ClientInfo is the element type of ParticipantServiceResp.data.
  // The trailing `@gotags` comments are annotations that attach `redis` struct
  // tags to the generated fields (presumably via a tag-injection tool; verify
  // against the build). They must stay on the field lines unchanged.
  message ClientInfo {
    string address = 1; // @gotags: redis:"address"
    int64 participantId = 2; // @gotags: redis:"participantId"
    string clientState = 3; // @gotags: redis:"clientState"
    int64 lastHeartbeat = 4; // @gotags: redis:"lastHeartbeat"
  }
  // TenantInfo describes a tenant. It is the request type of registerTenant
  // and listTenant, and the element type of ListTenantResp.tenantInfos.
  message TenantInfo {
    int64 id = 1;
    string tenantName = 2;
    string tenantDesc = 3;
  }
  // TenantResp is the response message of participantService.registerTenant.
  message TenantResp {
    int64 code = 1;
    string msg = 2;
    int64 id = 3;
  }
  // ListTenantResp is the response message of participantService.listTenant.
  message ListTenantResp {
    int64 code = 1;
    string msg = 2;
    // Returned tenant entries.
    repeated TenantInfo tenantInfos = 3;
  }
  // ApplyListReq is the (empty) request message of the applyList and
  // deleteList RPCs.
  message ApplyListReq {}
  // ApplyListResp is the response message of the applyList and deleteList
  // RPCs. It has no code/msg fields, only the list of entries.
  message ApplyListResp {
    repeated ApplyInfo infoList = 1;
  }
  // ApplyInfo is the element type of ApplyListResp.infoList: a participant
  // name paired with a YAML string.
  message ApplyInfo {
    string participantName = 1;
    string yamlString = 2;
  }
  // participantService is the participant-facing service: registration,
  // heartbeat, availability reporting, listings, tenants and apply/delete
  // task lists.
  //
  // Every rpc ends with a plain `;`, matching service pcmCore. The original
  // mixed `{}` and `{};`; the latter leaves a stray empty statement. Method
  // names, request types and response types are unchanged.
  service participantService {
    // registerParticipant: participant registration endpoint.
    rpc registerParticipant(ParticipantPhyReq) returns (ParticipantPhyResp);

    // reportHeartbeat: heartbeat request.
    rpc reportHeartbeat(ParticipantHeartbeatReq) returns (HealthCheckResp);

    // reportAvailable: reports monitoring data.
    rpc reportAvailable(ParticipantAvailReq) returns (ParticipantResp);

    // listParticipant: service list.
    rpc listParticipant(ParticipantTenant) returns (ParticipantServiceResp);

    // listPhyAvailable: list of cluster dynamic information.
    rpc listPhyAvailable(ParticipantTenant) returns (ListParticipantAvailResp);

    // listPhyInformation: list of cluster static information.
    rpc listPhyInformation(ParticipantTenant) returns (ListParticipantPhyResp);

    // registerTenant: registers tenant information.
    rpc registerTenant(TenantInfo) returns (TenantResp);

    // listTenant: tenant list information.
    rpc listTenant(TenantInfo) returns (ListTenantResp);

    // applyList: list of tasks to execute.
    rpc applyList(ApplyListReq) returns (ApplyListResp);

    // deleteList: list of tasks to delete.
    rpc deleteList(ApplyListReq) returns (ApplyListResp);
  }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem for heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.