syntax = "proto3";

package pcmCore;

option go_package = "/pcmCore";

// SyncInfoReq is the request message of the pcmCore.SyncInfo RPC.
message SyncInfoReq {
  int64 participantId = 1;
  // NOTE(review): presumably selects which of the lists below is relevant;
  // confirm against the callers.
  string kind = 2;
  repeated HpcInfo HpcInfoList = 3;
  repeated CloudInfo CloudInfoList = 4;
  repeated AiInfo AiInfoList = 5;
}

// AiInfo is one entry of the AiInfoList fields of SyncInfoReq and
// InfoListResp.
message AiInfo {
  int64 participantId = 1;
  int64 taskId = 2;
  // The only snake_case field name in this file; kept as-is because renaming
  // it would change generated accessors and JSON keys.
  string project_id = 3;
  string name = 4;
  string status = 5;
  string startTime = 6;
  int64 runningTime = 7;
  string result = 8;
  string jobId = 9;
  string createTime = 10;
  string imageUrl = 11;
  string command = 12;
  string flavorId = 13;
  string subscriptionId = 14;
  string itemVersionId = 15;
}

// CloudInfo is one entry of the CloudInfoList fields of SyncInfoReq and
// InfoListResp.
message CloudInfo {
  // Named `participant` here while HpcInfo and AiInfo use `participantId`;
  // kept as-is because renaming would change generated code and JSON keys.
  int64 participant = 1;
  int64 taskId = 2;
  string apiVersion = 3;
  string kind = 4;
  string namespace = 5;
  string name = 6;
  string status = 7;
  string startTime = 8;
  int64 runningTime = 9;
  string result = 10;
  string yamlString = 11;
}

// HpcInfo is one entry of the HpcInfoList fields of SyncInfoReq and
// InfoListResp.
message HpcInfo {
  int64 participantId = 1;
  int64 taskId = 2;
  string jobId = 3;
  string name = 4;
  string status = 5;
  string startTime = 6;
  int64 runningTime = 7;
  string result = 8;
  string workDir = 9;
  string wallTime = 10;
  string cmdScript = 11;
  string derivedEs = 12;
  string cluster = 13;
  string blockId = 14;
  uint32 allocNodes = 15;
  uint32 allocCpu = 16;
  string version = 17;
  string account = 18;
  uint32 exitCode = 19;
  uint32 assocId = 20;
  string appType = 21;
  string appName = 22;
  string queue = 23;
  string submitType = 24;
  string nNode = 25;
  string stdOutFile = 26;
  string stdErrFile = 27;
}

// SyncInfoResp is the response message of the pcmCore.SyncInfo RPC.
message SyncInfoResp {
  int64 code = 1;
  string msg = 2;
}

// InfoListReq is the request message of the pcmCore.InfoList RPC.
message InfoListReq {
  string kind = 1;
  int64 participantId = 2;
}

// InfoListResp is the response message of the pcmCore.InfoList RPC.
message InfoListResp {
  repeated HpcInfo HpcInfoList = 1;
  repeated CloudInfo CloudInfoList = 2;
  repeated AiInfo AiInfoList = 3;
}

// pcm core services
service pcmCore {
  // SyncInfo synchronizes data information.
  rpc SyncInfo(SyncInfoReq) returns (SyncInfoResp);

  // InfoList takes an InfoListReq and returns the HPC, cloud and AI info
  // lists.
  rpc InfoList(InfoListReq) returns (InfoListResp);
}

// ParticipantTenant carries tenant information. It is the request message of
// the listParticipant, listPhyAvailable and listPhyInformation RPCs.
message ParticipantTenant {
  // Tenant name.
  string tenantName = 1;
}

// ParticipantLabel is a cluster label.
message ParticipantLabel {
  // Label id.
  int64 id = 1;
  // Label name.
  string key = 2;
  // Label value.
  string value = 3;
}

// MessageStatus is the status carried by HealthCheckResp.
//
// FAIL is the zero value, so a HealthCheckResp whose messageStatus was never
// set reads as FAIL. The values are left unchanged because renumbering or
// renaming them would break existing clients.
enum MessageStatus {
  FAIL = 0;
  SUCCESS = 1;
  UNKNOWN = 2;
}

// HealthCheckResp is the response message of the reportHeartbeat RPC.
message HealthCheckResp {
  MessageStatus messageStatus = 1;
  int64 code = 2;
  string msg = 3;
}

// ParticipantPhyResp is the response message of the registerParticipant RPC.
message ParticipantPhyResp {
  int64 code = 1;
  string msg = 2;
  // Unique identifier of the participant.
  int64 participantId = 3;
}

// ListParticipantPhyResp returns the cluster static information. It is the
// response message of the listPhyInformation RPC.
message ListParticipantPhyResp {
  int64 code = 1;
  string msg = 2;
  repeated ParticipantPhyReq ParticipantPhys = 3;
}

// ParticipantPhyReq holds the static information of a participant. It is the
// request message of the registerParticipant RPC and the element type of
// ListParticipantPhyResp.ParticipantPhys.
//
// NOTE(review): field number 3 is not assigned in this message. If it
// belonged to a removed field, add `reserved 3;` so it is never reused.
message ParticipantPhyReq {
  // Name.
  string name = 1;
  // Address.
  string address = 2;
  // Cluster network type.
  string networkType = 4;
  // Cluster network bandwidth.
  string networkBandwidth = 5;
  // Cluster storage type.
  string storageType = 6;
  // Cluster storage space.
  string storageSpace = 7;
  // Cluster available storage space.
  string storageAvailSpace = 8;
  // Cluster storage bandwidth.
  string storageBandwidth = 9;
  // Participant type: 0 - general (data) computing cluster;
  // 1 - AI computing cluster; 2 - supercomputing cluster.
  string type = 10;
  // Tenant id.
  int64 tenantId = 11;
  // Tenant name.
  string tenantName = 12;
  // Node information.
  repeated NodePhyInfo nodeInfo = 13;
  // Participant id.
  int64 participantId = 14;
  // Label information.
  repeated ParticipantLabel labelInfo = 15;
  // Queue information.
  repeated QueuePhyInfo queueInfo = 16;
  // Id.
  int64 id = 17;
}

// NodePhyInfo holds node information.
message NodePhyInfo {
  int64 id = 1;
  // Node name.
  string nodeName = 2;
  // Operating system name.
  string osName = 3;
  // Operating system version.
  string osVersion = 4;
  // Architecture type.
  string archType = 5;
  // Architecture name.
  string archName = 6;
  // Architecture frequency.
  string archFreq = 7;
}

// QueuePhyInfo holds queue information.
message QueuePhyInfo {
  // Id.
  int64 id = 1;
  // Available nodes; multiple nodes are separated by commas.
  string aclHosts = 2;
  // Queue name, per the original comment.
  // NOTE(review): the field name suggests a queue identifier; confirm.
  string queueId = 3;
  // Queue name, per the original comment.
  // NOTE(review): same description as queueId and queueName; confirm how the
  // three differ.
  string text = 4;
  // Queue name.
  string queueName = 5;
  // Total number of nodes in the queue.
  string queNodes = 6;
  // Minimum number of nodes of the queue.
  string queMinNodect = 7;
  // Maximum number of GPU cards of the queue.
  string queMaxNgpus = 8;
  // Maximum number of CPU cores for jobs using this queue.
  string queMaxPpn = 9;
  // Charge rate.
  string queChargeRate = 10;
  // Maximum number of cores available to a user.
  string queMaxNcpus = 11;
  // Total number of DCU cards of the queue.
  string queMaxNdcus = 12;
  // Minimum number of CPU cores of the queue.
  string queMinNcpus = 13;
  // Number of idle nodes in the queue.
  string queFreeNodes = 14;
  // Maximum number of nodes for a job in the queue.
  string queMaxNodect = 15;
  // Maximum number of GPU cards for a single job in the queue.
  string queMaxGpuPN = 16;
  // Maximum run time of the queue.
  string queMaxWalltime = 17;
  // Maximum number of DCU cards for a single job in the queue.
  string queMaxDcuPN = 18;
  // Number of CPUs of the queue.
  string queNcpus = 19;
  // Number of idle CPUs of the queue.
  string queFreeNcpus = 20;
}

// ParticipantHeartbeatReq is the heartbeat request; it is the request message
// of the reportHeartbeat RPC.
message ParticipantHeartbeatReq {
  // Participant id.
  int64 participantId = 1;
  string Address = 2;
}

// ParticipantAvailReq holds the availability information of a participant. It
// is the request message of the reportAvailable RPC and the element type of
// ListParticipantAvailResp.ParticipantAvails.
message ParticipantAvailReq {
  // Id.
  int64 id = 1;
  // Cluster available storage space.
  int64 availStorageSpace = 2;
  // Number of users.
  int64 userNum = 3;
  // Number of pending jobs.
  int64 pendingJobNum = 4;
  // Number of running jobs.
  int64 runningJobNum = 5;
  // Id of the cluster static information.
  int64 participantId = 6;
  // Node availability information.
  repeated NodeAvailInfo nodeAvailInfo = 7;
}

// NodeAvailInfo holds the availability information of a node.
message NodeAvailInfo {
  // Id.
  int64 id = 1;
  // Node name.
  string nodeName = 2;
  // Number of CPU cores.
  int64 cpuTotal = 3;
  // CPU availability rate.
  double cpuUsable = 4;
  // Disk space.
  int64 diskTotal = 5;
  // Available disk space.
  int64 diskAvail = 6;
  // Total memory.
  int64 memTotal = 7;
  // Available memory.
  int64 memAvail = 8;
  // Total number of GPUs.
  int64 gpuTotal = 9;
  // Number of available GPUs.
  int64 gpuAvail = 10;
  // Id of the cluster dynamic information.
  int64 participantAvailId = 11;
}

// ListParticipantAvailResp returns the cluster availability information. It is
// the response message of the listPhyAvailable RPC.
message ListParticipantAvailResp {
  int64 code = 1;
  string msg = 2;
  repeated ParticipantAvailReq ParticipantAvails = 3;
}

// ParticipantResp is the response message of the reportAvailable RPC.
message ParticipantResp {
  int64 code = 1;
  string msg = 2;
}

// ParticipantServiceResp is the response message of the listParticipant RPC.
message ParticipantServiceResp {
  int64 code = 1;
  string msg = 2;
  repeated ClientInfo data = 3;
}

// ClientInfo is the element type of ParticipantServiceResp.data.
//
// The trailing `@gotags` comments are kept verbatim on the field lines; they
// are annotations, not documentation, and must not be reworded.
message ClientInfo {
  string host = 1; // @gotags: redis:"host"
  string port = 2; // @gotags: redis:"port"
  int64 participantId = 3; // @gotags: redis:"participantId"
}

// participantService is the participant service.
service participantService {
  // registerParticipant is the participant registration interface.
  rpc registerParticipant(ParticipantPhyReq) returns (ParticipantPhyResp) {}

  // reportHeartbeat handles the heartbeat request.
  rpc reportHeartbeat(ParticipantHeartbeatReq) returns (HealthCheckResp) {}

  // reportAvailable reports monitoring data.
  rpc reportAvailable(ParticipantAvailReq) returns (ParticipantResp) {}

  // listParticipant returns the service list.
  rpc listParticipant(ParticipantTenant) returns (ParticipantServiceResp) {}

  // listPhyAvailable returns the list of cluster dynamic information.
  rpc listPhyAvailable(ParticipantTenant) returns (ListParticipantAvailResp) {}

  // listPhyInformation returns the list of cluster static information.
  rpc listPhyInformation(ParticipantTenant) returns (ListParticipantPhyResp) {}
}