syntax = "proto3";

package pcmCore;

option go_package = "/pcmCore";

// SyncInfoReq is the request of pcmCore.SyncInfo: the info lists reported
// for one participant, one list per info kind.
message SyncInfoReq {
  int64 participantId = 1;
  repeated HpcInfo HpcInfoList = 2;
  repeated CloudInfo CloudInfoList = 3;
  repeated AiInfo AiInfoList = 4;
  repeated VmInfo VmInfoList = 5;
}

// AiInfo is one entry of the AiInfoList fields.
message AiInfo {
  int64 participantId = 1;
  int64 taskId = 2;
  string project_id = 3;
  string name = 4;
  string status = 5;
  string startTime = 6;
  int64 runningTime = 7;
  string result = 8;
  string jobId = 9;
  string createTime = 10;
  string imageUrl = 11;
  string command = 12;
  string flavorId = 13;
  string subscriptionId = 14;
  string itemVersionId = 15;
}

// CloudInfo is one entry of the CloudInfoList fields.
// Fields are declared in their original order; field numbers 8-11 are
// intentionally not sequential and must not be renumbered.
message CloudInfo {
  // NOTE(review): sibling messages call this field participantId; presumably
  // the same identifier. Not renamed because that would change generated code.
  int64 participant = 1;
  int64 id = 2;
  int64 taskId = 3;
  string apiVersion = 4;
  string kind = 5;
  string namespace = 6;
  string name = 7;
  string status = 11;
  string startTime = 8;
  int64 runningTime = 9;
  string result = 10;
  string yamlString = 12;
}

// VmInfo is one entry of SyncInfoReq.VmInfoList.
message VmInfo {
  int64 participantId = 1;
  int64 taskId = 2;
  string name = 3;
  string flavor_ref = 4;
  string image_ref = 5;
  string network_uuid = 6;
  string block_uuid = 7;
  string source_type = 8;
  bool delete_on_termination = 9;
  string state = 10;
}

// HpcInfo is one entry of the HpcInfoList fields.
message HpcInfo {
  int64 participantId = 1;
  int64 taskId = 2;
  string jobId = 3;
  string name = 4;
  string status = 5;
  string startTime = 6;
  int64 runningTime = 7;
  string result = 8;
  string workDir = 9;
  string wallTime = 10;
  string cmdScript = 11;
  string derivedEs = 12;
  string cluster = 13;
  string blockId = 14;
  uint32 allocNodes = 15;
  uint32 allocCpu = 16;
  string version = 17;
  string account = 18;
  uint32 exitCode = 19;
  uint32 assocId = 20;
  string appType = 21;
  string appName = 22;
  string queue = 23;
  string submitType = 24;
  string nNode = 25;
  string stdOutFile = 26;
  string stdErrFile = 27;
  string stdInput = 28;
  string environment = 29;
}

// SyncInfoResp is the response of pcmCore.SyncInfo.
message SyncInfoResp {
  int64 code = 1;
  string msg = 2;
}

// InfoListReq is the request of pcmCore.InfoList.
message InfoListReq {
  int64 participantId = 1;
}

// InfoListResp is the response of pcmCore.InfoList.
// NOTE(review): unlike SyncInfoReq there is no VmInfo list here; confirm
// whether that is intentional.
message InfoListResp {
  repeated HpcInfo HpcInfoList = 1;
  repeated CloudInfo CloudInfoList = 2;
  repeated AiInfo AiInfoList = 3;
}

// pcm core services
service pcmCore {
  // SyncInfo Synchronous data information
  rpc SyncInfo(SyncInfoReq) returns (SyncInfoResp);

  // InfoList
  rpc InfoList(InfoListReq) returns (InfoListResp);
}

// ParticipantTenant: tenant information.
message ParticipantTenant {
  string tenantName = 1; // tenant name
}

// ParticipantLabel: cluster label.
message ParticipantLabel {
  int64 id = 1; // id
  string key = 2; // label name
  string value = 3; // label value
}

// MessageStatus is the status carried by HealthCheckResp.
// NOTE(review): FAIL is the zero value, so an unset messageStatus reads as
// FAIL. The numbers are part of the wire contract and are left unchanged.
enum MessageStatus {
  FAIL = 0;
  SUCCESS = 1;
  UNKNOWN = 2;
}

// HealthCheckResp is the response of participantService.reportHeartbeat.
message HealthCheckResp {
  MessageStatus messageStatus = 1;
  int64 code = 2;
  string msg = 3;
}

// ParticipantPhyResp is the response of
// participantService.registerParticipant.
message ParticipantPhyResp {
  int64 code = 1;
  string msg = 2;
  int64 participantId = 3; // unique identifier of the participant
}

// ListParticipantPhyResp: cluster static information response.
message ListParticipantPhyResp {
  int64 code = 1;
  string msg = 2;
  repeated ParticipantPhyReq ParticipantPhys = 3;
}

// ParticipantPhyReq: participantPhy static information.
// NOTE(review): field number 3 is unused; if it belonged to a removed field
// it should be declared `reserved` - confirm against the schema history.
message ParticipantPhyReq {
  string name = 1; // name
  string address = 2; // address
  string networkType = 4; // cluster network type
  string networkBandwidth = 5; // cluster network bandwidth
  string storageType = 6; // cluster storage type
  string storageSpace = 7; // cluster storage space
  string storageAvailSpace = 8; // cluster available storage space
  string storageBandwidth = 9; // cluster storage bandwidth
  // participant type: CLOUD - data computing cluster;
  // AI - intelligent computing cluster; HPC - supercomputing cluster
  string type = 10;
  int64 tenantId = 11; // tenant id
  string tenantName = 12; // tenant name
  repeated NodePhyInfo nodeInfo = 13; // node information
  int64 participantId = 14; // participant id
  repeated ParticipantLabel labelInfo = 15; // label information
  repeated QueuePhyInfo queueInfo = 16; // queue information
  int64 id = 17; // id
  string MetricsUrl = 18; // monitoring url
  string RpcAddress = 19;
  string Token = 20; // token
}

// NodePhyInfo: node information.
message NodePhyInfo {
  int64 id = 1;
  string nodeName = 2; // node name
  string osName = 3; // operating system name
  string osVersion = 4; // operating system version
  string archType = 5; // architecture type
  string archName = 6; // architecture name
  string archFreq = 7; // architecture frequency
}

// QueuePhyInfo: queue information.
message QueuePhyInfo {
  int64 id = 1; // id
  string aclHosts = 2; // available nodes; multiple nodes separated by commas
  // NOTE(review): the original comments describe queueId, text and queueName
  // all as "queue name"; presumably only queueName is the name - confirm.
  string queueId = 3; // queue name
  string text = 4; // queue name
  string queueName = 5; // queue name
  string queNodes = 6; // total number of nodes in the queue
  string queMinNodect = 7; // minimum node count of the queue
  string queMaxNgpus = 8; // maximum number of GPU cards of the queue
  string queMaxPpn = 9; // maximum CPU cores for a job using this queue
  string queChargeRate = 10; // charge rate
  string queMaxNcpus = 11; // maximum cores available to the user
  string queMaxNdcus = 12; // total number of DCU cards of the queue
  string queMinNcpus = 13; // minimum CPU cores of the queue
  string queFreeNodes = 14; // number of idle nodes in the queue
  string queMaxNodect = 15; // maximum node count for a job in the queue
  string queMaxGpuPN = 16; // maximum GPU cards for a single job in the queue
  string queMaxWalltime = 17; // maximum run time of the queue
  string queMaxDcuPN = 18; // maximum DCU cards for a single job in the queue
  string queNcpus = 19; // number of CPUs of the queue
  string queFreeNcpus = 20; // number of idle CPUs of the queue
}

// ParticipantHeartbeatReq: heartbeat request.
message ParticipantHeartbeatReq {
  int64 participantId = 1; // participantId
  string address = 2;
}

// ParticipantAvailReq: participant availability information.
message ParticipantAvailReq {
  int64 id = 1; // id
  int64 availStorageSpace = 2; // cluster available storage space
  int64 userNum = 3; // number of users
  int64 pendingJobNum = 4; // number of pending jobs
  int64 runningJobNum = 5; // number of running jobs
  int64 participantId = 6; // id of the cluster static information
  repeated NodeAvailInfo nodeAvailInfo = 7; // node availability information
}

// NodeAvailInfo: node availability information.
message NodeAvailInfo {
  int64 id = 1; // id
  string nodeName = 2; // node name
  int64 cpuTotal = 3; // number of CPU cores
  double cpuUsable = 4; // CPU availability rate
  int64 diskTotal = 5; // disk space
  int64 diskAvail = 6; // available disk space
  int64 memTotal = 7; // total memory
  int64 memAvail = 8; // available memory
  int64 gpuTotal = 9; // total number of GPUs
  int64 gpuAvail = 10; // number of available GPUs
  int64 participantId = 11; // id of the cluster dynamic information
}

// ListParticipantAvailResp: cluster availability information.
message ListParticipantAvailResp {
  int64 code = 1;
  string msg = 2;
  repeated ParticipantAvailReq ParticipantAvails = 3;
}

// ParticipantResp is the response of participantService.reportAvailable.
message ParticipantResp {
  int64 code = 1;
  string msg = 2;
}

// ParticipantServiceResp is the response of
// participantService.listParticipant.
message ParticipantServiceResp {
  int64 code = 1;
  string msg = 2;
  repeated ClientInfo data = 3;
}

// ClientInfo is one entry of ParticipantServiceResp.data.
// The trailing "@gotags" comments are directives and must stay unchanged.
message ClientInfo {
  string address = 1; // @gotags: redis:"address"
  int64 participantId = 2; // @gotags: redis:"participantId"
  string clientState = 3; // @gotags: redis:"clientState"
  int64 lastHeartbeat = 4; // @gotags: redis:"lastHeartbeat"
}

// TenantInfo is the request of participantService.registerTenant and
// participantService.listTenant, and the element type of
// ListTenantResp.tenantInfos.
message TenantInfo {
  int64 id = 1;
  string tenantName = 2;
  string tenantDesc = 3;
}

// TenantResp is the response of participantService.registerTenant.
message TenantResp {
  int64 code = 1;
  string msg = 2;
  int64 id = 3;
}

// ListTenantResp is the response of participantService.listTenant.
message ListTenantResp {
  int64 code = 1;
  string msg = 2;
  repeated TenantInfo tenantInfos = 3;
}

// ApplyListReq is the (currently empty) request of
// participantService.applyList and participantService.deleteList.
message ApplyListReq {
}

// ApplyListResp is the response of participantService.applyList and
// participantService.deleteList.
message ApplyListResp {
  repeated ApplyInfo infoList = 1;
}

// ApplyInfo is one entry of ApplyListResp.infoList.
message ApplyInfo {
  string participantName = 1;
  string yamlString = 2;
}

// participant service
service participantService {
  // registerParticipant: participant registration interface
  rpc registerParticipant(ParticipantPhyReq) returns (ParticipantPhyResp) {}

  // reportHeartbeat: heartbeat request
  rpc reportHeartbeat(ParticipantHeartbeatReq) returns (HealthCheckResp) {}

  // reportAvailable: report monitoring data
  rpc reportAvailable(ParticipantAvailReq) returns (ParticipantResp) {}

  // listParticipant: service list
  rpc listParticipant(ParticipantTenant) returns (ParticipantServiceResp) {}

  // listPhyAvailable: list of cluster dynamic information
  rpc listPhyAvailable(ParticipantTenant) returns (ListParticipantAvailResp) {}

  // listPhyInformation: list of cluster static information
  rpc listPhyInformation(ParticipantTenant) returns (ListParticipantPhyResp) {}

  // registerTenant: register tenant information
  rpc registerTenant(TenantInfo) returns (TenantResp) {}

  // listTenant: tenant list information
  rpc listTenant(TenantInfo) returns (ListTenantResp) {}

  // applyList: list of tasks to execute
  rpc applyList(ApplyListReq) returns (ApplyListResp) {}

  // deleteList: list of tasks to delete
  rpc deleteList(ApplyListReq) returns (ApplyListResp) {}
}