Browse Source

release v2.0

tags/v2.0^0
yeyue 3 years ago
parent
commit
10777e95e7
100 changed files with 3045 additions and 193 deletions
  1. +41
    -0
      dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceQuotaClient.java
  2. +33
    -0
      dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceQuotaClientFallback.java
  3. +36
    -0
      dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserConfigMapper.java
  4. +53
    -0
      dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserConfigDTO.java
  5. +48
    -0
      dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/Config.java
  6. +53
    -0
      dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserConfig.java
  7. +37
    -0
      dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserConfigCreateVO.java
  8. +48
    -0
      dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserConfigVO.java
  9. +15
    -0
      dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserController.java
  10. +18
    -0
      dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserService.java
  11. +1
    -1
      dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/ResourceSpecsServiceImpl.java
  12. +92
    -3
      dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserServiceImpl.java
  13. +1
    -1
      dubhe-server/admin/src/main/resources/bootstrap.yml
  14. +100
    -0
      dubhe-server/admin/src/main/resources/mapper/UserConfigMapper.xml
  15. +1
    -1
      dubhe-server/auth/src/main/resources/bootstrap.yml
  16. +9
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/ApplicationNameConst.java
  17. +1
    -1
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/AuthConst.java
  18. +1
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/MagicNumConstant.java
  19. +1
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/NumberConstant.java
  20. +12
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/Permissions.java
  21. +6
    -2
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/StringConstant.java
  22. +2
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/SymbolConstant.java
  23. +10
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/UserConstant.java
  24. +5
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/context/UserContext.java
  25. +53
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserConfigDTO.java
  26. +5
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserDTO.java
  27. +6
    -1
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/BizEnum.java
  28. +7
    -1
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/ImageTypeEnum.java
  29. +60
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/ResultUtil.java
  30. +139
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/NoteBookVO.java
  31. +84
    -0
      dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/WebsocketDataResponseBody.java
  32. +1
    -1
      dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/api/FileStoreApi.java
  33. +3
    -1
      dubhe-server/common-biz/log/src/main/java/org/dubhe/biz/log/enums/LogEnum.java
  34. +15
    -15
      dubhe-server/common-biz/log/src/main/resources/logback.xml
  35. +0
    -1
      dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClient.java
  36. +0
    -7
      dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-data.yml
  37. +5
    -0
      dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-dev.yml
  38. +9
    -0
      dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-pre.yml
  39. +5
    -0
      dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-test.yml
  40. +0
    -7
      dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-dev.yml
  41. +0
    -7
      dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-prod.yml
  42. +15
    -0
      dubhe-server/common-k8s/pom.xml
  43. +68
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/docker/api/DockerApi.java
  44. +112
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/docker/api/impl/DockerApiImpl.java
  45. +97
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/docker/callback/TerminalPushImageResultCallback.java
  46. +57
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/docker/config/DockerClientFactory.java
  47. +42
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/docker/config/DubheDockerJavaConfig.java
  48. +26
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/docker/constant/DockerCallbackConstant.java
  49. +62
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/docker/domain/dto/DockerPushCallbackDTO.java
  50. +40
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/docker/enums/DockerOperationEnum.java
  51. +83
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/docker/utils/DockerCallbackTool.java
  52. +8
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PersistentVolumeClaimApi.java
  53. +52
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/TerminalApi.java
  54. +4
    -2
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DistributeTrainApiImpl.java
  55. +3
    -1
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DubheDeploymentApiImpl.java
  56. +57
    -4
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/JupyterResourceApiImpl.java
  57. +118
    -61
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/MetricsApiImpl.java
  58. +3
    -1
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelOptJobApiImpl.java
  59. +3
    -3
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelServingApiImpl.java
  60. +9
    -4
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NamespaceApiImpl.java
  61. +13
    -1
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PersistentVolumeClaimApiImpl.java
  62. +2
    -2
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ResourceQuotaApiImpl.java
  63. +205
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TerminalApiImpl.java
  64. +8
    -6
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TrainJobApiImpl.java
  65. +1
    -1
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/VolumeApiImpl.java
  66. +51
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/cache/ResourceCache.java
  67. +45
    -3
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/config/K8sConfig.java
  68. +4
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sLabelConstants.java
  69. +5
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sParamConstants.java
  70. +9
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/BuildServiceBO.java
  71. +168
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DeploymentBO.java
  72. +4
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DistributeTrainBO.java
  73. +4
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/ModelServingBO.java
  74. +132
    -26
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PrometheusMetricBO.java
  75. +4
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtDeploymentBO.java
  76. +4
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJobBO.java
  77. +6
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterJobBO.java
  78. +12
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterResourceBO.java
  79. +20
    -2
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtMountDirBO.java
  80. +11
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtPersistentVolumeClaimBO.java
  81. +1
    -1
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtResourceQuotaBO.java
  82. +14
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/TerminalBO.java
  83. +3
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerStatus.java
  84. +8
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizIntOrString.java
  85. +45
    -2
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizPod.java
  86. +21
    -9
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizQuantity.java
  87. +1
    -1
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizResourceQuota.java
  88. +20
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizService.java
  89. +46
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizServicePort.java
  90. +54
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuMetricsDataResultVO.java
  91. +44
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuTotalMemResultVO.java
  92. +52
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuValueVO.java
  93. +2
    -2
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PodRangeMetricsVO.java
  94. +2
    -10
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PtPodsVO.java
  95. +42
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/TerminalResourceVO.java
  96. +20
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/VolumeVO.java
  97. +4
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/BusinessLabelServiceNameEnum.java
  98. +40
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/ServiceTypeENum.java
  99. +42
    -0
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/WebsocketTopicEnum.java
  100. +1
    -1
      dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sUtils.java

+ 41
- 0
dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceQuotaClient.java View File

@@ -0,0 +1,41 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.admin.client;

import org.dubhe.admin.client.fallback.ResourceQuotaClientFallback;
import org.dubhe.admin.domain.dto.UserConfigDTO;
import org.dubhe.biz.base.constant.ApplicationNameConst;
import org.dubhe.biz.base.vo.DataResponseBody;
import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;

/**
 * @description Feign client used to call the resource-quota endpoints of the service
 *              registered as {@link ApplicationNameConst#SERVER_K8S}. Declares
 *              {@link ResourceQuotaClientFallback} as its fallback.
 * @date 2021-7-21
 */
@FeignClient(value = ApplicationNameConst.SERVER_K8S, fallback = ResourceQuotaClientFallback.class)
public interface ResourceQuotaClient {

    /**
     * Updates the ResourceQuota by POSTing the given configuration to
     * {@code /resourceQuota/update}.
     *
     * @param userConfigDTO user configuration sent as the request body
     * @return response body returned by the remote service (or by the fallback)
     */
    @PostMapping("/resourceQuota/update")
    DataResponseBody updateResourceQuota(@RequestBody UserConfigDTO userConfigDTO);
}

+ 33
- 0
dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceQuotaClientFallback.java View File

@@ -0,0 +1,33 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.admin.client.fallback;

import org.dubhe.admin.client.ResourceQuotaClient;
import org.dubhe.admin.domain.dto.UserConfigDTO;
import org.dubhe.biz.base.vo.DataResponseBody;
import org.dubhe.biz.dataresponse.factory.DataResponseFactory;

/**
 * @description Fallback (circuit-breaker) implementation of {@link ResourceQuotaClient}.
 *              Every operation answers with a failed response instead of calling the
 *              remote service.
 *              NOTE(review): no bean-registration annotation is visible on this class;
 *              confirm it is registered as a Spring bean elsewhere if the Feign fallback
 *              is expected to be resolved from the application context.
 * @date 2021-7-21
 */
public class ResourceQuotaClientFallback implements ResourceQuotaClient {

    /**
     * Produces the failed response used when the resource-quota update cannot be served.
     *
     * @param userConfigDTO user configuration of the attempted call (not used)
     * @return a failed {@link DataResponseBody} carrying a fixed error message
     */
    @Override
    public DataResponseBody updateResourceQuota(UserConfigDTO userConfigDTO) {
        final String failureMessage = "Call ResourceQuota server updateResourceQuota error";
        return DataResponseFactory.failed(failureMessage);
    }
}

+ 36
- 0
dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserConfigMapper.java View File

@@ -0,0 +1,36 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.admin.dao;

import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import org.apache.ibatis.annotations.Select;
import org.dubhe.admin.domain.entity.UserConfig;
import java.util.List;
import org.apache.ibatis.annotations.Param;

/**
* @description MyBatis-Plus mapper for {@link UserConfig} records
* @date 2021-6-30
*/
public interface UserConfigMapper extends BaseMapper<UserConfig> {

/**
* Inserts the given user configuration, or updates the existing row on a duplicate key.
* The statement itself is defined in the XML mapper (UserConfigMapper.xml), which also
* declares a selectKey that reads the row id by user_id after the statement runs and
* stores it in the {@code id} property of the argument.
*
* @param userConfig user configuration to insert or update
* @return value reported by MyBatis for the insert statement
*         (presumably the affected-row count; confirm against callers)
*/
Long insertOrUpdate(UserConfig userConfig);
}

+ 53
- 0
dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserConfigDTO.java View File

@@ -0,0 +1,53 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.admin.domain.dto;

import io.swagger.annotations.ApiModelProperty;
import lombok.Data;
import org.hibernate.validator.constraints.Length;

import javax.validation.constraints.NotNull;
import java.io.Serializable;

/**
 * @description DTO carrying a user's configuration: the Notebook delayed-deletion time
 *              and the CPU / memory / GPU resource limits. Every field is mandatory.
 * @date 2021-7-1
 */
@Data
public class UserConfigDTO implements Serializable {

    private static final long serialVersionUID = 1L;

    /** ID of the user the configuration applies to. */
    @ApiModelProperty(value = "用户 ID")
    @NotNull(message = "用户 ID 不能为空")
    private Long userId;

    /** Notebook delayed-deletion time, in hours. */
    @ApiModelProperty(value = "Notebook 延迟删除时间配置,单位:小时")
    @NotNull(message = "Notebook 延迟删除时间配置不能为空")
    private Integer notebookDelayDeleteTime;

    /** CPU limit, in cores. */
    @ApiModelProperty(value = "CPU 资源限制,单位:核")
    @NotNull(message = "CPU 资源限制配置不能为空")
    private Integer cpuLimit;

    /** Memory limit, in Gi. */
    @ApiModelProperty(value = "内存资源限制,单位:Gi")
    @NotNull(message = "内存资源限制配置不能为空")
    private Integer memoryLimit;

    /** GPU limit, in cards. */
    @ApiModelProperty(value = "GPU 资源限制,单位:块")
    @NotNull(message = "GPU 资源限制配置不能为空")
    private Integer gpuLimit;
}

+ 48
- 0
dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/Config.java View File

@@ -0,0 +1,48 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.admin.domain.entity;

import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import lombok.experimental.Accessors;
import org.dubhe.biz.db.entity.BaseEntity;

/**
 * @description Entity mapped to the {@code config} table: a named configuration item
 *              with an integer default value and a description.
 * @date 2021-06-30
 */
@Data
@Accessors(chain = true)
@TableName("config")
public class Config extends BaseEntity {

    /** Auto-increment primary key. */
    @TableId(type = IdType.AUTO, value = "id")
    private Long id;

    /** Name of the configuration item. */
    @TableField("name")
    private String name;

    /** Default value of the configuration item. */
    @TableField("default_value")
    private Integer defaultValue;

    /** Free-text description of the configuration item. */
    @TableField("description")
    private String description;

}

+ 53
- 0
dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserConfig.java View File

@@ -0,0 +1,53 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.admin.domain.entity;

import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import lombok.experimental.Accessors;
import org.dubhe.biz.db.entity.BaseEntity;

/**
 * @description Entity mapped to the {@code user_config} table: per-user Notebook
 *              delayed-deletion time and CPU / memory / GPU resource limits.
 * @date 2021-06-30
 */
@Data
@TableName("user_config")
@Accessors(chain = true)
public class UserConfig extends BaseEntity {

    /** Auto-increment primary key; the only field that may carry {@code @TableId}. */
    @TableId(value = "id", type = IdType.AUTO)
    private Long id;

    // The remaining columns are ordinary (non-key) columns, so they are mapped with
    // @TableField. Marking them with @TableId would declare several primary keys on
    // one entity, which MyBatis-Plus does not allow.

    /** ID of the user the configuration belongs to. */
    @TableField(value = "user_id")
    private Long userId;

    /** Notebook delayed-deletion time. */
    @TableField(value = "notebook_delay_delete_time")
    private Integer notebookDelayDeleteTime;

    /** CPU resource limit. */
    @TableField(value = "cpu_limit")
    private Integer cpuLimit;

    /** Memory resource limit. */
    @TableField(value = "memory_limit")
    private Integer memoryLimit;

    /** GPU resource limit. */
    @TableField(value = "gpu_limit")
    private Integer gpuLimit;
}

+ 37
- 0
dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserConfigCreateVO.java View File

@@ -0,0 +1,37 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.admin.domain.vo;

import io.swagger.annotations.ApiModelProperty;
import lombok.Data;
import lombok.experimental.Accessors;

import java.io.Serializable;

/**
 * @description VO returned after creating or updating a user configuration; carries
 *              only the ID of the configuration record.
 * @date 2021-7-2
 */
@Accessors(chain = true)
@Data
public class UserConfigCreateVO implements Serializable {

    private static final long serialVersionUID = 1L;

    /** ID of the user configuration record. */
    @ApiModelProperty("用户配置 ID")
    private Long id;

}

+ 48
- 0
dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserConfigVO.java View File

@@ -0,0 +1,48 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.admin.domain.vo;

import io.swagger.annotations.ApiModelProperty;
import lombok.Data;
import lombok.experimental.Accessors;

import java.io.Serializable;

/**
 * @description VO exposing a user's configuration: the Notebook delayed-deletion time
 *              and the CPU / memory / GPU resource limits.
 * @date 2021-7-1
 */
@Accessors(chain = true)
@Data
public class UserConfigVO implements Serializable {

    private static final long serialVersionUID = 1L;

    /** ID of the user the configuration belongs to. */
    @ApiModelProperty(value = "用户 ID")
    private Long userId;

    /** Notebook delayed-deletion time, in hours. */
    @ApiModelProperty(value = "Notebook 延迟删除时间配置,单位:小时")
    private Integer notebookDelayDeleteTime;

    /** CPU limit, in cores. */
    @ApiModelProperty(value = "CPU 资源限制,单位:核")
    private Integer cpuLimit;

    /** Memory limit, in Gi. */
    @ApiModelProperty(value = "内存资源限制,单位:Gi")
    private Integer memoryLimit;

    /** GPU limit, in cards. */
    @ApiModelProperty(value = "GPU 资源限制,单位:块")
    private Integer gpuLimit;
}

+ 15
- 0
dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserController.java View File

@@ -19,6 +19,7 @@ package org.dubhe.admin.rest;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.dubhe.admin.domain.dto.UserConfigDTO;
import org.dubhe.admin.domain.dto.UserCreateDTO;
import org.dubhe.admin.domain.dto.UserDeleteDTO;
import org.dubhe.admin.domain.dto.UserQueryDTO;
@@ -30,6 +31,7 @@ import org.dubhe.biz.base.dto.UserDTO;
import org.dubhe.biz.base.vo.DataResponseBody;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;

import javax.servlet.http.HttpServletResponse;
@@ -84,6 +86,19 @@ public class UserController {
return new DataResponseBody();
}

/**
 * Returns the configuration of the given user, wrapped in a response body.
 *
 * @param userId ID of the user whose configuration is requested
 * @return response body holding the result of {@code userService.findUserConfig}
 */
@ApiOperation("根据用户ID查询用户配置")
@GetMapping("/getUserConfig")
public DataResponseBody getUserConfig(@RequestParam("userId") Long userId) {
    return new DataResponseBody(
            userService.findUserConfig(userId)
    );
}

/**
 * Creates or updates a user's configuration. Requires the
 * {@code Permissions.USER_CONFIG_EDIT} authority; the request body is validated.
 *
 * @param userConfigDTO user configuration to store
 * @return response body holding the result of {@code userService.createOrUpdateUserConfig}
 */
@ApiOperation("新增或修改用户配置")
@PreAuthorize(Permissions.USER_CONFIG_EDIT)
@PutMapping("/setUserConfig")
public DataResponseBody setUserConfig(@Validated @RequestBody UserConfigDTO userConfigDTO) {
    return new DataResponseBody(
            userService.createOrUpdateUserConfig(userConfigDTO)
    );
}

/**
* 此接口提供给Auth模块获取用户信息使用
* 因Auth获取用户信息在登录时是未登录状态,请不要在此添加权限校验


+ 18
- 0
dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserService.java View File

@@ -20,6 +20,8 @@ import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.extension.service.IService;
import org.dubhe.admin.domain.dto.*;
import org.dubhe.admin.domain.entity.User;
import org.dubhe.admin.domain.vo.UserConfigCreateVO;
import org.dubhe.admin.domain.vo.UserConfigVO;
import org.dubhe.biz.base.dto.TeamDTO;
import org.dubhe.biz.base.dto.UserDTO;
import org.dubhe.biz.base.vo.DataResponseBody;
@@ -221,4 +223,20 @@ public interface UserService extends AdminUserService, IService<User> {
* @return org.dubhe.domain.dto.UserDTO 用户信息DTO集合
*/
List<UserDTO> getUserList(List<Long> ids);

/**
* Looks up the configuration of a user by user ID.
*
* @param userId ID of the user
* @return org.dubhe.admin.domain.vo.UserConfigVO the user's configuration VO
*/
UserConfigVO findUserConfig(Long userId);

/**
* Creates or updates a user's configuration.
*
* @param userConfigDTO user configuration to store
* @return org.dubhe.admin.domain.vo.UserConfigCreateVO VO carrying the configuration record ID
*/
UserConfigCreateVO createOrUpdateUserConfig(UserConfigDTO userConfigDTO);
}

+ 1
- 1
dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/ResourceSpecsServiceImpl.java View File

@@ -67,7 +67,7 @@ public class ResourceSpecsServiceImpl implements ResourceSpecsService {
public Map<String, Object> getResourceSpecs(ResourceSpecsQueryDTO resourceSpecsQueryDTO) {
Page page = resourceSpecsQueryDTO.toPage();
//排序字段
String sort = null == resourceSpecsQueryDTO.getSort() ? StringConstant.CREATE_TIME_SQL : resourceSpecsQueryDTO.getSort();
String sort = null == resourceSpecsQueryDTO.getSort() ? StringConstant.ID : resourceSpecsQueryDTO.getSort();
QueryWrapper<ResourceSpecs> queryResourceSpecsWrapper = new QueryWrapper<>();
queryResourceSpecsWrapper.like(resourceSpecsQueryDTO.getSpecsName() != null, "specs_name", resourceSpecsQueryDTO.getSpecsName())
.eq(resourceSpecsQueryDTO.getResourcesPoolType() != null, "resources_pool_type", resourceSpecsQueryDTO.getResourcesPoolType())


+ 92
- 3
dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserServiceImpl.java View File

@@ -22,17 +22,22 @@ import cn.hutool.crypto.asymmetric.KeyType;
import cn.hutool.crypto.asymmetric.RSA;
import com.alibaba.fastjson.JSONObject;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import org.dubhe.admin.client.AuthServiceClient;
import org.dubhe.admin.client.ResourceQuotaClient;
import org.dubhe.admin.dao.*;
import org.dubhe.admin.domain.dto.*;
import org.dubhe.admin.domain.entity.Role;
import org.dubhe.admin.domain.entity.User;
import org.dubhe.admin.domain.entity.UserAvatar;
import org.dubhe.admin.domain.entity.UserConfig;
import org.dubhe.admin.domain.entity.UserRole;
import org.dubhe.admin.domain.vo.EmailVo;
import org.dubhe.admin.domain.vo.UserConfigCreateVO;
import org.dubhe.admin.domain.vo.UserConfigVO;
import org.dubhe.admin.domain.vo.UserVO;
import org.dubhe.admin.enums.UserMailCodeEnum;
import org.dubhe.admin.event.EmailEventPublisher;
@@ -92,6 +97,18 @@ public class UserServiceImpl extends ServiceImpl<UserMapper, User> implements Us
@Value("${initial_password}")
private String initialPassword;

// Default Notebook delayed-deletion time, injected from the
// user.config.notebook-delay-delete-time property; returned when a user has no
// stored configuration.
@Value("${user.config.notebook-delay-delete-time}")
private Integer defaultNotebookDelayDeleteTime;

// Default CPU limit, injected from the user.config.cpu-limit property.
@Value("${user.config.cpu-limit}")
private Integer cpuLimit;

// Default memory limit, injected from the user.config.memory-limit property.
@Value("${user.config.memory-limit}")
private Integer memoryLimit;

// Default GPU limit, injected from the user.config.gpu-limit property.
@Value("${user.config.gpu-limit}")
private Integer gpuLimit;

@Autowired
private UserMapper userMapper;

@@ -130,6 +147,13 @@ public class UserServiceImpl extends ServiceImpl<UserMapper, User> implements Us
@Autowired
private PermissionMapper permissionMapper;

// Mapper used to read and write user configuration records.
@Autowired
private UserConfigMapper userConfigMapper;

// Feign client used to push resource-quota updates to the remote service.
// NOTE(review): declared package-private while the neighbouring injected fields are
// private; confirm whether that is intentional.
@Autowired
ResourceQuotaClient resourceQuotaClient;


/**
* 测试标识 true:允许debug false:拒绝debug
*/
@@ -224,7 +248,15 @@ public class UserServiceImpl extends ServiceImpl<UserMapper, User> implements Us
for (Role role : resources.getRoles()) {
roleMapper.tiedUserRole(user.getId(), role.getId());
}

UserConfigDTO userConfigDTO = new UserConfigDTO();
userConfigDTO.setUserId(user.getId());
userConfigDTO.setCpuLimit(cpuLimit);
userConfigDTO.setMemoryLimit(memoryLimit);
userConfigDTO.setGpuLimit(gpuLimit);
DataResponseBody dataResponseBody = resourceQuotaClient.updateResourceQuota(userConfigDTO);
if (!dataResponseBody.succeed()){
throw new BusinessException("用户配置更新失败");
}
return userConvert.toDto(user);
}

@@ -316,10 +348,24 @@ public class UserServiceImpl extends ServiceImpl<UserMapper, User> implements Us
return sysRoleDTO;
}).collect(Collectors.toList()));
}
//获取用户配置
SysUserConfigDTO sysUserConfigDTO = getUserConfig(user.getId());
dto.setUserConfig(sysUserConfigDTO);
return dto;

}

/**
 * Builds the configuration DTO of a user.
 *
 * @param userId ID of the user
 * @return the stored configuration copied into a DTO, or a DTO filled with the
 *         configured default limits when the user has no stored configuration
 */
private SysUserConfigDTO getUserConfig(Long userId) {
    UserConfig probe = new UserConfig().setUserId(userId);
    UserConfig stored = userConfigMapper.selectOne(new QueryWrapper<>(probe));
    SysUserConfigDTO result = new SysUserConfigDTO();
    if (stored != null) {
        BeanUtils.copyProperties(stored, result);
        return result;
    }
    // No stored row: fall back to the defaults injected from the application properties.
    return result
            .setCpuLimit(cpuLimit)
            .setMemoryLimit(memoryLimit)
            .setGpuLimit(gpuLimit)
            .setNotebookDelayDeleteTime(defaultNotebookDelayDeleteTime);
}


/**
* 修改用户个人中心信息
@@ -695,6 +741,48 @@ public class UserServiceImpl extends ServiceImpl<UserMapper, User> implements Us
return userConvert.toDto(users);
}

/**
 * Looks up the configuration of a user by user ID.
 *
 * @param userId ID of the user
 * @return org.dubhe.admin.domain.vo.UserConfigVO the stored configuration, or a VO
 *         carrying the given user ID and the configured default limits when the
 *         user has no stored configuration
 */
@Override
public UserConfigVO findUserConfig(Long userId) {
    UserConfig probe = new UserConfig().setUserId(userId);
    UserConfig stored = userConfigMapper.selectOne(new QueryWrapper<>(probe));
    UserConfigVO result = new UserConfigVO();
    if (stored != null) {
        // A stored row exists: expose it as-is.
        BeanUtils.copyProperties(stored, result);
        return result;
    }
    // No stored row: answer with the defaults injected from the application properties.
    return result
            .setUserId(userId)
            .setCpuLimit(cpuLimit)
            .setMemoryLimit(memoryLimit)
            .setGpuLimit(gpuLimit)
            .setNotebookDelayDeleteTime(defaultNotebookDelayDeleteTime);
}

/**
 * Creates or updates a user's configuration and pushes the new limits to the
 * resource-quota service.
 *
 * <p>The local record is written first and the remote call is made last. The remote
 * update cannot be rolled back by this transaction, so doing it last means a failure
 * on either side leaves nothing half-applied: a failed database write never reaches
 * the remote service, and a failed remote update throws and rolls the write back.
 *
 * @param userConfigDTO user configuration to store
 * @return org.dubhe.admin.domain.vo.UserConfigCreateVO VO carrying the configuration record ID
 * @throws BusinessException if the remote resource-quota update returns no response
 *         or reports a failure
 */
@Override
@Transactional(rollbackFor = Exception.class)
public UserConfigCreateVO createOrUpdateUserConfig(UserConfigDTO userConfigDTO) {
    // Persist locally; the mapper fills in the record id on the entity.
    UserConfig userConfig = new UserConfig();
    BeanUtils.copyProperties(userConfigDTO, userConfig);
    userConfigMapper.insertOrUpdate(userConfig);

    // Remote side effect last: a failure here rolls back the write above.
    DataResponseBody dataResponseBody = resourceQuotaClient.updateResourceQuota(userConfigDTO);
    if (dataResponseBody == null || !dataResponseBody.succeed()) {
        throw new BusinessException("用户配置更新失败");
    }

    return new UserConfigCreateVO().setId(userConfig.getId());
}


/**
* 校验验证码
@@ -900,8 +988,9 @@ public class UserServiceImpl extends ServiceImpl<UserMapper, User> implements Us
}).collect(Collectors.toList());
dto.setRoles(roleDTOS);
}


//获取用户配置
SysUserConfigDTO sysUserConfigDTO = getUserConfig(user.getId());
dto.setUserConfig(sysUserConfigDTO);
return DataResponseFactory.success(dto);
}
}

+ 1
- 1
dubhe-server/admin/src/main/resources/bootstrap.yml View File

@@ -31,7 +31,7 @@ spring:
refresh: true
discovery:
enabled: true
namespace: dubhe-server-cloud-prod
namespace: dubhe-server-cloud-dev
group: dubhe
server-addr: 127.0.0.1:8848


+ 100
- 0
dubhe-server/admin/src/main/resources/mapper/UserConfigMapper.xml View File

@@ -0,0 +1,100 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<mapper namespace="org.dubhe.admin.dao.UserConfigMapper">

    <!--
        Upsert of a user's configuration.
        Only non-null properties take part in the INSERT column list and in the
        ON DUPLICATE KEY UPDATE assignments. After the statement runs, the selectKey
        reads the row ID for the given user_id back into the entity's "id" property.
        NOTE(review): ON DUPLICATE KEY UPDATE only fires if user_config has a unique
        key that the inserted row collides with (presumably on user_id) - confirm in DDL.
    -->
    <insert id="insertOrUpdate" parameterType="org.dubhe.admin.domain.entity.UserConfig">
        <selectKey resultType="java.lang.Long" order="AFTER" keyProperty="id">
            SELECT id FROM user_config WHERE user_id = #{userId}
        </selectKey>
        insert into user_config
        <trim prefix="(" suffix=")" suffixOverrides=",">
            <if test="userId != null">
                user_id,
            </if>
            <if test="notebookDelayDeleteTime != null">
                notebook_delay_delete_time,
            </if>
            <if test="cpuLimit != null">
                cpu_limit,
            </if>
            <if test="memoryLimit != null">
                memory_limit,
            </if>
            <if test="gpuLimit != null">
                gpu_limit,
            </if>
            <!-- create_time is honoured on insert as well, matching the update clause below -->
            <if test="createTime != null">
                create_time,
            </if>
            <if test="createUserId != null">
                create_user_id,
            </if>
            <if test="updateTime != null">
                update_time,
            </if>
            <if test="updateUserId != null">
                update_user_id,
            </if>
            <if test="deleted != null">
                deleted,
            </if>
        </trim>
        <trim prefix="values (" suffix=")" suffixOverrides=",">
            <if test="userId != null">
                #{userId},
            </if>
            <if test="notebookDelayDeleteTime != null">
                #{notebookDelayDeleteTime},
            </if>
            <if test="cpuLimit != null">
                #{cpuLimit},
            </if>
            <if test="memoryLimit != null">
                #{memoryLimit},
            </if>
            <if test="gpuLimit != null">
                #{gpuLimit},
            </if>
            <if test="createTime != null">
                #{createTime},
            </if>
            <if test="createUserId != null">
                #{createUserId},
            </if>
            <if test="updateTime != null">
                #{updateTime},
            </if>
            <if test="updateUserId != null">
                #{updateUserId},
            </if>
            <if test="deleted != null">
                #{deleted},
            </if>
        </trim>
        ON DUPLICATE KEY UPDATE
        <trim suffixOverrides=",">
            <if test="notebookDelayDeleteTime != null">
                notebook_delay_delete_time = #{notebookDelayDeleteTime},
            </if>
            <if test="cpuLimit != null">
                cpu_limit = #{cpuLimit},
            </if>
            <if test="memoryLimit != null">
                memory_limit = #{memoryLimit},
            </if>
            <if test="gpuLimit != null">
                gpu_limit = #{gpuLimit},
            </if>
            <if test="createTime != null">
                create_time = #{createTime},
            </if>
            <if test="createUserId != null">
                create_user_id = #{createUserId},
            </if>
            <if test="updateTime != null">
                update_time = #{updateTime},
            </if>
            <if test="updateUserId != null">
                update_user_id = #{updateUserId},
            </if>
            <if test="deleted != null">
                deleted = #{deleted},
            </if>
        </trim>
    </insert>
</mapper>

+ 1
- 1
dubhe-server/auth/src/main/resources/bootstrap.yml View File

@@ -23,7 +23,7 @@ spring:
refresh: true
discovery:
enabled: true
namespace: dubhe-server-cloud-prod
namespace: dubhe-server-cloud-dev
group: dubhe
server-addr: 127.0.0.1:8848


+ 9
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/ApplicationNameConst.java View File

@@ -81,4 +81,13 @@ public class ApplicationNameConst {
*/
public final static String SERVER_DATA_DCM = "dubhe-data-dcm";

/**
* k8s
*/
public final static String SERVER_K8S = "dubhe-k8s";

/**
* 专业版终端
*/
public final static String TERMINAL = "dubhe-terminal";
}

+ 1
- 1
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/AuthConst.java View File

@@ -56,7 +56,7 @@ public class AuthConst {
public final static String[] DEFAULT_PERMIT_PATHS = {"/swagger**/**", "/webjars/**", "/v2/api-docs/**", "/doc.html/**",
"/users/findUserByUsername", "/auth/login", "/auth/code",
"/datasets/files/annotations/auto","/datasets/versions/**/convert/finish", "/datasets/enhance/finish",
"/auth/getCodeBySentEmail","/auth/userRegister",
"/auth/getCodeBySentEmail","/auth/userRegister","/ws/**",
StringConstant.RECYCLE_CALL_URI+"**"
};



+ 1
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/MagicNumConstant.java View File

@@ -39,6 +39,7 @@ public final class MagicNumConstant {
public static final int ELEVEN = 11;
public static final int SIXTEEN = 16;
public static final int TWENTY = 20;
public static final int TWENTY_TWO = 22;
public static final int THIRTY_TWO = 32;
public static final int FIFTY = 50;
public static final int SIXTY = 60;


+ 1
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/NumberConstant.java View File

@@ -43,6 +43,7 @@ public class NumberConstant {
public final static int HOUR_SECOND = 60 * 60;
public final static int DAY_SECOND = 60 * 60 * 24;
public final static int WEEK_SECOND = 60 * 60 * 24 * 7;
public final static int MONTH_SECOND = 60 * 60 * 24 * 30;
public final static int MAX_PAGE_SIZE = 2000;
public final static int MAX_MESSAGE_LENGTH = 1024 * 1024 * 1024;
}

+ 12
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/Permissions.java View File

@@ -140,6 +140,8 @@ public final class Permissions {
public static final String USER_EDIT = "hasAuthority('ROLE_system:user:edit')";
public static final String USER_DELETE = "hasAuthority('ROLE_system:user:delete')";
public static final String USER_DOWNLOAD = "hasAuthority('ROLE_system:user:download')";
public static final String USER_CONFIG_EDIT = "hasAuthority('ROLE_system:user:configEdit')";
public static final String USER_RESOURCE_INFO = "hasAuthority('ROLE_system:user:resourceInfo')";

/**
* 控制台:角色管理
@@ -200,6 +202,16 @@ public final class Permissions {
public static final String SPECS_EDIT = "hasAuthority('ROLE_system:specs:edit')";
public static final String SPECS_DELETE = "hasAuthority('ROLE_system:specs:delete')";

/**
* 专业版:终端
*/
public static final String TERMINAL_CREATE = "hasAuthority('ROLE_terminal:specs:create')";
public static final String TERMINAL_RESTART = "hasAuthority('ROLE_terminal:specs:restart')";
public static final String TERMINAL_PRESAVE = "hasAuthority('ROLE_terminal:specs:save')";
public static final String TERMINAL_DELETE = "hasAuthority('ROLE_terminal:specs:delete')";
public static final String TERMINAL_DETAIL = "hasAuthority('ROLE_terminal:specs:detail')";
public static final String TERMINAL_LIST = "hasAuthority('ROLE_terminal:specs:list')";

private Permissions() {
}
}

+ 6
- 2
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/StringConstant.java View File

@@ -79,6 +79,9 @@ public final class StringConstant {
public static final String RECYCLE_CALL_URI = "/api/recycle/call/";
public static final String K8S_CALLBACK_PATH_DEPLOYMENT = "/api/k8s/callback/deployment";
public static final String MULTIPART = "multipart/form-data";

public static final String PIP_SITE_PACKAGE ="pip-site-package";

/**
* 分页内容
*/
@@ -105,9 +108,10 @@ public final class StringConstant {
public static final String STEP_LOW = "step";

/**
* 测试环境
* 任务缓存
*/
public static final String PROFILE_ACTIVE_TEST = "test";
public static final String CACHE_TASK_ID ="task_id";
public static final String CACHE_TASK_NAME ="task_name";


private StringConstant() {


+ 2
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/SymbolConstant.java View File

@@ -48,6 +48,8 @@ public class SymbolConstant {
public static final String EVENT_SEPARATOR = "&&";
public static final String POST = "POST";
public static final String HTTP_SLASH = "http://";
public static final String PORT = "port";
public static final String LOCAL_HOST = "localhost";

private SymbolConstant() {
}


+ 10
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/UserConstant.java View File

@@ -91,4 +91,14 @@ public class UserConstant {
*/
public final static int REGISTER_ROLE_ID = 2;

/**
* 默认资源用户ID
*/
public final static Long DEFAULT_ORIGIN_USER_ID = 0L;

/**
* 默认创建人ID
*/
public final static Long DEFAULT_CREATE_USER_ID = 0L;

}

+ 5
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/context/UserContext.java View File

@@ -18,6 +18,7 @@ package org.dubhe.biz.base.context;

import lombok.Data;
import org.dubhe.biz.base.dto.SysRoleDTO;
import org.dubhe.biz.base.dto.SysUserConfigDTO;

import java.io.Serializable;
import java.util.List;
@@ -72,5 +73,9 @@ public class UserContext implements Serializable {
* 头像路径
*/
private String userAvatarPath;
/**
* 用户配置
*/
private SysUserConfigDTO userConfig;

}

+ 53
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserConfigDTO.java View File

@@ -0,0 +1,53 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.biz.base.dto;

import lombok.Data;
import lombok.experimental.Accessors;
import java.io.Serializable;

/**
* @description System user configuration DTO. All settings are nullable Integer values;
*              setters are chainable (Lombok {@code @Accessors(chain = true)}).
* @date 2021-7-5
*/
@Data
@Accessors(chain = true)
public class SysUserConfigDTO implements Serializable{

private static final long serialVersionUID = 1L;

/**
* Notebook delayed-deletion time setting
*/
private Integer notebookDelayDeleteTime;

/**
* CPU resource limit setting
*/
private Integer cpuLimit;

/**
* Memory resource limit setting
*/
private Integer memoryLimit;

/**
* GPU resource limit setting
*/
private Integer gpuLimit;

}

+ 5
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/UserDTO.java View File

@@ -59,5 +59,10 @@ public class UserDTO implements Serializable {
*/
private List<SysRoleDTO> roles;

/**
* 用户配置
*/
private SysUserConfigDTO userConfig;


}

+ 6
- 1
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/BizEnum.java View File

@@ -56,7 +56,12 @@ public enum BizEnum {
/**
* 度量管理
*/
MEASURE("度量管理", "measure", 5);
MEASURE("度量管理", "measure", 5),
/**
* 专业版终端
*/
TERMINAL("专业版终端", "terminal", 7),
;

/**
* 业务模块名称


+ 7
- 1
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/ImageTypeEnum.java View File

@@ -42,7 +42,13 @@ public enum ImageTypeEnum {
/**
* Serving镜像
*/
SERVING("Serving镜像", "serving", 2);
SERVING("Serving镜像", "serving", 2),

/**
* terminal镜像
*/
TERMINAL("terminal镜像", "terminal", 3)
;

/**
* 镜像项目名称


+ 60
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/ResultUtil.java View File

@@ -0,0 +1,60 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.biz.base.utils;

import cn.hutool.core.util.StrUtil;
import org.dubhe.biz.base.exception.BusinessException;

/**
 * Helpers for validating the result of a call. A failed check raises a
 * {@link BusinessException} whose message is rendered from a template.
 */
public class ResultUtil {

    /**
     * Requires a call result to be non-null.
     *
     * @param object               result to check
     * @param errorMessageTemplate message template, rendered with hutool {@code StrUtil.format}
     *                             ({@code {}} placeholders)
     * @param params               arguments substituted into the template
     * @throws BusinessException when {@code object} is null
     */
    public static void notNull(Object object, String errorMessageTemplate, Object... params) {
        if (object != null) {
            return;
        }
        throw new BusinessException(StrUtil.format(errorMessageTemplate, params));
    }

    /**
     * Requires two call results to be equal; two nulls count as equal.
     *
     * @param object1              first value, compared via its {@code equals} when non-null
     * @param object2              second value
     * @param errorMessageTemplate message template, rendered with {@code String.format}
     *                             ({@code %s}-style placeholders, unlike {@link #notNull})
     * @param params               arguments substituted into the template
     * @throws BusinessException when the two values are not equal
     */
    public static void isEquals(Object object1, Object object2, String errorMessageTemplate, Object... params) {
        boolean matches = (object1 == null) ? (object2 == null) : object1.equals(object2);
        if (!matches) {
            throw new BusinessException(String.format(errorMessageTemplate, params));
        }
    }
}

+ 139
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/NoteBookVO.java View File

@@ -0,0 +1,139 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.biz.base.vo;

import lombok.Data;

import java.io.Serializable;
import java.util.Date;

/**
* @description Notebook view object returned to the front end
* @date 2020-04-28
*/
@Data
public class NoteBookVO implements Serializable {

/**
* ID
*/
private Long id;

/**
* Owning user
*/
private Long userId;

/**
* NoteBook name
*/
private String name;

/**
* NoteBook name
* NOTE(review): carries the same description as {@code name}; how the two differ is not
* visible in this class - confirm against the producer of this VO.
*/
private String noteBookName;

/**
* Remark / description
*/
private String description;

/**
* Accessible Jupyter URL
*/
private String url;

/**
* Number of CPUs
*/
private Integer cpuNum;

/**
* Number of GPUs
*/
private Integer gpuNum;

/**
* Memory size (M)
*/
private Integer memNum;

/**
* Disk size (M)
*/
private Integer diskMemNum;

/**
* 0 running, 1 stopped, 2 deleted, 3 starting, 4 stopping, 5 deleting, 6 runtime error (not yet enabled)
*/
private Integer status;

/**
* Detail information for the current status
*/
private String statusDetail;

/**
* k8s response status code
*/
private String k8sStatusCode;

/**
* k8s response status information
*/
private String k8sStatusInfo;

private String k8sImageName;

/**
* PVC storage path in k8s
*/
private String k8sPvcPath;

private Date createTime;

private Date updateTime;


/**
* Dataset name
*/
private String dataSourceName;

/**
* Dataset path
*/
private String dataSourcePath;

/**
* Algorithm ID
*/
private Long algorithmId;

/**
* Resource owner ID
*/
private Long originUserId;


/**
* pip package path
*/
private String pipSitePackagePath;
}

+ 84
- 0
dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/WebsocketDataResponseBody.java View File

@@ -0,0 +1,84 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.biz.base.vo;


import lombok.Data;
import org.dubhe.biz.base.constant.ResponseCode;
import org.slf4j.MDC;

import java.io.Serializable;

/**
 * @description Common response envelope for WebSocket messages
 * @date 2021-07-20
 */
@Data
public class WebsocketDataResponseBody<T> implements Serializable {

    /** Response status code. */
    private Integer code;

    /** Response message. */
    private String msg;

    /** Topic the response belongs to. */
    private String topic;

    /** Generic payload. */
    private T data;

    /** Trace ID, read from the logging MDC ("traceId") when the object is constructed. */
    private String traceId;

    /**
     * Canonical constructor; every other constructor funnels into this one.
     *
     * @param code  response status code
     * @param msg   response message
     * @param topic response topic
     * @param data  payload
     */
    public WebsocketDataResponseBody(Integer code, String msg, String topic, T data) {
        this.code = code;
        this.msg = msg;
        this.topic = topic;
        this.data = data;
        this.traceId = MDC.get("traceId");
    }

    /**
     * Builds a response without a payload.
     *
     * @param code  response status code
     * @param msg   response message
     * @param topic response topic
     */
    public WebsocketDataResponseBody(Integer code, String msg, String topic) {
        this(code, msg, topic, null);
    }

    /**
     * Builds a success response for a topic.
     *
     * @param topic response topic
     * @param data  payload
     */
    public WebsocketDataResponseBody(String topic, T data) {
        this(ResponseCode.SUCCESS, null, topic, data);
    }

    /**
     * Builds a success response with no message, topic or payload.
     */
    public WebsocketDataResponseBody() {
        this(ResponseCode.SUCCESS, null, null, null);
    }

    /**
     * Tells whether this response carries the success code.
     *
     * @return true on success, false otherwise
     */
    public boolean succeed() {
        return ResponseCode.SUCCESS.equals(code);
    }

}

+ 1
- 1
dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/api/FileStoreApi.java View File

@@ -57,7 +57,7 @@ public interface FileStoreApi {
*/
default String formatPath(String path) {
if (!StringUtils.isEmpty(path)) {
return path.replaceAll("///*", File.separator);
return path.replaceAll("///*", "/");
}
return path;
}


+ 3
- 1
dubhe-server/common-biz/log/src/main/java/org/dubhe/biz/log/enums/LogEnum.java View File

@@ -77,7 +77,9 @@ public enum LogEnum {
//度量
MEASURE,
//云端Serving
SERVING;
SERVING,
//专业版终端
TERMINAL;

/**
* 判断日志类型不能为空


+ 15
- 15
dubhe-server/common-biz/log/src/main/resources/logback.xml View File

@@ -25,10 +25,10 @@
<!-- 滚动记录文件,先将日志记录到指定文件,复合条件后日志将记录到其他文件 -->
<appender name="info_file"
class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/${log.path}/info/dubhe-info.log</file>
<file>/data/logs/${log.path}/info/dubhe-info.log</file>
<rollingPolicy
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>logs/${log.path}/info/dubhe-${app.active}-info-%d{yyyy-MM-dd}.%i.log
<fileNamePattern>/data/logs/${log.path}/info/dubhe-${app.active}-info-%d{yyyy-MM-dd}.%i.log
</fileNamePattern>
<!-- 单个日志文件最多50MB, 14天的日志周期,最大不能超过250MB -->
<maxFileSize>50MB</maxFileSize>
@@ -52,10 +52,10 @@
<!-- 滚动记录文件,先将日志记录到指定文件,复合条件后日志将记录到其他文件 -->
<appender name="debug_info"
class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/${log.path}/debug/dubhe-debug.log</file>
<file>/data/logs/${log.path}/debug/dubhe-debug.log</file>
<rollingPolicy
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>logs/${log.path}/debug/dubhe-${app.active}-debug-%d{yyyy-MM-dd}.%i.log
<fileNamePattern>/data/logs/${log.path}/debug/dubhe-${app.active}-debug-%d{yyyy-MM-dd}.%i.log
</fileNamePattern>
<!-- 单个日志文件最多50MB, 14天的日志周期,最大不能超过250MB -->
<maxFileSize>50MB</maxFileSize>
@@ -79,10 +79,10 @@
<!-- 滚动记录文件,先将日志记录到指定文件,复合条件后日志将记录到其他文件 -->
<appender name="error_file"
class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/${log.path}/error/dubhe-error.log</file>
<file>/data/logs/${log.path}/error/dubhe-error.log</file>
<rollingPolicy
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>logs/${log.path}/error/dubhe-${app.active}-error-%d{yyyy-MM-dd}.%i.log
<fileNamePattern>/data/logs/${log.path}/error/dubhe-${app.active}-error-%d{yyyy-MM-dd}.%i.log
</fileNamePattern>
<!-- 单个日志文件最多50MB, 14天的日志周期,最大不能超过250MB -->
<maxFileSize>50MB</maxFileSize>
@@ -106,10 +106,10 @@
<!-- 滚动记录文件,先将日志记录到指定文件,复合条件后日志将记录到其他文件 -->
<appender name="warn_file"
class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/${log.path}/warn/dubhe-warn.log</file>
<file>/data/logs/${log.path}/warn/dubhe-warn.log</file>
<rollingPolicy
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>logs/${log.path}/warn/dubhe-${app.active}-warn-%d{yyyy-MM-dd}.%i.log
<fileNamePattern>/data/logs/${log.path}/warn/dubhe-${app.active}-warn-%d{yyyy-MM-dd}.%i.log
</fileNamePattern>
<!-- 单个日志文件最多50MB, 14天的日志周期,最大不能超过250MB -->
<maxFileSize>50MB</maxFileSize>
@@ -134,10 +134,10 @@
<!-- 滚动记录文件,先将日志记录到指定文件,复合条件后日志将记录到其他文件 -->
<appender name="trace_file"
class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/${log.path}/trace/dubhe-trace.log</file>
<file>/data/logs/${log.path}/trace/dubhe-trace.log</file>
<rollingPolicy
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>logs/${log.path}/trace/dubhe-${app.active}-trace-%d{yyyy-MM-dd}.%i.log
<fileNamePattern>/data/logs/${log.path}/trace/dubhe-${app.active}-trace-%d{yyyy-MM-dd}.%i.log
</fileNamePattern>
<!-- 单个日志文件最多50MB, 14天的日志周期,最大不能超过250MB -->
<maxFileSize>50MB</maxFileSize>
@@ -162,10 +162,10 @@
<!-- 滚动记录文件,先将日志记录到指定文件,复合条件后日志将记录到其他文件 -->
<appender name="schedule_file"
class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/${log.path}/info/dubhe-schedule.log</file>
<file>/data/logs/${log.path}/info/dubhe-schedule.log</file>
<rollingPolicy
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>logs/${log.path}/info/dubhe-${app.active}-schedule-%d{yyyy-MM-dd}.%i.log
<fileNamePattern>/data/logs/${log.path}/info/dubhe-${app.active}-schedule-%d{yyyy-MM-dd}.%i.log
</fileNamePattern>
<!-- 单个日志文件最多50MB, 14天的日志周期,最大不能超过250MB -->
<maxFileSize>50MB</maxFileSize>
@@ -189,10 +189,10 @@
<!-- 滚动记录文件,先将日志记录到指定文件,复合条件后日志将记录到其他文件 -->
<appender name="global_request_file"
class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/${log.path}/info/dubhe-request.log</file>
<file>/data/logs/${log.path}/info/dubhe-request.log</file>
<rollingPolicy
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>logs/${log.path}/info/dubhe-${app.active}-request-%d{yyyy-MM-dd}.%i.log
<fileNamePattern>/data/logs/${log.path}/info/dubhe-${app.active}-request-%d{yyyy-MM-dd}.%i.log
</fileNamePattern>
<!-- 单个日志文件最多50MB, 14天的日志周期,最大不能超过250MB -->
<maxFileSize>50MB</maxFileSize>
@@ -259,4 +259,4 @@
<appender-ref ref="console" />
</logger>

</configuration>
</configuration>

+ 0
- 1
dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClient.java View File

@@ -47,5 +47,4 @@ public interface AdminClient {

@GetMapping(value = "/users/findByIds")
DataResponseBody<List<UserDTO>> getUserList(@RequestParam(value = "ids") List<Long> ids);

}

+ 0
- 7
dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-data.yml View File

@@ -1,7 +0,0 @@
spring:
cloud:
nacos:
config:
namespace: dubhe-server-cloud-test-data
discovery:
namespace: dubhe-server-cloud-test-data

+ 5
- 0
dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-dev.yml View File

@@ -1,7 +1,12 @@
spring:
cloud:
nacos:
username: nacos
password: Tianshu
context-path: /nacos
config:
namespace: dubhe-server-cloud-dev
server-addr: 10.105.1.132:8848
discovery:
namespace: dubhe-server-cloud-dev
server-addr: 10.105.1.132:8848

+ 9
- 0
dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-pre.yml View File

@@ -0,0 +1,9 @@
spring:
cloud:
nacos:
config:
namespace: dubhe-server-cloud-pre
server-addr: 10.105.1.133:8848
discovery:
namespace: dubhe-server-cloud-pre
server-addr: 10.105.1.133:8848

+ 5
- 0
dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-test.yml View File

@@ -1,7 +1,12 @@
spring:
cloud:
nacos:
username: nacos
password: Tianshu
context-path: /nacos
config:
namespace: dubhe-server-cloud-test
server-addr: 10.105.1.132:8848
discovery:
namespace: dubhe-server-cloud-test
server-addr: 10.105.1.132:8848

+ 0
- 7
dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-dev.yml View File

@@ -1,7 +0,0 @@
spring:
cloud:
nacos:
config:
namespace: dubhe-server-cloud-prod
discovery:
namespace: dubhe-server-cloud-prod

+ 0
- 7
dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-prod.yml View File

@@ -1,7 +0,0 @@
spring:
cloud:
nacos:
config:
namespace: dubhe-server-cloud-prod
discovery:
namespace: dubhe-server-cloud-prod

+ 15
- 0
dubhe-server/common-k8s/pom.xml View File

@@ -72,6 +72,21 @@
<version>0.0.1-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.auth0</groupId>
<artifactId>java-jwt</artifactId>
<version>3.4.0</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-websocket</artifactId>
</dependency>

<dependency>
<groupId>com.github.docker-java</groupId>
<artifactId>docker-java</artifactId>
<version>3.2.11</version>
</dependency>
</dependencies>

<build>


+ 68
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/docker/api/DockerApi.java View File

@@ -0,0 +1,68 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.docker.api;

import com.github.dockerjava.api.DockerClient;
import com.github.dockerjava.api.async.ResultCallback;
import com.github.dockerjava.api.async.ResultCallbackTemplate;
import com.github.dockerjava.api.model.PushResponseItem;

/**
* @description Docker API: image removal, container commit and image push, each executed
*              through a caller-supplied {@link DockerClient}.
* @date 2021-07-06
*/
public interface DockerApi {

/**
* Removes an image without forcing.
*
* @param dockerClient docker connection
* @param image repository:tag
* @return boolean true on success, false on failure
*/
boolean removeImage(DockerClient dockerClient,String image);
/**
* Removes an image.
*
* @param dockerClient docker connection
* @param image repository:tag
* @param force true: forced removal, false: non-forced
* @return boolean true on success, false on failure
*/
boolean removeImage(DockerClient dockerClient,String image,boolean force);

/**
* docker commit
*
* @param dockerClient docker connection
* @param containerId container id
* @param repository repository
* @param tag tag
* @return the result of the commit command (presumably the new image id - confirm).
*         NOTE(review): DockerApiImpl returns the exception message instead when the commit
*         fails, so callers cannot tell the two apart by type.
*/
String commit(DockerClient dockerClient,String containerId,String repository,String tag);

/**
* Pushes an image.
*
* @param dockerClient docker connection
* @param image repository:tag
* @param resultCallback callback that receives the push response
* @return boolean true when the push command was issued without an exception, false otherwise
*/
boolean push(DockerClient dockerClient, String image, ResultCallbackTemplate resultCallback);
}

+ 112
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/docker/api/impl/DockerApiImpl.java View File

@@ -0,0 +1,112 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.docker.api.impl;

import com.github.dockerjava.api.DockerClient;
import com.github.dockerjava.api.async.ResultCallbackTemplate;
import com.github.dockerjava.api.command.CommitCmd;
import com.github.dockerjava.api.model.AuthConfig;
import org.dubhe.biz.log.enums.LogEnum;
import org.dubhe.biz.log.utils.LogUtil;
import org.dubhe.docker.api.DockerApi;
import org.dubhe.docker.config.DubheDockerJavaConfig;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

/**
* @description docker api实现类
* @date 2021-07-06
*/
@Service
public class DockerApiImpl implements DockerApi {
@Autowired
private DubheDockerJavaConfig dubheDockerJavaConfig;
/**
* 非强制删除镜像
*
* @param dockerClient docker连接
* @param image repository:tag
* @return boolean 成功true,失败false
*/
@Override
public boolean removeImage(DockerClient dockerClient, String image) {
LogUtil.info(LogEnum.TERMINAL, "DockerApiImpl removeImage image:{}",image);
try{
dockerClient.removeImageCmd(image).withForce(false).exec();
return true;
}catch (Exception e){
LogUtil.error(LogEnum.TERMINAL, "DockerApiImpl removeImage error:{}",e.getMessage(), e);
return false;
}
}

/**
* 删除镜像
*
* @param dockerClient docker连接
* @param image repository:tag
* @param force true:强制删除 false:非强制
* @return boolean 成功true,失败false
*/
@Override
public boolean removeImage(DockerClient dockerClient, String image, boolean force) {
LogUtil.info(LogEnum.TERMINAL, "DockerApiImpl removeImage image:{} force:{}",image,force);
try{
dockerClient.removeImageCmd(image).withForce(force).exec();
return true;
}catch (Exception e){
LogUtil.error(LogEnum.TERMINAL, "DockerApiImpl removeImage error:{}",e.getMessage(), e);
return false;
}
}

/**
* docker commit
*
* @param dockerClient docker连接
* @param containerId 容器id
* @param repository 仓库
* @param tag 标签
* @return
*/
@Override
public String commit(DockerClient dockerClient, String containerId, String repository, String tag) {
LogUtil.info(LogEnum.TERMINAL, "DockerApiImpl commit containerId:{} repository:{} tag:{}",containerId,repository,tag);
try{
CommitCmd commitCmd = dockerClient.commitCmd(containerId).withRepository(repository).withTag(tag);
return commitCmd.exec();
}catch (Exception e){
LogUtil.error(LogEnum.TERMINAL, "DockerApiImpl removeImage error:{}",e.getMessage(), e);
return e.getMessage();
}
}

@Override
public boolean push(DockerClient dockerClient, String image, ResultCallbackTemplate resultCallback) {
LogUtil.info(LogEnum.TERMINAL, "DockerApiImpl push image:{}",image);
try{
AuthConfig authConfig = new AuthConfig();
authConfig.withUsername(dubheDockerJavaConfig.getHarborUserName()).withPassword(dubheDockerJavaConfig.getHarborPassword());
dockerClient.pushImageCmd(image).withAuthConfig(authConfig).exec(resultCallback);
return true;
}catch (Exception e){
LogUtil.error(LogEnum.TERMINAL, "DockerApiImpl push error:{}",e.getMessage(), e);
return false;
}
}
}

+ 97
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/docker/callback/TerminalPushImageResultCallback.java View File

@@ -0,0 +1,97 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.docker.callback;

import com.github.dockerjava.api.DockerClient;
import com.github.dockerjava.api.async.ResultCallbackTemplate;
import com.github.dockerjava.api.model.PushResponseItem;
import org.dubhe.biz.base.constant.MagicNumConstant;
import org.dubhe.biz.base.exception.BusinessException;
import org.dubhe.biz.log.enums.LogEnum;
import org.dubhe.biz.log.utils.LogUtil;
import org.dubhe.docker.domain.dto.DockerPushCallbackDTO;
import org.dubhe.docker.utils.DockerCallbackTool;

import java.io.IOException;

/**
* @description 镜像推送回调
* @date 2021-07-22
*/
public class TerminalPushImageResultCallback extends ResultCallbackTemplate<TerminalPushImageResultCallback,PushResponseItem> {
private Long terminalId;
//回调地址
private String url;

private PushResponseItem latestItem = null;

private DockerClient dockerClient;

private Long userId;

public TerminalPushImageResultCallback(){

}

public TerminalPushImageResultCallback(String url, Long terminalId, DockerClient dockerClient,Long userId){
this.url = url;
this.terminalId = terminalId;
this.dockerClient = dockerClient;
this.userId = userId;
}

@Override
public void onNext(PushResponseItem item) {
this.latestItem = item;
LogUtil.info(LogEnum.TERMINAL,"push image item: {}",item.toString());
if (item.getErrorDetail() != null){
try {
DockerCallbackTool.sendPushCallback(new DockerPushCallbackDTO(terminalId,item.getErrorDetail().getMessage(),true,userId),url, MagicNumConstant.THREE);
} finally {
try {
dockerClient.close();
} catch (IOException e) {
LogUtil.error(LogEnum.TERMINAL,"push terminalId {} error:"+e.getMessage(),terminalId,e);
throw new BusinessException("push error:"+e.getMessage());
}
}
}
}

@Override
public void onError(Throwable throwable){
super.onError(throwable);
LogUtil.error(LogEnum.TERMINAL,"push image onError: {}",throwable.getMessage());
}

@Override
public void onComplete(){
super.onComplete();
LogUtil.info(LogEnum.TERMINAL,"push image onComplete terminalId: {}",terminalId);
try{
DockerCallbackTool.sendPushCallback(new DockerPushCallbackDTO(terminalId,userId),url,MagicNumConstant.THREE);
}finally {
try {
dockerClient.close();
} catch (IOException e) {
LogUtil.error(LogEnum.TERMINAL,"push terminalId {} error:"+e.getMessage(),terminalId,e);
throw new BusinessException("push error:"+e.getMessage());
}
}
}
}

+ 57
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/docker/config/DockerClientFactory.java View File

@@ -0,0 +1,57 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.docker.config;

import com.github.dockerjava.api.DockerClient;
import com.github.dockerjava.core.DefaultDockerClientConfig;
import com.github.dockerjava.core.DockerClientBuilder;
import com.github.dockerjava.core.DockerClientConfig;
import org.dubhe.biz.base.constant.SymbolConstant;
import org.dubhe.biz.log.enums.LogEnum;
import org.dubhe.biz.log.utils.LogUtil;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

/**
 * @description Factory that builds docker-java clients for a remote docker daemon
 * @date 2021-07-05
 */
@Component
public class DockerClientFactory {
    @Autowired
    private DubheDockerJavaConfig dubheDockerJavaConfig;

    /**
     * Builds a client that talks to the docker daemon of the given host over tcp, using the
     * configured remote api port and with TLS verification disabled.
     *
     * @param host ip or domain name
     * @return DockerClient, or null when the client could not be built
     */
    public DockerClient getDockerClient(String host){
        DockerClient dockerClient = null;
        try {
            String dockerHost = "tcp://" + host + SymbolConstant.COLON + dubheDockerJavaConfig.getDockerRemoteApiPort();
            DockerClientConfig clientConfig = DefaultDockerClientConfig.createDefaultConfigBuilder()
                    .withDockerHost(dockerHost)
                    .withDockerTlsVerify(false)
                    .build();
            dockerClient = DockerClientBuilder.getInstance(clientConfig).build();
        } catch (Exception e) {
            LogUtil.error(LogEnum.TERMINAL, "DockerClientFactory getDockerClient error:{}",e.getMessage(), e);
        }
        return dockerClient;
    }
}

+ 42
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/docker/config/DubheDockerJavaConfig.java View File

@@ -0,0 +1,42 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.docker.config;

import lombok.Getter;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;

/**
* @description docker-java related configuration; every field is injected from an application property
* and exposed through a Lombok-generated getter
* @date 2021-07-05
*/
@Getter
@Configuration
public class DubheDockerJavaConfig {
// port of the docker remote api (property docker.remote-api-port)
@Value("${docker.remote-api-port}")
private String dockerRemoteApiPort;

// harbor address (property harbor.address)
@Value("${harbor.address}")
private String harborAddress;

// harbor user name (property harbor.username)
@Value("${harbor.username}")
private String harborUserName;

// harbor password (property harbor.password)
@Value("${harbor.password}")
private String harborPassword;
}

+ 26
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/docker/constant/DockerCallbackConstant.java View File

@@ -0,0 +1,26 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.docker.constant;

/**
* @description
* @date 2021-07-27
*/
public class DockerCallbackConstant {
public static final String DOCKER_CALLBACK_URI = "/api/docker/callback/";
}

+ 62
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/docker/domain/dto/DockerPushCallbackDTO.java View File

@@ -0,0 +1,62 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.docker.domain.dto;

import io.swagger.annotations.ApiModelProperty;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import org.dubhe.biz.base.constant.MagicNumConstant;

import javax.validation.constraints.Min;
import javax.validation.constraints.NotNull;

/**
 * @description Payload of the docker image push callback
 * @date 2021-07-27
 */
@Data
@NoArgsConstructor
@Accessors(chain = true)
public class DockerPushCallbackDTO {
    @ApiModelProperty(value = "terminalId", required = true)
    @Min(value = MagicNumConstant.ONE, message = "id数值不合法")
    private Long terminalId;

    @ApiModelProperty(value = "错误信息", required = false)
    private String errorMessage;

    // true: the push failed, false: the push succeeded
    // NOTE(review): the field is a primitive, so it can never be null and @NotNull cannot fail here
    @NotNull
    @ApiModelProperty(value = "是否错误 true:错误 false:成功")
    private boolean error;

    @ApiModelProperty(value = "用户id", required = false)
    private Long userId;

    /**
     * Creates the payload of a push without error: no error message and error flag false.
     *
     * @param terminalId terminal id
     * @param userId     user id
     */
    public DockerPushCallbackDTO(Long terminalId,Long userId){
        this(terminalId, null, false, userId);
    }

    /**
     * Creates a payload with every field given explicitly.
     *
     * @param terminalId   terminal id
     * @param errorMessage error message
     * @param error        whether the push failed
     * @param userId       user id
     */
    public DockerPushCallbackDTO(Long terminalId, String errorMessage, boolean error,Long userId){
        this.userId = userId;
        this.error = error;
        this.errorMessage = errorMessage;
        this.terminalId = terminalId;
    }
}

+ 40
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/docker/enums/DockerOperationEnum.java View File

@@ -0,0 +1,40 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.docker.enums;

/**
 * @description Docker operations
 * @date 2021-07-27
 */
public enum DockerOperationEnum {
    /**
     * push
     */
    PUSH("push"),
    ;

    // operation name; final because enum constants are shared singletons and must stay immutable
    private final String operation;

    DockerOperationEnum(String operation) {
        this.operation = operation;
    }

    /**
     * @return the operation name of this constant, e.g. "push"
     */
    public String getType() {
        return operation;
    }
}

+ 83
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/docker/utils/DockerCallbackTool.java View File

@@ -0,0 +1,83 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.docker.utils;

import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpResponse;
import cn.hutool.http.HttpStatus;
import com.alibaba.fastjson.JSON;
import org.dubhe.biz.base.constant.MagicNumConstant;
import org.dubhe.biz.base.constant.SymbolConstant;
import org.dubhe.biz.log.enums.LogEnum;
import org.dubhe.biz.log.utils.LogUtil;
import org.dubhe.docker.config.DubheDockerJavaConfig;
import org.dubhe.docker.constant.DockerCallbackConstant;
import org.dubhe.docker.domain.dto.DockerPushCallbackDTO;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

/**
* @description docker回调相关工具类
* @date 2020-07-13
*/
@Component
public class DockerCallbackTool {
/**
* http请求超时时间 单位毫秒
*/
private static final int TIMEOUT_MILLISECOND = 20 * 1000;

@Autowired
private DubheDockerJavaConfig dubheDockerJavaConfig;

/**
* 获取回调地址
*
* @param host 主机
* @param port 端口
* @param action 动作
* @return
*/
public String getCallbackUrl(String host,String port,String action){
return SymbolConstant.HTTP_SLASH+host+SymbolConstant.COLON+port+ DockerCallbackConstant.DOCKER_CALLBACK_URI+action;
}

/**
* 镜像推送回调
*
* @param dockerPushCallbackDTO 回调参数
* @param url 回调地址
* @param count 重试计数
*/
public static void sendPushCallback(DockerPushCallbackDTO dockerPushCallbackDTO, String url,Integer count){
try{
LogUtil.info(LogEnum.TERMINAL, "{} sendPushCallback {} count {}", url, dockerPushCallbackDTO,count);
HttpResponse httpResponse = HttpRequest.post(url)
.body(JSON.toJSONString(dockerPushCallbackDTO))
.timeout(TIMEOUT_MILLISECOND)
.execute();
LogUtil.info(LogEnum.TERMINAL, "{} sendPushCallback {} count {} status:{}", url, dockerPushCallbackDTO,count,httpResponse.getStatus());
//重试
if (HttpStatus.HTTP_OK != httpResponse.getStatus() && count > MagicNumConstant.ZERO){
sendPushCallback(dockerPushCallbackDTO,url,--count);
}
}catch (Exception e){
LogUtil.error(LogEnum.TERMINAL, "{} sendPushCallback {} count {} error:{} ", url, dockerPushCallbackDTO,count,e.getMessage(),e);
}
}
}

+ 8
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PersistentVolumeClaimApi.java View File

@@ -103,6 +103,14 @@ public interface PersistentVolumeClaimApi {
*/
boolean deletePv(String pvName);

/**
* Delete PV by resource name
*
* @param resourceName resource name
* @return boolean true on success, false on failure
*/
boolean deletePvByResourceName(String resourceName);

/**
* 查询PV
*


+ 52
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/TerminalApi.java View File

@@ -0,0 +1,52 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.k8s.api;

import org.dubhe.k8s.domain.PtBaseResult;
import org.dubhe.k8s.domain.bo.TerminalBO;
import org.dubhe.k8s.domain.vo.TerminalResourceVO;

/**
* @description Professional edition terminal api
* @date 2021-06-29
*/
public interface TerminalApi {
/**
* Create
*
* @param bo terminal parameters
* @return TerminalResourceVO
*/
TerminalResourceVO create(TerminalBO bo);

/**
* Delete
* @param namespace namespace
* @param resourceName resource name
* @return PtBaseResult base result class
*/
PtBaseResult delete(String namespace, String resourceName);

/**
* Query
* @param namespace namespace
* @param resourceName resource name
* @return TerminalResourceVO
*/
TerminalResourceVO get(String namespace, String resourceName);
}

+ 4
- 2
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DistributeTrainApiImpl.java View File

@@ -147,7 +147,7 @@ public class DistributeTrainApiImpl implements DistributeTrainApi {
@Override
public BizDistributeTrain create(DistributeTrainBO bo) {
LogUtil.info(LogEnum.BIZ_K8S, "Params of creating DistributeTrain--create:{}", bo);
LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum() * bo.getSize(), bo.getMemNum() * bo.getSize(), bo.getGpuNum() * bo.getSize());
LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum() * bo.getSize(), bo.getMemNum() * bo.getSize(), bo.getGpuNum() == null?0:bo.getGpuNum() * bo.getSize());
if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) {
return new BizDistributeTrain().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), limitsOfResources.getMessage());
}
@@ -183,6 +183,7 @@ public class DistributeTrainApiImpl implements DistributeTrainApi {
private Map<String, String> env;
private Map<String, String> baseLabels;
private String businessLabel;
private String taskIdentifyLabel;
private Integer delayCreate;
private Integer delayDelete;
private TaskYamlBO taskYamlBO;
@@ -201,7 +202,8 @@ public class DistributeTrainApiImpl implements DistributeTrainApi {
this.slaveCmd = bo.getSlaveCmd();
this.env = bo.getEnv();
this.businessLabel = bo.getBusinessLabel();
this.baseLabels = LabelUtils.getChildLabels(baseName, distributeTrainName, K8sKindEnum.DISTRIBUTETRAIN.getKind(), businessLabel);
this.taskIdentifyLabel = bo.getTaskIdentifyLabel();
this.baseLabels = LabelUtils.getChildLabels(baseName, distributeTrainName, K8sKindEnum.DISTRIBUTETRAIN.getKind(), businessLabel, taskIdentifyLabel);
this.delayCreate = bo.getDelayCreateTime();
this.delayDelete = bo.getDelayDeleteTime();
this.taskYamlBO = new TaskYamlBO();


+ 3
- 1
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DubheDeploymentApiImpl.java View File

@@ -225,6 +225,7 @@ public class DubheDeploymentApiImpl implements DubheDeploymentApi {
private Map<String, Quantity> resourcesLimitsMap;
private Map<String, String> baseLabels;
private String businessLabel;
private String taskIdentifyLabel;
private Integer gpuNum;


@@ -249,6 +250,7 @@ public class DubheDeploymentApiImpl implements DubheDeploymentApi {
Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString())));
Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT)));
this.businessLabel = bo.getBusinessLabel();
this.taskIdentifyLabel = bo.getTaskIdentifyLabel();
this.baseLabels = LabelUtils.getBaseLabels(baseName, businessLabel);

this.datasetReadOnly = true;
@@ -310,7 +312,7 @@ public class DubheDeploymentApiImpl implements DubheDeploymentApi {
* @return Deployment Deployment 业务类
*/
private Deployment buildDeployment() {
Map<String, String> childLabels = LabelUtils.getChildLabels(baseName, deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), businessLabel);
Map<String, String> childLabels = LabelUtils.getChildLabels(baseName, deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), businessLabel, taskIdentifyLabel);
LabelSelector labelSelector = new LabelSelector();
labelSelector.setMatchLabels(childLabels);
return new DeploymentBuilder()


+ 57
- 4
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/JupyterResourceApiImpl.java View File

@@ -26,6 +26,7 @@ import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import io.fabric8.kubernetes.api.model.Container;
import io.fabric8.kubernetes.api.model.ContainerPortBuilder;
import io.fabric8.kubernetes.api.model.EmptyDirVolumeSource;
import io.fabric8.kubernetes.api.model.EnvVar;
import io.fabric8.kubernetes.api.model.EnvVarBuilder;
import io.fabric8.kubernetes.api.model.IntOrString;
@@ -124,14 +125,19 @@ public class JupyterResourceApiImpl implements JupyterResourceApi {

private static final String DATASET = "/dataset";
private static final String WORKSPACE = "/workspace";
private static final String DSHM_PATH = "/dev/shm";
private static final String K8S_PIP_SITE_PACKAGE = "/home/admin/.local/lib/python3.8/site-packages";

private static final String PVC_DATASET = "pvc-dataset";
private static final String PVC_WORKSPACE = "pvc-workspace";
private static final String PVC_PIP_SITE_PACKAGE = "pvc-pip-site-package";

private static final String CONTAINER_NAME = "web";
private static final Integer CONTAINER_PORT = 8888;
private static final Integer SVC_PORT = 32680;
private static final String NOTEBOOK_MAX_UPLOAD_SIZE = "100m";
private static final String DSHM = "dshm";
private static final String DSHM_MEDIUM = "Memory";

public JupyterResourceApiImpl(K8sUtils k8sUtils) {
this.k8sUtils = k8sUtils;
@@ -161,7 +167,7 @@ public class JupyterResourceApiImpl implements JupyterResourceApi {
return new PtJupyterDeployVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), lack.getMessage());
}

if (!fileStoreApi.createDirs(bo.getWorkspaceDir(), bo.getDatasetDir())) {
if (!fileStoreApi.createDirs(bo.getWorkspaceDir(), bo.getDatasetDir(),bo.getPipSitePackageDir())) {
return new PtJupyterDeployVO().error(K8sResponseEnum.INTERNAL_SERVER_ERROR.getCode(), K8sResponseEnum.INTERNAL_SERVER_ERROR.getMessage());
}
resourceCache.deletePodCacheByResourceName(bo.getNamespace(), bo.getName());
@@ -299,9 +305,12 @@ public class JupyterResourceApiImpl implements JupyterResourceApi {
private String image;
private String datasetDir;
private String datasetMountPath;
private String pipSitePackageDir;
private String pipSitePackageMountPath;
private String workspaceMountPath;
private String workspaceDir;
private Boolean useGpu;
private Quantity shmMemory;

//数据集默认只读
private boolean datasetReadOnly;
@@ -316,6 +325,7 @@ public class JupyterResourceApiImpl implements JupyterResourceApi {
private String baseUrl;
private String secondaryDomain;
private String businessLabel;
private String taskIdentifyLabel;
private Integer delayDelete;

private List<VolumeMount> volumeMounts;
@@ -329,6 +339,8 @@ public class JupyterResourceApiImpl implements JupyterResourceApi {
this.image = bo.getImage();
this.datasetDir = bo.getDatasetDir();
this.datasetMountPath = StringUtils.isEmpty(bo.getDatasetMountPath()) ? DATASET : bo.getDatasetMountPath();
this.pipSitePackageDir=bo.getPipSitePackageDir();
this.pipSitePackageMountPath=StringUtils.isEmpty(bo.getPipSitePackageMountPath()) ? K8S_PIP_SITE_PACKAGE : bo.getPipSitePackageMountPath();
this.workspaceDir = bo.getWorkspaceDir();
this.workspaceMountPath = StringUtils.isEmpty(bo.getWorkspaceMountPath()) ? WORKSPACE : bo.getWorkspaceMountPath();
Optional.ofNullable(bo.getDatasetReadOnly()).ifPresent(v -> datasetReadOnly = v);
@@ -342,12 +354,15 @@ public class JupyterResourceApiImpl implements JupyterResourceApi {
Optional.ofNullable(bo.getCpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_CPU_KEY, new Quantity(v.toString(), K8sParamConstants.CPU_UNIT)));
Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString())));
Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT)));

this.shmMemory = new Quantity("1024",K8sParamConstants.MEM_UNIT);
// 共享内存设置为容器内存的一半(参考 Linux 的默认设置)
Optional.ofNullable(bo.getMemNum()).ifPresent(v -> shmMemory.setAmount(String.valueOf(v/2)));
this.host = k8sUtils.getHost();
this.businessLabel = bo.getBusinessLabel();
this.taskIdentifyLabel = bo.getTaskIdentifyLabel();
this.delayDelete = bo.getDelayDeleteTime();
this.baseLabels = LabelUtils.getBaseLabels(baseName, businessLabel);
this.podLabels = LabelUtils.getChildLabels(baseName, statefulSetName, K8sKindEnum.STATEFULSET.getKind(), businessLabel);
this.podLabels = LabelUtils.getChildLabels(baseName, statefulSetName, K8sKindEnum.STATEFULSET.getKind(), businessLabel, taskIdentifyLabel);
//生成附属资源的名称
generateResourceName();

@@ -454,6 +469,41 @@ public class JupyterResourceApiImpl implements JupyterResourceApi {
}
}

/**
 * Registers the shm volume: a mount named DSHM at DSHM_PATH, backed by an emptyDir volume
 * built from DSHM_MEDIUM and shmMemory
 */
private void buildShmFsVolume() {
    VolumeMount shmMount = new VolumeMountBuilder()
            .withName(DSHM)
            .withMountPath(DSHM_PATH)
            .build();
    volumeMounts.add(shmMount);

    EmptyDirVolumeSource shmSource = new EmptyDirVolumeSource(DSHM_MEDIUM, shmMemory);
    Volume shmVolume = new VolumeBuilder()
            .withName(DSHM)
            .withEmptyDir(shmSource)
            .build();
    volumes.add(shmVolume);
}

/**
 * Mounts the pip package directory as a host path volume; nothing is added when
 * pipSitePackageDir is blank
 */
private void buildPipSitePackageFsVolume(){
    if (!StrUtil.isNotBlank(pipSitePackageDir)) {
        return;
    }

    VolumeMount pipMount = new VolumeMountBuilder()
            .withName(PVC_PIP_SITE_PACKAGE)
            .withMountPath(pipSitePackageMountPath)
            .build();
    volumeMounts.add(pipMount);

    Volume pipVolume = new VolumeBuilder()
            .withName(PVC_PIP_SITE_PACKAGE)
            .withNewHostPath()
            .withPath(pipSitePackageDir)
            .withType(K8sParamConstants.HOST_PATH_TYPE)
            .endHostPath()
            .build();
    volumes.add(pipVolume);
}

/**
* 构建VolumeMount
*/
@@ -498,8 +548,10 @@ public class JupyterResourceApiImpl implements JupyterResourceApi {
* @return JupyterDeployer Notebook 部署类
*/
private JupyterDeployer buildFsVolumes() {
// volumes are registered in this order: pip site-packages, dataset, workspace, shm
buildPipSitePackageFsVolume();
buildDatasetFsVolume();
buildWorkspaceFsVolume();
buildShmFsVolume();
// return the deployer itself so that build steps can be chained
return this;
}

@@ -509,8 +561,10 @@ public class JupyterResourceApiImpl implements JupyterResourceApi {
* @return JupyterDeployer Notebook 部署类
*/
private JupyterDeployer buildFsPvcVolumes() {
// volumes are registered in this order: pip site-packages, dataset, workspace (pvc variant), shm
buildPipSitePackageFsVolume();
buildDatasetFsVolume();
buildWorkspaceFsPvcVolume();
buildShmFsVolume();
// return the deployer itself so that build steps can be chained
return this;
}

@@ -585,7 +639,6 @@ public class JupyterResourceApiImpl implements JupyterResourceApi {
.withNewSpec()
.withTerminationGracePeriodSeconds(ZERO_LONG)
.addToNodeSelector(gpuLabel)
.withTerminationGracePeriodSeconds(SIXTY_LONG)
.addToContainers(container)
.addToVolumes(volumes.toArray(new Volume[0]))
.endSpec()


+ 118
- 61
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/MetricsApiImpl.java View File

@@ -18,7 +18,6 @@
package org.dubhe.k8s.api.impl;

import cn.hutool.core.util.NumberUtil;
import cn.hutool.core.util.StrUtil;
import io.fabric8.kubernetes.api.model.Quantity;
import io.fabric8.kubernetes.api.model.metrics.v1beta1.ContainerMetrics;
import io.fabric8.kubernetes.api.model.metrics.v1beta1.NodeMetricsList;
@@ -40,10 +39,7 @@ import org.dubhe.k8s.domain.dto.PodQueryDTO;
import org.dubhe.k8s.domain.resource.BizContainer;
import org.dubhe.k8s.domain.resource.BizPod;
import org.dubhe.k8s.domain.resource.BizQuantity;
import org.dubhe.k8s.domain.vo.PodRangeMetricsVO;
import org.dubhe.k8s.domain.vo.PtContainerMetricsVO;
import org.dubhe.k8s.domain.vo.PtNodeMetricsVO;
import org.dubhe.k8s.domain.vo.PtPodsVO;
import org.dubhe.k8s.domain.vo.*;
import org.dubhe.k8s.utils.BizConvertUtils;
import org.dubhe.k8s.utils.K8sUtils;
import org.dubhe.k8s.utils.PrometheusUtil;
@@ -83,6 +79,17 @@ public class MetricsApiImpl implements MetricsApi {
*/
@Value("${k8s.prometheus.gpu-query-param}")
private String k8sPrometheusGpuQueryParam;
/**
* prometheus gpu显存总量指标查询参数
*/
@Value("${k8s.prometheus.gpu-mem-total-query-param}")
private String k8sPrometheusGpuMemTotalQueryParam;

/**
* prometheus gpu显存使用量指标查询参数
*/
@Value("${k8s.prometheus.gpu-mem-use-query-param}")
private String k8sPrometheusGpuMemUseQueryParam;
/**
* prometheus cpu指标范围查询参数
*/
@@ -99,6 +106,18 @@ public class MetricsApiImpl implements MetricsApi {
@Value("${k8s.prometheus.gpu-range-query-param}")
private String k8sPrometheusGpuRangeQueryParam;

/**
* prometheus gpu显存总量指标范围查询参数
*/
@Value("${k8s.prometheus.gpu-mem-total-range-query-param}")
private String k8sPrometheusGpuMemTotalRangeQueryParam;

/**
* prometheus gpu显存使用量指标范围查询参数
*/
@Value("${k8s.prometheus.gpu-mem-use-range-query-param}")
private String k8sPrometheusGpuMemUseRangeQueryParam;

public MetricsApiImpl(K8sUtils k8sUtils) {
this.client = k8sUtils.getClient();
}
@@ -141,7 +160,7 @@ public class MetricsApiImpl implements MetricsApi {
List<PtPodsVO> list = new ArrayList<>();
/**将Pod和podName形成映射关系**/
Map<String, List<BizPod>> listMap = client.pods().inAnyNamespace().list().getItems().parallelStream().map(obj -> BizConvertUtils.toBizPod(obj)).collect(Collectors.groupingBy(BizPod::getName));
if(null == listMap) {
if (null == listMap) {
return list;
}
metrics.getItems().stream().forEach(metric -> {
@@ -149,7 +168,7 @@ public class MetricsApiImpl implements MetricsApi {
List<ContainerMetrics> containers = metric.getContainers();
containers.stream().forEach(containerMetrics -> {
Map<String, Quantity> usage = containerMetrics.getUsage();
PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(),metric.getMetadata().getName(),
PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(), metric.getMetadata().getName(),
usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getAmount(),
usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getFormat(),
usage.get(K8sParamConstants.QUANTITY_MEMORY_KEY).getAmount(),
@@ -158,7 +177,7 @@ public class MetricsApiImpl implements MetricsApi {
listMap.get(metric.getMetadata().getName()).get(0).getPhase(), null);

List<BizContainer> containerList = listMap.get(metric.getMetadata().getName()).get(0).getContainers();
countGpuUsed(containerList,ptContainerMetricsResult);
countGpuUsed(containerList, ptContainerMetricsResult);
list.add(ptContainerMetricsResult);
});
});
@@ -185,7 +204,7 @@ public class MetricsApiImpl implements MetricsApi {
containers.stream().forEach(containerMetrics ->
{
Map<String, Quantity> usage = containerMetrics.getUsage();
PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(),metric.getMetadata().getName(),
PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(), metric.getMetadata().getName(),
usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getAmount(),
usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getFormat(),
usage.get(K8sParamConstants.QUANTITY_MEMORY_KEY).getAmount(),
@@ -194,7 +213,7 @@ public class MetricsApiImpl implements MetricsApi {
bizPod.getPhase(), null
);
List<BizContainer> containerList = bizPod.getContainers();
countGpuUsed(containerList,ptContainerMetricsResult);
countGpuUsed(containerList, ptContainerMetricsResult);
list.add(ptContainerMetricsResult);
});
}
@@ -215,7 +234,7 @@ public class MetricsApiImpl implements MetricsApi {
* @param containerList BizContainer对象
* @param ptContainerMetricsResult 封装pod信息
*/
private void countGpuUsed(List<BizContainer> containerList,PtPodsVO ptContainerMetricsResult){
private void countGpuUsed(List<BizContainer> containerList, PtPodsVO ptContainerMetricsResult) {
for (BizContainer container : containerList) {
Map<String, BizQuantity> limits = container.getLimits();
if (limits == null) {
@@ -238,24 +257,24 @@ public class MetricsApiImpl implements MetricsApi {
@Override
public List<PtPodsVO> getPodMetricsRealTime(String namespace, String resourceName) {
List<PtPodsVO> ptPodsVOS = new ArrayList<>();
if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(resourceName)){
if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(resourceName)) {
return ptPodsVOS;
}
List<BizPod> pods = podApi.getListByResourceName(namespace,resourceName);
if (CollectionUtils.isEmpty(pods)){
List<BizPod> pods = podApi.getListByResourceName(namespace, resourceName);
if (CollectionUtils.isEmpty(pods)) {
return ptPodsVOS;
}
List<PodMetrics> podMetricsList = client.top().pods().metrics(namespace).getItems();
if (!CollectionUtils.isEmpty(pods)){
Map<String,PodMetrics> podMetricsMap = podMetricsList.stream().collect(Collectors.toMap(obj -> obj.getMetadata().getName(), obj -> obj));
for (BizPod pod : pods){
List<PtPodsVO> ptPodsVOList = getPtPodsVO(pod,podMetricsMap.get(pod.getName()));
if (!CollectionUtils.isEmpty(ptPodsVOList)){
if (!CollectionUtils.isEmpty(pods)) {
Map<String, PodMetrics> podMetricsMap = podMetricsList.stream().collect(Collectors.toMap(obj -> obj.getMetadata().getName(), obj -> obj));
for (BizPod pod : pods) {
List<PtPodsVO> ptPodsVOList = getPtPodsVO(pod, podMetricsMap.get(pod.getName()));
if (!CollectionUtils.isEmpty(ptPodsVOList)) {
ptPodsVOS.addAll(ptPodsVOList);
}
}
}
for (PtPodsVO ptPodsVO : ptPodsVOS){
for (PtPodsVO ptPodsVO : ptPodsVOS) {
generateGpuUsage(ptPodsVO);
ptPodsVO.calculationPercent();
}
@@ -271,21 +290,21 @@ public class MetricsApiImpl implements MetricsApi {
@Override
public List<PtPodsVO> getPodMetricsRealTimeByPodName(String namespace, String podName) {
List<PtPodsVO> ptPodsVOS = new ArrayList<>();
if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(podName)){
if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(podName)) {
return ptPodsVOS;
}
BizPod pod = podApi.get(namespace,podName);
if (null == pod){
BizPod pod = podApi.get(namespace, podName);
if (null == pod) {
return ptPodsVOS;
}
PodMetrics podMetrics = null;
try{
podMetrics = client.top().pods().metrics(namespace,podName);
}catch (KubernetesClientException e){
try {
podMetrics = client.top().pods().metrics(namespace, podName);
} catch (KubernetesClientException e) {
LogUtil.error(LogEnum.BIZ_K8S, "MetricsApiImpl.getPodMetricsRealTimeByPodName error:{}", e);
}
ptPodsVOS = getPtPodsVO(pod,podMetrics);
for (PtPodsVO ptPodsVO : ptPodsVOS){
ptPodsVOS = getPtPodsVO(pod, podMetrics);
for (PtPodsVO ptPodsVO : ptPodsVOS) {
generateGpuUsage(ptPodsVO);
ptPodsVO.calculationPercent();
}
@@ -301,8 +320,8 @@ public class MetricsApiImpl implements MetricsApi {
@Override
public List<PtPodsVO> getPodMetricsRealTimeByPodName(String namespace, List<String> podNames) {
List<PtPodsVO> ptPodsVOS = new ArrayList<>();
for (String podName : podNames){
ptPodsVOS.addAll(getPodMetricsRealTimeByPodName(namespace,podName));
for (String podName : podNames) {
ptPodsVOS.addAll(getPodMetricsRealTimeByPodName(namespace, podName));
}
return ptPodsVOS;
}
@@ -315,16 +334,16 @@ public class MetricsApiImpl implements MetricsApi {
@Override
public List<PodRangeMetricsVO> getPodRangeMetrics(PodQueryDTO podQueryDTO) {
List<PodRangeMetricsVO> podRangeMetricsVOS = new ArrayList<>();
if (StringUtils.isEmpty(podQueryDTO.getNamespace()) || StringUtils.isEmpty(podQueryDTO.getResourceName())){
if (StringUtils.isEmpty(podQueryDTO.getNamespace()) || StringUtils.isEmpty(podQueryDTO.getResourceName())) {
return podRangeMetricsVOS;
}
List<BizPod> pods = podApi.getListByResourceName(podQueryDTO.getNamespace(),podQueryDTO.getResourceName());
if (CollectionUtils.isEmpty(pods)){
List<BizPod> pods = podApi.getListByResourceName(podQueryDTO.getNamespace(), podQueryDTO.getResourceName());
if (CollectionUtils.isEmpty(pods)) {
return podRangeMetricsVOS;
}
podQueryDTO.generateDefaultParam();
for (BizPod pod : pods){
podRangeMetricsVOS.add(getPodRangeMetricsVO(pod,podQueryDTO));
for (BizPod pod : pods) {
podRangeMetricsVOS.add(getPodRangeMetricsVO(pod, podQueryDTO));
}
return podRangeMetricsVOS;
}
@@ -337,16 +356,16 @@ public class MetricsApiImpl implements MetricsApi {
@Override
public List<PodRangeMetricsVO> getPodRangeMetricsByPodName(PodQueryDTO podQueryDTO) {
List<PodRangeMetricsVO> podRangeMetricsVOS = new ArrayList<>();
if (StringUtils.isEmpty(podQueryDTO.getNamespace()) || CollectionUtils.isEmpty(podQueryDTO.getPodNames())){
if (StringUtils.isEmpty(podQueryDTO.getNamespace()) || CollectionUtils.isEmpty(podQueryDTO.getPodNames())) {
return podRangeMetricsVOS;
}
List<BizPod> pods = podApi.get(podQueryDTO.getNamespace(),podQueryDTO.getPodNames());
if (null == pods){
List<BizPod> pods = podApi.get(podQueryDTO.getNamespace(), podQueryDTO.getPodNames());
if (null == pods) {
return podRangeMetricsVOS;
}
podQueryDTO.generateDefaultParam();
for (BizPod pod : pods){
podRangeMetricsVOS.add(getPodRangeMetricsVO(pod,podQueryDTO));
for (BizPod pod : pods) {
podRangeMetricsVOS.add(getPodRangeMetricsVO(pod, podQueryDTO));
}
return podRangeMetricsVOS;
}
@@ -358,22 +377,40 @@ public class MetricsApiImpl implements MetricsApi {
* @param podQueryDTO 查询参数
* @return PodRangeMetricsVO Pod历史监控指标 VO
*/
private PodRangeMetricsVO getPodRangeMetricsVO(BizPod pod,PodQueryDTO podQueryDTO){
private PodRangeMetricsVO getPodRangeMetricsVO(BizPod pod, PodQueryDTO podQueryDTO) {
PodRangeMetricsVO podRangeMetricsVO = new PodRangeMetricsVO(pod.getName());
PrometheusMetricBO cpuRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl+k8sPrometheusQueryRange,PrometheusUtil.getQueryParamMap(k8sPrometheusCpuRangeQueryParam,pod.getName(),podQueryDTO));
PrometheusMetricBO memRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl+k8sPrometheusQueryRange,PrometheusUtil.getQueryParamMap(k8sPrometheusMemRangeQueryParam,pod.getName(),podQueryDTO));
PrometheusMetricBO gpuRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl+k8sPrometheusQueryRange,PrometheusUtil.getQueryParamMap(k8sPrometheusGpuRangeQueryParam,pod.getName(),podQueryDTO));
PrometheusMetricBO cpuRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQueryRange, PrometheusUtil.getQueryParamMap(k8sPrometheusCpuRangeQueryParam, pod.getName(), podQueryDTO));
PrometheusMetricBO memRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQueryRange, PrometheusUtil.getQueryParamMap(k8sPrometheusMemRangeQueryParam, pod.getName(), podQueryDTO));
PrometheusMetricBO gpuRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQueryRange, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuRangeQueryParam, pod.getName(), podQueryDTO));
PrometheusMetricBO gpuMemTotalRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQueryRange, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuMemTotalRangeQueryParam, pod.getName(), podQueryDTO));
PrometheusMetricBO gpuMemUseRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQueryRange, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuMemUseRangeQueryParam, pod.getName(), podQueryDTO));

StringFormat cpuMetricsFormat = (value)->{
StringFormat cpuMetricsFormat = (value) -> {
return value == null ? String.valueOf(MagicNumConstant.ZERO) : NumberUtil.round(Double.valueOf(value.toString()), MagicNumConstant.TWO).toString();
};
podRangeMetricsVO.setCpuMetrics(cpuRangeMetrics.getValues(cpuMetricsFormat));

StringFormat memMetricsFormat = (value)->{
StringFormat memMetricsFormat = (value) -> {
return NumberUtil.isNumber(String.valueOf(value)) ? String.valueOf(Long.valueOf(String.valueOf(value)) / MagicNumConstant.BINARY_TEN_EXP) : String.valueOf(MagicNumConstant.ZERO);
};
podRangeMetricsVO.setMemoryMetrics(memRangeMetrics.getValues(memMetricsFormat));
podRangeMetricsVO.setGpuMetrics(gpuRangeMetrics.getResults());
Map<String, List<MetricsDataResultValueVO>> gpuMetricsResults = gpuRangeMetrics.getGpuMetricsResults();
List<GpuTotalMemResultVO> gpuTotalMemResults = gpuMemTotalRangeMetrics.getGpuTotalMemResults();
Map<String, List<MetricsDataResultValueVO>> gpuMemResults = gpuMemUseRangeMetrics.getGpuMemResults();
List<GpuMetricsDataResultVO> gpuMetricsDataResultVOS = gpuTotalMemResults.stream().map(x -> {
GpuMetricsDataResultVO gpuMetricsDataResultVO = new GpuMetricsDataResultVO();
gpuMetricsDataResultVO.setAccId(x.getAccId()).setTotalMemValues(x.getGpuTotalMemValue());
if (gpuMemResults.containsKey(x.getAccId())) {
gpuMetricsDataResultVO.setGpuMemValues(gpuMemResults.get(x.getAccId()));
}
if (gpuMetricsResults.containsKey(x.getAccId())) {
gpuMetricsDataResultVO.setGpuMetricsValues(gpuMetricsResults.get(x.getAccId()));
}
return gpuMetricsDataResultVO;
}
).collect(Collectors.toList());

podRangeMetricsVO.setGpuMetrics(gpuMetricsDataResultVOS);
return podRangeMetricsVO;
}

@@ -381,12 +418,31 @@ public class MetricsApiImpl implements MetricsApi {
* 查询Gpu使用率
* @param ptPodsVO pod信息
*/
private void generateGpuUsage(PtPodsVO ptPodsVO){
PrometheusMetricBO prometheusMetricBO = PrometheusUtil.getQuery(k8sPrometheusUrl+k8sPrometheusQuery, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuQueryParam,ptPodsVO.getPodName()));
if (prometheusMetricBO == null){
private void generateGpuUsage(PtPodsVO ptPodsVO) {
PrometheusMetricBO prometheusMetricBO = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuQueryParam, ptPodsVO.getPodName()));
PrometheusMetricBO gpuMemTotalMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuMemTotalQueryParam, ptPodsVO.getPodName()));
PrometheusMetricBO gpuMemUseMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuMemUseQueryParam, ptPodsVO.getPodName()));

if (prometheusMetricBO == null || gpuMemTotalMetrics == null || gpuMemUseMetrics == null) {
return;
}
ptPodsVO.setGpuUsagePersent(prometheusMetricBO.getGpuUsage());
List<GpuTotalMemResultVO> gpuTotalMemValue = gpuMemTotalMetrics.getGpuTotalMemValue();
Map<String, String> gpuMemValue = gpuMemUseMetrics.getGpuMemValue();
Map<String, Float> gpuUsage = prometheusMetricBO.getGpuUsage();

List<GpuValueVO> gpuValueVOS = gpuTotalMemValue.stream().map(x -> {
GpuValueVO gpuValueVO = new GpuValueVO();
gpuValueVO.setAccId(x.getAccId()).setGpuTotalMemValue(x.getGpuTotalMemValue());
if (gpuMemValue.containsKey(x.getAccId())) {
gpuValueVO.setGpuMemValue(gpuMemValue.get(x.getAccId()));
}
if (gpuUsage.containsKey(x.getAccId())) {
gpuValueVO.setUsage(gpuUsage.get(x.getAccId()));
}
return gpuValueVO;
}
).collect(Collectors.toList());
ptPodsVO.setGpuUsagePersent(gpuValueVOS);
}

/**
@@ -395,22 +451,22 @@ public class MetricsApiImpl implements MetricsApi {
* @param metric 查询指标
* @return List<PtPodsVO> pod信息列表
*/
private List<PtPodsVO> getPtPodsVO(BizPod bizPod,PodMetrics metric){
private List<PtPodsVO> getPtPodsVO(BizPod bizPod, PodMetrics metric) {
List<PtPodsVO> ptPodsVOList = new ArrayList<>();
if (metric == null){
if (metric == null) {
return ptPodsVOList;
}
Map<String,ContainerMetrics> containerMetricsMap = metric.getContainers().stream().collect(Collectors.toMap(obj -> obj.getName(), obj -> obj));
for (BizContainer container : bizPod.getContainers()){
Map<String, ContainerMetrics> containerMetricsMap = metric.getContainers().stream().collect(Collectors.toMap(obj -> obj.getName(), obj -> obj));
for (BizContainer container : bizPod.getContainers()) {
Map<String, BizQuantity> request = container.getRequests();
if (containerMetricsMap.get(container.getName()) == null){
if (containerMetricsMap.get(container.getName()) == null) {
continue;
}
Map<String, Quantity> usage = containerMetricsMap.get(container.getName()).getUsage();
PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(),metric.getMetadata().getName(),
request.get(K8sParamConstants.QUANTITY_CPU_KEY) ==null ? null : request.get(K8sParamConstants.QUANTITY_CPU_KEY).getAmount(),
PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(), metric.getMetadata().getName(),
request.get(K8sParamConstants.QUANTITY_CPU_KEY) == null ? null : request.get(K8sParamConstants.QUANTITY_CPU_KEY).getAmount(),
usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getAmount(),
request.get(K8sParamConstants.QUANTITY_CPU_KEY) ==null ? null : request.get(K8sParamConstants.QUANTITY_CPU_KEY).getFormat(),
request.get(K8sParamConstants.QUANTITY_CPU_KEY) == null ? null : request.get(K8sParamConstants.QUANTITY_CPU_KEY).getFormat(),
usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getFormat(),
request.get(K8sParamConstants.QUANTITY_MEMORY_KEY) == null ? null : request.get(K8sParamConstants.QUANTITY_MEMORY_KEY).getAmount(),
usage.get(K8sParamConstants.QUANTITY_MEMORY_KEY).getAmount(),
@@ -430,7 +486,8 @@ public class MetricsApiImpl implements MetricsApi {
ptContainerMetricsResult.setGpuUsed(count);
}
ptPodsVOList.add(ptContainerMetricsResult);
};
}
;

return ptPodsVOList;
}
@@ -443,7 +500,7 @@ public class MetricsApiImpl implements MetricsApi {
*/
@Override
public List<PtContainerMetricsVO> getContainerMetrics(String namespace) {
if(StringUtils.isEmpty(namespace)){
if (StringUtils.isEmpty(namespace)) {
return Collections.EMPTY_LIST;
}
try {


+ 3
- 1
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelOptJobApiImpl.java View File

@@ -212,6 +212,7 @@ public class ModelOptJobApiImpl implements ModelOptJobApi {
private List<VolumeMount> volumeMounts;
private List<Volume> volumes;
private String businessLabel;
private String taskIdentifyLabel;
private Integer gpuNum;

private String errCode;
@@ -231,6 +232,7 @@ public class ModelOptJobApiImpl implements ModelOptJobApi {
Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString())));
Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT)));
this.businessLabel = bo.getBusinessLabel();
this.taskIdentifyLabel = bo.getTaskIdentifyLabel();
this.fsMounts = bo.getFsMounts();
this.baseLabels = LabelUtils.getBaseLabels(baseName, businessLabel);

@@ -372,7 +374,7 @@ public class ModelOptJobApiImpl implements ModelOptJobApi {
* @return Job 任务job类
*/
private Job buildJob() {
Map<String, String> childLabels = LabelUtils.getChildLabels(baseName, jobName, K8sKindEnum.JOB.getKind(), businessLabel);
Map<String, String> childLabels = LabelUtils.getChildLabels(baseName, jobName, K8sKindEnum.JOB.getKind(), businessLabel, taskIdentifyLabel);
return new JobBuilder()
.withNewMetadata()
.withName(jobName)


+ 3
- 3
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelServingApiImpl.java View File

@@ -148,7 +148,7 @@ public class ModelServingApiImpl implements ModelServingApi {

//标签生成
Map<String, String> baseLabels = LabelUtils.getBaseLabels(bo.getResourceName(), bo.getBusinessLabel());
Map<String, String> podLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel());
Map<String, String> podLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel(), bo.getTaskIdentifyLabel());

//部署deployment
Deployment deployment = buildDeployment(bo, volumeVO, deploymentName);
@@ -266,7 +266,7 @@ public class ModelServingApiImpl implements ModelServingApi {
* @return Deployment
*/
private Deployment buildDeployment(ModelServingBO bo, VolumeVO volumeVO, String deploymentName) {
Map<String, String> childLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel());
Map<String, String> childLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel(),bo.getTaskIdentifyLabel());
LabelSelector labelSelector = new LabelSelector();
labelSelector.setMatchLabels(childLabels);
return new DeploymentBuilder()
@@ -285,7 +285,7 @@ public class ModelServingApiImpl implements ModelServingApi {
.withNamespace(bo.getNamespace())
.endMetadata()
.withNewSpec()
.addToNodeSelector(k8sUtils.gpuSelector(bo.getGpuNum()))
.addToNodeSelector(K8sUtils.gpuSelector(bo.getGpuNum()))
.addToContainers(buildContainer(bo, volumeVO, deploymentName))
.addToVolumes(volumeVO.getVolumes().toArray(new Volume[0]))
.withRestartPolicy(RestartPolicyEnum.ALWAYS.getRestartPolicy())


+ 9
- 4
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NamespaceApiImpl.java View File

@@ -24,6 +24,7 @@ import io.fabric8.kubernetes.api.model.NamespaceList;
import io.fabric8.kubernetes.api.model.ResourceQuota;
import io.fabric8.kubernetes.client.KubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClientException;
import org.dubhe.biz.base.service.UserContextService;
import org.dubhe.biz.log.enums.LogEnum;
import org.dubhe.k8s.annotation.K8sValidation;
import org.dubhe.k8s.api.NamespaceApi;
@@ -60,13 +61,16 @@ public class NamespaceApiImpl implements NamespaceApi {
@Autowired
private ResourceQuotaApi resourceQuotaApi;

@Value("${k8s.namespace-limits.cpu}")
@Autowired
private UserContextService userContextService;

@Value("${user.config.cpu-limit}")
private Integer cpuLimit;

@Value("${k8s.namespace-limits.memory}")
@Value("${user.config.memory-limit}")
private Integer memoryLimit;

@Value("${k8s.namespace-limits.gpu}")
@Value("${user.config.gpu-limit}")
private Integer gpuLimit;


@@ -110,7 +114,8 @@ public class NamespaceApiImpl implements NamespaceApi {
if (StringUtils.isEmpty(namespace)) {
return new BizNamespace().baseErrorBadRequest();
}
return BizConvertUtils.toBizNamespace(client.namespaces().withName(namespace).get());
Namespace namespaceEntity = client.namespaces().withName(namespace).get();
return BizConvertUtils.toBizNamespace(namespaceEntity);
}

/**


+ 13
- 1
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PersistentVolumeClaimApiImpl.java View File

@@ -27,6 +27,7 @@ import org.dubhe.biz.base.utils.StringUtils;
import org.dubhe.biz.log.enums.LogEnum;
import org.dubhe.biz.log.utils.LogUtil;
import org.dubhe.k8s.api.PersistentVolumeClaimApi;
import org.dubhe.k8s.constant.K8sLabelConstants;
import org.dubhe.k8s.constant.K8sParamConstants;
import org.dubhe.k8s.domain.PtBaseResult;
import org.dubhe.k8s.domain.bo.PtPersistentVolumeClaimBO;
@@ -137,7 +138,7 @@ public class PersistentVolumeClaimApiImpl implements PersistentVolumeClaimApi {
//创建pv
PersistentVolume pv = new PersistentVolumeBuilder()
.withNewMetadata().addToLabels(pvLabels).withName(bo.getPvcName() + PV_SUFFIX).endMetadata()
.withNewSpec().addToCapacity(STORAGE, new Quantity(bo.getRequest())).addNewAccessMode(AccessModeEnum.READ_WRITE_ONCE.getType()).withNewPersistentVolumeReclaimPolicy(PvReclaimPolicyEnum.RECYCLE.getPolicy())
// Use the reclaim policy supplied on the BO when one is present; fall back to RECYCLE only when it is empty.
.withNewSpec().addToCapacity(STORAGE, new Quantity(bo.getRequest())).addNewAccessMode(AccessModeEnum.READ_WRITE_ONCE.getType()).withNewPersistentVolumeReclaimPolicy(StringUtils.isNotEmpty(bo.getReclaimPolicy())?bo.getReclaimPolicy():PvReclaimPolicyEnum.RECYCLE.getPolicy())
.withNewHostPath().withNewPath(bo.getPath()).withType(K8sParamConstants.HOST_PATH_TYPE).endHostPath()
.endSpec()
.build();
@@ -356,6 +357,17 @@ public class PersistentVolumeClaimApiImpl implements PersistentVolumeClaimApi {
return client.persistentVolumes().withName(pvName).delete();
}

/**
 * Deletes the PersistentVolumes labelled with the given resource name.
 *
 * @param resourceName resource name, matched against the {@code K8sLabelConstants.BASE_TAG_SOURCE} label
 * @return boolean the result reported by the client's delete call
 */
@Override
public boolean deletePvByResourceName(String resourceName) {
    final Boolean deleted = client.persistentVolumes()
            .withLabel(K8sLabelConstants.BASE_TAG_SOURCE, resourceName)
            .delete();
    return deleted;
}

/**
* 查询PV
*


+ 2
- 2
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ResourceQuotaApiImpl.java View File

@@ -90,7 +90,7 @@ public class ResourceQuotaApiImpl implements ResourceQuotaApi {
resourceQuota = new ResourceQuotaBuilder().withNewMetadata().withName(bo.getName()).endMetadata()
.withNewSpec().withHard(hard).endSpec().build();
}
BizResourceQuota bizResourceQuota = BizConvertUtils.toBizResourceQuota(client.resourceQuotas().inNamespace(bo.getNamespace()).create(resourceQuota));
BizResourceQuota bizResourceQuota = BizConvertUtils.toBizResourceQuota(client.resourceQuotas().inNamespace(bo.getNamespace()).createOrReplace(resourceQuota));
LogUtil.info(LogEnum.BIZ_K8S,"Output {}", bizResourceQuota);
return bizResourceQuota;
} catch (KubernetesClientException e) {
@@ -217,7 +217,7 @@ public class ResourceQuotaApiImpl implements ResourceQuotaApi {
}

BizQuantity memRemainder = remainder.get(K8sParamConstants.RESOURCE_QUOTA_MEMORY_LIMITS_KEY);
if (memRemainder != null && memRemainder != null){
if (memRemainder != null && memNum != null){
if (UnitConvertUtils.memFormatToMi(memRemainder.getAmount(),memRemainder.getFormat()) < memNum){
return LimitsOfResourcesEnum.LIMITS_OF_MEM;
}


+ 205
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TerminalApiImpl.java View File

@@ -0,0 +1,205 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.k8s.api.impl;

import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.util.RandomUtil;
import cn.hutool.core.util.StrUtil;
import io.fabric8.kubernetes.api.model.Quantity;
import io.fabric8.kubernetes.api.model.Service;
import io.fabric8.kubernetes.api.model.ServiceList;
import io.fabric8.kubernetes.api.model.apps.Deployment;
import io.fabric8.kubernetes.api.model.apps.DeploymentList;
import io.fabric8.kubernetes.client.KubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClientException;
import org.dubhe.biz.base.constant.MagicNumConstant;
import org.dubhe.biz.base.constant.SymbolConstant;
import org.dubhe.biz.file.api.FileStoreApi;
import org.dubhe.biz.log.enums.LogEnum;
import org.dubhe.biz.log.utils.LogUtil;
import org.dubhe.k8s.api.NodeApi;
import org.dubhe.k8s.api.PersistentVolumeClaimApi;
import org.dubhe.k8s.api.PodApi;
import org.dubhe.k8s.api.ResourceIisolationApi;
import org.dubhe.k8s.api.ResourceQuotaApi;
import org.dubhe.k8s.api.TerminalApi;
import org.dubhe.k8s.api.VolumeApi;
import org.dubhe.k8s.constant.K8sParamConstants;
import org.dubhe.k8s.domain.PtBaseResult;
import org.dubhe.k8s.domain.bo.BuildFsVolumeBO;
import org.dubhe.k8s.domain.bo.BuildServiceBO;
import org.dubhe.k8s.domain.bo.TerminalBO;
import org.dubhe.k8s.domain.vo.PtJupyterDeployVO;
import org.dubhe.k8s.domain.vo.TerminalResourceVO;
import org.dubhe.k8s.domain.vo.VolumeVO;
import org.dubhe.k8s.enums.K8sKindEnum;
import org.dubhe.k8s.enums.K8sResponseEnum;
import org.dubhe.k8s.enums.LackOfResourcesEnum;
import org.dubhe.k8s.enums.LimitsOfResourcesEnum;
import org.dubhe.k8s.enums.ServiceTypeENum;
import org.dubhe.k8s.utils.BizConvertUtils;
import org.dubhe.k8s.utils.K8sUtils;
import org.dubhe.k8s.utils.LabelUtils;
import org.dubhe.k8s.utils.ResourceBuildUtils;
import org.dubhe.k8s.utils.YamlUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.CollectionUtils;

import javax.annotation.Resource;
import java.util.Map;

/**
 * @description Terminal (professional edition) API implementation: creates, deletes and queries
 * the Deployment and Service that back a terminal resource.
 * @date 2021-06-29
 */
public class TerminalApiImpl implements TerminalApi {
    private final K8sUtils k8sUtils;
    private final KubernetesClient client;
    @Resource(name = "hostFileStoreApiImpl")
    private FileStoreApi fileStoreApi;
    @Autowired
    private VolumeApi volumeApi;
    @Autowired
    private PersistentVolumeClaimApi persistentVolumeClaimApi;
    @Autowired
    private NodeApi nodeApi;
    @Autowired
    private PodApi podApi;
    @Autowired
    private ResourceQuotaApi resourceQuotaApi;
    @Autowired
    private ResourceIisolationApi resourceIisolationApi;

    public TerminalApiImpl(K8sUtils k8sUtils) {
        this.k8sUtils = k8sUtils;
        this.client = k8sUtils.getClient();
    }

    /**
     * Creates the Deployment and the NodePort Service of a terminal.
     *
     * @param bo creation parameters (namespace, resource name, cpu/mem/gpu amounts, mounts, ports, labels)
     * @return TerminalResourceVO the created Deployment and Service, or an error result when a
     * resource check, directory creation, volume building or the Kubernetes call fails
     */
    @Override
    public TerminalResourceVO create(TerminalBO bo) {
        try {
            LogUtil.info(LogEnum.BIZ_K8S, "Params of creating TerminalApiImpl--create:{}", bo);
            // Resource quota check
            LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum());
            if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) {
                return new TerminalResourceVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), limitsOfResources.getMessage());
            }
            LackOfResourcesEnum lack = nodeApi.isAllocatable(bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum());
            if (!LackOfResourcesEnum.ADEQUATE.equals(lack)) {
                return new TerminalResourceVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), lack.getMessage());
            }
            if (!fileStoreApi.createDirs(bo.getDirList().toArray(new String[MagicNumConstant.ZERO]))) {
                return new TerminalResourceVO().error(K8sResponseEnum.INTERNAL_SERVER_ERROR.getCode(), K8sResponseEnum.INTERNAL_SERVER_ERROR.getMessage());
            }

            // Build the storage volumes
            VolumeVO volumeVO = volumeApi.buildFsVolumes(new BuildFsVolumeBO(bo.getNamespace(), bo.getResourceName(), bo.getFsMounts()));
            if (!K8sResponseEnum.SUCCESS.getCode().equals(volumeVO.getCode())) {
                return new TerminalResourceVO().error(volumeVO.getCode(), volumeVO.getMessage());
            }

            // Shared memory: half of the requested memory, or BINARY_TEN_EXP when no memory amount is given
            Integer shmMemAmount = bo.getMemNum() == null ? MagicNumConstant.BINARY_TEN_EXP : bo.getMemNum() / MagicNumConstant.TWO;
            volumeVO.addShmFsVolume(new Quantity(String.valueOf(shmMemAmount), K8sParamConstants.MEM_UNIT));

            // Generate names (random suffixes)
            String deploymentName = StrUtil.format(K8sParamConstants.RESOURCE_NAME_TEMPLATE, bo.getResourceName(), RandomUtil.randomString(MagicNumConstant.EIGHT));
            String svcName = StrUtil.format(K8sParamConstants.SUB_RESOURCE_NAME_TEMPLATE, bo.getResourceName(), K8sParamConstants.SVC_SUFFIX, RandomUtil.randomString(MagicNumConstant.FIVE));

            // Generate labels
            Map<String, String> baseLabels = LabelUtils.getBaseLabels(bo.getResourceName(), bo.getBusinessLabel());
            Map<String, String> podLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel(), bo.getTaskIdentifyLabel());

            // Deploy the deployment
            Deployment deployment = ResourceBuildUtils.buildDeployment(bo, volumeVO, deploymentName);
            LogUtil.info(LogEnum.BIZ_K8S, "Ready to deploy {}, yaml信息为{}", deploymentName, YamlUtils.dumpAsYaml(deployment));
            resourceIisolationApi.addIisolationInfo(deployment);
            Deployment deploymentResult = client.apps().deployments().inNamespace(bo.getNamespace()).create(deployment);

            // Deploy the service, exposing every requested port under the same port number
            BuildServiceBO buildServiceBO = new BuildServiceBO(bo.getNamespace(), svcName, baseLabels, podLabels, ServiceTypeENum.NODE_PORT.getType());
            if (!CollectionUtils.isEmpty(bo.getPorts())) {
                bo.getPorts().forEach(port -> {
                    buildServiceBO.addPort(ResourceBuildUtils.buildServicePort(port, port, SymbolConstant.PORT + SymbolConstant.HYPHEN + port));
                });
            }
            Service service = ResourceBuildUtils.buildService(buildServiceBO);
            LogUtil.info(LogEnum.BIZ_K8S, "Ready to deploy {}, yaml信息为{}", svcName, YamlUtils.dumpAsYaml(service));
            Service serviceResult = client.services().create(service);
            return new TerminalResourceVO(BizConvertUtils.toBizDeployment(deploymentResult), BizConvertUtils.toBizService(serviceResult));
        } catch (KubernetesClientException e) {
            LogUtil.error(LogEnum.BIZ_K8S, "TerminalApiImpl.create error, param:{} error:", bo, e);
            return new TerminalResourceVO().error(String.valueOf(e.getCode()), e.getMessage());
        }
    }

    /**
     * Deletes the PVCs, PVs, Services and Deployments that belong to a terminal resource.
     *
     * @param namespace    namespace
     * @param resourceName resource name
     * @return PtBaseResult a default result when no Deployment matches or both deletions report
     * success, {@code K8sResponseEnum.REPEAT} when either deletion reports failure, or the
     * Kubernetes error code and message on a client exception
     */
    @Override
    public PtBaseResult delete(String namespace, String resourceName) {
        try {
            LogUtil.info(LogEnum.BIZ_K8S, "delete Terminal namespace:{} resourceName:{}", namespace, resourceName);
            DeploymentList deploymentList = client.apps().deployments().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).list();
            if (deploymentList == null || CollectionUtil.isEmpty(deploymentList.getItems())) {
                return new PtBaseResult();
            }
            persistentVolumeClaimApi.delete(namespace, resourceName);
            persistentVolumeClaimApi.deletePvByResourceName(resourceName);
            // Run both deletions unconditionally: a failed or empty Service deletion must not
            // prevent the Deployment from being removed.
            boolean servicesDeleted = client.services().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).delete();
            boolean deploymentsDeleted = client.apps().deployments().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).delete();
            if (servicesDeleted && deploymentsDeleted) {
                return new PtBaseResult();
            } else {
                return K8sResponseEnum.REPEAT.toPtBaseResult();
            }
        } catch (KubernetesClientException e) {
            LogUtil.error(LogEnum.BIZ_K8S, "delete error:", e);
            return new PtBaseResult(String.valueOf(e.getCode()), e.getMessage());
        }
    }

    /**
     * Queries the Deployment and Service of a terminal resource.
     *
     * @param namespace    namespace
     * @param resourceName resource name
     * @return TerminalResourceVO the first matching Deployment and Service (each converted from
     * {@code null} when nothing matches), or an error result on a client exception
     */
    @Override
    public TerminalResourceVO get(String namespace, String resourceName) {
        try {
            ServiceList svcList = client.services().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).list();
            Service svc = CollectionUtil.isEmpty(svcList.getItems()) ? null : svcList.getItems().get(0);
            DeploymentList deploymentList = client.apps().deployments().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).list();
            Deployment deployment = CollectionUtil.isEmpty(deploymentList.getItems()) ? null : deploymentList.getItems().get(0);
            return new TerminalResourceVO(BizConvertUtils.toBizDeployment(deployment), BizConvertUtils.toBizService(svc));
        } catch (KubernetesClientException e) {
            LogUtil.error(LogEnum.BIZ_K8S, "get error:", e);
            return new TerminalResourceVO().error(String.valueOf(e.getCode()), e.getMessage());
        }
    }
}

+ 8
- 6
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TrainJobApiImpl.java View File

@@ -151,7 +151,7 @@ public class TrainJobApiImpl implements TrainJobApi {
/**
* 根据命名空间和资源名删除Job
*
* @param namespace 命名空间
* @param namespace 命名空间
* @param resourceName 资源名称
* @return Boolean true成功 false失败
*/
@@ -224,6 +224,7 @@ public class TrainJobApiImpl implements TrainJobApi {
private List<VolumeMount> volumeMounts;
private List<Volume> volumes;
private String businessLabel;
private String taskIdentifyLabel;
private Integer delayCreate;
private Integer delayDelete;
private TaskYamlBO taskYamlBO;
@@ -249,6 +250,7 @@ public class TrainJobApiImpl implements TrainJobApi {

this.fsMounts = bo.getFsMounts();
businessLabel = bo.getBusinessLabel();
this.taskIdentifyLabel = bo.getTaskIdentifyLabel();
this.baseLabels = LabelUtils.getBaseLabels(baseName,bo.getBusinessLabel());

this.volumeMounts = new ArrayList<>();
@@ -345,8 +347,8 @@ public class TrainJobApiImpl implements TrainJobApi {
* 挂载存储
*
* @param mountPath 挂载路径
* @param dirBO 挂载路径参数
* @param num 名称序号
* @param dirBO 挂载路径参数
* @param num 名称序号
* @return boolean true成功 false失败
*/
private boolean buildFsVolumes(String mountPath,PtMountDirBO dirBO,int num){
@@ -369,8 +371,8 @@ public class TrainJobApiImpl implements TrainJobApi {
* 按照存储资源声明挂载存储
*
* @param mountPath 挂载路径
* @param dirBO 挂载路径参数
* @param i 名称序号
* @param dirBO 挂载路径参数
* @param i 名称序号
* @return boolean true成功 false失败
*/
private boolean buildFsPvcVolumes(String mountPath,PtMountDirBO dirBO,int i){
@@ -456,7 +458,7 @@ public class TrainJobApiImpl implements TrainJobApi {
.withNewTemplate()
.withNewMetadata()
.withName(jobName)
.addToLabels(LabelUtils.getChildLabels(baseName, jobName, K8sKindEnum.JOB.getKind(),businessLabel))
.addToLabels(LabelUtils.getChildLabels(baseName, jobName, K8sKindEnum.JOB.getKind(),businessLabel, taskIdentifyLabel))
.withNamespace(namespace)
.endMetadata()
.withNewSpec()


+ 1
- 1
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/VolumeApiImpl.java View File

@@ -61,7 +61,7 @@ public class VolumeApiImpl implements VolumeApi {
for (Map.Entry<String, PtMountDirBO> mount : bo.getFsMounts().entrySet()) {
boolean availableMount = (mount != null && StringUtils.isNotEmpty(mount.getKey()) && mount.getValue() != null && StringUtils.isNotEmpty(mount.getValue().getDir()));
if (availableMount){
boolean success = mount.getValue().isRecycle()?buildFsPvcVolumes(bo,volumeVO,mount.getKey(),mount.getValue(),i):buildFsVolumes(volumeVO,mount.getKey(),mount.getValue(),i);
boolean success = (mount.getValue().isRecycle() || (StringUtils.isNotEmpty(mount.getValue().getLimit()) || StringUtils.isNotEmpty(mount.getValue().getRequest())))?buildFsPvcVolumes(bo,volumeVO,mount.getKey(),mount.getValue(),i):buildFsVolumes(volumeVO,mount.getKey(),mount.getValue(),i);
if (!success){
break;
}


+ 51
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/cache/ResourceCache.java View File

@@ -19,6 +19,8 @@ package org.dubhe.k8s.cache;

import cn.hutool.core.collection.CollectionUtil;
import org.dubhe.biz.base.constant.MagicNumConstant;
import org.dubhe.biz.base.constant.NumberConstant;
import org.dubhe.biz.base.constant.StringConstant;
import org.dubhe.biz.log.enums.LogEnum;
import org.dubhe.biz.redis.utils.RedisUtils;
import org.dubhe.k8s.api.PodApi;
@@ -33,6 +35,7 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.util.CollectionUtils;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -188,6 +191,17 @@ public class ResourceCache {
}
}

/**
 * Checks whether a cache entry exists for the given pod name.
 *
 * @param podName name of the Pod
 * @return boolean true when a non-empty value is stored under the pod-name key, false otherwise
 */
public boolean isPodNameCached(String podName){
    final String cacheKey = podNamePrefix + podName;
    return StringUtils.isNotEmpty((String) redisUtils.get(cacheKey));
}

/**
* 删除pod名称缓存
*
@@ -235,4 +249,41 @@ public class ResourceCache {
return false;
}
}

/**
 * Caches a task identity: stores the task id and name in a hash keyed by the identity, and
 * stores the identity under {@code taskIdPrefix + taskId}. Both entries expire after
 * {@code NumberConstant.MONTH_SECOND}.
 *
 * @param taskIdentify task identity
 * @param taskId       task ID
 * @param taskName     task name
 * @param taskIdPrefix prefix of the task ID key
 * @return boolean true when both writes succeed; the second write is skipped when the first fails
 */
public boolean addTaskCache(String taskIdentify, Long taskId, String taskName, String taskIdPrefix){
    // A plain HashMap instead of double-brace initialisation: the anonymous subclass would
    // hold a hidden reference to this ResourceCache instance.
    HashMap<String, Object> taskInfo = new HashMap<>();
    taskInfo.put(StringConstant.CACHE_TASK_ID, taskId);
    taskInfo.put(StringConstant.CACHE_TASK_NAME, taskName);
    return redisUtils.hmset(taskIdentify, taskInfo, NumberConstant.MONTH_SECOND)
            && redisUtils.set(taskIdPrefix + String.valueOf(taskId), taskIdentify, NumberConstant.MONTH_SECOND);
}

/**
 * Returns the identity cached for a task. When none is cached, generates a new UUID, caches
 * it together with the task id and name, and returns it.
 *
 * @param taskId       task ID
 * @param taskName     task name
 * @param taskIdPrefix prefix of the task ID key
 * @return String task identity
 */
public String getTaskIdentify(Long taskId, String taskName, String taskIdPrefix){
    String taskIdentify = (String) redisUtils.get(taskIdPrefix + String.valueOf(taskId));
    if (taskIdentify == null){
        taskIdentify = StringUtils.getUUID();
        // A plain HashMap instead of double-brace initialisation: the anonymous subclass would
        // hold a hidden reference to this ResourceCache instance.
        HashMap<String, Object> taskInfo = new HashMap<>();
        taskInfo.put(StringConstant.CACHE_TASK_ID, taskId);
        taskInfo.put(StringConstant.CACHE_TASK_NAME, taskName);
        redisUtils.hmset(taskIdentify, taskInfo, NumberConstant.MONTH_SECOND);
        redisUtils.set(taskIdPrefix + taskId, taskIdentify, NumberConstant.MONTH_SECOND);
    }
    return taskIdentify;
}
}

+ 45
- 3
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/config/K8sConfig.java View File

@@ -23,8 +23,38 @@ import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.dubhe.biz.log.enums.LogEnum;
import org.dubhe.biz.log.utils.LogUtil;
import org.dubhe.k8s.api.*;
import org.dubhe.k8s.api.impl.*;
import org.dubhe.k8s.api.DistributeTrainApi;
import org.dubhe.k8s.api.DubheDeploymentApi;
import org.dubhe.k8s.api.JupyterResourceApi;
import org.dubhe.k8s.api.LimitRangeApi;
import org.dubhe.k8s.api.LogMonitoringApi;
import org.dubhe.k8s.api.MetricsApi;
import org.dubhe.k8s.api.ModelOptJobApi;
import org.dubhe.k8s.api.ModelServingApi;
import org.dubhe.k8s.api.NamespaceApi;
import org.dubhe.k8s.api.NativeResourceApi;
import org.dubhe.k8s.api.NodeApi;
import org.dubhe.k8s.api.PersistentVolumeClaimApi;
import org.dubhe.k8s.api.PodApi;
import org.dubhe.k8s.api.ResourceQuotaApi;
import org.dubhe.k8s.api.TerminalApi;
import org.dubhe.k8s.api.TrainJobApi;
import org.dubhe.k8s.api.impl.DistributeTrainApiImpl;
import org.dubhe.k8s.api.impl.DubheDeploymentApiImpl;
import org.dubhe.k8s.api.impl.JupyterResourceApiImpl;
import org.dubhe.k8s.api.impl.LimitRangeApiImpl;
import org.dubhe.k8s.api.impl.LogMonitoringApiImpl;
import org.dubhe.k8s.api.impl.MetricsApiImpl;
import org.dubhe.k8s.api.impl.ModelOptJobApiImpl;
import org.dubhe.k8s.api.impl.ModelServingApiImpl;
import org.dubhe.k8s.api.impl.NamespaceApiImpl;
import org.dubhe.k8s.api.impl.NativeResourceApiImpl;
import org.dubhe.k8s.api.impl.NodeApiImpl;
import org.dubhe.k8s.api.impl.PersistentVolumeClaimApiImpl;
import org.dubhe.k8s.api.impl.PodApiImpl;
import org.dubhe.k8s.api.impl.ResourceQuotaApiImpl;
import org.dubhe.k8s.api.impl.TerminalApiImpl;
import org.dubhe.k8s.api.impl.TrainJobApiImpl;
import org.dubhe.k8s.cache.ResourceCache;
import org.dubhe.k8s.properties.ClusterProperties;
import org.dubhe.k8s.utils.K8sUtils;
@@ -37,6 +67,7 @@ import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.socket.server.standard.ServerEndpointExporter;

import java.io.IOException;

@@ -166,7 +197,6 @@ public class K8sConfig {
public RequestConfig.Builder customizeRequestConfig(RequestConfig.Builder builder) {
builder.setSocketTimeout(TEN_THOUSAND);
return builder;

}
}));
}
@@ -175,4 +205,16 @@ public class K8sConfig {
// Builds the ModelServingApi implementation on top of the supplied K8sUtils.
public ModelServingApi modelServingApi(K8sUtils k8sUtils){
return new ModelServingApiImpl(k8sUtils);
}

/**
* Exposes a Spring {@link ServerEndpointExporter} bean.
* NOTE(review): presumably required so that WebSocket endpoints annotated with
* {@code @ServerEndpoint} are registered (e.g. for the terminal feature) - confirm.
* The method (and therefore bean) name "handlerAdapter" does not reflect the returned type.
*
* @return a new ServerEndpointExporter
*/
@Bean
public ServerEndpointExporter handlerAdapter() {
return new ServerEndpointExporter();
}


/**
* Exposes the TerminalApi implementation as a bean.
*
* @param k8sUtils helper passed to the TerminalApiImpl constructor
* @return a new TerminalApiImpl
*/
@Bean
public TerminalApi terminalApi(K8sUtils k8sUtils){
return new TerminalApiImpl(k8sUtils);
}

}

+ 4
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sLabelConstants.java View File

@@ -38,6 +38,10 @@ public class K8sLabelConstants {
* 业务标签,用于标识业务,由业务层传入
*/
public final static String BASE_TAG_BUSINESS = "platform/business";
/**
* 任务身份标签,用于标识任务身份,由业务层传入
*/
public final static String BASE_TAG_TASK_IDENTIFY = "platform/task-identify";
/**
* 运行环境标签,用于对不同环境回调进行分流
*/


+ 5
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sParamConstants.java View File

@@ -94,4 +94,9 @@ public class K8sParamConstants {
*/
public static final String RESOURCE_QUOTA_GPU_LIMITS_KEY = "requests.nvidia.com/gpu";

//pod containerID 前缀
public static final String CONTAINER_ID_PREFIX = "docker://";

public static final String WAITING_REASON_CONTAINER_CREATING = "ContainerCreating";

}

+ 9
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/BuildServiceBO.java View File

@@ -37,6 +37,7 @@ public class BuildServiceBO {
private Map<String, String> labels;
private Map<String, String> selector;
private List<ServicePort> ports;
private String type;

public BuildServiceBO(String namespace, String name, Map<String, String> labels, Map<String, String> selector){
this.namespace = namespace;
@@ -45,6 +46,14 @@ public class BuildServiceBO {
this.selector = selector;
}

/**
* Creates a service build description that additionally carries a type.
*
* @param namespace namespace of the service
* @param name name of the service
* @param labels labels stored on this BO
* @param selector selector stored on this BO
* @param type value stored in {@code type}; presumably the Kubernetes Service type - confirm against callers
*/
public BuildServiceBO(String namespace, String name, Map<String, String> labels, Map<String, String> selector,String type){
this.namespace = namespace;
this.name = name;
this.labels = labels;
this.selector = selector;
this.type = type;
}

/**
* 添加端口
* @param port


+ 168
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DeploymentBO.java View File

@@ -0,0 +1,168 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.k8s.domain.bo;

import cn.hutool.core.collection.CollectionUtil;
import lombok.Data;
import lombok.experimental.Accessors;
import org.dubhe.biz.base.constant.MagicNumConstant;
import org.dubhe.biz.base.utils.StringUtils;
import org.dubhe.k8s.annotation.K8sValidation;
import org.dubhe.k8s.enums.ValidationTypeEnum;
import org.springframework.util.CollectionUtils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * @description Deployment BO: carries the namespace, resource name, replica count, resource amounts,
 * image, command, storage mounts, labels and ports of a deployment
 * @date 2021-06-30
 */
@Data
@Accessors(chain = true)
public class DeploymentBO {
    /**
     * Namespace
     **/
    @K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME)
    private String namespace;
    /**
     * Resource name
     **/
    @K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME)
    private String resourceName;
    /**
     * Number of desired pods
     */
    private Integer replicas;
    /**
     * Number of GPUs
     **/
    private Integer gpuNum;
    /**
     * Memory amount, unit: Mi
     **/
    private Integer memNum;
    /**
     * Number of CPUs
     **/
    private Integer cpuNum;
    /**
     * Image name
     **/
    private String image;
    /**
     * Command to execute
     **/
    private List<String> cmdLines;
    /**
     * File storage mounts. key: mount path inside the pod, value: storage path and its configuration
     **/
    private Map<String, PtMountDirBO> fsMounts;
    /**
     * Business label, identifies the business module
     **/
    @K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME)
    private String businessLabel;
    /**
     * Task identity label, identifies the task
     **/
    private String taskIdentifyLabel;
    /**
     * Ports
     */
    private Set<Integer> ports;

    /**
     * Returns the storage directories of all configured mounts.
     *
     * @return the {@code dir} of every entry in {@code fsMounts}; an empty list when no mount is configured
     */
    public List<String> getDirList(){
        if (CollectionUtil.isNotEmpty(fsMounts)){
            return fsMounts.values().stream().map(PtMountDirBO::getDir).collect(Collectors.toList());
        }
        return new ArrayList<>();
    }

    /**
     * Registers a storage mount given only its directory.
     * The call is ignored when either argument is empty.
     *
     * @param mountPath path inside the container
     * @param dir storage directory
     * @return this instance, for chaining
     */
    public DeploymentBO putfsMounts(String mountPath, String dir){
        if (StringUtils.isNotEmpty(mountPath) && StringUtils.isNotEmpty(dir)){
            if (fsMounts == null){
                fsMounts = new HashMap<>(MagicNumConstant.TWO);
            }
            fsMounts.put(mountPath,new PtMountDirBO(dir));
        }
        return this;
    }

    /**
     * Registers a storage mount given its directory configuration.
     * The call is ignored when the mount path is empty or the configuration is null.
     *
     * @param mountPath path inside the container
     * @param dir storage directory and its configuration
     * @return this instance, for chaining
     */
    public DeploymentBO putfsMounts(String mountPath, PtMountDirBO dir){
        if (StringUtils.isNotEmpty(mountPath) && dir != null){
            if (fsMounts == null){
                fsMounts = new HashMap<>(MagicNumConstant.TWO);
            }
            fsMounts.put(mountPath,dir);
        }
        return this;
    }

    /**
     * Adds a single port; a null port is ignored.
     *
     * @param port port to add
     */
    public void addPort(Integer port){
        if (port == null){
            return;
        }
        if (ports == null){
            ports = new HashSet<>();
        }
        ports.add(port);
    }

    /**
     * Adds several ports; a null or empty set is ignored.
     *
     * @param ports ports to add
     */
    public void addPorts(Set<Integer> ports){
        if (CollectionUtils.isEmpty(ports)){
            return;
        }
        // The parameter shadows the field, so the field must be addressed through "this";
        // otherwise the argument would merely be added to itself and the field left untouched.
        if (this.ports == null){
            this.ports = new HashSet<>();
        }
        this.ports.addAll(ports);
    }
}

+ 4
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DistributeTrainBO.java View File

@@ -84,6 +84,10 @@ public class DistributeTrainBO {
* 业务标签,用于标识业务模块
**/
private String businessLabel;
/**
* 任务身份标签,用于标识任务身份
*/
private String taskIdentifyLabel;
/**
* 延时创建时间,单位:分钟
***/


+ 4
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/ModelServingBO.java View File

@@ -81,6 +81,10 @@ public class ModelServingBO {
**/
@K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME)
private String businessLabel;
/**
* 任务身份标签,用于标识任务身份
**/
private String taskIdentifyLabel;
/**
* http服务端口,null则不开放http服务
*/


+ 132
- 26
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PrometheusMetricBO.java View File

@@ -17,15 +17,18 @@

package org.dubhe.k8s.domain.bo;

import cn.hutool.core.util.NumberUtil;
import lombok.Data;
import org.dubhe.biz.base.constant.MagicNumConstant;
import org.dubhe.biz.base.functional.StringFormat;
import org.dubhe.k8s.domain.vo.GpuUsageVO;
import org.dubhe.k8s.domain.vo.MetricsDataResultVO;
import org.dubhe.k8s.domain.vo.GpuTotalMemResultVO;
import org.dubhe.k8s.domain.vo.MetricsDataResultValueVO;
import org.springframework.util.CollectionUtils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* @description Gpu 指标 BO
@@ -35,34 +38,70 @@ import java.util.List;
public class PrometheusMetricBO {
private String status;
private MetricData data;

/**
* 获取Gpu 使用率
* @return List<GpuUsageVO> gpu使用列表
* @return Map<String, Float> gpu使用率列表
*/
public Map<String, Float> getGpuUsage() {
    // Result map keyed by accelerator id (acc_id); stays empty when the response carries no results.
    Map<String, Float> usageByAccId = new HashMap<>();
    boolean hasResults = data != null && !CollectionUtils.isEmpty(data.getResult());
    if (hasResults) {
        // The element at index 1 of each sample is parsed as the usage value.
        data.getResult().forEach(item ->
                usageByAccId.put(item.getMetric().getAcc_id(), Float.valueOf(item.getValue().get(1).toString())));
    }
    return usageByAccId;
}

/**
 * Collects the GPU memory usage sample of every accelerator.
 *
 * @return map from accelerator id (acc_id) to the sample value divided by
 *         {@code MagicNumConstant.BINARY_TEN_EXP}; a non-numeric sample is reported as
 *         {@code MagicNumConstant.ZERO}; empty when the response carries no results
 */
public Map<String, String> getGpuMemValue() {
    Map<String, String> memByAccId = new HashMap<>();
    if (data == null || CollectionUtils.isEmpty(data.getResult())) {
        return memByAccId;
    }
    // NOTE(review): Long.valueOf presumably rejects numeric strings that are not plain integers - confirm the metric format.
    StringFormat scaleDown = raw -> {
        if (!NumberUtil.isNumber(String.valueOf(raw))) {
            return String.valueOf(MagicNumConstant.ZERO);
        }
        return String.valueOf(Long.valueOf(String.valueOf(raw)) / MagicNumConstant.BINARY_TEN_EXP);
    };
    data.getResult().forEach(item ->
            memByAccId.put(item.getMetric().getAcc_id(), scaleDown.format(item.getValue().get(1).toString())));
    return memByAccId;
}

/**
* 获取GPU显存总大小
* @return List<GpuTotalMemResultVO> GPU显存总大小列表
*/
public List<GpuUsageVO> getGpuUsage(){
List<GpuUsageVO> gpuUsageVOList = new ArrayList<>();
if (data == null || CollectionUtils.isEmpty(data.getResult())){
return gpuUsageVOList;
public List<GpuTotalMemResultVO> getGpuTotalMemValue() {
List<GpuTotalMemResultVO> gpuTotalMemValueVOList = new ArrayList<>();
if (data == null || CollectionUtils.isEmpty(data.getResult())) {
return gpuTotalMemValueVOList;
}
for (MetricResult result : data.getResult()){
gpuUsageVOList.add(new GpuUsageVO(result.getMetric().getAcc_id(),Float.valueOf(result.getValue().get(1).toString())));
StringFormat memMetricsFormat = (value) -> {
return NumberUtil.isNumber(String.valueOf(value)) ? String.valueOf(Long.valueOf(String.valueOf(value)) / MagicNumConstant.BINARY_TEN_EXP) : String.valueOf(MagicNumConstant.ZERO);
};
for (MetricResult result : data.getResult()) {
gpuTotalMemValueVOList.add(new GpuTotalMemResultVO(result.getMetric().getAcc_id(), memMetricsFormat.format(result.getValue().get(1).toString())));
}
return gpuUsageVOList;
return gpuTotalMemValueVOList;
}


/**
* 获取value 列表
* @return List<MetricsDataResultValueVO> 监控指标列表
*/
public List<MetricsDataResultValueVO> getValues(StringFormat stringFormat){
public List<MetricsDataResultValueVO> getValues(StringFormat stringFormat) {
List<MetricsDataResultValueVO> list = new ArrayList<>();
if (data == null || CollectionUtils.isEmpty(data.getResult())){
if (data == null || CollectionUtils.isEmpty(data.getResult())) {
return list;
}
for (MetricResult result : data.getResult()){
result.getValues().forEach(obj->{
list.add(new MetricsDataResultValueVO(obj.get(0).toString(),stringFormat.format(obj.get(1).toString())));
for (MetricResult result : data.getResult()) {
result.getValues().forEach(obj -> {
list.add(new MetricsDataResultValueVO(obj.get(0).toString(), stringFormat.format(obj.get(1).toString())));
});
}
return list;
@@ -72,31 +111,98 @@ public class PrometheusMetricBO {
* 获取value 列表
* @return List<MetricsDataResultValueVO> 监控指标列表
*/
public List<MetricsDataResultValueVO> getValues(MetricResult metricResult){
public List<MetricsDataResultValueVO> getValues(MetricResult metricResult) {
List<MetricsDataResultValueVO> list = new ArrayList<>();
if (metricResult == null || CollectionUtils.isEmpty(metricResult.getValues())){
if (metricResult == null || CollectionUtils.isEmpty(metricResult.getValues())) {
return list;
}
metricResult.getValues().forEach(obj->{
list.add(new MetricsDataResultValueVO(obj.get(0).toString(),obj.get(1).toString()));
metricResult.getValues().forEach(obj -> {
list.add(new MetricsDataResultValueVO(obj.get(0).toString(), obj.get(1).toString()));
});
return list;
}

/**
* 获取 result列表
* 获取 GPU使用率result列表
* @return List<MetricsDataResultVO> 监控指标列表
*/
public List<MetricsDataResultVO> getResults(){
List<MetricsDataResultVO> list = new ArrayList<>();
if (data == null || CollectionUtils.isEmpty(data.getResult())){
public Map<String, List<MetricsDataResultValueVO>> getGpuMetricsResults() {
    // Keyed by accelerator id (acc_id); each entry holds that result's values as returned by getValues(MetricResult).
    Map<String, List<MetricsDataResultValueVO>> valuesByAccId = new HashMap<>();
    boolean hasResults = data != null && !CollectionUtils.isEmpty(data.getResult());
    if (hasResults) {
        data.getResult().forEach(item -> valuesByAccId.put(item.getMetric().getAcc_id(), getValues(item)));
    }
    return valuesByAccId;
}

/**
* 获取value 列表
* @return List<MetricsDataResultValueVO> 监控指标列表
*/
public List<MetricsDataResultValueVO> getFormatValues(MetricResult metricResult, StringFormat stringFormat) {
List<MetricsDataResultValueVO> list = new ArrayList<>();
if (metricResult == null || CollectionUtils.isEmpty(metricResult.getValues())) {
return list;
}
for (MetricResult result : data.getResult()){
list.add(new MetricsDataResultVO(result.getMetric().getAcc_id(),getValues(result)));
metricResult.getValues().forEach(obj -> {
list.add(new MetricsDataResultValueVO(obj.get(0).toString(), stringFormat.format(obj.get(1).toString())));
});
return list;
}

/**
 * Collects the GPU memory usage series of every accelerator.
 *
 * @return map from accelerator id (acc_id) to that result's values, each value divided by
 *         {@code MagicNumConstant.BINARY_TEN_EXP} (non-numeric values become
 *         {@code MagicNumConstant.ZERO}); empty when the response carries no results
 */
public Map<String, List<MetricsDataResultValueVO>> getGpuMemResults() {
    Map<String, List<MetricsDataResultValueVO>> seriesByAccId = new HashMap<>();
    if (data == null || CollectionUtils.isEmpty(data.getResult())) {
        return seriesByAccId;
    }
    StringFormat scaleDown = raw -> {
        if (!NumberUtil.isNumber(String.valueOf(raw))) {
            return String.valueOf(MagicNumConstant.ZERO);
        }
        return String.valueOf(Long.valueOf(String.valueOf(raw)) / MagicNumConstant.BINARY_TEN_EXP);
    };
    data.getResult().forEach(item ->
            seriesByAccId.put(item.getMetric().getAcc_id(), getFormatValues(item, scaleDown)));
    return seriesByAccId;
}

/**
 * Returns the formatted value of the first sample of a metric result.
 *
 * @param metricResult metric result whose first sample is read
 * @param stringFormat formatter applied to the sample value (element at index 1 of the sample)
 * @return the formatted first value, or an empty string when the result is null or has no samples
 */
public String getGpuTotalValues(MetricResult metricResult, StringFormat stringFormat) {
    if (metricResult == null || CollectionUtils.isEmpty(metricResult.getValues())) {
        return "";
    }
    // Only the first sample is returned, so there is no need to format the remaining ones.
    return stringFormat.format(metricResult.getValues().iterator().next().get(1).toString());
}

/**
 * Collects the total GPU memory of every accelerator.
 *
 * @return one GpuTotalMemResultVO per result, pairing the accelerator id (acc_id) with the value
 *         returned by getGpuTotalValues using a formatter that divides by
 *         {@code MagicNumConstant.BINARY_TEN_EXP}; empty when the response carries no results
 */
public List<GpuTotalMemResultVO> getGpuTotalMemResults() {
    List<GpuTotalMemResultVO> totals = new ArrayList<>();
    if (data == null || CollectionUtils.isEmpty(data.getResult())) {
        return totals;
    }
    StringFormat scaleDown = raw -> {
        if (!NumberUtil.isNumber(String.valueOf(raw))) {
            return String.valueOf(MagicNumConstant.ZERO);
        }
        return String.valueOf(Long.valueOf(String.valueOf(raw)) / MagicNumConstant.BINARY_TEN_EXP);
    };
    data.getResult().forEach(item ->
            totals.add(new GpuTotalMemResultVO(item.getMetric().getAcc_id(), getGpuTotalValues(item, scaleDown))));
    return totals;
}

}

@Data


+ 4
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtDeploymentBO.java View File

@@ -57,4 +57,8 @@ public class PtDeploymentBO {
* 业务标签,用于标识业务模块
**/
private String businessLabel;
/**
* 任务身份标签,用于标识任务身份
**/
private String taskIdentifyLabel;
}

+ 4
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJobBO.java View File

@@ -57,4 +57,8 @@ public class PtJobBO {
* 业务标签,用于标识业务模块
**/
private String businessLabel;
/**
* 任务身份标签,用于标识任务身份
**/
private String taskIdentifyLabel;
}

+ 6
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterJobBO.java View File

@@ -65,10 +65,16 @@ public class PtJupyterJobBO {
private GraphicsCardTypeEnum graphicsCardType;
/**业务标签,用于标识业务模块**/
private String businessLabel;
/**任务身份标签,用于标识任务身份**/
private String taskIdentifyLabel;
/**延时创建时间,单位:分钟**/
private Integer delayCreateTime;
/**定时删除时间,相对于实际创建时间,单位:分钟**/
private Integer delayDeleteTime;
/**pip包路径**/
private String pipSitePackagePath;
/**pip包挂载路径**/
private String pipSitePackageMountPath;


public List<String> getDirList(){


+ 12
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterResourceBO.java View File

@@ -97,8 +97,20 @@ public class PtJupyterResourceBO {
* 业务标签,用于标识业务模块
**/
private String businessLabel;
/**
* 任务身份标签,用于标识任务唯一身份
**/
private String taskIdentifyLabel;
/**
* 定时删除时间,单位:分钟
**/
private Integer delayDeleteTime;
/**
* pip包路径
*/
private String pipSitePackageDir;
/**
* k8s内pip包路径
*/
private String pipSitePackageMountPath;
}

+ 20
- 2
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtMountDirBO.java View File

@@ -33,9 +33,9 @@ public class PtMountDirBO {
private String dir;
/**Whether the mount is read-only. true: yes, false: no**/
private boolean readOnly;
/**是否回收 true:创建pv、pvc进行挂载,删除时同时删除数据 false:直接挂载**/
/**是否回收 true:创建pv、pvc进行挂载,删除时同时删除数据 false且request和limit均为空:直接挂载**/
private boolean recycle;
/**存储配额 示例:500Mi 仅在pvc=true时生效**/
/**Storage request, e.g. 500Mi; only takes effect when pvc=true**/
private String request;
/**存储限额 示例:500Mi 仅在pvc=true时生效**/
private String limit;
@@ -48,4 +48,22 @@ public class PtMountDirBO {
this.dir = dir;
this.request = request;
}

/**
* Creates a mount description from a directory and a read-only flag.
*
* @param dir directory to mount
* @param readOnly whether the mount is read-only
*/
public PtMountDirBO(String dir, boolean readOnly){
this.dir = dir;
this.readOnly = readOnly;
}

/**
* Creates a mount description from a directory, a storage request and a read-only flag.
*
* @param dir directory to mount
* @param request storage request, e.g. 500Mi
* @param readOnly whether the mount is read-only
*/
public PtMountDirBO(String dir, String request,boolean readOnly){
this.dir = dir;
this.request = request;
this.readOnly = readOnly;
}

/**
* Creates a mount description from a directory, a storage request, a storage limit and a read-only flag.
*
* @param dir directory to mount
* @param request storage request, e.g. 500Mi
* @param limit storage limit, e.g. 500Mi
* @param readOnly whether the mount is read-only
*/
public PtMountDirBO(String dir, String request, String limit,boolean readOnly){
this.dir = dir;
this.request = request;
this.limit = limit;
this.readOnly = readOnly;
}
}

+ 11
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtPersistentVolumeClaimBO.java View File

@@ -23,6 +23,7 @@ import org.dubhe.k8s.domain.resource.BizQuantity;
import lombok.Data;
import lombok.experimental.Accessors;
import org.dubhe.k8s.enums.AccessModeEnum;
import org.dubhe.k8s.enums.PvReclaimPolicyEnum;

import java.util.HashMap;
import java.util.HashSet;
@@ -79,6 +80,11 @@ public class PtPersistentVolumeClaimBO {
**/
private String path;

/**
* 回收策略
*/
private String reclaimPolicy;

public PtPersistentVolumeClaimBO() {

}
@@ -107,5 +113,10 @@ public class PtPersistentVolumeClaimBO {
add(AccessModeEnum.READ_WRITE_ONCE.getType());
}};
this.setPvcName(resourceName+"-"+RandomUtil.randomString(MagicNumConstant.FIVE));
if (bo.isRecycle()){
this.reclaimPolicy = PvReclaimPolicyEnum.RECYCLE.getPolicy();
}else {
this.reclaimPolicy = PvReclaimPolicyEnum.RETAIN.getPolicy();
}
}
}

+ 1
- 1
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtResourceQuotaBO.java View File

@@ -57,7 +57,7 @@ public class PtResourceQuotaBO {
}

/**
* 添加cpu 限制
* 添加memory限制
* @param amount 值
* @param format 单位
*/


+ 14
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/TerminalBO.java View File

@@ -0,0 +1,14 @@
package org.dubhe.k8s.domain.bo;

import lombok.Data;
import lombok.experimental.Accessors;

/**
* @description Professional-edition terminal BO; adds no fields of its own and inherits everything from DeploymentBO.
* NOTE(review): @Data on a subclass generates equals/hashCode without the superclass fields unless
* callSuper is configured - confirm this is intended.
* @date 2021-06-30
*/
@Data
@Accessors(chain = true)
public class TerminalBO extends DeploymentBO{

}

+ 3
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerStatus.java View File

@@ -38,4 +38,7 @@ public class BizContainerStatus {
*/
@K8sField("state:waiting")
private BizContainerStateWaiting waiting;

@K8sField("containerID")
private String containerID;
}

+ 8
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizIntOrString.java View File

@@ -34,4 +34,12 @@ public class BizIntOrString {
private Integer Kind;
@K8sField("StrVal")
private String StrVal;

// Overload, not an override of Object#equals: true only when IntVal is set and equals the given value.
public boolean equals(Integer value){
    if (IntVal == null) {
        return false;
    }
    return IntVal.equals(value);
}

// Overload, not an override of Object#equals: true only when StrVal is set and equals the given value.
public boolean equals(String value){
    if (StrVal == null) {
        return false;
    }
    return StrVal.equals(value);
}
}

+ 45
- 2
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizPod.java View File

@@ -18,12 +18,18 @@
package org.dubhe.k8s.domain.resource;

import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSON;
import org.dubhe.biz.base.constant.MagicNumConstant;
import org.dubhe.biz.base.utils.StringUtils;
import org.dubhe.k8s.annotation.K8sField;
import org.dubhe.k8s.constant.K8sParamConstants;
import org.dubhe.k8s.domain.PtBaseResult;
import com.google.common.collect.Maps;
import lombok.Data;
import lombok.experimental.Accessors;
import org.dubhe.k8s.constant.K8sLabelConstants;
import org.dubhe.k8s.enums.PodPhaseEnum;
import org.springframework.util.CollectionUtils;

import java.util.List;
import java.util.Map;
@@ -59,6 +65,8 @@ public class BizPod extends PtBaseResult<BizPod> {
private String podIp;
@K8sField("spec:volumes")
private List<BizVolume> volumes;
@K8sField("status:hostIP")
private String hostIP;

/**
* Pending:待处理
@@ -90,10 +98,20 @@ public class BizPod extends PtBaseResult<BizPod> {
*/
private String completedTime;

/**
 * Returns the business label of the pod.
 *
 * @return the value stored under {@code K8sLabelConstants.BASE_TAG_BUSINESS},
 *         or null when the pod has no labels or the label is absent
 */
public String getBusinessLabel() {
    // Guard against an unset labels map instead of failing with a NullPointerException.
    return labels == null ? null : labels.get(K8sLabelConstants.BASE_TAG_BUSINESS);
}

/**
 * Returns the task identity label of the pod.
 *
 * @return the value stored under {@code K8sLabelConstants.BASE_TAG_TASK_IDENTIFY},
 *         or null when the pod has no labels or the label is absent
 */
public String getTaskIdentifyLabel() {
    // Guard against an unset labels map instead of failing with a NullPointerException.
    return labels == null ? null : labels.get(K8sLabelConstants.BASE_TAG_TASK_IDENTIFY);
}

/**
* 根据键获取label
*
@@ -114,15 +132,40 @@ public class BizPod extends PtBaseResult<BizPod> {
if (containerStatuses == null) {
return null;
}
containerStatuses.stream().map(obj -> {
containerStatuses.forEach(obj -> {
if (obj.getTerminated() != null) {
messages.append(StrUtil.format(CONTAINER_STATE_MESSAGE, name, phase, obj.getTerminated().getReason(), obj.getTerminated().getMessage()));
}
if (obj.getWaiting() != null) {
messages.append(StrUtil.format(CONTAINER_STATE_MESSAGE, name, phase, obj.getWaiting().getReason(), obj.getWaiting().getMessage()));
}
return null;
});
return messages.toString();
}

// Returns the container ID taken from the container statuses with the
// K8sParamConstants.CONTAINER_ID_PREFIX prefix removed. When several statuses carry an ID the
// last one wins; returns null when no status carries one.
public String getContainerId(){
    String rawId = null;
    if (!CollectionUtils.isEmpty(containerStatuses)){
        for (BizContainerStatus status : containerStatuses){
            String candidate = status.getContainerID();
            if (StringUtils.isNotEmpty(candidate)){
                rawId = candidate;
            }
        }
    }
    return StringUtils.isNotEmpty(rawId)
            ? rawId.replace(K8sParamConstants.CONTAINER_ID_PREFIX,"")
            : rawId;
}

// Returns the effective pod phase: a pod reported as RUNNING whose first container is waiting for
// a reason other than K8sParamConstants.WAITING_REASON_CONTAINER_CREATING is reported as FAILED;
// in every other case the stored phase is returned unchanged.
public String getRealPodPhase(){
    if (!PodPhaseEnum.RUNNING.getPhase().equals(phase) || CollectionUtils.isEmpty(containerStatuses)){
        return phase;
    }
    BizContainerStatus first = containerStatuses.get(MagicNumConstant.ZERO);
    if (first.getWaiting() == null){
        return phase;
    }
    String reason = first.getWaiting().getReason();
    boolean waitingAbnormally = reason != null
            && !K8sParamConstants.WAITING_REASON_CONTAINER_CREATING.equals(reason);
    return waitingAbnormally ? PodPhaseEnum.FAILED.getPhase() : phase;
}
}

+ 21
- 9
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizQuantity.java View File

@@ -20,7 +20,10 @@ package org.dubhe.k8s.domain.resource;
import lombok.Data;
import lombok.experimental.Accessors;
import org.dubhe.biz.base.utils.MathUtils;
import org.dubhe.biz.base.utils.StringUtils;
import org.dubhe.k8s.annotation.K8sField;
import org.dubhe.k8s.constant.K8sParamConstants;
import org.dubhe.k8s.utils.UnitConvertUtils;

/**
* @description BizQuantity实体类
@@ -43,19 +46,28 @@ public class BizQuantity {
this.format = format;
}

public boolean isIllegal() {
return true;
}

/**
* 单位相同时相减
* 不同单位相减
*
* @param bizQuantity 减数
* @return
* @param limitsKey 类型
* @return BizQuantity
*/
public BizQuantity reduce(BizQuantity bizQuantity){
if (bizQuantity == null || !bizQuantity.getFormat().equals(format)){
public BizQuantity reduce(BizQuantity bizQuantity,String limitsKey){
if (bizQuantity == null || StringUtils.isAllEmpty(limitsKey)){
return this;
}
return new BizQuantity(MathUtils.reduce(amount,bizQuantity.getAmount()),format);
switch (limitsKey){
case K8sParamConstants.RESOURCE_QUOTA_CPU_LIMITS_KEY :
Long cpuDiff = UnitConvertUtils.cpuFormatToN(amount,format) - UnitConvertUtils.cpuFormatToN(bizQuantity.getAmount(),bizQuantity.getFormat());
return new BizQuantity(String.valueOf(cpuDiff),K8sParamConstants.CPU_UNIT_N);
case K8sParamConstants.RESOURCE_QUOTA_MEMORY_LIMITS_KEY :
Long memDiff = UnitConvertUtils.memFormatToMi(amount,format) - UnitConvertUtils.memFormatToMi(bizQuantity.getAmount(),bizQuantity.getFormat());
return new BizQuantity(String.valueOf(memDiff),K8sParamConstants.MEM_UNIT);
case K8sParamConstants.RESOURCE_QUOTA_GPU_LIMITS_KEY :
return new BizQuantity(MathUtils.reduce(amount,bizQuantity.getAmount()),format);
default:
return this;
}
}
}

+ 1
- 1
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizResourceQuota.java View File

@@ -68,7 +68,7 @@ public class BizResourceQuota extends PtBaseResult<BizResourceQuota> {
if (!CollectionUtils.isEmpty(hard)){
for (Map.Entry<String, BizQuantity> entry : hard.entrySet()) {
if (used.get(entry.getKey()) != null){
remainder.put(entry.getKey(),entry.getValue().reduce(used.get(entry.getKey())));
remainder.put(entry.getKey(),entry.getValue().reduce(used.get(entry.getKey()),entry.getKey()));
}
}
}


+ 20
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizService.java View File

@@ -20,6 +20,9 @@ package org.dubhe.k8s.domain.resource;
import lombok.Data;
import lombok.experimental.Accessors;
import org.dubhe.k8s.annotation.K8sField;
import org.springframework.util.CollectionUtils;

import java.util.List;

/**
* @description Kubernetes Service
@@ -30,8 +33,25 @@ import org.dubhe.k8s.annotation.K8sField;
public class BizService {
@K8sField("metadata:name")
private String name;

@K8sField("metadata:namespace")
private String namespace;

@K8sField("metadata:uid")
private String uid;

@K8sField("spec:ports")
private List<BizServicePort> ports;

// Finds the first service port whose targetPort integer value equals the given target port.
// Returns null when the argument is null, when there are no ports, or when nothing matches.
public BizServicePort getServicePortByTargetPort(Integer targetPort){
    if (targetPort == null || CollectionUtils.isEmpty(ports)){
        return null;
    }
    return ports.stream()
            .filter(candidate -> candidate.getTargetPort() != null && candidate.getTargetPort().equals(targetPort))
            .findFirst()
            .orElse(null);
}
}

+ 46
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizServicePort.java View File

@@ -0,0 +1,46 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.k8s.domain.resource;

import lombok.Data;
import lombok.experimental.Accessors;
import org.dubhe.k8s.annotation.K8sField;

/**
* @description Kubernetes ServicePort: one port entry of a Service; each field is annotated with
* the name of the Kubernetes field it corresponds to
* @date 2020-09-09
*/
@Data
@Accessors(chain = true)
public class BizServicePort {

// Corresponds to the "name" field of the port entry
@K8sField("name")
private String name;

// Corresponds to the "nodePort" field of the port entry
@K8sField("nodePort")
private Integer nodePort;

// Corresponds to the "port" field of the port entry
@K8sField("port")
private Integer port;

// Corresponds to the "protocol" field of the port entry
@K8sField("protocol")
private String protocol;

// Corresponds to the "targetPort" field, which may hold either an integer or a string
@K8sField("targetPort")
private BizIntOrString targetPort;
}

+ 54
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuMetricsDataResultVO.java View File

@@ -0,0 +1,54 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.k8s.domain.vo;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;

import java.util.List;

/**
* @description GPU monitoring data of a single graphics card
* @date 2021-07-22
*/
@Data
@NoArgsConstructor
@AllArgsConstructor
@Accessors(chain = true)
public class GpuMetricsDataResultVO {
/**
* Graphics card id
*/
private String accId;

/**
* Total GPU memory size
*/
private String totalMemValues;

/**
* GPU utilization metric values
* NOTE(review): declared without "private", unlike the fields above - confirm whether package access is intended
*/
List<MetricsDataResultValueVO> gpuMetricsValues;

/**
* GPU memory usage metric values
* NOTE(review): declared without "private", unlike the fields above - confirm whether package access is intended
*/
List<MetricsDataResultValueVO> gpuMemValues;
}

+ 44
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuTotalMemResultVO.java View File

@@ -0,0 +1,44 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.k8s.domain.vo;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;

/**
* @description Total GPU memory result of a single graphics card
* @date 2021-07-22
*/
@Data
@NoArgsConstructor
@AllArgsConstructor
@Accessors(chain = true)
public class GpuTotalMemResultVO {

/**
* Graphics card id
*/
private String accId;

/**
* Total GPU memory size
*/
private String gpuTotalMemValue;

}

+ 52
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuValueVO.java View File

@@ -0,0 +1,52 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.k8s.domain.vo;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;

/**
* @description Real-time GPU monitoring data of a single graphics card
* @date 2021-07-23
*/
@Data
@NoArgsConstructor
@AllArgsConstructor
@Accessors(chain = true)
public class GpuValueVO {
/**
* Graphics card id
*/
private String accId;

/**
* Utilization, as a percentage
* NOTE(review): declared without "private", unlike accId - confirm whether package access is intended
*/
Float usage;

/**
* Total GPU memory size
*/
private String gpuTotalMemValue;

/**
* GPU memory usage
* NOTE(review): declared without "private", unlike accId - confirm whether package access is intended
*/
String gpuMemValue;
}

+ 2
- 2
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PodRangeMetricsVO.java View File

@@ -40,9 +40,9 @@ public class PodRangeMetricsVO {
*/
List<MetricsDataResultValueVO> cpuMetrics;
/**
* gpu 监控指标 value为使用百分比
* gpu 监控指标
*/
List<MetricsDataResultVO> gpuMetrics;
List<GpuMetricsDataResultVO> gpuMetrics;
/**
* 内存 监控指标 value为占用内存 单位 Ki
*/


+ 2
- 10
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PtPodsVO.java View File

@@ -24,10 +24,8 @@ import org.dubhe.biz.base.constant.MagicNumConstant;
import org.dubhe.biz.base.utils.MathUtils;
import org.dubhe.biz.base.utils.StringUtils;
import org.dubhe.k8s.utils.UnitConvertUtils;
import org.springframework.util.CollectionUtils;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

/**
@@ -98,9 +96,9 @@ public class PtPodsVO implements Serializable {
**/
private String gpuUsed;
/**
* gpu使用百分比
* gpu实时监控数据
*/
private List<GpuUsageVO> gpuUsagePersent;
private List<GpuValueVO> gpuUsagePersent;

public PtPodsVO(String namespace,String podName,String cpuRequestAmount,String cpuUsageAmount,String cpuRequestFormat,String cpuUsageFormat,String memoryRequestAmount,String memoryUsageAmount,String memoryRequestFormat,String memoryUsageFormat,String nodeName,String status,String gpuUsed){
this.namespace = namespace;
@@ -142,10 +140,4 @@ public class PtPodsVO implements Serializable {
}
}

public void addGpuUsage(String accId,Float usage){
if (CollectionUtils.isEmpty(gpuUsagePersent)){
gpuUsagePersent = new ArrayList<>();
}
gpuUsagePersent.add(new GpuUsageVO(accId,usage));
}
}

+ 42
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/TerminalResourceVO.java View File

@@ -0,0 +1,42 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.k8s.domain.vo;

import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import org.dubhe.k8s.domain.PtBaseResult;
import org.dubhe.k8s.domain.resource.BizDeployment;
import org.dubhe.k8s.domain.resource.BizService;

/**
 * @description Professional-edition terminal VO: carries the deployment and the
 *              service that belong to one terminal, on top of the common
 *              {@link PtBaseResult} fields.
 * @date 2021-06-30
 */
@Data
@NoArgsConstructor
@Accessors(chain = true)
public class TerminalResourceVO extends PtBaseResult<TerminalResourceVO> {
    /** Deployment associated with the terminal. */
    private BizDeployment bizDeployment;
    /** Service associated with the terminal. */
    private BizService bizService;

    /**
     * Creates a result holding both resources.
     *
     * @param deployment value stored in {@code bizDeployment}
     * @param service    value stored in {@code bizService}
     */
    public TerminalResourceVO(BizDeployment deployment, BizService service){
        bizDeployment = deployment;
        bizService = service;
    }
}

+ 20
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/VolumeVO.java View File

@@ -17,10 +17,15 @@

package org.dubhe.k8s.domain.vo;

import io.fabric8.kubernetes.api.model.EmptyDirVolumeSource;
import io.fabric8.kubernetes.api.model.Quantity;
import io.fabric8.kubernetes.api.model.Volume;
import io.fabric8.kubernetes.api.model.VolumeBuilder;
import io.fabric8.kubernetes.api.model.VolumeMount;
import io.fabric8.kubernetes.api.model.VolumeMountBuilder;
import lombok.Data;
import lombok.experimental.Accessors;
import org.dubhe.k8s.constant.K8sParamConstants;
import org.dubhe.k8s.domain.PtBaseResult;

import java.util.ArrayList;
@@ -57,4 +62,19 @@ public class VolumeVO extends PtBaseResult<VolumeVO> {
}
volumes.add(volume);
}

/**
 * Adds the shm volume: registers a volume mount at
 * {@link K8sParamConstants#SHM_MOUNTPATH} and a matching emptyDir volume,
 * both named {@link K8sParamConstants#SHM_NAME}.
 *
 * @param shmMemory quantity passed to the emptyDir source together with
 *                  {@link K8sParamConstants#SHM_MEDIUM}
 */
public void addShmFsVolume(Quantity shmMemory){
    // Mount first, then the backing volume — same order as before.
    VolumeMount shmMount = new VolumeMountBuilder()
            .withName(K8sParamConstants.SHM_NAME)
            .withMountPath(K8sParamConstants.SHM_MOUNTPATH)
            .build();
    addVolumeMount(shmMount);

    EmptyDirVolumeSource shmSource = new EmptyDirVolumeSource(K8sParamConstants.SHM_MEDIUM, shmMemory);
    Volume shmVolume = new VolumeBuilder()
            .withName(K8sParamConstants.SHM_NAME)
            .withEmptyDir(shmSource)
            .build();
    addVolume(shmVolume);
}
}

+ 4
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/BusinessLabelServiceNameEnum.java View File

@@ -48,6 +48,10 @@ public enum BusinessLabelServiceNameEnum {
* 批量服务
*/
BATCH_SERVING(BizEnum.BATCH_SERVING.getBizCode(), ApplicationNameConst.SERVER_SERVING),
/**
* 专业版终端
*/
TERMINAL(BizEnum.TERMINAL.getBizCode(), ApplicationNameConst.TERMINAL),
;
/**
* 业务标签


+ 40
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/ServiceTypeENum.java View File

@@ -0,0 +1,40 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/

package org.dubhe.k8s.enums;

import lombok.Getter;

/**
 * @description Service types; each constant carries the type name as a string
 *              (the values match the Kubernetes Service {@code spec.type} names).
 * @date 2021-07-26
 */
public enum ServiceTypeENum {
    /** "ClusterIP" service type. */
    CLUSTER_IP("ClusterIP"),
    /** "NodePort" service type. */
    NODE_PORT("NodePort"),
    /** "LoadBalancer" service type. */
    LOAD_BALANCER("LoadBalancer"),
    ;

    /** Type name; final so an enum constant can never be mutated after construction. */
    private final String type;

    ServiceTypeENum(String type) {
        this.type = type;
    }

    /**
     * Explicit accessor, equivalent to the getter Lombok previously generated.
     *
     * @return the type name string of this constant
     */
    public String getType() {
        return type;
    }
}

+ 42
- 0
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/WebsocketTopicEnum.java View File

@@ -0,0 +1,42 @@
/**
* Copyright 2020 Tianshu AI Platform. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================
*/
package org.dubhe.k8s.enums;


/**
 * @description Websocket topic enum
 * @date 2021-7-20
 */
public enum WebsocketTopicEnum {

    /**
     * Resource monitoring topic
     */
    RESOURCE_MONITOR("resourceMonitor"),
    ;

    /** Topic name; final so an enum constant can never be mutated after construction. */
    private final String topic;

    WebsocketTopicEnum(String topic) {
        this.topic = topic;
    }

    /**
     * @return the topic name string of this constant
     */
    public String getTopic() {
        return topic;
    }

}

+ 1
- 1
dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sUtils.java View File

@@ -150,7 +150,7 @@ public class K8sUtils implements ApplicationContextAware {
* @param gpuNum
* @return
*/
public Map<String, String> gpuSelector(Integer gpuNum) {
public static Map<String, String> gpuSelector(Integer gpuNum) {
Map<String, String> gpuSelector = new HashMap<>(2);
if (gpuNum != null && gpuNum > 0) {
gpuSelector.put(K8sLabelConstants.NODE_GPU_LABEL_KEY, K8sLabelConstants.NODE_GPU_LABEL_VALUE);


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save