diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceQuotaClient.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceQuotaClient.java new file mode 100644 index 0000000..0aca0d7 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/ResourceQuotaClient.java @@ -0,0 +1,41 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.admin.client; + +import org.dubhe.admin.client.fallback.ResourceQuotaClientFallback; +import org.dubhe.admin.domain.dto.UserConfigDTO; +import org.dubhe.biz.base.constant.ApplicationNameConst; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.springframework.cloud.openfeign.FeignClient; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; + +/** + * @description Feign client for the resource-quota endpoint of the service named by ApplicationNameConst.SERVER_K8S; ResourceQuotaClientFallback is declared as its fallback + * @date 2021-7-21 + */ +@FeignClient(value = ApplicationNameConst.SERVER_K8S,fallback = ResourceQuotaClientFallback.class) +public interface ResourceQuotaClient { + /** + * Updates the ResourceQuota by POSTing the given user configuration to /resourceQuota/update. + * + * @param userConfigDTO user configuration sent as the request body + * @return response body of the remote call; presumably it reports whether the quota update succeeded (TODO confirm against the remote service) + */ + @PostMapping(value = "/resourceQuota/update") + DataResponseBody updateResourceQuota(@RequestBody UserConfigDTO userConfigDTO); +} diff --git 
a/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceQuotaClientFallback.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceQuotaClientFallback.java new file mode 100644 index 0000000..3adf93f --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/client/fallback/ResourceQuotaClientFallback.java @@ -0,0 +1,33 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.client.fallback; + +import org.dubhe.admin.client.ResourceQuotaClient; +import org.dubhe.admin.domain.dto.UserConfigDTO; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.dataresponse.factory.DataResponseFactory; + +/** + * @description Fallback implementation of ResourceQuotaClient: returns a failed response with a fixed error message instead of calling the remote service. NOTE(review): no Spring stereotype annotation is visible on this class; confirm it is registered as a bean elsewhere, since a Feign fallback has to be a Spring bean + * @date 2021-7-21 + */ +public class ResourceQuotaClientFallback implements ResourceQuotaClient { + @Override + public DataResponseBody updateResourceQuota(UserConfigDTO userConfigDTO) { + return DataResponseFactory.failed("Call ResourceQuota server updateResourceQuota error"); + } +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserConfigMapper.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserConfigMapper.java new file mode 100644 index 0000000..270d4c2 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/dao/UserConfigMapper.java @@ -0,0 +1,36 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.apache.ibatis.annotations.Select; +import org.dubhe.admin.domain.entity.UserConfig; +import java.util.List; +import org.apache.ibatis.annotations.Param; + +/** + * @description MyBatis-Plus mapper for the UserConfig entity; the BaseMapper type argument makes the inherited CRUD methods (e.g. selectOne) return UserConfig instead of Object + * @date 2021-6-30 + */ +public interface UserConfigMapper extends BaseMapper<UserConfig> { + + /** + * Inserts the given user configuration or updates the existing one. NOTE(review): the meaning of the returned Long is not visible here, presumably the affected-row count; confirm against the mapper XML + * @param userConfig user configuration to persist + */ + Long insertOrUpdate(UserConfig userConfig); +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserConfigDTO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserConfigDTO.java new file mode 100644 index 0000000..95a3ef0 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/dto/UserConfigDTO.java @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import org.hibernate.validator.constraints.Length; + +import javax.validation.constraints.NotNull; +import java.io.Serializable; + +/** + * @description Request DTO carrying one user's configuration: user ID, notebook delayed-deletion time (hours), CPU limit (cores), memory limit (Gi) and GPU limit (cards); every field is mandatory (@NotNull) + * @date 2021-7-1 + */ +@Data +public class UserConfigDTO implements Serializable { + private static final long serialVersionUID = 1L; + + @NotNull(message = "用户 ID 不能为空") + @ApiModelProperty("用户 ID") + private Long userId; + + @NotNull(message = "Notebook 延迟删除时间配置不能为空") + @ApiModelProperty("Notebook 延迟删除时间配置,单位:小时") + private Integer notebookDelayDeleteTime; + + @NotNull(message = "CPU 资源限制配置不能为空") + @ApiModelProperty("CPU 资源限制,单位:核") + private Integer cpuLimit; + + @NotNull(message = "内存资源限制配置不能为空") + @ApiModelProperty("内存资源限制,单位:Gi") + private Integer memoryLimit; + + @NotNull(message = "GPU 资源限制配置不能为空") + @ApiModelProperty("GPU 资源限制,单位:块") + private Integer gpuLimit; +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/Config.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/Config.java new file mode 100644 index 0000000..769c362 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/Config.java @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.entity; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.db.entity.BaseEntity; + +/** + * @description Entity mapped to the "config" table: auto-increment id plus the name, default_value and description columns; setters are chainable (@Accessors(chain = true)) + * @date 2021-06-30 + */ +@Data +@TableName("config") +@Accessors(chain = true) +public class Config extends BaseEntity { + + @TableId(value = "id", type = IdType.AUTO) + private Long id; + + @TableField(value = "name") + private String name; + + @TableField(value = "default_value") + private Integer defaultValue; + + @TableField(value = "description") + private String description; + +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserConfig.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserConfig.java new file mode 100644 index 0000000..5513718 --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/entity/UserConfig.java @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.entity; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.db.entity.BaseEntity; + +/** + * @description Entity mapped to the "user_config" table: per-user notebook delayed-deletion time and CPU / memory / GPU limits. Only id is the primary key (@TableId); all other columns are mapped with @TableField because MyBatis-Plus accepts a single @TableId per entity + * @date 2021-06-30 + */ +@Data +@TableName("user_config") +@Accessors(chain = true) +public class UserConfig extends BaseEntity { + + @TableId(value = "id", type = IdType.AUTO) + private Long id; + + @TableField(value = "user_id") + private Long userId; + + @TableField(value = "notebook_delay_delete_time") + private Integer notebookDelayDeleteTime; + + @TableField(value = "cpu_limit") + private Integer cpuLimit; + + @TableField(value = "memory_limit") + private Integer memoryLimit; + + @TableField(value = "gpu_limit") + private Integer gpuLimit; +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserConfigCreateVO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserConfigCreateVO.java new file mode 100644 index 0000000..c754c6e --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserConfigCreateVO.java @@ -0,0 +1,37 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.vo; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; + +import java.io.Serializable; + +/** + * @description VO that carries only the ID of a user configuration; setters are chainable (@Accessors(chain = true)) + * @date 2021-7-2 + */ +@Data +@Accessors(chain = true) +public class UserConfigCreateVO implements Serializable{ + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "用户配置 ID") + private Long id; + +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserConfigVO.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserConfigVO.java new file mode 100644 index 0000000..5a5e29c --- /dev/null +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/domain/vo/UserConfigVO.java @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.admin.domain.vo; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; + +import java.io.Serializable; + +/** + * @description View object for one user's configuration: user ID, notebook delayed-deletion time (hours), CPU limit (cores), memory limit (Gi) and GPU limit (cards); setters are chainable (@Accessors(chain = true)) + * @date 2021-7-1 + */ +@Data +@Accessors(chain = true) +public class UserConfigVO implements Serializable { + private static final long serialVersionUID = 1L; + + @ApiModelProperty("用户 ID") + private Long userId; + + @ApiModelProperty("Notebook 延迟删除时间配置,单位:小时") + private Integer notebookDelayDeleteTime; + + @ApiModelProperty("CPU 资源限制,单位:核") + private Integer cpuLimit; + + @ApiModelProperty("内存资源限制,单位:Gi") + private Integer memoryLimit; + + @ApiModelProperty("GPU 资源限制,单位:块") + private Integer gpuLimit; +} diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserController.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserController.java index ef70bda..576db7e 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserController.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/rest/UserController.java @@ -19,6 +19,7 @@ package org.dubhe.admin.rest; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; +import org.dubhe.admin.domain.dto.UserConfigDTO; import org.dubhe.admin.domain.dto.UserCreateDTO; import org.dubhe.admin.domain.dto.UserDeleteDTO; import org.dubhe.admin.domain.dto.UserQueryDTO; @@ -30,6 +31,7 @@ import org.dubhe.biz.base.dto.UserDTO; import org.dubhe.biz.base.vo.DataResponseBody; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.validation.annotation.Validated; import org.springframework.web.bind.annotation.*; import javax.servlet.http.HttpServletResponse; @@ -84,6 +86,19 @@ public class UserController { return new 
DataResponseBody(); } + @ApiOperation("根据用户ID查询用户配置") + @GetMapping(value = "/getUserConfig") + public DataResponseBody getUserConfig(@RequestParam(value = "userId") Long userId) { + return new DataResponseBody(userService.findUserConfig(userId)); + } + + @ApiOperation("新增或修改用户配置") + @PutMapping(value = "/setUserConfig") + @PreAuthorize(Permissions.USER_CONFIG_EDIT) + public DataResponseBody setUserConfig(@Validated @RequestBody UserConfigDTO userConfigDTO) { + return new DataResponseBody(userService.createOrUpdateUserConfig(userConfigDTO)); + } + /** * 此接口提供给Auth模块获取用户信息使用 * 因Auth获取用户信息在登录时是未登录状态,请不要在此添加权限校验 diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserService.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserService.java index b73b0f2..c7589cc 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserService.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/UserService.java @@ -20,6 +20,8 @@ import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.baomidou.mybatisplus.extension.service.IService; import org.dubhe.admin.domain.dto.*; import org.dubhe.admin.domain.entity.User; +import org.dubhe.admin.domain.vo.UserConfigCreateVO; +import org.dubhe.admin.domain.vo.UserConfigVO; import org.dubhe.biz.base.dto.TeamDTO; import org.dubhe.biz.base.dto.UserDTO; import org.dubhe.biz.base.vo.DataResponseBody; @@ -221,4 +223,20 @@ public interface UserService extends AdminUserService, IService { * @return org.dubhe.domain.dto.UserDTO 用户信息DTO集合 */ List getUserList(List ids); + + /** + * Looks up the configuration of the user with the given ID. + * + * @param userId ID of the user whose configuration is requested + * @return org.dubhe.admin.domain.vo.UserConfigVO the user's configuration VO + */ + UserConfigVO findUserConfig(Long userId); + + /** + * Creates the user's configuration, or updates it when one already exists. + * + * @param userConfigDTO configuration values to store + * @return org.dubhe.admin.domain.vo.UserConfigCreateVO VO carrying the ID of the stored configuration + */ + UserConfigCreateVO createOrUpdateUserConfig(UserConfigDTO userConfigDTO); } diff --git 
a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/ResourceSpecsServiceImpl.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/ResourceSpecsServiceImpl.java index b3ad0da..4f3dc05 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/ResourceSpecsServiceImpl.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/ResourceSpecsServiceImpl.java @@ -67,7 +67,7 @@ public class ResourceSpecsServiceImpl implements ResourceSpecsService { public Map getResourceSpecs(ResourceSpecsQueryDTO resourceSpecsQueryDTO) { Page page = resourceSpecsQueryDTO.toPage(); //排序字段 - String sort = null == resourceSpecsQueryDTO.getSort() ? StringConstant.CREATE_TIME_SQL : resourceSpecsQueryDTO.getSort(); + String sort = null == resourceSpecsQueryDTO.getSort() ? StringConstant.ID : resourceSpecsQueryDTO.getSort(); QueryWrapper queryResourceSpecsWrapper = new QueryWrapper<>(); queryResourceSpecsWrapper.like(resourceSpecsQueryDTO.getSpecsName() != null, "specs_name", resourceSpecsQueryDTO.getSpecsName()) .eq(resourceSpecsQueryDTO.getResourcesPoolType() != null, "resources_pool_type", resourceSpecsQueryDTO.getResourcesPoolType()) diff --git a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserServiceImpl.java b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserServiceImpl.java index 5fbe9d3..babb286 100644 --- a/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserServiceImpl.java +++ b/dubhe-server/admin/src/main/java/org/dubhe/admin/service/impl/UserServiceImpl.java @@ -22,17 +22,22 @@ import cn.hutool.crypto.asymmetric.KeyType; import cn.hutool.crypto.asymmetric.RSA; import com.alibaba.fastjson.JSONObject; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import 
com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import org.dubhe.admin.client.AuthServiceClient; +import org.dubhe.admin.client.ResourceQuotaClient; import org.dubhe.admin.dao.*; import org.dubhe.admin.domain.dto.*; import org.dubhe.admin.domain.entity.Role; import org.dubhe.admin.domain.entity.User; import org.dubhe.admin.domain.entity.UserAvatar; +import org.dubhe.admin.domain.entity.UserConfig; import org.dubhe.admin.domain.entity.UserRole; import org.dubhe.admin.domain.vo.EmailVo; +import org.dubhe.admin.domain.vo.UserConfigCreateVO; +import org.dubhe.admin.domain.vo.UserConfigVO; import org.dubhe.admin.domain.vo.UserVO; import org.dubhe.admin.enums.UserMailCodeEnum; import org.dubhe.admin.event.EmailEventPublisher; @@ -92,6 +97,18 @@ public class UserServiceImpl extends ServiceImpl implements Us @Value("${initial_password}") private String initialPassword; + @Value("${user.config.notebook-delay-delete-time}") + private Integer defaultNotebookDelayDeleteTime; + + @Value("${user.config.cpu-limit}") + private Integer cpuLimit; + + @Value("${user.config.memory-limit}") + private Integer memoryLimit; + + @Value("${user.config.gpu-limit}") + private Integer gpuLimit; + @Autowired private UserMapper userMapper; @@ -130,6 +147,13 @@ public class UserServiceImpl extends ServiceImpl implements Us @Autowired private PermissionMapper permissionMapper; + @Autowired + private UserConfigMapper userConfigMapper; + + @Autowired + ResourceQuotaClient resourceQuotaClient; + + /** * 测试标识 true:允许debug false:拒绝debug */ @@ -224,7 +248,15 @@ public class UserServiceImpl extends ServiceImpl implements Us for (Role role : resources.getRoles()) { roleMapper.tiedUserRole(user.getId(), role.getId()); } - + UserConfigDTO userConfigDTO = new UserConfigDTO(); + userConfigDTO.setUserId(user.getId()); + userConfigDTO.setCpuLimit(cpuLimit); + userConfigDTO.setMemoryLimit(memoryLimit); + userConfigDTO.setGpuLimit(gpuLimit); + DataResponseBody dataResponseBody = 
resourceQuotaClient.updateResourceQuota(userConfigDTO); + if (!dataResponseBody.succeed()){ + throw new BusinessException("用户配置更新失败"); + } return userConvert.toDto(user); } @@ -316,10 +348,24 @@ public class UserServiceImpl extends ServiceImpl implements Us return sysRoleDTO; }).collect(Collectors.toList())); } + // attach the user's configuration to the DTO + SysUserConfigDTO sysUserConfigDTO = getUserConfig(user.getId()); + dto.setUserConfig(sysUserConfigDTO); return dto; } + private SysUserConfigDTO getUserConfig(Long userId) { + UserConfig userConfig = userConfigMapper.selectOne(new QueryWrapper<>(new UserConfig().setUserId(userId))); + SysUserConfigDTO sysUserConfigDTO= new SysUserConfigDTO(); + if (userConfig == null){ + return sysUserConfigDTO.setCpuLimit(cpuLimit).setMemoryLimit(memoryLimit) + .setGpuLimit(gpuLimit).setNotebookDelayDeleteTime(defaultNotebookDelayDeleteTime); + } + BeanUtils.copyProperties(userConfig, sysUserConfigDTO); + return sysUserConfigDTO; + } + /** * 修改用户个人中心信息 @@ -695,6 +741,48 @@ public class UserServiceImpl extends ServiceImpl implements Us return userConvert.toDto(users); } + /** + * Looks up the configuration of the given user; when no row is stored, the configured default limits are returned instead. + * + * @param userId ID of the user whose configuration is requested + * @return org.dubhe.admin.domain.vo.UserConfigVO the stored configuration, or the defaults for this user + */ + @Override + public UserConfigVO findUserConfig(Long userId) { + // look up the stored configuration by user ID + UserConfig userConfig = userConfigMapper.selectOne(new QueryWrapper<>(new UserConfig().setUserId(userId))); + UserConfigVO userConfigVO = new UserConfigVO(); + // nothing stored for this user: answer with the configured defaults + if (userConfig == null){ + return userConfigVO.setUserId(userId).setCpuLimit(cpuLimit).setMemoryLimit(memoryLimit) + .setGpuLimit(gpuLimit).setNotebookDelayDeleteTime(defaultNotebookDelayDeleteTime); + } + // copy the stored values into the VO + BeanUtils.copyProperties(userConfig, userConfigVO); + return userConfigVO; + } + + /** + * Creates or updates a user's configuration. The local row is written first and the remote ResourceQuota update is called last, so a failed remote call throws and the surrounding transaction rolls the local write back. + * + * @param userConfigDTO configuration values to store and to push to the resource-quota service + * @return org.dubhe.admin.domain.vo.UserConfigCreateVO VO carrying the ID of the stored configuration + */ + @Override + @Transactional(rollbackFor = Exception.class) + public UserConfigCreateVO 
createOrUpdateUserConfig(UserConfigDTO userConfigDTO) { + UserConfig userConfig = new UserConfig(); + BeanUtils.copyProperties(userConfigDTO, userConfig); + userConfigMapper.insertOrUpdate(userConfig); + DataResponseBody dataResponseBody = resourceQuotaClient.updateResourceQuota(userConfigDTO); + if (!dataResponseBody.succeed()){ + throw new BusinessException("用户配置更新失败"); + } + // wrap the configuration ID in the returned VO + UserConfigCreateVO userConfigCreateVO = new UserConfigCreateVO().setId(userConfig.getId()); + return userConfigCreateVO; + } + /** * 校验验证码 @@ -900,8 +988,9 @@ public class UserServiceImpl extends ServiceImpl implements Us }).collect(Collectors.toList()); dto.setRoles(roleDTOS); } - - + // attach the user's configuration to the DTO + SysUserConfigDTO sysUserConfigDTO = getUserConfig(user.getId()); + dto.setUserConfig(sysUserConfigDTO); return DataResponseFactory.success(dto); } } diff --git a/dubhe-server/admin/src/main/resources/bootstrap.yml b/dubhe-server/admin/src/main/resources/bootstrap.yml index e5b0ebc..d5c9584 100644 --- a/dubhe-server/admin/src/main/resources/bootstrap.yml +++ b/dubhe-server/admin/src/main/resources/bootstrap.yml @@ -31,7 +31,7 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 diff --git a/dubhe-server/admin/src/main/resources/mapper/UserConfigMapper.xml b/dubhe-server/admin/src/main/resources/mapper/UserConfigMapper.xml new file mode 100644 index 0000000..e0c0119 --- /dev/null +++ b/dubhe-server/admin/src/main/resources/mapper/UserConfigMapper.xml @@ -0,0 +1,100 @@ + + + + + + + + SELECT id FROM user_config WHERE user_id = #{userId} + + insert into user_config + + + user_id, + + + notebook_delay_delete_time, + + + cpu_limit, + + + memory_limit, + + + gpu_limit, + + + create_user_id, + + + update_time, + + + update_user_id, + + + deleted, + + + + + #{userId}, + + + #{notebookDelayDeleteTime}, + + + #{cpuLimit}, + + + #{memoryLimit}, + + + #{gpuLimit}, + + + #{createUserId}, + 
+ + #{updateTime}, + + + #{updateUserId}, + + + #{deleted}, + + + ON DUPLICATE KEY UPDATE + + + notebook_delay_delete_time = #{notebookDelayDeleteTime}, + + + cpu_limit = #{cpuLimit}, + + + memory_limit = #{memoryLimit}, + + + gpu_limit = #{gpuLimit}, + + + create_time = #{createTime}, + + + create_user_id = #{createUserId}, + + + update_time = #{updateTime}, + + + update_user_id = #{updateUserId}, + + + deleted = #{deleted}, + + + + \ No newline at end of file diff --git a/dubhe-server/auth/src/main/resources/bootstrap.yml b/dubhe-server/auth/src/main/resources/bootstrap.yml index 2e2179e..de12431 100644 --- a/dubhe-server/auth/src/main/resources/bootstrap.yml +++ b/dubhe-server/auth/src/main/resources/bootstrap.yml @@ -23,7 +23,7 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/ApplicationNameConst.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/ApplicationNameConst.java index 34d61d8..a04c803 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/ApplicationNameConst.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/ApplicationNameConst.java @@ -81,4 +81,13 @@ public class ApplicationNameConst { */ public final static String SERVER_DATA_DCM = "dubhe-data-dcm"; + /** + * k8s + */ + public final static String SERVER_K8S = "dubhe-k8s"; + + /** + * 专业版终端 + */ + public final static String TERMINAL = "dubhe-terminal"; } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/AuthConst.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/AuthConst.java index 384679a..3a08ce2 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/AuthConst.java +++ 
b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/AuthConst.java @@ -56,7 +56,7 @@ public class AuthConst { public final static String[] DEFAULT_PERMIT_PATHS = {"/swagger**/**", "/webjars/**", "/v2/api-docs/**", "/doc.html/**", "/users/findUserByUsername", "/auth/login", "/auth/code", "/datasets/files/annotations/auto","/datasets/versions/**/convert/finish", "/datasets/enhance/finish", - "/auth/getCodeBySentEmail","/auth/userRegister", + "/auth/getCodeBySentEmail","/auth/userRegister","/ws/**", StringConstant.RECYCLE_CALL_URI+"**" }; diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/MagicNumConstant.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/MagicNumConstant.java index fe03ad6..8f406e8 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/MagicNumConstant.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/MagicNumConstant.java @@ -39,6 +39,7 @@ public final class MagicNumConstant { public static final int ELEVEN = 11; public static final int SIXTEEN = 16; public static final int TWENTY = 20; + public static final int TWENTY_TWO = 22; public static final int THIRTY_TWO = 32; public static final int FIFTY = 50; public static final int SIXTY = 60; diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/NumberConstant.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/NumberConstant.java index bb177ef..53306ea 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/NumberConstant.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/NumberConstant.java @@ -43,6 +43,7 @@ public class NumberConstant { public final static int HOUR_SECOND = 60 * 60; public final static int DAY_SECOND = 60 * 60 * 24; public final static int WEEK_SECOND = 60 * 60 * 24 * 7; + public final static int MONTH_SECOND = 60 * 60 * 24 * 30; 
public final static int MAX_PAGE_SIZE = 2000; public final static int MAX_MESSAGE_LENGTH = 1024 * 1024 * 1024; } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/Permissions.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/Permissions.java index 5946fad..d572156 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/Permissions.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/Permissions.java @@ -140,6 +140,8 @@ public final class Permissions { public static final String USER_EDIT = "hasAuthority('ROLE_system:user:edit')"; public static final String USER_DELETE = "hasAuthority('ROLE_system:user:delete')"; public static final String USER_DOWNLOAD = "hasAuthority('ROLE_system:user:download')"; + public static final String USER_CONFIG_EDIT = "hasAuthority('ROLE_system:user:configEdit')"; + public static final String USER_RESOURCE_INFO = "hasAuthority('ROLE_system:user:resourceInfo')"; /** * 控制台:角色管理 @@ -200,6 +202,16 @@ public final class Permissions { public static final String SPECS_EDIT = "hasAuthority('ROLE_system:specs:edit')"; public static final String SPECS_DELETE = "hasAuthority('ROLE_system:specs:delete')"; + /** + * 专业版:终端 + */ + public static final String TERMINAL_CREATE = "hasAuthority('ROLE_terminal:specs:create')"; + public static final String TERMINAL_RESTART = "hasAuthority('ROLE_terminal:specs:restart')"; + public static final String TERMINAL_PRESAVE = "hasAuthority('ROLE_terminal:specs:save')"; + public static final String TERMINAL_DELETE = "hasAuthority('ROLE_terminal:specs:delete')"; + public static final String TERMINAL_DETAIL = "hasAuthority('ROLE_terminal:specs:detail')"; + public static final String TERMINAL_LIST = "hasAuthority('ROLE_terminal:specs:list')"; + private Permissions() { } } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/StringConstant.java 
b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/StringConstant.java index fd10aee..5d35700 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/StringConstant.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/StringConstant.java @@ -79,6 +79,9 @@ public final class StringConstant { public static final String RECYCLE_CALL_URI = "/api/recycle/call/"; public static final String K8S_CALLBACK_PATH_DEPLOYMENT = "/api/k8s/callback/deployment"; public static final String MULTIPART = "multipart/form-data"; + + public static final String PIP_SITE_PACKAGE ="pip-site-package"; + /** * 分页内容 */ @@ -105,9 +108,10 @@ public final class StringConstant { public static final String STEP_LOW = "step"; /** - * 测试环境 + * 任务缓存 */ - public static final String PROFILE_ACTIVE_TEST = "test"; + public static final String CACHE_TASK_ID ="task_id"; + public static final String CACHE_TASK_NAME ="task_name"; private StringConstant() { diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/SymbolConstant.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/SymbolConstant.java index 4190208..db633fd 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/SymbolConstant.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/SymbolConstant.java @@ -48,6 +48,8 @@ public class SymbolConstant { public static final String EVENT_SEPARATOR = "&&"; public static final String POST = "POST"; public static final String HTTP_SLASH = "http://"; + public static final String PORT = "port"; + public static final String LOCAL_HOST = "localhost"; private SymbolConstant() { } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/UserConstant.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/UserConstant.java index c382617..f4eb296 100644 --- 
a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/UserConstant.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/constant/UserConstant.java @@ -91,4 +91,14 @@ public class UserConstant { */ public final static int REGISTER_ROLE_ID = 2; + /** + * 默认资源用户ID + */ + public final static Long DEFAULT_ORIGIN_USER_ID = 0L; + + /** + * 默认创建人ID + */ + public final static Long DEFAULT_CREATE_USER_ID = 0L; + } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/context/UserContext.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/context/UserContext.java index 4a764e5..645e6b7 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/context/UserContext.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/context/UserContext.java @@ -18,6 +18,7 @@ package org.dubhe.biz.base.context; import lombok.Data; import org.dubhe.biz.base.dto.SysRoleDTO; +import org.dubhe.biz.base.dto.SysUserConfigDTO; import java.io.Serializable; import java.util.List; @@ -72,5 +73,9 @@ public class UserContext implements Serializable { * 头像路径 */ private String userAvatarPath; + /** + * 用户配置 + */ + private SysUserConfigDTO userConfig; } diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserConfigDTO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserConfigDTO.java new file mode 100644 index 0000000..14296d4 --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/dto/SysUserConfigDTO.java @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
package org.dubhe.biz.base.dto;

import lombok.Data;
import lombok.experimental.Accessors;
import java.io.Serializable;

/**
 * @description System user configuration DTO. Carries the per-user settings
 *              listed below; setters are chainable via {@code @Accessors(chain = true)}.
 * @date 2021-7-5
 */
@Data
@Accessors(chain = true)
public class SysUserConfigDTO implements Serializable{

    private static final long serialVersionUID = 1L;

    /**
     * Notebook delayed-deletion time setting
     * (NOTE(review): the time unit is not visible here - confirm against the consumer)
     */
    private Integer notebookDelayDeleteTime;

    /**
     * CPU resource limit setting
     */
    private Integer cpuLimit;

    /**
     * Memory resource limit setting
     */
    private Integer memoryLimit;

    /**
     * GPU resource limit setting
     */
    private Integer gpuLimit;

}
public enum BizEnum { /** * 度量管理 */ - MEASURE("度量管理", "measure", 5); + MEASURE("度量管理", "measure", 5), + /** + * 专业版终端 + */ + TERMINAL("专业版终端", "terminal", 7), + ; /** * 业务模块名称 diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/ImageTypeEnum.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/ImageTypeEnum.java index 9bd9e26..9d8acfa 100644 --- a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/ImageTypeEnum.java +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/enums/ImageTypeEnum.java @@ -42,7 +42,13 @@ public enum ImageTypeEnum { /** * Serving镜像 */ - SERVING("Serving镜像", "serving", 2); + SERVING("Serving镜像", "serving", 2), + + /** + * terminal镜像 + */ + TERMINAL("terminal镜像", "terminal", 3) + ; /** * 镜像项目名称 diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/ResultUtil.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/ResultUtil.java new file mode 100644 index 0000000..8c1d9c4 --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/utils/ResultUtil.java @@ -0,0 +1,60 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.biz.base.utils; + +import cn.hutool.core.util.StrUtil; +import org.dubhe.biz.base.exception.BusinessException; + +/** + * 调用结果处理工具类 + * + */ +public class ResultUtil { + /** + * 判断调用结果非空 + * + * @param object + * @param errorMessageTemplate + * @param params + */ + public static void notNull(Object object, String errorMessageTemplate, Object... params) { + if (object == null) { + throw new BusinessException(StrUtil.format(errorMessageTemplate, params)); + } + } + + /** + * 判断调用结果相等 + * + * @param object1 + * @param object2 + * @param errorMessageTemplate + * @param params + */ + public static void isEquals(Object object1, Object object2, String errorMessageTemplate, Object... params) { + if(object1 == null) { + if (object2 == null) { + return; + } + throw new BusinessException(String.format(errorMessageTemplate, params)); + } + if (!object1.equals(object2)) { + throw new BusinessException(String.format(errorMessageTemplate, params)); + } + } +} \ No newline at end of file diff --git a/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/NoteBookVO.java b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/NoteBookVO.java new file mode 100644 index 0000000..830ed9f --- /dev/null +++ b/dubhe-server/common-biz/base/src/main/java/org/dubhe/biz/base/vo/NoteBookVO.java @@ -0,0 +1,139 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package org.dubhe.biz.base.vo;

import lombok.Data;

import java.io.Serializable;
import java.util.Date;

/**
 * @description Response body returned to the front end for a notebook
 * @date 2020-04-28
 */
@Data
public class NoteBookVO implements Serializable {

    /**
     * ID
     */
    private Long id;

    /**
     * Owning user
     */
    private Long userId;

    /**
     * NoteBook name
     */
    private String name;

    /**
     * NoteBook name
     * (NOTE(review): documented identically to {@code name} in the original - confirm how the two differ)
     */
    private String noteBookName;

    /**
     * Remark / description
     */
    private String description;

    /**
     * Accessible jupyter address
     */
    private String url;

    /**
     * Number of CPUs
     */
    private Integer cpuNum;

    /**
     * Number of GPUs
     */
    private Integer gpuNum;

    /**
     * Memory size (M)
     */
    private Integer memNum;

    /**
     * Disk size (M)
     */
    private Integer diskMemNum;

    /**
     * 0 running, 1 stopped, 2 deleted, 3 starting, 4 stopping, 5 deleting, 6 run exception (not yet enabled)
     */
    private Integer status;

    /**
     * Detail information for the status
     */
    private String statusDetail;

    /**
     * k8s response status code
     */
    private String k8sStatusCode;

    /**
     * k8s response status information
     */
    private String k8sStatusInfo;

    // presumably the image name used in k8s - undocumented in the original, confirm
    private String k8sImageName;

    /**
     * PVC storage path in k8s
     */
    private String k8sPvcPath;

    // creation timestamp (undocumented in the original)
    private Date createTime;

    // last-update timestamp (undocumented in the original)
    private Date updateTime;


    /**
     * Dataset name
     */
    private String dataSourceName;

    /**
     * Dataset path
     */
    private String dataSourcePath;

    /**
     * Algorithm ID
     */
    private Long algorithmId;

    /**
     * Resource owner ID
     */
    private Long originUserId;


    /**
     * pip package path
     */
    private String pipSitePackagePath;
}
Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.biz.base.vo; + + +import lombok.Data; +import org.dubhe.biz.base.constant.ResponseCode; +import org.slf4j.MDC; + +import java.io.Serializable; + +/** + * @description Websocket 统一的公共响应体 + * @date 2021-07-20 + */ +@Data +public class WebsocketDataResponseBody implements Serializable { + + /** + * 返回状态码 + */ + private Integer code; + /** + * 返回信息 + */ + private String msg; + /** + * 返回主题 + */ + private String topic; + /** + * 泛型数据 + */ + private T data; + /** + * 链路追踪ID + */ + private String traceId; + + public WebsocketDataResponseBody() { + this(ResponseCode.SUCCESS, null,null); + } + + + public WebsocketDataResponseBody(String topic, T data) { + this(ResponseCode.SUCCESS, null, topic, data); + } + + public WebsocketDataResponseBody(Integer code, String msg, String topic) { + this(code, msg, topic, null); + } + + public WebsocketDataResponseBody(Integer code, String msg, String topic, T data) { + this.code = code; + this.msg = msg; + this.topic = topic; + this.data = data; + this.traceId = MDC.get("traceId"); + } + + /** + * 判断是否响应成功 + * @return ture 成功,false 失败 + */ + public boolean succeed(){ + return ResponseCode.SUCCESS.equals(this.code); + } + +} diff --git a/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/api/FileStoreApi.java 
b/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/api/FileStoreApi.java index 8a2f6e4..f74f84e 100644 --- a/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/api/FileStoreApi.java +++ b/dubhe-server/common-biz/file/src/main/java/org/dubhe/biz/file/api/FileStoreApi.java @@ -57,7 +57,7 @@ public interface FileStoreApi { */ default String formatPath(String path) { if (!StringUtils.isEmpty(path)) { - return path.replaceAll("///*", File.separator); + return path.replaceAll("///*", "/"); } return path; } diff --git a/dubhe-server/common-biz/log/src/main/java/org/dubhe/biz/log/enums/LogEnum.java b/dubhe-server/common-biz/log/src/main/java/org/dubhe/biz/log/enums/LogEnum.java index 0a09a89..6106118 100644 --- a/dubhe-server/common-biz/log/src/main/java/org/dubhe/biz/log/enums/LogEnum.java +++ b/dubhe-server/common-biz/log/src/main/java/org/dubhe/biz/log/enums/LogEnum.java @@ -77,7 +77,9 @@ public enum LogEnum { //度量 MEASURE, //云端Serving - SERVING; + SERVING, + //专业版终端 + TERMINAL; /** * 判断日志类型不能为空 diff --git a/dubhe-server/common-biz/log/src/main/resources/logback.xml b/dubhe-server/common-biz/log/src/main/resources/logback.xml index efb061f..bc8007d 100644 --- a/dubhe-server/common-biz/log/src/main/resources/logback.xml +++ b/dubhe-server/common-biz/log/src/main/resources/logback.xml @@ -25,10 +25,10 @@ - logs/${log.path}/info/dubhe-info.log + /data/logs/${log.path}/info/dubhe-info.log - logs/${log.path}/info/dubhe-${app.active}-info-%d{yyyy-MM-dd}.%i.log + /data/logs/${log.path}/info/dubhe-${app.active}-info-%d{yyyy-MM-dd}.%i.log 50MB @@ -52,10 +52,10 @@ - logs/${log.path}/debug/dubhe-debug.log + /data/logs/${log.path}/debug/dubhe-debug.log - logs/${log.path}/debug/dubhe-${app.active}-debug-%d{yyyy-MM-dd}.%i.log + /data/logs/${log.path}/debug/dubhe-${app.active}-debug-%d{yyyy-MM-dd}.%i.log 50MB @@ -79,10 +79,10 @@ - logs/${log.path}/error/dubhe-error.log + /data/logs/${log.path}/error/dubhe-error.log - 
logs/${log.path}/error/dubhe-${app.active}-error-%d{yyyy-MM-dd}.%i.log + /data/logs/${log.path}/error/dubhe-${app.active}-error-%d{yyyy-MM-dd}.%i.log 50MB @@ -106,10 +106,10 @@ - logs/${log.path}/warn/dubhe-warn.log + /data/logs/${log.path}/warn/dubhe-warn.log - logs/${log.path}/warn/dubhe-${app.active}-warn-%d{yyyy-MM-dd}.%i.log + /data/logs/${log.path}/warn/dubhe-${app.active}-warn-%d{yyyy-MM-dd}.%i.log 50MB @@ -134,10 +134,10 @@ - logs/${log.path}/trace/dubhe-trace.log + /data/logs/${log.path}/trace/dubhe-trace.log - logs/${log.path}/trace/dubhe-${app.active}-trace-%d{yyyy-MM-dd}.%i.log + /data/logs/${log.path}/trace/dubhe-${app.active}-trace-%d{yyyy-MM-dd}.%i.log 50MB @@ -162,10 +162,10 @@ - logs/${log.path}/info/dubhe-schedule.log + /data/logs/${log.path}/info/dubhe-schedule.log - logs/${log.path}/info/dubhe-${app.active}-schedule-%d{yyyy-MM-dd}.%i.log + /data/logs/${log.path}/info/dubhe-${app.active}-schedule-%d{yyyy-MM-dd}.%i.log 50MB @@ -189,10 +189,10 @@ - logs/${log.path}/info/dubhe-request.log + /data/logs/${log.path}/info/dubhe-request.log - logs/${log.path}/info/dubhe-${app.active}-request-%d{yyyy-MM-dd}.%i.log + /data/logs/${log.path}/info/dubhe-${app.active}-request-%d{yyyy-MM-dd}.%i.log 50MB @@ -259,4 +259,4 @@ - \ No newline at end of file + diff --git a/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClient.java b/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClient.java index 8b826df..d107715 100644 --- a/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClient.java +++ b/dubhe-server/common-cloud/auth-config/src/main/java/org/dubhe/cloud/authconfig/service/AdminClient.java @@ -47,5 +47,4 @@ public interface AdminClient { @GetMapping(value = "/users/findByIds") DataResponseBody> getUserList(@RequestParam(value = "ids") List ids); - } diff --git 
a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-data.yml b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-data.yml deleted file mode 100644 index 23e7d7e..0000000 --- a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-data.yml +++ /dev/null @@ -1,7 +0,0 @@ -spring: - cloud: - nacos: - config: - namespace: dubhe-server-cloud-test-data - discovery: - namespace: dubhe-server-cloud-test-data diff --git a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-dev.yml b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-dev.yml index 0b3a9d3..85610a8 100644 --- a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-dev.yml +++ b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-dev.yml @@ -1,7 +1,12 @@ spring: cloud: nacos: + username: nacos + password: Tianshu + context-path: /nacos config: namespace: dubhe-server-cloud-dev + server-addr: 10.105.1.132:8848 discovery: namespace: dubhe-server-cloud-dev + server-addr: 10.105.1.132:8848 diff --git a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-pre.yml b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-pre.yml new file mode 100644 index 0000000..b8cc6ee --- /dev/null +++ b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-pre.yml @@ -0,0 +1,9 @@ +spring: + cloud: + nacos: + config: + namespace: dubhe-server-cloud-pre + server-addr: 10.105.1.133:8848 + discovery: + namespace: dubhe-server-cloud-pre + server-addr: 10.105.1.133:8848 diff --git a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-test.yml b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-test.yml index 405d24e..9352755 100644 --- a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-test.yml +++ 
b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-cloud-test.yml @@ -1,7 +1,12 @@ spring: cloud: nacos: + username: nacos + password: Tianshu + context-path: /nacos config: namespace: dubhe-server-cloud-test + server-addr: 10.105.1.132:8848 discovery: namespace: dubhe-server-cloud-test + server-addr: 10.105.1.132:8848 diff --git a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-dev.yml b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-dev.yml deleted file mode 100644 index 429a30f..0000000 --- a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-dev.yml +++ /dev/null @@ -1,7 +0,0 @@ -spring: - cloud: - nacos: - config: - namespace: dubhe-server-cloud-prod - discovery: - namespace: dubhe-server-cloud-prod diff --git a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-prod.yml b/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-prod.yml deleted file mode 100644 index 429a30f..0000000 --- a/dubhe-server/common-cloud/configuration/src/main/resources/bootstrap-prod.yml +++ /dev/null @@ -1,7 +0,0 @@ -spring: - cloud: - nacos: - config: - namespace: dubhe-server-cloud-prod - discovery: - namespace: dubhe-server-cloud-prod diff --git a/dubhe-server/common-k8s/pom.xml b/dubhe-server/common-k8s/pom.xml index 5d49fe1..2307efe 100644 --- a/dubhe-server/common-k8s/pom.xml +++ b/dubhe-server/common-k8s/pom.xml @@ -72,6 +72,21 @@ 0.0.1-SNAPSHOT compile + + com.auth0 + java-jwt + 3.4.0 + + + org.springframework.boot + spring-boot-starter-websocket + + + + com.github.docker-java + docker-java + 3.2.11 + diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/api/DockerApi.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/api/DockerApi.java new file mode 100644 index 0000000..62df18e --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/api/DockerApi.java @@ -0,0 +1,68 @@ +/** + * Copyright 2020 Tianshu AI Platform. 
package org.dubhe.docker.api;

import com.github.dockerjava.api.DockerClient;
import com.github.dockerjava.api.async.ResultCallback;
import com.github.dockerjava.api.async.ResultCallbackTemplate;
import com.github.dockerjava.api.model.PushResponseItem;

/**
 * @description docker api
 * @date 2021-07-06
 */
public interface DockerApi {

    /**
     * Removes an image without forcing.
     *
     * @param dockerClient docker connection
     * @param image repository:tag
     * @return boolean true on success, false on failure
     */
    boolean removeImage(DockerClient dockerClient,String image);
    /**
     * Removes an image.
     *
     * @param dockerClient docker connection
     * @param image repository:tag
     * @param force true: forced removal, false: not forced
     * @return boolean true on success, false on failure
     */
    boolean removeImage(DockerClient dockerClient,String image,boolean force);

    /**
     * docker commit
     *
     * @param dockerClient docker connection
     * @param containerId container id
     * @param repository repository
     * @param tag tag
     * @return result of the commit command
     *         (NOTE(review): presumably the id of the created image - confirm)
     */
    String commit(DockerClient dockerClient,String containerId,String repository,String tag);

    /**
     * Pushes an image.
     *
     * @param dockerClient docker connection
     * @param image repository:tag
     * @param resultCallback callback handed to the push command
     *        (NOTE(review): the type arguments of this parameter look stripped - confirm the intended generic signature)
     * @return boolean true on success, false on failure
     */
    boolean push(DockerClient dockerClient, String image, ResultCallbackTemplate resultCallback);
}
b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/api/impl/DockerApiImpl.java new file mode 100644 index 0000000..91fb8a3 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/api/impl/DockerApiImpl.java @@ -0,0 +1,112 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.docker.api.impl; + +import com.github.dockerjava.api.DockerClient; +import com.github.dockerjava.api.async.ResultCallbackTemplate; +import com.github.dockerjava.api.command.CommitCmd; +import com.github.dockerjava.api.model.AuthConfig; +import org.dubhe.biz.log.enums.LogEnum; +import org.dubhe.biz.log.utils.LogUtil; +import org.dubhe.docker.api.DockerApi; +import org.dubhe.docker.config.DubheDockerJavaConfig; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +/** + * @description docker api实现类 + * @date 2021-07-06 + */ +@Service +public class DockerApiImpl implements DockerApi { + @Autowired + private DubheDockerJavaConfig dubheDockerJavaConfig; + /** + * 非强制删除镜像 + * + * @param dockerClient docker连接 + * @param image repository:tag + * @return boolean 成功true,失败false + */ + @Override + public boolean removeImage(DockerClient dockerClient, String image) { + LogUtil.info(LogEnum.TERMINAL, "DockerApiImpl removeImage image:{}",image); + try{ + 
dockerClient.removeImageCmd(image).withForce(false).exec(); + return true; + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL, "DockerApiImpl removeImage error:{}",e.getMessage(), e); + return false; + } + } + + /** + * 删除镜像 + * + * @param dockerClient docker连接 + * @param image repository:tag + * @param force true:强制删除 false:非强制 + * @return boolean 成功true,失败false + */ + @Override + public boolean removeImage(DockerClient dockerClient, String image, boolean force) { + LogUtil.info(LogEnum.TERMINAL, "DockerApiImpl removeImage image:{} force:{}",image,force); + try{ + dockerClient.removeImageCmd(image).withForce(force).exec(); + return true; + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL, "DockerApiImpl removeImage error:{}",e.getMessage(), e); + return false; + } + } + + /** + * docker commit + * + * @param dockerClient docker连接 + * @param containerId 容器id + * @param repository 仓库 + * @param tag 标签 + * @return + */ + @Override + public String commit(DockerClient dockerClient, String containerId, String repository, String tag) { + LogUtil.info(LogEnum.TERMINAL, "DockerApiImpl commit containerId:{} repository:{} tag:{}",containerId,repository,tag); + try{ + CommitCmd commitCmd = dockerClient.commitCmd(containerId).withRepository(repository).withTag(tag); + return commitCmd.exec(); + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL, "DockerApiImpl removeImage error:{}",e.getMessage(), e); + return e.getMessage(); + } + } + + @Override + public boolean push(DockerClient dockerClient, String image, ResultCallbackTemplate resultCallback) { + LogUtil.info(LogEnum.TERMINAL, "DockerApiImpl push image:{}",image); + try{ + AuthConfig authConfig = new AuthConfig(); + authConfig.withUsername(dubheDockerJavaConfig.getHarborUserName()).withPassword(dubheDockerJavaConfig.getHarborPassword()); + dockerClient.pushImageCmd(image).withAuthConfig(authConfig).exec(resultCallback); + return true; + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL, "DockerApiImpl 
push error:{}",e.getMessage(), e); + return false; + } + } +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/callback/TerminalPushImageResultCallback.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/callback/TerminalPushImageResultCallback.java new file mode 100644 index 0000000..b409514 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/callback/TerminalPushImageResultCallback.java @@ -0,0 +1,97 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * =============================================================
+ */
+
+package org.dubhe.docker.callback;
+
+import com.github.dockerjava.api.DockerClient;
+import com.github.dockerjava.api.async.ResultCallbackTemplate;
+import com.github.dockerjava.api.model.PushResponseItem;
+import org.dubhe.biz.base.constant.MagicNumConstant;
+import org.dubhe.biz.base.exception.BusinessException;
+import org.dubhe.biz.log.enums.LogEnum;
+import org.dubhe.biz.log.utils.LogUtil;
+import org.dubhe.docker.domain.dto.DockerPushCallbackDTO;
+import org.dubhe.docker.utils.DockerCallbackTool;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * @description Callback for a terminal image push. Logs every progress item and reports the
+ * final outcome (success or failure) to the callback URL exactly once, closing the Docker
+ * client afterwards.
+ * @date 2021-07-22
+ */
+public class TerminalPushImageResultCallback extends ResultCallbackTemplate<TerminalPushImageResultCallback, PushResponseItem> {
+    private Long terminalId;
+    // callback URL
+    private String url;
+
+    private PushResponseItem latestItem = null;
+
+    private DockerClient dockerClient;
+
+    private Long userId;
+
+    /**
+     * Set as soon as the first outcome has been reported. An error item does not end the
+     * stream, so onComplete() can still follow it; without this guard a success callback
+     * would be sent right after the failure callback and the client closed twice.
+     */
+    private final AtomicBoolean outcomeReported = new AtomicBoolean(false);
+
+    public TerminalPushImageResultCallback(){
+
+    }
+
+    /**
+     * @param url          callback URL the outcome is posted to
+     * @param terminalId   id of the terminal whose image is pushed
+     * @param dockerClient client used for the push; closed by this callback once the outcome is reported
+     * @param userId       id of the user owning the terminal
+     */
+    public TerminalPushImageResultCallback(String url, Long terminalId, DockerClient dockerClient,Long userId){
+        this.url = url;
+        this.terminalId = terminalId;
+        this.dockerClient = dockerClient;
+        this.userId = userId;
+    }
+
+    /**
+     * Logs the progress item and reports a failure if the item carries an error detail.
+     *
+     * @param item progress item emitted by the push
+     */
+    @Override
+    public void onNext(PushResponseItem item) {
+        this.latestItem = item;
+        LogUtil.info(LogEnum.TERMINAL,"push image item: {}",item.toString());
+        if (item.getErrorDetail() != null){
+            reportOutcomeOnce(new DockerPushCallbackDTO(terminalId,item.getErrorDetail().getMessage(),true,userId));
+        }
+    }
+
+    /**
+     * Logs the transport-level failure and reports it, so the push is not left without an
+     * outcome and the Docker client is not leaked.
+     *
+     * @param throwable failure raised while streaming the push response
+     */
+    @Override
+    public void onError(Throwable throwable){
+        super.onError(throwable);
+        LogUtil.error(LogEnum.TERMINAL,"push image onError: {}",throwable.getMessage(),throwable);
+        reportOutcomeOnce(new DockerPushCallbackDTO(terminalId,throwable.getMessage(),true,userId));
+    }
+
+    /**
+     * Reports success, unless a failure has already been reported for this push.
+     */
+    @Override
+    public void onComplete(){
+        super.onComplete();
+        LogUtil.info(LogEnum.TERMINAL,"push image onComplete terminalId: {}",terminalId);
+        reportOutcomeOnce(new DockerPushCallbackDTO(terminalId,userId));
+    }
+
+    /**
+     * Sends the outcome to the callback URL and closes the Docker client; only the first call has any effect.
+     *
+     * @param outcome callback payload to send
+     */
+    private void reportOutcomeOnce(DockerPushCallbackDTO outcome) {
+        if (!outcomeReported.compareAndSet(false, true)) {
+            return;
+        }
+        try {
+            DockerCallbackTool.sendPushCallback(outcome,url,MagicNumConstant.THREE);
+        } finally {
+            closeDockerClient();
+        }
+    }
+
+    /**
+     * Closes the Docker client if one was supplied.
+     *
+     * @throws BusinessException if closing the client fails
+     */
+    private void closeDockerClient() {
+        if (dockerClient == null) {
+            // created through the no-arg constructor: nothing to release
+            return;
+        }
+        try {
+            dockerClient.close();
+        } catch (IOException e) {
+            // the message is passed as an argument so braces inside it cannot be taken for placeholders
+            LogUtil.error(LogEnum.TERMINAL,"push terminalId {} error:{}",terminalId,e.getMessage(),e);
+            throw new BusinessException("push error:"+e.getMessage());
+        }
+    }
+}
diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/config/DockerClientFactory.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/config/DockerClientFactory.java
new file mode 100644
index 0000000..0ab7818
--- /dev/null
+++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/config/DockerClientFactory.java
@@ -0,0 +1,57 @@
+/**
+ * Copyright 2020 Tianshu AI Platform. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+
+package org.dubhe.docker.config;
+
+import com.github.dockerjava.api.DockerClient;
+import com.github.dockerjava.core.DefaultDockerClientConfig;
+import com.github.dockerjava.core.DockerClientBuilder;
+import com.github.dockerjava.core.DockerClientConfig;
+import org.dubhe.biz.base.constant.SymbolConstant;
+import org.dubhe.biz.log.enums.LogEnum;
+import org.dubhe.biz.log.utils.LogUtil;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+/**
+ * @description Factory for docker-java clients that talk to a Docker daemon over TCP.
+ * @date 2021-07-05
+ */
+@Component
+public class DockerClientFactory {
+    @Autowired
+    private DubheDockerJavaConfig dubheDockerJavaConfig;
+
+    /**
+     * Builds a client for the Docker daemon on the given host, using the configured
+     * remote API port and with TLS verification turned off.
+     *
+     * @param host IP address or domain name of the Docker host
+     * @return the client, or {@code null} if it could not be created (the failure is logged)
+     */
+    public DockerClient getDockerClient(String host){
+        DockerClient client = null;
+        try {
+            String endpoint = "tcp://" + host + SymbolConstant.COLON + dubheDockerJavaConfig.getDockerRemoteApiPort();
+            DockerClientConfig clientConfig = DefaultDockerClientConfig.createDefaultConfigBuilder()
+                    .withDockerHost(endpoint)
+                    .withDockerTlsVerify(false)
+                    .build();
+            client = DockerClientBuilder.getInstance(clientConfig).build();
+        } catch (Exception e) {
+            LogUtil.error(LogEnum.TERMINAL, "DockerClientFactory getDockerClient error:{}",e.getMessage(), e);
+        }
+        return client;
+    }
+}
\ No newline at end of file
diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/config/DubheDockerJavaConfig.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/config/DubheDockerJavaConfig.java
new file mode 100644
index 0000000..1452c9c
--- /dev/null
+++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/config/DubheDockerJavaConfig.java
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2020 Tianshu AI Platform. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+
+package org.dubhe.docker.config;
+
+import lombok.Getter;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Configuration;
+
+/**
+ * @description docker-java related configuration; each field is injected from the
+ * application property named in its {@code @Value} annotation and exposed through a
+ * Lombok-generated getter.
+ * @date 2021-07-05
+ */
+@Getter
+@Configuration
+public class DubheDockerJavaConfig {
+    /** Value of the {@code docker.remote-api-port} property; DockerClientFactory appends it to the host when building the tcp:// daemon address. */
+    @Value("${docker.remote-api-port}")
+    private String dockerRemoteApiPort;
+
+    /** Value of the {@code harbor.address} property. */
+    @Value("${harbor.address}")
+    private String harborAddress;
+
+    /** Value of the {@code harbor.username} property. */
+    @Value("${harbor.username}")
+    private String harborUserName;
+
+    /** Value of the {@code harbor.password} property. */
+    @Value("${harbor.password}")
+    private String harborPassword;
+}
diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/constant/DockerCallbackConstant.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/constant/DockerCallbackConstant.java
new file mode 100644
index 0000000..0cc9c59
--- /dev/null
+++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/constant/DockerCallbackConstant.java
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2020 Tianshu AI Platform. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+
+package org.dubhe.docker.constant;
+
+/**
+ * @description Constants used when building docker callback URLs.
+ * @date 2021-07-27
+ */
+public class DockerCallbackConstant {
+    /** Path prefix of the docker callback endpoint; DockerCallbackTool.getCallbackUrl appends the action name to it. */
+    public static final String DOCKER_CALLBACK_URI = "/api/docker/callback/";
+}
diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/domain/dto/DockerPushCallbackDTO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/domain/dto/DockerPushCallbackDTO.java
new file mode 100644
index 0000000..9c9e3d9
--- /dev/null
+++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/domain/dto/DockerPushCallbackDTO.java
@@ -0,0 +1,62 @@
+/**
+ * Copyright 2020 Tianshu AI Platform. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+
+package org.dubhe.docker.domain.dto;
+
+import io.swagger.annotations.ApiModelProperty;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.experimental.Accessors;
+import org.dubhe.biz.base.constant.MagicNumConstant;
+
+import javax.validation.constraints.Min;
+import javax.validation.constraints.NotNull;
+
+/**
+ * @description Payload of the callback sent when a docker image push finishes.
+ * @date 2021-07-27
+ */
+@Data
+@NoArgsConstructor
+@Accessors(chain = true)
+public class DockerPushCallbackDTO {
+    /**
+     * Id of the terminal the push belongs to. {@code @Min} alone accepts null, so
+     * {@code @NotNull} is what actually enforces the declared "required".
+     */
+    @NotNull
+    @ApiModelProperty(value = "terminalId", required = true)
+    @Min(value = MagicNumConstant.ONE, message = "id数值不合法")
+    private Long terminalId;
+
+    /** Error message of a failed push; not set for a successful one. */
+    @ApiModelProperty(value = "错误信息", required = false)
+    private String errorMessage;
+
+    /**
+     * true when the push failed, false when it succeeded. A primitive can never be null,
+     * so no {@code @NotNull} constraint is declared here.
+     */
+    @ApiModelProperty(value = "是否错误 true:错误 false:成功")
+    private boolean error;
+
+    /** Id of the user the terminal belongs to. */
+    @ApiModelProperty(value = "用户id", required = false)
+    private Long userId;
+
+    /**
+     * Creates a success payload: {@code error} keeps its default {@code false} and no error message is set.
+     *
+     * @param terminalId id of the terminal
+     * @param userId     id of the user
+     */
+    public DockerPushCallbackDTO(Long terminalId,Long userId){
+        this.terminalId = terminalId;
+        this.userId = userId;
+    }
+
+    /**
+     * Creates a payload with an explicit outcome.
+     *
+     * @param terminalId   id of the terminal
+     * @param errorMessage error message to report
+     * @param error        true if the push failed
+     * @param userId       id of the user
+     */
+    public DockerPushCallbackDTO(Long terminalId, String errorMessage, boolean error,Long userId){
+        this.terminalId = terminalId;
+        this.errorMessage = errorMessage;
+        this.error = error;
+        this.userId = userId;
+    }
+}
diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/enums/DockerOperationEnum.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/enums/DockerOperationEnum.java
new file mode 100644
index 0000000..021df40
--- /dev/null
+++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/enums/DockerOperationEnum.java
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2020 Tianshu AI Platform. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+
+package org.dubhe.docker.enums;
+
+/**
+ * @description Docker operations, each carrying the string that identifies it.
+ * @date 2021-07-27
+ */
+public enum DockerOperationEnum {
+    /**
+     * Push
+     */
+    PUSH("push"),
+    ;
+
+    /** String identifying the operation; final because enum constants are shared singletons. */
+    private final String operation;
+
+    DockerOperationEnum(String operation) {
+        this.operation = operation;
+    }
+
+    /**
+     * @return the string identifying this operation, e.g. {@code "push"}
+     */
+    public String getType() {
+        return operation;
+    }
+}
diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/utils/DockerCallbackTool.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/utils/DockerCallbackTool.java
new file mode 100644
index 0000000..e583a57
--- /dev/null
+++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/docker/utils/DockerCallbackTool.java
@@ -0,0 +1,83 @@
+/**
+ * Copyright 2020 Tianshu AI Platform. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+
+package org.dubhe.docker.utils;
+
+import cn.hutool.http.HttpRequest;
+import cn.hutool.http.HttpResponse;
+import cn.hutool.http.HttpStatus;
+import com.alibaba.fastjson.JSON;
+import org.dubhe.biz.base.constant.MagicNumConstant;
+import org.dubhe.biz.base.constant.SymbolConstant;
+import org.dubhe.biz.log.enums.LogEnum;
+import org.dubhe.biz.log.utils.LogUtil;
+import org.dubhe.docker.config.DubheDockerJavaConfig;
+import org.dubhe.docker.constant.DockerCallbackConstant;
+import org.dubhe.docker.domain.dto.DockerPushCallbackDTO;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+/**
+ * @description Helpers for docker callbacks: building the callback URL and posting the push result to it.
+ * @date 2020-07-13
+ */
+@Component
+public class DockerCallbackTool {
+    /**
+     * HTTP request timeout, in milliseconds
+     */
+    private static final int TIMEOUT_MILLISECOND = 20 * 1000;
+
+    @Autowired
+    private DubheDockerJavaConfig dubheDockerJavaConfig;
+
+    /**
+     * Builds the callback URL for an action.
+     *
+     * @param host   host name or IP
+     * @param port   port
+     * @param action action name appended after the docker callback path
+     * @return URL made of the HTTP scheme prefix, host, colon, port, callback path and action
+     */
+    public String getCallbackUrl(String host,String port,String action){
+        return SymbolConstant.HTTP_SLASH+host+SymbolConstant.COLON+port+ DockerCallbackConstant.DOCKER_CALLBACK_URI+action;
+    }
+
+    /**
+     * Posts the image push result to the callback URL as JSON, retrying until the receiver
+     * answers with HTTP 200 or the retries are used up. Both a non-200 status and an
+     * exception (timeout, refused connection, ...) count as a failed attempt; failures are
+     * logged and never propagated to the caller.
+     *
+     * @param dockerPushCallbackDTO callback payload
+     * @param url                   callback URL
+     * @param count                 number of retries allowed after the first attempt; null is treated as 0
+     */
+    public static void sendPushCallback(DockerPushCallbackDTO dockerPushCallbackDTO, String url,Integer count){
+        int retriesLeft = count == null ? MagicNumConstant.ZERO : count;
+        while (true) {
+            // try-with-resources releases the connection held by the response on every attempt
+            try (HttpResponse httpResponse = postCallback(dockerPushCallbackDTO, url, retriesLeft)) {
+                LogUtil.info(LogEnum.TERMINAL, "{} sendPushCallback {} count {} status:{}", url, dockerPushCallbackDTO,retriesLeft,httpResponse.getStatus());
+                if (HttpStatus.HTTP_OK == httpResponse.getStatus()) {
+                    return;
+                }
+            } catch (Exception e) {
+                LogUtil.error(LogEnum.TERMINAL, "{} sendPushCallback {} count {} error:{} ", url, dockerPushCallbackDTO,retriesLeft,e.getMessage(),e);
+            }
+            // retry
+            if (retriesLeft <= MagicNumConstant.ZERO) {
+                return;
+            }
+            retriesLeft--;
+        }
+    }
+
+    /**
+     * Performs one POST of the payload to the callback URL.
+     *
+     * @param dockerPushCallbackDTO callback payload
+     * @param url                   callback URL
+     * @param retriesLeft           remaining retries, logged only
+     * @return the HTTP response; the caller must close it
+     */
+    private static HttpResponse postCallback(DockerPushCallbackDTO dockerPushCallbackDTO, String url, int retriesLeft) {
+        LogUtil.info(LogEnum.TERMINAL, "{} sendPushCallback {} count {}", url, dockerPushCallbackDTO,retriesLeft);
+        return HttpRequest.post(url)
+                .body(JSON.toJSONString(dockerPushCallbackDTO))
+                .timeout(TIMEOUT_MILLISECOND)
+                .execute();
+    }
+}
diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PersistentVolumeClaimApi.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PersistentVolumeClaimApi.java
index cbcdf0a..e3f734f 100644
--- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PersistentVolumeClaimApi.java
+++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/PersistentVolumeClaimApi.java
@@ -103,6 +103,14 @@ public interface PersistentVolumeClaimApi {
      */
     boolean deletePv(String pvName);
 
+    /**
+     * 删除PV
+     *
+     * @param resourceName 资源名称
+     * @return boolean true成功 false失败
+     */
+    boolean deletePvByResourceName(String resourceName);
+
     /**
      * 查询PV
      *
diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/TerminalApi.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/TerminalApi.java
new file mode 100644
index 0000000..c033a0f
--- /dev/null
+++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/TerminalApi.java
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2020 Tianshu AI Platform. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+
+package org.dubhe.k8s.api;
+
+import org.dubhe.k8s.domain.PtBaseResult;
+import org.dubhe.k8s.domain.bo.TerminalBO;
+import org.dubhe.k8s.domain.vo.TerminalResourceVO;
+
+/**
+ * @description Professional-edition terminal API: create, delete and query terminal resources.
+ * @date 2021-06-29
+ */
+public interface TerminalApi {
+    /**
+     * Create
+     *
+     * @param bo parameters of the terminal to create
+     * @return TerminalResourceVO describing the created resources
+     */
+    TerminalResourceVO create(TerminalBO bo);
+
+    /**
+     * Delete
+     * @param namespace namespace
+     * @param resourceName resource name
+     * @return PtBaseResult basic result class
+     */
+    PtBaseResult delete(String namespace, String resourceName);
+
+    /**
+     * Query
+     * @param namespace namespace
+     * @param resourceName resource name
+     * @return TerminalResourceVO for the given namespace and resource name
+     */
+    TerminalResourceVO get(String namespace, String resourceName);
+}
diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DistributeTrainApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DistributeTrainApiImpl.java
index 1b5c9e5..0a2c506 100644
--- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DistributeTrainApiImpl.java
+++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DistributeTrainApiImpl.java
@@ -147,7 +147,7 @@ public class DistributeTrainApiImpl implements DistributeTrainApi {
     @Override
     public BizDistributeTrain create(DistributeTrainBO bo) {
         LogUtil.info(LogEnum.BIZ_K8S, "Params of creating DistributeTrain--create:{}", bo);
-        LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum() * bo.getSize(), bo.getMemNum() * bo.getSize(), bo.getGpuNum() * bo.getSize());
+        LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum() * bo.getSize(), bo.getMemNum() * bo.getSize(), bo.getGpuNum() == null?0:bo.getGpuNum() * bo.getSize());
         if (!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) {
             return new BizDistributeTrain().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(),
limitsOfResources.getMessage()); } @@ -183,6 +183,7 @@ public class DistributeTrainApiImpl implements DistributeTrainApi { private Map env; private Map baseLabels; private String businessLabel; + private String taskIdentifyLabel; private Integer delayCreate; private Integer delayDelete; private TaskYamlBO taskYamlBO; @@ -201,7 +202,8 @@ public class DistributeTrainApiImpl implements DistributeTrainApi { this.slaveCmd = bo.getSlaveCmd(); this.env = bo.getEnv(); this.businessLabel = bo.getBusinessLabel(); - this.baseLabels = LabelUtils.getChildLabels(baseName, distributeTrainName, K8sKindEnum.DISTRIBUTETRAIN.getKind(), businessLabel); + this.taskIdentifyLabel = bo.getTaskIdentifyLabel(); + this.baseLabels = LabelUtils.getChildLabels(baseName, distributeTrainName, K8sKindEnum.DISTRIBUTETRAIN.getKind(), businessLabel, taskIdentifyLabel); this.delayCreate = bo.getDelayCreateTime(); this.delayDelete = bo.getDelayDeleteTime(); this.taskYamlBO = new TaskYamlBO(); diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DubheDeploymentApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DubheDeploymentApiImpl.java index f688dd1..1ff656f 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DubheDeploymentApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/DubheDeploymentApiImpl.java @@ -225,6 +225,7 @@ public class DubheDeploymentApiImpl implements DubheDeploymentApi { private Map resourcesLimitsMap; private Map baseLabels; private String businessLabel; + private String taskIdentifyLabel; private Integer gpuNum; @@ -249,6 +250,7 @@ public class DubheDeploymentApiImpl implements DubheDeploymentApi { Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString()))); Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), 
K8sParamConstants.MEM_UNIT))); this.businessLabel = bo.getBusinessLabel(); + this.taskIdentifyLabel = bo.getTaskIdentifyLabel(); this.baseLabels = LabelUtils.getBaseLabels(baseName, businessLabel); this.datasetReadOnly = true; @@ -310,7 +312,7 @@ public class DubheDeploymentApiImpl implements DubheDeploymentApi { * @return Deployment Deployment 业务类 */ private Deployment buildDeployment() { - Map childLabels = LabelUtils.getChildLabels(baseName, deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), businessLabel); + Map childLabels = LabelUtils.getChildLabels(baseName, deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), businessLabel, taskIdentifyLabel); LabelSelector labelSelector = new LabelSelector(); labelSelector.setMatchLabels(childLabels); return new DeploymentBuilder() diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/JupyterResourceApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/JupyterResourceApiImpl.java index e74b306..2d82e0a 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/JupyterResourceApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/JupyterResourceApiImpl.java @@ -26,6 +26,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; import io.fabric8.kubernetes.api.model.Container; import io.fabric8.kubernetes.api.model.ContainerPortBuilder; +import io.fabric8.kubernetes.api.model.EmptyDirVolumeSource; import io.fabric8.kubernetes.api.model.EnvVar; import io.fabric8.kubernetes.api.model.EnvVarBuilder; import io.fabric8.kubernetes.api.model.IntOrString; @@ -124,14 +125,19 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { private static final String DATASET = "/dataset"; private static final String WORKSPACE = "/workspace"; + private static final String DSHM_PATH = "/dev/shm"; + private static final String K8S_PIP_SITE_PACKAGE = "/home/admin/.local/lib/python3.8/site-packages"; private static final 
String PVC_DATASET = "pvc-dataset"; private static final String PVC_WORKSPACE = "pvc-workspace"; + private static final String PVC_PIP_SITE_PACKAGE = "pvc-pip-site-package"; private static final String CONTAINER_NAME = "web"; private static final Integer CONTAINER_PORT = 8888; private static final Integer SVC_PORT = 32680; private static final String NOTEBOOK_MAX_UPLOAD_SIZE = "100m"; + private static final String DSHM = "dshm"; + private static final String DSHM_MEDIUM = "Memory"; public JupyterResourceApiImpl(K8sUtils k8sUtils) { this.k8sUtils = k8sUtils; @@ -161,7 +167,7 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { return new PtJupyterDeployVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), lack.getMessage()); } - if (!fileStoreApi.createDirs(bo.getWorkspaceDir(), bo.getDatasetDir())) { + if (!fileStoreApi.createDirs(bo.getWorkspaceDir(), bo.getDatasetDir(),bo.getPipSitePackageDir())) { return new PtJupyterDeployVO().error(K8sResponseEnum.INTERNAL_SERVER_ERROR.getCode(), K8sResponseEnum.INTERNAL_SERVER_ERROR.getMessage()); } resourceCache.deletePodCacheByResourceName(bo.getNamespace(), bo.getName()); @@ -299,9 +305,12 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { private String image; private String datasetDir; private String datasetMountPath; + private String pipSitePackageDir; + private String pipSitePackageMountPath; private String workspaceMountPath; private String workspaceDir; private Boolean useGpu; + private Quantity shmMemory; //数据集默认只读 private boolean datasetReadOnly; @@ -316,6 +325,7 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { private String baseUrl; private String secondaryDomain; private String businessLabel; + private String taskIdentifyLabel; private Integer delayDelete; private List volumeMounts; @@ -329,6 +339,8 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { this.image = bo.getImage(); this.datasetDir = bo.getDatasetDir(); 
this.datasetMountPath = StringUtils.isEmpty(bo.getDatasetMountPath()) ? DATASET : bo.getDatasetMountPath(); + this.pipSitePackageDir=bo.getPipSitePackageDir(); + this.pipSitePackageMountPath=StringUtils.isEmpty(bo.getPipSitePackageMountPath()) ? K8S_PIP_SITE_PACKAGE : bo.getPipSitePackageMountPath(); this.workspaceDir = bo.getWorkspaceDir(); this.workspaceMountPath = StringUtils.isEmpty(bo.getWorkspaceMountPath()) ? WORKSPACE : bo.getWorkspaceMountPath(); Optional.ofNullable(bo.getDatasetReadOnly()).ifPresent(v -> datasetReadOnly = v); @@ -342,12 +354,15 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { Optional.ofNullable(bo.getCpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_CPU_KEY, new Quantity(v.toString(), K8sParamConstants.CPU_UNIT))); Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString()))); Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT))); - + this.shmMemory = new Quantity("1024",K8sParamConstants.MEM_UNIT); + // 共享内存设置为容器内存的一半(参考 Linux 的默认设置) + Optional.ofNullable(bo.getMemNum()).ifPresent(v -> shmMemory.setAmount(String.valueOf(v/2))); this.host = k8sUtils.getHost(); this.businessLabel = bo.getBusinessLabel(); + this.taskIdentifyLabel = bo.getTaskIdentifyLabel(); this.delayDelete = bo.getDelayDeleteTime(); this.baseLabels = LabelUtils.getBaseLabels(baseName, businessLabel); - this.podLabels = LabelUtils.getChildLabels(baseName, statefulSetName, K8sKindEnum.STATEFULSET.getKind(), businessLabel); + this.podLabels = LabelUtils.getChildLabels(baseName, statefulSetName, K8sKindEnum.STATEFULSET.getKind(), businessLabel, taskIdentifyLabel); //生成附属资源的名称 generateResourceName(); @@ -454,6 +469,41 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { } } + /** + * 构建 Shm VolumeMount + */ + private void 
buildShmFsVolume() { + volumeMounts.add(new VolumeMountBuilder() + .withName(DSHM) + .withMountPath(DSHM_PATH) + .build()); + + volumes.add(new VolumeBuilder() + .withName(DSHM) + .withEmptyDir(new EmptyDirVolumeSource(DSHM_MEDIUM, shmMemory)) + .build()); + } + + /** + * 挂载pip包路径 + */ + private void buildPipSitePackageFsVolume(){ + if (StrUtil.isNotBlank(pipSitePackageDir)) { + volumeMounts.add(new VolumeMountBuilder() + .withName(PVC_PIP_SITE_PACKAGE) + .withMountPath(pipSitePackageMountPath) + .build()); + + volumes.add(new VolumeBuilder() + .withName(PVC_PIP_SITE_PACKAGE) + .withNewHostPath() + .withPath(pipSitePackageDir) + .withType(K8sParamConstants.HOST_PATH_TYPE) + .endHostPath() + .build()); + } + } + /** * 构建VolumeMount */ @@ -498,8 +548,10 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { * @return JupyterDeployer Notebook 部署类 */ private JupyterDeployer buildFsVolumes() { + buildPipSitePackageFsVolume(); buildDatasetFsVolume(); buildWorkspaceFsVolume(); + buildShmFsVolume(); return this; } @@ -509,8 +561,10 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { * @return JupyterDeployer Notebook 部署类 */ private JupyterDeployer buildFsPvcVolumes() { + buildPipSitePackageFsVolume(); buildDatasetFsVolume(); buildWorkspaceFsPvcVolume(); + buildShmFsVolume(); return this; } @@ -585,7 +639,6 @@ public class JupyterResourceApiImpl implements JupyterResourceApi { .withNewSpec() .withTerminationGracePeriodSeconds(ZERO_LONG) .addToNodeSelector(gpuLabel) - .withTerminationGracePeriodSeconds(SIXTY_LONG) .addToContainers(container) .addToVolumes(volumes.toArray(new Volume[0])) .endSpec() diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/MetricsApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/MetricsApiImpl.java index b4f1e9d..2e3ee27 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/MetricsApiImpl.java +++ 
b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/MetricsApiImpl.java @@ -18,7 +18,6 @@ package org.dubhe.k8s.api.impl; import cn.hutool.core.util.NumberUtil; -import cn.hutool.core.util.StrUtil; import io.fabric8.kubernetes.api.model.Quantity; import io.fabric8.kubernetes.api.model.metrics.v1beta1.ContainerMetrics; import io.fabric8.kubernetes.api.model.metrics.v1beta1.NodeMetricsList; @@ -40,10 +39,7 @@ import org.dubhe.k8s.domain.dto.PodQueryDTO; import org.dubhe.k8s.domain.resource.BizContainer; import org.dubhe.k8s.domain.resource.BizPod; import org.dubhe.k8s.domain.resource.BizQuantity; -import org.dubhe.k8s.domain.vo.PodRangeMetricsVO; -import org.dubhe.k8s.domain.vo.PtContainerMetricsVO; -import org.dubhe.k8s.domain.vo.PtNodeMetricsVO; -import org.dubhe.k8s.domain.vo.PtPodsVO; +import org.dubhe.k8s.domain.vo.*; import org.dubhe.k8s.utils.BizConvertUtils; import org.dubhe.k8s.utils.K8sUtils; import org.dubhe.k8s.utils.PrometheusUtil; @@ -83,6 +79,17 @@ public class MetricsApiImpl implements MetricsApi { */ @Value("${k8s.prometheus.gpu-query-param}") private String k8sPrometheusGpuQueryParam; + /** + * prometheus gpu显存总量指标查询参数 + */ + @Value("${k8s.prometheus.gpu-mem-total-query-param}") + private String k8sPrometheusGpuMemTotalQueryParam; + + /** + * prometheus gpu显存使用量指标查询参数 + */ + @Value("${k8s.prometheus.gpu-mem-use-query-param}") + private String k8sPrometheusGpuMemUseQueryParam; /** * prometheus cpu指标范围查询参数 */ @@ -99,6 +106,18 @@ public class MetricsApiImpl implements MetricsApi { @Value("${k8s.prometheus.gpu-range-query-param}") private String k8sPrometheusGpuRangeQueryParam; + /** + * prometheus gpu显存总量指标范围查询参数 + */ + @Value("${k8s.prometheus.gpu-mem-total-range-query-param}") + private String k8sPrometheusGpuMemTotalRangeQueryParam; + + /** + * prometheus gpu显存使用量指标范围查询参数 + */ + @Value("${k8s.prometheus.gpu-mem-use-range-query-param}") + private String k8sPrometheusGpuMemUseRangeQueryParam; + public MetricsApiImpl(K8sUtils k8sUtils) { 
this.client = k8sUtils.getClient(); } @@ -141,7 +160,7 @@ public class MetricsApiImpl implements MetricsApi { List list = new ArrayList<>(); /**将Pod和podName形成映射关系**/ Map> listMap = client.pods().inAnyNamespace().list().getItems().parallelStream().map(obj -> BizConvertUtils.toBizPod(obj)).collect(Collectors.groupingBy(BizPod::getName)); - if(null == listMap) { + if (null == listMap) { return list; } metrics.getItems().stream().forEach(metric -> { @@ -149,7 +168,7 @@ public class MetricsApiImpl implements MetricsApi { List containers = metric.getContainers(); containers.stream().forEach(containerMetrics -> { Map usage = containerMetrics.getUsage(); - PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(),metric.getMetadata().getName(), + PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(), metric.getMetadata().getName(), usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getAmount(), usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getFormat(), usage.get(K8sParamConstants.QUANTITY_MEMORY_KEY).getAmount(), @@ -158,7 +177,7 @@ public class MetricsApiImpl implements MetricsApi { listMap.get(metric.getMetadata().getName()).get(0).getPhase(), null); List containerList = listMap.get(metric.getMetadata().getName()).get(0).getContainers(); - countGpuUsed(containerList,ptContainerMetricsResult); + countGpuUsed(containerList, ptContainerMetricsResult); list.add(ptContainerMetricsResult); }); }); @@ -185,7 +204,7 @@ public class MetricsApiImpl implements MetricsApi { containers.stream().forEach(containerMetrics -> { Map usage = containerMetrics.getUsage(); - PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(),metric.getMetadata().getName(), + PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(), metric.getMetadata().getName(), usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getAmount(), usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getFormat(), 
usage.get(K8sParamConstants.QUANTITY_MEMORY_KEY).getAmount(), @@ -194,7 +213,7 @@ public class MetricsApiImpl implements MetricsApi { bizPod.getPhase(), null ); List containerList = bizPod.getContainers(); - countGpuUsed(containerList,ptContainerMetricsResult); + countGpuUsed(containerList, ptContainerMetricsResult); list.add(ptContainerMetricsResult); }); } @@ -215,7 +234,7 @@ public class MetricsApiImpl implements MetricsApi { * @param containerList BizContainer对象 * @param ptContainerMetricsResult 封装pod信息 */ - private void countGpuUsed(List containerList,PtPodsVO ptContainerMetricsResult){ + private void countGpuUsed(List containerList, PtPodsVO ptContainerMetricsResult) { for (BizContainer container : containerList) { Map limits = container.getLimits(); if (limits == null) { @@ -238,24 +257,24 @@ public class MetricsApiImpl implements MetricsApi { @Override public List getPodMetricsRealTime(String namespace, String resourceName) { List ptPodsVOS = new ArrayList<>(); - if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(resourceName)){ + if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(resourceName)) { return ptPodsVOS; } - List pods = podApi.getListByResourceName(namespace,resourceName); - if (CollectionUtils.isEmpty(pods)){ + List pods = podApi.getListByResourceName(namespace, resourceName); + if (CollectionUtils.isEmpty(pods)) { return ptPodsVOS; } List podMetricsList = client.top().pods().metrics(namespace).getItems(); - if (!CollectionUtils.isEmpty(pods)){ - Map podMetricsMap = podMetricsList.stream().collect(Collectors.toMap(obj -> obj.getMetadata().getName(), obj -> obj)); - for (BizPod pod : pods){ - List ptPodsVOList = getPtPodsVO(pod,podMetricsMap.get(pod.getName())); - if (!CollectionUtils.isEmpty(ptPodsVOList)){ + if (!CollectionUtils.isEmpty(pods)) { + Map podMetricsMap = podMetricsList.stream().collect(Collectors.toMap(obj -> obj.getMetadata().getName(), obj -> obj)); + for (BizPod pod : pods) { + List ptPodsVOList = getPtPodsVO(pod, 
podMetricsMap.get(pod.getName())); + if (!CollectionUtils.isEmpty(ptPodsVOList)) { ptPodsVOS.addAll(ptPodsVOList); } } } - for (PtPodsVO ptPodsVO : ptPodsVOS){ + for (PtPodsVO ptPodsVO : ptPodsVOS) { generateGpuUsage(ptPodsVO); ptPodsVO.calculationPercent(); } @@ -271,21 +290,21 @@ public class MetricsApiImpl implements MetricsApi { @Override public List getPodMetricsRealTimeByPodName(String namespace, String podName) { List ptPodsVOS = new ArrayList<>(); - if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(podName)){ + if (StringUtils.isEmpty(namespace) || StringUtils.isEmpty(podName)) { return ptPodsVOS; } - BizPod pod = podApi.get(namespace,podName); - if (null == pod){ + BizPod pod = podApi.get(namespace, podName); + if (null == pod) { return ptPodsVOS; } PodMetrics podMetrics = null; - try{ - podMetrics = client.top().pods().metrics(namespace,podName); - }catch (KubernetesClientException e){ + try { + podMetrics = client.top().pods().metrics(namespace, podName); + } catch (KubernetesClientException e) { LogUtil.error(LogEnum.BIZ_K8S, "MetricsApiImpl.getPodMetricsRealTimeByPodName error:{}", e); } - ptPodsVOS = getPtPodsVO(pod,podMetrics); - for (PtPodsVO ptPodsVO : ptPodsVOS){ + ptPodsVOS = getPtPodsVO(pod, podMetrics); + for (PtPodsVO ptPodsVO : ptPodsVOS) { generateGpuUsage(ptPodsVO); ptPodsVO.calculationPercent(); } @@ -301,8 +320,8 @@ public class MetricsApiImpl implements MetricsApi { @Override public List getPodMetricsRealTimeByPodName(String namespace, List podNames) { List ptPodsVOS = new ArrayList<>(); - for (String podName : podNames){ - ptPodsVOS.addAll(getPodMetricsRealTimeByPodName(namespace,podName)); + for (String podName : podNames) { + ptPodsVOS.addAll(getPodMetricsRealTimeByPodName(namespace, podName)); } return ptPodsVOS; } @@ -315,16 +334,16 @@ public class MetricsApiImpl implements MetricsApi { @Override public List getPodRangeMetrics(PodQueryDTO podQueryDTO) { List podRangeMetricsVOS = new ArrayList<>(); - if 
(StringUtils.isEmpty(podQueryDTO.getNamespace()) || StringUtils.isEmpty(podQueryDTO.getResourceName())){ + if (StringUtils.isEmpty(podQueryDTO.getNamespace()) || StringUtils.isEmpty(podQueryDTO.getResourceName())) { return podRangeMetricsVOS; } - List pods = podApi.getListByResourceName(podQueryDTO.getNamespace(),podQueryDTO.getResourceName()); - if (CollectionUtils.isEmpty(pods)){ + List pods = podApi.getListByResourceName(podQueryDTO.getNamespace(), podQueryDTO.getResourceName()); + if (CollectionUtils.isEmpty(pods)) { return podRangeMetricsVOS; } podQueryDTO.generateDefaultParam(); - for (BizPod pod : pods){ - podRangeMetricsVOS.add(getPodRangeMetricsVO(pod,podQueryDTO)); + for (BizPod pod : pods) { + podRangeMetricsVOS.add(getPodRangeMetricsVO(pod, podQueryDTO)); } return podRangeMetricsVOS; } @@ -337,16 +356,16 @@ public class MetricsApiImpl implements MetricsApi { @Override public List getPodRangeMetricsByPodName(PodQueryDTO podQueryDTO) { List podRangeMetricsVOS = new ArrayList<>(); - if (StringUtils.isEmpty(podQueryDTO.getNamespace()) || CollectionUtils.isEmpty(podQueryDTO.getPodNames())){ + if (StringUtils.isEmpty(podQueryDTO.getNamespace()) || CollectionUtils.isEmpty(podQueryDTO.getPodNames())) { return podRangeMetricsVOS; } - List pods = podApi.get(podQueryDTO.getNamespace(),podQueryDTO.getPodNames()); - if (null == pods){ + List pods = podApi.get(podQueryDTO.getNamespace(), podQueryDTO.getPodNames()); + if (null == pods) { return podRangeMetricsVOS; } podQueryDTO.generateDefaultParam(); - for (BizPod pod : pods){ - podRangeMetricsVOS.add(getPodRangeMetricsVO(pod,podQueryDTO)); + for (BizPod pod : pods) { + podRangeMetricsVOS.add(getPodRangeMetricsVO(pod, podQueryDTO)); } return podRangeMetricsVOS; } @@ -358,22 +377,40 @@ public class MetricsApiImpl implements MetricsApi { * @param podQueryDTO 查询参数 * @return PodRangeMetricsVO Pod历史监控指标 VO */ - private PodRangeMetricsVO getPodRangeMetricsVO(BizPod pod,PodQueryDTO podQueryDTO){ + private PodRangeMetricsVO 
getPodRangeMetricsVO(BizPod pod, PodQueryDTO podQueryDTO) { PodRangeMetricsVO podRangeMetricsVO = new PodRangeMetricsVO(pod.getName()); - PrometheusMetricBO cpuRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl+k8sPrometheusQueryRange,PrometheusUtil.getQueryParamMap(k8sPrometheusCpuRangeQueryParam,pod.getName(),podQueryDTO)); - PrometheusMetricBO memRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl+k8sPrometheusQueryRange,PrometheusUtil.getQueryParamMap(k8sPrometheusMemRangeQueryParam,pod.getName(),podQueryDTO)); - PrometheusMetricBO gpuRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl+k8sPrometheusQueryRange,PrometheusUtil.getQueryParamMap(k8sPrometheusGpuRangeQueryParam,pod.getName(),podQueryDTO)); + PrometheusMetricBO cpuRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQueryRange, PrometheusUtil.getQueryParamMap(k8sPrometheusCpuRangeQueryParam, pod.getName(), podQueryDTO)); + PrometheusMetricBO memRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQueryRange, PrometheusUtil.getQueryParamMap(k8sPrometheusMemRangeQueryParam, pod.getName(), podQueryDTO)); + PrometheusMetricBO gpuRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQueryRange, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuRangeQueryParam, pod.getName(), podQueryDTO)); + PrometheusMetricBO gpuMemTotalRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQueryRange, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuMemTotalRangeQueryParam, pod.getName(), podQueryDTO)); + PrometheusMetricBO gpuMemUseRangeMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQueryRange, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuMemUseRangeQueryParam, pod.getName(), podQueryDTO)); - StringFormat cpuMetricsFormat = (value)->{ + StringFormat cpuMetricsFormat = (value) -> { return value == null ? 
String.valueOf(MagicNumConstant.ZERO) : NumberUtil.round(Double.valueOf(value.toString()), MagicNumConstant.TWO).toString(); }; podRangeMetricsVO.setCpuMetrics(cpuRangeMetrics.getValues(cpuMetricsFormat)); - StringFormat memMetricsFormat = (value)->{ + StringFormat memMetricsFormat = (value) -> { return NumberUtil.isNumber(String.valueOf(value)) ? String.valueOf(Long.valueOf(String.valueOf(value)) / MagicNumConstant.BINARY_TEN_EXP) : String.valueOf(MagicNumConstant.ZERO); }; podRangeMetricsVO.setMemoryMetrics(memRangeMetrics.getValues(memMetricsFormat)); - podRangeMetricsVO.setGpuMetrics(gpuRangeMetrics.getResults()); + Map> gpuMetricsResults = gpuRangeMetrics.getGpuMetricsResults(); + List gpuTotalMemResults = gpuMemTotalRangeMetrics.getGpuTotalMemResults(); + Map> gpuMemResults = gpuMemUseRangeMetrics.getGpuMemResults(); + List gpuMetricsDataResultVOS = gpuTotalMemResults.stream().map(x -> { + GpuMetricsDataResultVO gpuMetricsDataResultVO = new GpuMetricsDataResultVO(); + gpuMetricsDataResultVO.setAccId(x.getAccId()).setTotalMemValues(x.getGpuTotalMemValue()); + if (gpuMemResults.containsKey(x.getAccId())) { + gpuMetricsDataResultVO.setGpuMemValues(gpuMemResults.get(x.getAccId())); + } + if (gpuMetricsResults.containsKey(x.getAccId())) { + gpuMetricsDataResultVO.setGpuMetricsValues(gpuMetricsResults.get(x.getAccId())); + } + return gpuMetricsDataResultVO; + } + ).collect(Collectors.toList()); + + podRangeMetricsVO.setGpuMetrics(gpuMetricsDataResultVOS); return podRangeMetricsVO; } @@ -381,12 +418,31 @@ public class MetricsApiImpl implements MetricsApi { * 查询Gpu使用率 * @param ptPodsVO pod信息 */ - private void generateGpuUsage(PtPodsVO ptPodsVO){ - PrometheusMetricBO prometheusMetricBO = PrometheusUtil.getQuery(k8sPrometheusUrl+k8sPrometheusQuery, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuQueryParam,ptPodsVO.getPodName())); - if (prometheusMetricBO == null){ + private void generateGpuUsage(PtPodsVO ptPodsVO) { + PrometheusMetricBO prometheusMetricBO = 
PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuQueryParam, ptPodsVO.getPodName())); + PrometheusMetricBO gpuMemTotalMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuMemTotalQueryParam, ptPodsVO.getPodName())); + PrometheusMetricBO gpuMemUseMetrics = PrometheusUtil.getQuery(k8sPrometheusUrl + k8sPrometheusQuery, PrometheusUtil.getQueryParamMap(k8sPrometheusGpuMemUseQueryParam, ptPodsVO.getPodName())); + + if (prometheusMetricBO == null || gpuMemTotalMetrics == null || gpuMemUseMetrics == null) { return; } - ptPodsVO.setGpuUsagePersent(prometheusMetricBO.getGpuUsage()); + List gpuTotalMemValue = gpuMemTotalMetrics.getGpuTotalMemValue(); + Map gpuMemValue = gpuMemUseMetrics.getGpuMemValue(); + Map gpuUsage = prometheusMetricBO.getGpuUsage(); + + List gpuValueVOS = gpuTotalMemValue.stream().map(x -> { + GpuValueVO gpuValueVO = new GpuValueVO(); + gpuValueVO.setAccId(x.getAccId()).setGpuTotalMemValue(x.getGpuTotalMemValue()); + if (gpuMemValue.containsKey(x.getAccId())) { + gpuValueVO.setGpuMemValue(gpuMemValue.get(x.getAccId())); + } + if (gpuUsage.containsKey(x.getAccId())) { + gpuValueVO.setUsage(gpuUsage.get(x.getAccId())); + } + return gpuValueVO; + } + ).collect(Collectors.toList()); + ptPodsVO.setGpuUsagePersent(gpuValueVOS); } /** @@ -395,22 +451,22 @@ public class MetricsApiImpl implements MetricsApi { * @param metric 查询指标 * @return List pod信息列表 */ - private List getPtPodsVO(BizPod bizPod,PodMetrics metric){ + private List getPtPodsVO(BizPod bizPod, PodMetrics metric) { List ptPodsVOList = new ArrayList<>(); - if (metric == null){ + if (metric == null) { return ptPodsVOList; } - Map containerMetricsMap = metric.getContainers().stream().collect(Collectors.toMap(obj -> obj.getName(), obj -> obj)); - for (BizContainer container : bizPod.getContainers()){ + Map containerMetricsMap = 
metric.getContainers().stream().collect(Collectors.toMap(obj -> obj.getName(), obj -> obj)); + for (BizContainer container : bizPod.getContainers()) { Map request = container.getRequests(); - if (containerMetricsMap.get(container.getName()) == null){ + if (containerMetricsMap.get(container.getName()) == null) { continue; } Map usage = containerMetricsMap.get(container.getName()).getUsage(); - PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(),metric.getMetadata().getName(), - request.get(K8sParamConstants.QUANTITY_CPU_KEY) ==null ? null : request.get(K8sParamConstants.QUANTITY_CPU_KEY).getAmount(), + PtPodsVO ptContainerMetricsResult = new PtPodsVO(metric.getMetadata().getNamespace(), metric.getMetadata().getName(), + request.get(K8sParamConstants.QUANTITY_CPU_KEY) == null ? null : request.get(K8sParamConstants.QUANTITY_CPU_KEY).getAmount(), usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getAmount(), - request.get(K8sParamConstants.QUANTITY_CPU_KEY) ==null ? null : request.get(K8sParamConstants.QUANTITY_CPU_KEY).getFormat(), + request.get(K8sParamConstants.QUANTITY_CPU_KEY) == null ? null : request.get(K8sParamConstants.QUANTITY_CPU_KEY).getFormat(), usage.get(K8sParamConstants.QUANTITY_CPU_KEY).getFormat(), request.get(K8sParamConstants.QUANTITY_MEMORY_KEY) == null ? 
null : request.get(K8sParamConstants.QUANTITY_MEMORY_KEY).getAmount(), usage.get(K8sParamConstants.QUANTITY_MEMORY_KEY).getAmount(), @@ -430,7 +486,8 @@ public class MetricsApiImpl implements MetricsApi { ptContainerMetricsResult.setGpuUsed(count); } ptPodsVOList.add(ptContainerMetricsResult); - }; + } + ; return ptPodsVOList; } @@ -443,7 +500,7 @@ public class MetricsApiImpl implements MetricsApi { */ @Override public List getContainerMetrics(String namespace) { - if(StringUtils.isEmpty(namespace)){ + if (StringUtils.isEmpty(namespace)) { return Collections.EMPTY_LIST; } try { diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelOptJobApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelOptJobApiImpl.java index acd5939..8062e9d 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelOptJobApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelOptJobApiImpl.java @@ -212,6 +212,7 @@ public class ModelOptJobApiImpl implements ModelOptJobApi { private List volumeMounts; private List volumes; private String businessLabel; + private String taskIdentifyLabel; private Integer gpuNum; private String errCode; @@ -231,6 +232,7 @@ public class ModelOptJobApiImpl implements ModelOptJobApi { Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString()))); Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT))); this.businessLabel = bo.getBusinessLabel(); + this.taskIdentifyLabel = bo.getTaskIdentifyLabel(); this.fsMounts = bo.getFsMounts(); this.baseLabels = LabelUtils.getBaseLabels(baseName, businessLabel); @@ -372,7 +374,7 @@ public class ModelOptJobApiImpl implements ModelOptJobApi { * @return Job 任务job类 */ private Job buildJob() { - Map childLabels = 
LabelUtils.getChildLabels(baseName, jobName, K8sKindEnum.JOB.getKind(), businessLabel); + Map childLabels = LabelUtils.getChildLabels(baseName, jobName, K8sKindEnum.JOB.getKind(), businessLabel, taskIdentifyLabel); return new JobBuilder() .withNewMetadata() .withName(jobName) diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelServingApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelServingApiImpl.java index 5169171..40058a0 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelServingApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ModelServingApiImpl.java @@ -148,7 +148,7 @@ public class ModelServingApiImpl implements ModelServingApi { //标签生成 Map baseLabels = LabelUtils.getBaseLabels(bo.getResourceName(), bo.getBusinessLabel()); - Map podLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel()); + Map podLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel(), bo.getTaskIdentifyLabel()); //部署deployment Deployment deployment = buildDeployment(bo, volumeVO, deploymentName); @@ -266,7 +266,7 @@ public class ModelServingApiImpl implements ModelServingApi { * @return Deployment */ private Deployment buildDeployment(ModelServingBO bo, VolumeVO volumeVO, String deploymentName) { - Map childLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel()); + Map childLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel(),bo.getTaskIdentifyLabel()); LabelSelector labelSelector = new LabelSelector(); labelSelector.setMatchLabels(childLabels); return new DeploymentBuilder() @@ -285,7 +285,7 @@ public class ModelServingApiImpl implements ModelServingApi { 
.withNamespace(bo.getNamespace()) .endMetadata() .withNewSpec() - .addToNodeSelector(k8sUtils.gpuSelector(bo.getGpuNum())) + .addToNodeSelector(K8sUtils.gpuSelector(bo.getGpuNum())) .addToContainers(buildContainer(bo, volumeVO, deploymentName)) .addToVolumes(volumeVO.getVolumes().toArray(new Volume[0])) .withRestartPolicy(RestartPolicyEnum.ALWAYS.getRestartPolicy()) diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NamespaceApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NamespaceApiImpl.java index 8fe5dc6..96d17c1 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NamespaceApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/NamespaceApiImpl.java @@ -24,6 +24,7 @@ import io.fabric8.kubernetes.api.model.NamespaceList; import io.fabric8.kubernetes.api.model.ResourceQuota; import io.fabric8.kubernetes.client.KubernetesClient; import io.fabric8.kubernetes.client.KubernetesClientException; +import org.dubhe.biz.base.service.UserContextService; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.k8s.annotation.K8sValidation; import org.dubhe.k8s.api.NamespaceApi; @@ -60,13 +61,16 @@ public class NamespaceApiImpl implements NamespaceApi { @Autowired private ResourceQuotaApi resourceQuotaApi; - @Value("${k8s.namespace-limits.cpu}") + @Autowired + private UserContextService userContextService; + + @Value("${user.config.cpu-limit}") private Integer cpuLimit; - @Value("${k8s.namespace-limits.memory}") + @Value("${user.config.memory-limit}") private Integer memoryLimit; - @Value("${k8s.namespace-limits.gpu}") + @Value("${user.config.gpu-limit}") private Integer gpuLimit; @@ -110,7 +114,8 @@ public class NamespaceApiImpl implements NamespaceApi { if (StringUtils.isEmpty(namespace)) { return new BizNamespace().baseErrorBadRequest(); } - return BizConvertUtils.toBizNamespace(client.namespaces().withName(namespace).get()); + Namespace namespaceEntity = 
client.namespaces().withName(namespace).get(); + return BizConvertUtils.toBizNamespace(namespaceEntity); } /** diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PersistentVolumeClaimApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PersistentVolumeClaimApiImpl.java index f1046e4..38bdf13 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PersistentVolumeClaimApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/PersistentVolumeClaimApiImpl.java @@ -27,6 +27,7 @@ import org.dubhe.biz.base.utils.StringUtils; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; import org.dubhe.k8s.api.PersistentVolumeClaimApi; +import org.dubhe.k8s.constant.K8sLabelConstants; import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.PtBaseResult; import org.dubhe.k8s.domain.bo.PtPersistentVolumeClaimBO; @@ -137,7 +138,7 @@ public class PersistentVolumeClaimApiImpl implements PersistentVolumeClaimApi { //创建pv PersistentVolume pv = new PersistentVolumeBuilder() .withNewMetadata().addToLabels(pvLabels).withName(bo.getPvcName() + PV_SUFFIX).endMetadata() - .withNewSpec().addToCapacity(STORAGE, new Quantity(bo.getRequest())).addNewAccessMode(AccessModeEnum.READ_WRITE_ONCE.getType()).withNewPersistentVolumeReclaimPolicy(PvReclaimPolicyEnum.RECYCLE.getPolicy()) + .withNewSpec().addToCapacity(STORAGE, new Quantity(bo.getRequest())).addNewAccessMode(AccessModeEnum.READ_WRITE_ONCE.getType()).withNewPersistentVolumeReclaimPolicy(/* use the caller-supplied reclaim policy when one is given; fall back to RECYCLE only when it is empty */ StringUtils.isNotEmpty(bo.getReclaimPolicy())?bo.getReclaimPolicy():PvReclaimPolicyEnum.RECYCLE.getPolicy()) .withNewHostPath().withNewPath(bo.getPath()).withType(K8sParamConstants.HOST_PATH_TYPE).endHostPath() .endSpec() .build(); @@ -356,6 +357,17 @@ public class PersistentVolumeClaimApiImpl implements PersistentVolumeClaimApi { return client.persistentVolumes().withName(pvName).delete(); } + /** + * 删除PV + * + * @param
resourceName 资源名称 + * @return boolean true成功 false失败 + */ + @Override + public boolean deletePvByResourceName(String resourceName) { + return client.persistentVolumes().withLabel(K8sLabelConstants.BASE_TAG_SOURCE,resourceName).delete(); + } + /** * 查询PV * diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ResourceQuotaApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ResourceQuotaApiImpl.java index 4d6289c..5136e81 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ResourceQuotaApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/ResourceQuotaApiImpl.java @@ -90,7 +90,7 @@ public class ResourceQuotaApiImpl implements ResourceQuotaApi { resourceQuota = new ResourceQuotaBuilder().withNewMetadata().withName(bo.getName()).endMetadata() .withNewSpec().withHard(hard).endSpec().build(); } - BizResourceQuota bizResourceQuota = BizConvertUtils.toBizResourceQuota(client.resourceQuotas().inNamespace(bo.getNamespace()).create(resourceQuota)); + BizResourceQuota bizResourceQuota = BizConvertUtils.toBizResourceQuota(client.resourceQuotas().inNamespace(bo.getNamespace()).createOrReplace(resourceQuota)); LogUtil.info(LogEnum.BIZ_K8S,"Output {}", bizResourceQuota); return bizResourceQuota; } catch (KubernetesClientException e) { @@ -217,7 +217,7 @@ public class ResourceQuotaApiImpl implements ResourceQuotaApi { } BizQuantity memRemainder = remainder.get(K8sParamConstants.RESOURCE_QUOTA_MEMORY_LIMITS_KEY); - if (memRemainder != null && memRemainder != null){ + if (memRemainder != null && memNum != null){ if (UnitConvertUtils.memFormatToMi(memRemainder.getAmount(),memRemainder.getFormat()) < memNum){ return LimitsOfResourcesEnum.LIMITS_OF_MEM; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TerminalApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TerminalApiImpl.java new file mode 100644 index 0000000..01c5c8d --- /dev/null +++ 
b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TerminalApiImpl.java @@ -0,0 +1,205 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.k8s.api.impl; + +import cn.hutool.core.collection.CollectionUtil; +import cn.hutool.core.util.RandomUtil; +import cn.hutool.core.util.StrUtil; +import io.fabric8.kubernetes.api.model.Quantity; +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceList; +import io.fabric8.kubernetes.api.model.apps.Deployment; +import io.fabric8.kubernetes.api.model.apps.DeploymentList; +import io.fabric8.kubernetes.client.KubernetesClient; +import io.fabric8.kubernetes.client.KubernetesClientException; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.constant.SymbolConstant; +import org.dubhe.biz.file.api.FileStoreApi; +import org.dubhe.biz.log.enums.LogEnum; +import org.dubhe.biz.log.utils.LogUtil; +import org.dubhe.k8s.api.NodeApi; +import org.dubhe.k8s.api.PersistentVolumeClaimApi; +import org.dubhe.k8s.api.PodApi; +import org.dubhe.k8s.api.ResourceIisolationApi; +import org.dubhe.k8s.api.ResourceQuotaApi; +import org.dubhe.k8s.api.TerminalApi; +import org.dubhe.k8s.api.VolumeApi; +import org.dubhe.k8s.constant.K8sParamConstants; +import org.dubhe.k8s.domain.PtBaseResult; +import 
org.dubhe.k8s.domain.bo.BuildFsVolumeBO; +import org.dubhe.k8s.domain.bo.BuildServiceBO; +import org.dubhe.k8s.domain.bo.TerminalBO; +import org.dubhe.k8s.domain.vo.PtJupyterDeployVO; +import org.dubhe.k8s.domain.vo.TerminalResourceVO; +import org.dubhe.k8s.domain.vo.VolumeVO; +import org.dubhe.k8s.enums.K8sKindEnum; +import org.dubhe.k8s.enums.K8sResponseEnum; +import org.dubhe.k8s.enums.LackOfResourcesEnum; +import org.dubhe.k8s.enums.LimitsOfResourcesEnum; +import org.dubhe.k8s.enums.ServiceTypeENum; +import org.dubhe.k8s.utils.BizConvertUtils; +import org.dubhe.k8s.utils.K8sUtils; +import org.dubhe.k8s.utils.LabelUtils; +import org.dubhe.k8s.utils.ResourceBuildUtils; +import org.dubhe.k8s.utils.YamlUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.util.CollectionUtils; + +import javax.annotation.Resource; +import java.util.Map; + +/** + * @description Terminal API implementation (professional edition) + * @date 2021-06-29 + */ +public class TerminalApiImpl implements TerminalApi { + private K8sUtils k8sUtils; + private KubernetesClient client; + @Resource(name = "hostFileStoreApiImpl") + private FileStoreApi fileStoreApi; + @Autowired + private VolumeApi volumeApi; + @Autowired + private PersistentVolumeClaimApi persistentVolumeClaimApi; + @Autowired + private NodeApi nodeApi; + @Autowired + private PodApi podApi; + @Autowired + private ResourceQuotaApi resourceQuotaApi; + @Autowired + private ResourceIisolationApi resourceIisolationApi; + + public TerminalApiImpl(K8sUtils k8sUtils) { + this.k8sUtils = k8sUtils; + this.client = k8sUtils.getClient(); + } + + /** + * Creates the terminal Deployment and its NodePort Service after the resource quota, node capacity and directory checks pass + * + * @param bo terminal creation parameters + * @return TerminalResourceVO wrapping the created deployment and service, or an error result + */ + @Override + public TerminalResourceVO create(TerminalBO bo) { + try { + LogUtil.info(LogEnum.BIZ_K8S, "Params of creating TerminalApiImpl--create:{}", bo); + //Resource quota check + LimitsOfResourcesEnum limitsOfResources = resourceQuotaApi.reachLimitsOfResources(bo.getNamespace(), bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + if
(!LimitsOfResourcesEnum.ADEQUATE.equals(limitsOfResources)) { + return new TerminalResourceVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), limitsOfResources.getMessage()); + } + LackOfResourcesEnum lack = nodeApi.isAllocatable(bo.getCpuNum(), bo.getMemNum(), bo.getGpuNum()); + if (!LackOfResourcesEnum.ADEQUATE.equals(lack)) { + return new TerminalResourceVO().error(K8sResponseEnum.LACK_OF_RESOURCES.getCode(), lack.getMessage()); + } + if (!fileStoreApi.createDirs(bo.getDirList().toArray(new String[MagicNumConstant.ZERO]))) { + return new TerminalResourceVO().error(K8sResponseEnum.INTERNAL_SERVER_ERROR.getCode(), K8sResponseEnum.INTERNAL_SERVER_ERROR.getMessage()); + } + + //Build the storage volumes + VolumeVO volumeVO = volumeApi.buildFsVolumes(new BuildFsVolumeBO(bo.getNamespace(), bo.getResourceName(), bo.getFsMounts())); + if (!K8sResponseEnum.SUCCESS.getCode().equals(volumeVO.getCode())) { + return new TerminalResourceVO().error(volumeVO.getCode(), volumeVO.getMessage()); + } + + //Shared-memory (shm) volume: half of memNum, or BINARY_TEN_EXP when memNum is null + Integer ShmMemAmount = bo.getMemNum() == null?MagicNumConstant.BINARY_TEN_EXP:bo.getMemNum()/MagicNumConstant.TWO; + volumeVO.addShmFsVolume(new Quantity(String.valueOf(ShmMemAmount),K8sParamConstants.MEM_UNIT)); + + //Generate the deployment and service names (random suffixes) + String deploymentName = StrUtil.format(K8sParamConstants.RESOURCE_NAME_TEMPLATE, bo.getResourceName(), RandomUtil.randomString(MagicNumConstant.EIGHT)); + String svcName = StrUtil.format(K8sParamConstants.SUB_RESOURCE_NAME_TEMPLATE, bo.getResourceName(), K8sParamConstants.SVC_SUFFIX, RandomUtil.randomString(MagicNumConstant.FIVE)); + + //Generate the labels + Map baseLabels = LabelUtils.getBaseLabels(bo.getResourceName(), bo.getBusinessLabel()); + Map podLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel(), bo.getTaskIdentifyLabel()); + + //Create the deployment + Deployment deployment = ResourceBuildUtils.buildDeployment(bo, volumeVO, deploymentName); + LogUtil.info(LogEnum.BIZ_K8S, "Ready to deploy {},
yaml信息为{}", deploymentName, YamlUtils.dumpAsYaml(deployment)); + resourceIisolationApi.addIisolationInfo(deployment); + Deployment deploymentResult = client.apps().deployments().inNamespace(bo.getNamespace()).create(deployment); + + //Create the service (one service port per requested port) + BuildServiceBO buildServiceBO = new BuildServiceBO(bo.getNamespace(), svcName, baseLabels, podLabels, ServiceTypeENum.NODE_PORT.getType()); + if (!CollectionUtils.isEmpty(bo.getPorts())){ + bo.getPorts().forEach(port -> { + buildServiceBO.addPort(ResourceBuildUtils.buildServicePort(port, port, SymbolConstant.PORT+SymbolConstant.HYPHEN+port)); + }); + } + Service service = ResourceBuildUtils.buildService(buildServiceBO); + LogUtil.info(LogEnum.BIZ_K8S, "Ready to deploy {}, yaml信息为{}", svcName, YamlUtils.dumpAsYaml(service)); + Service serviceResult = client.services().create(service); + return new TerminalResourceVO(BizConvertUtils.toBizDeployment(deploymentResult),BizConvertUtils.toBizService(serviceResult)); + }catch (KubernetesClientException e) { + LogUtil.error(LogEnum.BIZ_K8S, "TerminalApiImpl.create error, param:{} error:", bo, e); + return new TerminalResourceVO().error(String.valueOf(e.getCode()), e.getMessage()); + } + } + + /** + * Deletes the PVC/PV, services and deployments of a resource; returns success without deleting anything when no deployment matches + * @param namespace namespace + * @param resourceName resource name + * @return PtBaseResult base result: default result on success, REPEAT when the delete calls report false, or the client error code/message + */ + @Override + public PtBaseResult delete(String namespace, String resourceName) { + try { + LogUtil.info(LogEnum.BIZ_K8S, "delete Terminal namespace:{} resourceName:{}",namespace,resourceName); + DeploymentList deploymentList = client.apps().deployments().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).list(); + if (deploymentList == null || deploymentList.getItems().size() == 0){ + return new PtBaseResult(); + } + persistentVolumeClaimApi.delete(namespace,resourceName); + persistentVolumeClaimApi.deletePvByResourceName(resourceName); + Boolean res = client.services().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).delete() + &&
client.apps().deployments().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).delete(); + if (res) { + return new PtBaseResult(); + } else { + return K8sResponseEnum.REPEAT.toPtBaseResult(); + } + } catch (KubernetesClientException e) { + LogUtil.error(LogEnum.BIZ_K8S, "delete error:", e); + return new PtBaseResult(String.valueOf(e.getCode()), e.getMessage()); + } + } + + /** + * Queries the first service and the first deployment labelled with the resource name + * @param namespace namespace + * @param resourceName resource name + * @return TerminalResourceVO built from the matching deployment and service (null entries when none match), or an error result + */ + @Override + public TerminalResourceVO get(String namespace, String resourceName) { + try { + ServiceList svcList = client.services().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).list(); + Service svc = CollectionUtil.isEmpty(svcList.getItems()) ? null : svcList.getItems().get(0); + DeploymentList deploymentList = client.apps().deployments().inNamespace(namespace).withLabels(LabelUtils.withEnvResourceName(resourceName)).list(); + Deployment deployment = CollectionUtil.isEmpty(deploymentList.getItems()) ? 
null : deploymentList.getItems().get(0); + return new TerminalResourceVO(BizConvertUtils.toBizDeployment(deployment), BizConvertUtils.toBizService(svc)); + } catch (KubernetesClientException e) { + LogUtil.error(LogEnum.BIZ_K8S, "get error:", e); + return new TerminalResourceVO().error(String.valueOf(e.getCode()), e.getMessage()); + } + } +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TrainJobApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TrainJobApiImpl.java index 009f5cd..8e61e87 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TrainJobApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/TrainJobApiImpl.java @@ -151,7 +151,7 @@ public class TrainJobApiImpl implements TrainJobApi { /** * 根据命名空间和资源名删除Job * - * @param namespace 命名空间 + * @param namespace 命名空间 * @param resourceName 资源名称 * @return Boolean true成功 false失败 */ @@ -224,6 +224,7 @@ public class TrainJobApiImpl implements TrainJobApi { private List volumeMounts; private List volumes; private String businessLabel; + private String taskIdentifyLabel; private Integer delayCreate; private Integer delayDelete; private TaskYamlBO taskYamlBO; @@ -249,6 +250,7 @@ public class TrainJobApiImpl implements TrainJobApi { this.fsMounts = bo.getFsMounts(); businessLabel = bo.getBusinessLabel(); + this.taskIdentifyLabel = bo.getTaskIdentifyLabel(); this.baseLabels = LabelUtils.getBaseLabels(baseName,bo.getBusinessLabel()); this.volumeMounts = new ArrayList<>(); @@ -345,8 +347,8 @@ public class TrainJobApiImpl implements TrainJobApi { * 挂载存储 * * @param mountPath 挂载路径 - * @param dirBO 挂载路径参数 - * @param num 名称序号 + * @param dirBO 挂载路径参数 + * @param num 名称序号 * @return boolean true成功 false失败 */ private boolean buildFsVolumes(String mountPath,PtMountDirBO dirBO,int num){ @@ -369,8 +371,8 @@ public class TrainJobApiImpl implements TrainJobApi { * 按照存储资源声明挂载存储 * * @param mountPath 挂载路径 - * @param dirBO 挂载路径参数 - * @param i 
名称序号 + * @param dirBO 挂载路径参数 + * @param i 名称序号 * @return boolean true成功 false失败 */ private boolean buildFsPvcVolumes(String mountPath,PtMountDirBO dirBO,int i){ @@ -456,7 +458,7 @@ public class TrainJobApiImpl implements TrainJobApi { .withNewTemplate() .withNewMetadata() .withName(jobName) - .addToLabels(LabelUtils.getChildLabels(baseName, jobName, K8sKindEnum.JOB.getKind(),businessLabel)) + .addToLabels(LabelUtils.getChildLabels(baseName, jobName, K8sKindEnum.JOB.getKind(),businessLabel, taskIdentifyLabel)) .withNamespace(namespace) .endMetadata() .withNewSpec() diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/VolumeApiImpl.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/VolumeApiImpl.java index 003bbbb..546eb6b 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/VolumeApiImpl.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/api/impl/VolumeApiImpl.java @@ -61,7 +61,7 @@ public class VolumeApiImpl implements VolumeApi { for (Map.Entry mount : bo.getFsMounts().entrySet()) { boolean availableMount = (mount != null && StringUtils.isNotEmpty(mount.getKey()) && mount.getValue() != null && StringUtils.isNotEmpty(mount.getValue().getDir())); if (availableMount){ - boolean success = mount.getValue().isRecycle()?buildFsPvcVolumes(bo,volumeVO,mount.getKey(),mount.getValue(),i):buildFsVolumes(volumeVO,mount.getKey(),mount.getValue(),i); + boolean success = (mount.getValue().isRecycle() || (StringUtils.isNotEmpty(mount.getValue().getLimit()) || StringUtils.isNotEmpty(mount.getValue().getRequest())))?buildFsPvcVolumes(bo,volumeVO,mount.getKey(),mount.getValue(),i):buildFsVolumes(volumeVO,mount.getKey(),mount.getValue(),i); if (!success){ break; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/cache/ResourceCache.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/cache/ResourceCache.java index 577b59a..7963b29 100644 --- 
a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/cache/ResourceCache.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/cache/ResourceCache.java @@ -19,6 +19,8 @@ package org.dubhe.k8s.cache; import cn.hutool.core.collection.CollectionUtil; import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.constant.NumberConstant; +import org.dubhe.biz.base.constant.StringConstant; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.redis.utils.RedisUtils; import org.dubhe.k8s.api.PodApi; @@ -33,6 +35,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.util.CollectionUtils; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -188,6 +191,17 @@ public class ResourceCache { } } + /** + * 查询该 podName 缓存是否存在 + * + * @param podName Pod的名称 + * @return boolean true 存在 false 不存在 + */ + public boolean isPodNameCached(String podName){ + String resourceName = (String) redisUtils.get(podNamePrefix +podName); + return StringUtils.isNotEmpty(resourceName); + } + /** * 删除pod名称缓存 * @@ -235,4 +249,41 @@ public class ResourceCache { return false; } } + + /** + * 添加任务身份标识缓存 + * + * @param taskIdentify 任务身份标识 + * @param taskId 任务 ID + * @param taskName 任务名称 + * @param taskIdPrefix 任务 ID 前缀 + * @return boolean true 添加成功 false添加失败 + */ + public boolean addTaskCache(String taskIdentify, Long taskId, String taskName, String taskIdPrefix){ + return redisUtils.hmset(taskIdentify, new HashMap(){{ + put(StringConstant.CACHE_TASK_ID, taskId); + put(StringConstant.CACHE_TASK_NAME, taskName); + }}, NumberConstant.MONTH_SECOND) && redisUtils.set(taskIdPrefix + String.valueOf(taskId), taskIdentify, NumberConstant.MONTH_SECOND); + } + + /** + * 获取任务身份标识 + * + * @param taskId 任务 ID + * @param taskName 任务名称 + * @param taskIdPrefix 任务 ID 前缀 + * @return String 任务身份标识 + */ + public String getTaskIdentify(Long taskId, 
String taskName, String taskIdPrefix){ + String taskIdentify = (String) redisUtils.get(taskIdPrefix + String.valueOf(taskId)); + if (taskIdentify == null){ + taskIdentify = StringUtils.getUUID(); + redisUtils.hmset(taskIdentify, new HashMap(){{ + put(StringConstant.CACHE_TASK_ID, taskId); + put(StringConstant.CACHE_TASK_NAME, taskName); + }}, NumberConstant.MONTH_SECOND); + redisUtils.set(taskIdPrefix + taskId, taskIdentify, NumberConstant.MONTH_SECOND); + } + return taskIdentify; + } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/config/K8sConfig.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/config/K8sConfig.java index e01b988..5ca236a 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/config/K8sConfig.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/config/K8sConfig.java @@ -23,8 +23,38 @@ import org.apache.http.HttpHost; import org.apache.http.client.config.RequestConfig; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; -import org.dubhe.k8s.api.*; -import org.dubhe.k8s.api.impl.*; +import org.dubhe.k8s.api.DistributeTrainApi; +import org.dubhe.k8s.api.DubheDeploymentApi; +import org.dubhe.k8s.api.JupyterResourceApi; +import org.dubhe.k8s.api.LimitRangeApi; +import org.dubhe.k8s.api.LogMonitoringApi; +import org.dubhe.k8s.api.MetricsApi; +import org.dubhe.k8s.api.ModelOptJobApi; +import org.dubhe.k8s.api.ModelServingApi; +import org.dubhe.k8s.api.NamespaceApi; +import org.dubhe.k8s.api.NativeResourceApi; +import org.dubhe.k8s.api.NodeApi; +import org.dubhe.k8s.api.PersistentVolumeClaimApi; +import org.dubhe.k8s.api.PodApi; +import org.dubhe.k8s.api.ResourceQuotaApi; +import org.dubhe.k8s.api.TerminalApi; +import org.dubhe.k8s.api.TrainJobApi; +import org.dubhe.k8s.api.impl.DistributeTrainApiImpl; +import org.dubhe.k8s.api.impl.DubheDeploymentApiImpl; +import org.dubhe.k8s.api.impl.JupyterResourceApiImpl; +import org.dubhe.k8s.api.impl.LimitRangeApiImpl; +import 
org.dubhe.k8s.api.impl.LogMonitoringApiImpl; +import org.dubhe.k8s.api.impl.MetricsApiImpl; +import org.dubhe.k8s.api.impl.ModelOptJobApiImpl; +import org.dubhe.k8s.api.impl.ModelServingApiImpl; +import org.dubhe.k8s.api.impl.NamespaceApiImpl; +import org.dubhe.k8s.api.impl.NativeResourceApiImpl; +import org.dubhe.k8s.api.impl.NodeApiImpl; +import org.dubhe.k8s.api.impl.PersistentVolumeClaimApiImpl; +import org.dubhe.k8s.api.impl.PodApiImpl; +import org.dubhe.k8s.api.impl.ResourceQuotaApiImpl; +import org.dubhe.k8s.api.impl.TerminalApiImpl; +import org.dubhe.k8s.api.impl.TrainJobApiImpl; import org.dubhe.k8s.cache.ResourceCache; import org.dubhe.k8s.properties.ClusterProperties; import org.dubhe.k8s.utils.K8sUtils; @@ -37,6 +67,7 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.web.socket.server.standard.ServerEndpointExporter; import java.io.IOException; @@ -166,7 +197,6 @@ public class K8sConfig { public RequestConfig.Builder customizeRequestConfig(RequestConfig.Builder builder) { builder.setSocketTimeout(TEN_THOUSAND); return builder; - } })); } @@ -175,4 +205,16 @@ public class K8sConfig { public ModelServingApi modelServingApi(K8sUtils k8sUtils){ return new ModelServingApiImpl(k8sUtils); } + + @Bean + public ServerEndpointExporter handlerAdapter() { + return new ServerEndpointExporter(); + } + + + @Bean + public TerminalApi terminalApi(K8sUtils k8sUtils){ + return new TerminalApiImpl(k8sUtils); + } + } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sLabelConstants.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sLabelConstants.java index ae59366..40dd43b 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sLabelConstants.java +++ 
b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sLabelConstants.java @@ -38,6 +38,10 @@ public class K8sLabelConstants { * 业务标签,用于标识业务,由业务层传入 */ public final static String BASE_TAG_BUSINESS = "platform/business"; + /** + * 任务身份标签,用于标识任务身份,由业务层传入 + */ + public final static String BASE_TAG_TASK_IDENTIFY = "platform/task-identify"; /** * 运行环境标签,用于对不同环境回调进行分流 */ diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sParamConstants.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sParamConstants.java index bab9517..912cd36 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sParamConstants.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/constant/K8sParamConstants.java @@ -94,4 +94,9 @@ public class K8sParamConstants { */ public static final String RESOURCE_QUOTA_GPU_LIMITS_KEY = "requests.nvidia.com/gpu"; + //pod containerID 前缀 + public static final String CONTAINER_ID_PREFIX = "docker://"; + + public static final String WAITING_REASON_CONTAINER_CREATING = "ContainerCreating"; + } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/BuildServiceBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/BuildServiceBO.java index b37e8f2..1af0f24 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/BuildServiceBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/BuildServiceBO.java @@ -37,6 +37,7 @@ public class BuildServiceBO { private Map labels; private Map selector; private List ports; + private String type; public BuildServiceBO(String namespace, String name, Map labels, Map selector){ this.namespace = namespace; @@ -45,6 +46,14 @@ public class BuildServiceBO { this.selector = selector; } + public BuildServiceBO(String namespace, String name, Map labels, Map selector,String type){ + this.namespace = namespace; + this.name = name; + this.labels = labels; + this.selector = selector; + 
this.type = type; + } + /** * 添加端口 * @param port diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DeploymentBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DeploymentBO.java new file mode 100644 index 0000000..43a66f6 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DeploymentBO.java @@ -0,0 +1,168 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.k8s.domain.bo; + +import cn.hutool.core.collection.CollectionUtil; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.utils.StringUtils; +import org.dubhe.k8s.annotation.K8sValidation; +import org.dubhe.k8s.enums.ValidationTypeEnum; +import org.springframework.util.CollectionUtils; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * @description deployment BO + * @date 2021-06-30 + */ +@Data +@Accessors(chain = true) +public class DeploymentBO { + /** + * 命名空间 + **/ + @K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME) + private String namespace; + /** + * 资源名称 + **/ + @K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME) + private String resourceName; + /** + * Number of desired pods + */ + private Integer replicas; + /** + * GPU数量 + **/ + private Integer gpuNum; + /** + * 内存数量 单位Mi + **/ + private Integer memNum; + /** + * CPU数量 + **/ + private Integer cpuNum; + /** + * 镜像名称 + **/ + private String image; + /** + * 执行命令 + **/ + private List cmdLines; + /** + * 文件存储服务挂载 key:pod内挂载路径 value:文件存储路径及配置 + **/ + private Map fsMounts; + /** + * 业务标签,用于标识业务模块 + **/ + @K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME) + private String businessLabel; + /** + * 任务身份标签,用于标识任务身份 + **/ + private String taskIdentifyLabel; + /** + * 端口 + */ + private Set ports; + + /** + * 获取nfs路径 + * @return + */ + public List getDirList(){ + if (CollectionUtil.isNotEmpty(fsMounts)){ + return fsMounts.values().stream().map(PtMountDirBO::getDir).collect(Collectors.toList()); + } + return new ArrayList<>(); + } + + /** + * 设置nfs挂载 + * @param mountPath 容器内路径 + * @param dir nfs路径 + * @return + */ + public DeploymentBO putfsMounts(String mountPath, String dir){ + if 
(StringUtils.isNotEmpty(mountPath) && StringUtils.isNotEmpty(dir)){ + if (fsMounts == null){ + fsMounts = new HashMap<>(MagicNumConstant.TWO); + } + fsMounts.put(mountPath,new PtMountDirBO(dir)); + } + return this; + } + + /** + * 设置nfs挂载 + * @param mountPath 容器内路径 + * @param dir nfs路径及配置 + * @return + */ + public DeploymentBO putfsMounts(String mountPath, PtMountDirBO dir){ + if (StringUtils.isNotEmpty(mountPath) && dir != null){ + if (fsMounts == null){ + fsMounts = new HashMap<>(MagicNumConstant.TWO); + } + fsMounts.put(mountPath,dir); + } + return this; + } + + /** + * 添加端口 + * + * @param port + */ + public void addPort(Integer port){ + if (port == null){ + return; + } + if (ports == null){ + ports = new HashSet<>(); + } + ports.add(port); + } + + /** + * Merges the given ports into this deployment's port set; the parameter shadows the field, so the field is always accessed via this. + * + * @param ports ports to add; a null or empty set is ignored + */ + public void addPorts(Set ports){ + if (CollectionUtils.isEmpty(ports)){ + return; + } + if (this.ports == null){ + this.ports = new HashSet<>(); + } + this.ports.addAll(ports); + } +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DistributeTrainBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DistributeTrainBO.java index c8565bd..d8802cc 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DistributeTrainBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/DistributeTrainBO.java @@ -84,6 +84,10 @@ public class DistributeTrainBO { * 业务标签,用于标识业务模块 **/ private String businessLabel; + /** + * 任务身份标签,用于标识任务身份 + */ + private String taskIdentifyLabel; /** * 延时创建时间,单位:分钟 ***/ diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/ModelServingBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/ModelServingBO.java index dbbaa4c..b58e2f4 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/ModelServingBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/ModelServingBO.java @@ -81,6 +81,10 @@ public class ModelServingBO { **/ 
@K8sValidation(ValidationTypeEnum.K8S_RESOURCE_NAME) private String businessLabel; + /** + * 任务身份标签,用于标识任务身份 + **/ + private String taskIdentifyLabel; /** * http服务端口,null则不开放http服务 */ diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PrometheusMetricBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PrometheusMetricBO.java index 5be6584..1c5cb88 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PrometheusMetricBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PrometheusMetricBO.java @@ -17,15 +17,18 @@ package org.dubhe.k8s.domain.bo; +import cn.hutool.core.util.NumberUtil; import lombok.Data; +import org.dubhe.biz.base.constant.MagicNumConstant; import org.dubhe.biz.base.functional.StringFormat; -import org.dubhe.k8s.domain.vo.GpuUsageVO; -import org.dubhe.k8s.domain.vo.MetricsDataResultVO; +import org.dubhe.k8s.domain.vo.GpuTotalMemResultVO; import org.dubhe.k8s.domain.vo.MetricsDataResultValueVO; import org.springframework.util.CollectionUtils; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; /** * @description Gpu 指标 BO @@ -35,34 +38,70 @@ import java.util.List; public class PrometheusMetricBO { private String status; private MetricData data; - /** * 获取Gpu 使用率 - * @return List gpu使用列表 + * @return Map gpu使用率列表 + */ + public Map getGpuUsage() { + Map gpuUsageMap = new HashMap<>(); + if (data == null || CollectionUtils.isEmpty(data.getResult())) { + return gpuUsageMap; + } + for (MetricResult result : data.getResult()) { + gpuUsageMap.put(result.getMetric().getAcc_id(), Float.valueOf(result.getValue().get(1).toString())); + } + return gpuUsageMap; + } + + /** + * 获取GPU显存使用量 + * @return Map gpu使用量列表 + */ + public Map getGpuMemValue() { + Map gpuMemValueMap = new HashMap<>(); + if (data == null || CollectionUtils.isEmpty(data.getResult())) { + return gpuMemValueMap; + } + StringFormat memMetricsFormat = (value) -> { + 
return NumberUtil.isNumber(String.valueOf(value)) ? String.valueOf(Long.valueOf(String.valueOf(value)) / MagicNumConstant.BINARY_TEN_EXP) : String.valueOf(MagicNumConstant.ZERO); + }; + for (MetricResult result : data.getResult()) { + gpuMemValueMap.put(result.getMetric().getAcc_id(), memMetricsFormat.format(result.getValue().get(1).toString())); + } + return gpuMemValueMap; + } + + /** + * 获取GPU显存总大小 + * @return List GPU显存总大小列表 */ - public List getGpuUsage(){ - List gpuUsageVOList = new ArrayList<>(); - if (data == null || CollectionUtils.isEmpty(data.getResult())){ - return gpuUsageVOList; + public List getGpuTotalMemValue() { + List gpuTotalMemValueVOList = new ArrayList<>(); + if (data == null || CollectionUtils.isEmpty(data.getResult())) { + return gpuTotalMemValueVOList; } - for (MetricResult result : data.getResult()){ - gpuUsageVOList.add(new GpuUsageVO(result.getMetric().getAcc_id(),Float.valueOf(result.getValue().get(1).toString()))); + StringFormat memMetricsFormat = (value) -> { + return NumberUtil.isNumber(String.valueOf(value)) ? 
String.valueOf(Long.valueOf(String.valueOf(value)) / MagicNumConstant.BINARY_TEN_EXP) : String.valueOf(MagicNumConstant.ZERO); + }; + for (MetricResult result : data.getResult()) { + gpuTotalMemValueVOList.add(new GpuTotalMemResultVO(result.getMetric().getAcc_id(), memMetricsFormat.format(result.getValue().get(1).toString()))); } - return gpuUsageVOList; + return gpuTotalMemValueVOList; } + /** * 获取value 列表 * @return List 监控指标列表 */ - public List getValues(StringFormat stringFormat){ + public List getValues(StringFormat stringFormat) { List list = new ArrayList<>(); - if (data == null || CollectionUtils.isEmpty(data.getResult())){ + if (data == null || CollectionUtils.isEmpty(data.getResult())) { return list; } - for (MetricResult result : data.getResult()){ - result.getValues().forEach(obj->{ - list.add(new MetricsDataResultValueVO(obj.get(0).toString(),stringFormat.format(obj.get(1).toString()))); + for (MetricResult result : data.getResult()) { + result.getValues().forEach(obj -> { + list.add(new MetricsDataResultValueVO(obj.get(0).toString(), stringFormat.format(obj.get(1).toString()))); }); } return list; @@ -72,31 +111,98 @@ public class PrometheusMetricBO { * 获取value 列表 * @return List 监控指标列表 */ - public List getValues(MetricResult metricResult){ + public List getValues(MetricResult metricResult) { List list = new ArrayList<>(); - if (metricResult == null || CollectionUtils.isEmpty(metricResult.getValues())){ + if (metricResult == null || CollectionUtils.isEmpty(metricResult.getValues())) { return list; } - metricResult.getValues().forEach(obj->{ - list.add(new MetricsDataResultValueVO(obj.get(0).toString(),obj.get(1).toString())); + metricResult.getValues().forEach(obj -> { + list.add(new MetricsDataResultValueVO(obj.get(0).toString(), obj.get(1).toString())); }); return list; } /** - * 获取 result列表 + * 获取 GPU使用率result列表 * @return List 监控指标列表 */ - public List getResults(){ - List list = new ArrayList<>(); - if (data == null || 
CollectionUtils.isEmpty(data.getResult())){ + public Map> getGpuMetricsResults() { + Map> map = new HashMap<>(); + if (data == null || CollectionUtils.isEmpty(data.getResult())) { + return map; + } + for (MetricResult result : data.getResult()) { + map.put(result.getMetric().getAcc_id(), getValues(result)); + } + return map; + } + + /** + * 获取value 列表 + * @return List 监控指标列表 + */ + public List getFormatValues(MetricResult metricResult, StringFormat stringFormat) { + List list = new ArrayList<>(); + if (metricResult == null || CollectionUtils.isEmpty(metricResult.getValues())) { return list; } - for (MetricResult result : data.getResult()){ - list.add(new MetricsDataResultVO(result.getMetric().getAcc_id(),getValues(result))); + metricResult.getValues().forEach(obj -> { + list.add(new MetricsDataResultValueVO(obj.get(0).toString(), stringFormat.format(obj.get(1).toString()))); + }); + return list; + } + + /** + * 获取 GPU显存使用量result列表 + * @return Map> 监控指标列表 + */ + public Map> getGpuMemResults() { + Map> map = new HashMap<>(); + if (data == null || CollectionUtils.isEmpty(data.getResult())) { + return map; + } + StringFormat memMetricsFormat = (value) -> { + return NumberUtil.isNumber(String.valueOf(value)) ? 
String.valueOf(Long.valueOf(String.valueOf(value)) / MagicNumConstant.BINARY_TEN_EXP) : String.valueOf(MagicNumConstant.ZERO); + }; + for (MetricResult result : data.getResult()) { + map.put(result.getMetric().getAcc_id(), getFormatValues(result, memMetricsFormat)); + } + return map; + } + + /** + * 获取value 列表 + * @return List 监控指标列表 + */ + public String getGpuTotalValues(MetricResult metricResult, StringFormat stringFormat) { + List strings = new ArrayList<>(); + if (metricResult == null || CollectionUtils.isEmpty(metricResult.getValues())) { + return ""; + } + metricResult.getValues().forEach(obj -> { + strings.add(stringFormat.format(obj.get(1).toString())); + }); + return strings.get(0); + } + + /** + * 获取 GPU显存总量result列表 + * @return List 监控指标列表 + */ + public List getGpuTotalMemResults() { + List list = new ArrayList<>(); + if (data == null || CollectionUtils.isEmpty(data.getResult())) { + return list; + } + StringFormat memMetricsFormat = (value) -> { + return NumberUtil.isNumber(String.valueOf(value)) ? 
String.valueOf(Long.valueOf(String.valueOf(value)) / MagicNumConstant.BINARY_TEN_EXP) : String.valueOf(MagicNumConstant.ZERO); + }; + for (MetricResult result : data.getResult()) { + list.add(new GpuTotalMemResultVO(result.getMetric().getAcc_id(), getGpuTotalValues(result, memMetricsFormat))); } return list; } + } @Data diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtDeploymentBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtDeploymentBO.java index 6805534..e4f0261 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtDeploymentBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtDeploymentBO.java @@ -57,4 +57,8 @@ public class PtDeploymentBO { * 业务标签,用于标识业务模块 **/ private String businessLabel; + /** + * 任务身份标签,用于标识任务身份 + **/ + private String taskIdentifyLabel; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJobBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJobBO.java index e48dd7e..2ea9590 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJobBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJobBO.java @@ -57,4 +57,8 @@ public class PtJobBO { * 业务标签,用于标识业务模块 **/ private String businessLabel; + /** + * 任务身份标签,用于标识任务身份 + **/ + private String taskIdentifyLabel; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterJobBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterJobBO.java index e22c499..55e45af 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterJobBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterJobBO.java @@ -65,10 +65,16 @@ public class PtJupyterJobBO { private GraphicsCardTypeEnum graphicsCardType; /**业务标签,用于标识业务模块**/ private String businessLabel; + /**任务身份标签,用于标识任务身份**/ + private String taskIdentifyLabel; 
/**延时创建时间,单位:分钟**/ private Integer delayCreateTime; /**定时删除时间,相对于实际创建时间,单位:分钟**/ private Integer delayDeleteTime; + /**pip包路径**/ + private String pipSitePackagePath; + /**pip包挂载路径**/ + private String pipSitePackageMountPath; public List getDirList(){ diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterResourceBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterResourceBO.java index 3779d1f..f070686 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterResourceBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtJupyterResourceBO.java @@ -97,8 +97,20 @@ public class PtJupyterResourceBO { * 业务标签,用于标识业务模块 **/ private String businessLabel; + /** + * 任务身份标签,用于标识任务唯一身份 + **/ + private String taskIdentifyLabel; /** * 定时删除时间,单位:分钟 **/ private Integer delayDeleteTime; + /** + * pip包路径 + */ + private String pipSitePackageDir; + /** + * k8s内pip包路径 + */ + private String pipSitePackageMountPath; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtMountDirBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtMountDirBO.java index 091fa8e..1f73d32 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtMountDirBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtMountDirBO.java @@ -33,9 +33,9 @@ public class PtMountDirBO { private String dir; /**是否只读 ture:是 false:否**/ private boolean readOnly; - /**是否回收 true:创建pv、pvc进行挂载,删除时同时删除数据 false:直接挂载**/ + /**是否回收 true:创建pv、pvc进行挂载,删除时同时删除数据 false且request和limit均为空:直接挂载**/ private boolean recycle; - /**存储配额 示例:500Mi 仅在pvc=true时生效**/ + /**存储配额 示例:500Mi* 仅在pvc=true时生效*/ private String request; /**存储限额 示例:500Mi 仅在pvc=true时生效**/ private String limit; @@ -48,4 +48,22 @@ public class PtMountDirBO { this.dir = dir; this.request = request; } + + public PtMountDirBO(String dir, boolean readOnly){ + this.dir = dir; + this.readOnly = readOnly; + 
} + + public PtMountDirBO(String dir, String request,boolean readOnly){ + this.dir = dir; + this.request = request; + this.readOnly = readOnly; + } + + public PtMountDirBO(String dir, String request, String limit,boolean readOnly){ + this.dir = dir; + this.request = request; + this.limit = limit; + this.readOnly = readOnly; + } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtPersistentVolumeClaimBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtPersistentVolumeClaimBO.java index 534bafd..a6dbdb2 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtPersistentVolumeClaimBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtPersistentVolumeClaimBO.java @@ -23,6 +23,7 @@ import org.dubhe.k8s.domain.resource.BizQuantity; import lombok.Data; import lombok.experimental.Accessors; import org.dubhe.k8s.enums.AccessModeEnum; +import org.dubhe.k8s.enums.PvReclaimPolicyEnum; import java.util.HashMap; import java.util.HashSet; @@ -79,6 +80,11 @@ public class PtPersistentVolumeClaimBO { **/ private String path; + /** + * 回收策略 + */ + private String reclaimPolicy; + public PtPersistentVolumeClaimBO() { } @@ -107,5 +113,10 @@ public class PtPersistentVolumeClaimBO { add(AccessModeEnum.READ_WRITE_ONCE.getType()); }}; this.setPvcName(resourceName+"-"+RandomUtil.randomString(MagicNumConstant.FIVE)); + if (bo.isRecycle()){ + this.reclaimPolicy = PvReclaimPolicyEnum.RECYCLE.getPolicy(); + }else { + this.reclaimPolicy = PvReclaimPolicyEnum.RETAIN.getPolicy(); + } } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtResourceQuotaBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtResourceQuotaBO.java index e423f75..9358b41 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtResourceQuotaBO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/PtResourceQuotaBO.java @@ -57,7 +57,7 @@ public class 
PtResourceQuotaBO { } /** - * 添加cpu 限制 + * 添加memory限制 * @param amount 值 * @param format 单位 */ diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/TerminalBO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/TerminalBO.java new file mode 100644 index 0000000..b9782f4 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/bo/TerminalBO.java @@ -0,0 +1,14 @@ +package org.dubhe.k8s.domain.bo; + +import lombok.Data; +import lombok.experimental.Accessors; + +/** + * @description 专业版终端 BO + * @date 2021-06-30 + */ +@Data +@Accessors(chain = true) +public class TerminalBO extends DeploymentBO{ + +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerStatus.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerStatus.java index 8beaa63..afbbd73 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerStatus.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizContainerStatus.java @@ -38,4 +38,7 @@ public class BizContainerStatus { */ @K8sField("state:waiting") private BizContainerStateWaiting waiting; + + @K8sField("containerID") + private String containerID; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizIntOrString.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizIntOrString.java index 2403190..91c1fee 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizIntOrString.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizIntOrString.java @@ -34,4 +34,12 @@ public class BizIntOrString { private Integer Kind; @K8sField("StrVal") private String StrVal; + + public boolean equals(Integer value){ + return IntVal != null && IntVal.equals(value); + } + + public boolean equals(String value){ + return StrVal != null && StrVal.equals(value); + } } diff --git 
a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizPod.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizPod.java index 80bcbdd..f43fd41 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizPod.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizPod.java @@ -18,12 +18,18 @@ package org.dubhe.k8s.domain.resource; import cn.hutool.core.util.StrUtil; +import com.alibaba.fastjson.JSON; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.utils.StringUtils; import org.dubhe.k8s.annotation.K8sField; +import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.PtBaseResult; import com.google.common.collect.Maps; import lombok.Data; import lombok.experimental.Accessors; import org.dubhe.k8s.constant.K8sLabelConstants; +import org.dubhe.k8s.enums.PodPhaseEnum; +import org.springframework.util.CollectionUtils; import java.util.List; import java.util.Map; @@ -59,6 +65,8 @@ public class BizPod extends PtBaseResult { private String podIp; @K8sField("spec:volumes") private List volumes; + @K8sField("status:hostIP") + private String hostIP; /** * Pending:待处理 @@ -90,10 +98,20 @@ public class BizPod extends PtBaseResult { */ private String completedTime; + /** + * 获取业务标签 + */ public String getBusinessLabel() { return labels.get(K8sLabelConstants.BASE_TAG_BUSINESS); } + /** + * 获取任务身份标识 + */ + public String getTaskIdentifyLabel() { + return labels.get(K8sLabelConstants.BASE_TAG_TASK_IDENTIFY); + } + /** * 根据键获取label * @@ -114,15 +132,40 @@ public class BizPod extends PtBaseResult { if (containerStatuses == null) { return null; } - containerStatuses.stream().map(obj -> { + containerStatuses.forEach(obj -> { if (obj.getTerminated() != null) { messages.append(StrUtil.format(CONTAINER_STATE_MESSAGE, name, phase, obj.getTerminated().getReason(), obj.getTerminated().getMessage())); } if (obj.getWaiting() != null) { 
messages.append(StrUtil.format(CONTAINER_STATE_MESSAGE, name, phase, obj.getWaiting().getReason(), obj.getWaiting().getMessage())); } - return null; }); return messages.toString(); } + + //获取 容器镜像id + public String getContainerId(){ + String containerID = null; + if (!CollectionUtils.isEmpty(containerStatuses)){ + for (BizContainerStatus bizContainerStatus : containerStatuses){ + if (StringUtils.isNotEmpty(bizContainerStatus.getContainerID())){ + containerID = bizContainerStatus.getContainerID(); + } + } + } + if (StringUtils.isNotEmpty(containerID)){ + return containerID.replace(K8sParamConstants.CONTAINER_ID_PREFIX,""); + } + return containerID; + } + + public String getRealPodPhase(){ + if (PodPhaseEnum.RUNNING.getPhase().equals(phase) && !CollectionUtils.isEmpty(containerStatuses) && containerStatuses.get(MagicNumConstant.ZERO).getWaiting() != null){ + String waitingReason = containerStatuses.get(MagicNumConstant.ZERO).getWaiting().getReason(); + if(waitingReason != null && !K8sParamConstants.WAITING_REASON_CONTAINER_CREATING.equals(waitingReason)){ + return PodPhaseEnum.FAILED.getPhase(); + } + } + return phase; + } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizQuantity.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizQuantity.java index 5447d37..bb0f374 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizQuantity.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizQuantity.java @@ -20,7 +20,10 @@ package org.dubhe.k8s.domain.resource; import lombok.Data; import lombok.experimental.Accessors; import org.dubhe.biz.base.utils.MathUtils; +import org.dubhe.biz.base.utils.StringUtils; import org.dubhe.k8s.annotation.K8sField; +import org.dubhe.k8s.constant.K8sParamConstants; +import org.dubhe.k8s.utils.UnitConvertUtils; /** * @description BizQuantity实体类 @@ -43,19 +46,28 @@ public class BizQuantity { this.format = format; } - public 
boolean isIllegal() { - return true; - } - /** - * 单位相同时相减 + * 不同单位相减 + * * @param bizQuantity 减数 - * @return + * @param limitsKey 类型 + * @return BizQuantity */ - public BizQuantity reduce(BizQuantity bizQuantity){ - if (bizQuantity == null || !bizQuantity.getFormat().equals(format)){ + public BizQuantity reduce(BizQuantity bizQuantity,String limitsKey){ + if (bizQuantity == null || StringUtils.isAllEmpty(limitsKey)){ return this; } - return new BizQuantity(MathUtils.reduce(amount,bizQuantity.getAmount()),format); + switch (limitsKey){ + case K8sParamConstants.RESOURCE_QUOTA_CPU_LIMITS_KEY : + Long cpuDiff = UnitConvertUtils.cpuFormatToN(amount,format) - UnitConvertUtils.cpuFormatToN(bizQuantity.getAmount(),bizQuantity.getFormat()); + return new BizQuantity(String.valueOf(cpuDiff),K8sParamConstants.CPU_UNIT_N); + case K8sParamConstants.RESOURCE_QUOTA_MEMORY_LIMITS_KEY : + Long memDiff = UnitConvertUtils.memFormatToMi(amount,format) - UnitConvertUtils.memFormatToMi(bizQuantity.getAmount(),bizQuantity.getFormat()); + return new BizQuantity(String.valueOf(memDiff),K8sParamConstants.MEM_UNIT); + case K8sParamConstants.RESOURCE_QUOTA_GPU_LIMITS_KEY : + return new BizQuantity(MathUtils.reduce(amount,bizQuantity.getAmount()),format); + default: + return this; + } } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizResourceQuota.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizResourceQuota.java index 369fc60..df6f3fc 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizResourceQuota.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizResourceQuota.java @@ -68,7 +68,7 @@ public class BizResourceQuota extends PtBaseResult { if (!CollectionUtils.isEmpty(hard)){ for (Map.Entry entry : hard.entrySet()) { if (used.get(entry.getKey()) != null){ - remainder.put(entry.getKey(),entry.getValue().reduce(used.get(entry.getKey()))); + 
remainder.put(entry.getKey(),entry.getValue().reduce(used.get(entry.getKey()),entry.getKey())); } } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizService.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizService.java index c3a9c31..9b55ab7 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizService.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizService.java @@ -20,6 +20,9 @@ package org.dubhe.k8s.domain.resource; import lombok.Data; import lombok.experimental.Accessors; import org.dubhe.k8s.annotation.K8sField; +import org.springframework.util.CollectionUtils; + +import java.util.List; /** * @description Kubernetes Service @@ -30,8 +33,25 @@ import org.dubhe.k8s.annotation.K8sField; public class BizService { @K8sField("metadata:name") private String name; + @K8sField("metadata:namespace") private String namespace; + @K8sField("metadata:uid") private String uid; + + @K8sField("spec:ports") + private List ports; + + public BizServicePort getServicePortByTargetPort(Integer targetPort){ + if (CollectionUtils.isEmpty(ports) || targetPort == null){ + return null; + } + for (BizServicePort port : ports) { + if (port.getTargetPort() != null && port.getTargetPort().equals(targetPort)){ + return port; + } + } + return null; + } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizServicePort.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizServicePort.java new file mode 100644 index 0000000..eccdccb --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/resource/BizServicePort.java @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.k8s.domain.resource; + +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.k8s.annotation.K8sField; + +/** + * @description Kubernetes ServicePort + * @date 2020-09-09 + */ +@Data +@Accessors(chain = true) +public class BizServicePort { + + @K8sField("name") + private String name; + + @K8sField("nodePort") + private Integer nodePort; + + @K8sField("port") + private Integer port; + + @K8sField("protocol") + private String protocol; + + @K8sField("targetPort") + private BizIntOrString targetPort; +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuMetricsDataResultVO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuMetricsDataResultVO.java new file mode 100644 index 0000000..83d3e69 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuMetricsDataResultVO.java @@ -0,0 +1,54 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.k8s.domain.vo; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; + +import java.util.List; + +/** + * @description GPU监控数据 + * @date 2021-07-22 + */ +@Data +@NoArgsConstructor +@AllArgsConstructor +@Accessors(chain = true) +public class GpuMetricsDataResultVO { + /** + * 显卡编号 + */ + private String accId; + + /** + * GPU显存总大小 + */ + private String totalMemValues; + + /** + * GPU使用率监控指标值 + */ + List gpuMetricsValues; + + /** + * GPU显存使用量监控指标值 + */ + List gpuMemValues; +} \ No newline at end of file diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuTotalMemResultVO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuTotalMemResultVO.java new file mode 100644 index 0000000..9b6c8a5 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuTotalMemResultVO.java @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.k8s.domain.vo; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; + +/** + * @description GPU显存总量result + * @date 2021-07-22 + */ +@Data +@NoArgsConstructor +@AllArgsConstructor +@Accessors(chain = true) +public class GpuTotalMemResultVO { + + /** + * 显卡编号 + */ + private String accId; + + /** + * GPU显存总大小 + */ + private String gpuTotalMemValue; + +} \ No newline at end of file diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuValueVO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuValueVO.java new file mode 100644 index 0000000..e722a9f --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/GpuValueVO.java @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.k8s.domain.vo; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; + +/** + * @description GPU实时监控数据 + * @date 2021-07-23 + */ +@Data +@NoArgsConstructor +@AllArgsConstructor +@Accessors(chain = true) +public class GpuValueVO { + /** + * 显卡id + */ + private String accId; + + /** + * 使用率 百分比 + */ + Float usage; + + /** + * GPU显存总大小 + */ + private String gpuTotalMemValue; + + /** + * GPU显存使用量 + */ + String gpuMemValue; +} \ No newline at end of file diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PodRangeMetricsVO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PodRangeMetricsVO.java index 359fae7..d2aa2f4 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PodRangeMetricsVO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PodRangeMetricsVO.java @@ -40,9 +40,9 @@ public class PodRangeMetricsVO { */ List cpuMetrics; /** - * gpu 监控指标 value为使用百分比 + * gpu 监控指标 */ - List gpuMetrics; + List gpuMetrics; /** * 内存 监控指标 value为占用内存 单位 Ki */ diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PtPodsVO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PtPodsVO.java index 5c746f3..cf75f57 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PtPodsVO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/PtPodsVO.java @@ -24,10 +24,8 @@ import org.dubhe.biz.base.constant.MagicNumConstant; import org.dubhe.biz.base.utils.MathUtils; import org.dubhe.biz.base.utils.StringUtils; import org.dubhe.k8s.utils.UnitConvertUtils; -import org.springframework.util.CollectionUtils; import java.io.Serializable; -import java.util.ArrayList; import java.util.List; /** @@ -98,9 +96,9 @@ public class PtPodsVO implements Serializable { **/ private String 
gpuUsed; /** - * gpu使用百分比 + * gpu实时监控数据 */ - private List gpuUsagePersent; + private List gpuUsagePersent; public PtPodsVO(String namespace,String podName,String cpuRequestAmount,String cpuUsageAmount,String cpuRequestFormat,String cpuUsageFormat,String memoryRequestAmount,String memoryUsageAmount,String memoryRequestFormat,String memoryUsageFormat,String nodeName,String status,String gpuUsed){ this.namespace = namespace; @@ -142,10 +140,4 @@ public class PtPodsVO implements Serializable { } } - public void addGpuUsage(String accId,Float usage){ - if (CollectionUtils.isEmpty(gpuUsagePersent)){ - gpuUsagePersent = new ArrayList<>(); - } - gpuUsagePersent.add(new GpuUsageVO(accId,usage)); - } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/TerminalResourceVO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/TerminalResourceVO.java new file mode 100644 index 0000000..971133b4 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/TerminalResourceVO.java @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.k8s.domain.vo; + +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; +import org.dubhe.k8s.domain.PtBaseResult; +import org.dubhe.k8s.domain.resource.BizDeployment; +import org.dubhe.k8s.domain.resource.BizService; + +/** + * @description 专业版终端 VO + * @date 2021-06-30 + */ +@Data +@NoArgsConstructor +@Accessors(chain = true) +public class TerminalResourceVO extends PtBaseResult { + private BizDeployment bizDeployment; + private BizService bizService; + + public TerminalResourceVO(BizDeployment bizDeployment, BizService bizService){ + this.bizDeployment = bizDeployment; + this.bizService = bizService; + } +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/VolumeVO.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/VolumeVO.java index 2aad091..6510d1e 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/VolumeVO.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/domain/vo/VolumeVO.java @@ -17,10 +17,15 @@ package org.dubhe.k8s.domain.vo; +import io.fabric8.kubernetes.api.model.EmptyDirVolumeSource; +import io.fabric8.kubernetes.api.model.Quantity; import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeBuilder; import io.fabric8.kubernetes.api.model.VolumeMount; +import io.fabric8.kubernetes.api.model.VolumeMountBuilder; import lombok.Data; import lombok.experimental.Accessors; +import org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.PtBaseResult; import java.util.ArrayList; @@ -57,4 +62,19 @@ public class VolumeVO extends PtBaseResult { } volumes.add(volume); } + + /** + * 添加shm + */ + public void addShmFsVolume(Quantity shmMemory){ + addVolumeMount(new VolumeMountBuilder() + .withName(K8sParamConstants.SHM_NAME) + .withMountPath(K8sParamConstants.SHM_MOUNTPATH) + .build()); + + addVolume(new 
VolumeBuilder() + .withName(K8sParamConstants.SHM_NAME) + .withEmptyDir(new EmptyDirVolumeSource(K8sParamConstants.SHM_MEDIUM, shmMemory)) + .build()); + } } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/BusinessLabelServiceNameEnum.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/BusinessLabelServiceNameEnum.java index 66c24d0..275add2 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/BusinessLabelServiceNameEnum.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/BusinessLabelServiceNameEnum.java @@ -48,6 +48,10 @@ public enum BusinessLabelServiceNameEnum { * 批量服务 */ BATCH_SERVING(BizEnum.BATCH_SERVING.getBizCode(), ApplicationNameConst.SERVER_SERVING), + /** + * 专业版终端 + */ + TERMINAL(BizEnum.TERMINAL.getBizCode(), ApplicationNameConst.TERMINAL), ; /** * 业务标签 diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/ServiceTypeENum.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/ServiceTypeENum.java new file mode 100644 index 0000000..e4ed1ed --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/ServiceTypeENum.java @@ -0,0 +1,40 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.k8s.enums; + +import lombok.Getter; + +/** + * @description service 类型 + * @date 2021-07-26 + */ +@Getter +public enum ServiceTypeENum { + CLUSTER_IP("ClusterIP"), + NODE_PORT("NodePort"), + LOAD_BALANCER("LoadBalancer"), + ; + + private String type; + + ServiceTypeENum(String type) { + this.type = type; + } + + +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/WebsocketTopicEnum.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/WebsocketTopicEnum.java new file mode 100644 index 0000000..86e4163 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/enums/WebsocketTopicEnum.java @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.k8s.enums; + + +/** + * @description Websocket tocpic 枚举 + * @date 2021-7-20 + */ +public enum WebsocketTopicEnum { + + /** + * 资源监控 topic + */ + RESOURCE_MONITOR("resourceMonitor"), + ; + + private String topic; + + WebsocketTopicEnum(String topic) { + this.topic = topic; + } + + public String getTopic() { + return topic; + } + +} diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sUtils.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sUtils.java index 557d643..73bab81 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sUtils.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/K8sUtils.java @@ -150,7 +150,7 @@ public class K8sUtils implements ApplicationContextAware { * @param gpuNum * @return */ - public Map gpuSelector(Integer gpuNum) { + public static Map gpuSelector(Integer gpuNum) { Map gpuSelector = new HashMap<>(2); if (gpuNum != null && gpuNum > 0) { gpuSelector.put(K8sLabelConstants.NODE_GPU_LABEL_KEY, K8sLabelConstants.NODE_GPU_LABEL_VALUE); diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/LabelUtils.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/LabelUtils.java index 7ba42a8..73ea105 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/LabelUtils.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/LabelUtils.java @@ -94,11 +94,14 @@ public class LabelUtils { * @param labels 可变参数标签Map * @return */ - public static Map getChildLabels(String resourceName, String pName, String pKind, String business, Map... labels) { + public static Map getChildLabels(String resourceName, String pName, String pKind, String business, String taskIdentify, Map... 
labels) { Map labelMap = getChildLabels(resourceName, pName, pKind, labels); if (null != business) { labelMap.put(K8sLabelConstants.BASE_TAG_BUSINESS, business); } + if (null != taskIdentify){ + labelMap.put(K8sLabelConstants.BASE_TAG_TASK_IDENTIFY, taskIdentify); + } return labelMap; } diff --git a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/ResourceBuildUtils.java b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/ResourceBuildUtils.java index b74b630..03e0d9d 100644 --- a/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/ResourceBuildUtils.java +++ b/dubhe-server/common-k8s/src/main/java/org/dubhe/k8s/utils/ResourceBuildUtils.java @@ -17,6 +17,14 @@ package org.dubhe.k8s.utils; +import cn.hutool.core.collection.CollectionUtil; +import com.google.common.collect.Maps; +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.ContainerBuilder; +import io.fabric8.kubernetes.api.model.ContainerPort; +import io.fabric8.kubernetes.api.model.ContainerPortBuilder; +import io.fabric8.kubernetes.api.model.LabelSelector; +import io.fabric8.kubernetes.api.model.Quantity; import io.fabric8.kubernetes.api.model.Secret; import io.fabric8.kubernetes.api.model.SecretBuilder; import io.fabric8.kubernetes.api.model.Service; @@ -24,6 +32,9 @@ import io.fabric8.kubernetes.api.model.ServiceBuilder; import io.fabric8.kubernetes.api.model.ServicePort; import io.fabric8.kubernetes.api.model.ServicePortBuilder; import io.fabric8.kubernetes.api.model.Toleration; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.apps.Deployment; +import io.fabric8.kubernetes.api.model.apps.DeploymentBuilder; import io.fabric8.kubernetes.api.model.extensions.Ingress; import io.fabric8.kubernetes.api.model.extensions.IngressBuilder; import io.fabric8.kubernetes.api.model.extensions.IngressRule; @@ -34,10 +45,23 @@ import org.dubhe.biz.base.constant.SymbolConstant; import 
org.dubhe.k8s.constant.K8sParamConstants; import org.dubhe.k8s.domain.bo.BuildIngressBO; import org.dubhe.k8s.domain.bo.BuildServiceBO; +import org.dubhe.k8s.domain.bo.DeploymentBO; +import org.dubhe.k8s.domain.bo.ModelServingBO; +import org.dubhe.k8s.domain.vo.VolumeVO; +import org.dubhe.k8s.enums.ImagePullPolicyEnum; +import org.dubhe.k8s.enums.K8sKindEnum; import org.dubhe.k8s.enums.K8sTolerationEffectEnum; import org.dubhe.k8s.enums.K8sTolerationOperatorEnum; +import org.dubhe.k8s.enums.RestartPolicyEnum; +import org.dubhe.k8s.enums.ShellCommandEnum; +import org.springframework.util.CollectionUtils; +import org.springframework.util.StringUtils; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.Map; +import java.util.Optional; /** * @description 构建 Kubernetes 资源对象 @@ -51,7 +75,7 @@ public class ResourceBuildUtils { * @return */ public static Service buildService(BuildServiceBO bo) { - return new ServiceBuilder() + Service service = new ServiceBuilder() .withNewMetadata() .withName(bo.getName()) .addToLabels(bo.getLabels()) @@ -62,6 +86,10 @@ public class ResourceBuildUtils { .withSelector(bo.getSelector()) .endSpec() .build(); + if (!StringUtils.isEmpty(bo.getType())){ + service.getSpec().setType(bo.getType()); + } + return service; } /** @@ -202,4 +230,74 @@ public class ResourceBuildUtils { public static Toleration buildNoScheduleEqualToleration(String key,String value){ return new Toleration(K8sTolerationEffectEnum.NOSCHEDULE.getEffect(),key, K8sTolerationOperatorEnum.EQUAL.getOperator(),null,value); } + + /** + * 构建Deployment + * + * @return Deployment + */ + public static Deployment buildDeployment(DeploymentBO bo, VolumeVO volumeVO, String deploymentName) { + Map childLabels = LabelUtils.getChildLabels(bo.getResourceName(), deploymentName, K8sKindEnum.DEPLOYMENT.getKind(), bo.getBusinessLabel(),bo.getTaskIdentifyLabel()); + LabelSelector labelSelector = new LabelSelector(); + 
labelSelector.setMatchLabels(childLabels); + return new DeploymentBuilder() + .withNewMetadata() + .withName(deploymentName) + .addToLabels(LabelUtils.getBaseLabels(bo.getResourceName(), bo.getBusinessLabel())) + .withNamespace(bo.getNamespace()) + .endMetadata() + .withNewSpec() + .withReplicas(bo.getReplicas()) + .withSelector(labelSelector) + .withNewTemplate() + .withNewMetadata() + .withName(deploymentName) + .addToLabels(childLabels) + .withNamespace(bo.getNamespace()) + .endMetadata() + .withNewSpec() + .addToNodeSelector(K8sUtils.gpuSelector(bo.getGpuNum())) + .addToContainers(buildContainer(bo, volumeVO, deploymentName)) + .addToVolumes(volumeVO.getVolumes().toArray(new Volume[0])) + .withRestartPolicy(RestartPolicyEnum.ALWAYS.getRestartPolicy()) + .endSpec() + .endTemplate() + .endSpec() + .build(); + } + + /** + * 构建 Container + * @param bo + * @param volumeVO + * @param name + * @return + */ + public static Container buildContainer(DeploymentBO bo, VolumeVO volumeVO, String name) { + Map resourcesLimitsMap = Maps.newHashMap(); + Optional.ofNullable(bo.getCpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_CPU_KEY, new Quantity(v.toString(), K8sParamConstants.CPU_UNIT))); + Optional.ofNullable(bo.getGpuNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.GPU_RESOURCE_KEY, new Quantity(v.toString()))); + Optional.ofNullable(bo.getMemNum()).ifPresent(v -> resourcesLimitsMap.put(K8sParamConstants.QUANTITY_MEMORY_KEY, new Quantity(v.toString(), K8sParamConstants.MEM_UNIT))); + Container container = new ContainerBuilder() + .withNewName(name) + .withNewImage(bo.getImage()) + .withNewImagePullPolicy(ImagePullPolicyEnum.IFNOTPRESENT.getPolicy()) + .withVolumeMounts(volumeVO.getVolumeMounts()) + .withNewResources().addToLimits(resourcesLimitsMap).endResources() + .build(); + if (bo.getCmdLines() != null) { + container.setCommand(Arrays.asList(ShellCommandEnum.BIN_BANSH.getShell())); + container.setArgs(bo.getCmdLines()); + } + 
List ports = new ArrayList<>(); + if (!CollectionUtils.isEmpty(bo.getPorts())){ + bo.getPorts().forEach(port ->{ + ports.add(new ContainerPortBuilder() + .withContainerPort(port) + .withName(SymbolConstant.PORT+SymbolConstant.HYPHEN+port).build()); + }); + container.setPorts(ports); + } + return container; + } } diff --git a/dubhe-server/common-k8s/src/main/resources/kubeconfig_dev b/dubhe-server/common-k8s/src/main/resources/kubeconfig_dev new file mode 100644 index 0000000..1144d12 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/resources/kubeconfig_dev @@ -0,0 +1,19 @@ +apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUN5RENDQWJDZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJeE1EY3lOekF5TkRVd00xb1hEVE14TURjeU5UQXlORFV3TTFvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBTG0zCkpvYVdBd2M0OUlUdHhKeWI4U21ReWRNNHkrSy8vUTZjMTYxRUxFS3RpZzhQSGVTS2IxQnlUUkN2L0xNRnB6OVMKMU1GSWdmNk5jbmZYejFRQ0JMMm9GU2doK2daVG4xc3Z0dkRlRFNjN0V1dERlZVN2KzlDMWpvaXBzWi85ZmN5dQpJU1hHS0pBS1pobUdsR2UyMWQ5M2ZVOEpGNkhUOUdVb0dtZGNZb3E0akVrUHJiN1luRHpDeE9oajFvZmlDTXQ1CkNROFhrc3dIMW9LUTJWZXVpbG9TalpoMDZKY2tEYjhrQnZBeEJFYXNuRWMreE0ydXY5dnJycVRKemE3WDVVak0KOUVFNjZDcVV6K3d1eisxeW9UT3JiTnJCdXdEelhROWRqQy9nM3NpY2xNNGtNN1VjdGtha1pObkljbjhKMzVwbgpIekRtckFaa2ZEZE1JSXVLMTZzQ0F3RUFBYU1qTUNFd0RnWURWUjBQQVFIL0JBUURBZ0trTUE4R0ExVWRFd0VCCi93UUZNQU1CQWY4d0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFDMW0wVjN4NU11d2Y0ZzI5d3RPelNGdGZTLzUKYlBnUTFtbHNrUmlyOGw0MWh0Z0V1WStwOWw2Q2VZMExmdVN5SDVBYmMwY2gvVFVDQ3Fha0JkUUU2Smd6OFdsdAprS083bGxHUXNUbnZwb0JvaCtBcEhvemdmcnpHaXpsVGpKL2psall4a1ZGRFZNcU15QUxzQ2FJL0Fmd1dxRFVSCktFYXArdEJVNlA4ZlQ0Z0c1c2FqTEY5dEh3djZ1eVNOcGJ2WENZNHRlQXN3dVFDUUY4bzRHQWEzWW4yNjRVK1MKU3VMNnZWcnRuUkNldzJHSmlxZlEyamZicktMbkVHTUIrQk0vQVdCWnU4MUNxUGkrTCtETW8ycFZCTDZZb1R0RQpHRlZxR0tzY25kd09Ycktxc3NSNUZwcU5iSG9vK1Z5RVlzTGNFN1hLWDFOUFVKdWsvUWU5aXkremkybz0KLS0tLS1FTkQgQ0V
SVElGSUNBVEUtLS0tLQo= + server: https://10.5.26.91:6443 + name: kubernetes +contexts: +- context: + cluster: kubernetes + user: kubernetes-admin + name: kubernetes-admin@kubernetes +current-context: kubernetes-admin@kubernetes +kind: Config +preferences: {} +users: +- name: kubernetes-admin + user: + client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM4akNDQWRxZ0F3SUJBZ0lJZEkwTVVNY0hUWlV3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TVRBM01qY3dNalExTUROYUZ3MHlNakEzTWpjd01qUTFNRFZhTURReApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sa3dGd1lEVlFRREV4QnJkV0psY201bGRHVnpMV0ZrCmJXbHVNSUlCSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQVE4QU1JSUJDZ0tDQVFFQWxJK3NmVVE3L1JaRG1WTi8Kd2FCNHZOVzFnNFhXcm40d2tCT0hjU0d2M2VuZmgwVGd6TzFjTVJVUy8yRUZuTTlJTTMwdU9DcTF2N1AzRngxUQpuYjNDWFFRSnBmWGc3THVMVnRjdFNac2tNNXJCWnE2Ky9vM01RdUVIcE5xS3FxUG13MDg2NnV1OTRMeEpuWlVUClc3UGszZGM3aGF5S3ozdjFRVjV1UVFFMHI5YlkyM0JVbTBtYnM0dEh4REFyMlFyNDB1TWYxZWFxY21ib0R4N3UKZU9zdmZORll1azNwaVpVTFQ4WUJoRUZVVmFiUmMvZG16Q0JseGxzL1lVK2V0YjY5YmFZS0ZpanNXeTBzV21DYgpoZWdDZmZ3WXV5OC8weFV1elMyRzJMS213NjdTWGkwaGNaZzZsYUowV2lVQnA5MzcxK0lVbG5PNlp4cUVIaS9ZCldaSWVkUUlEQVFBQm95Y3dKVEFPQmdOVkhROEJBZjhFQkFNQ0JhQXdFd1lEVlIwbEJBd3dDZ1lJS3dZQkJRVUgKQXdJd0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFCZkQxNXFzR25HVEVGaGNxQ0ZOSWE0YUs5eHNhNCtqZlVwcQpZZ3M5bkRqdXB6U2x5cHNoeGpvU3VwQWIzN293eHBRZWNQelplMWNreHh4SWJMT3hsV096ZnVWUnVRTXE1WjRHCjl2cGJDUXJSNTl4MXhKSDNQZlZSOUFrQmhPZWlzaXZoYi9sME5tbm1VS0ppMld4NFpjaGJvQUZvTXJxQXFSZjIKNnJ4QzRycE5GdW0rZ2UyL2cwMkhTY3dQMExKejlmY1pGNnFuSk5pNW8vbnk5WlY5S3RicHJyK1FIdldHQWxhVQpka3pzK1RCZDg3MVB1QmRsbnBzNFFuTllacU9WbVp1MkxPcGY2b3dOcGdYK3NlL3oxTVg2RDJiVXZSbS9DYy9kCmhrUTRTOHIrRFRjYmJaVU0zK2I1bnZWQlNNOGxELzRPWmxrMThPeThXV0VpODlSYjhMOD0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + client-key-data: 
LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFcEFJQkFBS0NBUUVBbEkrc2ZVUTcvUlpEbVZOL3dhQjR2TlcxZzRYV3JuNHdrQk9IY1NHdjNlbmZoMFRnCnpPMWNNUlVTLzJFRm5NOUlNMzB1T0NxMXY3UDNGeDFRbmIzQ1hRUUpwZlhnN0x1TFZ0Y3RTWnNrTTVyQlpxNisKL28zTVF1RUhwTnFLcXFQbXcwODY2dXU5NEx4Sm5aVVRXN1BrM2RjN2hheUt6M3YxUVY1dVFRRTByOWJZMjNCVQptMG1iczR0SHhEQXIyUXI0MHVNZjFlYXFjbWJvRHg3dWVPc3ZmTkZZdWszcGlaVUxUOFlCaEVGVVZhYlJjL2RtCnpDQmx4bHMvWVUrZXRiNjliYVlLRmlqc1d5MHNXbUNiaGVnQ2Zmd1l1eTgvMHhVdXpTMkcyTEttdzY3U1hpMGgKY1pnNmxhSjBXaVVCcDkzNzErSVVsbk82WnhxRUhpL1lXWkllZFFJREFRQUJBb0lCQUZIZDFzejFGQ1hFNWgyUgpRUDdmd29rMUw2QzUyWE14QU92MGh4ZlYyNGRyd1ExVzdKYWhXc09GZklHVTRyWG1HRUNmaEIvWmtXeUI3UEVrCjYyN3h5akRkNzc4di92SVY2NVJBWWFtRlArN2NJaVhHdUdPb21sNWtpWVRVRmNReExCS0hEODJ6ZVdCbUFoOC8Kcm5wRGFTVEMzZjhzOVJXZmxxaWVOQjNJWDFOaU9CQkRFVTlncDRXTUlJMlFmZjhYTXh5M2o2dDRRaVI0YXhaMgpWcGcwdS8xUnNRYmJDcHBud1B2bHB4K1ZNUmVMME5idlFrUDFCWWRzdnZNYTJ3UWxqZXIyTGdLZWJFUTJ4RnR6CmI4NlI3eWdqMEhkTkFrRVQ3bFlIQ3YyMzhPRjFuNzQ0YVdLejNPTlliTVl3czYvSDI2SmI5Tng4UWpMcmh0QncKeWwxMVdvVUNnWUVBeEFGekNZdGptcnB2a29VS3lWMzByY1NzTXgycGhrbVFodlBiMEg0QlNTdGxocURWUzAyeApNWURRTHE3dnlGMi9VbFNoeGNPejFoRkVZQnpMNXFDeHJoWTR4bzM1aitqUUt2M1NpNEJrT2RQbS85S0tVR2U4CnhDWm0vKzF5akQ5aUw5bHJxaHVZVWpUa3VueVJaczlLOGNvbUh3K1l3Mkp1ZTJlU29hbEZPNzhDZ1lFQXdnaVcKaklWY21EWVFnQnRLMEo0R1ZlcG42dHkrdFA0MTRxZjZ1ckg0bEJrNTMzYm5YWFRVUklHcmtJd2U3RmhVVU9Tbgp6QkRDRU90OXkwV3FDNm1PdFNGSXp0bWlzUGpoZzFrckNPMEtSbmhRZ2krVkVUc09zMkVpdmFMeTBpUUZMTEFtCmRWWVB5ZTV6bUJBNFpmK0FHSS9zaFVjYm52NE5wclZhN0hUYndzc0NnWUVBZ00vckNEVEE2dWpuZGtWVkIyMWwKNWVWQXZmOGFjQlJjbFRRaUcrMFRiMERENGl4RXdNNTRTdTQ2NHRNNDVsY2w2emVQY1diaUF5Y3A1bmhHVUhULwpFRWV1WEY3bHlmdlpBc2Y0M0pFdVRRckQzeEJBc2dMYS9aVzNUeTUvZjBnUWdSM0VNZkVjczduckw5dTV1dlUwCmJaN0tXdE5uTTlDMmh3ZFVTRVgwS0FVQ2dZRUFxZk9Sd2hWUnJEUzJzUzYya2hISDVOc0RHSEV2VFNMWW1lV2UKTklhTC8xUTFINmxyTDYyVCswbEV0OEF6bWJvb2FBNVBkdXM2d0UwMENxYkRNYUhiVUlXTjJ6Zy9TeGxEbGFPMQprUVRtbWlEcUM0MlgyTmJNb291elBUMEx5TW9xQ2lGa0hpUTZnaG10QUFVOUVRSUE5Z0tpSFg3U0Q2bmcxU05HCk41U0hwSThDZ1lCKzVWMWRqdnB1YVd3N2FUK3pDQlB5ZExEQWJw
eW1ZT3cwUm9VdzByQ0REZ2tQb3RSM2UvTEMKbk92aFJ6MmNETWJmSXRxY1JxYmJ4UEVmOGUvNjZyYlNYYm5MN2Y2OXhrdlpDSVFLZVRpMWtLVS90VUE4R1NidQphTitTTnZKY1NuM2tlV3BRb2Nnck55UFRSVHhYRGg3U3pXc1Q0MEFDZ0I2OXdIVUhXUVRxdXc9PQotLS0tLUVORCBSU0EgUFJJVkFURSBLRVktLS0tLQo= diff --git a/dubhe-server/common-k8s/src/main/resources/kubeconfig_pre b/dubhe-server/common-k8s/src/main/resources/kubeconfig_pre new file mode 100644 index 0000000..ce15511 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/resources/kubeconfig_pre @@ -0,0 +1,19 @@ +apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUN5RENDQWJDZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJeE1EY3hNekEzTlRNek5sb1hEVE14TURjeE1UQTNOVE16Tmxvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBTEZlCmp5KzlwTjNybnJpYTlpbDhzOEp0WWZoekVMUU1LbXVmb0Y0VEhjTnFObGtOVDJiL2pTcUUzbGdRYTR2Zm9oL1AKUFVOaWVkZEZvczFoRGNxRnF0QmRpaXJ5QXhSNERPeTJsMWpjSWVpVHJNaHYyd1JZc09ZMG4yanZITFdGdENCaQphY0lEeXFmNmhHM01SV2ZNUEhPVVdDQ0JIUmxLRFlnYkNDZXp5NWxxUEF4QXpmcUpEalUxWkNUVmNNa2Z3R1FDCmFSYVZ4aTFiRHhRSk9Ud2dTTGlJdEJIUGhrdGtOcnY1MU1XZzRSWkVUbWF3azg5Y09oWExpSEg3ZExFZjFvK0oKVE80Q3J1K2pyb3lIRkNvQmo0bGpKZHd4N0ZRS25XZFpEbWZiOFFoZEdyTkRuK21WR3BRNnZSRGN5YzloSVJsMwpyM3U4bkM2TS9mZ29UTHhWaTFFQ0F3RUFBYU1qTUNFd0RnWURWUjBQQVFIL0JBUURBZ0trTUE4R0ExVWRFd0VCCi93UUZNQU1CQWY4d0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFIbDZMYWpscFZLMk9PbG1JYzRVK3A4bTZoVGEKd2NCTW5MZjVJcExwME9CRmZ6M1hvVi9EVUlmTE1LU2k2bzY3a1Y0aFYzNDZMTnErU244elBUZEZkbDdjZUlvbQp2azc3YXE4c2VERk5xbTJMSytSUldmUWZQbFQrSGhiWFVpS1lOUUhmanlXdjh6QzFGaWpBQTBqaGVjWVBaSnU4ClpUb2JaenhPaS8yOGNwSmNOSFFDSW14OG16UkU2c3JTRjFsbE4yaWF0eTQ1cWhEZWwyZHpmSldaeTFzTXVwc2YKaTc4Z25qdlhFU3BYaGoweGZNbEdMNnA3QkVjdUw1YkgyN3Y3djJjelh2WVJnbk0vamhscGVzbVRsNzFiSE1KQQpnSzlpR2pqMHhoZ1Jnb3h2VHNQNVZLSUszTFZVbGxNNXJhNXYxTjdOOXpmQVZybnprRGRoaXNZQzZCUT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + server: https://10.105.0.29:6443 + name: 
kubernetes +contexts: +- context: + cluster: kubernetes + user: kubernetes-admin + name: kubernetes-admin@kubernetes +current-context: kubernetes-admin@kubernetes +kind: Config +preferences: {} +users: +- name: kubernetes-admin + user: + client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM4akNDQWRxZ0F3SUJBZ0lJVjB5b2I4RnFSdGt3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TVRBM01UTXdOelV6TXpaYUZ3MHlNakEzTVRNd056VXpNemhhTURReApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sa3dGd1lEVlFRREV4QnJkV0psY201bGRHVnpMV0ZrCmJXbHVNSUlCSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQVE4QU1JSUJDZ0tDQVFFQXVPN0M2OWNlUjVVbmZuUDIKREZNR1M2RzZ2RkYxLzNxSU85ZVFTR0pZRU15dDhHWmo4QWNlbnMrQTNDS05jRHJ4dk5oQlpvazlPa2w4SnpzRQpZOFhKMTFmZVdPS0Zwb0dYVGtseHBuakcvRWVwbWh1d1JodUZhaTlsZ1BXQnUyei9JOG5meTdjSUhscW1EYW1UCkViaEJGWitSbzlPYjB5dTJ1MklhUWNUTVpPREc5eEdTRURIT1VYTkIzcG5oWGhYRmhZUTZyeUpVMkFzYy9TS0UKa1ZTN3RWY2hoVHZOSHVnWC9YQ1hwZ1dpSXJhMUZNbUNBMGMzY003NDJhNFI4QnhmKzBremNBV0ZqQUIwc2xFdQphcURSK3I2SWQyUWRyVjVBQXUra29rU1MyRnhKNzF6R3VtRnRQdlVyT1UzeXhreWFJbHZ1aFcxZEV5cHl4WjVJCkJvSUNWd0lEQVFBQm95Y3dKVEFPQmdOVkhROEJBZjhFQkFNQ0JhQXdFd1lEVlIwbEJBd3dDZ1lJS3dZQkJRVUgKQXdJd0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFLKzRlV280b0g0K1RFS3doUU9ZSFJvWXhWcEVxWDBXZmoxSQp5US9Qbm9sTXh5ZmQxTkc4VzdyeW1FdWJZbEtIV3JjOTNKdHJBOTA3N01BRHMrWGcvWVZrbFdpVnF2b2Eva2xOCm9aY1pVc3Q2VEdMNVJ6QmE5NzFheCt3Q09xMXRiSng0ZGtwSDdmWVVWeXVaTUg0MXhxL1hIZEwzSnV3RzFSbU8KQnEvZWdmY3d0QzdhZGthRU1SbWxlRCtzelVBZkQxYjFmd29JNnpyV2ZyV0JnUTUwTmhFUzk0NldLOER1QU84cQovalFXRlVtd3M3Nzg0d3ZPR0JjTGpSeWNvWGpxSDZXeW5hSE9BRVdCakJaMUtyYjh3LzNucnNucTArOTdtbG1oCi9IMkpNQ1lIbDRZQi9Bb2pXUnhpVE50b3BsM0k1Sll4Vlkza1J1ZWF2c0pQNElmNmJ1MD0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + client-key-data: 
LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFcEFJQkFBS0NBUUVBdU83QzY5Y2VSNVVuZm5QMkRGTUdTNkc2dkZGMS8zcUlPOWVRU0dKWUVNeXQ4R1pqCjhBY2VucytBM0NLTmNEcnh2TmhCWm9rOU9rbDhKenNFWThYSjExZmVXT0tGcG9HWFRrbHhwbmpHL0VlcG1odXcKUmh1RmFpOWxnUFdCdTJ6L0k4bmZ5N2NJSGxxbURhbVRFYmhCRlorUm85T2IweXUydTJJYVFjVE1aT0RHOXhHUwpFREhPVVhOQjNwbmhYaFhGaFlRNnJ5SlUyQXNjL1NLRWtWUzd0VmNoaFR2Tkh1Z1gvWENYcGdXaUlyYTFGTW1DCkEwYzNjTTc0MmE0UjhCeGYrMGt6Y0FXRmpBQjBzbEV1YXFEUityNklkMlFkclY1QUF1K2tva1NTMkZ4SjcxekcKdW1GdFB2VXJPVTN5eGt5YUlsdnVoVzFkRXlweXhaNUlCb0lDVndJREFRQUJBb0lCQVFDaC9SSmtmdlFaQTcrcQpkbXpwOHJlcS9DbVQxMDhpei9RUlp3c05QSWVqZjRaRTg0dEtyeEhWVGpHem9kaCtuRU12aGNZVHlOY0cvV054CkFiTWdxaG5aTlRDZ2J4dGU5RmpTekdadXlaQ1RYenBpc1NwQTNzNklhcWZneEN3MVBvNW1qT2dwaTFQak1zZ04KWTZKZGZTWVZpTWFMMkVuQU9hUkFrdmdvNy9lUnpBeFowNkhZeEJ5WlM2dzZrRVhpQmJQYThCeUpkd0pIRzRqLwpHSU9yZkhXRWhldkdjcTU3SmF6c3hrTzczZXBYbW5IU21NQWVKcXY0RmF2czBtSUZ4THlRU1NjSmxZZ2xXWlJuCjRmekZVM2tLYmRaak9NdWJpbGY1ODluQUZ4cVpnMzhJblU0bGpzSjRsSFVhU0M1VlZ5UTFvc2pmaGlFeFg1NVMKQmJmSFU3MnhBb0dCQU91T2pHTTlwYi9rQ282ZXhaREZNSlRqbmZMQmtEQlAxNVN6NjByUjY5cjFRQ1hwZk1CWgppT0N1aHZTSnJvRGJQYVVvUXc4QkhvV01BT3d4VU1veXpUZDYwOC8xVWRmY1F2RUtsY0dXSXpGMWJxSTllcFNpCmQ3Q3dSZ2Vxd01rUTF6SHBsYU4rWkY3MEJsZ3RJYkx3Ymd6dWhuSWlZRjVRbi82dWVMQXkzQkhyQW9HQkFNajcKZWoybklVT1JxY2lpeUZwQlVERGUwT01GYWNiYkk2VzA3RUl4cElxRWpLejZPenVmZFBYSFpLVU44QmEwRjlBUgpsSzVBdXd1STErbnFqeTJJOWV3Lzk1QTQ1NGFBYjM0WjRqekUzUm8vS1N0dytQR0xUb0RSeEZneGVhQmYvUkdlClIvZlpTK2h2VHgrYy9pQjRQYU5sOGVpQ2ZFcVFhYS9MbW5WcitncEZBb0dBYnNWN0tWUWROTzdsTkFwZjkrTnoKSkNFaDdyMnRzN3BvTTZxa05Hd2hVTGRTTWtIcGczN1hTbWxvVjJqRG9oNzNqMG91dHNpYzlNcFF5TUdzTDFuUwpmWXVLUGRvc1lhbFg1WWhId21CN0xrLzk5ZGVaWkhvK2ROMkFJU3pnT2Uxc2RURldTQ3N3d2lKWk5YQUx6OTBXCnM0Z2J1MktGRlBVdk9CSU4rVFlCblg4Q2dZRUFyWEdBYTZaSXFaUVNMb2gwV0pkV3llWHY1SXJ1WHVNTW4xdEUKTEZmRkJKa2hBY0lzemVadFBCR29CRnpEM2dQckxPK1BITlhWMVQxeC8zY2h1bzBnbFpJYVpnY0ZudWhGejFBdApFbjVkeE9ITytLTlU0clp5dCs3Ty84RXFra0ZrRndrK1dHRFpCaXpRM3BwUUlOdERiamh6REZGWFM4M1d0eFFCCkp1Wlk4UFVDZ1lBOFlsZnNsQ2VmZktvTVBVK3pKZWF5SUlQNTR0
M3NySkFwblNyc1BuNlNBSlB1WjU5ZkYrUTcKNGlvNk0weEw0dE5VckhPQ2ZpODJXSzlRQ29EbDRIYTJGZ0Y3YkNMTTQ1Qld5cVp5THg0b0JRYmFGNE5ybmVuTgpVUVVFbkU2V2ZyN1RFWWVBNngrOG10dXAvcDJzbTd2eWdkYngrRFB1OXYzV1JyZE1DZDBNWUE9PQotLS0tLUVORCBSU0EgUFJJVkFURSBLRVktLS0tLQo= diff --git a/dubhe-server/common-k8s/src/main/resources/kubeconfig_prod b/dubhe-server/common-k8s/src/main/resources/kubeconfig_prod new file mode 100644 index 0000000..ee32ce4 --- /dev/null +++ b/dubhe-server/common-k8s/src/main/resources/kubeconfig_prod @@ -0,0 +1,19 @@ +apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUN5RENDQWJDZ0F3SUJBZ0lCQURBTkJna3Foa2lHOXcwQkFRc0ZBREFWTVJNd0VRWURWUVFERXdwcmRXSmwKY201bGRHVnpNQjRYRFRJeE1EVXdOekE0TURFME9Gb1hEVE14TURVd05UQTRNREUwT0Zvd0ZURVRNQkVHQTFVRQpBeE1LYTNWaVpYSnVaWFJsY3pDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBTXU2CmRJYWVmOWQzYXByK3lKK3BLY0EyaU5NQXlQVWFsbFgyZHBwelprd282T0R2TW9FYmgrRWFaY0Y1aEdMVnhSWDYKbnhtcUQvVFNCWXdENFNzY1E3c0YzcWtxeWxBLzJXaTg1NTJKbGJQcXFSQm5CaEUwV211ZE9EVXZYYVF0N3BnWApzR1JKcDROcFBsd0tLMUVpZmhsdkJIMmRVWHFjZDRENmZKbWRSSWgrNEpOS3ZOL09Hait4WjNKUG5Cc0pKOUlICms4TWFsc3NuTTYvaFpna0tKVlplc2YvcVorN2I3dXpJSVJteEd3L0RBcmtNaGgvL1VCZUUzVEFsd1lWWisxZWkKU293eldEN3EzYzFhU3NJYkdrbXJWaXNQcVVZTk9sUEplcHJLTVFJRUJKVEdPWUYzSzk0eHNKaEtBMkI0Z0VCRgpOdkJxTzVqZFZ4RkM0SFdxM1VFQ0F3RUFBYU1qTUNFd0RnWURWUjBQQVFIL0JBUURBZ0trTUE4R0ExVWRFd0VCCi93UUZNQU1CQWY4d0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFLV254YlZqTFRWZVVyZDB0Q004SE83ejk2QnQKQTBGS3pmclRSTkhtT0I2WVVSRnVRbWZJdE9GTkY4QnJoYVBCZVNKMFZrNVdNUXZBd1BkdnY2R2l1NU1VNU45TApHV3R2eXhsS0Z5aVkxR25RUy9sWjRjR1JaSE9kMmtMNFY3bVNLQmo3ZFpzcDN0dW42d3BQZWM3dUJ6Z1UvNzdxCjN1b3BGMGVzR21wY3ZFaVhNSlRrZUN1NTNhaTVFVHhSS0Q0V0xxUUFhbDViUlB4b0UwL3Mrams5SGI4b1JuNnQKa0RDaElpQnVjL3RDaHhwTmNFVWt6UUwyRjBHR2hRaVlQTFFQdkgzRml5S2tlWjBiYUNHdFdpODEvZG9lVVJKZQpOM21jU3pvZEM1SUhMc1Zta05HTzh3Y2pocjVZck0yVGxHS01sV1huRVk5QWFMUEl1blI0VmQ4ZU1iWT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + server: https://10.105.0.1:6443 + name: 
kubernetes +contexts: +- context: + cluster: kubernetes + user: kubernetes-admin + name: kubernetes-admin@kubernetes +current-context: kubernetes-admin@kubernetes +kind: Config +preferences: {} +users: +- name: kubernetes-admin + user: + client-certificate-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUM4akNDQWRxZ0F3SUJBZ0lJSDByc3FWMTBudUF3RFFZSktvWklodmNOQVFFTEJRQXdGVEVUTUJFR0ExVUUKQXhNS2EzVmlaWEp1WlhSbGN6QWVGdzB5TVRBMU1EY3dPREF4TkRoYUZ3MHlNakExTURjd09EQXhOVEJhTURReApGekFWQmdOVkJBb1REbk41YzNSbGJUcHRZWE4wWlhKek1Sa3dGd1lEVlFRREV4QnJkV0psY201bGRHVnpMV0ZrCmJXbHVNSUlCSWpBTkJna3Foa2lHOXcwQkFRRUZBQU9DQVE4QU1JSUJDZ0tDQVFFQXUwVlR6ejIvK1VqbEtVRHkKL211bVYrbTBjejg1UURTUjU1aXN5QmNDTWVzbEJsSEYyVEhWQm9FRHhUMEFHQmF5ai8rdEx6c1J4Vjg4Z28vNQpkZFVZL0llNllNVmVlMVZuQ29CL2VvYStkUDBQZUlhZVl3amU3WTJhdFhpMDBOM3EzZ253Q1AvM3FodUpabDBTClQraTJHMktOQlRvbjYvSGUxRVBva0hlcTZaMU5yYm5aTWROTzBWM1VaTExzMXdhS2ZESDJHRStlang1QzU4VEQKSnVGdjN5QkFPWW5CblI0YTBObWJpTFJ2RmN3d1BFR24wamlHazZGY2oyK0RWRlVCQjYzbVFOV3puWGVlaDQzVwpodm9GQnZKUVg0OFZTeng0U2tHVURtaUU1Sktncms0T3dJNk9vR0Fsa0kvbU4wQnl2b3o4UjFFbENMY29jZzJWCkhvN1Rrd0lEQVFBQm95Y3dKVEFPQmdOVkhROEJBZjhFQkFNQ0JhQXdFd1lEVlIwbEJBd3dDZ1lJS3dZQkJRVUgKQXdJd0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFNSGpQdnNNWDUyMjNzbTZTQTJzYlBzc3NaalJJSUhCSWloUwp6MlBVR0Q2R2NXZ0RRRXBzRWVRRDYzby9vLzVLcndBbHUveGlnVW9VK2dkRTQ0S29PTmM0Z05tdHdZOVhzcnZXCmZSamc3YXh2MGN5czBuSzBCdVJyYjBQVDZ0ZkVqb25yQXUvQ1NuSG9LZERuaVBweUNQSys5amZONGpJR1VobFkKU0Z4Qnh6N3ZGQlUrRlVZbjBYR3BHV3FnQjd6bVRwRllSVXNKOVc5eXJlaWlsZDVJcDdzSGljZG0za0NiYlJWagpmQ1k0eVV5elVSbFV2Q2xrOE8zQktEQkYxeWZIUGErUW1STStabzhEK3V2T2VHVVFqSW92eHdXNVNPM1RkNkE5Clk2WEoxbEpwRmtUWndrMk13N1N0RjhxK3MwMGFnNFB0RGJhKzNoM2FmWG1DWU9FOG5pUT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + client-key-data: 
LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb2dJQkFBS0NBUUVBdTBWVHp6Mi8rVWpsS1VEeS9tdW1WK20wY3o4NVFEU1I1NWlzeUJjQ01lc2xCbEhGCjJUSFZCb0VEeFQwQUdCYXlqLyt0THpzUnhWODhnby81ZGRVWS9JZTZZTVZlZTFWbkNvQi9lb2ErZFAwUGVJYWUKWXdqZTdZMmF0WGkwME4zcTNnbndDUC8zcWh1SlpsMFNUK2kyRzJLTkJUb242L0hlMUVQb2tIZXE2WjFOcmJuWgpNZE5PMFYzVVpMTHMxd2FLZkRIMkdFK2VqeDVDNThUREp1RnYzeUJBT1luQm5SNGEwTm1iaUxSdkZjd3dQRUduCjBqaUdrNkZjajIrRFZGVUJCNjNtUU5Xem5YZWVoNDNXaHZvRkJ2SlFYNDhWU3p4NFNrR1VEbWlFNUpLZ3JrNE8Kd0k2T29HQWxrSS9tTjBCeXZvejhSMUVsQ0xjb2NnMlZIbzdUa3dJREFRQUJBb0lCQUVYR0pOM1lZZ2lkY2xTVwprSExlNVJGb1VBV0lqdW92TEJXZ092QXFNblVxNlphYkxSNHBoUGR4WmxnOHpDWXRmc1pNT3RpWUo1emtTUVZVClkxdlYxQU56QnF3N25XSlNoWnZTR0swc094WVhtNFlLa2tUUDcwK1BMUTlrTStxR1pKWHFHZmNnZDhSM2toQUQKcVdrQWlhbFdaTGlIM0l2NmlFMktKOEo3ODhBcWFrM1liemJFR0xvUkxKbzZONjZzSm9CcHRUS3BOOENpcjF0bQozR1MvRllZK2ZzcVFudU8vWkNjOUl1ck5scVJkU0ZNdThRT2pRMFp6TkJqMVVEZFBNa24vWEhWN3BIQWF1NkVrCm5lLzlrSUNWMWxMZ2FuK0lLYW1kVElvZk5aVUhsL3k5QksvZ1U0SFRVMGo1Sit5WkV0bXI2VktmeGloQks5aWcKelZRM1R0RUNnWUVBNnh0b0RRd2dFRmNSdEJ3WXZQZWR5L0FZTjlwc0ZjaDhGRFJJUXlxUlhUWUVURlRjaXFNQwpwMHYyaXY2dHdoWUo1QmZVcHBJUU1kK2c3NjFiZHdqTHpweEJKZHpzakwySE5HMzhaeWxZTXc1LzBIT1FOTHFoCmt2VW9rMVJHQkxhOGFuOFA2eGdvNXptMHY3SW11MGs4RVQ0bjI5aDJJclVVSm1pWkZMM3dBMThDZ1lFQXkrbXEKUUR3NktJc1lDTU44NnBzSXAvQU5zTWErVlJ5cFJRMWYzQStSSjdnNkhYRGxqOVlLQWIxSEdobnMwKy80ZzRrRQpoTUczZmVNM2hoc281aVhCNjRPS3RkQkFodVA1UkF0RDU0eDY2MHhtYnZqY3YyVFJFS0pLMGs5SXBSU1ppa2pGCmVVTG1rbzZnUmU1aVI5NU5NQ0o5NFRBQW80SXNEZjJxQ0pqRU1FMENnWUJFRjZMeUxISFk2YTdKOEYxRjFaMlIKSkUrUFZhWjZSSitUSm5WTFpyZkZQRkRRWHIrbE00TWdPd01EekxFOGhpK0ZMVlc4akk4K01wdWs3eHVQaFMrcAovbDFyL3VsUDlkQ0Q0ZHI1Y2VNR25vdHNMeHd6K1YyMGQyYXlEUFZlaGlKWjRjVVZmT0RUMzBXM1EzeXVQNDZ6Ckc4SmxqUExpS0huV2lmTFVMQktvbHdLQmdBeU45Mmg3RE0yZ09ydVhaYUtBSnhsSDQxL2w3S1FLM3JVY3JMRTgKMkNBTTdLOFJXMkR1dWJEL3VWZjNEcWpCMXBncW9IZVlBYmNqZlRDcGpXd1dHUWxxRU9rK3lDcEY4UHZxZ2FUSQo5bXgwU0w0K1hrRCtjUXpJSVRrdm5uWmpmVXlSVEc4NTJqNWR2NnB1a2VpQTNGbkJWZVMrY3R1ZVVSNFBaeCtlCllEM2xBb0dBRWRId3VVa2ZBYWZJMFdkOXdlYXNDb1daTUlDTm1M
K1ZDcXEzMEZyamVFMXg0cElTNjdFODJkU2UKKy9TbDNkWXpucHkxODJVdGJQNzlSYThoNXloOUVCTzVYMXlweGlCeCt3MmdwNWVJWG00aStOUWZxbjNOUm42awo2VW1RY1hCMXV4djNKWENuYWxVUk5tVWdudEFzaEQxaGd0Y1VvSzArS0NXT3VhUnFqSTQ9Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg== diff --git a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/dao/PtTrainAlgorithmMapper.java b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/dao/PtTrainAlgorithmMapper.java index ba59c52..63b28dc 100644 --- a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/dao/PtTrainAlgorithmMapper.java +++ b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/dao/PtTrainAlgorithmMapper.java @@ -21,8 +21,8 @@ import com.baomidou.mybatisplus.core.mapper.BaseMapper; import org.apache.ibatis.annotations.Param; import org.apache.ibatis.annotations.Select; import org.apache.ibatis.annotations.Update; -import org.dubhe.biz.base.annotation.DataPermission; import org.dubhe.algorithm.domain.entity.PtTrainAlgorithm; +import org.dubhe.biz.base.annotation.DataPermission; import java.util.List; import java.util.Set; @@ -31,7 +31,7 @@ import java.util.Set; * @description 训练算法Mapper * @date 2020-04-27 */ -@DataPermission(ignoresMethod = {"insert"}) +@DataPermission(ignoresMethod = {"insert", "selectPreAlgorithm"}) public interface PtTrainAlgorithmMapper extends BaseMapper { /** @@ -67,4 +67,11 @@ public interface PtTrainAlgorithmMapper extends BaseMapper { */ @Update("update pt_train_algorithm set deleted = #{deleteFlag} where id = #{id}") int updateStatusById(@Param("id") Long id, @Param("deleteFlag") boolean deleteFlag); + + /** + * 查询可推理预置算法 + * @return List 返回可推理预置算法集合 + */ + @Select("select * from pt_train_algorithm where deleted = 0 and inference=1 and algorithm_source=2 order by id desc") + List selectPreAlgorithm(); } diff --git a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/service/impl/PtTrainAlgorithmServiceImpl.java 
b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/service/impl/PtTrainAlgorithmServiceImpl.java index 89a9af1..d324d87 100644 --- a/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/service/impl/PtTrainAlgorithmServiceImpl.java +++ b/dubhe-server/dubhe-algorithm/src/main/java/org/dubhe/algorithm/service/impl/PtTrainAlgorithmServiceImpl.java @@ -620,21 +620,37 @@ public class PtTrainAlgorithmServiceImpl implements PtTrainAlgorithmService { */ @Override public List getInferenceAlgorithm() { + //获取用户信息 + UserContext user = userContext.getCurUser(); QueryWrapper wrapper = new QueryWrapper<>(); wrapper.eq("inference", true).orderByDesc("id"); List ptTrainAlgorithms = ptTrainAlgorithmMapper.selectList(wrapper); - if (CollectionUtils.isEmpty(ptTrainAlgorithms)) { - return null; + List ptTrainAlgorithmQueryResult = new ArrayList<>(); + if (CollectionUtils.isNotEmpty(ptTrainAlgorithms)) { + ptTrainAlgorithmQueryResult = ptTrainAlgorithms.stream().map(x -> { + PtTrainAlgorithmQueryVO ptTrainAlgorithmQueryVO = new PtTrainAlgorithmQueryVO(); + BeanUtils.copyProperties(x, ptTrainAlgorithmQueryVO); + //获取镜像名称与版本 + getImageNameAndImageTag(x, ptTrainAlgorithmQueryVO); + return ptTrainAlgorithmQueryVO; + }).collect(Collectors.toList()); + } + + //非管理员用户查询可推理预置算法 + if (!BaseService.isAdmin(user)) { + List preAlgorithms = ptTrainAlgorithmMapper.selectPreAlgorithm(); + List preAlgorithmQueryResult = preAlgorithms.stream().map(x -> { + PtTrainAlgorithmQueryVO ptTrainAlgorithmQueryVO = new PtTrainAlgorithmQueryVO(); + BeanUtils.copyProperties(x, ptTrainAlgorithmQueryVO); + //获取镜像名称与版本 + getImageNameAndImageTag(x, ptTrainAlgorithmQueryVO); + return ptTrainAlgorithmQueryVO; + }).collect(Collectors.toList()); + if (CollectionUtils.isNotEmpty(preAlgorithmQueryResult)) { + ptTrainAlgorithmQueryResult.addAll(preAlgorithmQueryResult); + } } - List ptTrainAlgorithmQueryResult = ptTrainAlgorithms.stream().map(x -> { - PtTrainAlgorithmQueryVO ptTrainAlgorithmQueryVO = 
new PtTrainAlgorithmQueryVO(); - BeanUtils.copyProperties(x, ptTrainAlgorithmQueryVO); - //获取镜像名称与版本 - getImageNameAndImageTag(x, ptTrainAlgorithmQueryVO); - return ptTrainAlgorithmQueryVO; - }).collect(Collectors.toList()); return ptTrainAlgorithmQueryResult; } - } diff --git a/dubhe-server/dubhe-algorithm/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-algorithm/src/main/resources/bootstrap.yml index 0824216..94dd2df 100644 --- a/dubhe-server/dubhe-algorithm/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-algorithm/src/main/resources/bootstrap.yml @@ -31,7 +31,7 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 diff --git a/dubhe-server/dubhe-data-dcm/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-data-dcm/src/main/resources/bootstrap.yml index 46b2f81..adc8619 100644 --- a/dubhe-server/dubhe-data-dcm/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-data-dcm/src/main/resources/bootstrap.yml @@ -42,7 +42,7 @@ spring: discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 # 配置允许后面的Bean覆盖前面名称重复的Bean diff --git a/dubhe-server/dubhe-data-task/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-data-task/src/main/resources/bootstrap.yml index ecf6b93..729037d 100644 --- a/dubhe-server/dubhe-data-task/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-data-task/src/main/resources/bootstrap.yml @@ -36,7 +36,7 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 # 配置允许后面的Bean覆盖前面名称重复的Bean diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/DatasetMapper.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/DatasetMapper.java index 529a541..8f1cf78 100644 --- 
a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/DatasetMapper.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/dao/DatasetMapper.java @@ -31,7 +31,7 @@ import org.dubhe.data.domain.entity.Dataset; * @description 数据集管理 Mapper 接口 * @date 2020-04-10 */ -@DataPermission(ignoresMethod = {"insert", "selectById", "selectCountByPublic"}) +@DataPermission(ignoresMethod = {"insert", "selectById", "selectCountByPublic", "selectList"}) public interface DatasetMapper extends BaseMapper { /** diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/entity/DatasetVersion.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/entity/DatasetVersion.java index 377394b..8f9fb59 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/entity/DatasetVersion.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/domain/entity/DatasetVersion.java @@ -24,6 +24,8 @@ import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; +import org.dubhe.biz.base.constant.NumberConstant; +import org.dubhe.biz.base.constant.UserConstant; import org.dubhe.biz.db.entity.BaseEntity; import org.dubhe.data.domain.dto.DatasetVersionCreateDTO; @@ -87,4 +89,15 @@ public class DatasetVersion extends BaseEntity { this.setCreateTime(new Timestamp(System.currentTimeMillis())); } + public DatasetVersion(Long datasetId, String versionName, String versionNote) { + this.datasetId = datasetId; + this.versionName = versionName; + this.setCreateUserId(UserConstant.DEFAULT_CREATE_USER_ID); + this.setCreateTime(new Timestamp(System.currentTimeMillis())); + this.versionUrl = "dataset/"+datasetId +"/versionFile/"+versionName; + this.dataConversion = NumberConstant.NUMBER_2; + this.originUserId = UserConstant.DEFAULT_ORIGIN_USER_ID; + this.versionNote = versionNote; + } + } diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/DatasetController.java 
b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/DatasetController.java index bcde8c9..add4e66 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/DatasetController.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/rest/DatasetController.java @@ -159,5 +159,10 @@ public class DatasetController { return new DataResponseBody(datasetService.getConvertInfoByDatasetId(datasetId)); } + @ApiOperation("获取预置数据集列表") + @GetMapping(value = "/getPresetDataset") + public DataResponseBody getPresetDataset() { + return new DataResponseBody(datasetService.getPresetDataset()); + } } diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetService.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetService.java index bf3c367..1b7b9ab 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetService.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetService.java @@ -300,4 +300,12 @@ public interface DatasetService { * @param versionFiles 原版本列表 */ void backupDatasetDBAndMinioData(Dataset originDataset, Dataset targetDataset, List versionFiles); + + /** + * 获取预置数据集列表 + * + * @return Map 数据集详情 + */ + List getPresetDataset(); + } diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetVersionService.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetVersionService.java index 2a5d4f7..adb3a65 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetVersionService.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/DatasetVersionService.java @@ -190,4 +190,12 @@ public interface DatasetVersionService { * @param fileNameMap 文件列表 */ void insertEsData(String versionSource, String versionTarget, Long datasetId, Long datasetIdTarget, Map fileNameMap); + + /** + * 生成版本数据 + * + * @param datasetVersion 版本详情 + */ + void insertOne(DatasetVersion datasetVersion); + } 
diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetServiceImpl.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetServiceImpl.java index ea028ad..ae0cd09 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetServiceImpl.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetServiceImpl.java @@ -624,6 +624,13 @@ public class DatasetServiceImpl extends ServiceImpl impl } } dataset.setUri(fileUtil.getDatasetAbsPath(dataset.getId())); + if (datasetCreateDTO.getDataType().equals(DatatypeEnum.AUTO_IMPORT.getValue())) { + //自定义数据集处理 1.生成版本数据并设计数据集当前版本 2.数据集状态修改为标注完成 + datasetVersionService.insertOne(new DatasetVersion(dataset.getId(), DEFAULT_VERSION, + DatatypeEnum.getEnumValue(datasetCreateDTO.getDataType()).getMsg())); + dataset.setStatus(DataStateCodeConstant.ANNOTATION_COMPLETE_STATE); + dataset.setCurrentVersionName(DEFAULT_VERSION); + } updateById(dataset); return dataset.getId(); } @@ -1643,4 +1650,18 @@ public class DatasetServiceImpl extends ServiceImpl impl ); } } + + /** + * 获取预置数据集列表 + * + * @return Map 数据集详情 + */ + @Override + public List getPresetDataset() { + QueryWrapper queryWrapper = new QueryWrapper<>(); + queryWrapper.eq("type", MagicNumConstant.TWO) + .ne("deleted", MagicNumConstant.ONE); + return baseMapper.selectList(queryWrapper); + } + } diff --git a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetVersionServiceImpl.java b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetVersionServiceImpl.java index 9fdc60e..a787d2c 100644 --- a/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetVersionServiceImpl.java +++ b/dubhe-server/dubhe-data/src/main/java/org/dubhe/data/service/impl/DatasetVersionServiceImpl.java @@ -1019,4 +1019,15 @@ public class DatasetVersionServiceImpl extends ServiceImpl projectTypes; +} \ No newline at end of file diff --git 
a/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/domain/entity/PtImage.java b/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/domain/entity/PtImage.java index 623ca79..6f03153 100644 --- a/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/domain/entity/PtImage.java +++ b/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/domain/entity/PtImage.java @@ -89,4 +89,12 @@ public class PtImage extends BaseEntity { */ @TableField(value = "origin_user_id", fill = FieldFill.INSERT) private Long originUserId; + + //镜像ssh密码 + @TableField(value = "ssh_pwd") + private String sshPwd; + + //镜像ssh用户 + @TableField(value = "ssh_user") + private String sshUser; } diff --git a/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/rest/PtImageController.java b/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/rest/PtImageController.java index fd45d09..575dfa5 100644 --- a/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/rest/PtImageController.java +++ b/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/rest/PtImageController.java @@ -21,11 +21,7 @@ import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; import org.dubhe.biz.base.constant.Permissions; import org.dubhe.biz.base.vo.DataResponseBody; -import org.dubhe.image.domain.dto.PtImageDeleteDTO; -import org.dubhe.image.domain.dto.PtImageQueryDTO; -import org.dubhe.image.domain.dto.PtImageQueryUrlDTO; -import org.dubhe.image.domain.dto.PtImageUpdateDTO; -import org.dubhe.image.domain.dto.PtImageUploadDTO; +import org.dubhe.image.domain.dto.*; import org.dubhe.image.service.PtImageService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.prepost.PreAuthorize; @@ -60,8 +56,8 @@ public class PtImageController { @ApiOperation("通过projectName查询镜像") @GetMapping - public DataResponseBody getTagsByImageName(@RequestParam Integer projectType, @RequestParam String imageName) { - return new 
DataResponseBody(ptImageService.searchImages(projectType, imageName)); + public DataResponseBody getTagsByImageName(@Validated PtImageQueryImageDTO ptImageQueryImageDTO) { + return new DataResponseBody(ptImageService.searchImages(ptImageQueryImageDTO)); } @PostMapping("uploadImage") @@ -90,8 +86,8 @@ public class PtImageController { @GetMapping("/imageNameList") @ApiOperation("获取镜像名称列表") - public DataResponseBody getImageNameList(@RequestParam Integer projectType) { - return new DataResponseBody(ptImageService.getImageNameList(projectType)); + public DataResponseBody getImageNameList(@Validated PtImageQueryNameDTO ptImageQueryNameDTO) { + return new DataResponseBody(ptImageService.getImageNameList(ptImageQueryNameDTO)); } @PutMapping("/imageResource") @@ -106,4 +102,10 @@ public class PtImageController { public DataResponseBody getImageUrl(@Validated PtImageQueryUrlDTO ptImageQueryUrlDTO) { return new DataResponseBody(ptImageService.getImageUrl(ptImageQueryUrlDTO)); } + + @GetMapping("/terminalImageList") + @ApiOperation("获取终端镜像列表") + public DataResponseBody getTerminalImageList() { + return new DataResponseBody(ptImageService.getTerminalImageList()); + } } diff --git a/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/service/PtImageService.java b/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/service/PtImageService.java index e3d1b3a..8b47285 100644 --- a/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/service/PtImageService.java +++ b/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/service/PtImageService.java @@ -49,12 +49,12 @@ public interface PtImageService { /** - * 根据镜像获取信息 + * 获取镜像信息 * - * @param imageName 镜像名 + * @param ptImageQueryImageDTO 查询条件 * @return List 镜像集合 */ - List searchImages(Integer projectType, String imageName); + List searchImages(PtImageQueryImageDTO ptImageQueryImageDTO); /** * 删除镜像 @@ -73,10 +73,10 @@ public interface PtImageService { /** * 获取镜像名称列表 - * @param projectType 镜像项目类型 + * @param ptImageQueryNameDTO 
获取镜像名称列表查询条件 * @return Set 镜像列表 */ - Set getImageNameList(Integer projectType); + Set getImageNameList(PtImageQueryNameDTO ptImageQueryNameDTO); /** * 修改镜像来源(notebook定制) @@ -100,4 +100,10 @@ public interface PtImageService { */ void recycleRollback(RecycleCreateDTO dto); + /** + * 获取终端镜像列表 + * + * @return List + */ + List getTerminalImageList(); } diff --git a/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/service/impl/PtImageServiceImpl.java b/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/service/impl/PtImageServiceImpl.java index ca36f22..b365950 100644 --- a/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/service/impl/PtImageServiceImpl.java +++ b/dubhe-server/dubhe-image/src/main/java/org/dubhe/image/service/impl/PtImageServiceImpl.java @@ -232,17 +232,19 @@ public class PtImageServiceImpl implements PtImageService { } /** - * 根据镜像获取信息 + * 获取镜像信息 * - * @param imageName 镜像名 + * @param ptImageQueryImageDTO 查询条件 * @return List 通过imageName查询所含镜像版本信息 */ @Override @DataPermissionMethod(dataType = DatasetTypeEnum.PUBLIC) - public List searchImages(Integer projectType, String imageName) { + public List searchImages(PtImageQueryImageDTO ptImageQueryImageDTO) { LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper<>(); - queryWrapper.eq(PtImage::getProjectName, resourcetoName(projectType)) - .eq(PtImage::getImageName, imageName) + if (ptImageQueryImageDTO.getProjectType() != null) { + queryWrapper.eq(PtImage::getProjectName, resourcetoName(ptImageQueryImageDTO.getProjectType())); + } + queryWrapper.eq(PtImage::getImageName, ptImageQueryImageDTO.getImageName()) .eq(PtImage::getImageStatus, ImageStateEnum.SUCCESS.getCode()); List ptImages = ptImageMapper.selectList(queryWrapper); List list = new ArrayList<>(); @@ -335,14 +337,17 @@ public class PtImageServiceImpl implements PtImageService { /** * 获取镜像名称列表 - * @param projectType 镜像项目类型 + * @param ptImageQueryNameDTO 获取镜像名称列表查询条件 * @return Set 镜像列表 */ @Override @DataPermissionMethod(dataType = 
DatasetTypeEnum.PUBLIC) - public Set getImageNameList(Integer projectType) { + public Set getImageNameList(PtImageQueryNameDTO ptImageQueryNameDTO) { + List projectTypes = new ArrayList<>(); + ptImageQueryNameDTO.getProjectTypes().forEach(x -> + projectTypes.add(ImageTypeEnum.getType(x))); List imageList = ptImageMapper.selectList(new LambdaQueryWrapper() - .eq(PtImage::getProjectName, ImageTypeEnum.getType(projectType)) + .in(PtImage::getProjectName, projectTypes) .eq(PtImage::getImageStatus, ImageStateEnum.SUCCESS.getCode())); Set imageNames = new HashSet<>(); imageList.forEach(image -> { @@ -397,10 +402,6 @@ public class PtImageServiceImpl implements PtImageService { @Override @DataPermissionMethod(dataType = DatasetTypeEnum.PUBLIC) public String getImageUrl(PtImageQueryUrlDTO imageQueryUrlDTO) { - - if (imageQueryUrlDTO.getProjectType().equals(ImageTypeEnum.NOTEBOOK.getType())) { - DataContext.set(CommonPermissionDataDTO.builder().type(true).build()); - } LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper<>(); if (imageQueryUrlDTO.getProjectType() != null && ImageTypeEnum.NOTEBOOK.getType().equals(imageQueryUrlDTO.getProjectType())) { DataContext.set(CommonPermissionDataDTO.builder().type(true).build()); @@ -414,8 +415,10 @@ public class PtImageServiceImpl implements PtImageService { if (StrUtil.isNotEmpty(imageQueryUrlDTO.getImageTag())) { queryWrapper.eq(PtImage::getImageTag, imageQueryUrlDTO.getImageTag()); } - queryWrapper.eq(PtImage::getProjectName, resourcetoName(imageQueryUrlDTO.getProjectType())) - .eq(PtImage::getImageStatus, ImageStateEnum.SUCCESS.getCode()); + if (imageQueryUrlDTO.getProjectType() != null) { + queryWrapper.eq(PtImage::getProjectName, resourcetoName(imageQueryUrlDTO.getProjectType())); + } + queryWrapper.eq(PtImage::getImageStatus, ImageStateEnum.SUCCESS.getCode()); List imageList = ptImageMapper.selectList(queryWrapper); if (CollUtil.isEmpty(imageList)) { @@ -437,6 +440,31 @@ public class PtImageServiceImpl implements 
PtImageService { ptImageMapper.updateDeletedById(Long.valueOf(imageId), false); } + @Override + @DataPermissionMethod(dataType = DatasetTypeEnum.PUBLIC) + public List getTerminalImageList() { + UserContext user = userContextService.getCurUser(); + LambdaQueryWrapper queryTerminalWrapper = new LambdaQueryWrapper<>(); + queryTerminalWrapper.eq(PtImage::getProjectName, ImageTypeEnum.TERMINAL.getCode()) + .eq(PtImage::getImageStatus, ImageStateEnum.SUCCESS.getCode()); + if (user != null && !BaseService.isAdmin()) { + queryTerminalWrapper.and(wrapper -> wrapper.eq(PtImage::getCreateUserId, user.getId()).or().eq(PtImage::getImageResource, ImageSourceEnum.PRE.getCode())); + } + + List terminalImages = ptImageMapper.selectList(queryTerminalWrapper); + + List list = new ArrayList<>(); + if (CollUtil.isEmpty(terminalImages)) { + return new ArrayList<>(); + } + + terminalImages.stream().forEach(ptImage -> { + ptImage.setImageUrl(ptImage.getImageUrl()); + list.add(ptImage); + }); + return list; + } + /** * @param ptImageUploadDTO 镜像上传逻辑校验 @@ -448,8 +476,7 @@ public class PtImageServiceImpl implements PtImageService { LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper<>(); - queryWrapper.eq(PtImage::getProjectName, resourcetoName(ptImageUploadDTO.getProjectType())) - .eq(PtImage::getImageName, ptImageUploadDTO.getImageName()) + queryWrapper.eq(PtImage::getImageName, ptImageUploadDTO.getImageName()) .eq(PtImage::getImageTag, ptImageUploadDTO.getImageTag()) .eq(PtImage::getImageResource, source); diff --git a/dubhe-server/dubhe-image/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-image/src/main/resources/bootstrap.yml index ed63575..2e63790 100644 --- a/dubhe-server/dubhe-image/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-image/src/main/resources/bootstrap.yml @@ -30,7 +30,7 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 diff --git 
a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/dto/ResourceQuotaDTO.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/dto/ResourceQuotaDTO.java new file mode 100644 index 0000000..839bba3 --- /dev/null +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/dto/ResourceQuotaDTO.java @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.dubhek8s.domain.dto; + +import lombok.Data; + +import javax.validation.constraints.NotNull; + +/** + * @description k8s节点资源隔离DTO + * @date 2021-07-21 + */ +@Data +public class ResourceQuotaDTO { + + @NotNull(message = "用户 ID 不能为空") + private Long userId; + + @NotNull(message = "CPU 资源限制配置不能为空") + private Integer cpuLimit; + + @NotNull(message = "内存资源限制配置不能为空") + private Integer memoryLimit; + + @NotNull(message = "GPU 资源限制配置不能为空") + private Integer gpuLimit; +} diff --git a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/vo/NamespaceVO.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/vo/NamespaceVO.java new file mode 100644 index 0000000..0531660 --- /dev/null +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/vo/NamespaceVO.java @@ -0,0 +1,63 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.dubhek8s.domain.vo; + +import lombok.Data; +import lombok.experimental.Accessors; +import java.util.List; + +/** + * @description + * @date 2021-7-14 + */ +@Data +@Accessors(chain = true) +public class NamespaceVO { + + /** + * CPU 资源总量 单位:核 + */ + private Integer hardCpu; + /** + * 内存资源总量 单位:Gi + */ + private Integer hardMemory; + /** + * GPU 资源总量 单位:块 + */ + private Integer hardGpu; + /** + * CPU 资源已使用量 单位:核 + */ + private Integer usedCpu; + /** + * 内存资源已使用量 单位:Gi + */ + private Integer usedMemory; + /** + * GPU 资源已使用量 单位:块 + */ + private Integer usedGpu; + + /** + * 任务 资源占用信息 + */ + private List tasks; + + + +} diff --git a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/vo/PodResVO.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/vo/PodResVO.java new file mode 100644 index 0000000..fb73c5c --- /dev/null +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/vo/PodResVO.java @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.dubhek8s.domain.vo; + +import lombok.Data; +import lombok.experimental.Accessors; + +/** + * @description Pod 资源占用信息展示 VO + * @date 2021-7-19 + */ +@Data +@Accessors(chain = true) +public class PodResVO { + /** + * pod的名称 + */ + private String podName; + + /** + * pod的内存 + */ + private Integer podMemory; + /** + * pod的cpu + */ + private Integer podCpu; + /** + * pod的显卡 + */ + private Integer podCard; + /*** + * pod的状态 + */ + private String status; +} diff --git a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/vo/TaskResVO.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/vo/TaskResVO.java new file mode 100644 index 0000000..ff28733 --- /dev/null +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/domain/vo/TaskResVO.java @@ -0,0 +1,65 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.dubhek8s.domain.vo; + +import lombok.Data; +import lombok.experimental.Accessors; + +import java.util.List; +import java.util.Objects; + +/** + * @description 任务 资源占用信息展示 VO + * @date 2021-7-29 + */ +@Data +@Accessors(chain = true) +public class TaskResVO { + /** + * 任务 ID + */ + private Long taskId; + /** + * 任务名称 + */ + private String taskName; + + /** + * 业务标签 + */ + private String businessLabel; + + /** + * 该任务所有Pod资源占用信息 + */ + List podResVOS; + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TaskResVO taskResVO = (TaskResVO) o; + return Objects.equals(taskId, taskResVO.taskId) && + Objects.equals(taskName, taskResVO.taskName) && + Objects.equals(businessLabel, taskResVO.businessLabel); + } + + @Override + public int hashCode() { + return Objects.hash(taskId, taskName, businessLabel); + } +} diff --git a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/event/callback/PodCallback.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/event/callback/PodCallback.java index 4ec8769..2595091 100644 --- a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/event/callback/PodCallback.java +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/event/callback/PodCallback.java @@ -27,6 +27,7 @@ import org.dubhe.biz.base.utils.StringUtils; import org.dubhe.biz.base.vo.DataResponseBody; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; +import org.dubhe.dubhek8s.handler.WebSocketServer; import org.dubhe.k8s.cache.ResourceCache; import org.dubhe.k8s.constant.K8sLabelConstants; import org.dubhe.k8s.domain.dto.BaseK8sPodCallbackCreateDTO; @@ -35,6 +36,7 @@ import org.dubhe.k8s.enums.PodPhaseEnum; import org.dubhe.k8s.enums.WatcherActionEnum; import org.dubhe.k8s.service.K8sResourceService; import org.dubhe.k8s.utils.K8sCallBackTool; +import 
org.dubhe.k8s.utils.K8sNameTool; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.HttpEntity; import org.springframework.http.HttpHeaders; @@ -42,7 +44,6 @@ import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Component; import org.springframework.web.client.RestTemplate; - import java.util.Observable; @@ -60,6 +61,10 @@ public class PodCallback extends Observable { private ResourceCache resourceCache; @Autowired private RestTemplate restTemplate; + @Autowired + private WebSocketServer webSocketServer; + @Autowired + private K8sNameTool k8sNameTool; private static final String POD_CONDITION_STATUS_FALSE = "False"; @@ -77,9 +82,14 @@ public class PodCallback extends Observable { if (pod == null){ return; } + // 推送集群资源监控信息 + Long userId = k8sNameTool.getUserIdFromNamespace(pod.getNamespace()); + if (userId != null){ + webSocketServer.sendToClient(userId); + } String businessLabel = pod.getBusinessLabel(); LogUtil.info(LogEnum.BIZ_K8S,"watch pod {} action:{} phase:{}",pod.getName(),watcherActionEnum.getAction(),pod.getPhase()); - dealWithAdded(watcherActionEnum,pod); + cachePod(watcherActionEnum,pod); String waitingReason = dealWithWaiting(watcherActionEnum, pod); setChanged(); notifyObservers(pod); @@ -113,9 +123,13 @@ public class PodCallback extends Observable { * @param watcherActionEnum 监控枚举类 * @param pod Pod对象 */ - private void dealWithAdded(WatcherActionEnum watcherActionEnum, BizPod pod) { + private void cachePod(WatcherActionEnum watcherActionEnum, BizPod pod) { if (WatcherActionEnum.ADDED.getAction().equals(watcherActionEnum.getAction())){ resourceCache.cachePod(pod.getLabel(K8sLabelConstants.BASE_TAG_SOURCE),pod.getName()); + } else if (PodPhaseEnum.RUNNING.getPhase().equals(pod.getPhase())) { + if(!resourceCache.isPodNameCached(pod.getName())) { + resourceCache.cachePod(pod.getLabel(K8sLabelConstants.BASE_TAG_SOURCE),pod.getName()); + } 
} } @@ -136,7 +150,7 @@ public class PodCallback extends Observable { String waitingReason = pod.getContainerStatuses().get(MagicNumConstant.ZERO).getWaiting().getReason(); waitingMessgae = pod.getContainerStatuses().get(MagicNumConstant.ZERO).getWaiting().getMessage(); if(waitingReason == null || "ContainerCreating".equals(waitingReason)){ - return "Container is being created"; + return "任务已下发到 kubernetes"; } // 将 Phase 置为 FAILED @@ -150,7 +164,7 @@ public class PodCallback extends Observable { waitingMessgae = pod.getConditions().get(MagicNumConstant.ZERO).getMessage(); return waitingMessgae; } - return "Container is being created"; + return "任务已下发到 kubernetes"; } return waitingMessgae; } diff --git a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/handler/WebSocketServer.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/handler/WebSocketServer.java new file mode 100644 index 0000000..77c5b68 --- /dev/null +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/handler/WebSocketServer.java @@ -0,0 +1,244 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.dubhek8s.handler; + +import com.alibaba.fastjson.JSON; +import com.auth0.jwt.JWT; +import com.auth0.jwt.JWTVerifier; +import com.auth0.jwt.algorithms.Algorithm; +import com.auth0.jwt.interfaces.Claim; +import com.auth0.jwt.interfaces.DecodedJWT; +import org.dubhe.biz.base.constant.AuthConst; +import org.dubhe.biz.base.constant.ResponseCode; +import org.dubhe.biz.base.utils.StringUtils; +import org.dubhe.biz.base.vo.WebsocketDataResponseBody; +import org.dubhe.biz.log.enums.LogEnum; +import org.dubhe.biz.log.utils.LogUtil; +import org.dubhe.cloud.authconfig.dto.JwtUserDTO; +import org.dubhe.dubhek8s.service.SystemNamespaceService; +import org.dubhe.k8s.enums.WebsocketTopicEnum; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.security.core.userdetails.UserDetailsService; +import org.springframework.stereotype.Component; +import javax.websocket.OnClose; +import javax.websocket.OnError; +import javax.websocket.OnMessage; +import javax.websocket.OnOpen; +import javax.websocket.Session; +import javax.websocket.server.ServerEndpoint; +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +/** + * @description WebSocket 服务处理类 + * @date 2021-7-19 + */ +@ServerEndpoint("/ws") +@Component +public class WebSocketServer { + + // ConcurrentHashMap 用于保存 session 信息 + private static final ConcurrentMap USER_CLIENT_MAP = new ConcurrentHashMap<>(); + private Session session; + private Long userId; + + // 需要注入的 bean 声明为静态变量,保证每一个用户连接创建的 websocket 对象都能使用 + private static SystemNamespaceService systemNamespaceService; + private static UserDetailsService userDetailsService; + + @Autowired + public void setSystemNamespaceService(SystemNamespaceService systemNamespaceService){ + 
WebSocketServer.systemNamespaceService = systemNamespaceService; + } + + @Autowired + public void setUserDetailsService(@Qualifier("userDetailsServiceImpl") UserDetailsService userDetailsService){ + WebSocketServer.userDetailsService = userDetailsService; + } + + + /** + * 连接成功调用的方法 + * + * @param session 连接 session + */ + @OnOpen + public void onOpen(Session session) { + this.session = session; + // 验证 session 是否合法 + userId = verify(session); + // 如果为 null,则不合法,断开连接 + if (userId == null){ + close(); + return; + } + // 这里用 用户ID,不用 sessionId 是因为发送消息时需要通过用户Id查询消息内容 + USER_CLIENT_MAP.put(userId, this); + } + + /** + * 收到客户端发送的消息后调用的方法 + * + * @param message 消息 + * @param session session + */ + @OnMessage + public void onMessage(String message, Session session) { + LogUtil.debug(LogEnum.BIZ_K8S,"WebSocketServer onMessage, message:{}, session:{}, sessionId:{}", message, this.toString(), session.getId()); + // 验证 session 是否合法 + Long userId = verify(session); + // 如果不合法,或者 USER_CLIENT_MAP 不包含这个 session,则关闭 + if (userId == null || !USER_CLIENT_MAP.containsKey(userId)){ + this.close(); + return; + } + // 解析客户端发过来的消息 + WebsocketDataResponseBody websocketDataResponseBody = JSON.parseObject(message, WebsocketDataResponseBody.class); + // 校验 Topic + if (WebsocketTopicEnum.RESOURCE_MONITOR.getTopic().equals(websocketDataResponseBody.getTopic())){ + sendMessage(JSON.toJSONString(new WebsocketDataResponseBody(WebsocketTopicEnum.RESOURCE_MONITOR.getTopic(), systemNamespaceService.findNamespace(userId)))); + } else { + sendMessage(JSON.toJSONString(new WebsocketDataResponseBody(ResponseCode.BADREQUEST, null, null))); + } + } + + + /** + * 发生异常时的方法 + * + * @param session 客户端 session + * @param throwable + */ + @OnError + public void onError(Session session, Throwable throwable) { + if (this.session != null && this.session.isOpen()) { + LogUtil.error(LogEnum.BIZ_K8S, "An error occurred on Websocket connection, sessionId:{}, session:{}, error:{}",session.getId(), this, throwable); + 
this.close(); + } else { + LogUtil.debug(LogEnum.BIZ_K8S,"An error occurred on the closed websocket connection, inputSession:{}, localSession:{}, error:{}", session.getId(), this, throwable); + } + } + + /** + * 连接关闭时调用的方法 + */ + @OnClose + public void onClose() { + LogUtil.debug(LogEnum.BIZ_K8S,"WebSocketServer onClose, session:{}", this); + this.close(); + } + + /** + * 给所有 session 发消息的方法 + */ + public void sendToAll() { + USER_CLIENT_MAP.keySet().parallelStream().forEach(userId -> USER_CLIENT_MAP.get(userId) + .sendMessage(JSON.toJSONString(new WebsocketDataResponseBody(WebsocketTopicEnum.RESOURCE_MONITOR.getTopic(), + systemNamespaceService.findNamespace(userId))))); + } + + public void sendToClient(Long userId) { + if (USER_CLIENT_MAP.get(userId) != null){ + USER_CLIENT_MAP.get(userId).sendMessage(JSON.toJSONString(new WebsocketDataResponseBody(WebsocketTopicEnum.RESOURCE_MONITOR.getTopic(), + systemNamespaceService.findNamespace(userId)))); + } + + } + + /** + * 获取存储 session 的 Map + */ + public static ConcurrentMap getUserClientMap(){ + return USER_CLIENT_MAP; + } + + /** + * 推送消息 + * + * @param message 消息内容 + */ + private void sendMessage(String message) { + try { + session.getBasicRemote().sendText(message); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_K8S, "WebSocketServer sendMessage error, message:{}, session:{}, error:{}",message, this, e); + } + } + + /** + * 关闭session连接 + */ + private void close() { + // 从 map 中删除 session + if (userId != null){ + USER_CLIENT_MAP.remove(userId); + } + if (session == null) { + LogUtil.debug(LogEnum.BIZ_K8S, "Websocket connection had been closed, session:{}", this); + return; + } + // 关闭session + try { + if (session.isOpen()) { + session.close(); + } + LogUtil.info(LogEnum.BIZ_K8S, "Websocket connection is closed" ); + } catch (IOException e) { + LogUtil.error(LogEnum.BIZ_K8S,"WebSocketServer close error, session:{}, error:{}" ); + } + } + + /** + * 验证 session 是否合法,合法返回用户 ID, 不合法返回 null + * + * @param session 连接 
session + * @return Long + */ + private Long verify (Session session) { + DecodedJWT jwt = null; + Long curUserId = null; + + // 获取请求参数 + String queryString = session.getQueryString(); + if (StringUtils.isEmpty(queryString)){ + return curUserId; + } + // 获取 token + String token = queryString.contains("Bearer%20") ? queryString.substring(9) : ""; + + try { + // 解析 token + JWTVerifier jwtVerifier = JWT.require(Algorithm.HMAC256(AuthConst.CLIENT_SECRET)).build(); + jwt = jwtVerifier.verify(token); + Map claims = jwt.getClaims(); + + // 获取用户名 + Claim userNameClaim = claims.get("user_name"); + String userName = userNameClaim.asString(); + // 获根据用户名取用户信息 + JwtUserDTO jwtUserDTO = (JwtUserDTO) userDetailsService.loadUserByUsername(userName); + curUserId = jwtUserDTO.getCurUserId(); + + }catch (Exception e) { + LogUtil.error(LogEnum.BIZ_K8S,"WebSocketServer verify error, error:{}", e); + } + return curUserId; + } +} diff --git a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/rest/ResourceQuotaController.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/rest/ResourceQuotaController.java new file mode 100644 index 0000000..461203f --- /dev/null +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/rest/ResourceQuotaController.java @@ -0,0 +1,51 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.dubhek8s.rest; + +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; +import org.dubhe.biz.base.constant.ResponseCode; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.dubhek8s.domain.dto.ResourceQuotaDTO; +import org.dubhe.dubhek8s.service.ResourceQuotaService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +/** + * @description + * @date 2021-7-21 + */ +@Api(tags = "系统:ResourceQuota管理") +@RestController +@RequestMapping("/resourceQuota") +public class ResourceQuotaController { + @Autowired + ResourceQuotaService resourceQuotaService; + + @ApiOperation("通过用户 ID 更新 ResourceQuota(用于 admin 模块内部调用)") + @PostMapping(value = "update") + public DataResponseBody updateResourceQuota(@RequestBody ResourceQuotaDTO resourceQuotaDTO){ + if (resourceQuotaService.UpdateResourceQuota(resourceQuotaDTO)){ + return new DataResponseBody(); + } + + return new DataResponseBody(ResponseCode.ERROR); + } +} diff --git a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/rest/SystemNamespaceController.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/rest/SystemNamespaceController.java new file mode 100644 index 0000000..5d04ff8 --- /dev/null +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/rest/SystemNamespaceController.java @@ -0,0 +1,53 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.dubhek8s.rest; + +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; +import org.dubhe.biz.base.constant.Permissions; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.dubhek8s.handler.WebSocketServer; +import org.dubhe.dubhek8s.service.SystemNamespaceService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +/** + * @description 查询命名空间状态的 controller 层 + * @date 2021-7-14 + */ +@Api(tags = "系统:命名空间状态管理") +@RestController +@RequestMapping("/namespace") +public class SystemNamespaceController { + @Autowired + SystemNamespaceService systemNamespaceService; + + @Autowired + WebSocketServer webSocketServer; + + @ApiOperation("查询命名空间资源信息") + @GetMapping(value = "findNamespace") + @PreAuthorize(Permissions.USER_RESOURCE_INFO) + public DataResponseBody findNamespace(@RequestParam(value = "userId") Long userId){ + return new DataResponseBody(systemNamespaceService.findNamespace(userId)); + } +} diff --git a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/ResourceQuotaService.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/ResourceQuotaService.java new file 
mode 100644 index 0000000..580ddf0 --- /dev/null +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/ResourceQuotaService.java @@ -0,0 +1,27 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.dubhek8s.service; + +import org.dubhe.dubhek8s.domain.dto.ResourceQuotaDTO; + +/** + * @description ResourceQuota 服务接口 + * @date 2021-7-21 + */ +public interface ResourceQuotaService { + boolean UpdateResourceQuota(ResourceQuotaDTO ResourceQuotaDTO); +} diff --git a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/SystemNamespaceService.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/SystemNamespaceService.java new file mode 100644 index 0000000..870e5a9 --- /dev/null +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/SystemNamespaceService.java @@ -0,0 +1,33 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.dubhek8s.service; + +import org.dubhe.dubhek8s.domain.vo.NamespaceVO; + +/** + * @description 查询命名空间状态的 service 层接口 + * @date 2021-7-14 + */ +public interface SystemNamespaceService { + /** + * 查询命名空间封装的数据 + * + * @param userId 用户 ID + * @return NamespaceVO + */ + NamespaceVO findNamespace(Long userId); +} diff --git a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/impl/ResourceQuotaServiceImpl.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/impl/ResourceQuotaServiceImpl.java new file mode 100644 index 0000000..76a4aff --- /dev/null +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/impl/ResourceQuotaServiceImpl.java @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.dubhek8s.service.impl; + +import org.dubhe.dubhek8s.domain.dto.ResourceQuotaDTO; +import org.dubhe.dubhek8s.handler.WebSocketServer; +import org.dubhe.dubhek8s.service.ResourceQuotaService; +import org.dubhe.k8s.api.ResourceQuotaApi; +import org.dubhe.k8s.domain.resource.BizResourceQuota; +import org.dubhe.k8s.utils.K8sNameTool; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +/** + * @description ResourceQuotaService 实现类 + * @date 2021-7-21 + */ +@Service +public class ResourceQuotaServiceImpl implements ResourceQuotaService { + @Autowired + ResourceQuotaApi resourceQuotaApi; + @Autowired + K8sNameTool k8sNameTool; + @Autowired + WebSocketServer webSocketServer; + @Override + public boolean UpdateResourceQuota(ResourceQuotaDTO resourceQuotaDTO) { + String namespace = k8sNameTool.getNamespace(resourceQuotaDTO.getUserId()); + BizResourceQuota bizResourceQuota = resourceQuotaApi.create(namespace, namespace, resourceQuotaDTO.getCpuLimit(), + resourceQuotaDTO.getMemoryLimit(), resourceQuotaDTO.getGpuLimit()); + webSocketServer.sendToAll(); + return bizResourceQuota.isSuccess(); + } +} diff --git a/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/impl/SystemNamespaceServiceImpl.java b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/impl/SystemNamespaceServiceImpl.java new file mode 100644 index 0000000..ad2207e --- /dev/null +++ b/dubhe-server/dubhe-k8s/src/main/java/org/dubhe/dubhek8s/service/impl/SystemNamespaceServiceImpl.java @@ -0,0 +1,254 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.dubhek8s.service.impl; + +import cn.hutool.core.collection.CollectionUtil; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.utils.StringUtils; +import org.dubhe.biz.redis.utils.RedisUtils; +import org.dubhe.dubhek8s.domain.vo.PodResVO; +import org.dubhe.dubhek8s.domain.vo.TaskResVO; +import org.dubhe.dubhek8s.service.SystemNamespaceService; +import org.dubhe.dubhek8s.domain.vo.NamespaceVO; +import org.dubhe.k8s.api.PodApi; +import org.dubhe.k8s.api.ResourceQuotaApi; +import org.dubhe.k8s.cache.ResourceCache; +import org.dubhe.k8s.constant.K8sParamConstants; +import org.dubhe.k8s.domain.resource.BizContainer; +import org.dubhe.k8s.domain.resource.BizPod; +import org.dubhe.k8s.domain.resource.BizQuantity; +import org.dubhe.k8s.domain.resource.BizResourceQuota; +import org.dubhe.k8s.enums.PodPhaseEnum; +import org.dubhe.k8s.utils.K8sNameTool; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; +import org.springframework.util.CollectionUtils; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.dubhe.biz.base.constant.StringConstant.CACHE_TASK_ID; +import static org.dubhe.biz.base.constant.StringConstant.CACHE_TASK_NAME; + +/** + * @description 查询命名空间状态的 
service 层接口实现类
+ * @date 2021-7-14
+ */
+@Service
+public class SystemNamespaceServiceImpl implements SystemNamespaceService {
+    @Autowired
+    K8sNameTool k8sNameTool;
+
+    @Autowired
+    ResourceQuotaApi resourceQuotaApi;
+
+    @Autowired
+    PodApi podApi;
+
+    @Autowired
+    ResourceCache resourceCache;
+
+    @Autowired
+    RedisUtils redisUtils;
+
+    /** Default CPU limit, reported as "hard" when the namespace has no usable ResourceQuota. */
+    @Value("${user.config.cpu-limit}")
+    private Integer cpuLimit;
+
+    /**
+     * Default memory limit; multiplied by 1024 before it is reported.
+     * NOTE(review): presumably configured in Gi and reported in Mi - confirm against the config.
+     */
+    @Value("${user.config.memory-limit}")
+    private Integer memoryLimit;
+
+    /** Default GPU limit, reported as "hard" when the namespace has no usable ResourceQuota. */
+    @Value("${user.config.gpu-limit}")
+    private Integer gpuLimit;
+
+    /** Task name given to pods whose task information cannot be read from the Redis cache. */
+    private static final String UNKNOWN = "unknown";
+
+    /**
+     * Queries the resource information of a user's namespace: the pods that have not succeeded yet,
+     * grouped by task, together with the hard and used amounts of the namespace resource quota.
+     *
+     * @param userId user ID the namespace name is generated from
+     * @return NamespaceVO with the task list and quota figures; when no usable ResourceQuota is found
+     *         the configured default limits are reported as "hard" and every "used" value is 0
+     */
+    @Override
+    public NamespaceVO findNamespace(Long userId) {
+        NamespaceVO namespaceVO = new NamespaceVO();
+        String namespaceStr = k8sNameTool.generateNamespace(userId);
+
+        // Pods in phase SUCCEEDED are dropped; every other pod of the namespace is reported.
+        List<BizPod> bizPodList = podApi.getWithNamespace(namespaceStr).stream()
+                .filter(pod -> !PodPhaseEnum.SUCCEEDED.getPhase().equals(pod.getPhase()))
+                .collect(Collectors.toList());
+
+        Set<TaskResVO> taskResVOS = new HashSet<>();
+        Map<String, List<BizPod>> sortedBizPodsMap = sortBizPod(bizPodList, taskResVOS);
+
+        // Index the task VOs once by the grouping key instead of re-scanning the set for every group.
+        Map<String, TaskResVO> taskResVOByKey = new HashMap<>();
+        for (TaskResVO taskResVO : taskResVOS) {
+            taskResVOByKey.putIfAbsent(
+                    taskKey(taskResVO.getBusinessLabel(), taskResVO.getTaskId(), taskResVO.getTaskName()),
+                    taskResVO);
+        }
+
+        List<TaskResVO> taskResVOList = new ArrayList<>();
+        for (Map.Entry<String, List<BizPod>> entry : sortedBizPodsMap.entrySet()) {
+            TaskResVO taskResVO = taskResVOByKey.get(entry.getKey());
+            if (taskResVO == null) {
+                // sortBizPod adds one TaskResVO per key, so this only fires if the two drift apart.
+                throw new IllegalStateException("No task information collected for pod group " + entry.getKey());
+            }
+            List<PodResVO> podResVOS = new ArrayList<>();
+            for (BizPod bizPod : entry.getValue()) {
+                podResVOS.add(buildPodResVO(bizPod));
+            }
+            taskResVOList.add(taskResVO.setPodResVOS(podResVOS));
+        }
+        namespaceVO.setTasks(taskResVOList);
+
+        // Keep the quota named after the namespace, or any quota without match expressions (no scope).
+        List<BizResourceQuota> resourceQuotas = resourceQuotaApi.list(namespaceStr).stream()
+                .filter(quota -> namespaceStr.equals(quota.getName())
+                        || CollectionUtils.isEmpty(quota.getMatchExpressions()))
+                .collect(Collectors.toList());
+
+        if (CollectionUtil.isEmpty(resourceQuotas)) {
+            namespaceVO.setHardCpu(cpuLimit)
+                    .setHardMemory(memoryLimit * 1024)
+                    .setHardGpu(gpuLimit)
+                    .setUsedCpu(0)
+                    .setUsedMemory(0)
+                    .setUsedGpu(0);
+            return namespaceVO;
+        }
+
+        BizResourceQuota bizResourceQuota = resourceQuotas.get(0);
+        // Total amounts granted by the quota.
+        Map<String, BizQuantity> hard = bizResourceQuota.getHard();
+        // Amounts of the quota that are already used.
+        Map<String, BizQuantity> used = bizResourceQuota.getUsed();
+
+        namespaceVO.setHardCpu(getResourceAmount(hard, K8sParamConstants.RESOURCE_QUOTA_CPU_LIMITS_KEY))
+                .setHardMemory(getResourceAmount(hard, K8sParamConstants.RESOURCE_QUOTA_MEMORY_LIMITS_KEY))
+                .setHardGpu(getResourceAmount(hard, K8sParamConstants.RESOURCE_QUOTA_GPU_LIMITS_KEY))
+                .setUsedCpu(getResourceAmount(used, K8sParamConstants.RESOURCE_QUOTA_CPU_LIMITS_KEY))
+                .setUsedMemory(getResourceAmount(used, K8sParamConstants.RESOURCE_QUOTA_MEMORY_LIMITS_KEY))
+                .setUsedGpu(getResourceAmount(used, K8sParamConstants.RESOURCE_QUOTA_GPU_LIMITS_KEY));
+        return namespaceVO;
+    }
+
+    /**
+     * Builds the resource view of one pod. The pod's CPU, memory and GPU figures are the sums of the
+     * limits of its containers; containers without limits contribute nothing.
+     *
+     * @param bizPod pod to describe
+     * @return PodResVO carrying the pod name, the summed limits and the pod status
+     */
+    private PodResVO buildPodResVO(BizPod bizPod) {
+        int podCpuAmount = 0;
+        int podMemoryAmount = 0;
+        int podGpuAmount = 0;
+        for (BizContainer container : bizPod.getContainers()) {
+            Map<String, BizQuantity> limits = container.getLimits();
+            if (limits == null) {
+                continue;
+            }
+            podCpuAmount += amountOrZero(limits, K8sParamConstants.QUANTITY_CPU_KEY);
+            podMemoryAmount += amountOrZero(limits, K8sParamConstants.QUANTITY_MEMORY_KEY);
+            podGpuAmount += amountOrZero(limits, K8sParamConstants.GPU_RESOURCE_KEY);
+        }
+
+        PodResVO podResVO = new PodResVO();
+        podResVO.setPodName(bizPod.getName())
+                .setPodCpu(podCpuAmount)
+                .setPodMemory(podMemoryAmount)
+                .setPodCard(podGpuAmount)
+                .setStatus(bizPod.getPhase());
+        // A waiting first container overrides the pod phase with its waiting reason.
+        if (CollectionUtil.isNotEmpty(bizPod.getContainerStatuses())
+                && null != bizPod.getContainerStatuses().get(MagicNumConstant.ZERO).getWaiting()) {
+            podResVO.setStatus(bizPod.getContainerStatuses().get(MagicNumConstant.ZERO).getWaiting().getReason());
+        }
+        return podResVO;
+    }
+
+    /**
+     * Same as {@link #getResourceAmount(Map, String)} but maps a missing entry to 0.
+     *
+     * @param quantityMap map holding the resource quantities
+     * @param key         resource key to read
+     * @return the converted amount, or 0 when the resource is absent
+     */
+    private int amountOrZero(Map<String, BizQuantity> quantityMap, String key) {
+        Integer amount = getResourceAmount(quantityMap, key);
+        return amount == null ? 0 : amount;
+    }
+
+    /**
+     * Reads one resource amount (quota, used or total) from a quantity map and converts it by format:
+     * "Gi" is multiplied by 1024, "Ti" by 1024 * 1024, "m" is divided by 1000 with integer division
+     * (so 500m becomes 0); any other format is returned unchanged.
+     *
+     * @param quantityMap map holding the resource quantities; may be null
+     * @param key         selects the resource (CPU / memory / GPU)
+     * @return the converted amount, or null when the map is null or has no entry for the key
+     */
+    private Integer getResourceAmount(Map<String, BizQuantity> quantityMap, String key) {
+        // A quota may come back without hard/used maps; treat that like a missing entry.
+        if (quantityMap == null) {
+            return null;
+        }
+        BizQuantity quantity = quantityMap.get(key);
+        if (quantity == null) {
+            return null;
+        }
+        Integer amount = Integer.valueOf(quantity.getAmount());
+        String format = quantity.getFormat();
+        if ("Gi".equals(format)) {
+            return amount * 1024;
+        }
+        if ("Ti".equals(format)) {
+            return amount * 1024 * 1024;
+        }
+        if ("m".equals(format)) {
+            return amount / 1000;
+        }
+        return amount;
+    }
+
+    /**
+     * Builds the key that identifies one task: businessLabel + taskId + taskName.
+     */
+    private static String taskKey(String businessLabel, Object taskId, String taskName) {
+        return businessLabel + taskId + taskName;
+    }
+
+    /**
+     * Groups pods by task. Task id and name are read from the Redis hash stored under the pod's task
+     * identify label; pods without labels or without a cache entry are grouped under task id 0 and
+     * the name "unknown".
+     *
+     * @param bizPods    pods to group
+     * @param taskResVOS output set that receives one TaskResVO per pod processed
+     * @return pods grouped by task key
+     */
+    private Map<String, List<BizPod>> sortBizPod(List<BizPod> bizPods, Set<TaskResVO> taskResVOS) {
+        Map<String, List<BizPod>> bizPodsMap = new HashMap<>();
+        for (BizPod pod : bizPods) {
+            String businessLabel = pod.getBusinessLabel();
+            String taskIdentifyLabel = pod.getTaskIdentifyLabel();
+
+            // Read the cache once: the same snapshot is checked and used.
+            Map<?, ?> taskMap = null;
+            if (StringUtils.isNotEmpty(businessLabel) && StringUtils.isNotEmpty(taskIdentifyLabel)) {
+                taskMap = redisUtils.hmget(taskIdentifyLabel);
+            }
+
+            Long taskId;
+            String taskName;
+            if (CollectionUtil.isNotEmpty(taskMap)) {
+                Object rawTaskId = taskMap.get(CACHE_TASK_ID);
+                // NOTE(review): the id may come back as Integer depending on the Redis serializer - confirm.
+                taskId = rawTaskId instanceof Number
+                        ? Long.valueOf(((Number) rawTaskId).longValue())
+                        : (Long) rawTaskId;
+                taskName = (String) taskMap.get(CACHE_TASK_NAME);
+            } else {
+                // No cached task: file the pod under the "unknown" task of its business label.
+                businessLabel = businessLabel == null ? "" : businessLabel;
+                taskId = 0L;
+                taskName = UNKNOWN;
+            }
+
+            bizPodsMap.computeIfAbsent(taskKey(businessLabel, taskId, taskName), key -> new ArrayList<>()).add(pod);
+            taskResVOS.add(new TaskResVO().setTaskName(taskName).setTaskId(taskId).setBusinessLabel(businessLabel));
+        }
+        return bizPodsMap;
+    }
+
+}
diff --git a/dubhe-server/dubhe-k8s/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-k8s/src/main/resources/bootstrap.yml index 626c2ca..2c9347b 100644 --- a/dubhe-server/dubhe-k8s/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-k8s/src/main/resources/bootstrap.yml @@ -22,6 +22,6 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 diff --git a/dubhe-server/dubhe-measure/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-measure/src/main/resources/bootstrap.yml index c465866..50f438d 100644 --- a/dubhe-server/dubhe-measure/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-measure/src/main/resources/bootstrap.yml @@ -30,7 +30,7 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 diff --git a/dubhe-server/dubhe-model/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-model/src/main/resources/bootstrap.yml index dd0e21d..ddce830 100644 --- a/dubhe-server/dubhe-model/src/main/resources/bootstrap.yml +++
b/dubhe-server/dubhe-model/src/main/resources/bootstrap.yml @@ -30,6 +30,6 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 \ No newline at end of file diff --git a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/constants/NoteBookErrorConstant.java b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/constants/NoteBookErrorConstant.java new file mode 100644 index 0000000..71b9fd3 --- /dev/null +++ b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/constants/NoteBookErrorConstant.java @@ -0,0 +1,16 @@ +package org.dubhe.notebook.constants; + +/** + * notebook错误信息常量类 + */ +public class NoteBookErrorConstant { + /** + * notebook不存在 + */ + public static final String NOTEBOOK_NOT_EXISTS = "Notebook不存在"; + + /** + * 无效的notebook状态 + */ + public static final String INVALID_NOTEBOOK_STATUS = "notebook没在运行,不能停止"; +} diff --git a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/convert/NoteBookConvert.java b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/convert/NoteBookConvert.java index 9465cd5..ef197ba 100644 --- a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/convert/NoteBookConvert.java +++ b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/convert/NoteBookConvert.java @@ -18,9 +18,10 @@ package org.dubhe.notebook.convert; +import org.dubhe.biz.base.vo.NoteBookVO; import org.dubhe.biz.db.base.BaseConvert; import org.dubhe.notebook.domain.entity.NoteBook; -import org.dubhe.notebook.domain.vo.NoteBookVO; + import org.mapstruct.Mapper; import org.mapstruct.ReportingPolicy; diff --git a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/convert/PtJupyterResourceConvert.java b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/convert/PtJupyterResourceConvert.java index 3543fba..1c92111 100644 --- 
a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/convert/PtJupyterResourceConvert.java +++ b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/convert/PtJupyterResourceConvert.java @@ -42,7 +42,7 @@ public class PtJupyterResourceConvert { * @param notebookDelayDeleteTime * @return PtJupyterResourceBO */ - public static PtJupyterResourceBO toPtJupyterResourceBo(NoteBook noteBook, K8sNameTool k8sNameTool, Integer notebookDelayDeleteTime) { + public static PtJupyterResourceBO toPtJupyterResourceBo(NoteBook noteBook, K8sNameTool k8sNameTool, Integer notebookDelayDeleteTime, String taskIdentify) { if (noteBook == null) { return null; } @@ -55,7 +55,6 @@ public class PtJupyterResourceConvert { .setImage(noteBook.getK8sImageName()) .setWorkspaceDir(k8sNameTool.getAbsolutePath(noteBook.getK8sPvcPath())) .setWorkspaceMountPath(noteBook.getK8sMountPath()) - .setDatasetDir(noteBook.getDataSourcePath()) // request和limit先一致 .setWorkspaceRequest(noteBook.getDiskMemNum() + "Mi") .setWorkspaceLimit(noteBook.getDiskMemNum() + "Mi") @@ -64,6 +63,8 @@ public class PtJupyterResourceConvert { .setDatasetMountPath(k8sNameTool.getDatasetPath()) .setDatasetReadOnly(true) .setDelayDeleteTime(notebookDelayDeleteTime) + .setPipSitePackageDir(k8sNameTool.getAbsolutePath(noteBook.getPipSitePackagePath())) + .setTaskIdentifyLabel(taskIdentify) ; return bo; } diff --git a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/dao/NoteBookMapper.java b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/dao/NoteBookMapper.java index 4ad3698..c1df996 100644 --- a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/dao/NoteBookMapper.java +++ b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/dao/NoteBookMapper.java @@ -47,6 +47,14 @@ public interface NoteBookMapper extends BaseMapper { @Select("select count(1) from notebook where status = #{status} and deleted = 0") int selectRunNoteBookNum( @Param("status") Integer status); + /** + * 
查询正在运行的notebook列表 + * + * @return + */ + @Select("select * from notebook where status = 0 and deleted = 0 and (url is not null or url != '')") + List selectRunningList(); + /** * 根据namespace + resourceName查询 * diff --git a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/domain/entity/NoteBook.java b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/domain/entity/NoteBook.java index e8ca674..7ec4a0d 100644 --- a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/domain/entity/NoteBook.java +++ b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/domain/entity/NoteBook.java @@ -150,36 +150,11 @@ public class NoteBook extends BaseEntity { @ApiModelProperty(hidden = true) private Long algorithmId; - @Override - public String toString() { - return "NoteBook{" + - "id=" + id + - ", originUserId=" + originUserId + - ", name='" + name + '\'' + - ", noteBookName='" + noteBookName + '\'' + - ", description='" + description + '\'' + - ", url='" + url + '\'' + - ", totalRunMin=" + totalRunMin + - ", cpuNum=" + cpuNum + - ", gpuNum=" + gpuNum + - ", memNum=" + memNum + - ", diskMemNum=" + diskMemNum + - ", status=" + status + - ", lastStartTime=" + lastStartTime + - ", lastOperationTimeout=" + lastOperationTimeout + - ", createResource=" + createResource + - ", k8sStatusCode='" + k8sStatusCode + '\'' + - ", k8sStatusInfo='" + k8sStatusInfo + '\'' + - ", k8sNamespace='" + k8sNamespace + '\'' + - ", k8sResourceName='" + k8sResourceName + '\'' + - ", k8sImageName='" + k8sImageName + '\'' + - ", k8sMountPath='" + k8sMountPath + '\'' + - ", k8sPvcPath='" + k8sPvcPath + '\'' + - ", dataSourceName='" + dataSourceName + '\'' + - ", dataSourcePath='" + dataSourcePath + '\'' + - ", algorithmId=" + algorithmId + - '}'; - } + + @TableField(value = "pip_site_package_path") + @Size(max = 255, message = "pip包路径") + @ApiModelProperty(hidden = true) + private String pipSitePackagePath; /** * put 键值 diff --git 
a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/domain/vo/NoteBookVO.java b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/domain/vo/NoteBookVO.java deleted file mode 100644 index 737c351..0000000 --- a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/domain/vo/NoteBookVO.java +++ /dev/null @@ -1,119 +0,0 @@ -/** - * Copyright 2020 Tianshu AI Platform. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ============================================================= - */ - -package org.dubhe.notebook.domain.vo; - -import cn.hutool.core.date.DatePattern; -import com.fasterxml.jackson.annotation.JsonFormat; -import com.fasterxml.jackson.annotation.JsonIgnore; -import io.swagger.annotations.ApiModel; -import io.swagger.annotations.ApiModelProperty; -import lombok.Data; -import org.dubhe.biz.base.utils.DateUtil; - -import java.io.Serializable; -import java.util.Date; - -/** - * @description 返回前端请求体 - * @date 2020-04-28 - */ -@Data -@ApiModel("NoteBookDTO 响应") -public class NoteBookVO implements Serializable { - - @ApiModelProperty("ID") - private Long id; - - @ApiModelProperty("所属用户") - private Long userId; - - @ApiModelProperty("NoteBook 名称") - @JsonIgnore - private String name; - - @ApiModelProperty("NoteBook 名称") - private String noteBookName; - - @ApiModelProperty("备注描述") - private String description; - - @ApiModelProperty("可访问jupyter地址") - private String url; - - @JsonIgnore - private Integer 
totalRunMin; - - @ApiModelProperty("CPU数量") - private Integer cpuNum; - - @ApiModelProperty("GPU数量") - private Integer gpuNum; - - @ApiModelProperty("内存大小(M)") - private Integer memNum; - - @ApiModelProperty("硬盘内存大小(M)") - private Integer diskMemNum; - - @ApiModelProperty("0运行,1停止, 2删除, 3启动中,4停止中,5删除中,6运行异常(暂未启用)") - private Integer status; - - @ApiModelProperty("状态对应的详情信息") - private String statusDetail; - - @ApiModelProperty("k8s响应状态码") - private String k8sStatusCode; - - @ApiModelProperty("k8s响应状态信息") - private String k8sStatusInfo; - - @JsonIgnore - private String k8sNamespace; - - @JsonIgnore - private String k8sResourceName; - - private String k8sImageName; - - @ApiModelProperty("k8s中pvc存储路径") - private String k8sPvcPath; - - @JsonFormat(pattern = DatePattern.NORM_DATETIME_MS_PATTERN, timezone = DateUtil.DEFAULT_TIME_ZONE) - private Date createTime; - - @JsonIgnore - private Long createUserId; - - @JsonFormat(pattern = DatePattern.NORM_DATETIME_MS_PATTERN, timezone = DateUtil.DEFAULT_TIME_ZONE) - private Date updateTime; - - @JsonIgnore - private Long updateUserId; - - @ApiModelProperty("数据集名称") - private String dataSourceName; - - @ApiModelProperty("数据集路径") - private String dataSourcePath; - - @ApiModelProperty("算法ID") - private Long algorithmId; - - @ApiModelProperty("资源拥有者ID") - private Long originUserId; -} diff --git a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/rest/NoteBookController.java b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/rest/NoteBookController.java index ce617af..eaa7f41 100644 --- a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/rest/NoteBookController.java +++ b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/rest/NoteBookController.java @@ -67,6 +67,12 @@ public class NoteBookController { return new DataResponseBody(noteBookService.getNoteBookList(page, noteBookListQueryDTO)); } + @ApiOperation("根据id查询notebook") + @GetMapping("/detail/{id}") + 
@PreAuthorize(Permissions.NOTEBOOK) + public DataResponseBody getNoteBook(@PathVariable Long id) { + return DataResponseFactory.success(noteBookService.getNotebookDetail(id)); + } @ApiOperation("修改notebook算法ID") @PutMapping(value = "/algorithm") @@ -111,6 +117,13 @@ public class NoteBookController { , resultInfo); } + @ApiOperation("一键停止所有notebook") + @PutMapping(value = "/batchStop") + @PreAuthorize(Permissions.NOTEBOOK_STOP) + public DataResponseBody batchStopNotebook() { + noteBookService.batchStopNoteBooks(); + return new DataResponseBody(); + } @ApiOperation("打开notebook") @GetMapping(value = "/{id}") @@ -138,14 +151,6 @@ public class NoteBookController { return new DataResponseBody(noteBookService.getAddress(noteBookId)); } - @ApiOperation("获取状态") - @GetMapping(value = "/status") - @PreAuthorize(Permissions.NOTEBOOK) - public DataResponseBody getNoteBookStatus() { - return new DataResponseBody(noteBookService.getNoteBookStatus()); - } - - @ApiOperation("获取正在运行的notebook数量") @GetMapping(value = "/run-number") @PreAuthorize(Permissions.NOTEBOOK) diff --git a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/service/NoteBookService.java b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/service/NoteBookService.java index 311c347..66c10bd 100644 --- a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/service/NoteBookService.java +++ b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/service/NoteBookService.java @@ -24,7 +24,7 @@ import org.dubhe.biz.file.enums.BizPathEnum; import org.dubhe.notebook.domain.dto.*; import org.dubhe.notebook.domain.entity.NoteBook; import org.dubhe.notebook.enums.NoteBookStatusEnum; -import org.dubhe.notebook.domain.vo.NoteBookVO; +import org.dubhe.biz.base.vo.NoteBookVO; import java.util.List; import java.util.Map; @@ -93,6 +93,13 @@ public interface NoteBookService { */ String stopNoteBook(Long noteBookId); + /** + * 一键停止所有notebook + * + * @return + */ + void batchStopNoteBooks(); + /** * 
更新notebook * @@ -142,13 +149,6 @@ public interface NoteBookService { */ String getAddress(Long noteBookId); - /** - * 获取notebook所有状态 - * - * @return List notebook状态集合 - */ - List getNoteBookStatus(); - /** * 获取正在运行的notebook数量 * @@ -183,6 +183,14 @@ public interface NoteBookService { */ List getNotebookDetail(Set noteBookIds); + /** + * 获取notebook详情 + * + * @param noteBookId notebook id 集合 + * @return NoteBookVO notebook 详情 + */ + NoteBookVO getNotebookDetail(Long noteBookId); + /** * 获取正在运行却没有URL的notebook * diff --git a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/service/impl/NoteBookServiceImpl.java b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/service/impl/NoteBookServiceImpl.java index 76fae42..5e08b6f 100644 --- a/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/service/impl/NoteBookServiceImpl.java +++ b/dubhe-server/dubhe-notebook/src/main/java/org/dubhe/notebook/service/impl/NoteBookServiceImpl.java @@ -18,19 +18,25 @@ package org.dubhe.notebook.service.impl; import cn.hutool.core.collection.CollUtil; +import cn.hutool.core.date.DateBetween; +import cn.hutool.core.date.DateUnit; import cn.hutool.core.util.RandomUtil; import cn.hutool.core.util.StrUtil; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; -import org.apache.commons.lang3.StringUtils; +import org.dubhe.biz.base.utils.StringUtils; import org.dubhe.biz.base.constant.HarborProperties; import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.constant.NumberConstant; +import org.dubhe.biz.base.constant.StringConstant; import org.dubhe.biz.base.constant.SymbolConstant; +import org.dubhe.biz.base.context.UserContext; import org.dubhe.biz.base.dto.NoteBookAlgorithmQueryDTO; import 
org.dubhe.biz.base.dto.NoteBookAlgorithmUpdateDTO; import org.dubhe.biz.base.dto.PtImageQueryUrlDTO; +import org.dubhe.biz.base.dto.SysUserConfigDTO; import org.dubhe.biz.base.enums.BizEnum; import org.dubhe.biz.base.enums.ImageSourceEnum; import org.dubhe.biz.base.enums.ImageTypeEnum; @@ -38,6 +44,7 @@ import org.dubhe.biz.base.exception.BusinessException; import org.dubhe.biz.base.service.UserContextService; import org.dubhe.biz.base.utils.HttpUtils; import org.dubhe.biz.base.utils.NumberUtil; +import org.dubhe.biz.base.utils.ResultUtil; import org.dubhe.biz.base.vo.DataResponseBody; import org.dubhe.biz.base.vo.DatasetVO; import org.dubhe.biz.db.utils.PageUtil; @@ -46,9 +53,11 @@ import org.dubhe.biz.file.api.FileStoreApi; import org.dubhe.biz.file.enums.BizPathEnum; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; +import org.dubhe.biz.redis.utils.RedisUtils; import org.dubhe.k8s.api.JupyterResourceApi; import org.dubhe.k8s.api.NamespaceApi; import org.dubhe.k8s.api.PodApi; +import org.dubhe.k8s.cache.ResourceCache; import org.dubhe.k8s.domain.PtBaseResult; import org.dubhe.k8s.domain.resource.BizNamespace; import org.dubhe.k8s.domain.resource.BizPod; @@ -58,19 +67,19 @@ import org.dubhe.k8s.utils.K8sNameTool; import org.dubhe.notebook.client.DatasetClient; import org.dubhe.notebook.client.ImageClient; import org.dubhe.notebook.config.NoteBookConfig; +import org.dubhe.notebook.constants.NoteBookErrorConstant; import org.dubhe.notebook.convert.NoteBookConvert; import org.dubhe.notebook.convert.PtJupyterResourceConvert; import org.dubhe.notebook.dao.NoteBookMapper; import org.dubhe.notebook.domain.dto.NoteBookCreateDTO; import org.dubhe.notebook.domain.dto.NoteBookListQueryDTO; -import org.dubhe.notebook.domain.dto.NoteBookStatusDTO; import org.dubhe.notebook.domain.dto.SourceNoteBookDTO; import org.dubhe.notebook.domain.entity.NoteBook; import org.dubhe.notebook.enums.NoteBookStatusEnum; import 
org.dubhe.notebook.service.NoteBookService; import org.dubhe.notebook.service.ProcessNotebookCommand; import org.dubhe.notebook.utils.NotebookUtil; -import org.dubhe.notebook.domain.vo.NoteBookVO; +import org.dubhe.biz.base.vo.NoteBookVO; import org.springframework.beans.BeanUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; @@ -111,8 +120,9 @@ public class NoteBookServiceImpl implements NoteBookService { @Autowired private UserContextService userContextService; - @Value("${delay.notebook.delete}") - private Integer notebookDelayDeleteTime; + @Value("${user.config.notebook-delay-delete-time}") + private Integer defaultNotebookDelayDeleteTime; + @Autowired private ImageClient imageClient; @@ -130,6 +140,15 @@ public class NoteBookServiceImpl implements NoteBookService { @Autowired private NoteBookConfig noteBookConfig; + @Autowired + private RedisUtils redisUtils; + + @Autowired + private ResourceCache resourceCache; + + @Value("Task:Notebook:"+"${spring.profiles.active}_notebook_id_") + private String notebookIdPrefix; + /** * 分页查询所有 notebook 记录 * @@ -287,12 +306,14 @@ public class NoteBookServiceImpl implements NoteBookService { } noteBook.setCreateResource(BizPathEnum.NOTEBOOK.getCreateResource()); noteBook.setK8sMountPath(NotebookUtil.getK8sMountPath()); - if (start(noteBook)) { + String taskIdentify = StringUtils.getUUID(); + if (start(noteBook, taskIdentify)) { noteBook.setStatus(NoteBookStatusEnum.STARTING.getCode()); } else { noteBook.setStatus(NoteBookStatusEnum.STOP.getCode()); } noteBookMapper.insert(noteBook); + resourceCache.addTaskCache(taskIdentify,noteBook.getId(), noteBookName, notebookIdPrefix); return noteBookConvert.toDto(noteBook); } @@ -352,6 +373,10 @@ public class NoteBookServiceImpl implements NoteBookService { for (NoteBook noteBook : noteBookList) { noteBook.setStatus(NoteBookStatusEnum.DELETING.getCode()); noteBookMapper.updateById(noteBook); + String 
taskIdentify = (String) redisUtils.get(notebookIdPrefix + String.valueOf(noteBook.getId())); + if (StringUtils.isNotEmpty(taskIdentify)){ + redisUtils.del(taskIdentify, notebookIdPrefix + String.valueOf(noteBook.getId())); + } } } } @@ -388,7 +413,8 @@ public class NoteBookServiceImpl implements NoteBookService { throw new BusinessException("notebook【" + noteBook.getName() + "】当前状态:" + NoteBookStatusEnum.getDescription(noteBook.getStatus()) + ",无法再次启动。"); } String returnStr; - if (start(noteBook)) { + String taskIdentify = resourceCache.getTaskIdentify(noteBook.getId(), noteBook.getNoteBookName(), notebookIdPrefix); + if (start(noteBook, taskIdentify)) { noteBook.setStatus(NoteBookStatusEnum.STARTING.getCode()); returnStr = NoteBookStatusEnum.STARTING.getDescription(); } else { @@ -423,15 +449,22 @@ public class NoteBookServiceImpl implements NoteBookService { * @param noteBook notebook * @return true 启动成功;false 启动失败 */ - private boolean start(NoteBook noteBook) { + private boolean start(NoteBook noteBook, String taskIdentify) { + Long curUserId = userContextService.getCurUserId(); + if (StringUtils.isBlank(noteBook.getPipSitePackagePath())) { + String pipSitePackagePath = StringConstant.PIP_SITE_PACKAGE + SymbolConstant.SLASH + curUserId + SymbolConstant.SLASH + noteBook.getName() + SymbolConstant.SLASH; + noteBook.setPipSitePackagePath(pipSitePackagePath); + } // 添加启动时间 noteBook.setLastStartTime(new Date()); // 添加超时时间点 noteBook.setLastOperationTimeout(NotebookUtil.getTimeoutSecondLong()); if (initNameSpace(noteBook, null)) { try { + // 获取Notebook延迟删除时间,单位小时转化为分钟 + int notebookDelayDeleteTime = getNotebookDelayDeleteTime() * 60; //创建时不创建PVC - PtJupyterDeployVO result = jupyterResourceApi.create(PtJupyterResourceConvert.toPtJupyterResourceBo(noteBook, k8sNameTool, notebookDelayDeleteTime)); + PtJupyterDeployVO result = jupyterResourceApi.create(PtJupyterResourceConvert.toPtJupyterResourceBo(noteBook, k8sNameTool, notebookDelayDeleteTime, taskIdentify)); 
noteBook.setK8sStatusCode(result.getCode() == null ? SymbolConstant.BLANK : result.getCode()); noteBook.setK8sStatusInfo(NotebookUtil.getK8sStatusInfo(result)); if (!result.isSuccess()) { @@ -460,18 +493,17 @@ public class NoteBookServiceImpl implements NoteBookService { public String stopNoteBook(Long noteBookId) { NumberUtil.isNumber(noteBookId); NoteBook noteBook = noteBookMapper.selectById(noteBookId); - if (noteBook == null) { - throw new BusinessException(NotebookUtil.NOTEBOOK_NOT_EXISTS); - } - if (!NoteBookStatusEnum.RUN.getCode().equals(noteBook.getStatus())) { - throw new BusinessException("notebook没在运行,不能停止"); - } + ResultUtil.notNull(noteBook, NoteBookErrorConstant.NOTEBOOK_NOT_EXISTS); + ResultUtil.isEquals(NoteBookStatusEnum.RUN.getCode(), noteBook.getStatus(), + NoteBookErrorConstant.INVALID_NOTEBOOK_STATUS); + String returnStr; NoteBookStatusEnum statusEnum = getStatus(noteBook); if (NoteBookStatusEnum.STOP == statusEnum) { noteBook.setK8sStatusCode(SymbolConstant.BLANK); noteBook.setK8sStatusInfo(SymbolConstant.BLANK); noteBook.setUrl(SymbolConstant.BLANK); + noteBook.setStatus(NoteBookStatusEnum.STOP.getCode()); returnStr = "已停止"; } else { try { @@ -504,6 +536,18 @@ public class NoteBookServiceImpl implements NoteBookService { return returnStr; }
+    /**
+     * Stops every notebook returned by {@code noteBookMapper.selectRunningList()}.
+     * A notebook that cannot be stopped does not abort the batch: the remaining notebooks are still
+     * processed, and a single BusinessException listing all failures is thrown afterwards.
+     *
+     * @see NoteBookService#batchStopNoteBooks()
+     */
+    @Override
+    public void batchStopNoteBooks() {
+        List<NoteBook> noteBooks = noteBookMapper.selectRunningList();
+        if (CollectionUtils.isEmpty(noteBooks)) {
+            return;
+        }
+        List<String> failures = new ArrayList<>();
+        List<BusinessException> causes = new ArrayList<>();
+        for (NoteBook noteBook : noteBooks) {
+            try {
+                stopNoteBook(noteBook.getId());
+            } catch (BusinessException e) {
+                // e.g. the notebook left the running state after the query; keep stopping the others.
+                // NOTE(review): assumes the ResultUtil checks in stopNoteBook raise BusinessException - confirm.
+                failures.add(noteBook.getId() + ": " + e.getMessage());
+                causes.add(e);
+            }
+        }
+        if (!failures.isEmpty()) {
+            BusinessException batchFailure = new BusinessException("notebook批量停止失败:" + String.join("; ", failures));
+            causes.forEach(batchFailure::addSuppressed);
+            throw batchFailure;
+        }
+    }
+
/** * 开启notebook * @@ -566,6 +610,12 @@ public class NoteBookServiceImpl implements NoteBookService { noteBook.setK8sStatusCode(result.getCode() == null ?
SymbolConstant.BLANK : result.getCode()); noteBook.setK8sStatusInfo(NotebookUtil.getK8sStatusInfo(result)); if (K8sResponseEnum.NOT_FOUND.getCode().equals(result.getCode())) { + + long gap = new DateBetween(noteBook.getLastStartTime(), new Date()).between(DateUnit.MINUTE); + // 超时处理 + if (gap < NumberConstant.NUMBER_2) { + return null; + } // 结果不存在当已停止 return NoteBookStatusEnum.STOP; } else if (!HttpUtils.isSuccess(result.getCode())) { @@ -670,26 +720,6 @@ public class NoteBookServiceImpl implements NoteBookService { return null; } - - /** - * 获取notebook所有状态 - * - * @return List notebook状态集合 - */ - @Override - public List getNoteBookStatus() { - List noteBookStatusDtoList = new ArrayList<>(); - for (NoteBookStatusEnum noteBookStatusEnum : NoteBookStatusEnum.values()) { - if (noteBookStatusEnum != NoteBookStatusEnum.DELETED) { - NoteBookStatusDTO noteBookStatusDTO = new NoteBookStatusDTO(); - noteBookStatusDTO.setStatusCode(noteBookStatusEnum.getCode()); - noteBookStatusDTO.setStatusName(noteBookStatusEnum.getDescription()); - noteBookStatusDtoList.add(noteBookStatusDTO); - } - } - return noteBookStatusDtoList; - } - /** * 获取正在运行的notebook数量 * @@ -769,6 +799,20 @@ public class NoteBookServiceImpl implements NoteBookService { return noteBookConvert.toDto(noteBookList); }
+    /**
+     * Returns the details of a single notebook.
+     *
+     * @param noteBookId notebook id
+     * @return NoteBookVO converted from the notebook record selected by id
+     */
+    @Override
+    public NoteBookVO getNotebookDetail(Long noteBookId) {
+        NoteBook noteBook = noteBookMapper.selectById(noteBookId);
+        return noteBookConvert.toDto(noteBook);
+    }
+
/** * 获取正在运行却没有URL的notebook * @@ -808,4 +852,21 @@ public class NoteBookServiceImpl implements NoteBookService { } return noteBookMapper.getNoteBookIdByAlgorithm(noteBookAlgorithmQueryDTO.getAlgorithmIdList()); }
+
+    /**
+     * Resolves the notebook delay-delete time for the current user.
+     * The user's own setting wins; when the user, the user's config or the setting itself is
+     * missing, the default from {@code user.config.notebook-delay-delete-time} is used.
+     *
+     * @return the delay-delete time (the caller multiplies it by 60, i.e. treats it as hours)
+     */
+    private int getNotebookDelayDeleteTime() {
+        UserContext curUser = userContextService.getCurUser();
+        SysUserConfigDTO userConfig = curUser == null ? null : curUser.getUserConfig();
+        // Check whether this user configured a notebook delay-delete time of their own.
+        Integer notebookDelayDeleteTime = userConfig == null ? null : userConfig.getNotebookDelayDeleteTime();
+        if (notebookDelayDeleteTime != null) {
+            return notebookDelayDeleteTime;
+        }
+
+        // No per-user setting: fall back to the configured default.
+        return defaultNotebookDelayDeleteTime;
+    }
} diff --git a/dubhe-server/dubhe-notebook/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-notebook/src/main/resources/bootstrap.yml index 3151a5f..e32f7f4 100644 --- a/dubhe-server/dubhe-notebook/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-notebook/src/main/resources/bootstrap.yml @@ -28,6 +28,6 @@ spring: discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 diff --git a/dubhe-server/dubhe-optimize/src/main/java/org/dubhe/optimize/service/impl/ModelOptTaskInstanceServiceImpl.java b/dubhe-server/dubhe-optimize/src/main/java/org/dubhe/optimize/service/impl/ModelOptTaskInstanceServiceImpl.java index e17b6d9..e8845ce 100644 --- a/dubhe-server/dubhe-optimize/src/main/java/org/dubhe/optimize/service/impl/ModelOptTaskInstanceServiceImpl.java +++ b/dubhe-server/dubhe-optimize/src/main/java/org/dubhe/optimize/service/impl/ModelOptTaskInstanceServiceImpl.java @@ -44,7 +44,9 @@ import org.dubhe.biz.file.enums.BizPathEnum; import org.dubhe.biz.file.utils.MinioUtil; import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; +import org.dubhe.biz.redis.utils.RedisUtils; import org.dubhe.k8s.api.ModelOptJobApi; +import org.dubhe.k8s.cache.ResourceCache; import org.dubhe.k8s.domain.bo.PtModelOptimizationJobBO; import org.dubhe.k8s.domain.dto.PodQueryDTO; import org.dubhe.k8s.domain.resource.BizJob; @@ -71,6 +73,7 @@ import org.dubhe.optimize.enums.DistillCommandEnum; import org.dubhe.optimize.enums.OptimizeTypeEnum; import org.dubhe.optimize.service.ModelOptTaskInstanceService; import org.springframework.beans.BeanUtils; +import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -79,6 +82,7 @@ import javax.annotation.Resource; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -118,6 +122,15 @@ public class ModelOptTaskInstanceServiceImpl extends ServiceImpl wrapper = new LambdaQueryWrapper<>(); wrapper.eq(ModelOptTaskInstance::getTaskId, taskId); + String taskIdentify = (String) redisUtils.get(modelOptIdPrefix + String.valueOf(taskId)); + if (org.dubhe.biz.base.utils.StringUtils.isNotEmpty(taskIdentify)){ + redisUtils.del(taskIdentify, modelOptIdPrefix + String.valueOf(taskId)); + } return modelOptTaskInstanceMapper.delete(wrapper); } diff --git a/dubhe-server/dubhe-optimize/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-optimize/src/main/resources/bootstrap.yml index 3ba7413..d45c14a 100644 --- a/dubhe-server/dubhe-optimize/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-optimize/src/main/resources/bootstrap.yml @@ -30,7 +30,7 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 diff --git a/dubhe-server/dubhe-serving-gateway/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-serving-gateway/src/main/resources/bootstrap.yml index fa8efb6..744bdc0 100644 --- a/dubhe-server/dubhe-serving-gateway/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-serving-gateway/src/main/resources/bootstrap.yml @@ -22,6 +22,6 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 \ No newline at end of file diff --git a/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/domain/vo/ServingPodMetricsVO.java 
b/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/domain/vo/ServingPodMetricsVO.java index 4316ee4..3469b03 100644 --- a/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/domain/vo/ServingPodMetricsVO.java +++ b/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/domain/vo/ServingPodMetricsVO.java @@ -19,7 +19,7 @@ package org.dubhe.serving.domain.vo; import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; -import org.dubhe.k8s.domain.vo.GpuUsageVO; +import org.dubhe.k8s.domain.vo.GpuValueVO; import java.io.Serializable; import java.util.List; @@ -96,7 +96,7 @@ public class ServingPodMetricsVO implements Serializable { /** * gpu使用百分比 */ - private List gpuUsagePersent; + private List gpuUsagePersent; /** * grafana地址 */ diff --git a/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/service/impl/BatchServingServiceImpl.java b/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/service/impl/BatchServingServiceImpl.java index b15dc87..5dfe93b 100644 --- a/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/service/impl/BatchServingServiceImpl.java +++ b/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/service/impl/BatchServingServiceImpl.java @@ -51,7 +51,9 @@ import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; import org.dubhe.biz.permission.annotation.DataPermissionMethod; import org.dubhe.biz.permission.base.BaseService; +import org.dubhe.biz.redis.utils.RedisUtils; import org.dubhe.cloud.authconfig.service.AdminClient; +import org.dubhe.k8s.cache.ResourceCache; import org.dubhe.k8s.domain.dto.PodQueryDTO; import org.dubhe.k8s.domain.vo.PodVO; import org.dubhe.k8s.enums.PodPhaseEnum; @@ -81,6 +83,7 @@ import org.dubhe.serving.service.BatchServingService; import org.dubhe.serving.task.DeployServingAsyncTask; import org.dubhe.serving.utils.ServingStatusDetailDescUtil; import org.springframework.beans.BeanUtils; +import 
org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -134,6 +137,12 @@ public class BatchServingServiceImpl extends ServiceImpl modelConfigList = insertServing(servingInfoCreateDTO, user, servingInfo); // 异步部署容器 - deployServingAsyncTask.deployServing(user, servingInfo, modelConfigList); + String taskIdentify = resourceCache.getTaskIdentify(servingInfo.getId(), servingInfo.getName(), servingIdPrefix); + deployServingAsyncTask.deployServing(user, servingInfo, modelConfigList, taskIdentify); return new ServingInfoCreateVO(servingInfo.getId(), servingInfo.getStatus()); } @@ -557,8 +567,9 @@ public class ServingServiceImpl implements ServingService { } } List modelConfigList = updateServing(servingInfoUpdateDTO, user, servingInfo); + String taskIdentify = resourceCache.getTaskIdentify(servingInfo.getId(), servingInfo.getName(),servingIdPrefix); // 异步部署容器 - deployServingAsyncTask.deployServing(user, servingInfo, modelConfigList); + deployServingAsyncTask.deployServing(user, servingInfo, modelConfigList, taskIdentify); return new ServingInfoUpdateVO(servingInfo.getId(), servingInfo.getStatus()); } @@ -632,6 +643,10 @@ public class ServingServiceImpl implements ServingService { List modelConfigList = getModelConfigByServingId(servingInfo.getId()); deployServingAsyncTask.deleteServing(user, servingInfo, modelConfigList); deleteServing(servingInfoDeleteDTO, user, servingInfo); + String taskIdentify = (String) redisUtils.get(servingIdPrefix + String.valueOf(servingInfo.getId())); + if (StringUtils.isNotEmpty(taskIdentify)){ + redisUtils.del(taskIdentify, servingIdPrefix + String.valueOf(servingInfo.getId())); + } Map map = new HashMap<>(NumberConstant.NUMBER_2); map.put("serving_id", servingInfo.getId()); if (!servingModelConfigService.removeByMap(map)) { @@ -751,7 +766,8 @@ public class 
ServingServiceImpl implements ServingService { servingInfo.setStatusDetail(SymbolConstant.BRACKETS); updateServingStart(user, servingInfo, modelConfigList); // 异步部署容器 - deployServingAsyncTask.deployServing(user, servingInfo, modelConfigList); + String taskIdentify = resourceCache.getTaskIdentify(servingInfo.getId(), servingInfo.getName(), servingIdPrefix); + deployServingAsyncTask.deployServing(user, servingInfo, modelConfigList, taskIdentify); return new ServingStartVO(servingInfo.getId(), servingInfo.getStatus()); } diff --git a/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/task/DeployServingAsyncTask.java b/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/task/DeployServingAsyncTask.java index 28a9073..6eb1202 100644 --- a/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/task/DeployServingAsyncTask.java +++ b/dubhe-server/dubhe-serving/src/main/java/org/dubhe/serving/task/DeployServingAsyncTask.java @@ -147,13 +147,13 @@ public class DeployServingAsyncTask { */ @Async("servingExecutor") @Transactional(rollbackFor = Exception.class) - public void deployServing(UserContext user, ServingInfo servingInfo, List modelConfigList) { + public void deployServing(UserContext user, ServingInfo servingInfo, List modelConfigList, String taskIdentify) { boolean flag = false; //去除可能因为上次部署时程序异常被捕获的异常信息 servingInfo.removeStatusDetail(ServingStatusDetailDescUtil.getServingStatusDetailKey(ServingStatusDetailDescUtil.CONTAINER_DEPLOYMENT_EXCEPTION, servingInfo.getName())); for (ServingModelConfig servingModelConfig : modelConfigList) { try { - ModelServingBO bo = buildModelServingBO(user, servingInfo, servingModelConfig); + ModelServingBO bo = buildModelServingBO(user, servingInfo, servingModelConfig, taskIdentify); if (bo == null) { LogUtil.error(LogEnum.SERVING, "User {} build the parameter failed.The id of servingModelConfig is {}", user.getUsername(), servingModelConfig.getId()); continue; @@ -173,6 +173,19 @@ public class DeployServingAsyncTask 
{ servingModelConfig.setUrl(url); flag = true; servingInfo.removeStatusDetail(statusDetailKey); + if (servingModelConfigService.updateById(servingModelConfig)) { + LogUtil.info(LogEnum.SERVING, "User {} deploy the model SUCCESS. servingModelConfigId = {}, resourceInfo = {}", user.getUsername(), servingModelConfig.getId(), servingModelConfig.getResourceInfo()); + } else { + LogUtil.error(LogEnum.SERVING, "User {} failed saving online service model config. Database update FAILED, service model config id={}, resourceInfo = {}", user.getUsername(), servingModelConfig.getId(), servingModelConfig.getResourceInfo()); + // 数据库修改失败,但pod创建成功时,修改状态已异常,并删除成功创建的pod + if (StringUtils.isNotBlank(servingModelConfig.getUrl())) { + flag = false; + // 删除已创建的pod + List deleteList = new ArrayList<>(); + deleteList.add(servingModelConfig); + deleteServing(user, servingInfo, deleteList); + } + } } else { servingInfo.putStatusDetail(statusDetailKey, "pod对应的url为空"); } @@ -185,18 +198,7 @@ public class DeployServingAsyncTask { servingInfo.putStatusDetail(statusDetailKey, e.getMessage()); LogUtil.error(LogEnum.SERVING, "User {} create serving failed.The name of serving is {}", user.getUsername(), servingInfo.getName(), e); } - if (!servingModelConfigService.updateById(servingModelConfig)) { - LogUtil.error(LogEnum.SERVING, "User {} failed saving online service model config. 
Database update FAILED, service model config id={}", user.getUsername(), servingModelConfig.getId()); - // 数据库修改失败,但pod创建成功时,修改状态已异常,并删除成功创建的pod - if (StringUtils.isNotBlank(servingModelConfig.getUrl())) { - flag = false; - // 删除已创建的pod - List deleteList = new ArrayList<>(); - deleteList.add(servingModelConfig); - deleteServing(user, servingInfo, deleteList); - } - } } //修改服务状态 @@ -225,7 +227,7 @@ public class DeployServingAsyncTask { * @param servingModelConfig 在线服务模型部署信息 * @return ModelServingBO 返回构建后对象 */ - private ModelServingBO buildModelServingBO(UserContext user, ServingInfo servingInfo, ServingModelConfig servingModelConfig) { + private ModelServingBO buildModelServingBO(UserContext user, ServingInfo servingInfo, ServingModelConfig servingModelConfig, String taskIdentify) { ModelServingBO bo = new ModelServingBO(); //容器端口 if (ServingTypeEnum.GRPC.getType().equals(servingInfo.getType())) { @@ -260,7 +262,8 @@ public class DeployServingAsyncTask { .setFsMounts(new HashMap(NumberConstant.NUMBER_4) {{ put(ServingConstant.MODEL_PATH, new PtMountDirBO(k8sNameTool.getAbsolutePath(servingModelConfig.getModelAddress()))); put(ServingConstant.DUBHE_SERVING_PATH, new PtMountDirBO(servingPath)); - }}); + }}) + .setTaskIdentifyLabel(taskIdentify); bo.setBusinessLabel(k8sNameTool.getPodLabel(BizEnum.SERVING)); return bo; } @@ -328,7 +331,13 @@ public class DeployServingAsyncTask { if (!ServingConstant.SUCCESS_CODE.equals(ptBaseResult.getCode())) { servingInfo.putStatusDetail(statusDetailKey, ptBaseResult.getMessage()); flag = false; + } else { + servingModelConfig.setResourceInfo(null); + if (servingModelConfigService.updateById(servingModelConfig)) { + LogUtil.info(LogEnum.SERVING, "User {} delete the service SUCCESS, namespace:{}, resourceName:{}", user.getUsername(), namespace, resourceName); + } } + } } catch (KubernetesClientException e) { 
servingInfo.putStatusDetail(ServingStatusDetailDescUtil.getServingStatusDetailKey(ServingStatusDetailDescUtil.CONTAINER_DELETION_EXCEPTION, servingInfo.getName()), e.getMessage()); @@ -358,10 +367,10 @@ public class DeployServingAsyncTask { */ @Async("servingExecutor") @Transactional(rollbackFor = Exception.class) - public void deployBatchServing(UserContext user, BatchServing batchServing) { + public void deployBatchServing(UserContext user, BatchServing batchServing, String taskIdentify) { if (batchServing.getResourcesPoolNode() == NumberConstant.NUMBER_1) { //单节点 - PtJupyterJobBO ptJupyterJobBO = buildJobBo(user, batchServing); + PtJupyterJobBO ptJupyterJobBO = buildJobBo(user, batchServing, taskIdentify); if (ptJupyterJobBO != null) { PtJupyterJobVO vo = trainJobApi.create(ptJupyterJobBO); //添加状态详情信息 @@ -380,7 +389,7 @@ public class DeployServingAsyncTask { } else { //多节点分布式 - DistributeTrainBO distributeTrainBO = buildDistributeTrainBO(user, batchServing); + DistributeTrainBO distributeTrainBO = buildDistributeTrainBO(user, batchServing, taskIdentify); if (distributeTrainBO != null) { BizDistributeTrain distribute = distributeTrainApi.create(distributeTrainBO); @@ -443,7 +452,7 @@ public class DeployServingAsyncTask { * @param batchServing 批量服务信息 * @return PtJupyterJobBO 返回构建后对象 */ - public PtJupyterJobBO buildJobBo(UserContext user, BatchServing batchServing) { + public PtJupyterJobBO buildJobBo(UserContext user, BatchServing batchServing, String taskIdentify) { String platform = ServingFrameTypeEnum.getFrameName(batchServing.getFrameType()); if (batchServing.getUseScript()) { platform = SymbolConstant.BLANK; @@ -469,7 +478,8 @@ public class DeployServingAsyncTask { put(ServingConstant.DUBHE_SERVING_PATH, new PtMountDirBO(servingPath)); }}) .setImage(batchServing.getImage()) - .setBusinessLabel(k8sNameTool.getPodLabel(BizEnum.BATCH_SERVING)); + .setBusinessLabel(k8sNameTool.getPodLabel(BizEnum.BATCH_SERVING)) + .setTaskIdentifyLabel(taskIdentify); return bo; 
} @@ -480,7 +490,7 @@ public class DeployServingAsyncTask { * @param batchServing 批量服务信息 * @return DistributeTrainBO 返回构建后对象 */ - public DistributeTrainBO buildDistributeTrainBO(UserContext user, BatchServing batchServing) { + public DistributeTrainBO buildDistributeTrainBO(UserContext user, BatchServing batchServing, String taskIdentify) { String platform = ServingFrameTypeEnum.getFrameName(batchServing.getFrameType()); if (batchServing.getUseScript()) { platform = SymbolConstant.BLANK; @@ -511,7 +521,8 @@ public class DeployServingAsyncTask { put(ServingConstant.INPUT_PATH, new PtMountDirBO(k8sNameTool.getAbsolutePath(batchServing.getInputPath()))); put(ServingConstant.OUTPUT_PATH, new PtMountDirBO(k8sNameTool.getAbsolutePath(batchServing.getOutputPath()))); put(ServingConstant.DUBHE_SERVING_PATH, new PtMountDirBO(servingPath)); - }}); + }}) + .setTaskIdentifyLabel(taskIdentify); return bo; } diff --git a/dubhe-server/dubhe-serving/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-serving/src/main/resources/bootstrap.yml index 02e081b..707f391 100644 --- a/dubhe-server/dubhe-serving/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-serving/src/main/resources/bootstrap.yml @@ -30,6 +30,6 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 \ No newline at end of file diff --git a/dubhe-server/dubhe-terminal/pom.xml b/dubhe-server/dubhe-terminal/pom.xml new file mode 100644 index 0000000..c5cfccb --- /dev/null +++ b/dubhe-server/dubhe-terminal/pom.xml @@ -0,0 +1,97 @@ + + + + server + org.dubhe + 0.0.1-SNAPSHOT + + 4.0.0 + 0.0.1-SNAPSHOT + dubhe-terminal + 专业版终端服务 + Dubhe terminal + + + + org.springframework.boot + spring-boot-starter-web + + + org.springframework.cloud + spring-cloud-context + + + + org.dubhe.biz + data-response + ${org.dubhe.biz.data-response.version} + + + + org.dubhe + common-k8s + ${org.dubhe.common-k8s.version} + + + + 
org.dubhe.cloud + remote-call + ${org.dubhe.cloud.remote-call.version} + + + + org.dubhe.cloud + registration + ${org.dubhe.cloud.registration.version} + + + + org.dubhe.cloud + configuration + ${org.dubhe.cloud.configuration.version} + + + + org.dubhe.cloud + auth-config + ${org.dubhe.cloud.auth-config.version} + + + + org.dubhe.biz + log + ${org.dubhe.biz.log.version} + + + + junit + junit + test + + + + + + + org.springframework.boot + spring-boot-maven-plugin + + false + true + exec + + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + + + \ No newline at end of file diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/TerminalApplication.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/TerminalApplication.java new file mode 100644 index 0000000..d8eaf8f --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/TerminalApplication.java @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.terminal; + +import org.mybatis.spring.annotation.MapperScan; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.scheduling.annotation.EnableAsync; + +/** + * @description 专业版Terminal模块启动类 + * @date 2021-01-19 + */ +@SpringBootApplication(scanBasePackages = "org.dubhe") +@MapperScan(basePackages = {"org.dubhe.**.dao"}) +@EnableAsync +public class TerminalApplication { + public static void main(String[] args) { + SpringApplication.run(TerminalApplication.class, args); + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/config/TerminalConfig.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/config/TerminalConfig.java new file mode 100644 index 0000000..02d619a --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/config/TerminalConfig.java @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.terminal.config; + +import lombok.Getter; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Configuration; + +/** + * @description 配置 + * @date 2021-07-19 + */ +@Getter +@Configuration +public class TerminalConfig { + //专业版终端模块目录 + @Value("${terminal.terminal-dir}") + private String terminalDir; + + //用户workspace目录 + @Value("${terminal.workspace-dir}") + private String workspaceDir; + + //ssh 主机地址 + @Value("${terminal.ssh-host}") + private String sshHost; + + //harbor 地址 + @Value("${harbor.address}") + private String harborAddress; + + //服务端口 + @Value("${server.port}") + private String serverPort; +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/constant/TerminalConstant.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/constant/TerminalConstant.java new file mode 100644 index 0000000..4bb5d27 --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/constant/TerminalConstant.java @@ -0,0 +1,31 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.terminal.constant; +/** + * @description + * @date 2021-7-19 + */ +public class TerminalConstant { + + public static final String DATASET_VOLUME_MOUNTS = "/dataset"; + + public static final String WORKSPACE_VOLUME_MOUNTS = "/workspace"; + + public static final String SSH_USER_COMMAND = "ssh -p {} {}@{}"; + + public static final String SSH_COMMAND = "ssh -p {} {}"; +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/dao/PtImageMapper.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/dao/PtImageMapper.java new file mode 100644 index 0000000..bbd5326 --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/dao/PtImageMapper.java @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.terminal.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.dubhe.terminal.domain.entity.PtImage; + + +/** + * @description 镜像 Mapper 接口 + * @date 2020-04-27 + */ +public interface PtImageMapper extends BaseMapper { +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/dao/TerminalInfoMapper.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/dao/TerminalInfoMapper.java new file mode 100644 index 0000000..70877e4 --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/dao/TerminalInfoMapper.java @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.terminal.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.apache.ibatis.annotations.Param; +import org.apache.ibatis.annotations.Select; +import org.apache.ibatis.annotations.Update; +import org.dubhe.terminal.domain.entity.TerminalInfo; + +import java.util.List; + +/** + * @description + * @date 2021-07-15 + */ +public interface TerminalInfoMapper extends BaseMapper { + /** + * 根据 terminal id 逻辑删除节点信息(将 deleted 置为 1) + * + * @param id terminal id + * @return int 数量 + */ + @Update("update terminal_info set deleted = 1 where terminal_id = #{id}") + int deleteByTerminalId(@Param("id") Long id); + + @Select("select * from terminal_info where terminal_id = #{id} and deleted = 0") + List selectByTerminalId(@Param("id") Long id); +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/dao/TerminalMapper.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/dao/TerminalMapper.java new file mode 100644 index 0000000..8cbc4bc --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/dao/TerminalMapper.java @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.terminal.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.dubhe.terminal.domain.entity.Terminal; + +/** + * @description terminal Mapper 接口 + * @date 2021-07-08 + */ +public interface TerminalMapper extends BaseMapper { + +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalCreateDTO.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalCreateDTO.java new file mode 100644 index 0000000..0d3eb76 --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalCreateDTO.java @@ -0,0 +1,93 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.terminal.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.hibernate.validator.constraints.Length; + +import javax.validation.constraints.Min; +import javax.validation.constraints.NotBlank; +import javax.validation.constraints.NotNull; +import java.util.List; +import java.util.Set; + +/** + * @description 创建终端服务 DTO + * @date 2021-07-12 + */ +@Data +@NoArgsConstructor +@Accessors(chain = true) +public class TerminalCreateDTO extends TerminalDTO { + @ApiModelProperty(value = "终端连接名称, 长度在1-32个字符", required = false) + @Length(min = MagicNumConstant.ONE, max = MagicNumConstant.THIRTY_TWO, message = "训练作业名长度在1-32个字符") + private String name; + + @ApiModelProperty(value = "数据来源名称, 长度在1-127个字符", required = false) + @Length(min = MagicNumConstant.ONE, max = MagicNumConstant.ONE_HUNDRED_TWENTY_SEVEN, message = "数据来源名称长度在1-127个字符") + private String dataSourceName; + + @ApiModelProperty(value = "数据来源路径, 长度在1-127个字符", required = false) + @Length(min = MagicNumConstant.ONE, max = MagicNumConstant.ONE_HUNDRED_TWENTY_SEVEN, message = "数据来源路径长度在1-127个字符") + private String dataSourcePath; + + @ApiModelProperty(value = "镜像版本", required = true) + @NotBlank(message = "镜像版本不能为空") + private String imageTag; + + @ApiModelProperty(value = "镜像名称", required = true) + @NotBlank(message = "镜像名称不能为空") + private String imageName; + + @ApiModelProperty(value = "镜像地址", required = true) + @NotBlank(message = "镜像地址不能为空") + private String imageUrl; + + @NotNull + @Min(value = MagicNumConstant.ONE, message = "最少需要1节点") + @ApiModelProperty(value = "总节点数") + private Integer totalNode; + + @ApiModelProperty(value = "描述") + @Length(max = MagicNumConstant.INTEGER_TWO_HUNDRED_AND_FIFTY_FIVE, message = "描述长度不超过255个字符") + private String description; 
+ + @NotNull + @ApiModelProperty(value = "是否相同规格") + private boolean sameInfo; + + @ApiModelProperty(value = "端口") + private Set ports; + + @ApiModelProperty(value = "节点详情") + private List info; + + @ApiModelProperty(value = "执行命令") + private List cmdLines; + + @ApiModelProperty(value = "镜像ssh密码") + private String sshPwd; + + @ApiModelProperty(value = "镜像ssh用户") + private String sshUser; + +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalDTO.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalDTO.java new file mode 100644 index 0000000..4351427 --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalDTO.java @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.terminal.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; + +import javax.validation.constraints.Min; +import java.io.Serializable; + +/** + * @description 删除终端服务DTO + * @date 2021-07-13 + */ +@Data +@NoArgsConstructor +@Accessors(chain = true) +public class TerminalDTO implements Serializable { + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "id") + @Min(value = MagicNumConstant.ONE, message = "id数值不合法") + private Long id; + + public TerminalDTO(Long id){ + this.id = id; + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalInfoDTO.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalInfoDTO.java new file mode 100644 index 0000000..6d9b63c --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalInfoDTO.java @@ -0,0 +1,67 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.terminal.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.terminal.domain.entity.TerminalInfo; + +import javax.validation.constraints.Min; +import javax.validation.constraints.NotNull; +import java.io.Serializable; + +/** + * @description 节点详情DTO + * @date 2021-07-15 + */ +@Data +@NoArgsConstructor +@AllArgsConstructor +@Accessors(chain = true) +public class TerminalInfoDTO implements Serializable { + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "id") + @Min(value = MagicNumConstant.ONE, message = "id数值不合法") + private Long id; + + @NotNull + @Min(value = MagicNumConstant.ONE, message = "最少需要一个CPU") + @ApiModelProperty(value = "cpu数量") + private Integer cpuNum; + + @Min(value = MagicNumConstant.ZERO, message = "GPU数量不能小于0") + @ApiModelProperty(value = "gpu数量") + private Integer gpuNum; + + @NotNull + @Min(value = MagicNumConstant.ONE, message = "最少需要1G内存") + @ApiModelProperty(value = "内存大小") + private Integer memNum; + + @ApiModelProperty(value = "磁盘大小(M)") + private Integer diskMemNum; + + public TerminalInfo toTerminalInfo(Long terminalId,String k8sResourceName,Long originUserId,String sshUser,String sshPwd){ + return new TerminalInfo(id,terminalId,cpuNum,memNum,gpuNum,diskMemNum,k8sResourceName,originUserId,sshUser,sshPwd); + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalK8sPodCallbackCreateDTO.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalK8sPodCallbackCreateDTO.java new file mode 100644 index 0000000..688401d --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalK8sPodCallbackCreateDTO.java 
@@ -0,0 +1,36 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.terminal.domain.dto; + +import io.swagger.annotations.ApiModel; +import lombok.Data; +import org.dubhe.k8s.domain.dto.BaseK8sPodCallbackCreateDTO; + +/** + * @description k8s pod异步回调 terminal + * @date 2021-06-24 + */ +@ApiModel(description = "k8s pod异步回调 terminal") +@Data +public class TerminalK8sPodCallbackCreateDTO extends BaseK8sPodCallbackCreateDTO { + + @Override + public String toString() { + return super.toString(); + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalPreserveDTO.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalPreserveDTO.java new file mode 100644 index 0000000..db32a92 --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/dto/TerminalPreserveDTO.java @@ -0,0 +1,59 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.terminal.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; + +import javax.validation.constraints.Min; +import javax.validation.constraints.NotBlank; +import javax.validation.constraints.NotNull; +import java.io.Serializable; + +/** + * @description 保存终端 DTO + * @date 2021-07-13 + */ +@Data +@NoArgsConstructor +@Accessors(chain = true) +public class TerminalPreserveDTO implements Serializable { + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "terminalId") + @NotNull(message = "terminalId不能为空") + @Min(value = MagicNumConstant.ONE, message = "terminalId数值不合法") + private Long id; + + @ApiModelProperty(value = "镜像名称", required = true) + @NotBlank(message = "镜像名称不能为空") + private String imageName; + + @ApiModelProperty(value = "镜像版本", required = true) + @NotBlank(message = "镜像版本不能为空") + private String imageTag; + + @ApiModelProperty(value = "镜像描述", required = false) + private String imageRemark; + + @ApiModelProperty(value = "密码", required = false) + private String password; +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/entity/PtImage.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/entity/PtImage.java new file mode 100644 index 0000000..dfe9bbc --- /dev/null +++ 
b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/entity/PtImage.java @@ -0,0 +1,100 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.terminal.domain.entity; + +import com.baomidou.mybatisplus.annotation.FieldFill; +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.db.entity.BaseEntity; + +/** + * @description 镜像 + * @date 2020-04-27 + */ +@Data +@Accessors(chain = true) +@TableName(value = "pt_image", autoResultMap = true) +public class PtImage extends BaseEntity { + + /** + * 主键 + */ + @TableId(value = "id", type = IdType.AUTO) + private Long id; + + /** + * 镜像名称 + */ + @TableField(value = "image_name") + private String imageName; + + /** + * 镜像地址 + */ + @TableField(value = "image_url") + private String imageUrl; + + /** + * 镜像版本 + */ + @TableField(value = "image_tag") + private String imageTag; + + /** + * 镜像描述 + */ + @TableField(value = "remark") + private String remark; + + /** + * projectName + */ + @TableField(value = "project_name") + private String projectName; + + /** + * 镜像来源 + */ + @TableField(value = "image_resource") + 
private Integer imageResource; + + + /** + * 镜像状态 + */ + @TableField(value = "image_status") + private Integer imageStatus; + + /** + * 资源拥有者ID + */ + @TableField(value = "origin_user_id", fill = FieldFill.INSERT) + private Long originUserId; + + //镜像ssh密码 + @TableField(value = "ssh_pwd") + private String sshPwd; + + //镜像ssh用户 + @TableField(value = "ssh_user") + private String sshUser; +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/entity/Terminal.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/entity/Terminal.java new file mode 100644 index 0000000..f321b6a --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/entity/Terminal.java @@ -0,0 +1,187 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.terminal.domain.entity; + +import com.baomidou.mybatisplus.annotation.*; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.constant.SymbolConstant; +import org.dubhe.biz.base.utils.DateUtil; +import org.dubhe.biz.base.utils.StringUtils; +import org.dubhe.biz.db.entity.BaseEntity; + +import javax.validation.constraints.NotNull; +import java.util.Date; + +/** + * @description 终端 + * @date 2021-07-08 + */ +@Data +@NoArgsConstructor +@Accessors(chain = true) +@TableName("terminal") +public class Terminal extends BaseEntity { + /** + * 主键 + */ + @TableId(value = "id", type = IdType.AUTO) + @NotNull(groups = {Update.class}) + private Long id; + /** + * 名称 + */ + @TableField(value = "name") + private String name; + /** + * 镜像名 + */ + @TableField(value = "image_name") + private String imageName; + /** + * 镜像全路径 + */ + @TableField(value = "image_url") + private String imageUrl; + /** + * 镜像版本 + */ + @TableField(value = "image_tag") + private String imageTag; + /** + * 数据集名称 + */ + @TableField(value = "data_source_name") + private String dataSourceName; + /** + * 数据集路径 + */ + @TableField(value = "data_source_path") + private String dataSourcePath; + /** + * 运行节点数 + */ + @TableField(value = "running_node") + private Integer runningNode; + /** + * 服务总节点数 + */ + @TableField(value = "total_node") + private Integer totalNode; + /** + * 描述 + */ + @TableField("description") + private String description; + + /** + * 上次启动时刻 + */ + @TableField("last_start_time") + private Date lastStartTime; + + /** + * 上次停止时刻 + */ + @TableField("last_stop_time") + private Date lastStopTime; + + /** + *节点规格是否相同:0相同 1:不同 + */ + @TableField("same_info") + private boolean sameInfo; + + /** + * 服务状态:0-异常,1-部署中,2-运行中,3-停止中,4-已停止 + */ + @TableField(value = "status") + private Integer status; + 
+ /** + * 状态对应的详情信息 + */ + @TableField(value = "status_detail") + private String statusDetail; + + /** + * 资源拥有者ID + */ + @TableField(value = "origin_user_id",fill = FieldFill.INSERT) + private Long originUserId; + + /** + * put 键值 + * + * @param key 键 + * @param value 值 + */ + public void putStatusDetail(String key,String value){ + statusDetail = StringUtils.putIntoJsonStringMap(key,value,statusDetail); + } + + /** + * 移除 键值 + * + * @param key 键 + */ + public void removeStatusDetail(String key){ + statusDetail = StringUtils.removeFromJsonStringMap(key,statusDetail); + } + + public void setUpdateInfo(Long userId){ + setUpdateTime(DateUtil.getCurrentTimestamp()); + setUpdateUserId(userId); + } + + /** + * 获取镜像 路径 + * + * @return String + */ + public String getImagePath(){ + if (StringUtils.isEmpty(imageUrl)){ + return null; + } + StringBuffer imageProject = new StringBuffer(); + String[] strings = imageUrl.split(SymbolConstant.SLASH); + for (int i = MagicNumConstant.ZERO;i < strings.length - MagicNumConstant.ONE;i++){ + if (i == strings.length - MagicNumConstant.TWO){ + imageProject.append(strings[i]); + }else { + imageProject.append(strings[i]+SymbolConstant.SLASH); + } + } + return imageProject.toString(); + } + + /** + * 获取镜像 project + * + * @return String + */ + public String getImageProject(){ + if (StringUtils.isEmpty(imageUrl)){ + return null; + } + String[] strings = imageUrl.split(SymbolConstant.SLASH); + return strings.length > 0 ? strings[MagicNumConstant.ZERO] : null; + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/entity/TerminalInfo.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/entity/TerminalInfo.java new file mode 100644 index 0000000..ae026fb --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/entity/TerminalInfo.java @@ -0,0 +1,178 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.terminal.domain.entity; + +import com.baomidou.mybatisplus.annotation.*; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.utils.DateUtil; +import org.dubhe.biz.base.utils.StringUtils; +import org.dubhe.biz.db.entity.BaseEntity; + +import javax.validation.constraints.NotNull; + +/** + * @description 终端详情 + * @date 2021-07-12 + */ +@Data +@NoArgsConstructor +@Accessors(chain = true) +@TableName("terminal_info") +public class TerminalInfo extends BaseEntity { + /** + * 主键 + */ + @TableId(value = "id", type = IdType.AUTO) + @NotNull(groups = {Update.class}) + private Long id; + + /** + * 主键 + */ + @TableField(value = "terminal_id") + private Long terminalId; + /** + * 名称 + */ + @TableField(value = "name") + private String name; + + /** + * k8s 资源名称 + */ + @TableField(value = "k8s_resource_name") + private String k8sResourceName; + + /** + * 服务状态:0-异常,1-部署中,2-运行中,3-停止中,4-已停止 + */ + @TableField(value = "status") + private Integer status; + + /** + * 状态对应的详情信息 + */ + @TableField(value = "status_detail") + private String statusDetail; + + /** + * ssh命令 + */ + @TableField(value = "ssh") + private String ssh; + + /** + * ssh 密码 + */ + @TableField(value = "ssh_password") + private String sshPassword; + + /** + * CPU数量(核) + */ + @TableField(value = "cpu_num") + 
private Integer cpuNum; + + /** + * 内存大小(M) + */ + @TableField(value = "mem_num") + private Integer memNum; + + /** + * GPU数量(核) + */ + @TableField(value = "gpu_num") + private Integer gpuNum; + + /** + * 磁盘大小(M) + */ + @TableField(value = "disk_mem_num") + private Integer diskMemNum; + + /** + * 资源拥有者ID + */ + @TableField(value = "origin_user_id",fill = FieldFill.INSERT) + private Long originUserId; + + /** + * pod ip + */ + @TableField(value = "pod_ip") + private String podIp; + + /** + * ssh端口 + */ + @TableField(value = "ssh_port") + private Integer sshPort; + + /** + * ssh 用户 + */ + @TableField(value = "ssh_user") + private String sshUser; + + /** + *是否master节点:false 否 true:是 + */ + @TableField("master_flag") + private boolean masterFlag; + + public TerminalInfo(Long id,Long terminalId,Integer cpuNum,Integer memNum,Integer gpuNum,Integer diskMemNum,String k8sResourceName,Long originUserId,String sshUser,String sshPwd){ + this.id = id; + this.terminalId = terminalId; + this.cpuNum = cpuNum; + this.memNum = memNum; + this.gpuNum = gpuNum; + this.diskMemNum = diskMemNum; + this.k8sResourceName = k8sResourceName; + this.originUserId = originUserId; + this.setCreateUserId(originUserId); + this.sshUser = sshUser; + this.sshPassword = sshPwd; + } + + /** + * put 键值 + * + * @param key 键 + * @param value 值 + */ + public void putStatusDetail(String key,String value){ + statusDetail = StringUtils.putIntoJsonStringMap(key,value,statusDetail); + } + + /** + * 移除 键值 + * + * @param key 键 + */ + public void removeStatusDetail(String key){ + statusDetail = StringUtils.removeFromJsonStringMap(key,statusDetail); + } + + public void setUpdateInfo(Long userId){ + setUpdateTime(DateUtil.getCurrentTimestamp()); + setUpdateUserId(userId); + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/vo/TerminalInfoVO.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/vo/TerminalInfoVO.java new file mode 100644 index 0000000..6e2373a 
--- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/vo/TerminalInfoVO.java @@ -0,0 +1,97 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.terminal.domain.vo; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; +import org.dubhe.biz.db.entity.BaseEntity; +import org.dubhe.terminal.domain.entity.TerminalInfo; + +import javax.validation.constraints.NotNull; +import java.io.Serializable; + +/** + * @description 终端信息VO + * @date 2021-07-13 + */ +@Data +@NoArgsConstructor +@Accessors(chain = true) +public class TerminalInfoVO implements Serializable { + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "id") + private Long id; + + @ApiModelProperty(value = "terminalId") + private Long terminalId; + + @TableField(value = "name") + private String name; + + @ApiModelProperty(value = "服务状态:0-异常,1-部署中,2-运行中,3-停止中,4-已停止") + private Integer status; + + @ApiModelProperty("状态对应的详情信息") + private String statusDetail; + + @ApiModelProperty(value = "ssh命令") + private String ssh; + + @ApiModelProperty(value 
= "ssh 密码") + private String sshPassword; + + @ApiModelProperty(value = "ssh 用户") + private String sshUser; + + @ApiModelProperty(value = "CPU数量(核)") + private Integer cpuNum; + + @ApiModelProperty(value = "内存大小(M)") + private Integer memNum; + + @ApiModelProperty(value = "GPU数量(核)") + private Integer gpuNum; + + @ApiModelProperty(value = "磁盘大小(M)") + private Integer diskMemNum; + + @ApiModelProperty(value = "是否master节点:false 否 true:是") + private boolean masterFlag; + + public TerminalInfoVO(TerminalInfo terminalInfo){ + this.id = terminalInfo.getId(); + this.terminalId = terminalInfo.getTerminalId(); + this.name = terminalInfo.getName(); + this.status = terminalInfo.getStatus(); + this.ssh = terminalInfo.getSsh(); + this.sshPassword = terminalInfo.getSshPassword(); + this.sshUser = terminalInfo.getSshUser(); + this.cpuNum = terminalInfo.getCpuNum(); + this.memNum = terminalInfo.getMemNum(); + this.gpuNum = terminalInfo.getGpuNum(); + this.diskMemNum = terminalInfo.getDiskMemNum(); + this.masterFlag = terminalInfo.isMasterFlag(); + this.statusDetail = terminalInfo.getStatusDetail(); + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/vo/TerminalVO.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/vo/TerminalVO.java new file mode 100644 index 0000000..e989815 --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/domain/vo/TerminalVO.java @@ -0,0 +1,111 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.terminal.domain.vo; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; +import org.dubhe.terminal.domain.entity.Terminal; +import org.dubhe.terminal.domain.entity.TerminalInfo; +import org.springframework.util.CollectionUtils; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +/** + * @description 终端VO + * @date 2021-07-13 + */ +@Data +@NoArgsConstructor +@Accessors(chain = true) +public class TerminalVO implements Serializable { + private static final long serialVersionUID = 1L; + + @ApiModelProperty(value = "id") + private Long id; + + @ApiModelProperty(value = "终端连接名称, 长度在1-32个字符", required = false) + private String name; + + @ApiModelProperty(value = "数据来源名称, 长度在1-127个字符") + private String dataSourceName; + + @ApiModelProperty(value = "数据来源路径, 长度在1-127个字符") + private String dataSourcePath; + + @ApiModelProperty(value = "镜像版本") + private String imageTag; + + @ApiModelProperty(value = "镜像名称") + private String imageName; + + @ApiModelProperty(value = "镜像全路径") + private String imageUrl; + + @ApiModelProperty(value = "运行节点数") + private Integer runningNode; + + @ApiModelProperty(value = "服务总节点数") + private Integer totalNode; + + @ApiModelProperty(value = "节点规格是否相同:0相同 1:不同") + private boolean sameInfo; + + @ApiModelProperty(value = "上次启动时刻") + private Date lastStartTime; + + @ApiModelProperty(value = "上次停止时刻") + private Date lastStopTime; + + @ApiModelProperty(value = "连接详情", required = true) + private List info; + + @ApiModelProperty(value = "服务状态:0-异常,1-保存中,2-运行中,3-已停止") + private Integer status; + + @ApiModelProperty("状态对应的详情信息") + private String statusDetail; + + public TerminalVO(Terminal terminal,List 
terminalInfoList){ + this.id = terminal.getId(); + this.name = terminal.getName(); + this.dataSourceName = terminal.getDataSourceName(); + this.dataSourcePath = terminal.getDataSourcePath(); + this.imageTag = terminal.getImageTag(); + this.imageName = terminal.getImageName(); + this.imageUrl = terminal.getImageUrl(); + this.lastStartTime = terminal.getLastStartTime(); + this.lastStopTime = terminal.getLastStopTime(); + this.runningNode = terminal.getRunningNode(); + this.totalNode = terminal.getTotalNode(); + this.sameInfo = terminal.isSameInfo(); + this.status = terminal.getStatus(); + this.statusDetail = terminal.getStatusDetail(); + + if (!CollectionUtils.isEmpty(terminalInfoList)){ + info = new ArrayList<>(); + terminalInfoList.forEach(terminalInfo -> { + info.add(new TerminalInfoVO(terminalInfo)); + }); + } + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/enums/TerminalInfoStatusEnum.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/enums/TerminalInfoStatusEnum.java new file mode 100644 index 0000000..c5eec59 --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/enums/TerminalInfoStatusEnum.java @@ -0,0 +1,92 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.terminal.enums; + + +import org.dubhe.k8s.enums.PodPhaseEnum; + + +/** + * @description terminal 状态枚举 + * @date 2020-07-15 + */ +public enum TerminalInfoStatusEnum { + FAILED(0, "异常",PodPhaseEnum.FAILED.getPhase()), + + UNKNOWN(0, "异常",PodPhaseEnum.UNKNOWN.getPhase()), + + PENDING(1, "调度中",PodPhaseEnum.PENDING.getPhase()), + + RUNNING(2, "运行中",PodPhaseEnum.RUNNING.getPhase()), + + DELETED(3, "已停止",PodPhaseEnum.DELETED.getPhase()), + + SUCCEEDED(3, "已停止",PodPhaseEnum.SUCCEEDED.getPhase()); + /** + * 编码 + */ + private Integer code; + /** + * 描述 + */ + private String description; + /** + * k8s pod状态 + */ + private String phase; + + public Integer getCode() { + return code; + } + + public String getDescription() { + return description; + } + + public String getPhase(){ + return phase; + } + + TerminalInfoStatusEnum(int code, String description, String phase) { + this.code = code; + this.description = description; + this.phase = phase; + } + + public static String getDescription(Integer code) { + if (code != null) { + for (TerminalInfoStatusEnum en : TerminalInfoStatusEnum.values()) { + if (en.getCode().equals(code)) { + return en.getDescription(); + } + } + } + return null; + } + + public static Integer getCode(String phase) { + if (phase != null) { + for (TerminalInfoStatusEnum en : TerminalInfoStatusEnum.values()) { + if (en.getPhase().equals(phase)) { + return en.getCode(); + } + } + } + return null; + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/enums/TerminalStatusEnum.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/enums/TerminalStatusEnum.java new file mode 100644 index 0000000..a4abdd6 --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/enums/TerminalStatusEnum.java @@ -0,0 +1,65 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. 
/**
 * @description Status of a terminal
 * @date 2020-07-15
 */
public enum TerminalStatusEnum {
    FAILED(0, "异常"),

    SAVING(1, "保存中"),

    RUNNING(2, "运行中"),

    DELETED(3, "已停止");

    /** Status code. */
    private Integer code;

    /** Human-readable description. */
    private String description;

    TerminalStatusEnum(int code, String description) {
        this.code = code;
        this.description = description;
    }

    public Integer getCode() {
        return this.code;
    }

    public String getDescription() {
        return this.description;
    }

    /**
     * Looks up the description for a status code.
     *
     * @param code status code, may be null
     * @return description of the matching constant, or null when code is null or unknown
     */
    public static String getDescription(Integer code) {
        if (code == null) {
            return null;
        }
        for (TerminalStatusEnum candidate : values()) {
            if (candidate.getCode().equals(code)) {
                return candidate.getDescription();
            }
        }
        return null;
    }
}
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.terminal.rest; + +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.dataresponse.factory.DataResponseFactory; +import org.dubhe.docker.constant.DockerCallbackConstant; +import org.dubhe.docker.domain.dto.DockerPushCallbackDTO; +import org.dubhe.terminal.service.TerminalService; +import org.springframework.validation.annotation.Validated; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import javax.annotation.Resource; + +/** + * @description + * @date 2021-07-27 + */ +@Api(tags = "docker 回调") +@RestController +@RequestMapping(DockerCallbackConstant.DOCKER_CALLBACK_URI) +public class DockerCallbackController { + + @Resource + private TerminalService terminalService; + + @PostMapping("/push") + @ApiOperation("推送镜像失败") + //@PreAuthorize(Permissions.TERMINAL_CREATE) + public DataResponseBody pushImageError(@Validated @RequestBody DockerPushCallbackDTO dockerPushCallbackDTO) { + if (dockerPushCallbackDTO.isError()){ + 
terminalService.pushImageError(dockerPushCallbackDTO.getTerminalId(),dockerPushCallbackDTO.getErrorMessage(),dockerPushCallbackDTO.getUserId()); + }else { + terminalService.pushImageComplete(dockerPushCallbackDTO.getTerminalId(),dockerPushCallbackDTO.getUserId()); + } + + return DataResponseFactory.success("docker回调处理中"); + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/rest/K8sCallbackPodController.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/rest/K8sCallbackPodController.java new file mode 100644 index 0000000..c3bee0d --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/rest/K8sCallbackPodController.java @@ -0,0 +1,63 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.terminal.rest; + +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; +import io.swagger.annotations.ApiParam; +import org.dubhe.biz.base.constant.StringConstant; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.dataresponse.factory.DataResponseFactory; +import org.dubhe.k8s.service.PodCallbackAsyncService; +import org.dubhe.k8s.utils.K8sCallBackTool; +import org.dubhe.terminal.domain.dto.TerminalK8sPodCallbackCreateDTO; +import org.springframework.validation.annotation.Validated; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestHeader; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import javax.annotation.Resource; + +/** + * @description k8s Pod 异步回调处理类 + * @date 2021-07-15 + */ +@Api(tags = "k8s回调:Pod") +@RestController +@RequestMapping(StringConstant.K8S_CALLBACK_URI) +public class K8sCallbackPodController { + + @Resource(name = "terminalAsyncService") + private PodCallbackAsyncService terminalAsyncService; + + /** + * terminal 服务pod异步回调 + * + * @param k8sToken + * @param k8sPodCallbackReq + * @return + */ + @PostMapping(value = "/terminal") + @ApiOperation("terminal pod 回调") + public DataResponseBody servingPodCallBack(@ApiParam(type = "head") @RequestHeader(name = K8sCallBackTool.K8S_CALLBACK_TOKEN) String k8sToken, + @Validated @RequestBody TerminalK8sPodCallbackCreateDTO k8sPodCallbackReq) { + terminalAsyncService.podCallBack(k8sPodCallbackReq); + return DataResponseFactory.success("terminal服务异步回调处理中"); + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/rest/TerminalController.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/rest/TerminalController.java new file 
mode 100644 index 0000000..d8894af --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/rest/TerminalController.java @@ -0,0 +1,90 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.terminal.rest; + +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; +import org.dubhe.biz.base.annotation.ApiVersion; +import org.dubhe.biz.base.constant.Permissions; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.terminal.domain.dto.TerminalCreateDTO; +import org.dubhe.terminal.domain.dto.TerminalDTO; +import org.dubhe.terminal.domain.dto.TerminalPreserveDTO; +import org.dubhe.terminal.service.TerminalService; +import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.validation.annotation.Validated; +import org.springframework.web.bind.annotation.*; + +import javax.annotation.Resource; + +/** + * @description 专业版终端 + * @date 2021-07-12 + */ +@Api(tags = "专业版:终端") +@RestController +@ApiVersion(1) +@RequestMapping("/terminals") +public class TerminalController { + @Resource + private TerminalService terminalService; + + @PostMapping("/create") + @ApiOperation("创建") + //@PreAuthorize(Permissions.TERMINAL_CREATE) + public DataResponseBody create(@Validated @RequestBody TerminalCreateDTO terminalCreateDTO) { + return 
new DataResponseBody(terminalService.create(terminalCreateDTO)); + } + + @PostMapping("/restart") + @ApiOperation("重新启动") + //@PreAuthorize(Permissions.TERMINAL_RESTART) + public DataResponseBody restart(@Validated @RequestBody TerminalCreateDTO terminalCreateDTO) { + return new DataResponseBody(terminalService.restart(terminalCreateDTO)); + } + + @PostMapping("/preserve") + @ApiOperation("保存并停止") + //@PreAuthorize(Permissions.TERMINAL_PRESAVE) + public DataResponseBody preserve(@Validated @RequestBody TerminalPreserveDTO terminalPreserveDTO) { + return new DataResponseBody(terminalService.preserve(terminalPreserveDTO)); + } + + @PostMapping("/delete") + @ApiOperation("删除") + //@PreAuthorize(Permissions.TERMINAL_DELETE) + public DataResponseBody delete(@Validated @RequestBody TerminalDTO terminalDTO) { + return new DataResponseBody(terminalService.delete(terminalDTO)); + } + + @GetMapping("/detail") + @ApiOperation("根据terminalId查询详情") + //@PreAuthorize(Permissions.TERMINAL_DETAIL) + public DataResponseBody detail(@Validated TerminalDTO terminalDTO) { + return new DataResponseBody(terminalService.detail(terminalDTO)); + } + + @GetMapping("/list") + @ApiOperation("连接列表") + //@PreAuthorize(Permissions.TERMINAL_LIST) + public DataResponseBody list() { + return new DataResponseBody(terminalService.list()); + } + + +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/service/TerminalService.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/service/TerminalService.java new file mode 100644 index 0000000..407cba3 --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/service/TerminalService.java @@ -0,0 +1,120 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.terminal.service; + +import org.dubhe.terminal.domain.dto.TerminalCreateDTO; +import org.dubhe.terminal.domain.dto.TerminalDTO; +import org.dubhe.terminal.domain.dto.TerminalK8sPodCallbackCreateDTO; +import org.dubhe.terminal.domain.dto.TerminalPreserveDTO; +import org.dubhe.terminal.domain.entity.Terminal; +import org.dubhe.terminal.domain.entity.TerminalInfo; +import org.dubhe.terminal.domain.vo.TerminalVO; + +import java.util.List; + +/** + * @description Service interface of the professional-edition terminal + * @date 2021-07-12 + */ +public interface TerminalService { + /** + * Create a terminal + * + * @param terminalCreateDTO creation request + * @return TerminalVO + */ + TerminalVO create(TerminalCreateDTO terminalCreateDTO); + + /** + * Restart a terminal + * + * @param terminalCreateDTO restart request + * @return TerminalVO + */ + TerminalVO restart(TerminalCreateDTO terminalCreateDTO); + + /** + * Save and stop + * + * @param terminalPreserveDTO save request + * @return boolean + */ + boolean preserve(TerminalPreserveDTO terminalPreserveDTO); + + /** + * Delete + * + * @param terminalDTO identifies the terminal to delete + * @return boolean + */ + boolean delete(TerminalDTO terminalDTO); + + /** + * Query details + * + * @param terminalDTO identifies the terminal to query + * @return terminal details + */ + TerminalVO detail(TerminalDTO terminalDTO); + + /** + * Query the list + * + * @return terminal list + */ + List list(); + + /** + * Refresh the TerminalInfo status + * + * @param id TerminalInfo id + */ + TerminalInfo refreshTerminalInfoStatus(Long id); + + /** + * Refresh the Terminal status + * + * @param id Terminal id + * @return the refreshed Terminal, or null if it could not be refreshed + */ + Terminal refreshTerminalStatus(Long id); + + /** + * Callback from k8s reporting the pod (online service) status + * + * @param times number of callback attempts + * @param
req callback request object + * @return boolean whether the callback succeeded + */ + boolean terminalPodCallback(int times, TerminalK8sPodCallbackCreateDTO req); + + /** + * Image push completed, for terminal {@code terminalId} and user {@code userId} + * + * @param terminalId terminal id + */ + void pushImageComplete(Long terminalId,Long userId); + + /** + * Image push failed, for terminal {@code terminalId} and user {@code userId} + * + * @param terminalId terminal id + * @param message failure message + */ + void pushImageError(Long terminalId,String message,Long userId); +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/service/impl/TerminalPodAsyServiceImpl.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/service/impl/TerminalPodAsyServiceImpl.java new file mode 100644 index 0000000..8723dbd --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/service/impl/TerminalPodAsyServiceImpl.java @@ -0,0 +1,47 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * ============================================================= + */ +package org.dubhe.terminal.service.impl; + +import org.dubhe.biz.log.enums.LogEnum; +import org.dubhe.biz.log.utils.LogUtil; +import org.dubhe.k8s.abstracts.AbstractPodCallback; +import org.dubhe.k8s.domain.dto.BaseK8sPodCallbackCreateDTO; +import org.dubhe.k8s.service.PodCallbackAsyncService; +import org.dubhe.terminal.domain.dto.TerminalK8sPodCallbackCreateDTO; +import org.dubhe.terminal.service.TerminalService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +@Service(value = "terminalAsyncService") +public class TerminalPodAsyServiceImpl extends AbstractPodCallback implements PodCallbackAsyncService { + @Autowired + private TerminalService terminalService; + + @Override + public boolean doCallback(int times, R k8sPodCallbackCreateDTO) { + TerminalK8sPodCallbackCreateDTO req = (TerminalK8sPodCallbackCreateDTO) k8sPodCallbackCreateDTO; + LogUtil.info(LogEnum.TERMINAL, "Thread {} try {} time.Request: {}", Thread.currentThread(), times, req.toString()); + //在线服务Pod回调 + return terminalService.terminalPodCallback(times, req); + } + + @Override + public void callbackFailed(int retryTimes, R k8sPodCallbackCreateDTO) { + TerminalK8sPodCallbackCreateDTO req = (TerminalK8sPodCallbackCreateDTO) k8sPodCallbackCreateDTO; + LogUtil.info(LogEnum.TERMINAL, "Thread {} try {} times FAILED! if you want to storage or send failed msg,please impl this.. 
Request: {}", Thread.currentThread(), retryTimes, req.toString()); + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/service/impl/TerminalServiceImpl.java b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/service/impl/TerminalServiceImpl.java new file mode 100644 index 0000000..f120c5c --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/java/org/dubhe/terminal/service/impl/TerminalServiceImpl.java @@ -0,0 +1,703 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.terminal.service.impl; +import java.sql.Timestamp; + +import cn.hutool.core.util.RandomUtil; +import cn.hutool.core.util.StrUtil; +import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +import com.github.dockerjava.api.DockerClient; +import com.google.common.collect.Maps; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.dubhe.biz.base.constant.ResponseCode; +import org.dubhe.biz.base.constant.SymbolConstant; +import org.dubhe.biz.base.enums.BizEnum; +import org.dubhe.biz.base.exception.BusinessException; +import org.dubhe.biz.base.service.UserContextService; +import org.dubhe.biz.base.utils.StringUtils; +import org.dubhe.biz.file.api.FileStoreApi; +import org.dubhe.biz.log.enums.LogEnum; +import org.dubhe.biz.log.utils.LogUtil; +import org.dubhe.biz.redis.utils.RedisUtils; +import org.dubhe.docker.api.DockerApi; +import org.dubhe.docker.config.DockerClientFactory; +import org.dubhe.docker.enums.DockerOperationEnum; +import org.dubhe.docker.utils.DockerCallbackTool; +import org.dubhe.k8s.api.PodApi; +import org.dubhe.k8s.api.TerminalApi; +import org.dubhe.k8s.cache.ResourceCache; +import org.dubhe.k8s.constant.K8sParamConstants; +import org.dubhe.k8s.domain.bo.PtMountDirBO; +import org.dubhe.k8s.domain.bo.TerminalBO; +import org.dubhe.k8s.domain.resource.BizPod; +import org.dubhe.k8s.domain.resource.BizServicePort; +import org.dubhe.k8s.domain.vo.TerminalResourceVO; +import org.dubhe.k8s.enums.BusinessLabelServiceNameEnum; +import org.dubhe.k8s.enums.PodPhaseEnum; +import org.dubhe.k8s.utils.K8sNameTool; +import org.dubhe.docker.callback.TerminalPushImageResultCallback; +import org.dubhe.terminal.config.TerminalConfig; +import org.dubhe.terminal.constant.TerminalConstant; +import org.dubhe.terminal.dao.PtImageMapper; +import org.dubhe.terminal.dao.TerminalInfoMapper; +import org.dubhe.terminal.dao.TerminalMapper; +import 
org.dubhe.terminal.domain.dto.TerminalCreateDTO; +import org.dubhe.terminal.domain.dto.TerminalDTO; +import org.dubhe.terminal.domain.dto.TerminalK8sPodCallbackCreateDTO; +import org.dubhe.terminal.domain.dto.TerminalPreserveDTO; +import org.dubhe.terminal.domain.entity.PtImage; +import org.dubhe.terminal.domain.entity.Terminal; +import org.dubhe.terminal.domain.entity.TerminalInfo; +import org.dubhe.terminal.domain.vo.TerminalVO; +import org.dubhe.terminal.enums.TerminalInfoStatusEnum; +import org.dubhe.terminal.enums.TerminalStatusEnum; +import org.dubhe.terminal.service.TerminalService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.CollectionUtils; + +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +/** + * @description 专业版终端实现 + * @date 2021-07-12 + */ +@Service +public class TerminalServiceImpl implements TerminalService { + @Autowired + @Qualifier("hostFileStoreApiImpl") + private FileStoreApi fileStoreApi; + + @Autowired + private K8sNameTool k8sNameTool; + + @Autowired + private TerminalConfig terminalConfig; + + @Autowired + private TerminalApi terminalApi; + + @Autowired + private PodApi podApi; + + @Autowired + private DockerApi dockerApi; + + @Autowired + private DockerClientFactory dockerClientFactory; + + @Autowired + private UserContextService userContextService; + + @Autowired + private TerminalMapper terminalMapper; + + @Autowired + private PtImageMapper ptImageMapper; + + @Autowired + private TerminalInfoMapper terminalInfoMapper; + + @Autowired + private DockerCallbackTool dockerCallbackTool; + + @Autowired + private ResourceCache resourceCache; + + @Autowired + private RedisUtils redisUtils; + + 
@Value("Task:Terminal:"+"${spring.profiles.active}_terminal_id_") + private String terminalIdPrefix; + + private static final String CONN = "Conn"; + + /** + * 创建 + * + * @param terminalCreateDTO + * @return TerminalVO + */ + @Override + @Transactional(rollbackFor = Exception.class) + public TerminalVO create(TerminalCreateDTO terminalCreateDTO) { + try{ + return start(terminalCreateDTO); + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"create error : {}",e.getMessage(),e); + throw new BusinessException("内部错误:"+e.getMessage()); + } + } + + /** + * 重启 + * + * @param terminalCreateDTO + * @return TerminalVO + */ + @Override + public TerminalVO restart(TerminalCreateDTO terminalCreateDTO) { + try{ + if (terminalCreateDTO == null || terminalCreateDTO.getId() == null){ + LogUtil.error(LogEnum.TERMINAL,"restart error : {}",terminalCreateDTO); + throw new BusinessException("缺少id"); + } + return start(terminalCreateDTO); + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"restart error : {}",e.getMessage(),e); + throw new BusinessException("内部错误:"+e.getMessage()); + } + } + + @Override + public boolean preserve(TerminalPreserveDTO terminalPreserveDTO) { + Terminal terminal = terminalMapper.selectById(terminalPreserveDTO.getId()); + if (terminal == null){ + LogUtil.error(LogEnum.TERMINAL,"preserve terminal 数据不存在 terminalPreserveDTO: {}",terminalPreserveDTO); + throw new BusinessException("数据不存在"); + } + + LambdaQueryWrapper wrapper = new LambdaQueryWrapper<>(); + wrapper.eq(TerminalInfo::getTerminalId, terminalPreserveDTO.getId()); + List terminalInfoList = terminalInfoMapper.selectList(wrapper); + if (CollectionUtils.isEmpty(terminalInfoList)){ + LogUtil.error(LogEnum.TERMINAL,"preserve terminalInfoList 数据不存在 terminalPreserveDTO: {}",terminalPreserveDTO); + throw new BusinessException("数据不存在"); + } + + TerminalInfo masterTerminalInfo = null; + for (TerminalInfo terminalInfo : terminalInfoList) { + if (terminalInfo.isMasterFlag()){ + masterTerminalInfo = 
terminalInfo; + } + } + if (masterTerminalInfo == null){ + LogUtil.error(LogEnum.TERMINAL,"master 节点不存在 terminalPreserveDTO:{}",terminalPreserveDTO); + throw new BusinessException("master 节点不存在"); + } + + BizPod pod = podApi.getWithResourceName(k8sNameTool.getNamespace(terminal.getCreateUserId()),masterTerminalInfo.getK8sResourceName()); + if (pod == null){ + LogUtil.error(LogEnum.TERMINAL,"master 容器不存在 terminalPreserveDTO:{}",terminalPreserveDTO); + throw new BusinessException("master 容器不存在"); + } + if (!PodPhaseEnum.RUNNING.getPhase().equals(pod.getPhase()) || pod.getPodIp() == null || StringUtils.isNotEmpty(pod.getContainerStateMessages())){ + LogUtil.error(LogEnum.TERMINAL,"master 容器未运行 terminalPreserveDTO:{}",terminalPreserveDTO); + throw new BusinessException("master 容器未运行"); + } + String containerID = pod.getContainerId(); + if (StringUtils.isEmpty(containerID)){ + LogUtil.error(LogEnum.TERMINAL,"master 容器未运行 terminalPreserveDTO:{}",terminalPreserveDTO); + throw new BusinessException("master 容器未运行"); + } + terminal.setStatus(TerminalStatusEnum.SAVING.getCode()); + terminalMapper.updateById(terminal); + DockerClient dockerClient = dockerClientFactory.getDockerClient(pod.getHostIP()); + String newImagePath = terminal.getImageProject()+SymbolConstant.SLASH+userContextService.getCurUserId()+SymbolConstant.SLASH+terminalPreserveDTO.getImageName(); + String newImageRepository = terminalConfig.getHarborAddress()+SymbolConstant.SLASH+newImagePath; + try { + dockerApi.commit(dockerClient,containerID,newImageRepository,terminalPreserveDTO.getImageTag()); + boolean pushResult = dockerApi.push(dockerClient,newImageRepository+SymbolConstant.COLON+terminalPreserveDTO.getImageTag(),new TerminalPushImageResultCallback(dockerCallbackTool.getCallbackUrl(SymbolConstant.LOCAL_HOST,terminalConfig.getServerPort(), DockerOperationEnum.PUSH.getType()),terminal.getId(),dockerClient,userContextService.getCurUserId())); + if (!pushResult){ + LogUtil.error(LogEnum.TERMINAL,"master 
推送镜像错误 terminalPreserveDTO:{}",terminalPreserveDTO); + throw new BusinessException("推送镜像错误:"); + } + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"master 保存容器错误:{}",e.getMessage(),e); + throw new BusinessException("保存容器错误:"+e.getMessage()); + } + + terminal.setImageUrl(newImagePath+SymbolConstant.COLON+terminalPreserveDTO.getImageTag()); + terminal.setImageName(terminalPreserveDTO.getImageName()); + terminal.setDescription(terminalPreserveDTO.getImageRemark()); + terminal.setImageTag(terminalPreserveDTO.getImageTag()); + terminal.setLastStopTime(new Date()); + terminal.setUpdateInfo(userContextService.getCurUserId()); + + return terminalMapper.updateById(terminal) > 0; + } + + @Override + @Transactional(rollbackFor = Exception.class) + public boolean delete(TerminalDTO terminalDTO) { + try{ + Terminal terminal = terminalMapper.selectById(terminalDTO.getId()); + if (terminal == null){ + LogUtil.error(LogEnum.TERMINAL,"delete 数据不存在 terminalDTO: {}",terminalDTO); + throw new BusinessException("数据不存在"); + } + terminal.setDeleted(true); + terminal.setLastStopTime(new Date()); + terminal.setStatus(TerminalStatusEnum.DELETED.getCode()); + terminalMapper.deleteById(terminal); + // 删除任务缓存 + String taskIdentify = (String) redisUtils.get(terminalIdPrefix + String.valueOf(terminal.getId())); + if (StringUtils.isNotEmpty(taskIdentify)){ + redisUtils.del(taskIdentify, terminalIdPrefix + String.valueOf(terminal.getId())); + } + String namespace = k8sNameTool.getNamespace(terminal.getCreateUserId()); + + LambdaQueryWrapper wrapper = new LambdaQueryWrapper<>(); + wrapper.eq(TerminalInfo::getTerminalId, terminalDTO.getId()); + List terminalInfoList = terminalInfoMapper.selectList(wrapper); + + if (!CollectionUtils.isEmpty(terminalInfoList)){ + for (TerminalInfo terminalInfo : terminalInfoList) { + terminalApi.delete(namespace,terminalInfo.getK8sResourceName()); + terminalInfo.setDeleted(true); + terminalInfoMapper.deleteById(terminalInfo); + } + } + return true; + }catch 
(Exception e){ + LogUtil.error(LogEnum.TERMINAL,"detail error : {}",e.getMessage(),e); + throw new BusinessException("内部错误:"+e.getMessage()); + } + } + + /** + * 查询详情 + * + * @param terminalDTO + * @return + */ + @Override + public TerminalVO detail(TerminalDTO terminalDTO) { + try{ + Terminal terminal = terminalMapper.selectById(terminalDTO.getId()); + if (terminal == null){ + LogUtil.error(LogEnum.TERMINAL,"detail 数据不存在 terminalDTO: {}",terminalDTO); + throw new BusinessException("数据不存在"); + } + + LambdaQueryWrapper wrapper = new LambdaQueryWrapper<>(); + wrapper.eq(TerminalInfo::getTerminalId, terminalDTO.getId()); + List terminalInfoList = terminalInfoMapper.selectList(wrapper); + + return new TerminalVO(terminal,terminalInfoList); + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"detail error : {}",e.getMessage(),e); + throw new BusinessException("内部错误:"+e.getMessage()); + } + } + + /** + * 查询列表 + * + * @return + */ + @Override + public List list() { + try{ + List terminalVOList = new ArrayList<>(); + + LambdaQueryWrapper wrapper = new LambdaQueryWrapper<>(); + wrapper.eq(Terminal::getCreateUserId, userContextService.getCurUserId()); + List terminalInfoList = terminalMapper.selectList(wrapper); + + if (CollectionUtils.isEmpty(terminalInfoList)){ + return terminalVOList; + } + + for (Terminal terminal : terminalInfoList) { + terminalVOList.add(detail(new TerminalDTO(terminal.getId()))); + } + return terminalVOList; + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"list error : {}",e.getMessage(),e); + throw new BusinessException("内部错误:"+e.getMessage()); + } + } + + @Override + @Transactional(rollbackFor = Exception.class) + public TerminalInfo refreshTerminalInfoStatus(Long terminalInfoId) { + try { + if (terminalInfoId == null){ + return null; + } + TerminalInfo terminalInfo = terminalInfoMapper.selectById(terminalInfoId); + if (terminalInfo == null){ + LogUtil.error(LogEnum.TERMINAL,"refreshTerminalInfoStatus no terminalInfo found 
terminalInfoId:{}",terminalInfoId); + return null; + } + + String namespace = k8sNameTool.getNamespace(terminalInfo.getCreateUserId()); + + TerminalResourceVO terminalResourceVO = terminalApi.get(namespace,terminalInfo.getK8sResourceName()); + if (terminalResourceVO != null && terminalResourceVO.getBizService() != null){ + BizServicePort bizServicePort = terminalResourceVO.getBizService().getServicePortByTargetPort(MagicNumConstant.TWENTY_TWO); + if (bizServicePort != null){ + terminalInfo.setSshPort(bizServicePort.getNodePort()); + } + } + BizPod pod = podApi.getWithResourceName(namespace,terminalInfo.getK8sResourceName()); + if (pod != null){ + terminalInfo.setPodIp(pod.getPodIp()); + } + if (terminalInfo.getSshPort() != null){ + if (StringUtils.isNotEmpty(terminalInfo.getSshUser())){ + terminalInfo.setSsh(StrUtil.format(TerminalConstant.SSH_USER_COMMAND,terminalInfo.getSshPort(),terminalInfo.getSshUser(),terminalConfig.getSshHost())); + }else { + terminalInfo.setSsh(StrUtil.format(TerminalConstant.SSH_COMMAND,terminalInfo.getSshPort(),terminalConfig.getSshHost())); + } + } + terminalInfo.setStatus(TerminalInfoStatusEnum.getCode(pod.getRealPodPhase())); + terminalInfo.setUpdateInfo(userContextService.getCurUserId()); + terminalInfoMapper.updateById(terminalInfo); + return terminalInfo; + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"refreshStatus error : {}",e.getMessage(),e); + return null; + } + } + + @Override + public Terminal refreshTerminalStatus(Long id) { + try { + if (id == null){ + return null; + } + Terminal terminal = terminalMapper.selectById(id); + if (terminal == null){ + LogUtil.error(LogEnum.TERMINAL,"refreshTerminalStatus no terminal found id:{}",id); + return null; + } + + LambdaQueryWrapper wrapper = new LambdaQueryWrapper<>(); + wrapper.eq(TerminalInfo::getTerminalId, terminal.getId()); + List terminalInfoList = terminalInfoMapper.selectList(wrapper); + + Integer runningNode = 0; + for (TerminalInfo terminalInfo : terminalInfoList) 
{ + TerminalInfo refreshTerminalInfo = refreshTerminalInfoStatus(terminalInfo.getId()); + if (refreshTerminalInfo != null && TerminalInfoStatusEnum.RUNNING.getCode().equals(refreshTerminalInfo.getStatus())){ + ++runningNode; + } + } + terminal.setRunningNode(runningNode); + terminal.setUpdateInfo(userContextService.getCurUserId()); + + terminalMapper.updateById(terminal); + return terminal; + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"refreshStatus error : {}",e.getMessage(),e); + return null; + } + } + + @Override + @Transactional(rollbackFor = Exception.class) + public boolean terminalPodCallback(int times, TerminalK8sPodCallbackCreateDTO req) { + LogUtil.info(LogEnum.TERMINAL,"terminalPodCallback times:{} req:{}",times,req); + try { + LambdaQueryWrapper wrapper = new LambdaQueryWrapper<>(); + wrapper.eq(TerminalInfo::getK8sResourceName, req.getResourceName()); + TerminalInfo terminalInfo = terminalInfoMapper.selectOne(wrapper); + if (terminalInfo == null){ + LogUtil.error(LogEnum.TERMINAL,"terminalPodCallback no terminalInfo found k8sResourceName:{}",req.getResourceName()); + return false; + } + + //Update the status. NOTE(review): setStatus below overwrites terminalInfo's status before the RUNNING comparisons that adjust runningNode, so those comparisons read the new status rather than the previous one; they also compare a status code with req.getPhase() directly - confirm the intended old-vs-new check + if (StringUtils.isEmpty(req.getMessages())){ + terminalInfo.removeStatusDetail(req.getResourceName()); + }else { + terminalInfo.putStatusDetail(req.getResourceName(),req.getMessages()); + } + terminalInfo.setStatus(TerminalInfoStatusEnum.getCode(req.getPhase())); + terminalInfo.setUpdateInfo(userContextService.getCurUserId()); + terminalInfoMapper.updateById(terminalInfo); + refreshTerminalInfoStatus(terminalInfo.getId()); + + Terminal terminal = terminalMapper.selectById(terminalInfo.getTerminalId()); + if (TerminalInfoStatusEnum.RUNNING.getCode().equals(terminalInfo.getStatus()) && !TerminalInfoStatusEnum.RUNNING.getCode().equals(req.getPhase())){ + if (terminal.getRunningNode() > MagicNumConstant.ZERO){ + terminal.setRunningNode(terminal.getRunningNode() - MagicNumConstant.ONE); + }else { + terminal.setRunningNode(MagicNumConstant.ZERO); + } +
} + if (!TerminalInfoStatusEnum.RUNNING.getCode().equals(terminalInfo.getStatus()) && TerminalInfoStatusEnum.RUNNING.getCode().equals(req.getPhase())){ + terminal.setRunningNode(terminal.getRunningNode()+MagicNumConstant.ONE); + } + terminal.setUpdateInfo(userContextService.getCurUserId()); + //只修改节点运行计数,不修改状态 + terminal.setStatus(null); + terminalMapper.updateById(terminal); + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"terminalPodCallback error : {}",e.getMessage(),e); + } + return true; + } + + /** + * 推送镜像完成 + * + * @param terminalId + */ + @Override + public void pushImageComplete(Long terminalId,Long userId) { + try{ + LogUtil.info(LogEnum.TERMINAL,"pushImageComplete id:{}",terminalId); + if (terminalId == null){ + return; + } + Terminal terminal = terminalMapper.selectById(terminalId); + if (terminal == null){ + LogUtil.error(LogEnum.TERMINAL,"pushImageComplete no terminal found id:{}",terminalId); + return; + } + stop(userId,terminalId); + if (TerminalStatusEnum.SAVING.getCode().equals(terminal.getStatus())){ + terminal.setStatus(TerminalStatusEnum.DELETED.getCode()); + terminalMapper.updateById(terminal); + + LambdaQueryWrapper wrapper = new LambdaQueryWrapper<>(); + wrapper.eq(PtImage::getImageUrl, terminal.getImageUrl()); + PtImage ptImage = ptImageMapper.selectOne(wrapper); + if (ptImage == null){ + ptImage = new PtImage(); + } + + ptImage.setImageName(terminal.getImageName()); + ptImage.setImageUrl(terminal.getImageUrl()); + ptImage.setImageTag(terminal.getImageTag()); + ptImage.setRemark(terminal.getDescription()); + ptImage.setProjectName(terminal.getImageProject()); + ptImage.setImageResource(MagicNumConstant.ZERO); + ptImage.setImageStatus(MagicNumConstant.ONE); + ptImage.setDeleted(false); + ptImage.setUpdateUserId(userId); + ptImage.setUpdateTime(new Timestamp(new java.util.Date().getTime())); + + if (ptImage.getId() != null){ + ptImageMapper.updateById(ptImage); + }else { + ptImage.setOriginUserId(userId); + 
ptImage.setCreateUserId(userId); + ptImage.setCreateTime(new Timestamp(new java.util.Date().getTime())); + ptImageMapper.insert(ptImage); + } + } + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"pushImageComplete error : {}",e.getMessage(),e); + } + } + + /** + * 推送镜像失败 + * + * @param terminalId + * @param message 失败信息 + */ + @Override + public void pushImageError(Long terminalId, String message,Long userId) { + try{ + LogUtil.info(LogEnum.TERMINAL,"pushImageError id:{}",terminalId); + if (terminalId == null){ + return; + } + stop(userId,terminalId); + + Terminal terminal = terminalMapper.selectById(terminalId); + if (terminal == null){ + LogUtil.error(LogEnum.TERMINAL,"pushImageError no terminal found id:{}",terminalId); + return; + } + terminal.setStatus(TerminalStatusEnum.FAILED.getCode()); + terminal.putStatusDetail("镜像推送失败",message); + terminalMapper.updateById(terminal); + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"pushImageError error : {}",e.getMessage(),e); + } + } + + /** + * 构建TerminalBO + * + * @param terminalCreateDTO + * @param terminalInfo + * @param namespace 命名空间 + * @return TerminalBO + */ + private TerminalBO buildTerminalBO(TerminalCreateDTO terminalCreateDTO,TerminalInfo terminalInfo,String namespace, String taskIdentifyLabel){ + TerminalBO terminalBO = new TerminalBO(); + terminalBO.setNamespace(namespace); + terminalBO.setResourceName(terminalInfo.getK8sResourceName()); + terminalBO.setReplicas(1); + terminalBO.setGpuNum(terminalInfo.getGpuNum()); + terminalBO.setMemNum(terminalInfo.getMemNum()); + terminalBO.setCpuNum(terminalInfo.getCpuNum()); + terminalBO.setImage(terminalConfig.getHarborAddress()+SymbolConstant.SLASH+terminalCreateDTO.getImageUrl()); + terminalBO.setFsMounts(Maps.newHashMap()); + terminalBO.setBusinessLabel(BusinessLabelServiceNameEnum.TERMINAL.getBusinessLabel()); + terminalBO.setTaskIdentifyLabel(taskIdentifyLabel); + terminalBO.addPort(MagicNumConstant.TWENTY_TWO); + 
terminalBO.addPorts(terminalCreateDTO.getPorts()); + terminalBO.setCmdLines(terminalCreateDTO.getCmdLines()); + if (terminalCreateDTO.getDataSourcePath() != null){ + String dataSetDir = fileStoreApi.getRootDir() + fileStoreApi.getBucket().substring(1)+terminalCreateDTO.getDataSourcePath(); + terminalBO.putfsMounts(TerminalConstant.DATASET_VOLUME_MOUNTS,new PtMountDirBO(dataSetDir,true)); + } + if (terminalInfo.getDiskMemNum() != null){ + String workspaceDir = fileStoreApi.getRootDir() + fileStoreApi.getBucket().substring(1)+ terminalConfig.getTerminalDir()+SymbolConstant.SLASH+userContextService.getCurUserId()+SymbolConstant.SLASH+terminalConfig.getWorkspaceDir(); + terminalBO.putfsMounts(TerminalConstant.WORKSPACE_VOLUME_MOUNTS,new PtMountDirBO(workspaceDir,terminalInfo.getDiskMemNum()+ K8sParamConstants.MEM_UNIT,terminalInfo.getDiskMemNum()+ K8sParamConstants.MEM_UNIT,false)); + } + return terminalBO; + } + + /** + * 部署服务 + * + * @param terminalCreateDTO + * @return + */ + private TerminalVO start(TerminalCreateDTO terminalCreateDTO){ + try{ + LogUtil.info(LogEnum.BIZ_K8S, "TerminalService create terminalCreateDTO:{}", terminalCreateDTO); + String k8sResourceName = k8sNameTool.generateResourceName(BizEnum.TERMINAL, RandomUtil.randomString(MagicNumConstant.FIVE)); + String namespace = ""; + String sshUser = terminalCreateDTO.getSshUser(); + String sshPassword = terminalCreateDTO.getSshPwd(); + + if (terminalCreateDTO.getId() == null){ + namespace = k8sNameTool.getNamespace(userContextService.getCurUser()); + }else { + Terminal oldTerminal = terminalMapper.selectById(terminalCreateDTO.getId()); + namespace = k8sNameTool.getNamespace(oldTerminal.getCreateUserId()); + } + + Terminal terminal = new Terminal(); + terminal.setId(terminalCreateDTO.getId()); + terminal.setName(StringUtils.isEmpty(terminalCreateDTO.getName())?CONN+ SymbolConstant.HYPHEN +k8sResourceName:terminalCreateDTO.getName()); + terminal.setImageName(terminalCreateDTO.getImageName()); + 
terminal.setImageTag(terminalCreateDTO.getImageTag()); + terminal.setImageUrl(terminalCreateDTO.getImageUrl()); + terminal.setDataSourceName(terminalCreateDTO.getDataSourceName()); + terminal.setDataSourcePath(terminalCreateDTO.getDataSourcePath()); + terminal.setTotalNode(terminalCreateDTO.getTotalNode()); + terminal.setDescription(terminalCreateDTO.getDescription()); + terminal.setSameInfo(terminalCreateDTO.isSameInfo()); + terminal.setLastStartTime(new Date()); + terminal.setStatus(TerminalStatusEnum.RUNNING.getCode()); + + //terminal数据入库 + if (terminal.getId() == null){ + terminal.setOriginUserId(userContextService.getCurUserId()); + terminal.setCreateUserId(userContextService.getCurUserId()); + terminalMapper.insert(terminal); + }else { + terminal.setUpdateInfo(userContextService.getCurUserId()); + terminalMapper.updateById(terminal); + } + + //复用ssh 用户名 密码 + if (terminal.getId() != null){ + List terminalInfoList = terminalInfoMapper.selectByTerminalId(terminal.getId()); + if (!CollectionUtils.isEmpty(terminalInfoList)){ + sshUser = StringUtils.isEmpty(sshUser) ? terminalInfoList.get(0).getSshUser():sshUser; + sshPassword = StringUtils.isEmpty(sshPassword) ? 
terminalInfoList.get(0).getSshPassword():sshPassword; + } + } + + List terminalInfoList = new ArrayList<>(); + + if (CollectionUtils.isEmpty(terminalCreateDTO.getInfo())){ + LogUtil.error(LogEnum.TERMINAL,"start 未填写节点规格 terminalCreateDTO:{}",terminalCreateDTO); + throw new BusinessException(ResponseCode.ERROR, "未填写节点规格!"); + } + + if (terminalCreateDTO.isSameInfo()){ + for (int i = 0;i < terminalCreateDTO.getTotalNode();i++){ + //第一个默认为主节点 + TerminalInfo terminalInfo = terminalCreateDTO.getInfo().get(0).toTerminalInfo(terminal.getId(),k8sResourceName+i,userContextService.getCurUserId(),sshUser,sshPassword); + if (i == 0){ + terminalInfo.setMasterFlag(true); + } + terminalInfoList.add(terminalInfo); + } + }else { + for (int i = 0;i < terminalCreateDTO.getInfo().size();i++){ + //第一个默认为主节点 + TerminalInfo terminalInfo = terminalCreateDTO.getInfo().get(i).toTerminalInfo(terminal.getId(),k8sResourceName+i,userContextService.getCurUserId(),sshUser,sshPassword); + if (i == 0){ + terminalInfo.setMasterFlag(true); + } + terminalInfoList.add(terminalInfo); + } + } + terminalInfoMapper.deleteByTerminalId(terminal.getId()); + //terminalInfo 数据入库 + for (TerminalInfo terminalInfo : terminalInfoList) { + if (terminalInfo.getId() == null){ + terminalInfo.setCreateUserId(userContextService.getCurUserId()); + terminalInfoMapper.insert(terminalInfo); + }else { + terminalInfo.setUpdateInfo(userContextService.getCurUserId()); + terminalInfoMapper.updateById(terminalInfo); + } + } + //获取任务识别标识 + String taskIdentify = resourceCache.getTaskIdentify(terminal.getId(),terminal.getName(),terminalIdPrefix); + //启动k8s服务 + for (TerminalInfo terminalInfo : terminalInfoList) { + TerminalBO terminalBO = buildTerminalBO(terminalCreateDTO,terminalInfo,namespace, taskIdentify); + terminalApi.create(terminalBO); + } + + return detail(new TerminalDTO(terminal.getId())); + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"create error : {}",e.getMessage(),e); + throw new 
BusinessException("内部错误:"+e.getMessage()); + } + } + + /** + * 停止服务 + * + * @param userId 用户id + * @param terminalId + */ + private void stop(Long userId,Long terminalId){ + try{ + LogUtil.info(LogEnum.TERMINAL, "TerminalService stop userId {} terminalId {}", userId,terminalId); + if (terminalId == null){ + return; + } + LambdaQueryWrapper wrapper = new LambdaQueryWrapper<>(); + wrapper.eq(TerminalInfo::getTerminalId, terminalId); + List terminalInfoList = terminalInfoMapper.selectList(wrapper); + + String namespace = k8sNameTool.getNamespace(userId); + + if (!CollectionUtils.isEmpty(terminalInfoList)){ + for (TerminalInfo terminalInfo : terminalInfoList) { + terminalApi.delete(namespace,terminalInfo.getK8sResourceName()); + } + } + }catch (Exception e){ + LogUtil.error(LogEnum.TERMINAL,"stop error : {}",e.getMessage(),e); + } + } +} diff --git a/dubhe-server/dubhe-terminal/src/main/resources/bootstrap.yml b/dubhe-server/dubhe-terminal/src/main/resources/bootstrap.yml new file mode 100644 index 0000000..5614294 --- /dev/null +++ b/dubhe-server/dubhe-terminal/src/main/resources/bootstrap.yml @@ -0,0 +1,31 @@ +server: + port: 8970 + +spring: + application: + name: dubhe-terminal + profiles: + active: dev + cloud: + nacos: + config: + enabled: true + server-addr: 10.105.1.133:8848 + namespace: 9a185fa2-991b-4465-bd4d-25f9f079c930 + shared-configs[0]: + data-id: common-biz.yaml + group: dubhe + refresh: true # 是否动态刷新,默认为false + shared-configs[1]: + data-id: common-k8s.yaml + group: dubhe + refresh: true + shared-configs[2]: + data-id: dubhe-terminal.yaml + group: dubhe + refresh: true + discovery: + enabled: true + namespace: 9a185fa2-991b-4465-bd4d-25f9f079c930 + group: dubhe + server-addr: 10.105.1.133:8848 diff --git a/dubhe-server/dubhe-terminal/src/test/java/service/TerminalServiceTest.java b/dubhe-server/dubhe-terminal/src/test/java/service/TerminalServiceTest.java new file mode 100644 index 0000000..fe899ee --- /dev/null +++ 
b/dubhe-server/dubhe-terminal/src/test/java/service/TerminalServiceTest.java @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package service; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +import org.dubhe.terminal.TerminalApplication; +import org.dubhe.terminal.domain.dto.TerminalCreateDTO; +import org.dubhe.terminal.domain.dto.TerminalInfoDTO; +import org.dubhe.terminal.service.TerminalService; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.junit4.SpringRunner; + +/** + * @description TerminalService测试类 + * @date 2020-07-20 + */ +@SpringBootTest(classes= TerminalApplication.class) +@RunWith(SpringRunner.class) +public class TerminalServiceTest { + @Autowired + private TerminalService terminalService; + + @Test + public void create() { + TerminalCreateDTO terminalCreateDTO = new TerminalCreateDTO(); + terminalCreateDTO.setName("terminal-test"); + terminalCreateDTO.setDataSourceName("测试数据集"); + terminalCreateDTO.setDataSourcePath("dataset/2/versionFile/V0001/ofrecord/train"); + terminalCreateDTO.setImageTag("oneflow-0.1.102-py36-0713"); + terminalCreateDTO.setImageName("jupyterlab"); + 
terminalCreateDTO.setImageUrl("notebook/jupyterlab:oneflow-0.1.102-py36-0713"); + terminalCreateDTO.setTotalNode(2); + terminalCreateDTO.setDescription("terminal-test"); + terminalCreateDTO.setSameInfo(true); + terminalCreateDTO.setPorts(Sets.newHashSet(80,443)); + terminalCreateDTO.setInfo(Lists.newArrayList(new TerminalInfoDTO(null,1,0,1024,1024))); + terminalService.create(terminalCreateDTO); + } +} diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/async/TrainJobAsync.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/async/TrainJobAsync.java index e3fa0db..8e3aa1a 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/async/TrainJobAsync.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/async/TrainJobAsync.java @@ -214,10 +214,13 @@ public class TrainJobAsync { .setMasterCmd(wholeCommand) .setMemNum(baseTrainJobDTO.getMemNum()) .setCpuNum(baseTrainJobDTO.getCpuNum() * MagicNumConstant.ONE_THOUSAND) - .putFsMounts(TrainConstant.DATASET_VOLUME_MOUNTS, k8sNameTool.getAbsolutePath(baseTrainJobDTO.getDataSourcePath())) .putFsMounts(TrainConstant.WORKSPACE_VOLUME_MOUNTS, fileStoreApi.formatPath(fileStoreApi.getRootDir() + basePath)) .putFsMounts(TrainConstant.MODEL_VOLUME_MOUNTS, k8sNameTool.getAbsolutePath(relativePath + StrUtil.SLASH + trainJobConfig.getOutPath())) - .setBusinessLabel(k8sNameTool.getPodLabel(BizEnum.ALGORITHM)); + .setBusinessLabel(k8sNameTool.getPodLabel(BizEnum.ALGORITHM)) + .setTaskIdentifyLabel(baseTrainJobDTO.getTaskIdentify()); + if (StringUtils.isNotBlank(baseTrainJobDTO.getDataSourcePath())) { + distributeTrainBO.putFsMounts(TrainConstant.DATASET_VOLUME_MOUNTS, k8sNameTool.getAbsolutePath(baseTrainJobDTO.getDataSourcePath())); + } if (StringUtils.isNotBlank(valDataSourcePath)) { distributeTrainBO.putFsMounts(trainJobConfig.getDockerValDatasetPath(), fileStoreApi.formatPath(fileStoreApi.getRootDir() + fileStoreApi.getBucket() + valDataSourcePath)); } @@ -391,13 +394,34 @@ public class 
TrainJobAsync { jobBo.setNamespace(namespace) .setName(baseTrainJobDTO.getJobName()) .setImage(ptImageAndAlgorithmVO.getImageName()) - .putFsMounts(trainJobConfig.getDockerDatasetPath(), fileStoreApi.getRootDir() + fileStoreApi.getBucket().substring(1) + baseTrainJobDTO.getDataSourcePath()) .setCmdLines(list) .putFsMounts(trainJobConfig.getDockerTrainPath(), fileStoreApi.getRootDir() + commonPath.substring(1)) - .setBusinessLabel(k8sNameTool.getPodLabel(BizEnum.ALGORITHM)); + .setBusinessLabel(k8sNameTool.getPodLabel(BizEnum.ALGORITHM)) + .setTaskIdentifyLabel(baseTrainJobDTO.getTaskIdentify()); + if (StringUtils.isNotBlank(baseTrainJobDTO.getDataSourcePath())) { + jobBo.putFsMounts(trainJobConfig.getDockerDatasetPath(), fileStoreApi.getRootDir() + fileStoreApi.getBucket().substring(1) + baseTrainJobDTO.getDataSourcePath()); + } if (StringUtils.isNotBlank(valDataSourcePath)) { jobBo.putFsMounts(trainJobConfig.getDockerValDatasetPath(), fileStoreApi.formatPath(fileStoreApi.getRootDir() + fileStoreApi.getBucket() + valDataSourcePath)); } + //挂载pip路径 + if (StringUtils.isNotBlank(baseTrainJobDTO.getPipSitePackagePath())) { + String formatPath = fileStoreApi.formatPath(fileStoreApi.getRootDir() + fileStoreApi.getBucket() + baseTrainJobDTO.getPipSitePackagePath()); + jobBo.putFsMounts(trainJobConfig.getDockerPipSitePackagePath(), formatPath); + //检测pip包依赖路径 + int startIndex = -1; + List cmdLines = jobBo.getCmdLines(); + for (int i = 0; i < cmdLines.size(); i++) { + //bash -c 这种情况 + if ("-c".equals(cmdLines.get(i))) { + startIndex = i; + } + } + String cmdLine = cmdLines.get(startIndex + 1); + String appendPythonPath = " export PYTHONPATH=" + trainJobConfig.getDockerPipSitePackagePath() + " && "; + cmdLine = appendPythonPath + cmdLine; + cmdLines.set(startIndex + 1, cmdLine); + } //延时启动,单位为分钟 if (baseTrainJobDTO.getDelayCreateTime() != null && baseTrainJobDTO.getDelayCreateTime() > 0) { jobBo.setDelayCreateTime(baseTrainJobDTO.getDelayCreateTime() * 
MagicNumConstant.SIXTY); diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/client/NoteBookClient.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/client/NoteBookClient.java new file mode 100644 index 0000000..a50c4de --- /dev/null +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/client/NoteBookClient.java @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.train.client; + +import org.dubhe.biz.base.constant.ApplicationNameConst; +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.NoteBookVO; +import org.dubhe.train.client.fallback.NoteBookClientFallback; +import org.springframework.cloud.openfeign.FeignClient; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; + +/** + * @description Notebook远程服务调用接口 + * @date 2021-08-12 + */ +@FeignClient(value = ApplicationNameConst.SERVER_NOTEBOOK, contextId = "notebookClient", fallback = NoteBookClientFallback.class) +public interface NoteBookClient { + + /** + * 根据Id查询所有数据 + * + * @param id + * @return NoteBookVO notebook详情 + */ + @GetMapping("/notebooks/detail/{id}") + DataResponseBody getNoteBook(@PathVariable("id") Long id); + +} + + diff --git 
a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/client/fallback/NoteBookClientFallback.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/client/fallback/NoteBookClientFallback.java new file mode 100644 index 0000000..65e4d89 --- /dev/null +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/client/fallback/NoteBookClientFallback.java @@ -0,0 +1,33 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ +package org.dubhe.train.client.fallback; + +import org.dubhe.biz.base.vo.DataResponseBody; +import org.dubhe.biz.base.vo.NoteBookVO; +import org.dubhe.biz.dataresponse.factory.DataResponseFactory; +import org.dubhe.train.client.NoteBookClient; + +/** + * @description Notebook远程调用熔断类 + * @date 2021-08-12 + */ +public class NoteBookClientFallback implements NoteBookClient { + @Override + public DataResponseBody getNoteBook(Long id) { + return DataResponseFactory.failed("call dubhe-notebook server selectById error"); + } +} diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/config/TrainJobConfig.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/config/TrainJobConfig.java index 926a241..39b5302 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/config/TrainJobConfig.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/config/TrainJobConfig.java @@ -74,6 +74,8 @@ public class TrainJobConfig { private String dockerValDatasetPath; + private String dockerPipSitePackagePath; + private String loadValDatasetKey; private String dockerVisualizedLogPath; diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/dao/PtTrainJobMapper.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/dao/PtTrainJobMapper.java index ac9471e..8fbc361 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/dao/PtTrainJobMapper.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/dao/PtTrainJobMapper.java @@ -1,12 +1,12 @@ /** * Copyright 2020 Tianshu AI Platform. All Rights Reserved. - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,7 +20,6 @@ package org.dubhe.train.dao; import com.baomidou.mybatisplus.core.mapper.BaseMapper; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import org.apache.ibatis.annotations.Param; -import org.apache.ibatis.annotations.Select; import org.dubhe.biz.base.annotation.DataPermission; import org.dubhe.train.domain.entity.PtTrainJob; import org.dubhe.train.domain.vo.PtTrainVO; @@ -29,33 +28,23 @@ import org.dubhe.train.domain.vo.PtTrainVO; * @description 训练作业job Mapper 接口 * @date 2020-04-27 */ -@DataPermission(ignoresMethod = {"insert","selectCountByStatus","getPageTrain"}) +@DataPermission(ignoresMethod = {"insert", "getPageTrain"}) public interface PtTrainJobMapper extends BaseMapper { - /** - * 获取训练列表,并进行分页。 - * - * @param page 页 - * @param createUserId 用户id - * @param trainStatus 训练状态 - * @param trainName 训练名称 - * @param sort 排序字段 - * @param order 排序方式 - * - * @return PtTrainVO - */ - Page getPageTrain(Page page, @Param("createUserId") Long createUserId, - @Param("trainStatus") Integer trainStatus, @Param("trainName") String trainName, @Param("sort") String sort, - @Param("order") String order); - - /** - * 根据状态进行统计数量 + /** + * 获取训练列表,并进行分页。 + * + * @param page 页 + * @param createUserId 用户id + * @param trainStatus 训练状态 + * @param trainName 训练名称 + * @param sort 排序字段 + * @param order 排序方式 * - * @param userId 当前用户id - * @param param sql片段 - * @return 统计的数量 + * @return PtTrainVO */ - @Select("select count(1) from pt_train_job t1 inner join pt_train t2 on t1.train_id = t2.id where t1.create_user_id= #{userId} and t1.train_status in ${param} and t1.deleted= 0 and t2.deleted = 0 ") - Integer selectCountByStatus(@Param("userId") Long 
userId, @Param("param") String param); + Page getPageTrain(Page page, @Param("createUserId") Long createUserId, + @Param("trainStatus") Integer trainStatus, @Param("trainName") String trainName, @Param("sort") String sort, + @Param("order") String order); } diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/BaseImageDTO.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/BaseImageDTO.java index 8750127..33f0964 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/BaseImageDTO.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/BaseImageDTO.java @@ -20,7 +20,6 @@ import io.swagger.annotations.ApiModelProperty; import lombok.Data; import lombok.experimental.Accessors; -import javax.validation.constraints.NotBlank; import java.io.Serializable; /** @@ -34,11 +33,9 @@ public class BaseImageDTO implements Serializable { private static final long serialVersionUID = 1L; @ApiModelProperty(value = "镜像版本", required = true) - @NotBlank(message = "镜像版本不能为空") private String imageTag; @ApiModelProperty(value = "镜像名称", required = true) - @NotBlank(message = "镜像名称不能为空") private String imageName; } \ No newline at end of file diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/BaseTrainJobDTO.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/BaseTrainJobDTO.java index 9a9835e..b583343 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/BaseTrainJobDTO.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/BaseTrainJobDTO.java @@ -35,6 +35,7 @@ public class BaseTrainJobDTO implements Serializable { private JSONObject runParams; private String jobName; + private String taskIdentify; private String dataSourcePath; private String trainModelPath; private String trainOutPath; @@ -94,4 +95,9 @@ public class BaseTrainJobDTO implements Serializable { * 模型路径 */ private String modelLoadPathDir; + + 
/** + * pip包路径 + */ + private String pipSitePackagePath; } diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainJobBaseDTO.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainJobBaseDTO.java new file mode 100644 index 0000000..0f5c2f0 --- /dev/null +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainJobBaseDTO.java @@ -0,0 +1,47 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.train.domain.dto; + +import io.swagger.annotations.ApiModelProperty; +import lombok.Data; +import lombok.experimental.Accessors; +import org.dubhe.biz.base.constant.MagicNumConstant; +import org.hibernate.validator.constraints.Length; + +import javax.validation.constraints.NotBlank; + +/** + * @description 创建训练或者修改训练的基础数据包 + * @date 2021-08-19 + */ +@Data +@Accessors(chain = true) +public class PtTrainJobBaseDTO extends BaseImageDTO { + + @ApiModelProperty(value = "Notebook Id") + private Long notebookId; + + @ApiModelProperty(value = "算法来源id") + private Long algorithmId; + + @ApiModelProperty(value = "运行命令,输入长度不能超过128个字符", required = true) + @NotBlank(message = "运行命令不能为空") + @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "运行命令-输入长度不能超过128个字符") + private String runCommand; + +} diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainJobCreateDTO.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainJobCreateDTO.java index 0dcbd22..4b399df 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainJobCreateDTO.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainJobCreateDTO.java @@ -35,7 +35,7 @@ import javax.validation.constraints.*; @EqualsAndHashCode(callSuper = true) @Data @Accessors(chain = true) -public class PtTrainJobCreateDTO extends BaseImageDTO { +public class PtTrainJobCreateDTO extends PtTrainJobBaseDTO { @ApiModelProperty(value = "训练作业名, 长度在1-32个字符", required = true) @NotNull(message = "训练作业名不能为空") @@ -47,23 +47,19 @@ public class PtTrainJobCreateDTO extends BaseImageDTO { @Length(max = MagicNumConstant.INTEGER_TWO_HUNDRED_AND_FIFTY_FIVE, message = "描述长度不能超过255个字符") private String description; - @ApiModelProperty(value = "算法来源id", required = true) - @NotNull(message = "algorithmId不能为空") - @Min(value = MagicNumConstant.ONE, 
message = "algorithmId必须不小于1") - private Long algorithmId; + @ApiModelProperty("算法用途,输入长度不能超过128个字符") + @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "算法用途-输入长度不能超过128个字符") + private String algorithmUsage; - @ApiModelProperty(value = "运行命令,输入长度不能超过128个字符", required = true) - @NotBlank(message = "运行命令不能为空") - @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "运行命令-输入长度不能超过128个字符") - private String runCommand; + @ApiModelProperty("验证数据集算法用途,输入长度不能超过128个字符") + @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "验证数据集算法用途-输入长度不能超过128个字符") + private String valAlgorithmUsage; - @ApiModelProperty(value = "数据来源名称, 长度在1-127个字符", required = true) - @NotNull(message = "数据来源名称不能为空") + @ApiModelProperty(value = "数据来源名称, 长度在1-127个字符") @Length(min = MagicNumConstant.ONE, max = MagicNumConstant.ONE_HUNDRED_TWENTY_SEVEN, message = "数据来源名称长度在1-127个字符") private String dataSourceName; - @ApiModelProperty(value = "数据来源路径, 长度在1-127个字符", required = true) - @NotNull(message = "数据来源路径不能为空") + @ApiModelProperty(value = "数据来源路径, 长度在1-127个字符") @Length(min = MagicNumConstant.ONE, max = MagicNumConstant.ONE_HUNDRED_TWENTY_SEVEN, message = "数据来源路径长度在1-127个字符") private String dataSourcePath; @@ -171,4 +167,5 @@ public class PtTrainJobCreateDTO extends BaseImageDTO { @Length(max = MagicNumConstant.INTEGER_TWO_HUNDRED_AND_FIFTY_FIVE, message = "学生模型长度不能超过255个字符") @Pattern(regexp = TrainUtil.REGEXP_IDS_STRING, message = "学生模型ids参数格式不正确") private String studentModelIds; + } diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainJobUpdateDTO.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainJobUpdateDTO.java index 6133c5f..2edea15 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainJobUpdateDTO.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainJobUpdateDTO.java @@ -35,7 +35,7 @@ import javax.validation.constraints.*; 
@EqualsAndHashCode(callSuper = true) @Data @Accessors(chain = true) -public class PtTrainJobUpdateDTO extends BaseImageDTO { +public class PtTrainJobUpdateDTO extends PtTrainJobBaseDTO { @ApiModelProperty(value = "id", required = true) @NotNull(message = "id不能为null") @@ -46,23 +46,19 @@ public class PtTrainJobUpdateDTO extends BaseImageDTO { @Length(max = MagicNumConstant.INTEGER_TWO_HUNDRED_AND_FIFTY_FIVE, message = "描述长度不能超过255个字符") private String description; - @ApiModelProperty("算法id") - @NotNull(message = "algorithmId不能为空") - @Min(value = MagicNumConstant.ONE, message = "algorithmId必须大于1") - private Long algorithmId; + @ApiModelProperty("算法用途,输入长度不能超过128个字符") + @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "算法用途-输入长度不能超过128个字符") + private String algorithmUsage; - @ApiModelProperty(value = "运行命令,输入长度不能超过128个字符", required = true) - @NotBlank(message = "运行命令不能为空") - @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "运行命令-输入长度不能超过128个字符") - private String runCommand; + @ApiModelProperty("验证数据集算法用途,输入长度不能超过128个字符") + @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "验证数据集算法用途-输入长度不能超过128个字符") + private String valAlgorithmUsage; - @ApiModelProperty(value = "数据集来源路径,输入长度不能超过127个字符", required = true) - @NotBlank(message = "数据集来源路径不能为空") + @ApiModelProperty(value = "数据集来源路径,输入长度不能超过127个字符") @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_SEVEN, message = "数据集来源路径-输入长度不能超过127个字符") private String dataSourcePath; - @ApiModelProperty(value = "数据集来源名称,输入长度不能超过127个字符", required = true) - @NotBlank(message = "数据集来源名称不能为空") + @ApiModelProperty(value = "数据集来源名称,输入长度不能超过127个字符") @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_SEVEN, message = "数据集来源名称-输入长度不能超过127个字符") private String dataSourceName; diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainParamCreateDTO.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainParamCreateDTO.java index dfe0ca4..37b573f 
100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainParamCreateDTO.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainParamCreateDTO.java @@ -52,18 +52,24 @@ public class PtTrainParamCreateDTO extends BaseImageDTO { @Min(value = MagicNumConstant.ONE, message = "算法id不能小于1") private Long algorithmId; + @ApiModelProperty("算法用途,输入长度不能超过128个字符") + @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "算法用途-输入长度不能超过128个字符") + private String algorithmUsage; + + @ApiModelProperty("验证数据集算法用途,输入长度不能超过128个字符") + @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "验证数据集算法用途-输入长度不能超过128个字符") + private String valAlgorithmUsage; + @ApiModelProperty(value = "运行命令,输入长度不能超过128个字符", required = true) @NotBlank(message = "运行命令不能为空") @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "运行命令-输入长度不能超过128个字符") private String runCommand; - @ApiModelProperty(value = "数据集来源路径,输入长度不能超过127个字符", required = true) - @NotBlank(message = "数据集来源路径不能为空") + @ApiModelProperty(value = "数据集来源路径,输入长度不能超过127个字符") @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_SEVEN, message = "数据集来源路径-输入长度不能超过127个字符") private String dataSourcePath; - @ApiModelProperty(value = "数据集来源名称,输入长度不能超过127个字符", required = true) - @NotBlank(message = "数据集来源名称不能为空") + @ApiModelProperty(value = "数据集来源名称,输入长度不能超过127个字符") @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_SEVEN, message = "数据集来源名称-输入长度不能超过127个字符") private String dataSourceName; diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainParamUpdateDTO.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainParamUpdateDTO.java index 8e81459..53d0c04 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainParamUpdateDTO.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/dto/PtTrainParamUpdateDTO.java @@ -57,18 +57,24 @@ public class PtTrainParamUpdateDTO extends 
BaseImageDTO { @Min(value = MagicNumConstant.ONE, message = "算法id不能小于1") private Long algorithmId; + @ApiModelProperty("算法用途,输入长度不能超过128个字符") + @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "算法用途-输入长度不能超过128个字符") + private String algorithmUsage; + + @ApiModelProperty("验证数据集算法用途,输入长度不能超过128个字符") + @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "验证数据集算法用途-输入长度不能超过128个字符") + private String valAlgorithmUsage; + @ApiModelProperty(value = "运行命令,输入长度不能超过128个字符", required = true) @NotBlank(message = "运行命令不能为空") @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_EIGHT, message = "运行命令-输入长度不能超过128个字符") private String runCommand; - @ApiModelProperty(value = "数据集来源路径,输入长度不能超过127个字符", required = true) - @NotBlank(message = "数据集来源路径不能为空") + @ApiModelProperty(value = "数据集来源路径,输入长度不能超过127个字符") @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_SEVEN, message = "数据集来源路径-输入长度不能超过127个字符") private String dataSourcePath; - @ApiModelProperty(value = "数据集来源名称,输入长度不能超过127个字符", required = true) - @NotBlank(message = "数据集来源名称不能为空") + @ApiModelProperty(value = "数据集来源名称,输入长度不能超过127个字符") @Length(max = MagicNumConstant.ONE_HUNDRED_TWENTY_SEVEN, message = "数据集来源名称-输入长度不能超过127个字符") private String dataSourceName; diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/entity/PtJobParam.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/entity/PtJobParam.java index 6db4c0b..90671c6 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/entity/PtJobParam.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/entity/PtJobParam.java @@ -18,10 +18,7 @@ package org.dubhe.train.domain.entity; import com.alibaba.fastjson.JSONObject; -import com.baomidou.mybatisplus.annotation.IdType; -import com.baomidou.mybatisplus.annotation.TableField; -import com.baomidou.mybatisplus.annotation.TableId; -import com.baomidou.mybatisplus.annotation.TableName; +import com.baomidou.mybatisplus.annotation.*; 
import com.baomidou.mybatisplus.extension.handlers.FastjsonTypeHandler; import lombok.Data; import lombok.EqualsAndHashCode; @@ -58,6 +55,18 @@ public class PtJobParam extends BaseEntity { @TableField(value = "algorithm_id") private Long algorithmId; + /** + * 算法用途 + */ + @TableField(value = "algorithm_usage") + private String algorithmUsage; + + /** + * 验证数据集算法用途 + */ + @TableField(value = "val_algorithm_usage") + private String valAlgorithmUsage; + /** * 运行命令 */ @@ -112,4 +121,15 @@ public class PtJobParam extends BaseEntity { @TableField(value = "delay_delete_time") private Timestamp delayDeleteTime; + /** + * notebookId + */ + @TableField(value = "notebook_id") + private Long notebookId; + + /** + * notebook名称 + */ + @TableField(value = "notebook_name") + private String notebookName; } diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/entity/PtTrainParam.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/entity/PtTrainParam.java index 0c1ff2c..2b034bb 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/entity/PtTrainParam.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/entity/PtTrainParam.java @@ -57,6 +57,18 @@ public class PtTrainParam extends BaseEntity { @TableField(value = "algorithm_id") private Long algorithmId; + /** + * 算法用途 + */ + @TableField(value = "algorithm_usage") + private String algorithmUsage; + + /** + * 验证数据集算法用途 + */ + @TableField(value = "val_algorithm_usage") + private String valAlgorithmUsage; + /** * 运行命令 */ diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtImageAndAlgorithmVO.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtImageAndAlgorithmVO.java index 929918d..9ba50aa 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtImageAndAlgorithmVO.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtImageAndAlgorithmVO.java @@ -17,6 +17,7 @@ 
package org.dubhe.train.domain.vo; +import com.fasterxml.jackson.annotation.JsonIgnore; import io.swagger.annotations.ApiModelProperty; import lombok.Data; import lombok.experimental.Accessors; @@ -36,9 +37,6 @@ public class PtImageAndAlgorithmVO implements Serializable { @ApiModelProperty("镜像名称") private String imageName; - @ApiModelProperty("镜像地址") - private String imageUrl; - @ApiModelProperty("代码目录") private String codeDir; @@ -54,4 +52,8 @@ public class PtImageAndAlgorithmVO implements Serializable { @ApiModelProperty("输出可视化日志") private Boolean isVisualizedLog; + @JsonIgnore + @ApiModelProperty(value = "pip包路径",hidden = true) + private String pipSitePackagePath; + } diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainJobDetailQueryVO.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainJobDetailQueryVO.java index 28f84d7..9fcb6a2 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainJobDetailQueryVO.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainJobDetailQueryVO.java @@ -22,7 +22,6 @@ import io.swagger.annotations.ApiModelProperty; import lombok.Data; import lombok.experimental.Accessors; -import java.io.Serializable; import java.sql.Timestamp; /** @@ -31,9 +30,7 @@ import java.sql.Timestamp; */ @Data @Accessors(chain = true) -public class PtTrainJobDetailQueryVO implements Serializable { - - private static final long serialVersionUID = 1L; +public class PtTrainJobDetailQueryVO extends PtTrainJobDetailVO { @ApiModelProperty("训练作业名") private String trainName; @@ -143,6 +140,9 @@ public class PtTrainJobDetailQueryVO implements Serializable { @ApiModelProperty("算法用途") private String algorithmUsage; + @ApiModelProperty("验证数据集算法用途") + private String valAlgorithmUsage; + @ApiModelProperty("算法精度") private String accuracy; diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainJobDetailVO.java 
b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainJobDetailVO.java index 3861b81..e4f6fc1 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainJobDetailVO.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainJobDetailVO.java @@ -140,6 +140,9 @@ public class PtTrainJobDetailVO implements Serializable { @ApiModelProperty("算法用途") private String algorithmUsage; + @ApiModelProperty("验证数据集算法用途") + private String valAlgorithmUsage; + @ApiModelProperty("算法精度") private String accuracy; @@ -149,7 +152,7 @@ public class PtTrainJobDetailVO implements Serializable { @ApiModelProperty(value = "算法文件路径") private String algorithmCodeDir; - @ApiModelProperty("训练类型") + @ApiModelProperty("训练类型 0:普通训练,1:分布式训练") private Integer trainType; @ApiModelProperty("验证数据来源名称") @@ -187,4 +190,10 @@ public class PtTrainJobDetailVO implements Serializable { @ApiModelProperty(value = "炼知学生模型ids,多个id之前用','隔开") private String studentModelIds; -} + + @ApiModelProperty(value = "notebook名称") + private String notebookName; + + @ApiModelProperty(value = "notebookId") + private Long notebookId; +} \ No newline at end of file diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainParamQueryVO.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainParamQueryVO.java index 0bd4c9a..e2b603b 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainParamQueryVO.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/domain/vo/PtTrainParamQueryVO.java @@ -34,8 +34,6 @@ import java.sql.Timestamp; @Data public class PtTrainParamQueryVO extends BaseVO implements Serializable { - private static final long serialVersionUID = 1L; - @ApiModelProperty("任务参数ID") private Long id; @@ -66,6 +64,12 @@ public class PtTrainParamQueryVO extends BaseVO implements Serializable { @ApiModelProperty("算法来源(1为我的算法,2为预置算法)") private Integer algorithmSource; + 
@ApiModelProperty("算法用途") + private String algorithmUsage; + + @ApiModelProperty("验证数据集算法用途") + private String valAlgorithmUsage; + @ApiModelProperty("数据来源路径") private String dataSourcePath; diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/rest/PtTrainJobController.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/rest/PtTrainJobController.java index 41f288f..e6ca0d3 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/rest/PtTrainJobController.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/rest/PtTrainJobController.java @@ -145,6 +145,14 @@ public class PtTrainJobController { return new DataResponseBody(ptTrainJobService.stopTrainJob(ptTrainJobStopDTO)); } + @PostMapping("/batchStop") + @ApiOperation("一键停止所有训练任务") + @PreAuthorize(Permissions.TRAINING_JOB_UPDATE) + public DataResponseBody batchStopTrainJob() { + ptTrainJobService.batchStopTrainJob(); + return new DataResponseBody(); + } + @PostMapping("/resume") @ApiOperation("恢复训练任务") @PreAuthorize(Permissions.TRAINING_JOB_UPDATE) diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/service/PtTrainJobService.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/service/PtTrainJobService.java index b02010d..79b8582 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/service/PtTrainJobService.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/service/PtTrainJobService.java @@ -148,4 +148,10 @@ public interface PtTrainJobService { * @return Map 可视化训练列表及分页信息 */ Map getVisualTrainList(VisualTrainQueryDTO visualTrainQueryDTO); + + /** + * 一键停止所有训练job + * + */ + void batchStopTrainJob(); } diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/service/impl/PtTrainJobServiceImpl.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/service/impl/PtTrainJobServiceImpl.java index c2a2b7d..f097ca6 100644 --- 
a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/service/impl/PtTrainJobServiceImpl.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/service/impl/PtTrainJobServiceImpl.java @@ -21,6 +21,7 @@ import cn.hutool.core.bean.BeanUtil; import cn.hutool.core.io.FileUtil; import cn.hutool.core.util.StrUtil; import com.alibaba.fastjson.JSONObject; +import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; import com.baomidou.mybatisplus.core.conditions.update.UpdateWrapper; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; @@ -46,11 +47,12 @@ import org.dubhe.biz.log.enums.LogEnum; import org.dubhe.biz.log.utils.LogUtil; import org.dubhe.biz.permission.annotation.DataPermissionMethod; import org.dubhe.biz.permission.base.BaseService; -import org.dubhe.biz.permission.util.SqlUtil; +import org.dubhe.biz.redis.utils.RedisUtils; import org.dubhe.k8s.api.DistributeTrainApi; import org.dubhe.k8s.api.PersistentVolumeClaimApi; import org.dubhe.k8s.api.PodApi; import org.dubhe.k8s.api.TrainJobApi; +import org.dubhe.k8s.cache.ResourceCache; import org.dubhe.k8s.domain.PtBaseResult; import org.dubhe.k8s.domain.resource.BizPod; import org.dubhe.k8s.utils.K8sNameTool; @@ -178,6 +180,18 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { @Autowired private UserContextService userContextService; + @Autowired + private RedisUtils redisUtils; + + @Autowired + private ResourceCache resourceCache; + + @Autowired + private NoteBookClient noteBookClient; + + @Value("Task:Train:" + "${spring.profiles.active}_train_job_id_") + private String trainIdPrefix; + public final static List FIELD_NAMES; static { @@ -187,38 +201,26 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { /** * 作业列表展示 * - * @param ptTrainQueryDTO 查询作业列表参数 + * @param ptTrainQueryDTO 查询作业列表参数 * @return Map 作业列表分页信息 **/ @Override @DataPermissionMethod(dataType = 
DatasetTypeEnum.PUBLIC) public Map getTrainJob(@NonNull PtTrainQueryDTO ptTrainQueryDTO) { - Page pageTrainResult; Page page = ptTrainQueryDTO.toPage(); - String order; - String sort; - try { - //排序方式 - order = StringConstant.SORT_ASC.equalsIgnoreCase(ptTrainQueryDTO.getOrder()) ? StringConstant.SORT_ASC : StringConstant.SORT_DESC; - //排序字段 - String sortField = FIELD_NAMES.contains(ptTrainQueryDTO.getSort()) ? ptTrainQueryDTO.getSort() : StringConstant.ID; - sort = StringUtils.humpToLine(sortField); - //设置管理员可以查询所有数据 - Long userId = userContextService.getCurUserId(); - if (BaseService.isAdmin(userContextService.getCurUser())) { - userId = null; - } - pageTrainResult = ptTrainJobMapper.getPageTrain(page, userId, ptTrainQueryDTO.getTrainStatus(), ptTrainQueryDTO.getTrainName(), sort, order); - } catch (Exception e) { - LogUtil.error(LogEnum.BIZ_TRAIN, " ptTrainQueryDTO is {},query job list shows exception {}", ptTrainQueryDTO, e); - throw new BusinessException("查询作业列表展示异常"); - } + //排序方式 + String order = StringConstant.SORT_ASC.equalsIgnoreCase(ptTrainQueryDTO.getOrder()) ? StringConstant.SORT_ASC : StringConstant.SORT_DESC; + //排序字段 + String sortField = FIELD_NAMES.contains(ptTrainQueryDTO.getSort()) ? 
ptTrainQueryDTO.getSort() : StringConstant.ID; + String sort = StringUtils.humpToLine(sortField); + //设置管理员可以查询所有数据 + Long userId = userContextService.getCurUserId(); + if (BaseService.isAdmin(userContextService.getCurUser())) { + userId = null; + } + Page pageTrainResult = ptTrainJobMapper.getPageTrain(page, userId, ptTrainQueryDTO.getTrainStatus(), ptTrainQueryDTO.getTrainName(), sort, order); List trainResult = pageTrainResult.getRecords(); - if (CollectionUtils.isNotEmpty(trainResult)) { - LogUtil.info(LogEnum.BIZ_TRAIN, "The user {} query job list is displayed and the result is as follows {}.", userContextService.getCurUser().getUsername(), trainResult); - } return PageUtil.toPage(page, trainResult); - } /** @@ -255,23 +257,36 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { queryJobParamWrapper.in("train_job_id", jobIds); //找出所有训练参数 List ptJobParams = ptJobParamMapper.selectList(queryJobParamWrapper); - Set algorithmIds = null; + List ptTrainAlgorithms = null; if (CollectionUtils.isNotEmpty(ptJobParams)) { - algorithmIds = ptJobParams.stream().map(PtJobParam::getAlgorithmId).collect(Collectors.toSet()); + Set algorithmIds = ptJobParams.stream().map(PtJobParam::getAlgorithmId).filter(x -> x != null).collect(Collectors.toSet()); + ptTrainAlgorithms = selectAllBatchIds(algorithmIds); + } + //获取训练信息 + PtTrain ptTrain = ptTrainMapper.selectById(ptTrainJobVersionQueryDTO.getTrainId()); + //结果集处理 + return getTrainJobDetail(ptTrainJobs, ptJobParams, ptTrainAlgorithms, ptTrain); + } + + /** + * 查询算法 + * + * @param algorithmIds 算法id集合 + * @return + */ + public List selectAllBatchIds(Set algorithmIds) { + if (CollectionUtils.isEmpty(algorithmIds)) { + return Collections.emptyList(); } TrainAlgorithmSelectAllBatchIdDTO trainAlgorithmSelectAllBatchIdDTO = new TrainAlgorithmSelectAllBatchIdDTO(); trainAlgorithmSelectAllBatchIdDTO.setIds(algorithmIds); DataResponseBody> dataResponseBody = 
algorithmClient.selectAllBatchIds(trainAlgorithmSelectAllBatchIdDTO); - List ptTrainAlgorithms = null; if (dataResponseBody.succeed()) { - ptTrainAlgorithms = dataResponseBody.getData(); + return dataResponseBody.getData(); + } else { + LogUtil.info(LogEnum.BIZ_TRAIN, "Fail to query algorithm. data response body is {}", dataResponseBody); + throw new BusinessException("算法服务调用失败,请稍后重试~"); } - //获取训练信息 - PtTrain ptTrain = ptTrainMapper.selectById(ptTrainJobVersionQueryDTO.getTrainId()); - //结果集处理 - List list = getTrainJobDetail(ptTrainJobs, ptJobParams, ptTrainAlgorithms, ptTrain); - LogUtil.info(LogEnum.BIZ_TRAIN, "User {} query different version of job list display completed, return result {}", userContextService.getCurUser().getUsername(), list); - return list; } /** @@ -284,67 +299,94 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { * @return List 训练版本查询详情集合 */ private List getTrainJobDetail(List ptTrainJobs, List ptJobParams, List ptTrainAlgorithms, PtTrain ptTrain) { + Map algorithmMap = new HashedMap<>(); + + if (CollectionUtils.isNotEmpty(ptTrainAlgorithms)) { + ptTrainAlgorithms.forEach(x -> algorithmMap.put(x.getId(), x)); + } + List list = new ArrayList<>(); - Map jobParamMap = new HashedMap<>(); + Map jobParamMap = new HashedMap<>(); + ptTrainJobs.forEach(x -> { PtTrainJobDetailVO ptTrainJobDetailVO = new PtTrainJobDetailVO(); BeanUtil.copyProperties(x, ptTrainJobDetailVO); list.add(ptTrainJobDetailVO); - jobParamMap.put(x.getId(), list.size()); + jobParamMap.put(x.getId(), ptTrainJobDetailVO); }); + ptJobParams.forEach(x -> { - PtTrainJobDetailVO ptTrainJobDetailVO = list.get(jobParamMap.get(x.getTrainJobId()) - 1); + PtTrainJobDetailVO ptTrainJobDetailVO = jobParamMap.get(x.getTrainJobId()); if (null != ptTrainJobDetailVO) { - ptTrainJobDetailVO.setAlgorithmId(x.getAlgorithmId()).setRunCommand(x.getRunCommand()).setImageName(x.getImageName()) + ptTrainJobDetailVO.setAlgorithmId(x.getAlgorithmId()) + 
.setAlgorithmUsage(x.getAlgorithmUsage()) + .setValAlgorithmUsage(x.getValAlgorithmUsage()) + .setRunCommand(x.getRunCommand()) + .setImageName(x.getImageName()) .setRunParams(x.getRunParams()) - .setParamF1(x.getParamF1()).setParamCallback(x.getParamCallback()) - .setParamPrecise(x.getParamPrecise()).setParamAccuracy(x.getParamAccuracy()); + .setParamF1(x.getParamF1()) + .setParamCallback(x.getParamCallback()) + .setNotebookId(x.getNotebookId()) + .setNotebookName(x.getNotebookName()) + .setParamPrecise(x.getParamPrecise()) + .setParamAccuracy(x.getParamAccuracy()); long nowTime = System.currentTimeMillis(); //获取训练延时启动倒计时(分钟) - if (x.getDelayCreateTime() != null && nowTime < x.getDelayCreateTime().getTime() && TrainJobStatusEnum.checkRunStatus(ptTrainJobDetailVO.getTrainStatus())) { + if (x.getDelayCreateTime() != null + && nowTime < x.getDelayCreateTime().getTime() + && TrainJobStatusEnum.checkRunStatus(ptTrainJobDetailVO.getTrainStatus())) { ptTrainJobDetailVO.setDelayCreateCountDown(TrainUtil.getCountDown(x.getDelayCreateTime().getTime())); } //获取训练自动停止倒计时(分钟) - if (x.getDelayDeleteTime() != null && nowTime < x.getDelayDeleteTime().getTime() && TrainJobStatusEnum.checkRunStatus(ptTrainJobDetailVO.getTrainStatus())) { + if (x.getDelayDeleteTime() != null + && nowTime < x.getDelayDeleteTime().getTime() + && TrainJobStatusEnum.checkRunStatus(ptTrainJobDetailVO.getTrainStatus())) { ptTrainJobDetailVO.setDelayDeleteCountDown(TrainUtil.getCountDown(x.getDelayDeleteTime().getTime())); } - //image信息拼装 - if (StringUtils.isNotBlank(x.getImageName())) { - String imageNameSuffix = x.getImageName().substring(x.getImageName().lastIndexOf(StrUtil.SLASH) + MagicNumConstant.ONE); - String[] imageNameSuffixArray = imageNameSuffix.split(StrUtil.COLON); - ptTrainJobDetailVO.setImageName(imageNameSuffixArray[0]); - ptTrainJobDetailVO.setImageTag(imageNameSuffixArray[1]); - } + buildImageAndTagInfo(x, ptTrainJobDetailVO); } }); - Map algorithmMap = new HashedMap<>(); - 
ptTrainAlgorithms.forEach(x -> algorithmMap.put(x.getId(), x)); - for (PtTrainJobDetailVO ptTrainJobDetailVO : list) { + ptTrainJobDetailVO.setTrainName(ptTrain.getTrainName()); TrainAlgorithmQureyVO ptTrainAlgorithm = algorithmMap.get(ptTrainJobDetailVO.getAlgorithmId()); if (null != ptTrainAlgorithm) { ptTrainJobDetailVO.setAlgorithmName(ptTrainAlgorithm.getAlgorithmName()) .setAlgorithmSource(ptTrainAlgorithm.getAlgorithmSource()) - .setAlgorithmUsage(ptTrainAlgorithm.getAlgorithmUsage()) .setAccuracy(ptTrainAlgorithm.getAccuracy()) .setP4InferenceSpeed(ptTrainAlgorithm.getP4InferenceSpeed()); + //1为我的算法,2为预置算法 if (ptTrainAlgorithm.getAlgorithmSource() == MagicNumConstant.ONE) { ptTrainJobDetailVO.setAlgorithmCodeDir(ptTrainAlgorithm.getCodeDir()); } } } - list.forEach(x -> x.setTrainName(ptTrain.getTrainName())); return list; } + /** + * Splits the image reference stored in the job parameters into image name and tag and writes both to the VO. + * + * @param ptJobParam job parameters whose image name is parsed; a blank image name leaves the VO untouched + * @param ptTrainJobDetailVO detail VO that receives the parsed image name and, when one is present, the tag (null otherwise) + */ + public void buildImageAndTagInfo(PtJobParam ptJobParam, PtTrainJobDetailVO ptTrainJobDetailVO) { + //keep only the segment after the last slash, then split it into name and tag on the colon + if (StringUtils.isNotBlank(ptJobParam.getImageName())) { + String imageNameSuffix = ptJobParam.getImageName().substring(ptJobParam.getImageName().lastIndexOf(StrUtil.SLASH) + MagicNumConstant.ONE); + String[] imageNameSuffixArray = imageNameSuffix.split(StrUtil.COLON); + ptTrainJobDetailVO.setImageName(imageNameSuffixArray[0]); + ptTrainJobDetailVO.setImageTag(imageNameSuffixArray.length > 1 ? imageNameSuffixArray[1] : null); + } + } + /** * 校验请求不同版本job所传参数是否合法 * - * @param trainId 训练ID + * @param trainId 训练ID */ private void checkTrainId(Long trainId) { if (null == trainId || trainId < 1) { @@ -356,6 +398,27 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { } } + /** + * Fetches a notebook from the NoteBook service through noteBookClient. + * + * @param id notebook id + * @return the notebook returned by the service; a BusinessException is thrown instead when the call fails or no notebook comes back + */ + private NoteBookVO getNoteBook(Long id) { + DataResponseBody dataResponseBody = noteBookClient.getNoteBook(id); + if (dataResponseBody.succeed()) { + NoteBookVO data = dataResponseBody.getData(); + if (data == null) { + LogUtil.info(LogEnum.BIZ_TRAIN, 
"There is no such notebook, id is {}", id); + throw new BusinessException("无此NoteBook"); + } + return data; + } else { + LogUtil.info(LogEnum.BIZ_TRAIN, "NoteBook service unreachable! Msg is {}", dataResponseBody.getMsg()); + throw new BusinessException("NoteBook服务调用失败,请稍后重试~"); + } + } + /** * 创建训练job * @@ -366,21 +429,18 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { @Transactional(rollbackFor = Exception.class) @DataPermissionMethod(dataType = DatasetTypeEnum.PUBLIC) public List createTrainJobVersion(PtTrainJobCreateDTO ptTrainJobCreateDTO) { - LogUtil.info(LogEnum.BIZ_TRAIN, "User {} creates a training job and receives {} as an argument", userContextService.getCurUser().getUsername(), ptTrainJobCreateDTO); + + validatePtTrainJobCreateDTO(ptTrainJobCreateDTO); // 判断当前trainName是否已经存在 checkTrainName(ptTrainJobCreateDTO.getTrainName(), userContextService.getCurUserId()); + // 校验trainParamName是否存在 if (ptTrainJobCreateDTO.getSaveParams() != null && ptTrainJobCreateDTO.getSaveParams()) { checkTrainParamName(ptTrainJobCreateDTO, userContextService.getCurUserId()); // 保存任务参数到数据库 saveParamToDb(ptTrainJobCreateDTO, userContextService.getCurUser()); } - // 获取镜像和算法目录 - PtImageAndAlgorithmVO ptImageAndAlgorithmVO = getPtImageByAlgorithmId(ptTrainJobCreateDTO.getAlgorithmId()); - //使用用户创建训练时提供的镜像与运行命令 - String images = imageUtil.getImageUrl(ptTrainJobCreateDTO, userContextService.getCurUser()); - ptImageAndAlgorithmVO.setImageName(trainHarborConfig.getAddress() + StrUtil.SLASH + images).setRunCommand(ptTrainJobCreateDTO.getRunCommand()); //jobKey String trainKey = KeyUtil.generateTrainKey(userContextService.getCurUserId()); @@ -390,19 +450,106 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { //生成k8s 的job名称 String jobName = trainKey + trainJobConfig.getSeparator() + version; + // 获取镜像和算法目录 + PtImageAndAlgorithmVO ptImageAndAlgorithmVO = buildPtImageAndAlgorithmVO(ptTrainJobCreateDTO); + + //生成任务识别标识 + String
taskIdentify = StringUtils.getUUID(); BaseTrainJobDTO baseTrainJobDTO = new BaseTrainJobDTO(); BeanUtil.copyProperties(ptTrainJobCreateDTO, baseTrainJobDTO); - baseTrainJobDTO.setJobName(jobName).setTrainJobSpecsName(ptTrainJobCreateDTO.getTrainJobSpecsName()).setResourcesPoolType(ptTrainJobCreateDTO.getResourcesPoolType()) - .setCpuNum(ptTrainJobCreateDTO.getCpuNum()).setGpuNum(ptTrainJobCreateDTO.getGpuNum()).setMemNum(ptTrainJobCreateDTO.getMemNum()).setWorkspaceRequest(ptTrainJobCreateDTO.getWorkspaceRequest()); - //保存用户自定义imageName - String userImageName = images.split(StrUtil.SLASH)[0] + StrUtil.SLASH + ptTrainJobCreateDTO.getImageName() + StrUtil.COLON + ptTrainJobCreateDTO.getImageTag(); + baseTrainJobDTO.setJobName(jobName) + .setPipSitePackagePath(ptImageAndAlgorithmVO.getPipSitePackagePath()) + .setTrainJobSpecsName(ptTrainJobCreateDTO.getTrainJobSpecsName()) + .setResourcesPoolType(ptTrainJobCreateDTO.getResourcesPoolType()) + .setCpuNum(ptTrainJobCreateDTO.getCpuNum()) + .setGpuNum(ptTrainJobCreateDTO.getGpuNum()) + .setMemNum(ptTrainJobCreateDTO.getMemNum()) + .setWorkspaceRequest(ptTrainJobCreateDTO.getWorkspaceRequest()) + .setTaskIdentify(taskIdentify); + + //例如: 将harbor.dubhe.ai/notebook/notebook:v1 去掉 harbor地址 + String userImageName = trimHarborAddress(ptImageAndAlgorithmVO.getImageName()); //结果集处理 PtTrainJob ptTrainJob = saveTrainJobTableData(ptTrainJobCreateDTO, userContextService.getCurUser(), userImageName, trainKey, baseTrainJobDTO); + //添加任务缓存 + resourceCache.addTaskCache(taskIdentify, ptTrainJob.getTrainId(), ptTrainJobCreateDTO.getTrainName(), trainIdPrefix); // 提交job asyncManager.execute(baseTrainJobDTO, userContextService.getCurUserId(), ptImageAndAlgorithmVO, ptTrainJob); return Collections.singletonList(ptTrainJob.getTrainId()); } + /** + * 去掉harbor地址 + * + * @param imageName + * @return + */ + private String trimHarborAddress(String imageName) { + return StringUtils.isBlank(imageName) ? 
StringUtils.EMPTY : imageName.replace(trainHarborConfig.getAddress() + StrUtil.SLASH, StringUtils.EMPTY); + } + + /** + * 构建镜像和算法目录VO 考虑到无算法创建 + * + * @param ptTrainJobBaseDTO + * @return + */ + private PtImageAndAlgorithmVO buildPtImageAndAlgorithmVO(PtTrainJobBaseDTO ptTrainJobBaseDTO) { + PtImageAndAlgorithmVO ptImageAndAlgorithmVO; + //没有算法id则以notebook为主 + if (ptTrainJobBaseDTO.getAlgorithmId() == null) { + ptImageAndAlgorithmVO = new PtImageAndAlgorithmVO(); + //notebook 信息 + NoteBookVO noteBook = getNoteBook(ptTrainJobBaseDTO.getNotebookId()); + ptImageAndAlgorithmVO.setPipSitePackagePath(noteBook.getPipSitePackagePath()); + + ptImageAndAlgorithmVO.setImageName(noteBook.getK8sImageName()); + ptImageAndAlgorithmVO.setIsTrainOut(true); + ptImageAndAlgorithmVO.setIsTrainModelOut(true); + //默认可视化输出不输出 python 文件中可以不用接受这个参数 + ptImageAndAlgorithmVO.setIsVisualizedLog(false); + ptImageAndAlgorithmVO.setCodeDir(noteBook.getK8sPvcPath()); + } else { + //使用用户创建训练时提供的镜像与运行命令 + String imageUrl = imageUtil.getImageUrl(ptTrainJobBaseDTO, userContextService.getCurUser()); + String userImageName = imageUrl.split(StrUtil.SLASH)[0] + StrUtil.SLASH + ptTrainJobBaseDTO.getImageName() + StrUtil.COLON + ptTrainJobBaseDTO.getImageTag(); + ptImageAndAlgorithmVO = getPtImageByAlgorithmId(ptTrainJobBaseDTO.getAlgorithmId()); + String imageName = trainHarborConfig.getAddress() + StrUtil.SLASH + userImageName; + ptImageAndAlgorithmVO.setImageName(imageName); + + } + ptImageAndAlgorithmVO.setRunCommand(ptTrainJobBaseDTO.getRunCommand()); + return ptImageAndAlgorithmVO; + } + + /** + * 参数校验 + * + * @param ptTrainJobCreateDTO + */ + private void validatePtTrainJobCreateDTO(PtTrainJobCreateDTO ptTrainJobCreateDTO) { + if (ptTrainJobCreateDTO.getAlgorithmId() == null && ptTrainJobCreateDTO.getNotebookId() == null) { + LogUtil.error(LogEnum.BIZ_TRAIN, "Neither algorithm's id nor notebook's id can be null at the same time"); + throw new BusinessException("算法ID或者notebookId不能同时为空!"); + } + 
//带算法创建(非notebook发起训练)时校验参数 + if (ptTrainJobCreateDTO.getNotebookId() == null) { + validateCreateTrainJobWithAlgorithm(ptTrainJobCreateDTO); + } + } + + /** + * 参数校验 + * + * @param ptTrainJobCreateDTO + */ + private void validateCreateTrainJobWithAlgorithm(PtTrainJobCreateDTO ptTrainJobCreateDTO) { + if (ptTrainJobCreateDTO.getAlgorithmId() == null) { + LogUtil.error(LogEnum.BIZ_TRAIN, "Algorithm id is null"); + throw new BusinessException("算法ID不能为空~"); + } + } + /** * 保存训练任务数据 * @@ -429,9 +576,17 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { //检查模型是否合法,合法则保存其路径地址 checkModelAndSavePath(currentUser, baseTrainJobDTO); + // 保存job参数 + PtJobParam ptJobParam = new PtJobParam(); + // 添加train_job表 PtTrainJob ptTrainJob = new PtTrainJob(); BeanUtil.copyProperties(ptTrainJobCreateDTO, ptTrainJob); + if (ptTrainJobCreateDTO.getNotebookId() != null) { + NoteBookVO noteBook = getNoteBook(ptTrainJobCreateDTO.getNotebookId()); + ptJobParam.setNotebookName(noteBook.getNoteBookName()); + ptJobParam.setNotebookId(noteBook.getId()); + } ptTrainJob.setTrainId(ptTrain.getId()) .setTrainVersion(trainJobConfig.getVersionLabel().toUpperCase() + String.format(TrainUtil.FOUR_DECIMAL, MagicNumConstant.ONE)) .setJobName(baseTrainJobDTO.getJobName()) @@ -442,14 +597,21 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { throw new BusinessException("内部错误"); } - // 保存job参数 - PtJobParam ptJobParam = new PtJobParam(); + ptJobParam.setTrainJobId(ptTrainJob.getId()) .setAlgorithmId(ptTrainJobCreateDTO.getAlgorithmId()) .setRunCommand(ptTrainJobCreateDTO.getRunCommand()) .setImageName(imageName) .setRunParams(ptTrainJobCreateDTO.getRunParams()) .setCreateUserId(currentUser.getId()); + //保存算法用途 + if (ptTrainJobCreateDTO.getAlgorithmUsage() != null) { + ptJobParam.setAlgorithmUsage(ptTrainJobCreateDTO.getAlgorithmUsage()); + } + //保存验证数据集算法用途 + if (ptTrainJobCreateDTO.getValAlgorithmUsage() != null) { + 
ptJobParam.setValAlgorithmUsage(ptTrainJobCreateDTO.getValAlgorithmUsage()); + } //保存训练延时启动时间 if (ptTrainJobCreateDTO.getDelayCreateTime() != null && ptTrainJobCreateDTO.getDelayCreateTime() > 0) { ptJobParam.setDelayCreateTime(TrainUtil.getDelayTime(ptTrainJobCreateDTO.getDelayCreateTime())); @@ -473,8 +635,8 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { /** * 检查模型是否合法,合法则保存其路径地址 * - * @param currentUser 用户 - * @param baseTrainJobDTO 基础训练参数 + * @param currentUser 用户 + * @param baseTrainJobDTO 基础训练参数 */ private void checkModelAndSavePath(UserContext currentUser, BaseTrainJobDTO baseTrainJobDTO) { @@ -589,7 +751,7 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { /** * 调整模型地址 * - * @param modelUrl 模型地址 + * @param modelUrl 模型地址 * @return 模型地址 */ private String adjustmentUrl(String modelUrl) { @@ -619,8 +781,8 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { PtTrainParam ptTrainParam = new PtTrainParam(); BeanUtil.copyProperties(ptTrainJobCreateDTO, ptTrainParam); //获取镜像url - String images = imageUtil.getImageUrl(ptTrainJobCreateDTO, currentUser); - ptTrainParam.setImageName(images); + String image = imageUtil.getImageUrl(ptTrainJobCreateDTO, currentUser); + ptTrainParam.setImageName(image); ptTrainParam.setParamName(ptTrainJobCreateDTO.getTrainParamName()) .setDescription(ptTrainJobCreateDTO.getTrainParamDesc()) .setRunParams(ptTrainJobCreateDTO.getRunParams()) @@ -635,8 +797,7 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { /** * 获取镜像和算法目录 * - * @param algorithmId 算法ID - * @param userId 用户ID + * @param algorithmId 算法 * @return PtImageAndAlgorithmVO 镜像 */ private PtImageAndAlgorithmVO getPtImageByAlgorithmId(Long algorithmId) { @@ -711,35 +872,45 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { /** * 修改训练job * - * @param ptTrainJobUpdateDTO 修改训练job参数 + * @param ptTrainJobUpdateDTO 修改训练job参数 * @return List id集合 **/ @Override @Transactional(rollbackFor = 
Exception.class) @DataPermissionMethod(dataType = DatasetTypeEnum.PUBLIC) public List updateTrainJob(PtTrainJobUpdateDTO ptTrainJobUpdateDTO) { + if (ptTrainJobUpdateDTO.getNotebookId() == null && ptTrainJobUpdateDTO.getAlgorithmId() == null) { + LogUtil.error(LogEnum.BIZ_TRAIN, "Neither algorithm's id nor notebook's id can be null at the same time"); + throw new BusinessException("算法ID或者notebookId不能同时为空!"); + } PtTrainJob existPtTrainJob = ptTrainJobMapper.selectById(ptTrainJobUpdateDTO.getId()); if (null == existPtTrainJob) { LogUtil.error(LogEnum.BIZ_TRAIN, "It is illegal for a user {} to modify a training job, jobId, to {}", userContextService.getCurUser().getUsername(), ptTrainJobUpdateDTO.getId()); throw new BusinessException("您输入的id不存在或已被删除"); } - //获取算法 - PtImageAndAlgorithmVO ptImageAndAlgorithmVO = getPtImageByAlgorithmId(ptTrainJobUpdateDTO.getAlgorithmId()); - //使用用户修改训练时提供的镜像与运行命令 - //获取镜像url - String images = imageUtil.getImageUrl(ptTrainJobUpdateDTO, userContextService.getCurUser()); - ptImageAndAlgorithmVO.setImageName(trainHarborConfig.getAddress() + StrUtil.SLASH + images).setRunCommand(ptTrainJobUpdateDTO.getRunCommand()); + PtTrain ptTrain = ptTrainMapper.selectById(existPtTrainJob.getTrainId()); String jobName = buildVersion(ptTrain); + + PtImageAndAlgorithmVO ptImageAndAlgorithmVO = buildPtImageAndAlgorithmVO(ptTrainJobUpdateDTO); + BaseTrainJobDTO baseTrainJobDTO = new BaseTrainJobDTO(); BeanUtil.copyProperties(ptTrainJobUpdateDTO, baseTrainJobDTO); - baseTrainJobDTO.setJobName(jobName).setTrainJobSpecsName(ptTrainJobUpdateDTO.getTrainJobSpecsName()).setResourcesPoolType(ptTrainJobUpdateDTO.getResourcesPoolType()) - .setCpuNum(ptTrainJobUpdateDTO.getCpuNum()).setGpuNum(ptTrainJobUpdateDTO.getGpuNum()).setMemNum(ptTrainJobUpdateDTO.getMemNum()).setWorkspaceRequest(ptTrainJobUpdateDTO.getWorkspaceRequest()); - //保存用户自定义imageName - String userImageName = images.split(StrUtil.SLASH)[0] + StrUtil.SLASH + ptTrainJobUpdateDTO.getImageName() + 
StrUtil.COLON + ptTrainJobUpdateDTO.getImageTag(); + String taskIdentify = resourceCache.getTaskIdentify(ptTrain.getId(), ptTrain.getTrainName(), trainIdPrefix); + baseTrainJobDTO.setJobName(jobName) + .setTrainJobSpecsName(ptTrainJobUpdateDTO.getTrainJobSpecsName()) + .setPipSitePackagePath(ptImageAndAlgorithmVO.getPipSitePackagePath()) + .setResourcesPoolType(ptTrainJobUpdateDTO.getResourcesPoolType()) + .setCpuNum(ptTrainJobUpdateDTO.getCpuNum()) + .setGpuNum(ptTrainJobUpdateDTO.getGpuNum()) + .setMemNum(ptTrainJobUpdateDTO.getMemNum()) + .setWorkspaceRequest(ptTrainJobUpdateDTO.getWorkspaceRequest()) + .setTaskIdentify(taskIdentify); + + String userImageName = trimHarborAddress(ptImageAndAlgorithmVO.getImageName()); //结果集处理 PtTrainJob ptTrainJob = updateTrainJobTableData(ptTrainJobUpdateDTO, userContextService.getCurUser(), existPtTrainJob, userImageName, ptTrain, baseTrainJobDTO); //提交job @@ -764,12 +935,23 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { //检查模型是否合法,合法则保存其路径地址 checkModelAndSavePath(currentUser, baseTrainJobDTO); - + //保存job参数 + PtJobParam ptJobParam = new PtJobParam(); //添加train_job表 PtTrainJob ptTrainJob = new PtTrainJob(); BeanUtil.copyProperties(ptTrainJobUpdateDTO, ptTrainJob); - ptTrainJob.setTrainId(ptTrain.getId()).setTrainVersion(trainJobConfig.getVersionLabel().toUpperCase() + String.format(TrainUtil.FOUR_DECIMAL, ptTrain.getTotalNum() + 1)) - .setJobName(baseTrainJobDTO.getJobName()).setParentTrainVersion(existPtTrainJob.getTrainVersion()) + + if (ptTrainJobUpdateDTO.getNotebookId() != null) { + NoteBookVO noteBook = getNoteBook(ptTrainJobUpdateDTO.getNotebookId()); + ptJobParam.setNotebookName(noteBook.getNoteBookName()); + ptJobParam.setNotebookId(noteBook.getId()); + } + + ptTrainJob.setTrainId(ptTrain.getId()) + .setTrainVersion(trainJobConfig.getVersionLabel().toUpperCase() + String.format(TrainUtil.FOUR_DECIMAL, ptTrain.getTotalNum() + 1)) + .setJobName(baseTrainJobDTO.getJobName()) + 
.setParentTrainVersion(existPtTrainJob.getTrainVersion()) + .setOriginUserId(ptTrain.getCreateUserId()) .setCreateUserId(ptTrain.getCreateUserId()); int jobResult = ptTrainJobMapper.insert(ptTrainJob); if (jobResult < 1) { @@ -777,14 +959,21 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { throw new BusinessException("内部错误"); } - //保存job参数 - PtJobParam ptJobParam = new PtJobParam(); + ptJobParam.setTrainJobId(ptTrainJob.getId()) .setAlgorithmId(ptTrainJobUpdateDTO.getAlgorithmId()) .setRunCommand(ptTrainJobUpdateDTO.getRunCommand()) .setImageName(imageName) .setRunParams(ptTrainJobUpdateDTO.getRunParams()) .setCreateUserId(ptTrain.getCreateUserId()); + //保存算法用途 + if (ptTrainJobUpdateDTO.getAlgorithmUsage() != null) { + ptJobParam.setAlgorithmUsage(ptTrainJobUpdateDTO.getAlgorithmUsage()); + } + //保存验证数据集算法用途 + if (ptTrainJobUpdateDTO.getValAlgorithmUsage() != null) { + ptJobParam.setValAlgorithmUsage(ptTrainJobUpdateDTO.getValAlgorithmUsage()); + } //保存训练延时启动时间 if (ptTrainJobUpdateDTO.getDelayCreateTime() != null && ptTrainJobUpdateDTO.getDelayCreateTime() > 0) { ptJobParam.setDelayCreateTime(TrainUtil.getDelayTime(ptTrainJobUpdateDTO.getDelayCreateTime())); @@ -829,6 +1018,7 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { PtTrain ptTrain = checkAndReturnPtTrain(ptTrainJobDeleteDTO, userContextService.getCurUser(), jobList); Collection jobIdList = new ArrayList<>(); + String taskIdentify = (String) redisUtils.get(trainIdPrefix + String.valueOf(ptTrain.getId())); if (null != ptTrainJobDeleteDTO.getId()) { //要删除的训练任务 @@ -853,6 +1043,9 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { if (ptTrain.getVersionNum() == 1) { int trainResult = ptTrainMapper.deleteById(ptTrain.getId()); + if (StringUtils.isNotEmpty(taskIdentify)) { + redisUtils.del(taskIdentify, trainIdPrefix + String.valueOf(ptTrain.getId())); + } if (trainResult < 1) { LogUtil.error(LogEnum.BIZ_TRAIN, "User {} deleted training job, pt Train table 
deleted data failed", userContextService.getCurUser().getUsername()); throw new BusinessException("训练任务已删除或参数不合法"); @@ -867,7 +1060,9 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { } else { deleteTrainAndJob(ptTrainJobDeleteDTO, userContextService.getCurUser(), jobList, ptTrain, jobIdList); - + if (StringUtils.isNotEmpty(taskIdentify)) { + redisUtils.del(taskIdentify, trainIdPrefix + String.valueOf(ptTrain.getId())); + } } //删除pt_job_param表中相关数据 @@ -1017,7 +1212,7 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { } } } catch (Exception e) { - LogUtil.error(LogEnum.BIZ_TRAIN, "User {} delete training job, k8s delete failed,exception:{}",currentUser.getUsername(), e); + LogUtil.error(LogEnum.BIZ_TRAIN, "User {} delete training job, k8s delete failed,exception:{}", currentUser.getUsername(), e); throw new BusinessException("内部错误"); } } @@ -1074,15 +1269,11 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { @DataPermissionMethod(dataType = DatasetTypeEnum.PUBLIC) public PtTrainJobStatisticsMineVO statisticsMine() { // 获取运行中的任务 - Integer runCount = ptTrainJobMapper.selectCountByStatus(userContextService.getCurUserId(), - SqlUtil.integerlistToString(new Integer[]{TrainJobStatusEnum.RUNNING.getStatus()})); - + Integer runCount = ptTrainJobMapper.selectCount(new LambdaQueryWrapper().eq(PtTrainJob::getTrainStatus, TrainJobStatusEnum.RUNNING.getStatus())); // 已经完成的任务 - Integer finishCount = ptTrainJobMapper.selectCountByStatus(userContextService.getCurUserId(), - SqlUtil.integerlistToString( - new Integer[]{TrainJobStatusEnum.FAILED.getStatus(), TrainJobStatusEnum.STOP.getStatus(), - TrainJobStatusEnum.SUCCEEDED.getStatus(), TrainJobStatusEnum.UNKNOWN.getStatus()})); - + Integer finishCount = ptTrainJobMapper.selectCount(new LambdaQueryWrapper().in(PtTrainJob::getTrainStatus, TrainJobStatusEnum.FAILED.getStatus(), + TrainJobStatusEnum.STOP.getStatus(), + TrainJobStatusEnum.SUCCEEDED.getStatus(), 
TrainJobStatusEnum.UNKNOWN.getStatus())); PtTrainJobStatisticsMineVO vo = new PtTrainJobStatisticsMineVO(); vo.setRunJobCount(runCount); vo.setFinishJobCount(finishCount); @@ -1188,17 +1379,20 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { QueryWrapper jobParamQuery = new QueryWrapper<>(); jobParamQuery.eq("train_job_id", ptTrainJob.getId()); PtJobParam ptJobParam = ptJobParamMapper.selectOne(jobParamQuery); - if (ptJobParam == null || ptJobParam.getAlgorithmId() < MagicNumConstant.ONE) { + if (ptJobParam == null || ptJobParam.getAlgorithmId() != null && ptJobParam.getAlgorithmId() < MagicNumConstant.ONE) { LogUtil.error(LogEnum.BIZ_TRAIN, "The algorithm ID corresponding to the jobId is {} query by the user {} does not exist", userContextService.getCurUser().getUsername(), ptTrainJobDetailQueryDTO.getId()); throw new BusinessException("您查询的jobId对应的算法id不存在或已被删除"); } //获取算法参数 TrainAlgorithmQureyVO ptTrainAlgorithm = null; - TrainAlgorithmSelectAllByIdDTO trainAlgorithmSelectAllByIdDTO = new TrainAlgorithmSelectAllByIdDTO(); - trainAlgorithmSelectAllByIdDTO.setId(ptJobParam.getAlgorithmId()); - DataResponseBody dataResponseBody = algorithmClient.selectAllById(trainAlgorithmSelectAllByIdDTO); - if (dataResponseBody.succeed()) { - ptTrainAlgorithm = dataResponseBody.getData(); + if (ptJobParam.getAlgorithmId() != null) { + + TrainAlgorithmSelectAllByIdDTO trainAlgorithmSelectAllByIdDTO = new TrainAlgorithmSelectAllByIdDTO(); + trainAlgorithmSelectAllByIdDTO.setId(ptJobParam.getAlgorithmId()); + DataResponseBody dataResponseBody = algorithmClient.selectAllById(trainAlgorithmSelectAllByIdDTO); + if (dataResponseBody.succeed()) { + ptTrainAlgorithm = dataResponseBody.getData(); + } } //结果集处理 PtTrainJobDetailQueryVO ptTrainJobDetailQueryVO = new PtTrainJobDetailQueryVO(); @@ -1206,7 +1400,10 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { BeanUtils.copyProperties(ptTrainJob, ptTrainJobDetailQueryVO); 
ptTrainJobDetailQueryVO.setTrainName(ptTrain.getTrainName()).setAlgorithmId(ptJobParam.getAlgorithmId()).setRunCommand(ptJobParam.getRunCommand()) .setRunParams(ptJobParam.getRunParams()).setParamF1(ptJobParam.getParamF1()).setParamCallback(ptJobParam.getParamCallback()) - .setParamPrecise(ptJobParam.getParamPrecise()).setParamAccuracy(ptJobParam.getParamAccuracy()); + .setParamPrecise(ptJobParam.getParamPrecise()).setParamAccuracy(ptJobParam.getParamAccuracy()) + .setNotebookId(ptJobParam.getNotebookId()) + .setNotebookName(ptJobParam.getNotebookName()) + .setAlgorithmUsage(ptJobParam.getAlgorithmUsage()).setValAlgorithmUsage(ptJobParam.getValAlgorithmUsage()); long nowTime = System.currentTimeMillis(); //获取训练延时启动倒计时(分钟) if (ptJobParam.getDelayCreateTime() != null && nowTime < ptJobParam.getDelayCreateTime().getTime() && TrainJobStatusEnum.checkRunStatus(ptTrainJob.getTrainStatus())) { @@ -1237,7 +1434,6 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { if (ptTrainAlgorithm != null) { ptTrainJobDetailQueryVO.setAlgorithmName(ptTrainAlgorithm.getAlgorithmName()) .setAlgorithmSource(ptTrainAlgorithm.getAlgorithmSource()) - .setAlgorithmUsage(ptTrainAlgorithm.getAlgorithmUsage()) .setAccuracy(ptTrainAlgorithm.getAccuracy()) .setP4InferenceSpeed(ptTrainAlgorithm.getP4InferenceSpeed()); if (ptTrainAlgorithm.getAlgorithmSource() == MagicNumConstant.ONE) { @@ -1263,38 +1459,47 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { LogUtil.error(LogEnum.BIZ_TRAIN, "It is illegal for user {} to resume training job and jobId to be {}", userContextService.getCurUser().getUsername(), ptTrainJobResumeDTO.getId()); throw new BusinessException("您输入的id不存在或已被删除"); } + // 获取算法id和运行参数 QueryWrapper jobParamQuery = new QueryWrapper<>(); jobParamQuery.eq("train_job_id", ptTrainJob.getId()); PtJobParam ptJobParam = ptJobParamMapper.selectOne(jobParamQuery); - if (ptJobParam == null || ptJobParam.getAlgorithmId() < MagicNumConstant.ONE) { + if (ptJobParam 
== null || ptJobParam.getAlgorithmId() != null && ptJobParam.getAlgorithmId() < MagicNumConstant.ONE) { LogUtil.error(LogEnum.BIZ_TRAIN, "The algorithm ID corresponding to the jobId is {} query by the user {} does not exist", userContextService.getCurUser().getUsername(), ptTrainJobResumeDTO.getId()); throw new BusinessException("您查询的jobId对应的算法id不存在"); } - //获取镜像 - PtImageAndAlgorithmVO ptImageAndAlgorithmVO = getPtImageByAlgorithmId(ptJobParam.getAlgorithmId()); - //使用用户训练时提供的镜像与运行命令 - ptImageAndAlgorithmVO.setImageName(trainHarborConfig.getAddress() + StrUtil.SLASH + ptJobParam.getImageName()).setRunCommand(ptJobParam.getRunCommand()); + BaseTrainJobDTO baseTrainJobDTO = new BaseTrainJobDTO(); + BeanUtil.copyProperties(ptTrainJob, baseTrainJobDTO); + + //获取算法 + PtTrainJobBaseDTO ptTrainJobBaseDTO = convertPtTrainJobBaseDTO(ptJobParam); + + PtImageAndAlgorithmVO ptImageAndAlgorithmVO = buildPtImageAndAlgorithmVO(ptTrainJobBaseDTO); + String[] codeDirResult = ptImageAndAlgorithmVO.getCodeDir().split(StrUtil.SLASH); String codeDirName = codeDirResult[codeDirResult.length - 1]; - //处理目录问题 String noEnvPath = StrUtil.SLASH + trainJobConfig.getManage() + StrUtil.SLASH + ptTrainJob.getCreateUserId() + StrUtil.SLASH + ptTrainJob.getJobName(); String commonPath = fileStoreApi.getBucket() + noEnvPath.substring(1); - String outPath = commonPath + StrUtil.SLASH + trainJobConfig.getOutPath(); + String outPath = commonPath + StrUtil.SLASH + trainJobConfig.getModelPath(); String loadPath = commonPath + StrUtil.SLASH + trainJobConfig.getLoadPath(); String codePath = commonPath + StrUtil.SLASH + codeDirName; - String noEnvOut = noEnvPath + StrUtil.SLASH + trainJobConfig.getOutPath(); + String noEnvOut = noEnvPath + StrUtil.SLASH + trainJobConfig.getModelPath(); String path = ptTrainJobResumeDTO.getPath(); if (!path.startsWith(noEnvOut)) { LogUtil.error(LogEnum.BIZ_TRAIN, "path: {}", path); throw new BusinessException("内部错误"); } String modelLoadDir = 
path.substring(noEnvOut.length()); - FileUtil.del(fileStoreApi.getRootDir() + loadPath); - FileUtil.del(fileStoreApi.getRootDir() + codePath); - FileUtil.rename(new File(fileStoreApi.getRootDir() + outPath), fileStoreApi.getRootDir() + loadPath, false, true); + String codeAbsolutePath = fileStoreApi.getRootDir() + codePath; + String loadAbsolutePath = fileStoreApi.getRootDir() + loadPath; + String outAbsolutePath = fileStoreApi.getRootDir() + outPath; + + FileUtil.del(loadAbsolutePath); + FileUtil.del(codeAbsolutePath); + + FileUtil.rename(new File(outAbsolutePath), loadAbsolutePath, false, true); //获取训练规格信息 QueryResourceSpecsDTO queryResourceSpecsDTO = new QueryResourceSpecsDTO(); queryResourceSpecsDTO.setModule(2).setSpecsName(ptTrainJob.getTrainJobSpecsName()); @@ -1305,22 +1510,23 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { } // 拼load路径 JSONObject runParams = ptJobParam.getRunParams(); - runParams.put(trainJobConfig.getLoadKey(), trainJobConfig.getDockerTrainPath() + StrUtil.SLASH + - trainJobConfig.getLoadPath() + modelLoadDir); - BaseTrainJobDTO baseTrainJobDTO = new BaseTrainJobDTO(); - BeanUtil.copyProperties(ptTrainJob, baseTrainJobDTO); - baseTrainJobDTO.setTrainJobSpecsName(queryResourceSpecsVO.getSpecsName()).setCpuNum(queryResourceSpecsVO.getCpuNum()) - .setGpuNum(queryResourceSpecsVO.getGpuNum()).setMemNum(queryResourceSpecsVO.getMemNum()).setWorkspaceRequest(queryResourceSpecsVO.getWorkspaceRequest()); - if (queryResourceSpecsVO.getResourcesPoolType()) { - baseTrainJobDTO.setResourcesPoolType(MagicNumConstant.ONE); - } else { - baseTrainJobDTO.setResourcesPoolType(MagicNumConstant.ZERO); - } + runParams.put(trainJobConfig.getLoadKey(), + trainJobConfig.getDockerTrainPath() + StrUtil.SLASH + trainJobConfig.getLoadPath() + modelLoadDir); + PtTrain ptTrain = ptTrainMapper.selectById(ptTrainJob.getTrainId()); + baseTrainJobDTO.setTrainJobSpecsName(queryResourceSpecsVO.getSpecsName()) + 
.setPipSitePackagePath(ptImageAndAlgorithmVO.getPipSitePackagePath()) + .setCpuNum(queryResourceSpecsVO.getCpuNum()) + .setGpuNum(queryResourceSpecsVO.getGpuNum()) + .setMemNum(queryResourceSpecsVO.getMemNum()) + .setWorkspaceRequest(queryResourceSpecsVO.getWorkspaceRequest()) + .setTaskIdentify(resourceCache.getTaskIdentify(ptTrain.getId(), ptTrain.getTrainName(), trainIdPrefix)); + baseTrainJobDTO.setResourcesPoolType(queryResourceSpecsVO.getResourcesPoolType() ? MagicNumConstant.ONE : MagicNumConstant.ZERO); baseTrainJobDTO.setRunParams(runParams); // 初始化训练时间和状态 PtTrainJob updatePtTrainJob = new PtTrainJob(); - updatePtTrainJob.setId(ptTrainJob.getId()).setRuntime(TrainUtil.INIT_RUNTIME) + updatePtTrainJob.setId(ptTrainJob.getId()) + .setRuntime(TrainUtil.INIT_RUNTIME) .setTrainStatus(TrainJobStatusEnum.PENDING.getStatus()) .setUpdateTime(new Timestamp(System.currentTimeMillis())); int updateResult = ptTrainJobMapper.updateById(updatePtTrainJob); @@ -1334,10 +1540,31 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { asyncManager.execute(baseTrainJobDTO, ptTrainJob.getCreateUserId(), ptImageAndAlgorithmVO, ptTrainJob); } + /** + * Converts a PtJobParam into a PtTrainJobBaseDTO; image name and tag are only set when imageName is non-null and splits into exactly two parts on StrUtil.SLASH + * + * @param ptJobParam job parameters, must not be null (it is dereferenced unconditionally) + * @return PtTrainJobBaseDTO carrying algorithmId, notebookId, runCommand and, when available, imageName and imageTag + */ + private PtTrainJobBaseDTO convertPtTrainJobBaseDTO(PtJobParam ptJobParam) { + PtTrainJobBaseDTO ptTrainJobBaseDTO = new PtTrainJobBaseDTO(); + ptTrainJobBaseDTO.setAlgorithmId(ptJobParam.getAlgorithmId()); + ptTrainJobBaseDTO.setNotebookId(ptJobParam.getNotebookId()); + ptTrainJobBaseDTO.setRunCommand(ptJobParam.getRunCommand()); + if (ptJobParam.getImageName() != null) { + String[] infos = ptJobParam.getImageName().split(StrUtil.SLASH); + if (infos.length == 2) { + ptTrainJobBaseDTO.setImageName(infos[0]); + ptTrainJobBaseDTO.setImageTag(infos[1]); + } + } + return ptTrainJobBaseDTO; + } + /** * 获取job在grafana监控的地址 * - * @param jobId 任务ID + * @param jobId 任务ID * @return List grafana监控的地址信息 */ @Override @@ -1374,7 +1601,7 @@ public class PtTrainJobServiceImpl
implements PtTrainJobService { /** * 获取训练使用的模型信息 * - * @param ptTrainModelDTO + * @param ptTrainModelDTO * @return PtTrainJobModelVO */ @Override @@ -1494,7 +1721,7 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { /** * 回收训练任务 * @param recyclePath 文件路径 - * @param id 训练id + * @param id 训练id */ public void recycleTaskWithTrain(String recyclePath, long id) { //创建已删除训练任务的无效文件回收任务 @@ -1572,4 +1799,21 @@ public class PtTrainJobServiceImpl implements PtTrainJobService { } return PageUtil.toPage(page, visualTrainQueryVOs); } + + /** + * Stops every training job whose status is PENDING or RUNNING + * @throws BusinessException when no job is in one of those states + */ + @Override + public void batchStopTrainJob() { + // Select all training jobs that are pending or running + QueryWrapper queryTrainJobWrapper = new QueryWrapper<>(); + queryTrainJobWrapper.in("train_status", TrainJobStatusEnum.PENDING.getStatus(), TrainJobStatusEnum.RUNNING.getStatus()); + List ptTrainJobs = ptTrainJobMapper.selectList(queryTrainJobWrapper); + if (ptTrainJobs.isEmpty()) { + throw new BusinessException("没有待停止的job"); + } + // Hand the selected jobs to stopTrainJobAsync on behalf of the current user + stopTrainJobAsync.stopJobs(userContextService.getCurUser(), ptTrainJobs); + } } diff --git a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/utils/ImageUtil.java b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/utils/ImageUtil.java index 8d9c592..69c7ed3 100644 --- a/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/utils/ImageUtil.java +++ b/dubhe-server/dubhe-train/src/main/java/org/dubhe/train/utils/ImageUtil.java @@ -48,7 +48,6 @@ public class ImageUtil { **/ public String getImageUrl(BaseImageDTO baseImageDTO, UserContext user) { PtImageQueryUrlDTO ptImageQueryUrlDTO = new PtImageQueryUrlDTO(); - ptImageQueryUrlDTO.setProjectType(ImageTypeEnum.TRAIN.getType()); ptImageQueryUrlDTO.setImageTag(baseImageDTO.getImageTag()); ptImageQueryUrlDTO.setImageName(baseImageDTO.getImageName()); DataResponseBody dataResponseBody = imageClient.getImageUrl(ptImageQueryUrlDTO); diff --git a/dubhe-server/dubhe-train/src/main/resources/bootstrap.yml
b/dubhe-server/dubhe-train/src/main/resources/bootstrap.yml index 884edb8..cb1641f 100644 --- a/dubhe-server/dubhe-train/src/main/resources/bootstrap.yml +++ b/dubhe-server/dubhe-train/src/main/resources/bootstrap.yml @@ -30,7 +30,7 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 diff --git a/dubhe-server/gateway/src/main/resources/bootstrap.yml b/dubhe-server/gateway/src/main/resources/bootstrap.yml index 79e148d..aef866d 100644 --- a/dubhe-server/gateway/src/main/resources/bootstrap.yml +++ b/dubhe-server/gateway/src/main/resources/bootstrap.yml @@ -18,7 +18,7 @@ spring: refresh: true discovery: enabled: true - namespace: dubhe-server-cloud-prod + namespace: dubhe-server-cloud-dev group: dubhe server-addr: 127.0.0.1:8848 diff --git a/dubhe-server/pom.xml b/dubhe-server/pom.xml index 922d8f6..f39355d 100644 --- a/dubhe-server/pom.xml +++ b/dubhe-server/pom.xml @@ -36,6 +36,7 @@ dubhe-data-dcm dubhe-serving dubhe-serving-gateway + dubhe-terminal diff --git a/dubhe-server/yaml/common-biz.yaml b/dubhe-server/yaml/common-biz.yaml index 4bd4ad6..eea9922 100644 --- a/dubhe-server/yaml/common-biz.yaml +++ b/dubhe-server/yaml/common-biz.yaml @@ -140,3 +140,10 @@ model: json: http://127.0.0.1:32760/model_measure/measure converter: url: http://127.0.0.1:32230/model_convert + +user: + config: + notebook-delay-delete-time: 8 + cpu-limit: 10 + memory-limit: 32 + gpu-limit: 2 diff --git a/dubhe-server/yaml/common-k8s.yaml b/dubhe-server/yaml/common-k8s.yaml index d9fc7ad..7ad13c6 100644 --- a/dubhe-server/yaml/common-k8s.yaml +++ b/dubhe-server/yaml/common-k8s.yaml @@ -56,9 +56,13 @@ k8s: query: api/v1/query query-range: api/v1/query_range gpu-query-param: sum(container_accelerator_duty_cycle{pod="pod-name-placeholder"})by(pod,acc_id) + gpu-mem-total-query-param: sum(container_accelerator_memory_total_bytes{pod="pod-name-placeholder"})by(pod,acc_id) + 
gpu-mem-use-query-param: sum(container_accelerator_memory_used_bytes{pod="pod-name-placeholder"})by(pod,acc_id) cpu-range-query-param: sum(rate(container_cpu_usage_seconds_total{image!="",pod="pod-name-placeholder"}[1m])) by (pod) / (sum(container_spec_cpu_quota{image!=""}/100000) by (pod)) * 100 mem-range-query-param: sum(container_memory_rss{image!="",pod="pod-name-placeholder"}) gpu-range-query-param: sum(container_accelerator_duty_cycle{pod="pod-name-placeholder"}) by (pod,acc_id) + gpu-mem-total-range-query-param: sum(container_accelerator_memory_total_bytes{pod="pod-name-placeholder"}) by (pod,acc_id) + gpu-mem-use-range-query-param: sum(container_accelerator_memory_used_bytes{pod="pod-name-placeholder"}) by (pod,acc_id) nfs-storage-class-name: zjlab-nfs-storage namespace-limits: cpu: 10 @@ -78,4 +82,7 @@ minio: secretKey: 123@abc.com bucketName: dubhe-cloud-test presignedUrlExpiryTime: 300 - annotation: /annotation/ \ No newline at end of file + annotation: /annotation/ + +docker: + remote-api-port: 2375 \ No newline at end of file diff --git a/dubhe-server/yaml/dubhe-data-dcm.yaml b/dubhe-server/yaml/dubhe-data-dcm.yaml index 6cb739a..eeed80a 100644 --- a/dubhe-server/yaml/dubhe-data-dcm.yaml +++ b/dubhe-server/yaml/dubhe-data-dcm.yaml @@ -16,6 +16,9 @@ data: # 文件存储服务器用户名 userName: root + #数据集训练配置 + ptversion: http://127.0.0.1:8000/ + # 数据处理医学影像数据集dcm服务器配置,查看影像功能需要使用该服务 dcm: host: 127.0.0.1 diff --git a/dubhe-server/yaml/dubhe-data-task.yaml b/dubhe-server/yaml/dubhe-data-task.yaml index 36b8911..3f851d9 100644 --- a/dubhe-server/yaml/dubhe-data-task.yaml +++ b/dubhe-server/yaml/dubhe-data-task.yaml @@ -16,6 +16,9 @@ data: # 文件存储服务器用户名 userName: root + #数据集训练配置 + ptversion: http://127.0.0.1:8000/ + # 资源回收 recycle: # 超时时间 @@ -36,4 +39,4 @@ es: serverPort: 9200 transportPort: 9300 clusterName: docker-cluster - index: dataset_text_prod \ No newline at end of file + index: dataset_text_test \ No newline at end of file diff --git 
a/dubhe-server/yaml/dubhe-data.yaml b/dubhe-server/yaml/dubhe-data.yaml index 3a56520..7e64d1c 100644 --- a/dubhe-server/yaml/dubhe-data.yaml +++ b/dubhe-server/yaml/dubhe-data.yaml @@ -31,6 +31,8 @@ data: server: # 文件存储服务器用户名 userName: root + #数据集训练配置 + ptversion: http://127.0.0.1:8000/ minioweb: GetToken: diff --git a/dubhe-server/yaml/dubhe-notebook.yaml b/dubhe-server/yaml/dubhe-notebook.yaml index e6116dd..a8479fa 100644 --- a/dubhe-server/yaml/dubhe-notebook.yaml +++ b/dubhe-server/yaml/dubhe-notebook.yaml @@ -8,11 +8,6 @@ notebook-specs: mem-num: 1024 # 工作空间配额(m) disk-mem-num: 1024 -# 延时全局配置 -delay: - notebook: - #模型开发延时关闭时间 - delete: 240 # 可匿名访问路径 security: diff --git a/dubhe-server/yaml/dubhe-terminal.yaml b/dubhe-server/yaml/dubhe-terminal.yaml new file mode 100644 index 0000000..4cda475 --- /dev/null +++ b/dubhe-server/yaml/dubhe-terminal.yaml @@ -0,0 +1,13 @@ +# 专业版终端配置 +terminal: + # 专业版终端模块目录 + terminal-dir: "/terminal" + # 用户workspace目录 + workspace-dir: "/workspace" + # ssh主机 + ssh-host: 10.5.26.91 + +# 可匿名访问路径 +security: + permitAll: + matchers: /api/k8s/callback/pod/terminal,/api/docker/callback/push \ No newline at end of file diff --git a/webapp/.env.development b/webapp/.env.development index 91ec4c6..6b771af 100644 --- a/webapp/.env.development +++ b/webapp/.env.development @@ -3,6 +3,10 @@ ENV = 'development' # 默认BASE URL VUE_APP_BASE_API = '' +# TODO: 目前后端连接位于 8960端口 k8s 服务,需要后端调整后再同步调整 +# WebSocket 连接地址 +VUE_APP_WS_API = 'ws://{HOST}:8960/ws' + # 数据管理 VUE_APP_DATA_API = '' diff --git a/webapp/.env.pre b/webapp/.env.pre new file mode 100644 index 0000000..0f39204 --- /dev/null +++ b/webapp/.env.pre @@ -0,0 +1,35 @@ +ENV = 'production' + +# 默认BASE URL +VUE_APP_BASE_API = '/' + +# TODO: 目前后端连接位于 8960端口 k8s 服务,需要后端调整后再同步调整 +# WebSocket 连接地址 +VUE_APP_WS_API = 'ws://{HOST}:8960/ws' + +# 数据管理 +VUE_APP_DATA_API = '/' + +# 训练可视化 +VUE_APP_VISUAL_API = '/' + +# minio +VUE_APP_MINIO_API = 'http://10.105.1.132:9000/minio' + +# atlas +VUE_APP_ATLAS_HOST = 
'http://183.129.174.186' + +# DCM4CHEE +VUE_APP_DCM_API = 'http://pre.dubhe.club/dcm4chee/dcm4chee-arc/aets/DCM4CHEE_ADMIN' + +# minIO 服务 IP +VUE_APP_MINIO_ENDPOINT = '10.105.1.132' +# minIO 服务 端口 +VUE_APP_MINIO_PORT = '9000' +# 是否开启 SSL +VUE_APP_MINIO_USESSL = 'false' +# bucketName +VUE_APP_MINIO_BUCKETNAME = 'dubhe-pre' + +# 文档链接 +VUE_APP_DOCS_URL = http://docs.tianshu.org.cn/docs/ diff --git a/webapp/.env.production b/webapp/.env.production index a23c957..ebcd9ae 100644 --- a/webapp/.env.production +++ b/webapp/.env.production @@ -3,6 +3,10 @@ ENV = 'production' # 默认BASE URL, 后端服务地址 VUE_APP_BASE_API = '' +# TODO: 目前后端连接位于 8960端口 k8s 服务,需要后端调整后再同步调整 +# WebSocket 连接地址 +VUE_APP_WS_API = 'ws://{HOST}:8960/ws' + # 数据管理 VUE_APP_DATA_API = '' @@ -33,4 +37,4 @@ VUE_APP_MINIO_USESSL = 'false' VUE_APP_MINIO_BUCKETNAME = 'dubhe-prod' # 文档链接 -VUE_APP_DOCS_URL = http://docs.dubhe.ai/docs/ +VUE_APP_DOCS_URL = http://docs.tianshu.org.cn/docs/ diff --git a/webapp/.env.test b/webapp/.env.test index 668ac96..88ee095 100644 --- a/webapp/.env.test +++ b/webapp/.env.test @@ -3,6 +3,10 @@ ENV = 'test' # 默认BASE URL VUE_APP_BASE_API = '' +# TODO: 目前后端连接位于 8960端口 k8s 服务,需要后端调整后再同步调整 +# WebSocket 连接地址 +VUE_APP_WS_API = 'ws://{HOST}:8960/ws' + # 数据管理 VUE_APP_DATA_API = '' diff --git a/webapp/CHANGELOG.md b/webapp/CHANGELOG.md index 44ecb8b..461cc5f 100644 --- a/webapp/CHANGELOG.md +++ b/webapp/CHANGELOG.md @@ -1,3 +1,20 @@ +## 2.0.0 (2021-08-30) + +### Breaking Change + +- 新增用户资源监控功能 +- 新增用户配置功能 +- [天枢专业版] 新增天枢专业版模块,提供远程连接镜像的功能 + +### Features + +- [算法开发] 支持一键停止所有 Notebook +- [训练管理] 支持使用 Notebook 环境、Notebook 镜像、终端镜像创建训练 +- [训练管理] 支持一键停止所有训练 +- [训练管理] 监控信息增加显存信息展示 +- [训练管理] 训练数据集改为非必选 +- [训练管理] 镜像管理增加终端镜像管理 + ## 0.4.1 (2021-07-14) ### Bug Fixs diff --git a/webapp/package.json b/webapp/package.json index 30418f1..b27500c 100644 --- a/webapp/package.json +++ b/webapp/package.json @@ -1,6 +1,6 @@ { "name": "dubhe-web", - "version": "0.4.1", + "version": "2.0.0", "description": "之江天枢人工智能开源平台", 
"author": "zhejianglab", "keywords": [ @@ -16,6 +16,7 @@ "serve": "vue-cli-service serve --host 0.0.0.0", "serve:test": "vue-cli-service serve --mode test --open", "build:prod": "vue-cli-service build", + "build:pre": "vue-cli-service build --mode pre", "build:test": "vue-cli-service build --mode test", "build:dev": "vue-cli-service build --mode development", "lint": "eslint --ext .js,.vue src", @@ -71,6 +72,7 @@ "json2csv": "^5.0.1", "lodash": "^4.17.15", "minio": "7.0.16", + "mitt": "^3.0.0", "nanoid": "^3.1.3", "normalize.css": "7.0.0", "nprogress": "0.2.0", diff --git a/webapp/src/api/development/notebook.js b/webapp/src/api/development/notebook.js index 8df2c7d..b5d4b0d 100644 --- a/webapp/src/api/development/notebook.js +++ b/webapp/src/api/development/notebook.js @@ -64,13 +64,6 @@ export function open(id) { }); } -export function getStatus() { - return request({ - url: `/${API_MODULE_NAME.NOTEBOOK}/notebooks/status`, - method: 'get', - }); -} - export function getModels() { return request({ url: `/${API_MODULE_NAME.NOTEBOOK}/notebooks/notebook-model`, @@ -108,4 +101,12 @@ export function detail(data) { }); } +// 一键停止所有 Notebook +export function batchStopNotebook() { + return request({ + url: `/${API_MODULE_NAME.NOTEBOOK}/notebooks/batchStop`, + method: 'put', + }); +} + export default { list, add, del, start, stop, open, detail }; diff --git a/webapp/src/api/preparation/dataset.js b/webapp/src/api/preparation/dataset.js index 2d19901..ceba979 100644 --- a/webapp/src/api/preparation/dataset.js +++ b/webapp/src/api/preparation/dataset.js @@ -252,4 +252,12 @@ export function count(datasetId, params) { }); } +// 查询所有带有版本的预置数据集 +export function getPresetDataset() { + return request({ + url: `/${API_MODULE_NAME.DATA}/datasets/getPresetDataset`, + method: 'get', + }); +} + export default { list, add, del }; diff --git a/webapp/src/api/system/pod.js b/webapp/src/api/system/pod.js index b4b6099..fe9e2fb 100644 --- a/webapp/src/api/system/pod.js +++ 
b/webapp/src/api/system/pod.js @@ -65,3 +65,12 @@ export function getHistoryMetrics(params) { params, }); } + +// 根据用户 Id 查询用户当前资源占用情况 +export function getUserResourceInfo(userId) { + return request({ + url: `/${API_MODULE_NAME.K8S}/namespace/findNamespace`, + method: 'get', + params: { userId }, + }); +} diff --git a/webapp/src/api/system/user.js b/webapp/src/api/system/user.js index 5a2a1d0..dcb1770 100644 --- a/webapp/src/api/system/user.js +++ b/webapp/src/api/system/user.js @@ -57,4 +57,22 @@ export function findByNickName() { }); } +// 获取用户配置信息 +export function getUserConfig(userId) { + return request({ + url: `/${API_MODULE_NAME.ADMIN}/users/getUserConfig`, + method: 'get', + params: { userId }, + }); +} + +// 更改用户配置信息 +export function submitUserConfig(data) { + return request({ + url: `/${API_MODULE_NAME.ADMIN}/users/setUserConfig`, + method: 'put', + data, + }); +} + export default { list, add, edit, del }; diff --git a/webapp/src/api/terminal/index.js b/webapp/src/api/terminal/index.js new file mode 100644 index 0000000..0d97a2d --- /dev/null +++ b/webapp/src/api/terminal/index.js @@ -0,0 +1,71 @@ +/** Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +import request from '@/utils/request'; +import { API_MODULE_NAME } from '@/config'; + +// Fetch the list of terminal connections +export function getTerminalList() { + return request({ + url: `/${API_MODULE_NAME.DUBHE_PRO}/terminals/list`, + method: 'get', + }); +} + +// Create a terminal connection +export function createTerminal(data) { + return request({ + url: `/${API_MODULE_NAME.DUBHE_PRO}/terminals/create`, + method: 'post', + data, + }); +} + +// Save and stop a terminal connection +export function preserveTerminal(data) { + return request({ + url: `/${API_MODULE_NAME.DUBHE_PRO}/terminals/preserve`, + method: 'post', + data, + }); +} + +// Delete a terminal connection (id is sent in the request body) +export function deleteTerminal(id) { + return request({ + url: `/${API_MODULE_NAME.DUBHE_PRO}/terminals/delete`, + method: 'post', + data: { id }, + }); +} + +// Restart a terminal connection +export function restartTerminal(data) { + return request({ + url: `/${API_MODULE_NAME.DUBHE_PRO}/terminals/restart`, + method: 'post', + data, + }); +} + +// Fetch terminal connection details by id (id is sent as a query-string parameter) +export function getDetailById(id) { + return request({ + url: `/${API_MODULE_NAME.DUBHE_PRO}/terminals/detail`, + method: 'get', + params: { id }, + }); +} diff --git a/webapp/src/api/trainingImage/index.js b/webapp/src/api/trainingImage/index.js index b08c8c4..63e18f2 100644 --- a/webapp/src/api/trainingImage/index.js +++ b/webapp/src/api/trainingImage/index.js @@ -73,4 +73,11 @@ export function setPrecast(params) { }); } +export function getTerminalImageList() { + return request({ + url: `/${API_MODULE_NAME.IMAGE}/ptImage/terminalImageList`, + method: 'get', + }); +} + export default { list, add, edit }; diff --git a/webapp/src/api/trainingJob/job.js b/webapp/src/api/trainingJob/job.js index 6ac3d01..78bd422 100644 --- a/webapp/src/api/trainingJob/job.js +++ b/webapp/src/api/trainingJob/job.js @@ -65,6 +65,15 @@ export function stop(data) { }); } +// 停止所有训练 +export function batchStop(data) { + return request({ + url: `/${API_MODULE_NAME.TRAIN}/trainJob/batchStop`, + method: 'post', + data, +
}); +} + export function getJobList(params) { return request({ url: `/${API_MODULE_NAME.TRAIN}/trainJob/trainJobVersionDetail`, diff --git a/webapp/src/assets/styles/atomic.scss b/webapp/src/assets/styles/atomic.scss index 7eb7819..fef43c7 100644 --- a/webapp/src/assets/styles/atomic.scss +++ b/webapp/src/assets/styles/atomic.scss @@ -48,6 +48,10 @@ vertical-align: text-top; } +.v-text-bottom { + vertical-align: text-bottom; +} + .v-bottom { vertical-align: bottom; } @@ -60,6 +64,10 @@ justify-content: space-between; } +.flex-around { + justify-content: space-around; +} + .flex-vertical-align { align-items: center; } @@ -227,6 +235,11 @@ margin-bottom: 10px; } +.my-20 { + margin-top: 20px; + margin-bottom: 20px; +} + .mx-auto { margin-right: auto; margin-left: auto; diff --git a/webapp/src/assets/styles/common.scss b/webapp/src/assets/styles/common.scss index ef012d4..b5fbc65 100644 --- a/webapp/src/assets/styles/common.scss +++ b/webapp/src/assets/styles/common.scss @@ -138,6 +138,8 @@ } .dynamic { + display: flex; + .label { vertical-align: top; } @@ -288,6 +290,7 @@ pre { } .ts-tip { + padding: 12px 20px; color: $tipColor; background-color: $tipBgColor; } diff --git a/webapp/src/boot/index.js b/webapp/src/boot/index.js index 395610f..7ce5298 100644 --- a/webapp/src/boot/index.js +++ b/webapp/src/boot/index.js @@ -15,7 +15,10 @@ */ import './errorHandle'; +import { initWebSocket } from '@/utils'; const allSettled = require('promise.allsettled'); allSettled.shim(); + +initWebSocket(); diff --git a/webapp/src/components/BaseTable/index.vue b/webapp/src/components/BaseTable/index.vue index ccae5a5..7b7f318 100644 --- a/webapp/src/components/BaseTable/index.vue +++ b/webapp/src/components/BaseTable/index.vue @@ -211,7 +211,7 @@ export default { // 展示文本格式化 const getContent = (column, row) => { if (typeof column.formatter === 'function') { - return column.formatter(row[column.prop]); + return column.formatter(row[column.prop], row); } return row[column.prop]; }; diff 
--git a/webapp/src/components/Hamburger/index.vue b/webapp/src/components/Hamburger/index.vue index aebe54c..ce20067 100644 --- a/webapp/src/components/Hamburger/index.vue +++ b/webapp/src/components/Hamburger/index.vue @@ -1,9 +1,18 @@ -/* * Copyright 2019-2020 Zheng Jie * * Licensed under the Apache License, Version 2.0 (the -"License"); * you may not use this file except in compliance with the License. * You may obtain a -copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by -applicable law or agreed to in writing, software * distributed under the License is distributed on -an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See -the License for the specific language governing permissions and * limitations under the License. */ +/* +* Copyright 2019-2020 Zheng Jie +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/