|
- package utils
-
- import (
- "bufio"
- "fmt"
- "os"
- "regexp"
- "strconv"
- "strings"
- )
-
// SlurmResource describes the resources requested by a SLURM batch script.
// String fields are empty when the corresponding value is not set.
type SlurmResource struct {
	// JobName is the job name.
	JobName string `json:"job_name"`

	// CPUs is the total CPU count. The parser derives it from Tasks and
	// CPUsPerTask rather than reading it from a directive.
	CPUs string `json:"cpus"`

	// Memory is the memory request.
	Memory string `json:"memory"`

	// Nodes is the number of nodes.
	Nodes string `json:"nodes"`

	// Tasks is the total number of tasks.
	Tasks string `json:"tasks"`

	// TasksPerNode is the number of tasks per node.
	TasksPerNode string `json:"tasks_per_node"`

	// CPUsPerTask is the number of CPUs per task.
	CPUsPerTask string `json:"cpus_per_task"`

	// Partition is the queue/partition.
	Partition string `json:"partition"`

	// Time is the time limit.
	Time string `json:"time"`

	// Output is the output file.
	Output string `json:"output"`

	// Error is the error file.
	Error string `json:"error"`

	// QOS is the quality of service.
	QOS string `json:"qos"`

	// Account is the account.
	Account string `json:"account"`

	// GPUs is the number of GPUs.
	GPUs string `json:"gpus"`

	// GPUType is the GPU type.
	GPUType string `json:"gpu_type"`

	// Constraint is the node constraint.
	Constraint string `json:"constraint"`

	// Exclusive reports whether exclusive node access was requested.
	Exclusive bool `json:"exclusive"`

	// ArrayJobID is the array job specification.
	ArrayJobID string `json:"array_job_id"`

	// WorkingDir is the working directory.
	WorkingDir string `json:"working_dir"`

	// MailType is the mail notification type.
	MailType string `json:"mail_type"`

	// MailUser is the mail recipient.
	MailUser string `json:"mail_user"`
}
-
// SlurmParser extracts resource requests from the #SBATCH directives of a
// SLURM batch script.
type SlurmParser struct {
	// patterns maps a field key (for example "nodes") to the regular
	// expressions tried, in order, against a directive line. The first
	// expression that matches supplies the value, so more specific
	// expressions must come before more general ones.
	patterns map[string][]*regexp.Regexp
}

// NewSlurmParser returns a parser with all directive patterns compiled.
func NewSlurmParser() *SlurmParser {
	parser := &SlurmParser{
		patterns: make(map[string][]*regexp.Regexp),
	}
	parser.initPatterns()
	return parser
}

// initPatterns compiles the directive patterns for every supported field and
// stores them in p.patterns.
//
// Every expression has at least one capture group, because callers read the
// value from the first submatch. Within a field, quoted forms are listed
// before the plain form; otherwise the plain form would match first and
// return a value that still carries the quote (or stops at the first space).
func (p *SlurmParser) initPatterns() {
	if p.patterns == nil {
		p.patterns = make(map[string][]*regexp.Regexp)
	}

	specs := map[string][]string{
		// Job name.
		"job_name": {
			`#SBATCH\s+--job-name[=\s]+"([^"]+)"`,
			`#SBATCH\s+--job-name[=\s]+'([^']+)'`,
			`#SBATCH\s+--job-name[=\s]+([^\s]+)`,
			`#SBATCH\s+-J\s+([^\s]+)`,
		},
		// CPUs per task.
		"cpus_per_task": {
			`#SBATCH\s+--cpus-per-task[=\s]+(\d+)`,
			`#SBATCH\s+-c\s+(\d+)`,
		},
		// Memory. There is deliberately no "-m" form: in sbatch "-m" is the
		// short option for --distribution, not for --mem.
		"memory": {
			`#SBATCH\s+--mem[=\s]+([^\s]+)`,
			`#SBATCH\s+--mem-per-cpu[=\s]+([^\s]+)`,
			`#SBATCH\s+--mem-per-node[=\s]+([^\s]+)`,
		},
		// Node count, either a single number or a "min-max" range.
		"nodes": {
			`#SBATCH\s+--nodes[=\s]+(\d+(?:-\d+)?)`,
			`#SBATCH\s+-N\s+(\d+(?:-\d+)?)`,
		},
		// Total task count.
		"tasks": {
			`#SBATCH\s+--ntasks[=\s]+(\d+)`,
			`#SBATCH\s+-n\s+(\d+)`,
		},
		// Tasks per node.
		"tasks_per_node": {
			`#SBATCH\s+--ntasks-per-node[=\s]+(\d+)`,
		},
		// Partition / queue.
		"partition": {
			`#SBATCH\s+--partition[=\s]+"([^"]+)"`,
			`#SBATCH\s+--partition[=\s]+([^\s]+)`,
			`#SBATCH\s+-p\s+([^\s]+)`,
		},
		// Time limit.
		"time": {
			`#SBATCH\s+--time[=\s]+([^\s]+)`,
			`#SBATCH\s+-t\s+([^\s]+)`,
		},
		// Output file.
		"output": {
			`#SBATCH\s+--output[=\s]+([^\s]+)`,
			`#SBATCH\s+-o\s+([^\s]+)`,
		},
		// Error file.
		"error": {
			`#SBATCH\s+--error[=\s]+([^\s]+)`,
			`#SBATCH\s+-e\s+([^\s]+)`,
		},
		// Quality of service.
		"qos": {
			`#SBATCH\s+--qos[=\s]+([^\s]+)`,
		},
		// Account.
		"account": {
			`#SBATCH\s+--account[=\s]+([^\s]+)`,
			`#SBATCH\s+-A\s+([^\s]+)`,
		},
		// GPUs. The typed gres form (two capture groups: type, count) is
		// listed before the count-only form so that a type name starting
		// with a digit is not mistaken for a count. The type may not contain
		// ':' ',' or whitespace, which keeps "gpu:4,other:1" on the
		// count-only pattern.
		"gpus": {
			`#SBATCH\s+--gpus[=\s]+([^\s]+)`,
			`#SBATCH\s+--gpus-per-node[=\s]+([^\s]+)`,
			`#SBATCH\s+--gpus-per-task[=\s]+([^\s]+)`,
			`#SBATCH\s+--gres[=\s]+gpu:([^:,\s]+):(\d+)`,
			`#SBATCH\s+--gres[=\s]+gpu:(\d+)`,
		},
		// Node constraint.
		"constraint": {
			`#SBATCH\s+--constraint[=\s]+"([^"]+)"`,
			`#SBATCH\s+--constraint[=\s]+([^\s]+)`,
			`#SBATCH\s+-C\s+([^\s]+)`,
		},
		// Exclusive node access. The flag itself is captured so that a
		// first-submatch lookup yields a non-empty value when it is present.
		"exclusive": {
			`#SBATCH\s+(--exclusive)\b`,
		},
		// Job array.
		"array": {
			`#SBATCH\s+--array[=\s]+([^\s]+)`,
			`#SBATCH\s+-a\s+([^\s]+)`,
		},
		// Working directory.
		"workdir": {
			`#SBATCH\s+--chdir[=\s]+([^\s]+)`,
			`#SBATCH\s+--workdir[=\s]+([^\s]+)`,
			`#SBATCH\s+-D\s+([^\s]+)`,
		},
		// Mail notification.
		"mail_type": {
			`#SBATCH\s+--mail-type[=\s]+([^\s]+)`,
		},
		"mail_user": {
			`#SBATCH\s+--mail-user[=\s]+([^\s]+)`,
		},
	}

	for field, exprs := range specs {
		compiled := make([]*regexp.Regexp, 0, len(exprs))
		for _, expr := range exprs {
			compiled = append(compiled, regexp.MustCompile(expr))
		}
		p.patterns[field] = compiled
	}
}
-
- // ParseScript 解析SLURM脚本
- func (p *SlurmParser) ParseScript(scriptContent string) *SlurmResource {
- resource := &SlurmResource{}
-
- scanner := bufio.NewScanner(strings.NewReader(scriptContent))
-
- for scanner.Scan() {
- line := strings.TrimSpace(scanner.Text())
-
- // 跳过非SBATCH行和注释行
- if !strings.HasPrefix(line, "#SBATCH") {
- continue
- }
-
- // 处理每个字段
- p.parseField(line, "job_name", &resource.JobName)
- p.parseField(line, "cpus_per_task", &resource.CPUsPerTask)
- p.parseField(line, "memory", &resource.Memory)
- p.parseField(line, "nodes", &resource.Nodes)
- p.parseField(line, "tasks", &resource.Tasks)
- p.parseField(line, "tasks_per_node", &resource.TasksPerNode)
- p.parseField(line, "partition", &resource.Partition)
- p.parseField(line, "time", &resource.Time)
- p.parseField(line, "output", &resource.Output)
- p.parseField(line, "error", &resource.Error)
- p.parseField(line, "qos", &resource.QOS)
- p.parseField(line, "account", &resource.Account)
- p.parseField(line, "constraint", &resource.Constraint)
- p.parseField(line, "array", &resource.ArrayJobID)
- p.parseField(line, "workdir", &resource.WorkingDir)
- p.parseField(line, "mail_type", &resource.MailType)
- p.parseField(line, "mail_user", &resource.MailUser)
-
- // 处理GPU
- p.parseGPU(line, resource)
-
- // 处理exclusive
- if p.matchPattern(line, "exclusive") != "" {
- resource.Exclusive = true
- }
- }
-
- // 后处理:推导缺失的信息
- p.postProcess(resource)
-
- return resource
- }
-
- // parseField 解析单个字段
- func (p *SlurmParser) parseField(line, field string, target *string) {
- if *target == "" { // 只在字段为空时才设置
- if value := p.matchPattern(line, field); value != "" {
- *target = value
- }
- }
- }
-
- // parseGPU 解析GPU相关信息
- func (p *SlurmParser) parseGPU(line string, resource *SlurmResource) {
- if patterns, exists := p.patterns["gpus"]; exists {
- for _, pattern := range patterns {
- if matches := pattern.FindStringSubmatch(line); len(matches) > 1 {
- if strings.Contains(pattern.String(), "gres.*gpu:([^:]+):(\\d+)") && len(matches) > 2 {
- // gpu类型:数量格式
- resource.GPUType = matches[1]
- resource.GPUs = matches[2]
- } else {
- resource.GPUs = matches[1]
- }
- break
- }
- }
- }
- }
-
- // matchPattern 匹配模式并返回值
- func (p *SlurmParser) matchPattern(line, field string) string {
- if patterns, exists := p.patterns[field]; exists {
- for _, pattern := range patterns {
- if matches := pattern.FindStringSubmatch(line); len(matches) > 1 {
- return matches[1]
- }
- }
- }
- return ""
- }
-
- // postProcess 后处理,推导缺失信息
- func (p *SlurmParser) postProcess(resource *SlurmResource) {
- // 如果没有指定CPUs但有tasks和cpus_per_task,计算总CPU数
- if resource.CPUs == "" && resource.Tasks != "" && resource.CPUsPerTask != "" {
- if tasks, err1 := strconv.Atoi(resource.Tasks); err1 == nil {
- if cpusPerTask, err2 := strconv.Atoi(resource.CPUsPerTask); err2 == nil {
- resource.CPUs = strconv.Itoa(tasks * cpusPerTask)
- }
- }
- }
-
- // 如果只有tasks但没有nodes,假设为单节点
- if resource.Tasks != "" && resource.Nodes == "" && resource.TasksPerNode == "" {
- resource.Nodes = "1"
- }
- }
-
- // ParseFile 从文件解析SLURM脚本
- func (p *SlurmParser) ParseFile(filename string) (*SlurmResource, error) {
- content, err := os.ReadFile(filename)
- if err != nil {
- return nil, fmt.Errorf("读取文件失败: %v", err)
- }
- return p.ParseScript(string(content)), nil
- }
-
- // String 格式化输出
- func (r *SlurmResource) String() string {
- var result strings.Builder
- result.WriteString("SLURM资源规格:\n")
- result.WriteString("====================\n")
-
- if r.JobName != "" {
- result.WriteString(fmt.Sprintf("作业名称: %s\n", r.JobName))
- }
- if r.Partition != "" {
- result.WriteString(fmt.Sprintf("队列/分区: %s\n", r.Partition))
- }
- if r.Nodes != "" {
- result.WriteString(fmt.Sprintf("节点数: %s\n", r.Nodes))
- }
- if r.Tasks != "" {
- result.WriteString(fmt.Sprintf("任务数: %s\n", r.Tasks))
- }
- if r.TasksPerNode != "" {
- result.WriteString(fmt.Sprintf("每节点任务数: %s\n", r.TasksPerNode))
- }
- if r.CPUsPerTask != "" {
- result.WriteString(fmt.Sprintf("每任务CPU数: %s\n", r.CPUsPerTask))
- }
- if r.CPUs != "" {
- result.WriteString(fmt.Sprintf("总CPU数: %s\n", r.CPUs))
- }
- if r.Memory != "" {
- result.WriteString(fmt.Sprintf("内存: %s\n", r.Memory))
- }
- if r.GPUs != "" {
- result.WriteString(fmt.Sprintf("GPU数量: %s\n", r.GPUs))
- if r.GPUType != "" {
- result.WriteString(fmt.Sprintf("GPU类型: %s\n", r.GPUType))
- }
- }
- if r.Time != "" {
- result.WriteString(fmt.Sprintf("运行时间: %s\n", r.Time))
- }
- if r.Account != "" {
- result.WriteString(fmt.Sprintf("账户: %s\n", r.Account))
- }
- if r.QOS != "" {
- result.WriteString(fmt.Sprintf("服务质量: %s\n", r.QOS))
- }
- if r.Constraint != "" {
- result.WriteString(fmt.Sprintf("节点约束: %s\n", r.Constraint))
- }
- if r.Exclusive {
- result.WriteString("独占节点: 是\n")
- }
- if r.ArrayJobID != "" {
- result.WriteString(fmt.Sprintf("数组作业: %s\n", r.ArrayJobID))
- }
- if r.Output != "" {
- result.WriteString(fmt.Sprintf("输出文件: %s\n", r.Output))
- }
- if r.Error != "" {
- result.WriteString(fmt.Sprintf("错误文件: %s\n", r.Error))
- }
-
- return result.String()
- }
-
- // GetResourceSummary 获取核心资源摘要
- func (r *SlurmResource) GetResourceSummary() map[string]string {
- summary := make(map[string]string)
-
- if r.JobName != "" {
- summary["job_name"] = r.JobName
- }
- if r.Partition != "" {
- summary["queue"] = r.Partition
- }
- if r.Nodes != "" {
- summary["nodes"] = r.Nodes
- }
- if r.Tasks != "" {
- summary["tasks"] = r.Tasks
- }
- if r.CPUs != "" {
- summary["total_cpus"] = r.CPUs
- }
- if r.CPUsPerTask != "" {
- summary["cpus_per_task"] = r.CPUsPerTask
- }
- if r.Memory != "" {
- summary["memory"] = r.Memory
- }
- if r.GPUs != "" {
- summary["gpus"] = r.GPUs
- }
- if r.Time != "" {
- summary["time_limit"] = r.Time
- }
-
- return summary
- }
|