package utils import ( "bufio" "fmt" "os" "regexp" "strconv" "strings" ) // SlurmResource 定义SLURM资源规格结构体 type SlurmResource struct { JobName string `json:"job_name"` CPUs string `json:"cpus"` // 每任务CPU数 Memory string `json:"memory"` // 内存 Nodes string `json:"nodes"` // 节点数 Tasks string `json:"tasks"` // 总任务数 TasksPerNode string `json:"tasks_per_node"` // 每节点任务数 CPUsPerTask string `json:"cpus_per_task"` // 每任务CPU数 Partition string `json:"partition"` // 队列/分区 Time string `json:"time"` // 时间限制 Output string `json:"output"` // 输出文件 Error string `json:"error"` // 错误文件 QOS string `json:"qos"` // 服务质量 Account string `json:"account"` // 账户 GPUs string `json:"gpus"` // GPU数量 GPUType string `json:"gpu_type"` // GPU类型 Constraint string `json:"constraint"` // 节点约束 Exclusive bool `json:"exclusive"` // 独占节点 ArrayJobID string `json:"array_job_id"` // 数组作业ID WorkingDir string `json:"working_dir"` // 工作目录 MailType string `json:"mail_type"` // 邮件类型 MailUser string `json:"mail_user"` // 邮件用户 } // SlurmParser SLURM解析器 type SlurmParser struct { patterns map[string][]*regexp.Regexp } // NewSlurmParser 创建新的SLURM解析器 func NewSlurmParser() *SlurmParser { parser := &SlurmParser{ patterns: make(map[string][]*regexp.Regexp), } parser.initPatterns() return parser } // initPatterns 初始化所有匹配模式 func (p *SlurmParser) initPatterns() { // 作业名称的各种写法 p.patterns["job_name"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--job-name[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+-J\s+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--job-name\s*=\s*"([^"]+)"`), regexp.MustCompile(`#SBATCH\s+--job-name\s*=\s*'([^']+)'`), } // CPU相关的各种写法 p.patterns["cpus_per_task"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--cpus-per-task[=\s]+(\d+)`), regexp.MustCompile(`#SBATCH\s+-c\s+(\d+)`), regexp.MustCompile(`#SBATCH\s+--cpus-per-task\s*=\s*(\d+)`), } // 内存的各种写法 p.patterns["memory"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--mem[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+-m\s+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--mem\s*=\s*([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--mem-per-cpu[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--mem-per-node[=\s]+([^\s]+)`), } // 节点数的各种写法 p.patterns["nodes"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--nodes[=\s]+(\d+)`), regexp.MustCompile(`#SBATCH\s+-N\s+(\d+)`), regexp.MustCompile(`#SBATCH\s+--nodes\s*=\s*(\d+)`), regexp.MustCompile(`#SBATCH\s+--nodes[=\s]+(\d+-\d+)`), // 范围格式 } // 任务数的各种写法 p.patterns["tasks"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--ntasks[=\s]+(\d+)`), regexp.MustCompile(`#SBATCH\s+-n\s+(\d+)`), regexp.MustCompile(`#SBATCH\s+--ntasks\s*=\s*(\d+)`), } // 每节点任务数 p.patterns["tasks_per_node"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--ntasks-per-node[=\s]+(\d+)`), regexp.MustCompile(`#SBATCH\s+--ntasks-per-node\s*=\s*(\d+)`), } // 分区/队列的各种写法 p.patterns["partition"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--partition[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+-p\s+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--partition\s*=\s*([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--partition\s*=\s*"([^"]+)"`), } // 时间限制的各种写法 p.patterns["time"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--time[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+-t\s+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--time\s*=\s*([^\s]+)`), } // 输出文件 p.patterns["output"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--output[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+-o\s+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--output\s*=\s*([^\s]+)`), } // 错误文件 p.patterns["error"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--error[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+-e\s+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--error\s*=\s*([^\s]+)`), } // 服务质量 p.patterns["qos"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--qos[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--qos\s*=\s*([^\s]+)`), } // 账户 p.patterns["account"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--account[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+-A\s+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--account\s*=\s*([^\s]+)`), } // GPU相关 p.patterns["gpus"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--gpus[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--gpus-per-node[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--gpus-per-task[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--gres[=\s]+gpu:(\d+)`), regexp.MustCompile(`#SBATCH\s+--gres[=\s]+gpu:([^:]+):(\d+)`), // gpu类型:数量 } // 约束条件 p.patterns["constraint"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--constraint[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+-C\s+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--constraint\s*=\s*"([^"]+)"`), } // 独占节点 p.patterns["exclusive"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--exclusive`), } // 数组作业 p.patterns["array"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--array[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+-a\s+([^\s]+)`), } // 工作目录 p.patterns["workdir"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--chdir[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+--workdir[=\s]+([^\s]+)`), regexp.MustCompile(`#SBATCH\s+-D\s+([^\s]+)`), } // 邮件通知 p.patterns["mail_type"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--mail-type[=\s]+([^\s]+)`), } p.patterns["mail_user"] = []*regexp.Regexp{ regexp.MustCompile(`#SBATCH\s+--mail-user[=\s]+([^\s]+)`), } } // ParseScript 解析SLURM脚本 func (p *SlurmParser) ParseScript(scriptContent string) *SlurmResource { resource := &SlurmResource{} scanner := bufio.NewScanner(strings.NewReader(scriptContent)) for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) // 跳过非SBATCH行和注释行 if !strings.HasPrefix(line, "#SBATCH") { continue } // 处理每个字段 p.parseField(line, "job_name", &resource.JobName) p.parseField(line, "cpus_per_task", &resource.CPUsPerTask) p.parseField(line, "memory", &resource.Memory) p.parseField(line, "nodes", &resource.Nodes) p.parseField(line, "tasks", &resource.Tasks) p.parseField(line, "tasks_per_node", &resource.TasksPerNode) p.parseField(line, "partition", &resource.Partition) p.parseField(line, "time", &resource.Time) p.parseField(line, "output", &resource.Output) p.parseField(line, "error", &resource.Error) p.parseField(line, "qos", &resource.QOS) p.parseField(line, "account", &resource.Account) p.parseField(line, "constraint", &resource.Constraint) p.parseField(line, "array", &resource.ArrayJobID) p.parseField(line, "workdir", &resource.WorkingDir) p.parseField(line, "mail_type", &resource.MailType) p.parseField(line, "mail_user", &resource.MailUser) // 处理GPU p.parseGPU(line, resource) // 处理exclusive if p.matchPattern(line, "exclusive") != "" { resource.Exclusive = true } } // 后处理:推导缺失的信息 p.postProcess(resource) return resource } // parseField 解析单个字段 func (p *SlurmParser) parseField(line, field string, target *string) { if *target == "" { // 只在字段为空时才设置 if value := p.matchPattern(line, field); value != "" { *target = value } } } // parseGPU 解析GPU相关信息 func (p *SlurmParser) parseGPU(line string, resource *SlurmResource) { if patterns, exists := p.patterns["gpus"]; exists { for _, pattern := range patterns { if matches := pattern.FindStringSubmatch(line); len(matches) > 1 { if strings.Contains(pattern.String(), "gres.*gpu:([^:]+):(\\d+)") && len(matches) > 2 { // gpu类型:数量格式 resource.GPUType = matches[1] resource.GPUs = matches[2] } else { resource.GPUs = matches[1] } break } } } } // matchPattern 匹配模式并返回值 func (p *SlurmParser) matchPattern(line, field string) string { if patterns, exists := p.patterns[field]; exists { for _, pattern := range patterns { if matches := pattern.FindStringSubmatch(line); len(matches) > 1 { return matches[1] } } } return "" } // postProcess 后处理,推导缺失信息 func (p *SlurmParser) postProcess(resource *SlurmResource) { // 如果没有指定CPUs但有tasks和cpus_per_task,计算总CPU数 if resource.CPUs == "" && resource.Tasks != "" && resource.CPUsPerTask != "" { if tasks, err1 := strconv.Atoi(resource.Tasks); err1 == nil { if cpusPerTask, err2 := strconv.Atoi(resource.CPUsPerTask); err2 == nil { resource.CPUs = strconv.Itoa(tasks * cpusPerTask) } } } // 如果只有tasks但没有nodes,假设为单节点 if resource.Tasks != "" && resource.Nodes == "" && resource.TasksPerNode == "" { resource.Nodes = "1" } } // ParseFile 从文件解析SLURM脚本 func (p *SlurmParser) ParseFile(filename string) (*SlurmResource, error) { content, err := os.ReadFile(filename) if err != nil { return nil, fmt.Errorf("读取文件失败: %v", err) } return p.ParseScript(string(content)), nil } // String 格式化输出 func (r *SlurmResource) String() string { var result strings.Builder result.WriteString("SLURM资源规格:\n") result.WriteString("====================\n") if r.JobName != "" { result.WriteString(fmt.Sprintf("作业名称: %s\n", r.JobName)) } if r.Partition != "" { result.WriteString(fmt.Sprintf("队列/分区: %s\n", r.Partition)) } if r.Nodes != "" { result.WriteString(fmt.Sprintf("节点数: %s\n", r.Nodes)) } if r.Tasks != "" { result.WriteString(fmt.Sprintf("任务数: %s\n", r.Tasks)) } if r.TasksPerNode != "" { result.WriteString(fmt.Sprintf("每节点任务数: %s\n", r.TasksPerNode)) } if r.CPUsPerTask != "" { result.WriteString(fmt.Sprintf("每任务CPU数: %s\n", r.CPUsPerTask)) } if r.CPUs != "" { result.WriteString(fmt.Sprintf("总CPU数: %s\n", r.CPUs)) } if r.Memory != "" { result.WriteString(fmt.Sprintf("内存: %s\n", r.Memory)) } if r.GPUs != "" { result.WriteString(fmt.Sprintf("GPU数量: %s\n", r.GPUs)) if r.GPUType != "" { result.WriteString(fmt.Sprintf("GPU类型: %s\n", r.GPUType)) } } if r.Time != "" { result.WriteString(fmt.Sprintf("运行时间: %s\n", r.Time)) } if r.Account != "" { result.WriteString(fmt.Sprintf("账户: %s\n", r.Account)) } if r.QOS != "" { result.WriteString(fmt.Sprintf("服务质量: %s\n", r.QOS)) } if r.Constraint != "" { result.WriteString(fmt.Sprintf("节点约束: %s\n", r.Constraint)) } if r.Exclusive { result.WriteString("独占节点: 是\n") } if r.ArrayJobID != "" { result.WriteString(fmt.Sprintf("数组作业: %s\n", r.ArrayJobID)) } if r.Output != "" { result.WriteString(fmt.Sprintf("输出文件: %s\n", r.Output)) } if r.Error != "" { result.WriteString(fmt.Sprintf("错误文件: %s\n", r.Error)) } return result.String() } // GetResourceSummary 获取核心资源摘要 func (r *SlurmResource) GetResourceSummary() map[string]string { summary := make(map[string]string) if r.JobName != "" { summary["job_name"] = r.JobName } if r.Partition != "" { summary["queue"] = r.Partition } if r.Nodes != "" { summary["nodes"] = r.Nodes } if r.Tasks != "" { summary["tasks"] = r.Tasks } if r.CPUs != "" { summary["total_cpus"] = r.CPUs } if r.CPUsPerTask != "" { summary["cpus_per_task"] = r.CPUsPerTask } if r.Memory != "" { summary["memory"] = r.Memory } if r.GPUs != "" { summary["gpus"] = r.GPUs } if r.Time != "" { summary["time_limit"] = r.Time } return summary }