Browse Source

Merge branch 'feature_rzs'

gitlink
JeshuaRen 1 year ago
parent
commit
a0c7a6163e
19 changed files with 1167 additions and 396 deletions
  1. +95
    -0
      main.go
  2. +2
    -2
      pkgs/actor/actor.go
  3. +89
    -0
      pkgs/async/unbound_channel.go
  4. +1
    -1
      pkgs/distlock/internal/acquire_actor.go
  5. +18
    -5
      pkgs/future/future.go
  6. +87
    -0
      pkgs/future/ready.go
  7. +63
    -62
      pkgs/future/set_value_future.go
  8. +15
    -12
      pkgs/future/set_void_future.go
  9. +9
    -1
      pkgs/mq/client.go
  10. +9
    -1
      pkgs/mq/server.go
  11. +197
    -0
      sdks/scheduler/modeljob.go
  12. +97
    -14
      sdks/scheduler/models.go
  13. +1
    -1
      sdks/storage/models.go
  14. +3
    -0
      sdks/storage/object.go
  15. +256
    -0
      sdks/unifyops/mock_data.go
  16. +48
    -297
      sdks/unifyops/unifyops.go
  17. +13
    -0
      utils/config/config.go
  18. +60
    -0
      utils/http/http.go
  19. +104
    -0
      utils/time2/test.go

+ 95
- 0
main.go View File

@@ -0,0 +1,95 @@
package main

import (
"fmt"
"io"
"os"
"strconv"
"time"

cdssdk "gitlink.org.cn/cloudream/common/sdks/storage"
)

// main runs the download timing probe (test1) against a fixed storage
// service endpoint. The test2 call is kept commented out for manual use.
func main() {
	const endpoint = "http://121.36.5.116:32010"

	test1(endpoint)
	// test2("http://127.0.0.1:7890")
}

// test1 times the three phases of a ranged object download (open, read and
// close) against the storage service at url and prints each duration.
//
// It takes three base-10 integers from the command line:
//
//	os.Args[1]  openLen: the Length requested from Download
//	os.Args[2]  readLen: how many bytes are read from the returned stream
//	os.Args[3]  partLen: the PartSize passed to Download
//
// Any failure is printed and ends the run.
func test1(url string) {
	// Indexing os.Args without this guard panics when an argument is missing.
	if len(os.Args) < 4 {
		fmt.Println("usage: <program> <openLen> <readLen> <partLen>")
		return
	}

	cli := cdssdk.NewClient(&cdssdk.Config{
		URL: url,
	})

	openLen, err := strconv.ParseInt(os.Args[1], 10, 64)
	if err != nil {
		fmt.Println(err)
		return
	}

	readLen, err := strconv.ParseInt(os.Args[2], 10, 64)
	if err != nil {
		fmt.Println(err)
		return
	}
	// make([]byte, n) panics for a negative n, so reject it up front.
	if readLen < 0 {
		fmt.Println("readLen must not be negative")
		return
	}

	partLen, err := strconv.ParseInt(os.Args[3], 10, 64)
	if err != nil {
		fmt.Println(err)
		return
	}

	startTime := time.Now()
	obj, err := cli.Object().Download(cdssdk.ObjectDownload{
		UserID:   1,
		ObjectID: 470790,
		Offset:   0,
		Length:   &openLen,
		PartSize: partLen,
	})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("Open time: %v\n", time.Since(startTime))

	startTime = time.Now()
	buf := make([]byte, readLen)
	_, err = io.ReadFull(obj.File, buf)
	fmt.Printf("Read time: %v\n", time.Since(startTime))
	if err != nil {
		fmt.Println(err)
		// Release the stream on the failure path as well.
		obj.File.Close()
		return
	}

	// The close is timed on purpose, so it is not deferred.
	startTime = time.Now()
	obj.File.Close()
	fmt.Printf("Close time: %v\n", time.Since(startTime))
}

// test2 downloads a fixed object from the storage service at url and writes
// its content to "test.txt" in the working directory. Errors are printed.
func test2(url string) {
	cli := cdssdk.NewClient(&cdssdk.Config{
		URL: url,
	})

	obj, err := cli.Object().Download(cdssdk.ObjectDownload{
		UserID:   1,
		ObjectID: 27151,
		Offset:   0,
		PartSize: 100000000,
		// Length: &openLen,
	})
	if err != nil {
		fmt.Println(err)
		return
	}
	// Always release the download stream.
	defer obj.File.Close()

	f, err := os.Create("test.txt")
	if err != nil {
		fmt.Println(err)
		return
	}

	// Close explicitly rather than with defer so that an error while flushing
	// the written data is reported too.
	_, copyErr := io.Copy(f, obj.File)
	closeErr := f.Close()
	if copyErr != nil {
		fmt.Println(copyErr)
		return
	}
	if closeErr != nil {
		fmt.Println(closeErr)
	}
}


+ 2
- 2
pkgs/actor/actor.go View File

@@ -126,7 +126,7 @@ func WaitValue[T any](ctx context.Context, c *CommandChannel, cmd func() (T, err
fut.SetComplete(val, err)
})

return fut.WaitValue(ctx)
return fut.Wait(ctx)
}

func WaitValue2[T1 any, T2 any](ctx context.Context, c *CommandChannel, cmd func() (T1, T2, error)) (T1, T2, error) {
@@ -137,5 +137,5 @@ func WaitValue2[T1 any, T2 any](ctx context.Context, c *CommandChannel, cmd func
fut.SetComplete(val1, val2, err)
})

return fut.WaitValue(ctx)
return fut.Wait(ctx)
}

+ 89
- 0
pkgs/async/unbound_channel.go View File

@@ -0,0 +1,89 @@
package async

import (
"container/list"
"errors"
"gitlink.org.cn/cloudream/common/pkgs/future"
"sync"
)

// ErrChannelClosed is the error Close records on an UnboundChannel. Once it is
// recorded, Send and Receive report it.
var ErrChannelClosed = errors.New("channel is closed")

// UnboundChannel is a mutex-guarded FIFO of values kept in a linked list,
// together with a queue of futures for receivers that arrived while the list
// was empty.
type UnboundChannel[T any] struct {
	values  *list.List                  // values sent but not yet received, oldest at the front
	waiters []*future.SetValueFuture[T] // futures handed out by Receive that still need a value, oldest first
	lock    sync.Mutex                  // held by Send, Receive and CloseWithError
	err     error                       // set once by CloseWithError; when non-nil Send and Receive return it
}

// NewUnboundChannel returns an open channel with an empty value list and no
// waiting receivers.
func NewUnboundChannel[T any]() *UnboundChannel[T] {
	ch := new(UnboundChannel[T])
	ch.values = list.New()
	return ch
}

// Error returns the error the channel was closed with, or nil while it is
// still open.
//
// The lock is taken because CloseWithError writes c.err under the same lock;
// an unguarded read would be a data race.
func (c *UnboundChannel[T]) Error() error {
	c.lock.Lock()
	defer c.lock.Unlock()

	return c.err
}

// Send appends val to the channel. If a receiver is already waiting, the
// oldest buffered value is handed to the oldest waiter immediately.
//
// It never blocks on a full buffer (the list is unbounded) and returns the
// close error once the channel has been closed.
//
// NOTE(review): a waiter whose caller already gave up (its Wait returned
// ErrContextCancelled) is still in the queue and will consume a value.
// Confirm whether callers rely on that.
func (c *UnboundChannel[T]) Send(val T) error {
	c.lock.Lock()
	defer c.lock.Unlock()

	if c.err != nil {
		return c.err
	}

	c.values.PushBack(val)

	// At most one waiter can be served per Send, so this is a plain
	// conditional rather than a loop that returns on its first iteration.
	if len(c.waiters) > 0 {
		oldest := c.values.Front()
		c.waiters[0].SetValue(c.values.Remove(oldest).(T))

		// Clear the slot before re-slicing so the backing array does not keep
		// the completed future alive.
		c.waiters[0] = nil
		c.waiters = c.waiters[1:]
	}

	return nil
}

// Receive returns a future for the next value.
//
// The future is already complete when the channel is closed (it carries the
// close error) or when a buffered value is available (it carries the oldest
// one). Otherwise a pending future is queued and completed by a later Send or
// CloseWithError.
func (c *UnboundChannel[T]) Receive() future.Future1[T] {
	c.lock.Lock()
	defer c.lock.Unlock()

	switch {
	case c.err != nil:
		return future.NewReadyError1[T](c.err)

	case c.values.Len() > 0:
		oldest := c.values.Front()
		head := oldest.Value.(T)
		c.values.Remove(oldest)
		return future.NewReadyValue1[T](head)
	}

	pending := future.NewSetValue[T]()
	c.waiters = append(c.waiters, pending)
	return pending
}

// Close closes the channel with ErrChannelClosed by delegating to
// CloseWithError.
func (c *UnboundChannel[T]) Close() {
	c.CloseWithError(ErrChannelClosed)
}

// CloseWithError closes the channel: every waiting receiver is completed with
// err, buffered values are dropped, and later Send and Receive calls report
// err. Only the first close takes effect; later calls are no-ops.
//
// A nil err is replaced by ErrChannelClosed. Otherwise c.err would stay nil
// while the value list is released, and the next Send or Receive would
// dereference a nil list.
func (c *UnboundChannel[T]) CloseWithError(err error) {
	if err == nil {
		err = ErrChannelClosed
	}

	c.lock.Lock()
	defer c.lock.Unlock()

	if c.err != nil {
		return
	}
	c.err = err

	for _, waiter := range c.waiters {
		waiter.SetError(err)
	}

	c.waiters = nil
	c.values = nil
}

+ 1
- 1
pkgs/distlock/internal/acquire_actor.go View File

@@ -93,7 +93,7 @@ func (a *AcquireActor) Acquire(ctx context.Context, req LockRequest) (string, er
}()

// 此处不能直接用ctx去等Callback,原因是Wait超时不代表锁没有获取到,这会导致锁泄露。
return info.Callback.WaitValue(context.Background())
return info.Callback.Wait(context.Background())
}

// TryAcquireNow 重试一下内部还没有成功的锁请求。不会阻塞调用者


+ 18
- 5
pkgs/future/future.go View File

@@ -6,18 +6,31 @@ import (
)

var ErrContextCancelled = fmt.Errorf("context cancelled")
// ErrCompleted is returned by Wait when the future's channel is already closed
// and its result has been taken. It has its own message so that it can be
// told apart from ErrContextCancelled in logs.
var ErrCompleted = fmt.Errorf("future already completed")

type Future interface {
Error() error
IsComplete() bool

Chan() <-chan error

Wait(ctx context.Context) error
}

type ValueFuture[T any] interface {
Future
type ChanValue1[T any] struct {
Value T
Err error
}

type ChanValue2[T1 any, T2 any] struct {
Value1 T1
Value2 T2
Err error
}

type Future1[T any] interface {
IsComplete() bool

Value() T
Chan() <-chan ChanValue1[T]

WaitValue(ctx context.Context) (T, error)
Wait(ctx context.Context) (T, error)
}

+ 87
- 0
pkgs/future/ready.go View File

@@ -0,0 +1,87 @@
package future

import "context"

// Ready is a value-less future that is already complete when it is created.
type Ready struct {
	ch chan error // filled with the result and closed by NewReady
}

// NewReady builds a completed future whose result is err (nil for success).
// The result is placed in a one-slot buffered channel that is then closed.
func NewReady(err error) *Ready {
	ready := &Ready{ch: make(chan error, 1)}
	ready.ch <- err
	close(ready.ch)
	return ready
}

// IsComplete always reports true.
func (f *Ready) IsComplete() bool {
	return true
}

// Wait returns the stored result, or ErrContextCancelled if ctx is done.
// The result can be taken from the channel only once; once it is gone Wait
// returns ErrCompleted.
func (f *Ready) Wait(ctx context.Context) error {
	select {
	case <-ctx.Done():
		return ErrContextCancelled

	case result, open := <-f.ch:
		if open {
			return result
		}
		return ErrCompleted
	}
}

// Chan exposes the result channel as receive-only. It is the same channel
// Wait reads from.
func (f *Ready) Chan() <-chan error {
	return f.ch
}

// Ready1 is a single-value future that is already complete when it is
// created.
type Ready1[T any] struct {
	ch chan ChanValue1[T] // filled with the result and closed by NewReady1
}

// NewReady1 builds a completed future carrying val and err. The pair is
// placed in a one-slot buffered channel that is then closed.
func NewReady1[T any](val T, err error) *Ready1[T] {
	ready := &Ready1[T]{ch: make(chan ChanValue1[T], 1)}
	ready.ch <- ChanValue1[T]{Value: val, Err: err}
	close(ready.ch)
	return ready
}

// NewReadyValue1 builds a completed future that succeeded with val.
func NewReadyValue1[T any](val T) *Ready1[T] {
	var noErr error
	return NewReady1(val, noErr)
}

// NewReadyError1 builds a completed future that failed with err. Its value is
// the zero value of T.
func NewReadyError1[T any](err error) *Ready1[T] {
	var zero T
	return NewReady1(zero, err)
}

// IsComplete always reports true.
func (f *Ready1[T]) IsComplete() bool {
	return true
}

// Wait returns the stored value and error, or the zero value with
// ErrContextCancelled if ctx is done.
//
// The result can be taken from the channel only once. A later call finds the
// channel closed and empty and returns ErrCompleted, as Ready.Wait does,
// rather than a zero value with a nil error that would look like success.
func (f *Ready1[T]) Wait(ctx context.Context) (T, error) {
	var zero T

	select {
	case cv, ok := <-f.ch:
		if !ok {
			return zero, ErrCompleted
		}
		return cv.Value, cv.Err

	case <-ctx.Done():
		return zero, ErrContextCancelled
	}
}

// Chan exposes the result channel as receive-only. It is the same channel
// Wait reads from.
func (f *Ready1[T]) Chan() <-chan ChanValue1[T] {
	return f.ch
}

+ 63
- 62
pkgs/future/set_value_future.go View File

@@ -6,72 +6,76 @@ import (
)

type SetValueFuture[T any] struct {
value T
err error
isCompleted bool
completeChan chan any
ch chan ChanValue1[T]
completeOnce sync.Once
}

func NewSetValue[T any]() *SetValueFuture[T] {
return &SetValueFuture[T]{
completeChan: make(chan any),
ch: make(chan ChanValue1[T], 1),
}
}

func (f *SetValueFuture[T]) SetComplete(val T, err error) {
f.completeOnce.Do(func() {
f.value = val
f.err = err
f.ch <- ChanValue1[T]{
Err: err,
Value: val,
}
close(f.ch)
f.isCompleted = true
close(f.completeChan)
})
}

func (f *SetValueFuture[T]) SetValue(val T) {
f.completeOnce.Do(func() {
f.value = val
f.ch <- ChanValue1[T]{
Value: val,
}
close(f.ch)
f.isCompleted = true
close(f.completeChan)
})
}

func (f *SetValueFuture[T]) SetError(err error) {
f.completeOnce.Do(func() {
f.err = err
f.ch <- ChanValue1[T]{
Err: err,
}
close(f.ch)
f.isCompleted = true
close(f.completeChan)
})
}

func (f *SetValueFuture[T]) Error() error {
return f.err
}

func (f *SetValueFuture[T]) Value() T {
return f.value
}

func (f *SetValueFuture[T]) IsComplete() bool {
return f.isCompleted
}

// 等待直到Complete或者ctx被取消。
// 注:返回ErrContextCancelled不代表产生结果的过程没有执行过,甚至不代表Future没有Complete
func (f *SetValueFuture[T]) Wait(ctx context.Context) error {
select {
case <-f.completeChan:
return f.err

case <-ctx.Done():
return ErrContextCancelled
}
func (f *SetValueFuture[T]) Chan() <-chan ChanValue1[T] {
return f.ch
}

func (f *SetValueFuture[T]) WaitValue(ctx context.Context) (T, error) {
// 等待直到Complete或者ctx被取消。
// 注:返回ErrContextCancelled不代表产生结果的过程没有执行过,甚至不代表Future没有Complete
//func (f *SetValueFuture[T]) Wait(ctx context.Context) error {
// select {
// case <-f.ch:
// return f.err
//
// case <-ctx.Done():
// return ErrContextCancelled
// }
//}

func (f *SetValueFuture[T]) Wait(ctx context.Context) (T, error) {
select {
case <-f.completeChan:
return f.value, f.err
case cv, ok := <-f.ch:
if !ok {
var ret T
return ret, cv.Err
}
return cv.Value, cv.Err

case <-ctx.Done():
var ret T
@@ -80,68 +84,61 @@ func (f *SetValueFuture[T]) WaitValue(ctx context.Context) (T, error) {
}

type SetValueFuture2[T1 any, T2 any] struct {
value1 T1
value2 T2
err error
isCompleted bool
completeChan chan any
ch chan ChanValue2[T1, T2]
completeOnce sync.Once
}

func NewSetValue2[T1 any, T2 any]() *SetValueFuture2[T1, T2] {
return &SetValueFuture2[T1, T2]{
completeChan: make(chan any),
ch: make(chan ChanValue2[T1, T2], 1),
}
}

func (f *SetValueFuture2[T1, T2]) SetComplete(val1 T1, val2 T2, err error) {
f.completeOnce.Do(func() {
f.value1 = val1
f.value2 = val2
f.err = err
f.ch <- ChanValue2[T1, T2]{
Value1: val1,
Value2: val2,
Err: err,
}
close(f.ch)
f.isCompleted = true
close(f.completeChan)
})
}

func (f *SetValueFuture2[T1, T2]) SetValue(val1 T1, val2 T2) {
f.completeOnce.Do(func() {
f.value1 = val1
f.value2 = val2
f.ch <- ChanValue2[T1, T2]{
Value1: val1,
Value2: val2,
}
close(f.ch)
f.isCompleted = true
close(f.completeChan)
})
}

func (f *SetValueFuture2[T1, T2]) SetError(err error) {
f.completeOnce.Do(func() {
f.err = err
f.ch <- ChanValue2[T1, T2]{
Err: err,
}
close(f.ch)
f.isCompleted = true
close(f.completeChan)
})
}

func (f *SetValueFuture2[T1, T2]) Error() error {
return f.err
}

func (f *SetValueFuture2[T1, T2]) Value() (T1, T2) {
return f.value1, f.value2
}

func (f *SetValueFuture2[T1, T2]) IsComplete() bool {
return f.isCompleted
}

func (f *SetValueFuture2[T1, T2]) Wait() error {
<-f.completeChan
return f.err
}

func (f *SetValueFuture2[T1, T2]) WaitValue(ctx context.Context) (T1, T2, error) {
func (f *SetValueFuture2[T1, T2]) Wait(ctx context.Context) (T1, T2, error) {
select {
case <-f.completeChan:
return f.value1, f.value2, f.err
case cv, ok := <-f.ch:
if !ok {
return cv.Value1, cv.Value2, cv.Err
}
return cv.Value1, cv.Value2, cv.Err

case <-ctx.Done():
var ret1 T1
@@ -149,3 +146,7 @@ func (f *SetValueFuture2[T1, T2]) WaitValue(ctx context.Context) (T1, T2, error)
return ret1, ret2, ErrContextCancelled
}
}

func (f *SetValueFuture2[T1, T2]) Chan() <-chan ChanValue2[T1, T2] {
return f.ch
}

+ 15
- 12
pkgs/future/set_void_future.go View File

@@ -6,47 +6,50 @@ import (
)

type SetVoidFuture struct {
err error
isCompleted bool
completeChan chan any
ch chan error
completeOnce sync.Once
}

func NewSetVoid() *SetVoidFuture {
return &SetVoidFuture{
completeChan: make(chan any),
ch: make(chan error, 1),
}
}

func (f *SetVoidFuture) SetVoid() {
f.completeOnce.Do(func() {
f.ch <- nil
close(f.ch)
f.isCompleted = true
close(f.completeChan)
})
}

func (f *SetVoidFuture) SetError(err error) {
f.completeOnce.Do(func() {
f.err = err
f.ch <- err
close(f.ch)
f.isCompleted = true
close(f.completeChan)
})
}

func (f *SetVoidFuture) Error() error {
return f.err
}

func (f *SetVoidFuture) IsComplete() bool {
return f.isCompleted
}

func (f *SetVoidFuture) Wait(ctx context.Context) error {
select {
case <-f.completeChan:
return f.err
case v, ok := <-f.ch:
if !ok {
return ErrCompleted
}
return v

case <-ctx.Done():
return ErrContextCancelled
}
}

func (f *SetVoidFuture) Chan() <-chan error {
return f.ch
}

+ 9
- 1
pkgs/mq/client.go View File

@@ -2,6 +2,7 @@ package mq

import (
"fmt"
"net"
"sync"
"time"

@@ -70,7 +71,14 @@ type RabbitMQTransport struct {
}

func NewRabbitMQTransport(url string, key string, exchange string) (*RabbitMQTransport, error) {
connection, err := amqp.Dial(url)
config := amqp.Config{
Dial: func(network, addr string) (net.Conn, error) {
return net.DialTimeout(network, addr, 60*time.Second) // 设置连接超时时间为 60 秒
},
}
connection, err := amqp.DialConfig(url, config)

//connection, err := amqp.Dial(url)
if err != nil {
return nil, fmt.Errorf("connecting to %s: %w", url, err)
}


+ 9
- 1
pkgs/mq/server.go View File

@@ -2,6 +2,7 @@ package mq

import (
"fmt"
"net"
"time"

"github.com/streadway/amqp"
@@ -77,7 +78,14 @@ type RabbitMQServer struct {
}

func NewRabbitMQServer(url string, queueName string, onMessage MessageHandlerFn) (*RabbitMQServer, error) {
connection, err := amqp.Dial(url)
config := amqp.Config{
Dial: func(network, addr string) (net.Conn, error) {
return net.DialTimeout(network, addr, 60*time.Second) // 设置连接超时时间为 60 秒
},
}
connection, err := amqp.DialConfig(url, config)

//connection, err := amqp.Dial(url)
if err != nil {
return nil, fmt.Errorf("connecting to %s: %w", url, err)
}


+ 197
- 0
sdks/scheduler/modeljob.go View File

@@ -0,0 +1,197 @@
package schsdk

import (
"fmt"
"gitlink.org.cn/cloudream/common/consts/errorcode"
"gitlink.org.cn/cloudream/common/pkgs/mq"
myhttp "gitlink.org.cn/cloudream/common/utils/http"
"gitlink.org.cn/cloudream/common/utils/serder"
"net/url"
"strings"
)

// MessageBodyBase has no fields but implements Noop; every kind of
// MessageBody must embed this struct.
type MessageBodyBase struct{}

// Noop does nothing. Note that the receiver here is a pointer.
func (b *MessageBodyBase) Noop() {}

// RunningModelResp is the payload returned by Client.QueryRunningModels.
type RunningModelResp struct {
	MessageBodyBase
	// RunningModels maps a string key to a running model's info. Note that it
	// is serialized under the JSON key "allNode".
	RunningModels map[string]RunningModelInfo `json:"allNode"`
}

// AllModelResp is the payload returned by Client.QueryAllModels.
type AllModelResp struct {
	MessageBodyBase
	AllModels []Models `json:"allModels"`
}

// Models is one entry of AllModelResp.AllModels: a model's ID and name.
type Models struct {
	ModelID   ModelID   `json:"modelID"`
	ModelName ModelName `json:"modelName"`
}

// NodeInfo is one entry of RunningModelInfo.Nodes.
type NodeInfo struct {
	MessageBodyBase
	InstanceID JobID `json:"instanceID"`
	//NodeID NodeID `json:"nodeID"`
	Address Address `json:"address"`
	// Status is a free-form string here.
	// NOTE(review): presumably one of the status constants declared in this
	// file (Deploying, Waiting, ...); confirm against the producer.
	Status string `json:"status"`
}

// RunningModelInfo is the value type of RunningModelResp.RunningModels: the
// identifiers of a model and the list of its nodes.
type RunningModelInfo struct {
	MessageBodyBase
	JobSetID        JobSetID   `json:"jobSetID"`
	ModelID         ModelID    `json:"modelID"`
	ModelName       ModelName  `json:"modelName"`
	CustomModelName ModelName  `json:"customModelName"`
	Nodes           []NodeInfo `json:"nodes"`
}

// ECSNodeRunningInfoReq is the request for Client.ECSNodeRunningInfo. Both
// fields are tagged binding:"required".
//
// NOTE(review): this type embeds mq.MessageBodyBase while the other types in
// this file embed the local MessageBodyBase; confirm that is intended.
type ECSNodeRunningInfoReq struct {
	mq.MessageBodyBase
	CustomModelName ModelName `form:"customModelName" json:"customModelName" binding:"required"`
	ModelID         ModelID   `form:"modelID" json:"modelID" binding:"required"`
}

// ECSNodeRunningInfoResp is the payload returned by
// Client.ECSNodeRunningInfo: one usage record per node.
type ECSNodeRunningInfoResp struct {
	MessageBodyBase
	NodeUsageRateInfos []NodeUsageRateInfo `json:"nodeUsageRateInfos"`
}

// NewECSNodeRunningInfoResp wraps nodeUsageRateInfos in a response value.
// The slice is stored as given, not copied.
func NewECSNodeRunningInfoResp(nodeUsageRateInfos []NodeUsageRateInfo) *ECSNodeRunningInfoResp {
	resp := new(ECSNodeRunningInfoResp)
	resp.NodeUsageRateInfos = nodeUsageRateInfos
	return resp
}

// NodeUsageRateInfo holds the memory, GPU and CPU usage series of one node,
// identified by its instance ID and address.
type NodeUsageRateInfo struct {
	MessageBodyBase
	InstanceID        JobID       `json:"instanceID"`
	Address           Address     `json:"address"`
	MemoryUtilization []UsageRate `json:"memoryUtilization"`
	GPUUtilization    []UsageRate `json:"GPUUtilization"`
	CPUUtilization    []UsageRate `json:"CPUUtilization"`
}

// UsageRate is one sample of a usage series. Both fields are carried as
// strings.
// NOTE(review): the timestamp format and the unit of Number are not visible
// here; confirm with the producer.
type UsageRate struct {
	Timestamp string `json:"timestamp"`
	Number    string `json:"number"`
}

// Untyped string constants shared by the model-job API.
// NOTE(review): the blank-line groups look like job kinds, ECS/server
// operations, monitoring, rclone mounting and deployment statuses; that
// reading comes from the names only, confirm against the scheduler.
const (
	FineTuning     = "finetuning"
	DataPreprocess = "DataPreprocess"

	CreateECS     = "create"
	RunECS        = "run"
	PauseECS      = "pause"
	DestroyECS    = "destroy"
	OperateServer = "operate"
	RestartServer = "restartServer"

	GPUMonitor = "GPUMonitor"

	RcloneMount = "rclone"
	Mounted     = "mounted"
	MountDir    = "/mnt/oss"

	Deploying = "Deploying"
	Waiting   = "Waiting"
	Failed    = "Failed"
	Invalid   = "Invalid"
)

// QueryRunningModelsReq is the request for Client.QueryRunningModels and
// Client.QueryAllModels.
type QueryRunningModelsReq struct {
	UserID int64 `form:"userID" json:"userID"`
}

// QueryRunningModels fetches the running models from
// "/job/queryRunningModels".
//
// It returns the decoded payload when the response code is errorcode.OK, and
// an error when the request fails, the response is not JSON, the body cannot
// be parsed, or the service reports a non-OK code.
func (c *Client) QueryRunningModels(req QueryRunningModelsReq) (*RunningModelResp, error) {
	return getModelJobJSON[RunningModelResp](c, "/job/queryRunningModels", req)
}

// QueryAllModels fetches the list of all models from "/job/getAllModels".
// Errors are reported as for QueryRunningModels.
func (c *Client) QueryAllModels(req QueryRunningModelsReq) (*AllModelResp, error) {
	return getModelJobJSON[AllModelResp](c, "/job/getAllModels", req)
}

// ECSNodeRunningInfo fetches per-node usage data from
// "/job/getECSNodeRunningInfo". Errors are reported as for
// QueryRunningModels.
func (c *Client) ECSNodeRunningInfo(req ECSNodeRunningInfoReq) (*ECSNodeRunningInfoResp, error) {
	return getModelJobJSON[ECSNodeRunningInfoResp](c, "/job/getECSNodeRunningInfo", req)
}

// getModelJobJSON sends body with myhttp.GetJSON to path under c.baseURL and
// decodes the response envelope, returning a pointer to its Data field on
// errorcode.OK.
func getModelJobJSON[T any](c *Client, path string, body any) (*T, error) {
	// Named target rather than url so the net/url package is not shadowed.
	target, err := url.JoinPath(c.baseURL, path)
	if err != nil {
		return nil, err
	}

	resp, err := myhttp.GetJSON(target, myhttp.RequestParam{
		Body: body,
	})
	if err != nil {
		return nil, err
	}
	// Close the body on every path so the connection can be released.
	defer resp.Body.Close()

	contType := resp.Header.Get("Content-Type")
	if !strings.Contains(contType, myhttp.ContentTypeJSON) {
		return nil, fmt.Errorf("unknow response content type: %s", contType)
	}

	var codeResp response[T]
	if err := serder.JSONToObjectStream(resp.Body, &codeResp); err != nil {
		return nil, fmt.Errorf("parsing response: %w", err)
	}

	if codeResp.Code != errorcode.OK {
		return nil, codeResp.ToError()
	}

	return &codeResp.Data, nil
}

+ 97
- 14
sdks/scheduler/models.go View File

@@ -7,14 +7,20 @@ import (
)

const (
JobTypeNormal = "Normal"
JobTypeResource = "Resource"
JobTypeInstance = "Instance"
JobTypeNormal = "Normal"
JobTypeResource = "Resource"
JobTypeInstance = "Instance"
JobTypeFinetuning = "Finetuning"
JobTypeDataPreprocess = "DataPreprocess"

FileInfoTypePackage = "Package"
FileInfoTypeLocalFile = "LocalFile"
FileInfoTypeResource = "Resource"
FileInfoTypeImage = "Image"

MemoryUtilization = "MemoryUtilization"
GPUUtilization = "GPUUtilization"
CPUUtilization = "CPUUtilization"
)

type JobID string
@@ -26,6 +32,12 @@ type ImageID int64
// 计算中心ID
type CCID int64

type ModelID string
type ModelName string
type ECSInstanceID string
type NodeID int64
type Address string

type JobSetInfo struct {
Jobs []JobInfo `json:"jobs"`
}
@@ -39,6 +51,9 @@ var JobInfoTypeUnion = types.NewTypeUnion[JobInfo](
(*DataReturnJobInfo)(nil),
(*MultiInstanceJobInfo)(nil),
(*InstanceJobInfo)(nil),
(*UpdateMultiInstanceJobInfo)(nil),
(*FinetuningJobInfo)(nil),
(*DataPreprocessJobInfo)(nil),
)
var _ = serder.UseTypeUnionInternallyTagged(&JobInfoTypeUnion, "type")

@@ -53,6 +68,30 @@ func (i *JobInfoBase) GetLocalJobID() string {
type NormalJobInfo struct {
serder.Metadata `union:"Normal"`
JobInfoBase
Type string `json:"type"`
Files JobFilesInfo `json:"files"`
Runtime JobRuntimeInfo `json:"runtime"`
Resources JobResourcesInfo `json:"resources"`
Services JobServicesInfo `json:"services"`
ModelJobInfo ModelJobInfo `json:"modelJobInfo"`
}

// FinetuningJobInfo 模型微调
type FinetuningJobInfo struct {
serder.Metadata `union:"Finetuning"`
JobInfoBase
Type string `json:"type"`
Files JobFilesInfo `json:"files"`
Runtime JobRuntimeInfo `json:"runtime"`
Resources JobResourcesInfo `json:"resources"`
Services JobServicesInfo `json:"services"`
ModelJobInfo ModelJobInfo `json:"modelJobInfo"`
}

// DataPreprocessJobInfo 数据预处理
type DataPreprocessJobInfo struct {
serder.Metadata `union:"DataPreprocess"`
JobInfoBase
Type string `json:"type"`
Files JobFilesInfo `json:"files"`
Runtime JobRuntimeInfo `json:"runtime"`
@@ -68,23 +107,47 @@ type DataReturnJobInfo struct {
TargetLocalJobID string `json:"targetLocalJobID"`
}

// MultiInstanceJobInfo 多实例(推理任务)
type MultiInstanceJobInfo struct {
serder.Metadata `union:"MultiInstance"`
JobInfoBase
Type string `json:"type"`
Files JobFilesInfo `json:"files"`
Runtime JobRuntimeInfo `json:"runtime"`
Resources JobResourcesInfo `json:"resources"`
Type string `json:"type"`
Files JobFilesInfo `json:"files"`
Runtime JobRuntimeInfo `json:"runtime"`
Resources JobResourcesInfo `json:"resources"`
ModelJobInfo ModelJobInfo `json:"modelJobInfo"`
}

// UpdateMultiInstanceJobInfo 更新模型
type UpdateMultiInstanceJobInfo struct {
serder.Metadata `union:"UpdateModel"`
JobInfoBase
Type string `json:"type"`
Files JobFilesInfo `json:"files"`
Runtime JobRuntimeInfo `json:"runtime"`
MultiInstanceJobSetID JobSetID `json:"multiInstanceJobSetID"`
UpdateType string `json:"updateType"`
SubJobs []JobID `json:"subJobs"`
Operate string `json:"operate"`
}

type ModelJobInfo struct {
Type string `json:"type"`
ModelID ModelID `json:"modelID"`
CustomModelName ModelName `json:"customModelName"`
Command string `json:"command"`
}

// InstanceJobInfo 单实例(推理任务)
type InstanceJobInfo struct {
serder.Metadata `union:"Instance"`
JobInfoBase
Type string `json:"type"`
LocalJobID string `json:"multiInstJobID"`
Files JobFilesInfo `json:"files"`
Runtime JobRuntimeInfo `json:"runtime"`
Resources JobResourcesInfo `json:"resources"`
Type string `json:"type"`
LocalJobID string `json:"multiInstJobID"`
Files JobFilesInfo `json:"files"`
Runtime JobRuntimeInfo `json:"runtime"`
Resources JobResourcesInfo `json:"resources"`
ModelJobInfo ModelJobInfo `json:"modelJobInfo"`
}

type JobFilesInfo struct {
@@ -219,6 +282,26 @@ func (b *NoEnvBootstrap) GetBootstrapType() string {
}

const (
JobDataInEnv = "SCH_DATA_IN"
JobDataOutEnv = "SCH_DATA_OUT"
JobDataInEnv = "SCH_DATA_IN"
JobDataOutEnv = "SCH_DATA_OUT"
FinetuningOutEnv = "FINETUNING_OUT"
AccessPath = "ACCESS_PATH"
)

type Rclone struct {
CDSRcloneID string `json:"cds_rcloneID"`
CDSRcloneConfigID string `json:"cds_rcloneConfigID"`
}

type InferencePlatform struct {
PlatformName string `json:"platformName"`
ApiBaseUrl string `json:"apiBaseUrl"`
ApiKey string `json:"apiKey"`
ApiProxy string `json:"apiProxy"`
LlmModel string `json:"llmModel"`
EmbedModel string `json:"embedModel"`
ChunkMaxLength string `json:"chunkMaxLength"`
StartChunkThreshold string `json:"startChunkThreshold"`
SimilarityThreshold string `json:"similarityThreshold"`
EntriesPerFile string `json:"entriesPerFile"`
}

+ 1
- 1
sdks/storage/models.go View File

@@ -72,7 +72,7 @@ func (b *RepRedundancy) Value() (driver.Value, error) {
return serder.ObjectToJSONEx[Redundancy](b)
}

var DefaultECRedundancy = *NewECRedundancy(2, 3, 1024*1024*5)
var DefaultECRedundancy = *NewECRedundancy(3, 6, 1024*1024*5)

type ECRedundancy struct {
serder.Metadata `union:"ec"`


+ 3
- 0
sdks/storage/object.go View File

@@ -138,7 +138,10 @@ func (c *ObjectService) Download(req ObjectDownload) (*DownloadingObject, error)
return nil, err
}

startTime := time.Now()
file, err := files.MoveNext()
endTime := time.Now()
fmt.Printf("files.MoveNext(), spend time: %.0f s", endTime.Sub(startTime).Seconds())
if err == iterator.ErrNoMoreItem {
return nil, fmt.Errorf("no file found in response")
}


+ 256
- 0
sdks/unifyops/mock_data.go View File

@@ -0,0 +1,256 @@
package uopsdk

// mockQuota is the total/available pair of one integer-counted resource in
// the mock data.
type mockQuota struct {
	total     int64
	available int64
}

// shuguang returns mock resource data for the CPU-heavy node: CPU is 600
// total / 500 available, every other resource is 100 / 100.
func shuguang() (*[]ResourceData, error) {
	return mockResourceData(mockQuota{600, 500}, mockQuota{100, 100}, mockQuota{100, 100})
}

// modelarts returns mock resource data for the GPU-heavy node: GPU is 600
// total / 500 available, every other resource is 100 / 100.
func modelarts() (*[]ResourceData, error) {
	return mockResourceData(mockQuota{100, 100}, mockQuota{100, 100}, mockQuota{600, 500})
}

// hanwuji returns mock resource data for the NPU-heavy node: NPU is 600
// total / 500 available, every other resource is 100 / 100.
func hanwuji() (*[]ResourceData, error) {
	return mockResourceData(mockQuota{100, 100}, mockQuota{600, 500}, mockQuota{100, 100})
}

// mockResourceData builds the six-entry mock list in the fixed order CPU,
// NPU, GPU, MLU, storage, memory. Only the CPU, NPU and GPU figures vary
// between nodes: MLU is always 100 / 100 with an empty unit, and storage and
// memory are always 100 / 100 "GB". The error is always nil.
func mockResourceData(cpu, npu, gpu mockQuota) (*[]ResourceData, error) {
	// count wraps an integer figure in a UnitValue with an empty unit.
	count := func(v int64) UnitValue[int64] {
		return UnitValue[int64]{Value: v, Unit: ""}
	}
	hundredGB := UnitValue[float64]{Value: 100, Unit: "GB"}

	ret := []ResourceData{
		&CPUResourceData{
			Name:      ResourceTypeCPU,
			Total:     count(cpu.total),
			Available: count(cpu.available),
		},
		&NPUResourceData{
			Name:      ResourceTypeNPU,
			Total:     count(npu.total),
			Available: count(npu.available),
		},
		&GPUResourceData{
			Name:      ResourceTypeGPU,
			Total:     count(gpu.total),
			Available: count(gpu.available),
		},
		&MLUResourceData{
			Name:      ResourceTypeMLU,
			Total:     count(100),
			Available: count(100),
		},
		&StorageResourceData{
			Name:      ResourceTypeStorage,
			Total:     hundredGB,
			Available: hundredGB,
		},
		&MemoryResourceData{
			Name:      ResourceTypeMemory,
			Total:     hundredGB,
			Available: hundredGB,
		},
	}

	return &ret, nil
}

+ 48
- 297
sdks/unifyops/unifyops.go View File

@@ -223,302 +223,53 @@ func (c *Client) GetMemoryData(node GetOneResourceDataReq) (*MemoryResourceData,
}

func (c *Client) GetIndicatorData(node GetOneResourceDataReq) (*[]ResourceData, error) {
//url, err := url.JoinPath(c.baseURL, "/cmdb/resApi/getIndicatorData")
//if err != nil {
// return nil, err
//}
//resp, err := myhttp.PostJSON(url, myhttp.RequestParam{
// Body: node,
//})
//if err != nil {
// return nil, err
//}
//
//contType := resp.Header.Get("Content-Type")
//if strings.Contains(contType, myhttp.ContentTypeJSON) {
//
// var codeResp response[[]map[string]any]
// if err := serder.JSONToObjectStream(resp.Body, &codeResp); err != nil {
// return nil, fmt.Errorf("parsing response: %w", err)
// }
//
// if codeResp.Code != CORRECT_CODE {
// return nil, codeResp.ToError()
// }
//
// var ret []ResourceData
// for _, mp := range codeResp.Data {
// var data ResourceData
// err := serder.MapToObject(mp, &data)
// if err != nil {
// return nil, err
// }
// ret = append(ret, data)
// }
//
// return &ret, nil
//}
//
//return nil, fmt.Errorf("unknow response content type: %s", contType)
if node.SlwNodeID == 1 {
return mockData1()
}

if node.SlwNodeID == 2 {
return mockData2()
}

return mockData3()
}

func mockData1() (*[]ResourceData, error) {
var ret []ResourceData

cpuResourceData := CPUResourceData{
Name: ResourceTypeCPU,
Total: UnitValue[int64]{
Value: 100,
Unit: "",
},
Available: UnitValue[int64]{
Value: 100,
Unit: "",
},
}
ret = append(ret, &cpuResourceData)

npuResourceData := NPUResourceData{
Name: ResourceTypeNPU,
Total: UnitValue[int64]{
Value: 0,
Unit: "",
},
Available: UnitValue[int64]{
Value: 0,
Unit: "",
},
}
ret = append(ret, &npuResourceData)

gpuResourceData := GPUResourceData{
Name: ResourceTypeGPU,
Total: UnitValue[int64]{
Value: 0,
Unit: "",
},
Available: UnitValue[int64]{
Value: 0,
Unit: "",
},
}
ret = append(ret, &gpuResourceData)

mluResourceData := MLUResourceData{
Name: ResourceTypeMLU,
Total: UnitValue[int64]{
Value: 100,
Unit: "",
},
Available: UnitValue[int64]{
Value: 100,
Unit: "",
},
}
ret = append(ret, &mluResourceData)

storageResourceData := StorageResourceData{
Name: ResourceTypeStorage,
Total: UnitValue[float64]{
Value: 100,
Unit: "GB",
},
Available: UnitValue[float64]{
Value: 100,
Unit: "GB",
},
}
ret = append(ret, &storageResourceData)

memoryResourceData := MemoryResourceData{
Name: ResourceTypeMemory,
Total: UnitValue[float64]{
Value: 100,
Unit: "GB",
},
Available: UnitValue[float64]{
Value: 100,
Unit: "GB",
},
}
ret = append(ret, &memoryResourceData)

return &ret, nil
}

func mockData2() (*[]ResourceData, error) {
var ret []ResourceData

cpuResourceData := CPUResourceData{
Name: ResourceTypeCPU,
Total: UnitValue[int64]{
Value: 100,
Unit: "",
},
Available: UnitValue[int64]{
Value: 100,
Unit: "",
},
}
ret = append(ret, &cpuResourceData)

npuResourceData := NPUResourceData{
Name: ResourceTypeNPU,
Total: UnitValue[int64]{
Value: 100,
Unit: "",
},
Available: UnitValue[int64]{
Value: 100,
Unit: "",
},
}
ret = append(ret, &npuResourceData)

gpuResourceData := GPUResourceData{
Name: ResourceTypeGPU,
Total: UnitValue[int64]{
Value: 0,
Unit: "",
},
Available: UnitValue[int64]{
Value: 0,
Unit: "",
},
}
ret = append(ret, &gpuResourceData)

mluResourceData := MLUResourceData{
Name: ResourceTypeMLU,
Total: UnitValue[int64]{
Value: 0,
Unit: "",
},
Available: UnitValue[int64]{
Value: 0,
Unit: "",
},
}
ret = append(ret, &mluResourceData)

storageResourceData := StorageResourceData{
Name: ResourceTypeStorage,
Total: UnitValue[float64]{
Value: 100,
Unit: "GB",
},
Available: UnitValue[float64]{
Value: 100,
Unit: "GB",
},
}
ret = append(ret, &storageResourceData)

memoryResourceData := MemoryResourceData{
Name: ResourceTypeMemory,
Total: UnitValue[float64]{
Value: 100,
Unit: "GB",
},
Available: UnitValue[float64]{
Value: 100,
Unit: "GB",
},
}
ret = append(ret, &memoryResourceData)

return &ret, nil
switch node.SlwNodeID {
case 1:
return shuguang()
case 2:
return modelarts()
case 3:
return hanwuji()
}
return nil, nil
}

// mockData3 returns a hard-coded list of resource entries: CPU and GPU are
// reported as 100 total / 100 available, NPU and MLU as 0 / 0, and storage and
// memory as 100 GB total / 100 GB available. The returned error is always nil.
func mockData3() (*[]ResourceData, error) {
	// The same three quantities are reused for both Total and Available of
	// every entry, so they are spelled out once here.
	var (
		hundred   = UnitValue[int64]{Value: 100, Unit: ""}
		zero      = UnitValue[int64]{Value: 0, Unit: ""}
		hundredGB = UnitValue[float64]{Value: 100, Unit: "GB"}
	)

	var resources []ResourceData

	// Compute devices.
	resources = append(resources, &CPUResourceData{
		Name:      ResourceTypeCPU,
		Total:     hundred,
		Available: hundred,
	})
	resources = append(resources, &NPUResourceData{
		Name:      ResourceTypeNPU,
		Total:     zero,
		Available: zero,
	})
	resources = append(resources, &GPUResourceData{
		Name:      ResourceTypeGPU,
		Total:     hundred,
		Available: hundred,
	})
	resources = append(resources, &MLUResourceData{
		Name:      ResourceTypeMLU,
		Total:     zero,
		Available: zero,
	})

	// Capacity-style resources.
	resources = append(resources, &StorageResourceData{
		Name:      ResourceTypeStorage,
		Total:     hundredGB,
		Available: hundredGB,
	})
	resources = append(resources, &MemoryResourceData{
		Name:      ResourceTypeMemory,
		Total:     hundredGB,
		Available: hundredGB,
	})

	return &resources, nil
}
//func (c *Client) GetIndicatorData(node GetOneResourceDataReq) (*[]ResourceData, error) {
//url, err := url.JoinPath(c.baseURL, "/cmdb/resApi/getIndicatorData")
//if err != nil {
// return nil, err
//}
//resp, err := myhttp.PostJSON(url, myhttp.RequestParam{
// Body: node,
//})
//if err != nil {
// return nil, err
//}
//
//contType := resp.Header.Get("Content-Type")
//if strings.Contains(contType, myhttp.ContentTypeJSON) {
//
// var codeResp response[[]map[string]any]
// if err := serder.JSONToObjectStream(resp.Body, &codeResp); err != nil {
// return nil, fmt.Errorf("parsing response: %w", err)
// }
//
// if codeResp.Code != CORRECT_CODE {
// return nil, codeResp.ToError()
// }
//
// var ret []ResourceData
// for _, mp := range codeResp.Data {
// var data ResourceData
// err := serder.MapToObject(mp, &data)
// if err != nil {
// return nil, err
// }
// ret = append(ret, data)
// }
//
// return &ret, nil
//}
//
//return nil, fmt.Errorf("unknow response content type: %s", contType)
//}

+ 13
- 0
utils/config/config.go View File

@@ -6,6 +6,7 @@ import (
"github.com/imdario/mergo"
"os"
"path/filepath"
"strings"
)

// Load 加载配置文件
@@ -26,6 +27,18 @@ func DefaultLoad(modeulName string, defCfg interface{}) error {
return err
}

if strings.Contains(execPath, "scheduler") {
execPath = "D:\\Work\\Codes\\new\\workspace\\workspace\\scheduler\\common\\assets\\confs\\"
}

if strings.Contains(execPath, "storage") {
execPath = "D:\\Work\\Codes\\new\\workspace\\workspace\\storage\\common\\assets\\confs\\"
}

if strings.Contains(execPath, "gateway") {
execPath = "D:\\Work\\Codes\\new\\workspace\\workspace\\gateway\\assets\\confs\\"
}

// TODO 可以考虑根据环境变量读取不同的配置
configFilePath := filepath.Join(filepath.Dir(execPath), "..", "confs", fmt.Sprintf("%s.config.json", modeulName))
return Load(configFilePath, defCfg)


+ 60
- 0
utils/http/http.go View File

@@ -56,6 +56,27 @@ func GetJSON(url string, param RequestParam) (*http.Response, error) {
return defaultClient.Do(req)
}

// DeleteJSON issues an HTTP DELETE request to url using the package's
// defaultClient. The request is populated from param in three steps —
// prepareQuery with param.Query, prepareHeader with param.Header and
// prepareJSONBody with param.Body — and the first step that fails aborts the
// call with that error. On success the client's response is returned as-is;
// the caller is responsible for closing its body.
func DeleteJSON(url string, param RequestParam) (*http.Response, error) {
	request, err := http.NewRequest(http.MethodDelete, url, nil)
	if err != nil {
		return nil, err
	}

	err = prepareQuery(request, param.Query)
	if err != nil {
		return nil, err
	}

	err = prepareHeader(request, param.Header)
	if err != nil {
		return nil, err
	}

	err = prepareJSONBody(request, param.Body)
	if err != nil {
		return nil, err
	}

	return defaultClient.Do(request)
}

func GetForm(url string, param RequestParam) (*http.Response, error) {
req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil {
@@ -98,6 +119,45 @@ func PostJSON(url string, param RequestParam) (*http.Response, error) {
return defaultClient.Do(req)
}

// PostJSONRow issues an HTTP POST request to url whose body is sent exactly as
// supplied, without any encoding step, and with the Content-Type header set to
// ContentTypeJSON.
//
// param.Query and param.Header are applied through prepareQuery and
// prepareHeader. param.Body must be one of:
//   - nil: the request is sent without a body;
//   - string or []byte: the bytes are used verbatim as the request body.
//
// Any other body type is rejected with an error before anything is sent. On
// success the client's response is returned as-is; the caller is responsible
// for closing its body.
func PostJSONRow(url string, param RequestParam) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodPost, url, nil)
	if err != nil {
		return nil, err
	}

	if err = prepareQuery(req, param.Query); err != nil {
		return nil, err
	}

	if err = prepareHeader(req, param.Header); err != nil {
		return nil, err
	}

	setHeader(req.Header, "Content-Type", ContentTypeJSON)

	var payload []byte
	switch body := param.Body.(type) {
	case nil:
		// A nil body is valid: send the POST without a payload instead of
		// handing back a nil response together with a nil error, which a
		// caller cannot tell apart from success and would dereference.
		return defaultClient.Do(req)
	case string:
		payload = []byte(body)
	case []byte:
		payload = body
	default:
		return nil, fmt.Errorf("body error")
	}

	req.ContentLength = int64(len(payload))
	req.Body = io.NopCloser(bytes.NewReader(payload))
	// The body lives in memory, so let the client obtain a fresh reader when
	// it has to resend the request (for example after a 307/308 redirect).
	req.GetBody = func() (io.ReadCloser, error) {
		return io.NopCloser(bytes.NewReader(payload)), nil
	}

	return defaultClient.Do(req)
}

func PostForm(url string, param RequestParam) (*http.Response, error) {
req, err := http.NewRequest(http.MethodPost, url, nil)
if err != nil {


+ 104
- 0
utils/time2/test.go View File

@@ -0,0 +1,104 @@
package time2

import (
"fmt"
"path"
"runtime"
"strings"
"time"
)

// Measurement is a small stopwatch for ad-hoc timing of code sections.
//
// Begin starts (or disables) a measurement, Point reports the time elapsed
// since the previous report together with the total since Begin, and End does
// the same with an "[end]" marker. Every report is tagged with the file name
// and line of the code that called Begin, Point or End, and is delivered
// through the printer callback. All methods are no-ops on a nil receiver, and
// a Measurement without a printer silently discards its reports.
type Measurement struct {
	startTime     time.Time    // time of the last enabled Begin
	lastPointTime time.Time    // time of the most recent Begin/Point/End report
	printer       func(string) // sink for report lines; may be nil
	on            bool         // whether reports are currently produced
	title         string       // dot-joined title given to Begin
}

// NewMeasurement returns a Measurement that delivers its reports to printer.
// The measurement produces nothing until Begin is called with on == true.
func NewMeasurement(printer func(string)) Measurement {
	return Measurement{
		printer: printer,
	}
}

// emit forwards msg to the printer, if one is configured. A zero-value
// Measurement (or one built with a nil printer) would otherwise panic on the
// first report.
func (m *Measurement) emit(msg string) {
	if m.printer != nil {
		m.printer(msg)
	}
}

// Begin enables or disables the measurement and records its title, which is
// the title parts joined with ".". When on is true the clock is (re)started
// and a "[begin]" or "[begin:<title>]" line is reported, followed by the
// caller's file:line, or "unknown point" if the caller cannot be determined.
func (m *Measurement) Begin(on bool, title ...string) {
	if m == nil {
		return
	}

	m.on = on
	m.title = strings.Join(title, ".")

	if !on {
		return
	}

	m.startTime = time.Now()
	m.lastPointTime = m.startTime

	// Skip 1 frame: report the location of whoever called Begin.
	_, file, line, ok := runtime.Caller(1)

	titlePart := ""
	if m.title != "" {
		titlePart = fmt.Sprintf(":%s", m.title)
	}

	if ok {
		m.emit(fmt.Sprintf("[begin%v]%v:%v", titlePart, path.Base(file), line))
	} else {
		m.emit(fmt.Sprintf("[begin%v]unknown point", titlePart))
	}
}

// Point reports an intermediate timing line if the measurement is enabled.
// The head parts are joined with "." and appended to the title in the line's
// "[...]" prefix.
func (m *Measurement) Point(head ...string) {
	if m == nil || !m.on {
		return
	}

	// makePointString must be called directly from here: it resolves the
	// reported location relative to its own caller.
	m.emit(m.makePointString(strings.Join(head, ".")))
}

// makePointString builds one report line of the form
//
//	[<title>.<head>]<file>:<line>@<since last report>(<since Begin>)
//
// and advances lastPointTime to now. The "[...]" prefix is omitted when both
// title and head are empty. The file and line are those of the caller of the
// exported method that invoked makePointString, so it must only be called
// directly from such a method.
func (m *Measurement) makePointString(head string) string {
	last := m.lastPointTime
	now := time.Now()
	m.lastPointTime = now

	// Skip 2 frames: this method and the exported method (Point/End) above it.
	_, file, line, ok := runtime.Caller(2)

	prefixCont := m.title
	if head != "" {
		if prefixCont == "" {
			prefixCont = head
		} else {
			prefixCont = fmt.Sprintf("%s.%s", prefixCont, head)
		}
	}

	prefixPart := ""
	if prefixCont != "" {
		prefixPart = fmt.Sprintf("[%s]", prefixCont)
	}

	if ok {
		return fmt.Sprintf("%v%v:%v@%v(%v)", prefixPart, path.Base(file), line, now.Sub(last), now.Sub(m.startTime))
	}

	return fmt.Sprintf("%vunknown point@%v(%v)", prefixPart, now.Sub(last), now.Sub(m.startTime))
}

// End reports a final timing line if the measurement is enabled. The line has
// the same shape as one produced by Point, prefixed with "[end]" and followed
// by a newline. End does not disable the measurement.
func (m *Measurement) End(head ...string) {
	if m == nil || !m.on {
		return
	}

	m.emit(fmt.Sprintf("[end]%v\n", m.makePointString(strings.Join(head, "."))))
}


Loading…
Cancel
Save