You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

vector_embedding.go 4.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. // Copyright 2023 The casbin Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package object
  15. import (
  16. "context"
  17. "fmt"
  18. "io"
  19. "net/http"
  20. "strings"
  21. "time"
  22. "github.com/casbin/casibase/ai"
  23. "github.com/casbin/casibase/storage"
  24. "github.com/casbin/casibase/util"
  25. "golang.org/x/time/rate"
  26. )
  27. func filterTextFiles(files []*storage.Object) []*storage.Object {
  28. var res []*storage.Object
  29. for _, file := range files {
  30. if strings.HasSuffix(file.Key, ".txt") || strings.HasSuffix(file.Key, ".md") {
  31. res = append(res, file)
  32. }
  33. }
  34. return res
  35. }
  36. func getTextFiles(provider string, prefix string) ([]*storage.Object, error) {
  37. files, err := storage.ListObjects(provider, prefix)
  38. if err != nil {
  39. return nil, err
  40. }
  41. return filterTextFiles(files), nil
  42. }
  43. func getObjectReadCloser(object *storage.Object) (io.ReadCloser, error) {
  44. resp, err := http.Get(object.Url)
  45. if err != nil {
  46. return nil, err
  47. }
  48. if resp.StatusCode != http.StatusOK {
  49. resp.Body.Close()
  50. return nil, fmt.Errorf("HTTP request failed with status code: %d", resp.StatusCode)
  51. }
  52. return resp.Body, nil
  53. }
  54. func addEmbeddedVector(authToken string, text string, storeName string, fileName string) (bool, error) {
  55. embedding, err := ai.GetEmbeddingSafe(authToken, text)
  56. if err != nil {
  57. return false, err
  58. }
  59. displayName := text
  60. if len(text) > 25 {
  61. displayName = text[:25]
  62. }
  63. vector := &Vector{
  64. Owner: "admin",
  65. Name: fmt.Sprintf("vector_%s", util.GetRandomName()),
  66. CreatedTime: util.GetCurrentTime(),
  67. DisplayName: displayName,
  68. Store: storeName,
  69. File: fileName,
  70. Text: text,
  71. Data: embedding,
  72. }
  73. return AddVector(vector)
  74. }
  75. func setTxtObjectVector(authToken string, provider string, key string, storeName string) (bool, error) {
  76. lb := rate.NewLimiter(rate.Every(time.Minute), 3)
  77. txtObjects, err := getTextFiles(provider, key)
  78. if err != nil {
  79. return false, err
  80. }
  81. if len(txtObjects) == 0 {
  82. return false, nil
  83. }
  84. for _, txtObject := range txtObjects {
  85. readCloser, err := getObjectReadCloser(txtObject)
  86. if err != nil {
  87. return false, err
  88. }
  89. defer readCloser.Close()
  90. splitTxts := ai.GetSplitTxt(readCloser)
  91. for _, splitTxt := range splitTxts {
  92. if lb.Allow() {
  93. success, err := addEmbeddedVector(authToken, splitTxt, storeName, txtObject.Key)
  94. if err != nil {
  95. return false, err
  96. }
  97. if !success {
  98. return false, nil
  99. }
  100. } else {
  101. err := lb.Wait(context.Background())
  102. if err != nil {
  103. return false, err
  104. }
  105. success, err := addEmbeddedVector(authToken, splitTxt, storeName, txtObject.Key)
  106. if err != nil {
  107. return false, err
  108. }
  109. if !success {
  110. return false, nil
  111. }
  112. }
  113. }
  114. }
  115. return true, nil
  116. }
  117. func getRelatedVectors(owner string) ([]*Vector, error) {
  118. vectors, err := GetVectors(owner)
  119. if err != nil {
  120. return nil, err
  121. }
  122. if len(vectors) == 0 {
  123. return nil, fmt.Errorf("no knowledge vectors found")
  124. }
  125. return vectors, nil
  126. }
  127. func GetNearestVectorText(authToken string, owner string, question string) (string, error) {
  128. qVector, err := ai.GetEmbeddingSafe(authToken, question)
  129. if err != nil {
  130. return "", err
  131. }
  132. if qVector == nil {
  133. return "", fmt.Errorf("no qVector found")
  134. }
  135. vectors, err := getRelatedVectors(owner)
  136. if err != nil {
  137. return "", err
  138. }
  139. var nVectors [][]float32
  140. for _, candidate := range vectors {
  141. nVectors = append(nVectors, candidate.Data)
  142. }
  143. i := ai.GetNearestVectorIndex(qVector, nVectors)
  144. return vectors[i].Text, nil
  145. }