From e6ff917c405d0bbceff1a58ab874ea5180836d50 Mon Sep 17 00:00:00 2001
From: Yang Luo
Date: Sat, 30 Sep 2023 18:06:11 +0800
Subject: [PATCH] Skip existing items in RefreshStoreVectors()

Before generating an embedding for a text section, addVectorsForStore()
now looks up the vector stored for the same store, file and section index
and skips the section when one is found.

---
 object/vector.go           | 21 +++++++++++++++++++++
 object/vector_embedding.go | 12 ++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/object/vector.go b/object/vector.go
index 4e2491f..38ca6ae 100644
--- a/object/vector.go
+++ b/object/vector.go
@@ -72,6 +72,27 @@ func getVector(owner string, name string) (*Vector, error) {
 	}
 }
 
+// getVectorByIndex returns the vector recorded for text section index of
+// file in store under owner. It returns (nil, nil) when no such vector
+// exists and (nil, err) when the lookup itself fails.
+//
+// NOTE(review): the populated Vector is used as the query condition. If
+// engine is xorm, zero-valued fields are presumably omitted from the WHERE
+// clause, so index 0 would match any section of the file - confirm, and
+// pin the index column explicitly if so.
+func getVectorByIndex(owner string, store string, file string, index int) (*Vector, error) {
+	vector := Vector{Owner: owner, Store: store, File: file, Index: index}
+	existed, err := adapter.engine.Get(&vector)
+	if err != nil {
+		return nil, err
+	}
+	if !existed {
+		return nil, nil
+	}
+
+	return &vector, nil
+}
+
 func GetVector(id string) (*Vector, error) {
 	owner, name := util.GetOwnerAndNameFromId(id)
 	return getVector(owner, name)
diff --git a/object/vector_embedding.go b/object/vector_embedding.go
index c7aee14..29e7c9c 100644
--- a/object/vector_embedding.go
+++ b/object/vector_embedding.go
@@ -93,6 +93,18 @@ func addVectorsForStore(storageProviderObj storage.StorageProvider, embeddingPro
 
 		textSections := txt.GetTextSections(text)
 		for i, textSection := range textSections {
+			// Skip sections that already have a stored vector.
+			var vector *Vector
+			vector, err = getVectorByIndex("admin", storeName, file.Key, i)
+			if err != nil {
+				return false, err
+			}
+
+			if vector != nil {
+				fmt.Printf("[%d/%d] Generating embedding for store: [%s]'s text section: %s\n", i+1, len(textSections), storeName, "Skipped due to already exists")
+				continue
+			}
+
 			if timeLimiter.Allow() {
 				fmt.Printf("[%d/%d] Generating embedding for store: [%s]'s text section: %s\n", i+1, len(textSections), storeName, textSection)
 				affected, err = addEmbeddedVector(embeddingProviderObj, textSection, storeName, file.Key, i, embeddingProviderName)