1234567891011121314151617181920212223242526272829 |
"""Refresh a single document in the Supabase-backed vector store.

The script downloads one .docx export, removes the entries previously stored
under that file name, and inserts the freshly parsed content.  Everything runs
at import time; there is no entry-point guard.
"""
import os

from dotenv import load_dotenv
import gdown
from langchain_community.vectorstores import SupabaseVectorStore
from langchain_openai import OpenAIEmbeddings
from supabase.client import Client, create_client

from add_vectordb import (
    GetVectorStore,
    check_existed_data,
    create_ids,
    get_data_list,
    get_document,
    read_and_split_files,
)

# The explicit relative path is resolved against the current working
# directory, so the script is expected to be launched from a directory whose
# parent holds the .env file.
load_dotenv("../.env")

supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_KEY")
if not supabase_url or not supabase_key:
    # Fail early with an actionable message instead of letting the client
    # constructor reject a None argument.
    raise RuntimeError(
        "SUPABASE_URL and SUPABASE_KEY must be set (expected in ../.env)"
    )

document_table = "documents2"
supabase: Client = create_client(supabase_url, supabase_key)
embeddings = OpenAIEmbeddings()
vector_store = GetVectorStore(embeddings, supabase, document_table)

# A single file to refresh.
# NOTE(review): this export URL embeds a `token` query parameter and an
# account id; it looks session-specific -- confirm it does not expire and
# that it is acceptable to keep it in source control.
url = "https://docs.google.com/document/u/0/export?format=docx&id=1bg1yOYlFd8GkDy_JuASKIWVN4MNbd9moZ4P-3stqaoI&token=AC4w5Vj1CZYNkmPrnJXQrJbcE5VVua5sig%3A1727167683932&ouid=103663058481204095886&includes_info_params=true&usp=drive_web&cros_files=false&inspectorResult=%7B%22pc%22%3A97%2C%22lplc%22%3A9%7D"
path = "/home/ling/systex/file_loader"
output = "new_information.docx"
local_file = os.path.join(path, output)

# Stop before touching the store if the download did not produce a file:
# gdown signals some failures by returning None rather than raising.
downloaded = gdown.download(url, local_file)
if downloaded is None:
    raise RuntimeError(f"Download failed, vector store left unchanged: {url}")

# Drop what is currently stored for this file, then insert the new content.
# Presumably delete() matches entries by file name -- verify in add_vectordb.
vector_store.delete([output])

file_list = [local_file]
# get_document returns (ids, documents, metadatas); the ids are not needed
# by insert(), so they are discarded.
_, documents, document_metadatas = get_document(data_list=file_list, update=True)
vector_store.insert(documents, document_metadatas)
|