new_information_loader.py 1.3 KB

1234567891011121314151617181920212223242526272829
  1. from add_vectordb import GetVectorStore, get_data_list, read_and_split_files, create_ids, get_document, check_existed_data
  2. from dotenv import load_dotenv
  3. import os
  4. from langchain_community.vectorstores import SupabaseVectorStore
  5. from langchain_openai import OpenAIEmbeddings
  6. from supabase.client import Client, create_client
  7. import gdown
  8. load_dotenv("../.env")
  9. supabase_url = os.environ.get("SUPABASE_URL")
  10. supabase_key = os.environ.get("SUPABASE_KEY")
  11. document_table = "documents2"
  12. supabase: Client = create_client(supabase_url, supabase_key)
  13. embeddings = OpenAIEmbeddings()
  14. vector_store = GetVectorStore(embeddings, supabase, document_table)
  15. # a file
  16. url = "https://docs.google.com/document/u/0/export?format=docx&id=1bg1yOYlFd8GkDy_JuASKIWVN4MNbd9moZ4P-3stqaoI&token=AC4w5Vj1CZYNkmPrnJXQrJbcE5VVua5sig%3A1727167683932&ouid=103663058481204095886&includes_info_params=true&usp=drive_web&cros_files=false&inspectorResult=%7B%22pc%22%3A97%2C%22lplc%22%3A9%7D"
  17. path = "/home/ling/systex/file_loader"
  18. output = "new_information.docx"
  19. gdown.download(url, os.path.join(path, output))
  20. vector_store.delete([output])
  21. file_list = [os.path.join(path, output)]
  22. document_ids, documents, document_metadatas = get_document(data_list=file_list, update=True)
  23. vector_store.insert(documents, document_metadatas)