news_vectordb.py

import os

from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
import pandas as pd
from supabase import Client, create_client
import nest_asyncio

# Allow nested asyncio event loops so the async loader can run in
# environments that already have a running loop (e.g. Jupyter).
nest_asyncio.apply()

from news_documents import NewsLoader
from add_vectordb import GetVectorStore

# Load Supabase credentials from the project's .env file.
load_dotenv("../.env")
supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_KEY")
document_table = "documents2"

supabase: Client = create_client(supabase_url, supabase_key)
embeddings = OpenAIEmbeddings()
vector_store = GetVectorStore(embeddings, supabase, document_table)

# Fetch the crawled news records whose pages will be embedded.
response = (
    supabase.table("systex_website_data")
    .select("title", "date", "url", "search_kw", "category", "related_kw", "official_website_source")
    .execute()
)
url_list = [data["url"] for data in response.data]
supabase_data_list = response.data

# Download each article, throttled to 2 requests per second, then insert the
# resulting documents and their metadata into the vector store.
loader = NewsLoader(url_list, supabase_data_list)
loader.requests_per_second = 2
_, documents, document_metadatas = loader.aload()
vector_store.insert(documents, document_metadatas)
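
The script depends on two project-local helpers, news_documents.NewsLoader and add_vectordb.GetVectorStore, whose source is not shown here. The sketch below is only an illustration of what GetVectorStore might look like, assuming it wraps LangChain's SupabaseVectorStore; the class layout, constructor arguments, and insert signature are inferred from how it is called above, not taken from the actual module.

# Hypothetical sketch of add_vectordb.GetVectorStore (assumption, not the real module).
from langchain_community.vectorstores import SupabaseVectorStore


class GetVectorStore:
    def __init__(self, embeddings, supabase, table_name):
        # Store vectors in the given Supabase table; "match_documents" is the
        # default similarity-search RPC name used by LangChain's Supabase integration.
        self.store = SupabaseVectorStore(
            client=supabase,
            embedding=embeddings,
            table_name=table_name,
            query_name="match_documents",
        )

    def insert(self, documents, metadatas):
        # Embed and upsert the texts (assumed to be plain strings), one row per
        # document, attaching the corresponding metadata dict to each row.
        self.store.add_texts(texts=documents, metadatas=metadatas)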