import os

import nest_asyncio
import pandas as pd
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from supabase import Client, create_client

# Allow nested event loops (needed because NewsLoader runs async requests
# inside an already-running loop, e.g. in a notebook).
nest_asyncio.apply()

from add_vectordb import GetVectorStore
from news_documents import NewsLoader

# Load Supabase credentials from the project's .env file.
load_dotenv("../.env")
supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_KEY")
document_table = "documents2"

# Connect to Supabase and build the vector store backed by the document table.
supabase: Client = create_client(supabase_url, supabase_key)
embeddings = OpenAIEmbeddings()
vector_store = GetVectorStore(embeddings, supabase, document_table)

# Fetch the article metadata rows whose URLs will be crawled and embedded.
response = (
    supabase.table("systex_website_data")
    .select(
        "title",
        "date",
        "url",
        "search_kw",
        "category",
        "related_kw",
        "official_website_source",
    )
    .execute()
)
url_list = [data["url"] for data in response.data]
supabase_data_list = response.data

# Crawl each URL, then insert the resulting documents and their metadata
# into the vector store.
loader = NewsLoader(url_list, supabase_data_list)
loader.requests_per_second = 2
_, documents, document_metadatas = loader.aload()
vector_store.insert(documents, document_metadatas)