"""Load the news articles listed in Supabase and insert them into the vector store.

Steps performed at import/run time, in order:

1. Load environment variables from ``../.env`` and build a Supabase client.
2. Read the article rows from the ``systex_website_data`` table.
3. Pass the row URLs (plus the full rows) to ``NewsLoader`` and load them.
4. Insert the loaded documents and their metadata through ``GetVectorStore``.
"""
import os

from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
import pandas as pd
from supabase import Client, create_client
import nest_asyncio

# Patch asyncio so an event loop can be re-entered; kept ahead of the project
# imports to preserve the original statement order.
nest_asyncio.apply()

from news_documents import NewsLoader
from add_vectordb import GetVectorStore

# NOTE: this path is resolved against the current working directory, not
# against the location of this file.
load_dotenv("../.env")

supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_KEY")

# Fail early with an actionable message instead of passing None/empty values
# on to create_client.
_missing_settings = [
    name
    for name, value in (
        ("SUPABASE_URL", supabase_url),
        ("SUPABASE_KEY", supabase_key),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + " (expected in the environment or in ../.env)"
    )

# Name of the table handed to GetVectorStore.
document_table = "documents"

supabase: Client = create_client(supabase_url, supabase_key)
embeddings = OpenAIEmbeddings()
vector_store = GetVectorStore(embeddings, supabase, document_table)

# Fetch the columns of every row in the source table; each row is later passed
# to the loader alongside its URL.
response = (
    supabase.table("systex_website_data")
    .select(
        "title",
        "date",
        "url",
        "search_kw",
        "category",
        "related_kw",
        "official_website_source",
    )
    .execute()
)

supabase_data_list = response.data
url_list = [row['url'] for row in supabase_data_list]

loader = NewsLoader(url_list, supabase_data_list)
# Presumably throttles fetching to 2 requests per second - confirm in NewsLoader.
loader.requests_per_second = 2

# aload() returns three values; the first one is not used here.
_, documents, document_metadatas = loader.aload()
vector_store.insert(documents, document_metadatas)