### Python = 3.9
import os
from dotenv import load_dotenv
load_dotenv()

import openai 
openai_api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = openai_api_key

from langchain_openai import OpenAIEmbeddings
embeddings_model = OpenAIEmbeddings()

from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_chroma import Chroma

from supabase import create_client, Client 
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_KEY")
supabase: Client = create_client(supabase_url, supabase_key)

from apscheduler.schedulers.background import BackgroundScheduler
from typing import AsyncIterator
scheduler = BackgroundScheduler(timezone="Asia/Taipei")

############# Load data #############
# def extract_field(doc, field_name):
#     for line in doc.page_content.split('\n'):
#         if line.startswith(f"{field_name}:"):
#             return line.split(':', 1)[1].strip()
#     return None

# loader = CSVLoader(file_path="video_cache_rows.csv")
# data = loader.load()
# field_name = "question"
# question = [extract_field(doc, field_name) for doc in data]

# ####### load data from supabase #######
# embeddings_model = OpenAIEmbeddings()

# Per-language cache of Chroma vector stores, keyed by language code ("ch", "en", "jp", "ko").
vectorstore_list ={}
# Per-language lookup of question text -> video_cache row id, same language keys.
question_id_map_list = {}

def generated(language: str = "ch"):
    """Rebuild the question index for one language.

    Fetches (question, id) rows from the Supabase ``video_cache`` table for
    *language*, caches a question -> id map in ``question_id_map_list``, and
    builds a persistent Chroma vector store over the question texts, cached
    in ``vectorstore_list``. Called once at startup and again on a schedule.

    Args:
        language: Language code of the rows to index ("ch", "en", "jp", "ko").
    """
    response, count = (
        supabase.table("video_cache")
        .select("question", "id")
        .eq("language", language)
        .order("id")
        .execute()
    )
    data = response[1]  # supabase returns ('data', rows) tuples when unpacked

    # Map each question to its row id so answers can be fetched later;
    # dict construction also de-duplicates repeated question texts.
    question_id_map = {
        item["question"]: item["id"]
        for item in data
        if "id" in item and "question" in item
    }
    question_id_map_list[language] = question_id_map

    ######### Vector Store ##########
    # Persist the per-language store to disk. Chroma.from_texts computes the
    # embeddings itself via embeddings_model, so no separate embed_documents
    # call is needed. Pass an explicit list of question strings (the original
    # passed the dict, relying on dict iteration yielding keys).
    persist_directory = f"./chroma_db_{language}"
    vectorstore = Chroma.from_texts(
        texts=list(question_id_map),
        embedding=embeddings_model,
        persist_directory=persist_directory,
    )
    vectorstore_list[language] = vectorstore

    print(f"generated {language}")

# Build all four language indexes eagerly at import time (each call hits
# Supabase and the OpenAI embeddings API).
generated("ch")
generated("en")
generated("jp")
generated("ko")

# Refresh each index on a cron schedule: every 2 hours at minute 0,
# in the Asia/Taipei timezone configured on the scheduler.
scheduler.add_job(generated, 'cron' ,hour='*/2',kwargs={"language" : "ch"})
scheduler.add_job(generated, 'cron' ,hour='*/2',kwargs={"language" : "en"})
scheduler.add_job(generated, 'cron' ,hour='*/2',kwargs={"language" : "jp"})
scheduler.add_job(generated, 'cron' ,hour='*/2',kwargs={"language" : "ko"})

# Background scheduler: runs jobs in a daemon thread alongside the app.
scheduler.start()

def get_id_by_question(question, language):
    """Return the video_cache row id for an exact question string.

    Looks the question up in the cached per-language map; returns None
    when the question is not present.
    """
    mapping = question_id_map_list[language]
    return mapping.get(question)

# print(question)
# created_at = []
# question = []
# ids = []
# answer = []
# video_url = []

# for item in data:
#     ids.append(item['id'])
#     created_at.append(item['created_at'])
#     question.append(item['question'])
#     answer.append(item['answer'])
#     video_url.append(item['video_url'])


def ask_question(question:str, SIMILARITY_THRESHOLD: float = 0.83, language:str ="ch"):
    """Answer a question from cached rows when a close-enough match exists.

    Finds the single nearest cached question in the per-language Chroma
    store; when its relevance score meets the threshold, fetches the full
    row from the Supabase ``video_cache`` table.

    Args:
        question: The user's question text.
        SIMILARITY_THRESHOLD: Minimum relevance score (0-1) to accept a
            match. (Annotation fixed to float; the default 0.83 is not an int.)
        language: Language code selecting the vector store to query.

    Returns:
        The matching row data (list with one dict) on a confident hit whose
        "answer" field is populated, otherwise None.
    """
    vectorstore = vectorstore_list[language]
    docs_and_scores = vectorstore.similarity_search_with_relevance_scores(question, k=1)
    if not docs_and_scores:
        # Empty store (no cached questions for this language): no match.
        return None
    doc, score = docs_and_scores[0]

    if score < SIMILARITY_THRESHOLD:
        return None

    # Map the matched question text back to its database row id.
    row_id = get_id_by_question(doc.page_content, language)
    data, count = supabase.table("video_cache").select("*").eq("id", row_id).execute()

    # Rows that exist but have no generated answer yet count as a miss.
    if data[1][0]["answer"] is None:
        return None

    return data[1]
    
def ask_question_find_brand(question:str):
    """Extract database-search keywords from a question via GPT-4.

    Args:
        question: Free-form question text.

    Returns:
        list[str]: Keywords obtained by splitting the model reply on ", ".
    """
    # The file imports langchain_openai, which requires openai>=1.0; in that
    # SDK openai.ChatCompletion.create was removed and raises APIRemovedInV1.
    # Use the v1 module-level chat.completions API (honors openai.api_key).
    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Extract keywords from the following text for a database search: {question}"}
        ],
        max_tokens=50,
        temperature=0.5,
    )

    # v1 responses are typed objects: message.content, not message['content'].
    keywords = response.choices[0].message.content.strip().split(", ")

    return keywords


if __name__ == "__main__" :
    ####### load from disk  #######
    # NOTE(review): the similarity_search examples below are commented out,
    # so running this module directly only assigns the sample queries (the
    # real work happens at import time via generated() and the scheduler).
    query = "美食街在哪裡"
    #docs = vectorstore.similarity_search(query)
    #print(f"Query: {query}  | 最接近文檔:{docs[0].page_content}")

    ####### Query it #########
    query = "101可以帶狗嗎"
    #docs = vectorstore.similarity_search(query)
    #print(f"Query: {query}  | 最接近文檔:{docs[0].page_content}")