from langchain_community.chat_models import ChatOllama
from langchain_openai import ChatOpenAI
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from langchain_huggingface import HuggingFacePipeline
from typing import Any, List, Optional, Dict
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import BaseMessage, AIMessage, HumanMessage, SystemMessage
from langchain_core.outputs import ChatResult, ChatGeneration
from pydantic import Field
import subprocess
import time
from dotenv import load_dotenv
load_dotenv()
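# The .env file loaded above is expected to provide API credentials, e.g. OPENAI_API_KEY for ChatOpenAI.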
system_prompt: str = "你是一個來自台灣的AI助理,你的名字是 TAIDE,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。"
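# (The default system prompt above says, roughly: "You are an AI assistant from Taiwan named TAIDE;
# you are happy to help users from a Taiwanese point of view and you answer in Traditional Chinese.")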
def hf():
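    """Local Hugging Face backend.

    Loads meta-llama/Meta-Llama-3.1-8B-Instruct as a text-generation pipeline on the GPU
    through HuggingFacePipeline. The repo is gated, so the weights (or an authorized
    Hugging Face token) must already be available locally.
    """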
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
    llm = HuggingFacePipeline.from_model_id(
        model_id=model_id,
        task="text-generation",
        model_kwargs={"torch_dtype": torch.bfloat16},
        pipeline_kwargs={
            "return_full_text": False,
            "max_new_tokens": 512,
            "repetition_penalty": 1.03,
        },
        # Pass either `device` or `device_map`, not both; passing both is rejected by transformers.
        device_map="cuda",
    )
# print(llm.pipeline)
    # Llama 3.1 defines several EOS token ids; use the first one as the pad token.
    llm.pipeline.tokenizer.pad_token_id = llm.pipeline.model.config.eos_token_id[0]
return llm
def ollama_():
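    """Local Ollama backend via the LangChain ChatOllama wrapper.

    Assumes an Ollama server is running locally and has already pulled the selected model.
    """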
# model = "cwchang/llama3-taide-lx-8b-chat-alpha1"
model = "llama3.1:latest"
# model = "llama3.1:70b"
# model = "893379029/piccolo-large-zh-v2"
    # Traditional-Chinese system prompt: answer from a Taiwanese perspective, in at most 5 sentences.
    sys = "你是一個來自台灣的 AI 助理,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。請用 5 句話以內回答問題。"
# llm = ChatOllama(model=model, num_gpu=2, num_thread=32, temperature=0, system=sys, keep_alive="10m", verbose=True)
llm = ChatOllama(model=model, num_gpu=2, temperature=0, system=sys, keep_alive="10m")
return llm
def openai_():  # not local, calls the OpenAI API
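    """Remote backend: ChatOpenAI with gpt-4o-mini; needs OPENAI_API_KEY in the environment."""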
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
return llm
class OllamaChatModel(BaseChatModel):
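    """Minimal LangChain chat model that shells out to an Ollama Docker container.

    Each call renders the message history with the Llama 3 chat template and runs
    `ollama run <model> <prompt>` inside a container named "ollama" via subprocess,
    returning the captured stdout as the assistant's reply.
    """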
model_name: str = Field(default="taide-local-llama3")
def _generate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
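        """Build a Llama 3 style prompt from the message history and run it through Ollama."""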
formatted_messages = []
for msg in messages:
if isinstance(msg, HumanMessage):
formatted_messages.append({"role": "user", "content": msg.content})
elif isinstance(msg, AIMessage):
formatted_messages.append({"role": "assistant", "content": msg.content})
elif isinstance(msg, SystemMessage):
formatted_messages.append({"role": "system", "content": msg.content})
# prompt = f"[INST] <>\n{system_prompt}\n<>\n\n" # TAIDE llama2
prompt = f"<|begin_of_text|><|start_header_id|>{system_prompt}<|end_header_id|>" # TAIDE llama3
for msg in formatted_messages:
if msg['role'] == 'user':
# prompt += f"{msg['content']} [/INST]" # TAIDE llama2
prompt += f"<|eot_id|><|start_header_id|>{msg['content']}<|end_header_id|>" # TAIDE llama3
elif msg['role'] == "assistant":
# prompt += f"{msg['content']} [INST]" # TAIDE llama2
prompt += f"<|eot_id|><|start_header_id|>{msg['content']}<|end_header_id|>" # TAIDE llama3
command = ["docker", "exec", "-it", "ollama", "ollama", "run", self.model_name, prompt]
result = subprocess.run(command, capture_output=True, text=True)
if result.returncode != 0:
raise Exception(f"Ollama command failed: {result.stderr}")
content = result.stdout.strip()
message = AIMessage(content=content)
generation = ChatGeneration(message=message)
return ChatResult(generations=[generation])
@property
def _llm_type(self) -> str:
return "ollama-chat-model"
# taide_llm = OllamaChatModel(model_name="taide-local-llama2")
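# Example usage (sketch, assuming the "ollama" container serves a model with this name):
#   taide_llm = OllamaChatModel(model_name="taide-local-llama3")
#   print(taide_llm.invoke("溫室氣體是什麼?").content)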
if __name__ == "__main__":
question = ""
while question.lower() != "exit":
question = input("Question: ")
# 溫室氣體是什麼?
for function in [ollama_, huggingface_, huggingface2_, openai_]:
start = time.time()
llm = function()
answer = llm.invoke(question)
print(answer)
processing_time = time.time() - start
print(processing_time)