from langchain_community.chat_models import ChatOllama
from langchain_openai import ChatOpenAI
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from langchain_huggingface import HuggingFacePipeline
from typing import Any, List, Optional, Dict
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import BaseMessage, AIMessage, HumanMessage, SystemMessage
from langchain_core.outputs import ChatResult, ChatGeneration
from pydantic import Field
import subprocess
import time
from dotenv import load_dotenv

load_dotenv()

# System prompt for TAIDE (Traditional Chinese): "You are an AI assistant from
# Taiwan named TAIDE. You are happy to help users from a Taiwanese point of
# view and answer questions in Traditional Chinese."
system_prompt: str = "你是一個來自台灣的AI助理,你的名字是 TAIDE,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。"


def hf():
    """Load Meta-Llama-3.1-8B-Instruct through a local HuggingFace pipeline."""
    model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    llm = HuggingFacePipeline.from_model_id(
        model_id=model_id,
        task="text-generation",
        model_kwargs={"torch_dtype": torch.bfloat16},
        pipeline_kwargs={
            "return_full_text": False,
            "max_new_tokens": 512,
            "repetition_penalty": 1.03,
        },
        # device=0 and device_map cannot both be set; keep only device_map.
        device_map="cuda",
    )
    # print(llm.pipeline)
    # Llama 3.1 defines no pad token, so reuse the first EOS token id.
    llm.pipeline.tokenizer.pad_token_id = llm.pipeline.model.config.eos_token_id[0]
    return llm


def ollama_():
    """Chat through a locally served Ollama model."""
    # model = "cwchang/llama3-taide-lx-8b-chat-alpha1"
    model = "llama3.1:latest"
    # model = "llama3.1:70b"
    # model = "893379029/piccolo-large-zh-v2"
    # System prompt (Traditional Chinese): "You are an AI assistant from Taiwan,
    # happy to help users from a Taiwanese point of view, and you answer in
    # Traditional Chinese. Please answer in five sentences or fewer."
    sys = "你是一個來自台灣的 AI 助理,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。請用 5 句話以內回答問題。"
    # llm = ChatOllama(model=model, num_gpu=2, num_thread=32, temperature=0, system=sys, keep_alive="10m", verbose=True)
    llm = ChatOllama(model=model, num_gpu=2, temperature=0, system=sys, keep_alive="10m")
    return llm


def openai_():
    """Chat through the OpenAI API (not local)."""
    llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
    return llm


class OllamaChatModel(BaseChatModel):
    """Chat model that shells into an Ollama Docker container to run TAIDE."""

    model_name: str = Field(default="taide-local-llama3")

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # Convert LangChain messages into role/content dicts.
        formatted_messages = []
        for msg in messages:
            if isinstance(msg, HumanMessage):
                formatted_messages.append({"role": "user", "content": msg.content})
            elif isinstance(msg, AIMessage):
                formatted_messages.append({"role": "assistant", "content": msg.content})
            elif isinstance(msg, SystemMessage):
                formatted_messages.append({"role": "system", "content": msg.content})

        # Build a Llama 3 chat prompt (role names go inside the header tokens,
        # message text follows them). The commented lines are the older TAIDE
        # Llama 2 [INST]/<<SYS>> format.
        # prompt = f"[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"  # TAIDE llama2
        prompt = (
            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
            f"{system_prompt}<|eot_id|>"
        )  # TAIDE llama3
        for msg in formatted_messages:
            if msg["role"] == "user":
                # prompt += f"{msg['content']} [/INST]"  # TAIDE llama2
                prompt += (
                    "<|start_header_id|>user<|end_header_id|>\n\n"
                    f"{msg['content']}<|eot_id|>"
                )  # TAIDE llama3
            elif msg["role"] == "assistant":
                # prompt += f"{msg['content']} [INST]"  # TAIDE llama2
                prompt += (
                    "<|start_header_id|>assistant<|end_header_id|>\n\n"
                    f"{msg['content']}<|eot_id|>"
                )  # TAIDE llama3
        # Cue the model to produce the next assistant turn.
        prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"

        # Run the prompt through `ollama run` inside the "ollama" container
        # (no -it: a TTY is not available when output is captured).
        command = ["docker", "exec", "ollama", "ollama", "run", self.model_name, prompt]
        result = subprocess.run(command, capture_output=True, text=True)

        if result.returncode != 0:
            raise Exception(f"Ollama command failed: {result.stderr}")

        content = result.stdout.strip()
        message = AIMessage(content=content)
        generation = ChatGeneration(message=message)
        return ChatResult(generations=[generation])

    @property
    def _llm_type(self) -> str:
        return "ollama-chat-model"


# taide_llm = OllamaChatModel(model_name="taide-local-llama2")

if __name__ == "__main__":
    question = ""
    while question.lower() != "exit":
        question = input("Question: ")  # e.g. 溫室氣體是什麼? ("What are greenhouse gases?")
        if question.lower() == "exit":
            break
        # Benchmark each available backend on the same question and report latency.
        for function in [ollama_, hf, openai_]:
            start = time.time()
            llm = function()
            answer = llm.invoke(question)
            print(answer)
            processing_time = time.time() - start
            print(processing_time)
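# Illustrative usage sketch for the custom chat model (assumption: a model
# named "taide-local-llama3" has already been created inside the running
# "ollama" Docker container; this script does not set it up):
#
#   taide_llm = OllamaChatModel()
#   reply = taide_llm.invoke([HumanMessage(content="溫室氣體是什麼?")])
#   print(reply.content)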