from langchain_community.chat_models import ChatOllama
from langchain_openai import ChatOpenAI
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from langchain_huggingface import HuggingFacePipeline
from typing import Any, List, Optional, Dict
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import BaseMessage, AIMessage, HumanMessage, SystemMessage
from langchain_core.outputs import ChatResult, ChatGeneration
from pydantic import Field
import subprocess
import time
from dotenv import load_dotenv

load_dotenv()

# System prompt (zh-TW): "You are an AI assistant from Taiwan named TAIDE, happy to
# help users from a Taiwanese point of view, answering in Traditional Chinese."
system_prompt: str = "你是一個來自台灣的AI助理,你的名字是 TAIDE,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。"


def hf():
    """Load Llama 3.1 8B Instruct locally through a Hugging Face text-generation pipeline."""
    model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    llm = HuggingFacePipeline.from_model_id(
        model_id=model_id,
        task="text-generation",
        model_kwargs={"torch_dtype": torch.bfloat16},
        pipeline_kwargs={
            "return_full_text": False,
            "max_new_tokens": 512,
            "repetition_penalty": 1.03,
        },
        device_map="cuda",
    )
    # print(llm.pipeline)
    # Llama 3.1 defines several EOS tokens; use the first one as the pad token.
    llm.pipeline.tokenizer.pad_token_id = llm.pipeline.model.config.eos_token_id[0]
    return llm


def ollama_():
    """Build a ChatOllama client against a locally served Ollama model."""
    # model = "cwchang/llama3-taide-lx-8b-chat-alpha1"
    model = "llama3.1:latest"
    # model = "llama3.1:70b"
    # model = "893379029/piccolo-large-zh-v2"
    # System prompt (zh-TW): same Taiwanese-assistant persona, answers limited to 5 sentences.
    sys = "你是一個來自台灣的 AI 助理,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。請用 5 句話以內回答問題。"
    # llm = ChatOllama(model=model, num_gpu=2, num_thread=32, temperature=0, system=sys, keep_alive="10m", verbose=True)
    llm = ChatOllama(model=model, num_gpu=2, temperature=0, system=sys, keep_alive="10m")
    return llm


def openai_():  # not local: calls the OpenAI API
    llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
    return llm


class OllamaChatModel(BaseChatModel):
    """Chat model that shells out to an Ollama model running inside a Docker container."""

    model_name: str = Field(default="taide-local-llama3")

    def _generate(
            self,
            messages: List[BaseMessage],
            stop: Optional[List[str]] = None,
            run_manager: Optional[CallbackManagerForLLMRun] = None,
            **kwargs: Any,
    ) -> ChatResult:
        formatted_messages = []
        for msg in messages:
            if isinstance(msg, HumanMessage):
                formatted_messages.append({"role": "user", "content": msg.content})
            elif isinstance(msg, AIMessage):
                formatted_messages.append({"role": "assistant", "content": msg.content})
            elif isinstance(msg, SystemMessage):
                formatted_messages.append({"role": "system", "content": msg.content})

        # prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"  # TAIDE llama2
        prompt = f"<|begin_of_text|><|start_header_id|>{system_prompt}<|end_header_id|>"  # TAIDE llama3
        for msg in formatted_messages:
            if msg['role'] == 'user':
                # prompt += f"{msg['content']} [/INST]"  # TAIDE llama2
                prompt += f"<|eot_id|><|start_header_id|>{msg['content']}<|end_header_id|>"  # TAIDE llama3
            elif msg['role'] == "assistant":
                # prompt += f"{msg['content']} </s><s>[INST]"  # TAIDE llama2
                prompt += f"<|eot_id|><|start_header_id|>{msg['content']}<|end_header_id|>"  # TAIDE llama3

        # Run the prompt through the "ollama" container and use its stdout as the reply.
        command = ["docker", "exec", "-it", "ollama", "ollama", "run", self.model_name, prompt]
        result = subprocess.run(command, capture_output=True, text=True)
        if result.returncode != 0:
            raise Exception(f"Ollama command failed: {result.stderr}")

        content = result.stdout.strip()
        message = AIMessage(content=content)
        generation = ChatGeneration(message=message)
        return ChatResult(generations=[generation])

    @property
    def _llm_type(self) -> str:
        return "ollama-chat-model"


# taide_llm = OllamaChatModel(model_name="taide-local-llama2")


if __name__ == "__main__":
    # Compare answer quality and latency across the local and remote models.
    while True:
        question = input("Question: ")
        # e.g. 溫室氣體是什麼? ("What are greenhouse gases?")
        if question.lower() == "exit":
            break
        for function in [ollama_, hf, openai_]:
            start = time.time()
            llm = function()
            answer = llm.invoke(question)
            print(answer)
            processing_time = time.time() - start
            print(processing_time)