from langchain.llms.base import LLM
from typing import Any, List, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


class ChatGLM_LLM(LLM):
    # Custom LLM class wrapping a locally deployed ChatGLM3-6B model
    tokenizer: AutoTokenizer = None
    model: AutoModelForCausalLM = None

    def __init__(self, model_path: str):
        # model_path: local directory holding the ChatGLM3-6B checkpoint
        # Initialize the model from local files
        super().__init__()
        print("Loading model from local path...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        self.model = (
            AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
            .to(torch.bfloat16)
            .cuda(device=1)  # place the model on GPU 1
        )
        self.model = self.model.eval()
        print("Finished loading the local model")

    def _call(self,
              prompt: str,
              stop: Optional[List[str]] = None,
              run_manager: Optional[CallbackManagerForLLMRun] = None,
              **kwargs: Any) -> str:
        # Override the call function: delegate to ChatGLM3's chat() API,
        # starting from an empty history and using greedy decoding
        response, _ = self.model.chat(self.tokenizer, prompt, history=[], do_sample=False)
        return response

    @property
    def _llm_type(self) -> str:
        return "ChatGLM3-6B"
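
# A minimal usage sketch. The checkpoint directory below is a placeholder —
# point it at wherever your local ChatGLM3-6B weights were downloaded.
if __name__ == "__main__":
    llm = ChatGLM_LLM(model_path="/path/to/chatglm3-6b")  # hypothetical path
    # LLM.__call__ dispatches to _call defined above
    print(llm("Hello, please introduce yourself."))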