19 lines
927 B
Python
19 lines
927 B
Python
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
|
import torch
|
|
|
|
class OpenBuddy:
    """Thin wrapper around a Hugging Face causal language model.

    Loads the model and its tokenizer from ``model_path`` once, then
    exposes :meth:`generate_answer` for single-prompt text generation.
    """

    def __init__(self, model_path: str) -> None:
        """Load the model and tokenizer found at ``model_path``.

        Args:
            model_path: Value forwarded unchanged to both
                ``from_pretrained`` calls.
        """
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.float16,
        )
        # The model is already loaded with trust_remote_code=True; pass the
        # same flag here so a repository that ships a custom tokenizer class
        # loads under the same trust decision instead of failing/prompting.
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            trust_remote_code=True,
        )
        self.model.eval()

    def generate_answer(self, prompt: str, max_new_tokens: int = 100) -> str:
        """Generate text for ``prompt`` and return it decoded.

        Args:
            prompt: Text passed to the tokenizer's ``encode``.
            max_new_tokens: Value forwarded to ``generate`` as
                ``max_new_tokens``. Defaults to 100, the value that was
                previously hard-coded.

        Returns:
            The first sequence returned by ``generate``, decoded with
            special tokens skipped. NOTE(review): per the Hugging Face
            docs, decoder-only models return the prompt tokens followed by
            the new tokens, so the prompt is normally part of this string.
        """
        # Follow the model's own device rather than assuming 'cuda': with
        # device_map="auto" the model may live on CPU or on another
        # accelerator, and a hard-coded 'cuda' raises on hosts without one.
        input_ids = self.tokenizer.encode(prompt, return_tensors='pt')
        input_ids = input_ids.to(self.model.device)

        with torch.no_grad():
            output_ids = self.model.generate(
                input_ids=input_ids,
                max_new_tokens=max_new_tokens,
                eos_token_id=self.tokenizer.eos_token_id,
            )

        return self.tokenizer.decode(output_ids[0], skip_special_tokens=True)