BA-Chatbot/backend/embeddings/transformer_llama.py

102 lines
4.2 KiB
Python
Executable File

import os
import sys
from dotenv import load_dotenv
load_dotenv()
sys_path = os.environ.get('SYS_PATH')
# from embeddings.llama import Embedder
sys.path.append(sys_path)
from embeddings.llama import Embedder
from transformers import LlamaForCausalLM, LlamaTokenizer
import torch
from database.es_handler import ElasticSearchData
from tqdm import tqdm
import pickle
class LlamaTransformerEmbeddings:
    """Compute text embeddings (and completions) with a LLaMA model.

    One of two backends is loaded at construction time:

    * Hugging Face (``ggml=False``): ``LlamaTokenizer`` and
      ``LlamaForCausalLM``; used by :meth:`get_embeddings_hf`,
      :meth:`get_input_embeddings` and :meth:`generate_answer`.
    * GGML (``ggml=True``): the project's ``Embedder``; used by
      :meth:`get_embeddings_ggml`.
    """

    def __init__(self, model_path, ggml=False, output_hidden_states=True):
        """Load the backend selected by ``ggml`` from ``model_path``.

        Args:
            model_path: Path handed to ``Embedder`` or to ``from_pretrained``.
            ggml: When true load the GGML ``Embedder``, otherwise the
                Hugging Face tokenizer and model.
            output_hidden_states: Forwarded to
                ``LlamaForCausalLM.from_pretrained``.
        """
        # Define every backend attribute up front so both modes expose the
        # same attribute set; the unused backend stays ``None``.
        self.model_ggml = None
        self.tokenizer = None
        self.model = None
        if ggml:
            self.model_ggml = Embedder(model_path)
        else:
            self.tokenizer = LlamaTokenizer.from_pretrained(model_path)
            self.model = LlamaForCausalLM.from_pretrained(
                model_path, output_hidden_states=output_hidden_states
            )

    def _require_backend(self, *attributes):
        """Raise ``AttributeError`` if any of ``attributes`` is not loaded."""
        missing = [name for name in attributes if getattr(self, name, None) is None]
        if missing:
            raise AttributeError(
                f"{type(self).__name__} backend not loaded "
                f"(missing: {', '.join(missing)}); check the 'ggml' flag "
                "used at construction."
            )

    def get_embeddings_hf(self, text):
        """Return a sentence embedding taken from the model's hidden states.

        The embedding is the mean over axis 0 of ``hidden_states[-2][0]``,
        i.e. the second-to-last hidden-state entry of the first item
        (presumably the single batch element, leaving a token axis to
        average over).

        Args:
            text: Input passed to the tokenizer.

        Returns:
            ``torch.Tensor`` with the averaged hidden-state vector.

        Raises:
            AttributeError: If the Hugging Face backend was not loaded.
        """
        self._require_backend("tokenizer", "model")
        inputs = self.tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            # Request hidden states and a dict-style output explicitly. The
            # positional form ``outputs[2]`` only points at the hidden states
            # when past_key_values are returned as well (use_cache=True) and
            # the model was created with output_hidden_states=True.
            outputs = self.model(
                **inputs, output_hidden_states=True, return_dict=True
            )
        token_vecs = outputs.hidden_states[-2][0]
        return torch.mean(token_vecs, dim=0)

    def get_input_embeddings(self, text):
        """Return the mean input-embedding vector of ``text``.

        Looks the token ids up in the model's input embedding layer (no
        transformer layers are run) and averages the vectors of the first
        item over axis 0.

        Args:
            text: Input passed to the tokenizer.

        Returns:
            NumPy array holding the averaged embedding.

        Raises:
            AttributeError: If the Hugging Face backend was not loaded.
        """
        self._require_backend("tokenizer", "model")
        inputs = self.tokenizer(text, return_tensors="pt")
        input_ids = inputs['input_ids']
        with torch.no_grad():
            embedding_layer = self.model.get_input_embeddings()
            embeddings = embedding_layer(input_ids)
            mean = torch.mean(embeddings[0], 0).cpu().numpy()
        return mean

    def get_embeddings_ggml(self, text):
        """Return the embedding of ``text`` computed by the GGML ``Embedder``.

        Raises:
            AttributeError: If the GGML backend was not loaded.
        """
        self._require_backend("model_ggml")
        return self.model_ggml.embed_text_llama(doc=text)

    def generate_answer(self, prompt, max_length=700):
        """Generate a completion for ``prompt`` and return the decoded text.

        Args:
            prompt: Text passed to the tokenizer.
            max_length: Forwarded to ``generate`` as ``max_length``;
                defaults to the previously hard-coded 700.

        Returns:
            The first decoded sequence, special tokens skipped.

        Raises:
            AttributeError: If the Hugging Face backend was not loaded.
        """
        self._require_backend("tokenizer", "model")
        inputs = self.tokenizer(prompt, return_tensors="pt")
        generate_ids = self.model.generate(inputs.input_ids, max_length=max_length)
        decoded = self.tokenizer.batch_decode(
            generate_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return decoded[0]
# print("Program has ended.")
if __name__ == "__main__":
    # Scratch experiment: build a sentence embedding from hidden states that
    # were cached on disk (presumably to avoid re-running the model).
    #
    # 'hidden_states.pkl' is expected to contain `outputs.hidden_states` of a
    # LlamaForCausalLM forward pass with output_hidden_states=True. The
    # original recipe used the checkpoint "../models/tmp/llama-13b-hf" on the
    # sample text "Ihr Text für die semantische Suche" and stored the result
    # with pickle.dump.
    #
    # NOTE: pickle.load can execute arbitrary code from the file; only load a
    # pickle that you created yourself.
    with open('hidden_states.pkl', 'rb') as cache_file:
        layer_outputs = pickle.load(cache_file)
    print(len(layer_outputs))

    # Word embeddings (earlier experiment, not executed here): stack all
    # entries along a new dim 0, squeeze dim 1, permute to (1, 0, 2), then
    # represent each token by concatenating its vectors from the last four
    # layers (per the original notes: 4 x 5120 = 20480 values per token).

    # Sentence embedding: average the second-to-last entry's first item over
    # axis 0.
    penultimate_layer = layer_outputs[-2]
    sentence_embedding = torch.mean(penultimate_layer[0], dim=0)