import os
import sys
from dotenv import load_dotenv

load_dotenv()
sys_path = os.environ.get('SYS_PATH')
sys.path.append(sys_path)

from embeddings.llama import Embedder
from transformers import LlamaForCausalLM, LlamaTokenizer
import torch
from database.es_handler import ElasticSearchData
from tqdm import tqdm
import pickle


class LlamaTransformerEmbeddings:
    def __init__(self, model_path, ggml=False, output_hidden_states=True):
        if ggml:
            # GGML/llama.cpp model: only embedding via the Embedder wrapper is supported.
            self.model_ggml = Embedder(model_path)
        else:
            # Hugging Face model: load tokenizer and model with hidden states enabled.
            self.tokenizer = LlamaTokenizer.from_pretrained(model_path)
            self.model = LlamaForCausalLM.from_pretrained(model_path, output_hidden_states=output_hidden_states)

    def get_embeddings_hf(self, text):
        """Sentence embedding: mean over the token vectors of the second-to-last hidden layer."""
        inputs = self.tokenizer(text, return_tensors="pt")
        with torch.no_grad():
            outputs = self.model(**inputs)
        # Alternative pooling strategies that were tried:
        # embeddings = outputs.hidden_states[-1]
        # avg_embeddings = torch.mean(embeddings, dim=1)
        # return avg_embeddings[0]
        # embeddings, _ = torch.max(outputs.hidden_states[-1], dim=1)
        hidden_states = outputs.hidden_states  # tuple: embedding layer + one tensor per transformer layer
        token_vecs = hidden_states[-2][0]      # token vectors of the second-to-last layer, batch element 0
        sentence_embedding = torch.mean(token_vecs, dim=0)
        return sentence_embedding

    def get_input_embeddings(self, text):
        """Mean of the static input (token) embeddings, returned as a NumPy array."""
        inputs = self.tokenizer(text, return_tensors="pt")
        input_ids = inputs['input_ids']
        with torch.no_grad():
            input_embeddings = self.model.get_input_embeddings()
            embeddings = input_embeddings(input_ids)
            mean = torch.mean(embeddings[0], 0).cpu().numpy()
        return mean

    def get_embeddings_ggml(self, text):
        return self.model_ggml.embed_text_llama(doc=text)

    def generate_answer(self, prompt):
        inputs = self.tokenizer(prompt, return_tensors="pt")
        generate_ids = self.model.generate(inputs.input_ids, max_length=700)
        return self.tokenizer.batch_decode(generate_ids, skip_special_tokens=True,
                                           clean_up_tokenization_spaces=False)[0]
        # print("Program has ended.")


if __name__ == "__main__":
    # How 'hidden_states.pkl' was produced:
    # tokenizer = LlamaTokenizer.from_pretrained("../models/tmp/llama-13b-hf")
    # model = LlamaForCausalLM.from_pretrained("../models/tmp/llama-13b-hf", output_hidden_states=True)
    # text = "Your text for semantic search"
    # inputs = tokenizer(text, return_tensors="pt")
    # with torch.no_grad():
    #     outputs = model(**inputs)
    # embeddings = outputs.hidden_states
    # with open('hidden_states.pkl', 'wb') as f:
    #     pickle.dump(embeddings, f)
    # final_embeddings = embeddings[-1]

    with open('hidden_states.pkl', 'rb') as f:
        hidden_states = pickle.load(f)
    print(len(hidden_states))

    ########### WORD EMBEDDINGS ##############################
    # Concatenate the tensors for all layers. We use `stack` here to
    # create a new dimension in the tensor.
    # token_embeddings = torch.stack(hidden_states, dim=0)
    # print(token_embeddings.size())
    # # Remove dimension 1, the "batches".
    # token_embeddings = torch.squeeze(token_embeddings, dim=1)
    # print(token_embeddings.size())
    # # Swap dimensions 0 and 1.
    # token_embeddings = token_embeddings.permute(1, 0, 2)
    # print(token_embeddings.size())
    # # Stores the token vectors, with shape [num_tokens x 20480].
    # token_vecs_cat = []
    # for token in token_embeddings:
    #     # `token` is a [40 x 5120] tensor.
    #     # Concatenate the vectors (that is, append them together) from the last
    #     # four layers. Each layer vector is 5120 values, so `cat_vec` is length 20480.
    #     cat_vec = torch.cat((token[-1], token[-2], token[-3], token[-4]), dim=0)
    #     # Use `cat_vec` to represent `token`.
    #     token_vecs_cat.append(cat_vec)
    # print('Shape is: %d x %d' % (len(token_vecs_cat), len(token_vecs_cat[0])))
    ########### WORD EMBEDDINGS ##############################

    ########### SENTENCE EMBEDDINGS ##########################
    token_vecs = hidden_states[-2][0]
    sentence_embedding = torch.mean(token_vecs, dim=0)
    # print("Our final sentence embedding vector of shape:", sentence_embedding.size())
    ########### SENTENCE EMBEDDINGS ##########################
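
    ########### USAGE SKETCH #################################
    # A minimal, hedged sketch of how the LlamaTransformerEmbeddings class above
    # could be used for semantic search; kept commented out so running this script
    # is unchanged. The HF model path mirrors the commented-out
    # "../models/tmp/llama-13b-hf" above, and the GGML path is purely hypothetical;
    # adapt both to your setup.
    #
    # hf_embedder = LlamaTransformerEmbeddings("../models/tmp/llama-13b-hf")
    # query_vec = hf_embedder.get_embeddings_hf("Your text for semantic search")
    # print(query_vec.size())  # 1-D tensor of the model's hidden size (5120 for LLaMA-13B)
    #
    # ggml_embedder = LlamaTransformerEmbeddings("path/to/llama-13b.ggml", ggml=True)  # hypothetical path
    # query_vec_ggml = ggml_embedder.get_embeddings_ggml("Your text for semantic search")
    ########### USAGE SKETCH #################################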