knn und embedding python

main
Kai Sellmann 2026-01-08 16:23:29 +01:00
commit 4cd6a06c35
3 changed files with 1715 additions and 0 deletions

581
embedding_modul.py 100644
View File

@ -0,0 +1,581 @@
"""
Embedding-Modul für Textdokumente mit Datenbank-Integration
Unterstützt PDF, TXT, DOCX und andere Textformate
Speichert Embeddings in Datenbank oder als Fallback in Datei
Kommuniziert mit HNSW-Modul über localhost
"""
import json
import os
import pickle
import time
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import requests
# Text extraction
import PyPDF2
from docx import Document
# Embedding models
from sentence_transformers import SentenceTransformer
@dataclass
class DocumentChunk:
    """A single chunk of document text together with its embedding vector.

    Attributes:
        text: Text content of the chunk.
        embedding: Embedding vector for ``text``. Usually a NumPy array, but
            any sequence accepted by ``np.asarray`` works for serialisation.
        source_file: Path of the file the chunk was taken from.
        chunk_index: Index of the chunk within its source file.
        metadata: Optional free-form metadata; ``None`` when not supplied.
    """

    text: str
    embedding: np.ndarray
    source_file: str
    chunk_index: int
    metadata: Optional[Dict] = None

    def to_dict(self) -> Dict:
        """Return a JSON-serialisable dictionary for storage or the HTTP API."""
        return {
            'text': self.text,
            # NumPy arrays are not JSON-serialisable. Going through
            # np.asarray also lets plain lists/tuples pass instead of
            # failing with AttributeError on ``.tolist()``.
            'embedding': np.asarray(self.embedding).tolist(),
            'source_file': self.source_file,
            'chunk_index': self.chunk_index,
            'metadata': self.metadata,
        }
class StorageManager:
    """Persist embedding chunks, preferring a database and falling back to files.

    The database backend is still a stub (see the TODO markers), and
    ``_check_database_connection`` always reports "not available", so every
    save currently ends up as a pickle file inside ``storage_dir``.
    """

    def __init__(self, storage_dir: str = "./embeddings_storage"):
        """Create the storage directory and probe for a database.

        Args:
            storage_dir: Directory used for the file fallback. Missing parent
                directories are created as well.
        """
        self.storage_dir = Path(storage_dir)
        # parents=True: a nested path such as "data/run1/embeddings" must not
        # fail just because "data/run1" does not exist yet.
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        self.db_available = self._check_database_connection()

    def _check_database_connection(self) -> bool:
        """Report whether a database connection is available.

        Returns:
            Always ``False`` for now; no database is configured.
        """
        # TODO: probe the real database here, e.g. try psycopg2.connect(...)
        # and return True on success, False on any connection error.
        print("⚠️ Keine Datenbank konfiguriert - verwende Datei-Speicherung")
        return False

    def save_to_database(self, chunks: List["DocumentChunk"]) -> bool:
        """Store chunks in the database.

        Args:
            chunks: Chunks to store.

        Returns:
            ``True`` on success, ``False`` when no database is available or
            the write failed.
        """
        if not self.db_available:
            return False
        try:
            # TODO: implement the actual INSERT (text, vector, source,
            # chunk_index) followed by a commit.
            # NOTE(review): until that exists this branch reports success
            # without writing anything; it is only reachable if
            # ``db_available`` is set to True from outside.
            print(f"{len(chunks)} Chunks in Datenbank gespeichert")
            return True
        except Exception as e:
            print(f"✗ Datenbank-Fehler: {e}")
            return False

    def save_to_file(self, chunks: List["DocumentChunk"], filename: Optional[str] = None) -> str:
        """Pickle chunks into a file inside the storage directory.

        Args:
            chunks: Chunks to store.
            filename: Target file name. When omitted, a name is derived from
                the first chunk's source file and the current timestamp.

        Returns:
            Path of the written file as a string.
        """
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            # An empty list has no first chunk to name the file after.
            source = Path(chunks[0].source_file).stem if chunks else "empty"
            filename = f"embeddings_{source}_{timestamp}.pkl"
        filepath = self.storage_dir / filename
        with open(filepath, 'wb') as f:
            pickle.dump(chunks, f)
        print(f"{len(chunks)} Chunks gespeichert in: {filepath}")
        return str(filepath)

    def save(self, chunks: List["DocumentChunk"]) -> Dict[str, Any]:
        """Store chunks, trying the database first and the file fallback second.

        Args:
            chunks: Chunks to store.

        Returns:
            Dictionary with ``success``, ``storage_type``, ``location`` and
            ``chunks_count``. On failure ``success`` is ``False`` and an
            ``error`` entry describes the problem.
        """
        result = {
            'success': False,
            'storage_type': None,
            'location': None,
            'chunks_count': len(chunks),
        }
        if not chunks:
            # Nothing to persist; report it instead of failing on chunks[0].
            result['error'] = 'Keine Chunks zum Speichern'
            return result

        # Attempt 1: database
        if self.db_available and self.save_to_database(chunks):
            result['success'] = True
            result['storage_type'] = 'database'
            result['location'] = 'database'
            return result

        # Attempt 2: file fallback. Write errors are reported through the
        # result so that callers checking ``success`` actually see them.
        try:
            filepath = self.save_to_file(chunks)
        except (OSError, pickle.PicklingError) as e:
            print(f"✗ Datei-Fehler: {e}")
            result['error'] = str(e)
            return result
        result['success'] = True
        result['storage_type'] = 'file'
        result['location'] = filepath
        return result

    def load_from_file(self, filepath: str) -> List["DocumentChunk"]:
        """Load pickled chunks from a file.

        Args:
            filepath: Path of the pickle file.

        Returns:
            The unpickled list of chunks.
        """
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only load files written by this application.
        with open(filepath, 'rb') as f:
            chunks = pickle.load(f)
        print(f"{len(chunks)} Chunks geladen aus: {filepath}")
        return chunks
class HNSWConnector:
    """HTTP client for the HNSW module.

    NOTE(review): this client probes ``/health`` and posts to
    ``/api/embeddings/add`` on port 8000 by default. Confirm that the target
    server really exposes that route and port.
    """

    def __init__(self, host: str = "localhost", port: int = 8000):
        """Build the base URL and probe the server once.

        Args:
            host: Host name of the HNSW server.
            port: Port of the HNSW server.
        """
        self.base_url = f"http://{host}:{port}"
        self.connected = self._check_connection()

    def _check_connection(self) -> bool:
        """Probe the server's ``/health`` endpoint.

        Returns:
            ``True`` when the endpoint answers with HTTP 200, else ``False``.
        """
        try:
            response = requests.get(f"{self.base_url}/health", timeout=2)
        except requests.exceptions.RequestException:
            reachable = False
        else:
            reachable = response.status_code == 200
        if reachable:
            print(f"✓ HNSW-Modul erreichbar: {self.base_url}")
        else:
            print(f"✗ HNSW-Modul nicht erreichbar: {self.base_url}")
        return reachable

    def send_embeddings(self, chunks: List["DocumentChunk"]) -> Dict[str, Any]:
        """Send chunks to the HNSW module.

        Args:
            chunks: Chunks to send; each must provide ``to_dict()`` and
                ``source_file``.

        Returns:
            Dictionary with ``success``. On success it carries the decoded
            ``response``; otherwise ``error`` and ``message``. This method
            does not raise; every failure is reported through the result.
        """
        if not self.connected:
            return {
                'success': False,
                'error': 'HNSW-Modul nicht erreichbar',
                'message': f'Keine Verbindung zu {self.base_url}'
            }
        if not chunks:
            # Explicit message instead of the opaque "list index out of
            # range" that chunks[0] would otherwise produce below.
            return {
                'success': False,
                'error': 'Keine Chunks',
                'message': 'Leere Chunk-Liste - nichts zu senden'
            }
        try:
            payload = {
                'chunks': [chunk.to_dict() for chunk in chunks],
                'timestamp': datetime.now().isoformat(),
                'source_file': chunks[0].source_file
            }
            response = requests.post(
                f"{self.base_url}/api/embeddings/add",
                json=payload,
                timeout=30
            )
            if response.status_code != 200:
                return {
                    'success': False,
                    'error': f'HTTP {response.status_code}',
                    'message': response.text
                }
            result = response.json()
            print(f"{len(chunks)} Chunks an HNSW-Modul gesendet")
            return {
                'success': True,
                'response': result
            }
        except requests.exceptions.Timeout:
            return {
                'success': False,
                'error': 'Timeout',
                'message': 'HNSW-Modul antwortet nicht (Timeout)'
            }
        except Exception as e:
            # Boundary handler: callers rely on getting a status dictionary
            # back rather than an exception.
            return {
                'success': False,
                'error': 'Exception',
                'message': str(e)
            }

    def retry_connection(self, max_retries: int = 3, delay: float = 2.0) -> bool:
        """Probe the server repeatedly until it answers or attempts run out.

        Args:
            max_retries: Maximum number of attempts.
            delay: Seconds to wait between two attempts.

        Returns:
            ``True`` once the server is reachable, ``False`` otherwise.
            ``self.connected`` is updated to the same value.
        """
        print(f"Versuche Verbindung zu HNSW-Modul ({max_retries} Versuche)...")
        for i in range(max_retries):
            print(f" Versuch {i+1}/{max_retries}...")
            if self._check_connection():
                self.connected = True
                return True
            # No point in sleeping after the final failed attempt.
            if i < max_retries - 1:
                time.sleep(delay)
        self.connected = False
        return False
class TextExtractor:
    """Extract plain text from PDF, TXT and DOCX files."""

    @staticmethod
    def extract_from_pdf(filepath: str) -> str:
        """Return the text of all pages of a PDF, one page per line block.

        Raises:
            Exception: When the file cannot be opened or parsed; the original
                error is attached as ``__cause__``.
        """
        try:
            with open(filepath, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # ``or ""``: defensive against a page yielding no text object.
                pages = [(page.extract_text() or "") + "\n" for page in pdf_reader.pages]
        except Exception as e:
            raise Exception(f"Fehler beim Lesen der PDF: {e}") from e
        return "".join(pages).strip()

    @staticmethod
    def extract_from_txt(filepath: str, encoding: str = 'utf-8') -> str:
        """Read a text file, falling back to legacy encodings if needed.

        Args:
            filepath: Path of the text file.
            encoding: Encoding tried first.

        Raises:
            Exception: When none of the encodings can decode the file.
        """
        try:
            with open(filepath, 'r', encoding=encoding) as file:
                return file.read()
        except UnicodeDecodeError:
            pass
        # cp1252 must come before latin-1: latin-1 maps every byte and never
        # fails, so anything listed after it would be unreachable.
        for enc in ('cp1252', 'latin-1'):
            try:
                with open(filepath, 'r', encoding=enc) as file:
                    return file.read()
            except UnicodeDecodeError:
                continue
        raise Exception("Konnte Datei mit keinem Encoding lesen")

    @staticmethod
    def extract_from_docx(filepath: str) -> str:
        """Return the paragraphs of a DOCX document joined by newlines.

        Raises:
            Exception: When the document cannot be read; the original error
                is attached as ``__cause__``.
        """
        try:
            doc = Document(filepath)
            text = "\n".join(paragraph.text for paragraph in doc.paragraphs)
            return text.strip()
        except Exception as e:
            raise Exception(f"Fehler beim Lesen der DOCX: {e}") from e

    @staticmethod
    def extract_text(filepath: str) -> str:
        """Dispatch to the extractor matching the file extension.

        Raises:
            ValueError: For extensions other than .pdf, .txt, .docx, .doc.
        """
        file_ext = Path(filepath).suffix.lower()
        if file_ext == '.pdf':
            return TextExtractor.extract_from_pdf(filepath)
        if file_ext == '.txt':
            return TextExtractor.extract_from_txt(filepath)
        # NOTE(review): '.doc' is routed to the DOCX reader as well; legacy
        # binary .doc files will presumably fail there - confirm.
        if file_ext in ('.docx', '.doc'):
            return TextExtractor.extract_from_docx(filepath)
        raise ValueError(f"Nicht unterstütztes Dateiformat: {file_ext}")
class TextChunker:
    """Split text into chunks suitable for embedding."""

    @staticmethod
    def chunk_by_tokens(text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
        """Split text into overlapping chunks of whitespace-separated words.

        Args:
            text: Text to split.
            chunk_size: Maximum number of words per chunk.
            overlap: Number of words shared between consecutive chunks.

        Returns:
            List of chunks; empty when ``text`` contains no words.

        Raises:
            ValueError: When ``chunk_size`` is not positive or ``overlap`` is
                outside ``0 .. chunk_size - 1``.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size muss größer als 0 sein")
        if not 0 <= overlap < chunk_size:
            # overlap >= chunk_size would give a zero or negative step.
            raise ValueError("overlap muss zwischen 0 und chunk_size - 1 liegen")
        words = text.split()
        step = chunk_size - overlap
        chunks = []
        for start in range(0, len(words), step):
            chunks.append(' '.join(words[start:start + chunk_size]))
            # Stop once a chunk reaches the end of the text. A further window
            # would consist solely of words already in this chunk's overlap.
            if start + chunk_size >= len(words):
                break
        return chunks

    @staticmethod
    def chunk_by_sentences(text: str, sentences_per_chunk: int = 5) -> List[str]:
        """Split text into chunks of a fixed number of sentences.

        Sentences are split on '.', '!' and '?'. Every sentence in the output
        is terminated with '.', so the original punctuation is not preserved.

        Args:
            text: Text to split.
            sentences_per_chunk: Number of sentences per chunk.

        Returns:
            List of chunks; empty when ``text`` contains no sentences.

        Raises:
            ValueError: When ``sentences_per_chunk`` is not positive.
        """
        if sentences_per_chunk < 1:
            raise ValueError("sentences_per_chunk muss größer als 0 sein")
        normalized = text.replace('!', '.').replace('?', '.')
        sentences = [part.strip() for part in normalized.split('.') if part.strip()]
        return [
            '. '.join(sentences[i:i + sentences_per_chunk]) + '.'
            for i in range(0, len(sentences), sentences_per_chunk)
        ]
class EmbeddingGenerator:
    """Create embeddings for text chunks with a SentenceTransformer model."""

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        """Load the embedding model.

        Args:
            model_name: Name passed to ``SentenceTransformer``.
        """
        print(f"Lade Embedding-Modell: {model_name}")
        self.model = SentenceTransformer(model_name)
        self.embedding_dimension = self.model.get_sentence_embedding_dimension()
        print(f"Modell geladen. Embedding-Dimension: {self.embedding_dimension}")

    def generate_embeddings(self, texts: List[str], batch_size: int = 32) -> np.ndarray:
        """Encode texts into embedding vectors.

        Args:
            texts: Texts to encode.
            batch_size: Batch size handed to the model.

        Returns:
            Array with one row per text. For empty input an array of shape
            ``(0, embedding_dimension)`` is returned without calling the model.
        """
        print(f"Erzeuge Embeddings für {len(texts)} Text-Chunks...")
        if not texts:
            # Keep the result two-dimensional so callers can rely on
            # ``embeddings.shape[1]`` even when there is nothing to encode.
            return np.empty((0, self.embedding_dimension), dtype=np.float32)
        return self.model.encode(
            texts,
            batch_size=batch_size,
            show_progress_bar=True,
            convert_to_numpy=True
        )
class DocumentEmbedder:
    """Pipeline: extract text, chunk it, embed it, store it, send it to HNSW."""

    def __init__(
        self,
        model_name: str = 'all-MiniLM-L6-v2',
        storage_dir: str = "./embeddings_storage",
        hnsw_host: str = "localhost",
        hnsw_port: int = 8000
    ):
        """Create all pipeline components.

        Args:
            model_name: Name of the embedding model.
            storage_dir: Directory for file-based storage.
            hnsw_host: Host name of the HNSW server.
            hnsw_port: Port of the HNSW server.
        """
        self.text_extractor = TextExtractor()
        self.chunker = TextChunker()
        self.embedding_generator = EmbeddingGenerator(model_name)
        self.storage_manager = StorageManager(storage_dir)
        self.hnsw_connector = HNSWConnector(hnsw_host, hnsw_port)

    def process_file(
        self,
        filepath: str,
        chunk_method: str = 'tokens',
        chunk_size: int = 500,
        overlap: int = 50,
        auto_store: bool = True,
        send_to_hnsw: bool = True
    ) -> Dict[str, Any]:
        """Run the full pipeline for one file.

        Args:
            filepath: Path of the file to process.
            chunk_method: ``'tokens'`` for word-based chunks; any other value
                selects sentence-based chunking.
            chunk_size: Words per chunk for ``'tokens'``. For sentence-based
                chunking the same value is used as sentences per chunk.
            overlap: Word overlap between chunks (``'tokens'`` only).
            auto_store: Store the chunks after embedding.
            send_to_hnsw: Send the chunks to the HNSW module afterwards.

        Returns:
            Dictionary with the keys ``file``, ``embedding``, ``storage`` and
            ``hnsw``. When a step fails, an ``error`` entry holds the message.
            This method does not raise.
        """
        separator = '=' * 60
        print(f"\n{separator}")
        print(f"Verarbeite Datei: {filepath}")
        print(separator)
        result = {
            'file': filepath,
            'embedding': {'success': False},
            'storage': {'success': False},
            'hnsw': {'success': False}
        }
        try:
            # 1. Extract text
            print("1. Extrahiere Text...")
            text = self.text_extractor.extract_text(filepath)
            print(f"{len(text)} Zeichen extrahiert")

            # 2. Split into chunks
            print("2. Teile Text in Chunks...")
            if chunk_method == 'tokens':
                chunks_text = self.chunker.chunk_by_tokens(text, chunk_size, overlap)
            else:
                chunks_text = self.chunker.chunk_by_sentences(text, chunk_size)
            print(f"{len(chunks_text)} Chunks erstellt")
            if not chunks_text:
                # An empty document would otherwise fail further down with an
                # unhelpful "list index out of range".
                message = "Kein Text im Dokument gefunden"
                print(f"\n✗ FEHLER: {message}")
                result['error'] = message
                return result

            # 3. Create embeddings
            print("3. Erzeuge Embeddings...")
            embeddings = self.embedding_generator.generate_embeddings(chunks_text)

            # 4. Build DocumentChunk objects. File size and chunk count are
            # the same for every chunk, so they are looked up once.
            file_size = os.path.getsize(filepath)
            total_chunks = len(chunks_text)
            document_chunks = [
                DocumentChunk(
                    text=chunk_text,
                    embedding=embedding,
                    source_file=filepath,
                    chunk_index=i,
                    metadata={
                        'total_chunks': total_chunks,
                        'chunk_method': chunk_method,
                        'file_size': file_size
                    }
                )
                for i, (chunk_text, embedding) in enumerate(zip(chunks_text, embeddings))
            ]
            result['embedding'] = {
                'success': True,
                'chunks_count': len(document_chunks)
            }
            print(f"✓ Embedding abgeschlossen: {len(document_chunks)} Chunks")

            # 5. Store (optional)
            if auto_store:
                print("\n4. Speichere Embeddings...")
                storage_result = self.storage_manager.save(document_chunks)
                result['storage'] = storage_result
                if not storage_result['success']:
                    print("✗ Speicherung fehlgeschlagen")
                    return result

            # 6. Send to HNSW (optional)
            if send_to_hnsw:
                print("\n5. Sende an HNSW-Modul...")
                hnsw_result = self.hnsw_connector.send_embeddings(document_chunks)
                result['hnsw'] = hnsw_result
                if not hnsw_result['success']:
                    print(f"✗ HNSW-Fehler: {hnsw_result.get('message', 'Unbekannt')}")
                    print("\n Tipp: Starte das HNSW-Modul mit:")
                    print(f" python hnsw_server.py --port {self.hnsw_connector.base_url.split(':')[-1]}")

            print(f"\n{separator}")
            print("✓ VERARBEITUNG ABGESCHLOSSEN")
            print(separator)
            self._print_summary(result)
            return result
        except Exception as e:
            # Boundary handler: the caller gets a status dictionary, never an
            # exception.
            print(f"\n✗ FEHLER: {e}")
            result['error'] = str(e)
            return result

    def _print_summary(self, result: Dict):
        """Print a short per-step summary of a ``process_file`` result."""
        print("\n📊 ZUSAMMENFASSUNG:")
        # The status symbol was missing here (both branches were empty
        # strings); use the same marks as the lines below.
        print(f" • Embedding: {'✓' if result['embedding']['success'] else '✗'}")
        if result['storage']['success']:
            print(f" • Storage: ✓ ({result['storage']['storage_type']})")
            if result['storage']['storage_type'] == 'file':
                print(f"{result['storage']['location']}")
        else:
            print(f" • Storage: ✗")
        if result['hnsw']['success']:
            print(f" • HNSW: ✓ Gesendet")
        else:
            print(f" • HNSW: ✗ {result['hnsw'].get('error', 'Fehler')}")
# Example usage
if __name__ == "__main__":
    # Set up the embedder with the default model, storage and HNSW target
    embedder = DocumentEmbedder(
        model_name='all-MiniLM-L6-v2',
        storage_dir='./embeddings_storage',
        hnsw_host='localhost',
        hnsw_port=8000,
    )

    filepath = "beispiel_dokument.pdf"
    if not os.path.exists(filepath):
        # No sample document available: generate a small text file instead
        print(f"Datei nicht gefunden: {filepath}")
        print("\nErstelle Test-Datei...")
        Path("test.txt").write_text(
            "Das ist ein Beispieltext für das Embedding-Modul. " * 100,
            encoding="utf-8",
        )
        result = embedder.process_file(
            filepath="test.txt",
            auto_store=True,
            send_to_hnsw=True,
        )
    else:
        # Process the sample document: store the embeddings, then send them
        result = embedder.process_file(
            filepath=filepath,
            chunk_size=500,
            overlap=50,
            auto_store=True,
            send_to_hnsw=True,
        )

491
faiss_api_v4.py 100644
View File

@ -0,0 +1,491 @@
from flask import Flask, request, jsonify
import numpy as np
import faiss
from typing import List, Dict, Optional
class FAISSSearch:
    """HNSW-based approximate nearest-neighbour search backed by FAISS.

    Vectors live in a FAISS ``IndexHNSWFlat``. Texts and metadata are kept in
    ``metadata_store`` keyed by a custom id, and ``id_map`` translates a FAISS
    row position into that custom id.
    """

    def __init__(self, dim: int, metric: str = 'cosine', max_elements: int = 10000):
        """Store the configuration; the FAISS index is created by ``init_index``.

        Args:
            dim: Vector dimension.
            metric: ``'cosine'``, ``'l2'`` (Euclidean) or ``'ip'`` (inner product).
            max_elements: Intended maximum number of vectors. Only used for
                the capacity warning in ``get_stats``; it is not enforced.
        """
        self.dim = dim
        self.metric = metric
        self.max_elements = max_elements
        self.index = None
        self.metadata_store = {}  # custom id -> {'id', 'text', 'metadata'}
        self.current_id = 0
        self.is_initialized = False
        self.id_map = []  # FAISS row position -> custom id
        # Defined up front so the attribute exists before init_index runs.
        self.normalize = False

    def init_index(self, M: int = 32, ef_construction: int = 200, ef_search: int = 50):
        """Create a fresh, empty FAISS HNSW index.

        Args:
            M: Number of links per node.
            ef_construction: Search breadth while building the index.
            ef_search: Default search breadth for queries.

        Raises:
            ValueError: When ``self.metric`` is not one of the known metrics.
        """
        if self.metric == 'cosine':
            # Cosine similarity = inner product of L2-normalised vectors.
            faiss_metric, normalize = faiss.METRIC_INNER_PRODUCT, True
        elif self.metric == 'l2':
            faiss_metric, normalize = faiss.METRIC_L2, False
        elif self.metric == 'ip':
            faiss_metric, normalize = faiss.METRIC_INNER_PRODUCT, False
        else:
            raise ValueError(f"Unbekannte Metrik: {self.metric}")

        self.index = faiss.IndexHNSWFlat(self.dim, M, faiss_metric)
        self.normalize = normalize
        self.index.hnsw.efConstruction = ef_construction
        self.index.hnsw.efSearch = ef_search
        # A new index starts empty, so bookkeeping from a previous index must
        # not survive; row positions would no longer match.
        self.metadata_store = {}
        self.id_map = []
        self.current_id = 0
        self.is_initialized = True

    def add_items(self, vectors: List[List[float]], texts: List[str] = None,
                  metadata: List[Dict] = None, ids: List[int] = None):
        """Add vectors, with optional texts and metadata, to the index.

        Args:
            vectors: Non-empty list of vectors, each of length ``dim``.
            texts: Optional texts, matched to vectors by position.
            metadata: Optional metadata dicts, matched by position.
            ids: Optional integer ids, one per vector. Assigned consecutively
                when omitted.

        Raises:
            RuntimeError: When the index has not been initialised.
            ValueError: When the vectors are not a non-empty 2-D array of the
                right dimension, or ``ids`` does not fit the vectors.
        """
        if not self.is_initialized:
            raise RuntimeError("Index muss zuerst initialisiert werden (init_index)")

        vectors_np = np.array(vectors, dtype=np.float32)
        if vectors_np.ndim != 2 or vectors_np.shape[0] == 0:
            raise ValueError("vectors muss eine nicht-leere Liste von Vektoren sein")
        if vectors_np.shape[1] != self.dim:
            raise ValueError(
                f"Vektor-Dimension {vectors_np.shape[1]} stimmt nicht mit Index-Dimension {self.dim} überein"
            )

        count = vectors_np.shape[0]
        if ids is None:
            ids = list(range(self.current_id, self.current_id + count))
        else:
            if len(ids) != count:
                raise ValueError(
                    f"Anzahl der IDs ({len(ids)}) stimmt nicht mit Anzahl der Vektoren ({count}) überein"
                )
            try:
                ids = [int(custom_id) for custom_id in ids]
            except (TypeError, ValueError) as e:
                raise ValueError("ids müssen ganzzahlig sein") from e

        if self.normalize:
            faiss.normalize_L2(vectors_np)

        # Add to FAISS first: if this fails, the bookkeeping below stays
        # untouched and row positions remain consistent with id_map.
        self.index.add(vectors_np)

        for i, custom_id in enumerate(ids):
            self.id_map.append(custom_id)
            self.metadata_store[custom_id] = {
                'id': custom_id,
                'text': texts[i] if texts and i < len(texts) else '',
                'metadata': metadata[i] if metadata and i < len(metadata) else {}
            }
        # Keep automatically assigned ids clear of ids supplied by the caller.
        self.current_id = max(self.current_id, max(ids) + 1)

    def search(self, query_vector: List[float], k: int = 5, ef_search: int = None) -> List[Dict]:
        """Return the ``k`` most similar stored vectors.

        Args:
            query_vector: Query vector of length ``dim``.
            k: Number of results wanted.
            ef_search: Search breadth for this query only (higher is more
                accurate but slower). ``None`` uses the index default.

        Returns:
            List of dicts with ``id``, ``score``, ``distance``, ``text`` and
            ``metadata``; empty when the index holds no vectors.

        Raises:
            RuntimeError: When the index has not been initialised.
            ValueError: When ``k`` is not a positive integer or the query has
                the wrong dimension.
        """
        if not self.is_initialized:
            raise RuntimeError("Index muss zuerst initialisiert werden")
        if self.index.ntotal == 0:
            return []
        if isinstance(k, bool) or not isinstance(k, int) or k < 1:
            raise ValueError("k muss eine positive Ganzzahl sein")

        query_np = np.array([query_vector], dtype=np.float32)
        if query_np.ndim != 2 or query_np.shape[1] != self.dim:
            raise ValueError(
                f"Query-Dimension stimmt nicht mit Index-Dimension {self.dim} überein"
            )
        if self.normalize:
            faiss.normalize_L2(query_np)

        k_actual = min(k, self.index.ntotal)  # never ask for more than stored
        default_ef = self.index.hnsw.efSearch
        if ef_search is not None:
            self.index.hnsw.efSearch = max(ef_search, k)
        try:
            distances, indices = self.index.search(query_np, k_actual)
        finally:
            # A per-query override must not become the new index default.
            self.index.hnsw.efSearch = default_ef

        similarity_metric = self.metric in ('cosine', 'ip')
        results = []
        for idx, distance in zip(indices[0], distances[0]):
            if idx == -1:  # FAISS pads with -1 when it finds fewer results
                continue
            custom_id = self.id_map[idx]
            if similarity_metric:
                # Inner product / cosine: higher means more similar.
                score = float(distance)
            else:
                # L2: smaller distance means more similar.
                score = float(1.0 / (1.0 + distance))
            results.append({
                'id': int(custom_id),
                'score': score,
                'distance': float(distance),
                **self.metadata_store.get(custom_id, {})
            })
        return results

    def get_stats(self) -> Dict:
        """Return index statistics, tolerating a partly initialised object."""
        if not hasattr(self, 'dim') or not hasattr(self, 'metric'):
            return {
                'initialized': False,
                'error': 'Objekt nicht vollständig initialisiert'
            }
        if not self.is_initialized or self.index is None:
            return {
                'initialized': False,
                'dimension': self.dim,
                'metric': self.metric
            }

        stats = {
            'initialized': True,
            'dimension': self.dim,
            'metric': self.metric,
            'max_elements': self.max_elements
        }

        # Number of vectors held by FAISS
        try:
            stats['current_count'] = self.index.ntotal
        except (AttributeError, RuntimeError) as e:
            stats['current_count'] = 0
            stats['warning'] = f'Konnte current_count nicht abrufen: {str(e)}'

        # Number of metadata entries
        try:
            stats['metadata_count'] = len(self.metadata_store)
        except (AttributeError, TypeError):
            stats['metadata_count'] = 0

        # Consistency check: FAISS vs. metadata store
        if stats['current_count'] != stats['metadata_count']:
            stats['consistency_warning'] = (
                f"Inkonsistenz erkannt: {stats['current_count']} Vektoren in FAISS, "
                f"aber {stats['metadata_count']} Einträge in Metadata-Store"
            )

        # HNSW parameters
        try:
            if hasattr(self.index, 'hnsw') and self.index.hnsw is not None:
                stats['M'] = self.index.hnsw.M
                stats['ef_construction'] = self.index.hnsw.efConstruction
                stats['ef_search'] = self.index.hnsw.efSearch
            else:
                stats['M'] = 'N/A'
                stats['ef_construction'] = 'N/A'
                stats['ef_search'] = 'N/A'
        except (AttributeError, RuntimeError) as e:
            stats['M'] = 'N/A'
            stats['ef_construction'] = 'N/A'
            stats['ef_search'] = 'N/A'
            stats['hnsw_warning'] = f'HNSW-Parameter nicht verfügbar: {str(e)}'

        # Memory estimate for the raw vectors (float32 = 4 bytes per value)
        try:
            if stats['current_count'] > 0:
                vector_memory_mb = (stats['current_count'] * self.dim * 4) / (1024 * 1024)
                stats['estimated_memory_mb'] = round(vector_memory_mb, 2)
        except (KeyError, ZeroDivisionError, TypeError):
            stats['estimated_memory_mb'] = 'N/A'

        # Capacity warning at 90 % of max_elements
        try:
            if stats['current_count'] >= self.max_elements * 0.9:
                stats['capacity_warning'] = (
                    f"Index ist zu {(stats['current_count'] / self.max_elements * 100):.1f}% gefüllt. "
                    f"Bald Kapazitätsgrenze erreicht!"
                )
        except (KeyError, ZeroDivisionError, TypeError):
            pass
        return stats
# Flask application object; the route handlers in this module register on it
app = Flask(__name__)
# Module-level FAISS index shared by all handlers; None until /init creates it
faiss_index: Optional[FAISSSearch] = None
@app.route('/health', methods=['GET'])
def health():
    """Health check: report service identity and whether an index is ready."""
    index_ready = faiss_index is not None and faiss_index.is_initialized
    payload = {
        'status': 'ok',
        'service': 'FAISS HNSW Search API',
        'version': '3.0',
        'library': 'FAISS',
        'index_initialized': index_ready,
    }
    return jsonify(payload)
@app.route('/init', methods=['POST'])
def init_index():
    """Create a new FAISS HNSW index, replacing any existing one.

    Body:
    {
        "dim": 128,
        "metric": "cosine",      // optional: "cosine", "l2", "ip"
        "max_elements": 10000,   // optional (informational)
        "M": 32,                 // optional (default: 32)
        "ef_construction": 200,  // optional
        "ef_search": 50          // optional
    }

    Returns 400 for invalid input and 500 for unexpected errors. The
    previously active index is kept when initialisation fails.
    """
    global faiss_index
    try:
        # silent=True: a missing or malformed JSON body yields None instead
        # of an HTTP exception that the generic handler would turn into 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'dim' not in data:
            return jsonify({
                'success': False,
                'error': 'Feld "dim" (Dimension) ist erforderlich'
            }), 400
        dim = data['dim']
        # bool is a subclass of int, so exclude it explicitly.
        if isinstance(dim, bool) or not isinstance(dim, int) or dim < 1:
            return jsonify({
                'success': False,
                'error': 'Feld "dim" muss eine positive Ganzzahl sein'
            }), 400
        metric = data.get('metric', 'cosine')
        max_elements = data.get('max_elements', 10000)
        M = data.get('M', 32)
        ef_construction = data.get('ef_construction', 200)
        ef_search = data.get('ef_search', 50)

        # Build the new index in a local variable and publish it only once it
        # is fully initialised, so a failed /init cannot destroy a working one.
        new_index = FAISSSearch(dim=dim, metric=metric, max_elements=max_elements)
        new_index.init_index(M=M, ef_construction=ef_construction, ef_search=ef_search)
        faiss_index = new_index
        return jsonify({
            'success': True,
            'message': 'FAISS HNSW Index erfolgreich initialisiert',
            'stats': faiss_index.get_stats()
        }), 200
    except ValueError as e:
        # Invalid parameters (e.g. unknown metric) are a client error.
        return jsonify({
            'success': False,
            'error': f'Fehler beim Initialisieren: {str(e)}'
        }), 400
    except Exception as e:
        return jsonify({
            'success': False,
            'error': f'Fehler beim Initialisieren: {str(e)}'
        }), 500
@app.route('/add', methods=['POST'])
def add_vectors():
    """Add vectors to the index.

    Body:
    {
        "vectors": [[0.1, 0.2, ...], [0.3, 0.4, ...]],
        "texts": ["Text 1", "Text 2"],        // optional
        "metadata": [{"key": "value"}, {}],   // optional
        "ids": [1, 2]                         // optional
    }

    Returns 400 for invalid input or a missing index, 500 for unexpected
    errors.
    """
    try:
        if faiss_index is None or not faiss_index.is_initialized:
            return jsonify({
                'success': False,
                'error': 'Index muss zuerst initialisiert werden (/init)'
            }), 400
        # silent=True: malformed JSON yields None and is answered with the
        # 400 below instead of surfacing as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'vectors' not in data:
            return jsonify({
                'success': False,
                'error': 'Feld "vectors" ist erforderlich'
            }), 400
        vectors = data['vectors']
        texts = data.get('texts', None)
        metadata = data.get('metadata', None)
        ids = data.get('ids', None)
        if not isinstance(vectors, list) or len(vectors) == 0:
            return jsonify({
                'success': False,
                'error': 'vectors muss eine nicht-leere Liste sein'
            }), 400

        faiss_index.add_items(vectors=vectors, texts=texts, metadata=metadata, ids=ids)
        return jsonify({
            'success': True,
            'message': f'{len(vectors)} Vektoren erfolgreich hinzugefügt',
            'stats': faiss_index.get_stats()
        }), 200
    except ValueError as e:
        # Dimension mismatch and similar input problems are client errors.
        return jsonify({
            'success': False,
            'error': f'Fehler beim Hinzufügen: {str(e)}'
        }), 400
    except Exception as e:
        return jsonify({
            'success': False,
            'error': f'Fehler beim Hinzufügen: {str(e)}'
        }), 500
@app.route('/search', methods=['POST'])
def search_vectors():
    """Search the index for vectors similar to a query vector.

    Body:
    {
        "query_vector": [0.1, 0.2, ...],
        "k": 5,            // optional, default: 5
        "ef_search": 50    // optional (search accuracy)
    }

    Returns 400 for invalid input or a missing index, 500 for unexpected
    errors.
    """
    try:
        if faiss_index is None or not faiss_index.is_initialized:
            return jsonify({
                'success': False,
                'error': 'Index muss zuerst initialisiert werden (/init)'
            }), 400
        # silent=True: malformed JSON yields None and is answered with the
        # 400 below instead of surfacing as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'query_vector' not in data:
            return jsonify({
                'success': False,
                'error': 'Feld "query_vector" ist erforderlich'
            }), 400
        query_vector = data['query_vector']
        k = data.get('k', 5)
        ef_search = data.get('ef_search', None)
        if not isinstance(query_vector, list):
            return jsonify({
                'success': False,
                'error': 'query_vector muss eine Liste sein'
            }), 400
        # bool is a subclass of int, so exclude it explicitly.
        if isinstance(k, bool) or not isinstance(k, int) or k < 1:
            return jsonify({
                'success': False,
                'error': 'k muss eine positive Ganzzahl sein'
            }), 400

        results = faiss_index.search(query_vector=query_vector, k=k, ef_search=ef_search)
        return jsonify({
            'success': True,
            'query_vector_dim': len(query_vector),
            'k': k,
            'results_count': len(results),
            'results': results
        }), 200
    except ValueError as e:
        # Input problems reported by the index are client errors.
        return jsonify({
            'success': False,
            'error': f'Fehler bei der Suche: {str(e)}'
        }), 400
    except Exception as e:
        return jsonify({
            'success': False,
            'error': f'Fehler bei der Suche: {str(e)}'
        }), 500
@app.route('/stats', methods=['GET'])
def get_stats():
    """Return index statistics; every failure is reported as a JSON error."""
    try:
        # No index object yet: answer successfully with a hint to call /init
        if faiss_index is None:
            not_created = {
                'initialized': False,
                'message': 'Index wurde noch nicht erstellt. Bitte /init aufrufen.'
            }
            return jsonify({'success': True, 'stats': not_created}), 200

        stats = faiss_index.get_stats()
        # The index reports its own problems through an 'error' entry
        if 'error' in stats:
            failure = {
                'success': False,
                'error': stats['error'],
                'stats': stats
            }
            return jsonify(failure), 500
        return jsonify({'success': True, 'stats': stats}), 200
    except Exception as exc:
        # Choose the message prefix by exception type
        if isinstance(exc, AttributeError):
            prefix = 'Attributfehler beim Abrufen der Statistiken'
        elif isinstance(exc, RuntimeError):
            prefix = 'Laufzeitfehler beim Abrufen der Statistiken'
        else:
            prefix = 'Unerwarteter Fehler beim Abrufen der Statistiken'
        return jsonify({
            'success': False,
            'error': f'{prefix}: {str(exc)}',
            'stats': {'initialized': False}
        }), 500
if __name__ == '__main__':
    import os

    # SECURITY: debug=True enables the interactive Werkzeug debugger, which
    # allows arbitrary code execution. Combined with host='0.0.0.0' it would
    # be reachable from the network, so debug mode is now opt-in via the
    # FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    # NOTE(review): the embedding module's HNSWConnector defaults to port
    # 8000 and posts to /api/embeddings/add, while this server listens on
    # 5000 and exposes /add - confirm which side is intended.
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)

643
visu_new.html 100644
View File

@ -0,0 +1,643 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Distanzmetriken Visualisierung</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
background: #f5f5f5;
padding: 20px;
line-height: 1.6;
}
.container {
max-width: 1400px;
margin: 0 auto;
}
.header {
text-align: center;
margin-bottom: 40px;
background: white;
padding: 30px;
border-radius: 10px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
h1 {
color: #333;
margin-bottom: 10px;
}
.subtitle {
color: #666;
}
.highlight-box {
background: #fff3cd;
border-left: 4px solid #ffc107;
padding: 20px;
margin: 20px 0;
border-radius: 8px;
}
.highlight-box h3 {
color: #856404;
margin-bottom: 10px;
}
.highlight-box p {
color: #856404;
font-size: 14px;
}
.controls {
background: white;
padding: 30px;
border-radius: 10px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
margin-bottom: 30px;
}
.controls h2 {
margin-bottom: 20px;
color: #333;
}
.control-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 40px;
}
.vector-controls {
padding: 20px;
border-radius: 8px;
}
.vector-controls.v1 {
background: #e3f2fd;
border-left: 4px solid #2196F3;
}
.vector-controls.v2 {
background: #ffebee;
border-left: 4px solid #f44336;
}
.vector-controls h3 {
margin-bottom: 20px;
}
.control-group {
margin-bottom: 20px;
}
.control-group label {
display: block;
margin-bottom: 8px;
font-weight: 500;
color: #555;
}
input[type="range"] {
width: 100%;
height: 6px;
border-radius: 3px;
background: #ddd;
outline: none;
}
input[type="range"]::-webkit-slider-thumb {
-webkit-appearance: none;
appearance: none;
width: 20px;
height: 20px;
border-radius: 50%;
background: #2196F3;
cursor: pointer;
}
.viz-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 30px;
margin-bottom: 30px;
}
.viz-card {
background: white;
padding: 30px;
border-radius: 10px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
.viz-card h3 {
margin-bottom: 20px;
font-size: 24px;
}
.viz-card.euclidean h3 {
color: #9c27b0;
}
.viz-card.cosine h3 {
color: #4caf50;
}
.svg-container {
background: #fafafa;
border: 1px solid #e0e0e0;
border-radius: 8px;
padding: 10px;
margin-bottom: 20px;
}
.result-box {
padding: 20px;
border-radius: 8px;
margin-top: 20px;
}
.result-box.euclidean {
background: #f3e5f5;
border-left: 4px solid #9c27b0;
}
.result-box.cosine {
background: #e8f5e9;
border-left: 4px solid #4caf50;
}
.formula {
font-family: 'Courier New', monospace;
font-size: 14px;
margin-bottom: 10px;
padding: 10px;
background: rgba(0,0,0,0.05);
border-radius: 4px;
}
.result-value {
font-size: 24px;
font-weight: bold;
margin: 10px 0;
}
.result-box.euclidean .result-value {
color: #9c27b0;
}
.result-box.cosine .result-value {
color: #4caf50;
}
.interpretation {
margin-top: 15px;
padding: 15px;
background: white;
border-radius: 6px;
font-size: 14px;
font-weight: 600;
}
.interpretation.similar {
color: #2e7d32;
border: 2px solid #4caf50;
}
.interpretation.dissimilar {
color: #c62828;
border: 2px solid #f44336;
}
.interpretation.medium {
color: #f57c00;
border: 2px solid #ff9800;
}
.description {
font-size: 14px;
color: #666;
margin-top: 10px;
}
.comparison {
background: white;
padding: 30px;
border-radius: 10px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
.comparison h2 {
margin-bottom: 20px;
color: #333;
}
.comparison-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 30px;
}
.comparison-box {
padding: 20px;
border-radius: 8px;
}
.comparison-box.euclidean {
background: #f3e5f5;
border-left: 4px solid #9c27b0;
}
.comparison-box.cosine {
background: #e8f5e9;
border-left: 4px solid #4caf50;
}
.comparison-box h3 {
margin-bottom: 15px;
}
.comparison-box ul {
list-style: none;
padding-left: 0;
}
.comparison-box li {
padding: 5px 0;
font-size: 14px;
}
.quick-test {
background: #e3f2fd;
padding: 20px;
border-radius: 10px;
margin-bottom: 30px;
border-left: 4px solid #2196F3;
}
.quick-test h3 {
color: #1565c0;
margin-bottom: 15px;
}
.test-buttons {
display: flex;
gap: 10px;
flex-wrap: wrap;
}
.test-btn {
padding: 10px 20px;
background: #2196F3;
color: white;
border: none;
border-radius: 6px;
cursor: pointer;
font-size: 14px;
transition: background 0.3s;
}
.test-btn:hover {
background: #1976d2;
}
/* At viewport widths of 968px or less, collapse the control, visualisation
   and comparison grids into a single column. */
@media (max-width: 968px) {
.control-grid, .viz-grid, .comparison-grid {
grid-template-columns: 1fr;
}
}
</style>
</head>
<body>
<div class="container">
<!-- Page header: title and subtitle. -->
<div class="header">
<h1>Distanzmetriken für Vektorvergleiche</h1>
<p class="subtitle">Interaktive Visualisierung der mathematischen Konzepte</p>
</div>
<div class="highlight-box">
<h3>Der entscheidende Unterschied!</h3>
<p><strong>Teste die Szenarien unten:</strong> Wenn beide Vektoren die gleiche Richtung haben (gleicher Winkel),
aber unterschiedlich lang sind, zeigt die Kosinus-Ähnlichkeit perfekte Übereinstimmung (1.0), während die euklidische Distanz groß ausfällt (= geringe bis mittlere Ähnlichkeit).</p>
</div>
<!-- Preset scenarios: each button calls testScenario() with the key of a
     predefined slider configuration in the script. -->
<div class="quick-test">
<h3>Schnell-Test:</h3>
<div class="test-buttons">
<button class="test-btn" onclick="testScenario('same-direction')">
Gleiche Richtung, unterschiedliche Länge
</button>
<button class="test-btn" onclick="testScenario('different-direction')">
Unterschiedliche Richtung, gleiche Länge
</button>
<button class="test-btn" onclick="testScenario('orthogonal')">
Orthogonal (90° Unterschied)
</button>
<button class="test-btn" onclick="testScenario('opposite')">
Fast entgegengesetzt
</button>
</div>
</div>
<!-- Slider controls for both vectors (angle 0-180 degrees, length 20-100).
     Each label is bound to its range input through for/id so the slider has an
     accessible name and clicking the label focuses the slider. The
     "*-value" spans are refreshed by update() in the script. -->
<div class="controls">
    <h2>Vektoreinstellungen</h2>
    <div class="control-grid">
        <div class="vector-controls v1">
            <h3>Vektor 1 (Blau)</h3>
            <div class="control-group">
                <label for="angle1">Winkel: <span id="angle1-value">30</span>°</label>
                <input type="range" id="angle1" min="0" max="180" value="30">
            </div>
            <div class="control-group">
                <label for="length1">Länge: <span id="length1-value">80</span></label>
                <input type="range" id="length1" min="20" max="100" value="80">
            </div>
        </div>
        <div class="vector-controls v2">
            <h3>Vektor 2 (Rot)</h3>
            <div class="control-group">
                <label for="angle2">Winkel: <span id="angle2-value">60</span>°</label>
                <input type="range" id="angle2" min="0" max="180" value="60">
            </div>
            <div class="control-group">
                <label for="length2">Länge: <span id="length2-value">60</span></label>
                <input type="range" id="length2" min="20" max="100" value="60">
            </div>
        </div>
    </div>
</div>
<!-- Two visualisation cards side by side. The empty <svg> elements are filled
     by drawEuclideanViz() / drawCosineViz(); the "*-result" spans and
     "*-interpretation" divs are filled by update(). -->
<div class="viz-grid">
<!-- Card 1: Euclidean distance. -->
<div class="viz-card euclidean">
<h3>Euklidische Distanz</h3>
<div class="svg-container">
<svg id="euclidean-svg" width="100%" height="300" viewBox="-20 -220 260 260"></svg>
</div>
<div class="result-box euclidean">
<div class="formula">d = √[(x₂-x₁)² + (y₂-y₁)²]</div>
<div class="result-value">Distanz: <span id="euclidean-result">0.00</span></div>
<div class="interpretation" id="euclidean-interpretation"></div>
<div class="description">
Misst die <strong>direkte räumliche Entfernung</strong> zwischen den Vektorendpunkten.
Berücksichtigt sowohl Richtung als auch Länge der Vektoren.
</div>
</div>
</div>
<!-- Card 2: cosine similarity. -->
<div class="viz-card cosine">
<h3>Kosinus-Ähnlichkeit</h3>
<div class="svg-container">
<svg id="cosine-svg" width="100%" height="300" viewBox="-20 -220 260 260"></svg>
</div>
<div class="result-box cosine">
<div class="formula">cos(θ) = (v1 · v2) / (||v1|| × ||v2||)</div>
<div class="result-value">Ähnlichkeit: <span id="cosine-result">0.0000</span></div>
<div class="interpretation" id="cosine-interpretation"></div>
<div class="description">
Misst nur den <strong>Winkel zwischen Vektoren</strong>, ignoriert die Länge komplett!
Perfekt für Text-Embeddings: "Hund" und "großer Hund" haben gleiche Semantik (Richtung),
auch wenn unterschiedlich lang.
</div>
<div class="description" style="margin-top: 10px; font-size: 12px;">
1.0 = gleiche Richtung | 0.0 = orthogonal (90°) | -1.0 = entgegengesetzt (180°)
</div>
</div>
</div>
</div>
<!-- Static guidance: which metric suits which kind of data. -->
<div class="comparison">
<h2>Wann welche Metrik verwenden?</h2>
<div class="comparison-grid">
<!-- Use cases for Euclidean distance. -->
<div class="comparison-box euclidean">
<h3>Euklidische Distanz</h3>
<ul>
<li>✓ Geografische Koordinaten</li>
<li>✓ Physikalische Messungen</li>
<li>✓ Pixelwerte in Bildern</li>
<li>✓ Wenn absolute Position wichtig ist</li>
<li>✓ Wenn Magnitude (Länge) relevant ist</li>
<li>✗ Nicht ideal für hochdimensionale Daten</li>
</ul>
</div>
<!-- Use cases for cosine similarity. -->
<div class="comparison-box cosine">
<h3>Kosinus-Ähnlichkeit</h3>
<ul>
<li>✓ Text-Embeddings (Semantik zählt, nicht Länge)</li>
<li>✓ Dokumentenähnlichkeit</li>
<li>✓ Empfehlungssysteme</li>
<li>✓ Wenn nur Richtung wichtig ist</li>
<li>✓ Robust gegenüber Skalierung</li>
<li>✓ Ideal für normalisierte Daten</li>
</ul>
</div>
</div>
</div>
</div>
<script>
// Factor by which vector components are multiplied when they are turned into
// SVG coordinates by the drawing functions.
const scale = 2;
/**
 * Convert an angle from degrees to radians.
 *
 * @param {number} deg - Angle in degrees.
 * @returns {number} The same angle expressed in radians.
 */
function toRadians(deg) {
    const timesPi = deg * Math.PI;
    return timesPi / 180;
}
/**
 * Load one of the predefined slider configurations and redraw.
 *
 * @param {string} scenario - Preset key: 'same-direction',
 *     'different-direction', 'orthogonal' or 'opposite'.
 */
function testScenario(scenario) {
    // a = angle in degrees, l = length; suffix 1/2 = vector number.
    const presets = {
        'same-direction': { a1: 45, l1: 90, a2: 45, l2: 30 },
        'different-direction': { a1: 30, l1: 70, a2: 90, l2: 70 },
        'orthogonal': { a1: 0, l1: 70, a2: 90, l2: 70 },
        'opposite': { a1: 20, l1: 70, a2: 160, l2: 70 }
    };
    const { a1, l1, a2, l2 } = presets[scenario];

    // Slider element id -> value to apply, in the order they are written.
    const sliderValues = [
        ['angle1', a1],
        ['length1', l1],
        ['angle2', a2],
        ['length2', l2]
    ];
    for (const [sliderId, value] of sliderValues) {
        document.getElementById(sliderId).value = value;
    }

    // Assigning .value from script fires no 'input' event, so refresh manually.
    update();
}
/**
 * Classify a Euclidean distance as similar / medium / dissimilar.
 *
 * The distance is divided by maxDist; a ratio below 0.2 counts as similar,
 * below 0.5 as medium, and everything else as dissimilar.
 *
 * @param {number} dist - Measured distance.
 * @param {number} [maxDist=200] - Distance used as the normalisation base.
 * @returns {{class: string, text: string}} CSS state class and display text.
 */
function getEuclideanInterpretation(dist, maxDist = 200) {
    const ratio = dist / maxDist;
    // Ordered upper bounds; the first band whose limit exceeds the ratio wins.
    const bands = [
        { limit: 0.2, class: 'similar', text: '✓ Sehr ähnlich (kleine Distanz)' },
        { limit: 0.5, class: 'medium', text: '~ Mittlere Ähnlichkeit' }
    ];
    const hit = bands.find(band => ratio < band.limit);
    if (hit) {
        return { class: hit.class, text: hit.text };
    }
    return { class: 'dissimilar', text: '✗ Sehr unterschiedlich (große Distanz)' };
}
/**
 * Classify a cosine similarity value into a display verdict.
 *
 * Bands (checked top-down):
 *   > 0.9   similar
 *   > 0.5   medium
 *   >= 0    dissimilar, angle up to and including 90 degrees
 *   > -0.5  dissimilar, angle greater than 90 degrees
 *   else    dissimilar, nearly opposite
 *
 * @param {number} similarity - Cosine similarity, expected in [-1, 1].
 * @returns {{class: string, text: string}} CSS state class and display text.
 */
function getCosineInterpretation(similarity) {
    if (similarity > 0.9) {
        return { class: 'similar', text: '✓ Sehr ähnlich (fast gleiche Richtung)' };
    }
    if (similarity > 0.5) {
        return { class: 'medium', text: '~ Mittlere Ähnlichkeit' };
    }
    // Inclusive bound: exactly 0 means orthogonal (90 degrees), which must not
    // be reported by the "> 90°" message of the next band.
    if (similarity >= 0) {
        return { class: 'dissimilar', text: '✗ Unterschiedlich (großer Winkel)' };
    }
    if (similarity > -0.5) {
        return { class: 'dissimilar', text: '✗ Sehr unterschiedlich (> 90°)' };
    }
    return { class: 'dissimilar', text: '✗✗ Fast entgegengesetzt!' };
}
/**
 * Redraw the Euclidean-distance SVG for the two given vectors.
 *
 * Replaces the whole content of #euclidean-svg with: the two axes, grid lines
 * at 50/100/150/200, a dashed purple line between the two vector tips (the
 * distance), both vectors as arrows from the origin, dots at the tips, and
 * the labels "v1", "v2" and "d".
 *
 * @param {number} v1x - x component of vector 1 (unscaled).
 * @param {number} v1y - y component of vector 1 (unscaled).
 * @param {number} v2x - x component of vector 2 (unscaled).
 * @param {number} v2y - y component of vector 2 (unscaled).
 */
function drawEuclideanViz(v1x, v1y, v2x, v2y) {
const svg = document.getElementById('euclidean-svg');
// Components are multiplied by `scale`; y values are negated because the SVG
// y axis points downward while the vectors use a y-up convention.
svg.innerHTML = `
<!-- Koordinatensystem -->
<line x1="0" y1="0" x2="220" y2="0" stroke="#ddd" stroke-width="1" />
<line x1="0" y1="0" x2="0" y2="-220" stroke="#ddd" stroke-width="1" />
<!-- Gitter -->
${[50, 100, 150, 200].map(i => `
<line x1="${i}" y1="0" x2="${i}" y2="-200" stroke="#f0f0f0" stroke-width="0.5" />
<line x1="0" y1="${-i}" x2="200" y2="${-i}" stroke="#f0f0f0" stroke-width="0.5" />
`).join('')}
<!-- Distanzlinie -->
<line x1="${v1x * scale}" y1="${-v1y * scale}"
x2="${v2x * scale}" y2="${-v2y * scale}"
stroke="purple" stroke-width="3" stroke-dasharray="5,5" />
<!-- Vektor 1 -->
<defs>
<marker id="arrowBlue" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
<path d="M0,0 L0,6 L9,3 z" fill="blue" />
</marker>
<marker id="arrowRed" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
<path d="M0,0 L0,6 L9,3 z" fill="red" />
</marker>
</defs>
<line x1="0" y1="0" x2="${v1x * scale}" y2="${-v1y * scale}"
stroke="blue" stroke-width="3" marker-end="url(#arrowBlue)" />
<!-- Vektor 2 -->
<line x1="0" y1="0" x2="${v2x * scale}" y2="${-v2y * scale}"
stroke="red" stroke-width="3" marker-end="url(#arrowRed)" />
<!-- Endpunkte -->
<circle cx="${v1x * scale}" cy="${-v1y * scale}" r="4" fill="blue" />
<circle cx="${v2x * scale}" cy="${-v2y * scale}" r="4" fill="red" />
<!-- Labels -->
<text x="${v1x * scale + 10}" y="${-v1y * scale}" fill="blue" font-size="14" font-weight="bold">v1</text>
<text x="${v2x * scale + 10}" y="${-v2y * scale}" fill="red" font-size="14" font-weight="bold">v2</text>
<text x="${(v1x + v2x) * scale / 2}" y="${-(v1y + v2y) * scale / 2 - 10}" fill="purple" font-size="12" font-weight="bold">d</text>
`;
}
/**
 * Redraw the cosine-similarity SVG for the two given vectors.
 *
 * Replaces the whole content of #cosine-svg with: the two axes, grid lines,
 * a green arc of radius 30 around the origin marking the angle between the
 * vectors, both vectors as arrows, and the labels "v1", "v2" and "θ = …°".
 *
 * @param {number} v1x - x component of vector 1 (unscaled).
 * @param {number} v1y - y component of vector 1 (unscaled).
 * @param {number} v2x - x component of vector 2 (unscaled).
 * @param {number} v2y - y component of vector 2 (unscaled).
 * @param {number} angle1 - Direction of vector 1 in degrees.
 * @param {number} angle2 - Direction of vector 2 in degrees.
 */
function drawCosineViz(v1x, v1y, v2x, v2y, angle1, angle2) {
    const svg = document.getElementById('cosine-svg');
    const angleDiff = Math.abs(angle2 - angle1);
    // Angle arc: runs from the smaller to the larger of the two angles.
    const arcRadius = 30;
    const startAngle = toRadians(Math.min(angle1, angle2));
    const endAngle = toRadians(Math.max(angle1, angle2));
    const largeArc = angleDiff > 180 ? 1 : 0;
    // y is negated for SVG's downward y axis, so moving from the smaller to the
    // larger angle is counter-clockwise on screen, which is sweep-flag 0. With
    // sweep-flag 1 the renderer chooses the mirrored circle and the arc bends
    // toward the origin instead of around it.
    const sweep = 0;
    svg.innerHTML = `
<!-- Koordinatensystem -->
<line x1="0" y1="0" x2="220" y2="0" stroke="#ddd" stroke-width="1" />
<line x1="0" y1="0" x2="0" y2="-220" stroke="#ddd" stroke-width="1" />
<!-- Gitter -->
${[50, 100, 150, 200].map(i => `
<line x1="${i}" y1="0" x2="${i}" y2="-200" stroke="#f0f0f0" stroke-width="0.5" />
<line x1="0" y1="${-i}" x2="200" y2="${-i}" stroke="#f0f0f0" stroke-width="0.5" />
`).join('')}
<!-- Winkelbogen -->
<path d="M ${arcRadius * Math.cos(startAngle)} ${-arcRadius * Math.sin(startAngle)}
A ${arcRadius} ${arcRadius} 0 ${largeArc} ${sweep}
${arcRadius * Math.cos(endAngle)} ${-arcRadius * Math.sin(endAngle)}"
fill="none" stroke="green" stroke-width="2" />
<!-- Vektoren -->
<defs>
<marker id="arrowBlue2" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
<path d="M0,0 L0,6 L9,3 z" fill="blue" />
</marker>
<marker id="arrowRed2" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
<path d="M0,0 L0,6 L9,3 z" fill="red" />
</marker>
</defs>
<line x1="0" y1="0" x2="${v1x * scale}" y2="${-v1y * scale}"
stroke="blue" stroke-width="3" marker-end="url(#arrowBlue2)" />
<line x1="0" y1="0" x2="${v2x * scale}" y2="${-v2y * scale}"
stroke="red" stroke-width="3" marker-end="url(#arrowRed2)" />
<!-- Labels -->
<text x="${v1x * scale + 10}" y="${-v1y * scale}" fill="blue" font-size="14" font-weight="bold">v1</text>
<text x="${v2x * scale + 10}" y="${-v2y * scale}" fill="red" font-size="14" font-weight="bold">v2</text>
<text x="45" y="-45" fill="green" font-size="12" font-weight="bold">θ = ${angleDiff.toFixed(1)}°</text>
`;
}
/**
 * Read the four sliders, recompute both metrics and refresh the page.
 *
 * Updates the slider value labels, the numeric results, the verdict badges
 * (text and CSS state class) and finally redraws both SVG visualisations.
 */
function update() {
    const readSlider = id => parseFloat(document.getElementById(id).value);
    const setText = (id, content) => {
        document.getElementById(id).textContent = content;
    };
    const showVerdict = (id, verdict) => {
        const badge = document.getElementById(id);
        badge.textContent = verdict.text;
        badge.className = 'interpretation ' + verdict.class;
    };

    const angle1 = readSlider('angle1');
    const length1 = readSlider('length1');
    const angle2 = readSlider('angle2');
    const length2 = readSlider('length2');

    // Mirror the slider positions into the labels next to them.
    setText('angle1-value', angle1);
    setText('length1-value', length1);
    setText('angle2-value', angle2);
    setText('length2-value', length2);

    // Polar (angle, length) -> Cartesian (x, y).
    const theta1 = toRadians(angle1);
    const theta2 = toRadians(angle2);
    const v1x = length1 * Math.cos(theta1);
    const v1y = length1 * Math.sin(theta1);
    const v2x = length2 * Math.cos(theta2);
    const v2y = length2 * Math.sin(theta2);

    // Euclidean distance between the two vector tips.
    const dx = v2x - v1x;
    const dy = v2y - v1y;
    const euclidean = Math.sqrt(Math.pow(dx, 2) + Math.pow(dy, 2));
    setText('euclidean-result', euclidean.toFixed(2));
    showVerdict('euclidean-interpretation', getEuclideanInterpretation(euclidean));

    // Cosine similarity: dot product divided by the product of the norms.
    const dotProduct = v1x * v2x + v1y * v2y;
    const norm1 = Math.sqrt(v1x * v1x + v1y * v1y);
    const norm2 = Math.sqrt(v2x * v2x + v2y * v2y);
    const cosine = dotProduct / (norm1 * norm2);
    setText('cosine-result', cosine.toFixed(4));
    showVerdict('cosine-interpretation', getCosineInterpretation(cosine));

    // Redraw both diagrams.
    drawEuclideanViz(v1x, v1y, v2x, v2y);
    drawCosineViz(v1x, v1y, v2x, v2y, angle1, angle2);
}
// Recompute and redraw whenever one of the four sliders is moved.
['angle1', 'length1', 'angle2', 'length2'].forEach(sliderId => {
    document.getElementById(sliderId).addEventListener('input', update);
});
// Render once with the sliders' initial values.
update();
</script>
</body>
</html>