Second attempt at the Wordcloud

main
thomasmuller 2025-05-20 12:09:33 +02:00
parent 08ced3e693
commit cd13864fb1
8 changed files with 970 additions and 0 deletions

pom.xml

@@ -0,0 +1,166 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>wordcloud.informatik</groupId>
<artifactId>wordcloud.informatik.maven.eclipse</artifactId>
<version>0.0.1-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.5</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>3.0.4</version>
</dependency>
<dependency>
<groupId>org.apache.servicemix.bundles</groupId>
<artifactId>org.apache.servicemix.bundles.lucene-analyzers-common</artifactId>
<version>8.11.1_1</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>5.10.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.20.0</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.20.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compiler -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.13.0</version>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
</configuration>
</plugin>
<!-- JAR creation -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.6.0</version>
<configuration>
<createDependencyReducedPom>false</createDependencyReducedPom>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<!-- only one Main-Class entry is allowed in the manifest -->
<mainClass>de.hs_mannheim.informatik.wordcloud.main.main</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
<!-- Code coverage, cf.: target/site/jacoco -->
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.12</version>
<executions>
<execution>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<phase>test</phase>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Static code analysis, cf.: target/site/pmd.html -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-pmd-plugin</artifactId>
<version>3.26.0</version>
<configuration>
<failOnViolation>false</failOnViolation>
<printFailingErrors>true</printFailingErrors>
</configuration>
<executions>
<execution>
<phase>verify</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<reporting>
<plugins>
<!-- generate Javadocs via "mvn site" and find them in the target/site folder -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.11.2</version>
<configuration>
<show>private</show>
<nohelp>true</nohelp>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
<version>3.6.0</version>
</plugin>
</plugins>
</reporting>
</project>

Filereading.java (de.hs_mannheim.informatik.wordcloud.main)

@@ -0,0 +1,228 @@
package de.hs_mannheim.informatik.wordcloud.main;
import java.io.File;
import java.io.IOException;
import java.io.FileInputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
public class Filereading {
private Map<String, Integer> words = new TreeMap<>();
private static final Logger logger = LogManager.getLogger(Filereading.class);
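// Selects the matching reader for the file extension (.pdf, .docx, .pptx, .txt) and fills the word-frequency map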
Filereading(String path, Language language){
if(path.endsWith(".pdf")) {
String pdfText = pdfReading(path);
if(pdfText != null) {
textAnalyzis(pdfText, language);
}
}else if(path.endsWith(".docx")) {
String text = reading(path);
if(text != null) {
textAnalyzis(text, language);
}
}else if(path.endsWith(".pptx")) {
String text = pptReading(path);
if(text != null) {
textAnalyzis(text, language);
}
}else if(path.endsWith(".txt")) {
String text =txtReading(path);
if(text != null) {
textAnalyzis(text, language);
}
}
}
// Source 1: each language provides its own Lucene Analyzer (stop-word removal and stemming)
public enum Language {
ENGLISH {
@Override
public Analyzer getAnalyzer() {
return new EnglishAnalyzer();
}
},
GERMAN {
@Override
public Analyzer getAnalyzer() {
return new GermanAnalyzer();
}
};
public abstract Analyzer getAnalyzer();
}
public Map<String, Integer> getWords() {
return words;
}
public String txtReading(String path) {
try {
// Read the entire content of the file into a single string
String content = new String(Files.readAllBytes(Paths.get(path)));
logger.info("Datei erfolgreich gelesen.");
return content;
} catch (IOException e) {
logger.error("Fehler beim Lesen der Datei.", e);
return null;
}
}
public String reading(String path) {
String filePath = path;
logger.info("Datei wird gelesen: " + filePath);
File file = new File(filePath);
StringBuilder sb = new StringBuilder();
try (FileInputStream fis = new FileInputStream(file);
XWPFDocument document = new XWPFDocument(fis)) {
List<XWPFParagraph> paragraphs = document.getParagraphs();
for (XWPFParagraph para : paragraphs) {
sb.append(para.getText()).append("\n");
}
return sb.toString();
} catch (IOException e) {
logger.error("Fehler beim Öffnen der Word-Datei: " + file.getPath(), e);
return null;
}
}
public String pdfReading(String path) {
logger.info("Datei wird gelesen: " + path);
File file = new File(path);
try (PDDocument document = Loader.loadPDF(file)) {
PDFTextStripper pdfStripper = new PDFTextStripper();
return pdfStripper.getText(document);
} catch (Exception e) {
logger.error("Fehler beim Öffnen der Datei.", e);
return null;
}
}
// Source 1
public String pptReading(String path) {
StringBuilder text = new StringBuilder();
logger.info("Datei wird gelesen: " + path);
try (FileInputStream fis = new FileInputStream(path);
XMLSlideShow ppt = new XMLSlideShow(fis)) {
for (XSLFSlide slide : ppt.getSlides()) {
for (XSLFShape shape : slide.getShapes()) {
if (shape instanceof XSLFTextShape) {
XSLFTextShape textShape = (XSLFTextShape) shape;
text.append(textShape.getText()).append("\n");
}
}
}
} catch (IOException e) {
logger.error("Fehler beim Lesen der PPTX-Datei.", e);
}
return text.toString();
}
public Map<String, Integer> textAnalyzis(String text, Language language) {
try (Analyzer analyzer = language.getAnalyzer();
TokenStream tokenStream = analyzer.tokenStream(null, text)) {
CharTermAttribute termAttribute = tokenStream.addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
String token = termAttribute.toString();
// keep tokens without digits that are longer than two characters and start with a letter or digit
if (!token.matches(".*\\d.*") && token.length() > 2 && !token.matches("^[^a-zA-Z0-9].*")) {
words.merge(token, 1, Integer::sum);
}
}
tokenStream.end();
logger.info("Es wurden " + words.size() + " Worte ausgelesen");
} catch (IOException e) {
logger.error("Fehler bei der Textanalyse.", e);
return null;
}
return words;
}
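// Returns the n most frequent words; note that collecting into a TreeMap orders the result alphabetically by key, not by frequency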
public Map<String, Integer> getTopNWords(int n) {
return words.entrySet()
.stream()
.sorted((e1, e2) -> e2.getValue().compareTo(e1.getValue()))
.limit(n)
.collect(TreeMap::new,
(m, e) -> m.put(e.getKey(), e.getValue()),
TreeMap::putAll); // Source 2
}
}

InsertWordcloudElements.java (de.hs_mannheim.informatik.wordcloud.main)

@@ -0,0 +1,156 @@
package de.hs_mannheim.informatik.wordcloud.main;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
public class InsertWordcloudElements {
private static final Logger logger = LogManager.getLogger(InsertWordcloudElements.class);
private String search;
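// Applies the blacklist (if any), inserts the remaining words into the HTML word cloud and writes all word counts to a CSV file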
InsertWordcloudElements(Map<String, Integer> cloudwords, ArrayList<String> nonoWords, int neededFreq, String search) {
this.search = search;
if(!nonoWords.isEmpty()) {
Map<String, Integer> filterMap = filter(cloudwords, nonoWords);
enterWordcloudElements(filterMap, neededFreq);
}else {
enterWordcloudElements(cloudwords, neededFreq);
}
createCSVFile();
writeCSVFile(cloudwords);
}
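// Removes every blacklisted word; the map passed in is modified in place and returned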
public Map<String,Integer> filter(Map<String, Integer> words,ArrayList<String> badWords) {
words.keySet().removeIf(badWords::contains);
return words;
}
public void createCSVFile() {
try {
File file = new File("src/test/resources/woerter.csv");
if (file.createNewFile()) {
logger.info("Die csv Datei wurde erstellt.");
} else {
logger.info("Die csv Datei existiert bereits.");
}
} catch (Exception e) {
logger.error("Fehler beim Erstellen der csv Datei.", e);
}
}
public void writeCSVFile(Map<String, Integer> words) {
try (FileWriter write = new FileWriter("src/test/resources/woerter.csv")) {
for (String word : words.keySet()) {
write.write(word + ", " + words.get(word) + ",\n");
}
logger.info("Die csv Datei wurde erfolgreich überschrieben.");
} catch (Exception e) {
logger.error("Datei konnte nicht beschrieben werden.", e);
}
}
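// Rewrites site/wordcloud.html: inserts one <span> per word with at least neededFreq occurrences after the TODO marker line and drops previously generated spans up to the closing </div>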
public void enterWordcloudElements(Map<String, Integer> words, int neededFreq) {
int minFreq = Collections.min(words.values());
int maxFreq = Collections.max(words.values());
String filepath = "site/wordcloud.html";
try {
List<String> lines = Files.readAllLines(Paths.get(filepath));
List<String> updateLines = new ArrayList<>();
boolean inOldSpanBlock = false;
for (String line : lines) {
if (inOldSpanBlock) {
if (line.contains("</div>")) {
updateLines.add(line);
inOldSpanBlock = false;
}
continue;
}
updateLines.add(line);
if (line.contains("<!-- TODO: Hier die generierten Tags einsetzen -->")) {
int idCounter = 0;
for (String key : words.keySet()) {
if (words.get(key) < neededFreq) {
continue;
}
String tagClass = getTagcloudClass(words.get(key), minFreq, maxFreq);
String word = "<span id=\""
+ idCounter + "\" class=\"wrd "
+ tagClass + "\"><a href=\"https://www."+search+".com/search?q="
+ key + "\">"
+ key + "</a></span>";
updateLines.add(word);
idCounter++;
}
inOldSpanBlock = true;
}
}
Files.write(Paths.get(filepath), updateLines);
} catch (IOException e) {
logger.error("Fehler beim Lesen oder Schreiben der Datei.", e);
}
}
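// Scales a frequency linearly to one of the tag classes tagcloud0 .. tagcloud10; if all words are equally frequent, the middle class tagcloud5 is used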
public String getTagcloudClass(int frequency, int minFreq, int maxFreq) {
if (maxFreq == minFreq) {
return "tagcloud5";
}
int range = maxFreq - minFreq;
int relativeValue = (int) Math.round(10.0 * (frequency - minFreq) / range);
return "tagcloud" + relativeValue;
}
}

main.java (de.hs_mannheim.informatik.wordcloud.main)

@@ -0,0 +1,12 @@
package de.hs_mannheim.informatik.wordcloud.main;
import java.io.FileNotFoundException;
public class main {
public static void main(String[] args) throws FileNotFoundException{
new UserInterface();
}
}

Filereading.java (de.hs_mannheim.informatik.wordcloud.test)

@@ -0,0 +1,205 @@
package de.hs_mannheim.informatik.wordcloud.test;
import java.io.File;
import java.io.IOException;
import java.io.FileInputStream;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
public class Filereading {
private Map<String, Integer> words = new TreeMap<>();
private static final Logger logger = LogManager.getLogger(Filereading.class);
Filereading(String path, Language language){
if(path.endsWith(".pdf")) {
String pdfText = pdfReading(path);
if(pdfText != null) {
textAnalyzis(pdfText, language);
}
}else if(path.endsWith(".docx")) {
String text = reading(path);
if(text != null) {
textAnalyzis(text, language);
}
}else if(path.endsWith(".pptx")) {
String text = pptReading(path);
if(text != null) {
textAnalyzis(text, language);
}
}
}
public enum Language {
ENGLISH {
@Override
public Analyzer getAnalyzer() {
return new EnglishAnalyzer();
}
},
GERMAN {
@Override
public Analyzer getAnalyzer() {
return new GermanAnalyzer();
}
};
public abstract Analyzer getAnalyzer();
}
public Map<String, Integer> getWords() {
return words;
}
public String reading(String path) {
String filePath = path;
logger.info("Datei wird gelesen: " + filePath);
File file = new File(filePath);
StringBuilder sb = new StringBuilder();
try (FileInputStream fis = new FileInputStream(file);
XWPFDocument document = new XWPFDocument(fis)) {
List<XWPFParagraph> paragraphs = document.getParagraphs();
for (XWPFParagraph para : paragraphs) {
sb.append(para.getText()).append("\n");
}
return sb.toString();
} catch (IOException e) {
logger.error("Fehler beim Öffnen der Word-Datei: " + file.getPath(), e);
return null;
}
}
public String pdfReading(String path) {
logger.info("Datei wird gelesen: " + path);
File file = new File(path);
try (PDDocument document = Loader.loadPDF(file)) {
PDFTextStripper pdfStripper = new PDFTextStripper();
return pdfStripper.getText(document);
} catch (Exception e) {
logger.error("Fehler beim Öffnen der Datei.", e);
return null;
}
}
public String pptReading(String path) {
StringBuilder text = new StringBuilder();
logger.info("Datei wird gelesen: " + path);
try (FileInputStream fis = new FileInputStream(path);
XMLSlideShow ppt = new XMLSlideShow(fis)) {
for (XSLFSlide slide : ppt.getSlides()) {
for (XSLFShape shape : slide.getShapes()) {
if (shape instanceof XSLFTextShape) {
XSLFTextShape textShape = (XSLFTextShape) shape;
text.append(textShape.getText()).append("\n");
}
}
}
} catch (IOException e) {
logger.error("Fehler beim Lesen der PPTX-Datei.", e);
}
return text.toString();
}
public Map<String, Integer> textAnalyzis(String text, Language language) {
try (Analyzer analyzer = language.getAnalyzer();
TokenStream tokenStream = analyzer.tokenStream(null, text)) {
CharTermAttribute termAttribute = tokenStream.addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
String token = termAttribute.toString();
// keep tokens without digits that are longer than two characters and start with a letter or digit
if (!token.matches(".*\\d.*") && token.length() > 2 && !token.matches("^[^a-zA-Z0-9].*")) {
words.merge(token, 1, Integer::sum);
}
}
tokenStream.end();
logger.info("Es wurden " + words.size() + " Worte ausgelesen");
} catch (IOException e) {
logger.error("Fehler bei der Textanalyse.", e);
return null;
}
return words;
}
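// Returns the n most frequent words; note that collecting into a TreeMap orders the result alphabetically by key, not by frequency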
public Map<String, Integer> getTopNWords(int n) {
return words.entrySet()
.stream()
.sorted((e1, e2) -> e2.getValue().compareTo(e1.getValue()))
.limit(n)
.collect(TreeMap::new,
(m, e) -> m.put(e.getKey(), e.getValue()),
TreeMap::putAll);
}
}

FileReadingtest.java (de.hs_mannheim.informatik.wordcloud.test)

@@ -0,0 +1,35 @@
package de.hs_mannheim.informatik.wordcloud.test;
import static org.junit.jupiter.api.Assertions.*;
import java.util.Map;
import org.junit.jupiter.api.Test;
import de.hs_mannheim.informatik.wordcloud.test.Filereading.Language;
class FileReadingtest {
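// Reads one PDF, one DOCX and one PPTX sample file and checks the frequency of selected words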
@Test
public void testAnalyzeText() {
Language deLang = Language.GERMAN;
Language enLang = Language.ENGLISH;
Filereading fileReading = new Filereading("src/test/resources/testCfile.pdf",deLang);
Filereading docReading = new Filereading("src/test/resources/test.docx", deLang);
Filereading pptxReading = new Filereading("src/test/resources/samplepptx.pptx", enLang);
Map<String, Integer> words = fileReading.getWords();
Map<String, Integer> docwords = docReading.getWords();
Map<String, Integer> pptxwords = pptxReading.getWords();
assertEquals(4, words.get("welt"));
assertEquals(4, docwords.get("hallo"));
assertEquals(2, pptxwords.get("handout"));
}
}

InsertWordcloudElements.java (de.hs_mannheim.informatik.wordcloud.test)

@@ -0,0 +1,156 @@
package de.hs_mannheim.informatik.wordcloud.test;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
public class InsertWordcloudElements {
private static final Logger logger = LogManager.getLogger(InsertWordcloudElements.class);
private String search;
InsertWordcloudElements(Map<String, Integer> cloudwords, ArrayList<String> nonoWords, int neededFreq, String search) {
this.search = search;
if(!nonoWords.isEmpty()) {
Map<String, Integer> filterMap = filter(cloudwords, nonoWords);
enterWordcloudElements(filterMap, neededFreq);
}else {
enterWordcloudElements(cloudwords, neededFreq);
}
createCSVFile();
writeCSVFile(cloudwords);
}
public Map<String,Integer> filter(Map<String, Integer> words,ArrayList<String> badWords) {
words.keySet().removeIf(badWords::contains);
return words;
}
public void createCSVFile() {
try {
File file = new File("src/test/resources/woerter.csv");
if (file.createNewFile()) {
logger.info("Die csv Datei wurde erstellt.");
} else {
logger.info("Die csv Datei existiert bereits.");
}
} catch (Exception e) {
logger.error("Fehler beim Erstellen der csv Datei.", e);
}
}
public void writeCSVFile(Map<String, Integer> words) {
try (FileWriter write = new FileWriter("src/test/resources/woerter.csv")) {
for (String word : words.keySet()) {
write.write(word + ", " + words.get(word) + ",\n");
}
logger.info("Die csv Datei wurde erfolgreich überschrieben.");
} catch (Exception e) {
logger.error("Datei konnte nicht beschrieben werden.", e);
}
}
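// Rewrites site/wordcloud.html: inserts one <span> per word with at least neededFreq occurrences after the TODO marker line and drops previously generated spans up to the closing </div>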
public void enterWordcloudElements(Map<String, Integer> words, int neededFreq) {
int minFreq = Collections.min(words.values());
int maxFreq = Collections.max(words.values());
String filepath = "site/wordcloud.html";
try {
List<String> lines = Files.readAllLines(Paths.get(filepath));
List<String> updateLines = new ArrayList<>();
boolean inOldSpanBlock = false;
for (String line : lines) {
if (inOldSpanBlock) {
if (line.contains("</div>")) {
updateLines.add(line);
inOldSpanBlock = false;
}
continue;
}
updateLines.add(line);
if (line.contains("<!-- TODO: Hier die generierten Tags einsetzen -->")) {
int idCounter = 0;
for (String key : words.keySet()) {
if (words.get(key) < neededFreq) {
continue;
}
String tagClass = getTagcloudClass(words.get(key), minFreq, maxFreq);
String word = "<span id=\""
+ idCounter + "\" class=\"wrd "
+ tagClass + "\"><a href=\"https://www."+search+".com/search?q="
+ key + "\">"
+ key + "</a></span>";
updateLines.add(word);
idCounter++;
}
inOldSpanBlock = true;
}
}
Files.write(Paths.get(filepath), updateLines);
} catch (IOException e) {
logger.error("Fehler beim Lesen oder Schreiben der Datei.", e);
}
}
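// Scales a frequency linearly to one of the tag classes tagcloud0 .. tagcloud10; if all words are equally frequent, the middle class tagcloud5 is used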
public String getTagcloudClass(int frequency, int minFreq, int maxFreq) {
if (maxFreq == minFreq) {
return "tagcloud5";
}
int range = maxFreq - minFreq;
int relativeValue = (int) Math.round(10.0 * (frequency - minFreq) / range);
return "tagcloud" + relativeValue;
}
}

test.java (de.hs_mannheim.informatik.wordcloud.test)

@@ -0,0 +1,12 @@
package de.hs_mannheim.informatik.wordcloud.test;
import java.io.FileNotFoundException;
public class test {
public static void main(String[] args) throws FileNotFoundException{
new UserInterface();
}
}