From 30654cc2ab5c995348dd622d78875ea262e0c00d Mon Sep 17 00:00:00 2001 From: Daniel Fromm <3015351@stud.hs-mannheim.de> Date: Mon, 12 May 2025 19:46:40 +0200 Subject: [PATCH] refactored code and added tests, stemming not functioning --- deutsche_stopwoerter.txt | 232 +++++++++++++++++++++ pom.xml | 12 ++ src/main/java/domain/TextProcessing.java | 40 ++-- src/main/java/domain/WordCloudCreator.java | 11 +- src/main/java/facade/WordCloudManager.java | 4 - src/main/java/tui/TUI.java | 14 +- src/test/java/FileLoaderTest.java | 23 ++ src/test/java/TextProcessingTest.java | 68 ++++++ src/test/java/WordCloudCreatorTest.java | 30 +++ wordcloud.html | 1 + 10 files changed, 395 insertions(+), 40 deletions(-) create mode 100644 deutsche_stopwoerter.txt create mode 100644 src/test/java/FileLoaderTest.java create mode 100644 src/test/java/TextProcessingTest.java create mode 100644 src/test/java/WordCloudCreatorTest.java create mode 100644 wordcloud.html diff --git a/deutsche_stopwoerter.txt b/deutsche_stopwoerter.txt new file mode 100644 index 0000000..67eb5f8 --- /dev/null +++ b/deutsche_stopwoerter.txt @@ -0,0 +1,232 @@ +aber +alle +allem +allen +aller +alles +als +also +am +an +ander +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders +auch +auf +aus +bei +bin +bis +bist +da +damit +dann +das +dasselbe +dazu +daß +dein +deine +deinem +deinen +deiner +deines +dem +demselben +den +denn +denselben +der +derer +derselbe +derselben +des +desselben +dessen +dich +die +dies +diese +dieselbe +dieselben +diesem +diesen +dieser +dieses +dir +doch +dort +du +durch +ein +eine +einem +einen +einer +eines +einig +einige +einigem +einigen +einiger +einiges +einmal +er +es +etwas +euch +euer +eure +eurem +euren +eurer +eures +für +gegen +gewesen +hab +habe +haben +hat +hatte +hatten +hier +hin +hinter +ich +ihm +ihn +ihnen +ihr +ihre +ihrem +ihren +ihrer +ihres +im +in +indem +ins +ist +jede +jedem +jeden +jeder +jedes +jene +jenem +jenen +jener +jenes +jetzt +kann +kein +keine +keinem +keinen +keiner +keines +können +könnte +machen +man +manche +manchem +manchen +mancher +manches +mein +meine +meinem +meinen +meiner +meines +mich +mir +mit +muss +musste +nach +nicht +nichts +noch +nun +nur +ob +oder +ohne +sehr +sein +seine +seinem +seinen +seiner +seines +selbst +sich +sie +sind +so +solche +solchem +solchen +solcher +solches +soll +sollte +sondern +sonst +um +und +uns +unser +unsere +unserem +unseren +unserer +unseres +unter +viel +vom +von +vor +war +waren +warst +was +weg +weil +weiter +welche +welchem +welchen +welcher +welches +wenn +werde +werden +wie +wieder +will +wir +wird +wirst +wo +wollen +wollte +während +würde +würden +zu +zum +zur +zwar +zwischen +über diff --git a/pom.xml b/pom.xml index fb38baa..4cd0a50 100644 --- a/pom.xml +++ b/pom.xml @@ -39,6 +39,18 @@ lucene-analysis-common 9.9.2 + + junit + junit + 4.13.2 + test + + + org.junit.jupiter + junit-jupiter + RELEASE + test + diff --git a/src/main/java/domain/TextProcessing.java b/src/main/java/domain/TextProcessing.java index 4c38631..0b14c05 100644 --- a/src/main/java/domain/TextProcessing.java +++ b/src/main/java/domain/TextProcessing.java @@ -67,11 +67,6 @@ public class TextProcessing { return "Nothing found!"; } -// public String fileToTextString(File path, String format) { -// String text = formatToText(path, format); -// return text; -// } - public void textToSetStopwords(Map words) { Set stopwords = new HashSet<>(); for (Map.Entry entry : words.entrySet()) { @@ -99,9 +94,9 @@ public class TextProcessing { public Map sortList(Map unsortedMap) { List> entryList = new ArrayList<>(unsortedMap.entrySet()); - entryList.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue())); //Ki erstellte Zeile + entryList.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue())); //KI erstellte Zeile - Map sortedMap = new TreeMap<>(); + Map sortedMap = new LinkedHashMap<>(); for (Map.Entry entry : entryList) { sortedMap.put(entry.getKey(), entry.getValue()); } @@ -143,6 +138,22 @@ public class TextProcessing { return words; } +// public Map stemming(Map wordList) { +// Map wordCounts = new HashMap<>(); +// GermanStemmer stemmer = new GermanStemmer(); +// +// for (String key : wordList.keySet()) { +// char[] wordChars = key.toCharArray(); +// int length = stemmer.stem(wordChars, wordChars.length); // Stemming durchführen +// String stemmedWord = new String(wordChars, 0, length); // Gestemmtes Wort extrahieren +// +// if (stemmedWord != null && !stemmedWord.isBlank()) { +// wordCounts.merge(stemmedWord, wordList.get(key), Integer::sum); +// } +// } +// return wordCounts; +// } + public void setStemming(boolean stemming) { this.stemming = stemming; } @@ -150,19 +161,4 @@ public class TextProcessing { public void setMaxWords(int maxWords) { this.maxWords = maxWords; } - - - -// public Map stemming(Map wordList) { -// Map wordCounts = new HashMap<>(); -// GermanStemmer stemmer = new GermanStemmer(); -// -// for (String key: wordList.keySet()) { -// String stemmedWord = stemmer.stemWord(key); -// if (stemmedWord != null) { -// wordCounts.merge(stemmedWord, 1, Integer::sum); -// } -// } -// return wordCounts; -// } } diff --git a/src/main/java/domain/WordCloudCreator.java b/src/main/java/domain/WordCloudCreator.java index e2e1ef1..ab5fe9f 100644 --- a/src/main/java/domain/WordCloudCreator.java +++ b/src/main/java/domain/WordCloudCreator.java @@ -4,8 +4,8 @@ import java.io.*; import java.util.Map; public class WordCloudCreator { + private int maxFontSize = 70; - //AI Method but mit massive changes public boolean insertWordsIntoTemplate(Map wordMap) { File templateFile = new File("wordcloud.html"); // Template in project directory File outputFile = new File("createdHTML.html"); // Output in project directory @@ -23,14 +23,14 @@ public class WordCloudCreator { while ((line = reader.readLine()) != null) { htmlContent.append(line).append("\n"); } - - // Generate clickable word entries with font size based on frequency +//AI generated lines below + // Generated clickable word entries with font size based on frequency StringBuilder wordEntries = new StringBuilder(); int id = 1; for (Map.Entry entry : wordMap.entrySet()) { String word = entry.getKey(); int frequency = entry.getValue(); - int fontSize = (int) ((float) 12 + frequency * 1.5); // Example: Base size 10px, increase by 2px per frequency + int fontSize = Math.min(10 + frequency * 2, maxFontSize); // Example: Base size 10px, increase by 2px per frequency wordEntries.append(String.format( "" + "%s" + @@ -50,10 +50,11 @@ public class WordCloudCreator { // Write the updated HTML to the output file writer.write(updatedHtml); System.out.println("Output file 'output.html' created successfully!"); + return true; } catch (IOException e) { throw new RuntimeException("Error processing HTML template", e); } - return true; + } } \ No newline at end of file diff --git a/src/main/java/facade/WordCloudManager.java b/src/main/java/facade/WordCloudManager.java index e669686..3f804b3 100644 --- a/src/main/java/facade/WordCloudManager.java +++ b/src/main/java/facade/WordCloudManager.java @@ -20,12 +20,8 @@ public class WordCloudManager { public boolean loadFile() { fl.loadFileGUI(); if (fl.getFilePath().length() > 0) { - System.out.println(fl.getFilePath()); - System.out.println(fl.getStopwordsPath()); return true; } else { - System.out.println(fl.getFilePath()); - System.out.println(fl.getStopwordsPath()); return false; } } diff --git a/src/main/java/tui/TUI.java b/src/main/java/tui/TUI.java index 79f1e73..2190cd3 100644 --- a/src/main/java/tui/TUI.java +++ b/src/main/java/tui/TUI.java @@ -51,7 +51,7 @@ public class TUI { public void fileMenu() { while(fMenu) { System.out.println("(0) Load Stopwords\n(1) Add to Stopwords\n(2) Set Max Words in HTML\n" + - "(3) Stemming? (only German available)\n(4) Create WordCloud\n(5) Exit FileMenu"); + "(3) Stemming? (only German available)\n(4) Create WordCloud and Exit"); option = Integer.parseInt(scan.nextLine()); switch(option) { case (0): @@ -88,20 +88,16 @@ public class TUI { } break; case(4): - //Create WordCloud + //Create WordCloud and exit program if(wcm.createWordCloud()) { - System.out.println("HTML File created!"); + System.out.println("HTML File created!\n"); fMenu = false; + System.out.println("Close Program!"); } else { - System.out.println("HTML FIle not created!"); + System.out.println("HTML FIle not created!\n"); } break; - case(5): - //Exit Filemenu - fMenu = false; - System.out.println("Close Program!"); - break; } } } diff --git a/src/test/java/FileLoaderTest.java b/src/test/java/FileLoaderTest.java new file mode 100644 index 0000000..aa93d0f --- /dev/null +++ b/src/test/java/FileLoaderTest.java @@ -0,0 +1,23 @@ +import domain.FileLoader; +import org.junit.jupiter.api.Test; +import java.io.File; + +import static org.junit.jupiter.api.Assertions.*; + +class FileLoaderTest { + + @Test + void testGetFileFormat_knownFormats() { + FileLoader loader = new FileLoader(); + assertEquals("txt", loader.getFileFormat(new File("test.txt"))); + assertEquals("pdf", loader.getFileFormat(new File("test.pdf"))); + assertEquals("docx", loader.getFileFormat(new File("test.docx"))); + assertEquals("pptx", loader.getFileFormat(new File("test.pptx"))); + } + + @Test + void testGetFileFormat_unknownFormat() { + FileLoader loader = new FileLoader(); + assertEquals("File format not supported", loader.getFileFormat(new File("test.xyz"))); + } +} diff --git a/src/test/java/TextProcessingTest.java b/src/test/java/TextProcessingTest.java new file mode 100644 index 0000000..071beae --- /dev/null +++ b/src/test/java/TextProcessingTest.java @@ -0,0 +1,68 @@ +import domain.TextProcessing; +import org.junit.jupiter.api.Test; + +import java.util.*; + +import static org.junit.jupiter.api.Assertions.*; + +class TextProcessingTest { + + @Test + void testAddToStopWords() { + TextProcessing tp = new TextProcessing(); + tp.addToStopWords("der"); + assertTrue(tp.tokenizingFile("Der Hund ist groß.").containsKey("hund")); + assertFalse(tp.tokenizingFile("Der Hund ist groß.").containsKey("der")); + } + + @Test + void testSortList() { + TextProcessing tp = new TextProcessing(); + Map input = Map.of("a", 1, "b", 3, "c", 2); + List values = new ArrayList<>(tp.sortList(input).values()); + assertEquals(List.of(3, 2, 1), values); + } + + @Test + void testMaxShowWords() { + TextProcessing tp = new TextProcessing(); + Map input = new LinkedHashMap<>(); + input.put("eins", 1); + input.put("zwei", 2); + input.put("drei", 3); + + Map result = tp.maxShowWords(input, 2); + assertEquals(2, result.size()); + } + + @Test + void testAddStopWordsIndividuallyAndAsList() { + TextProcessing tp = new TextProcessing(); + + tp.addToStopWords("der"); + tp.addToStopWords("die"); + tp.addToStopWords("das"); + + assertTrue(tp.tokenizingFile("Der Hund ist groß.").containsKey("hund")); + assertFalse(tp.tokenizingFile("Der Hund ist groß.").containsKey("der")); + assertFalse(tp.tokenizingFile("Die Katze ist klein.").containsKey("die")); + + Map stopwordMap = Map.of("und", 1, "oder", 1, "aber", 1); + tp.textToSetStopwords(stopwordMap); + + assertFalse(tp.tokenizingFile("Und der Hund ist groß.").containsKey("und")); + assertFalse(tp.tokenizingFile("Oder die Katze ist klein.").containsKey("oder")); + assertFalse(tp.tokenizingFile("Aber das Haus ist alt.").containsKey("aber")); + } + +// @Test +// void testStemming() { +// TextProcessing tp = new TextProcessing(); +// Map input = new HashMap<>(); +// input.put("gegangen", 1); +// input.put("geht", 1); +// +// Map result = tp.stemming(input); +// assertFalse(result.isEmpty()); +// } +} diff --git a/src/test/java/WordCloudCreatorTest.java b/src/test/java/WordCloudCreatorTest.java new file mode 100644 index 0000000..eeaf3a3 --- /dev/null +++ b/src/test/java/WordCloudCreatorTest.java @@ -0,0 +1,30 @@ +import domain.WordCloudCreator; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +class WordCloudCreatorTest { + + @Test + void testInsertWordsIntoTemplate_success() throws IOException { + // Prepare a simple HTML template with placeholder + File template = new File("wordcloud.html"); + try (FileWriter fw = new FileWriter(template)) { + fw.write(""); + } + + WordCloudCreator creator = new WordCloudCreator(); + Map words = Map.of("Test", 3, "Java", 2); + + assertTrue(creator.insertWordsIntoTemplate(words)); + + File output = new File("createdHTML.html"); + assertTrue(output.exists()); + assertTrue(output.length() > 0); + } +} diff --git a/wordcloud.html b/wordcloud.html new file mode 100644 index 0000000..a5b4fdb --- /dev/null +++ b/wordcloud.html @@ -0,0 +1 @@ + \ No newline at end of file