refactored code and added tests, stemming not functioning
parent
3aeee20146
commit
30654cc2ab
|
@ -0,0 +1,232 @@
|
|||
aber
|
||||
alle
|
||||
allem
|
||||
allen
|
||||
aller
|
||||
alles
|
||||
als
|
||||
also
|
||||
am
|
||||
an
|
||||
ander
|
||||
andere
|
||||
anderem
|
||||
anderen
|
||||
anderer
|
||||
anderes
|
||||
anderm
|
||||
andern
|
||||
anderr
|
||||
anders
|
||||
auch
|
||||
auf
|
||||
aus
|
||||
bei
|
||||
bin
|
||||
bis
|
||||
bist
|
||||
da
|
||||
damit
|
||||
dann
|
||||
das
|
||||
dasselbe
|
||||
dazu
|
||||
daß
|
||||
dein
|
||||
deine
|
||||
deinem
|
||||
deinen
|
||||
deiner
|
||||
deines
|
||||
dem
|
||||
demselben
|
||||
den
|
||||
denn
|
||||
denselben
|
||||
der
|
||||
derer
|
||||
derselbe
|
||||
derselben
|
||||
des
|
||||
desselben
|
||||
dessen
|
||||
dich
|
||||
die
|
||||
dies
|
||||
diese
|
||||
dieselbe
|
||||
dieselben
|
||||
diesem
|
||||
diesen
|
||||
dieser
|
||||
dieses
|
||||
dir
|
||||
doch
|
||||
dort
|
||||
du
|
||||
durch
|
||||
ein
|
||||
eine
|
||||
einem
|
||||
einen
|
||||
einer
|
||||
eines
|
||||
einig
|
||||
einige
|
||||
einigem
|
||||
einigen
|
||||
einiger
|
||||
einiges
|
||||
einmal
|
||||
er
|
||||
es
|
||||
etwas
|
||||
euch
|
||||
euer
|
||||
eure
|
||||
eurem
|
||||
euren
|
||||
eurer
|
||||
eures
|
||||
für
|
||||
gegen
|
||||
gewesen
|
||||
hab
|
||||
habe
|
||||
haben
|
||||
hat
|
||||
hatte
|
||||
hatten
|
||||
hier
|
||||
hin
|
||||
hinter
|
||||
ich
|
||||
ihm
|
||||
ihn
|
||||
ihnen
|
||||
ihr
|
||||
ihre
|
||||
ihrem
|
||||
ihren
|
||||
ihrer
|
||||
ihres
|
||||
im
|
||||
in
|
||||
indem
|
||||
ins
|
||||
ist
|
||||
jede
|
||||
jedem
|
||||
jeden
|
||||
jeder
|
||||
jedes
|
||||
jene
|
||||
jenem
|
||||
jenen
|
||||
jener
|
||||
jenes
|
||||
jetzt
|
||||
kann
|
||||
kein
|
||||
keine
|
||||
keinem
|
||||
keinen
|
||||
keiner
|
||||
keines
|
||||
können
|
||||
könnte
|
||||
machen
|
||||
man
|
||||
manche
|
||||
manchem
|
||||
manchen
|
||||
mancher
|
||||
manches
|
||||
mein
|
||||
meine
|
||||
meinem
|
||||
meinen
|
||||
meiner
|
||||
meines
|
||||
mich
|
||||
mir
|
||||
mit
|
||||
muss
|
||||
musste
|
||||
nach
|
||||
nicht
|
||||
nichts
|
||||
noch
|
||||
nun
|
||||
nur
|
||||
ob
|
||||
oder
|
||||
ohne
|
||||
sehr
|
||||
sein
|
||||
seine
|
||||
seinem
|
||||
seinen
|
||||
seiner
|
||||
seines
|
||||
selbst
|
||||
sich
|
||||
sie
|
||||
sind
|
||||
so
|
||||
solche
|
||||
solchem
|
||||
solchen
|
||||
solcher
|
||||
solches
|
||||
soll
|
||||
sollte
|
||||
sondern
|
||||
sonst
|
||||
um
|
||||
und
|
||||
uns
|
||||
unser
|
||||
unsere
|
||||
unserem
|
||||
unseren
|
||||
unserer
|
||||
unseres
|
||||
unter
|
||||
viel
|
||||
vom
|
||||
von
|
||||
vor
|
||||
war
|
||||
waren
|
||||
warst
|
||||
was
|
||||
weg
|
||||
weil
|
||||
weiter
|
||||
welche
|
||||
welchem
|
||||
welchen
|
||||
welcher
|
||||
welches
|
||||
wenn
|
||||
werde
|
||||
werden
|
||||
wie
|
||||
wieder
|
||||
will
|
||||
wir
|
||||
wird
|
||||
wirst
|
||||
wo
|
||||
wollen
|
||||
wollte
|
||||
während
|
||||
würde
|
||||
würden
|
||||
zu
|
||||
zum
|
||||
zur
|
||||
zwar
|
||||
zwischen
|
||||
über
|
12
pom.xml
12
pom.xml
|
@ -39,6 +39,18 @@
|
|||
<artifactId>lucene-analysis-common</artifactId>
|
||||
<version>9.9.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.13.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter</artifactId>
|
||||
<version>RELEASE</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<properties>
|
||||
|
|
|
@ -67,11 +67,6 @@ public class TextProcessing {
|
|||
return "Nothing found!";
|
||||
}
|
||||
|
||||
// public String fileToTextString(File path, String format) {
|
||||
// String text = formatToText(path, format);
|
||||
// return text;
|
||||
// }
|
||||
|
||||
public void textToSetStopwords(Map<String, Integer> words) {
|
||||
Set<String> stopwords = new HashSet<>();
|
||||
for (Map.Entry<String, Integer> entry : words.entrySet()) {
|
||||
|
@ -99,9 +94,9 @@ public class TextProcessing {
|
|||
public Map<String, Integer> sortList(Map<String, Integer> unsortedMap) {
|
||||
List<Map.Entry<String, Integer>> entryList = new ArrayList<>(unsortedMap.entrySet());
|
||||
|
||||
entryList.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue())); //Ki erstellte Zeile
|
||||
entryList.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue())); //KI erstellte Zeile
|
||||
|
||||
Map<String, Integer> sortedMap = new TreeMap<>();
|
||||
Map<String, Integer> sortedMap = new LinkedHashMap<>();
|
||||
for (Map.Entry<String, Integer> entry : entryList) {
|
||||
sortedMap.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
|
@ -143,6 +138,22 @@ public class TextProcessing {
|
|||
return words;
|
||||
}
|
||||
|
||||
// public Map<String, Integer> stemming(Map<String, Integer> wordList) {
|
||||
// Map<String, Integer> wordCounts = new HashMap<>();
|
||||
// GermanStemmer stemmer = new GermanStemmer();
|
||||
//
|
||||
// for (String key : wordList.keySet()) {
|
||||
// char[] wordChars = key.toCharArray();
|
||||
// int length = stemmer.stem(wordChars, wordChars.length); // Stemming durchführen
|
||||
// String stemmedWord = new String(wordChars, 0, length); // Gestemmtes Wort extrahieren
|
||||
//
|
||||
// if (stemmedWord != null && !stemmedWord.isBlank()) {
|
||||
// wordCounts.merge(stemmedWord, wordList.get(key), Integer::sum);
|
||||
// }
|
||||
// }
|
||||
// return wordCounts;
|
||||
// }
|
||||
|
||||
public void setStemming(boolean stemming) {
|
||||
this.stemming = stemming;
|
||||
}
|
||||
|
@ -150,19 +161,4 @@ public class TextProcessing {
|
|||
public void setMaxWords(int maxWords) {
|
||||
this.maxWords = maxWords;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// public Map<String, Integer> stemming(Map<String, Integer> wordList) {
|
||||
// Map<String, Integer> wordCounts = new HashMap<>();
|
||||
// GermanStemmer stemmer = new GermanStemmer();
|
||||
//
|
||||
// for (String key: wordList.keySet()) {
|
||||
// String stemmedWord = stemmer.stemWord(key);
|
||||
// if (stemmedWord != null) {
|
||||
// wordCounts.merge(stemmedWord, 1, Integer::sum);
|
||||
// }
|
||||
// }
|
||||
// return wordCounts;
|
||||
// }
|
||||
}
|
||||
|
|
|
@ -4,8 +4,8 @@ import java.io.*;
|
|||
import java.util.Map;
|
||||
|
||||
public class WordCloudCreator {
|
||||
private int maxFontSize = 70;
|
||||
|
||||
//AI-generated method, but with massive changes
|
||||
public boolean insertWordsIntoTemplate(Map<String, Integer> wordMap) {
|
||||
File templateFile = new File("wordcloud.html"); // Template in project directory
|
||||
File outputFile = new File("createdHTML.html"); // Output in project directory
|
||||
|
@ -23,14 +23,14 @@ public class WordCloudCreator {
|
|||
while ((line = reader.readLine()) != null) {
|
||||
htmlContent.append(line).append("\n");
|
||||
}
|
||||
|
||||
// Generate clickable word entries with font size based on frequency
|
||||
//AI generated lines below
|
||||
// Generated clickable word entries with font size based on frequency
|
||||
StringBuilder wordEntries = new StringBuilder();
|
||||
int id = 1;
|
||||
for (Map.Entry<String, Integer> entry : wordMap.entrySet()) {
|
||||
String word = entry.getKey();
|
||||
int frequency = entry.getValue();
|
||||
int fontSize = (int) ((float) 12 + frequency * 1.5); // Example: Base size 10px, increase by 2px per frequency
|
||||
int fontSize = Math.min(10 + frequency * 2, maxFontSize); // Example: Base size 10px, increase by 2px per frequency
|
||||
wordEntries.append(String.format(
|
||||
"<span id=\"%d\" class=\"wrd\" style=\"font-size:%dpx;\">" +
|
||||
"<a href=\"https://www.google.com/search?q=%s\" target=\"_blank\">%s</a>" +
|
||||
|
@ -50,10 +50,11 @@ public class WordCloudCreator {
|
|||
// Write the updated HTML to the output file
|
||||
writer.write(updatedHtml);
|
||||
System.out.println("Output file 'output.html' created successfully!");
|
||||
return true;
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Error processing HTML template", e);
|
||||
}
|
||||
return true;
|
||||
|
||||
}
|
||||
}
|
|
@ -20,12 +20,8 @@ public class WordCloudManager {
|
|||
public boolean loadFile() {
|
||||
fl.loadFileGUI();
|
||||
if (fl.getFilePath().length() > 0) {
|
||||
System.out.println(fl.getFilePath());
|
||||
System.out.println(fl.getStopwordsPath());
|
||||
return true;
|
||||
} else {
|
||||
System.out.println(fl.getFilePath());
|
||||
System.out.println(fl.getStopwordsPath());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ public class TUI {
|
|||
public void fileMenu() {
|
||||
while(fMenu) {
|
||||
System.out.println("(0) Load Stopwords\n(1) Add to Stopwords\n(2) Set Max Words in HTML\n" +
|
||||
"(3) Stemming? (only German available)\n(4) Create WordCloud\n(5) Exit FileMenu");
|
||||
"(3) Stemming? (only German available)\n(4) Create WordCloud and Exit");
|
||||
option = Integer.parseInt(scan.nextLine());
|
||||
switch(option) {
|
||||
case (0):
|
||||
|
@ -88,19 +88,15 @@ public class TUI {
|
|||
}
|
||||
break;
|
||||
case(4):
|
||||
//Create WordCloud
|
||||
//Create WordCloud and exit program
|
||||
if(wcm.createWordCloud()) {
|
||||
System.out.println("HTML File created!");
|
||||
fMenu = false;
|
||||
}
|
||||
else {
|
||||
System.out.println("HTML FIle not created!");
|
||||
}
|
||||
break;
|
||||
case(5):
|
||||
//Exit Filemenu
|
||||
System.out.println("HTML File created!\n");
|
||||
fMenu = false;
|
||||
System.out.println("Close Program!");
|
||||
}
|
||||
else {
|
||||
System.out.println("HTML FIle not created!\n");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
import domain.FileLoader;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import java.io.File;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/** Unit tests for the file-extension detection offered by {@code FileLoader.getFileFormat(File)}. */
class FileLoaderTest {
|
||||
|
||||
// Each of the extensions txt, pdf, docx and pptx must be reported back as that same string.
@Test
|
||||
void testGetFileFormat_knownFormats() {
|
||||
FileLoader loader = new FileLoader();
|
||||
assertEquals("txt", loader.getFileFormat(new File("test.txt")));
|
||||
assertEquals("pdf", loader.getFileFormat(new File("test.pdf")));
|
||||
assertEquals("docx", loader.getFileFormat(new File("test.docx")));
|
||||
assertEquals("pptx", loader.getFileFormat(new File("test.pptx")));
|
||||
}
|
||||
|
||||
// An unrecognised extension is expected to yield the literal message string rather than an exception.
@Test
|
||||
void testGetFileFormat_unknownFormat() {
|
||||
FileLoader loader = new FileLoader();
|
||||
assertEquals("File format not supported", loader.getFileFormat(new File("test.xyz")));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
import domain.TextProcessing;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/**
 * Unit tests for {@code TextProcessing}: stopword handling, sorting by frequency
 * and limiting the number of words.
 */
class TextProcessingTest {
|
||||
|
||||
// A single stopword added via addToStopWords must be absent from the tokenized result,
// while a non-stopword ("hund", lower-cased key) must still be present.
@Test
|
||||
void testAddToStopWords() {
|
||||
TextProcessing tp = new TextProcessing();
|
||||
tp.addToStopWords("der");
|
||||
assertTrue(tp.tokenizingFile("Der Hund ist groß.").containsKey("hund"));
|
||||
assertFalse(tp.tokenizingFile("Der Hund ist groß.").containsKey("der"));
|
||||
}
|
||||
|
||||
// sortList must return the values in descending order of frequency.
// Only the value order is checked; the input counts are distinct, so tie handling is not exercised.
@Test
|
||||
void testSortList() {
|
||||
TextProcessing tp = new TextProcessing();
|
||||
Map<String, Integer> input = Map.of("a", 1, "b", 3, "c", 2);
|
||||
List<Integer> values = new ArrayList<>(tp.sortList(input).values());
|
||||
assertEquals(List.of(3, 2, 1), values);
|
||||
}
|
||||
|
||||
// maxShowWords(input, 2) must shrink a three-entry map to two entries.
// Only the size is asserted, not which entries survive.
@Test
|
||||
void testMaxShowWords() {
|
||||
TextProcessing tp = new TextProcessing();
|
||||
Map<String, Integer> input = new LinkedHashMap<>();
|
||||
input.put("eins", 1);
|
||||
input.put("zwei", 2);
|
||||
input.put("drei", 3);
|
||||
|
||||
Map<String, Integer> result = tp.maxShowWords(input, 2);
|
||||
assertEquals(2, result.size());
|
||||
}
|
||||
|
||||
// Covers both ways of registering stopwords on the same instance:
// one at a time via addToStopWords, then in bulk via textToSetStopwords (map keys are the words).
@Test
|
||||
void testAddStopWordsIndividuallyAndAsList() {
|
||||
TextProcessing tp = new TextProcessing();
|
||||
|
||||
tp.addToStopWords("der");
|
||||
tp.addToStopWords("die");
|
||||
tp.addToStopWords("das");
|
||||
|
||||
assertTrue(tp.tokenizingFile("Der Hund ist groß.").containsKey("hund"));
|
||||
assertFalse(tp.tokenizingFile("Der Hund ist groß.").containsKey("der"));
|
||||
assertFalse(tp.tokenizingFile("Die Katze ist klein.").containsKey("die"));
|
||||
|
||||
Map<String, Integer> stopwordMap = Map.of("und", 1, "oder", 1, "aber", 1);
|
||||
tp.textToSetStopwords(stopwordMap);
|
||||
|
||||
assertFalse(tp.tokenizingFile("Und der Hund ist groß.").containsKey("und"));
|
||||
assertFalse(tp.tokenizingFile("Oder die Katze ist klein.").containsKey("oder"));
|
||||
assertFalse(tp.tokenizingFile("Aber das Haus ist alt.").containsKey("aber"));
|
||||
}
|
||||
|
||||
// Disabled stemming test, kept commented out.
// NOTE(review): presumably to be re-enabled once TextProcessing exposes a working stemming(Map) method - confirm.
// @Test
|
||||
// void testStemming() {
|
||||
// TextProcessing tp = new TextProcessing();
|
||||
// Map<String, Integer> input = new HashMap<>();
|
||||
// input.put("gegangen", 1);
|
||||
// input.put("geht", 1);
|
||||
//
|
||||
// Map<String, Integer> result = tp.stemming(input);
|
||||
// assertFalse(result.isEmpty());
|
||||
// }
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
import domain.WordCloudCreator;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/** Smoke test for {@code WordCloudCreator.insertWordsIntoTemplate(Map)}. */
class WordCloudCreatorTest {
|
||||
|
||||
// Writes a minimal template, runs the creator and checks that a non-empty output file exists.
// NOTE(review): this writes to "wordcloud.html" in the working directory without restoring it;
// if that is also the application's real template path, running the test replaces the template
// with the stub below - confirm, and consider a temporary directory instead.
// NOTE(review): "createdHTML.html" is not deleted before or after the call, so the two file
// assertions could also be satisfied by a stale file from an earlier run - confirm.
@Test
|
||||
void testInsertWordsIntoTemplate_success() throws IOException {
|
||||
// Prepare a simple HTML template with placeholder
|
||||
File template = new File("wordcloud.html");
|
||||
try (FileWriter fw = new FileWriter(template)) {
|
||||
fw.write("<html><body><!-- TODO: Hier die generierten Tags einsetzen --></body></html>");
|
||||
}
|
||||
|
||||
WordCloudCreator creator = new WordCloudCreator();
|
||||
Map<String, Integer> words = Map.of("Test", 3, "Java", 2);
|
||||
|
||||
// The creator signals success through its boolean return value.
assertTrue(creator.insertWordsIntoTemplate(words));
|
||||
|
||||
// Only existence and non-zero length are verified, not the generated markup.
File output = new File("createdHTML.html");
|
||||
assertTrue(output.exists());
|
||||
assertTrue(output.length() > 0);
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
<html><body><!-- TODO: Hier die generierten Tags einsetzen --></body></html>
|
Loading…
Reference in New Issue