diff --git a/pom.xml b/pom.xml
index 0fb13ab..fb38baa 100644
--- a/pom.xml
+++ b/pom.xml
@@ -34,6 +34,11 @@
log4j-core
2.18.0
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-analysis-common</artifactId>
+            <version>9.9.2</version>
+        </dependency>
diff --git a/src/main/java/domain/FileLoader.java b/src/main/java/domain/FileLoader.java
index 9b0b100..fbde739 100644
--- a/src/main/java/domain/FileLoader.java
+++ b/src/main/java/domain/FileLoader.java
@@ -4,9 +4,6 @@ import javax.swing.*;
import javax.swing.filechooser.FileNameExtensionFilter;
import java.awt.*;
import java.io.File;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
public class FileLoader {
private File inputFile;
diff --git a/src/main/java/domain/TextProcessing.java b/src/main/java/domain/TextProcessing.java
index c6b2e20..133b145 100644
--- a/src/main/java/domain/TextProcessing.java
+++ b/src/main/java/domain/TextProcessing.java
@@ -8,11 +8,36 @@ import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.de.GermanStemmer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.CharArraySet;
+import java.io.IOException;
+import java.util.*;
import java.io.*;
-import java.util.HashMap;
public class TextProcessing {
+ // Stemming switch; it is only stored here and is not read by the tokenizing code visible in this diff.
+ private boolean stemming;
+ // Word limit; maxShowWords uses it as the starting value of its counter.
+ private int maxWords;
+
+ /** Returns the stored stemming flag. */
+ public boolean isStemming() {
+ return stemming;
+ }
+
+ /** Returns the stored word limit. */
+ public int getMaxWords() {
+ return maxWords;
+ }
+
+ /** Stores the stemming flag. */
+ public void setStemming(boolean stemming) {
+ this.stemming = stemming;
+ }
+
+ /** Stores the word limit; the value is not validated here. */
+ public void setMaxWords(int maxWords) {
+ this.maxWords = maxWords;
+ }
public String formatToText(File file, String format) {
try {
@@ -57,9 +82,9 @@ public class TextProcessing {
return "Nothing found!";
}
- public HashMap maxShowWords(int number, HashMap words) {
+ public Map maxShowWords(Map words) {
HashMap cuttedHashmap = new HashMap<>();
- int index = number;
+ int index = maxWords;
for (String word : words.keySet()) {
if(index > 0) {
cuttedHashmap.put(word, words.get(word));
@@ -69,25 +94,74 @@ public class TextProcessing {
return cuttedHashmap;
}
- public HashMap tokenizingText(String text){
- HashMap filteredWords = new HashMap<>();
- try {
- if(!text.isEmpty()) {
- //Tokenizing der Wörter
- String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<\\-–>^°\"']";
- String[] textWords = text.split(splitter);
- for (String word : textWords) {
- if (filteredWords.containsKey(word)) {
- filteredWords.compute(word, (k, counter) -> counter + 1);
- } else {
- filteredWords.put(word, 1);
+ //KI Methode die abgeändert wurde, damit sie in dieses Programm passt
+ public Map tokenizingFile(String text, Set stopwords) {
+ Map words = new HashMap<>();
+
+ if (text == null || text.isBlank()) {
+ return words;
+ }
+ CharArraySet luceneStopwords =
+ stopwords != null ? new CharArraySet(stopwords, true) : CharArraySet.EMPTY_SET;
+
+ try (Analyzer analyzer = new StandardAnalyzer(luceneStopwords)) {
+ TokenStream tokenStream = analyzer.tokenStream(null, text);
+ CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
+
+ tokenStream.reset();
+ while (tokenStream.incrementToken()) {
+ String word = charTermAttribute.toString();
+ if (words.containsKey(word)) {
+ words.compute(word, (k, counter) -> counter + 1);
+ }
+ else {
+ words.put(word, 1);
}
- }
}
+ tokenStream.end();
}
- catch (Exception ex) {
- throw new RuntimeException(ex);
+ catch (IOException e) {
+ throw new RuntimeException(e);
}
- return filteredWords;
+ return words;
}
+
+    /**
+     * Collects the keys of a word-count map into a stopword set.
+     *
+     * @param words map whose keys become the stopwords; the counts are ignored.
+     *              {@code null} is treated like an empty map
+     * @return a new mutable set holding every key of {@code words}
+     */
+    public Set<String> textToSetStopwords(Map<String, Integer> words) {
+        if (words == null) {
+            return new HashSet<>();
+        }
+        // Copying the key view does the same as adding every entry's key one by one.
+        return new HashSet<>(words.keySet());
+    }
+
+    /**
+     * Extracts the text of a file by delegating to {@code formatToText}.
+     *
+     * @param path   the file to read
+     * @param format the format identifier, forwarded unchanged
+     * @return exactly what {@code formatToText(path, format)} returns
+     */
+    public String fileToTextString(File path, String format) {
+        return formatToText(path, format);
+    }
+
+    /**
+     * Returns a copy of the word counts ordered by descending count.
+     *
+     * @param unsortedMap word counts in arbitrary order; {@code null} is treated as empty
+     * @return a new {@link LinkedHashMap} whose iteration order runs from the highest to the
+     *         lowest count; entries with equal counts keep their relative input order
+     */
+    public Map<String, Integer> sortList(Map<String, Integer> unsortedMap) {
+        Map<String, Integer> sortedMap = new LinkedHashMap<>();
+        if (unsortedMap == null) {
+            return sortedMap;
+        }
+        List<Map.Entry<String, Integer>> entryList = new ArrayList<>(unsortedMap.entrySet());
+
+        // AI-generated line: comparing e2 against e1 yields descending order.
+        entryList.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue()));
+
+        // A LinkedHashMap preserves the insertion order, i.e. the sorted order.
+        for (Map.Entry<String, Integer> entry : entryList) {
+            sortedMap.put(entry.getKey(), entry.getValue());
+        }
+
+        return sortedMap;
+    }
+
+// public Map stemming(Map wordList) {
+// Map wordCounts = new HashMap<>();
+// GermanStemmer stemmer = new GermanStemmer();
+//
+// for (String key: wordList.keySet()) {
+// String stemmedWord = stemmer.stemWord(key);
+// if (stemmedWord != null) {
+// wordCounts.merge(stemmedWord, 1, Integer::sum);
+// }
+// }
+// return wordCounts;
+// }
}
diff --git a/src/main/java/facade/WordCloudManager.java b/src/main/java/facade/WordCloudManager.java
index 79976b3..540f7a6 100644
--- a/src/main/java/facade/WordCloudManager.java
+++ b/src/main/java/facade/WordCloudManager.java
@@ -1,30 +1,94 @@
package facade;
-import domain.FileManager;
-import domain.PictureManager;
+import domain.FileLoader;
+import domain.WordCloudCreator;
+import domain.TextProcessing;
import java.io.File;
import java.util.HashMap;
-import java.util.List;
+import java.util.HashSet;
+import java.util.Set;
public class WordCloudManager {
- FileManager fileManager;
- PictureManager pictureManager;
+ private FileLoader fileLoader;
+ private TextProcessing processing;
+ private WordCloudCreator creator;
+ private File filePath;
+ private File stopwordsPath;
+ private String fileFormat;
+ private String fileFormathStopwords;
+ private String text;
+ Set stopwordList = new HashSet<>();
+ private HashMap wordMap;
+ /** Creates the facade together with its three collaborators; both format strings start out empty. */
public WordCloudManager() {
- fileManager = new FileManager();
- pictureManager = new PictureManager();
+ fileLoader = new FileLoader();
+ processing = new TextProcessing();
+ creator = new WordCloudCreator();
+ // Empty rather than null until a file has been chosen.
+ fileFormat = "";
+ fileFormathStopwords = "";
}
public boolean loadFileGUI() {
-
- String fileText = fileManager.loadFile();
- HashMap wordMap = fileManager.tokenizingText(fileText);
- if(wordMap != null) {
- return true;
+ if (filePath == null) {
+ filePath = fileLoader.loadFileGUI();
+ fileFormat = fileLoader.getFileFormat(filePath);
+ System.out.println("File: " + filePath);
+ System.out.println("File: " + stopwordsPath);
+ } else {
+ stopwordsPath = fileLoader.loadFileGUI();
+ fileFormathStopwords = fileLoader.getFileFormat(stopwordsPath);
+ System.out.println("File: " + filePath);
+ System.out.println("File: " + stopwordsPath);
}
- else {
+ if (filePath.length() > 0) {
+ return true;
+ } else {
return false;
}
}
+
+ /**
+ * Adds one extra stopword to the manager's stopword set and prints the whole set.
+ *
+ * @param extraStopword the word to add
+ */
+ public void addToStopWords(String extraStopword) {
+ stopwordList.add(extraStopword);
+ System.out.println(stopwordList);
+ }
+
+    /**
+     * Reads the stored stopword file, tokenizes its text without any stopword filter and
+     * adds every resulting token to the stopword set, which is printed afterwards.
+     */
+    public void setStopWords() {
+        String stopwordText = processing.formatToText(stopwordsPath, fileFormathStopwords);
+        var tokenCounts = processing.tokenizingFile(stopwordText, null);
+        stopwordList.addAll(processing.textToSetStopwords(tokenCounts));
+        System.out.println(stopwordList);
+    }
+
+
+    /**
+     * Switches stemming on or off according to the user's answer.
+     *
+     * @param approval {@code "yes"} (any letter case, surrounding whitespace ignored) enables
+     *                 stemming; every other value, including {@code null}, disables it
+     */
+    public void stemming(String approval) {
+        // Always write the flag so that answering "no" after "yes" really turns it off again.
+        processing.setStemming(approval != null && "yes".equalsIgnoreCase(approval.trim()));
+    }
+
+ /**
+ * Forwards the word limit to the text processing component.
+ *
+ * @param number the limit to store
+ */
+ public void maxWordsInList(int number) {
+ processing.setMaxWords(number);
+ }
+
+
+ // ab hier noch nicht fertig.
+ public void tokenizingText() {
+ wordMap = (HashMap) processing.tokenizingFile(processing.fileToTextString(filePath, fileFormat)
+ , !stopwordList.isEmpty() ? stopwordList : null);
+ System.out.println(wordMap.keySet() + "\n" + wordMap.values());
+ }
+
+    /**
+     * Shrinks {@code wordMap} by sorting it by count and passing it through
+     * {@code maxShowWords}, then prints the result. Does nothing when no word counts exist
+     * yet.
+     */
+    public void cutWordsList() {
+        if (wordMap == null) {
+            // Nothing has been tokenized so far; sorting a null map would throw.
+            return;
+        }
+        // Sort first so that maxShowWords iterates the most frequent words first. The result
+        // is copied rather than cast; a HashMap does not keep the sorted order either way.
+        wordMap = new HashMap<>(processing.maxShowWords(processing.sortList(wordMap)));
+        System.out.println(wordMap.keySet() + "\n" + wordMap.values());
+    }
+
+ /** Hands the current word counts to the word cloud creator's insertWordsIntoTemplate. */
+ public void createWordCloud() {
+ creator.insertWordsIntoTemplate(wordMap);
+ }
}
+
diff --git a/src/main/java/tui/Main.java b/src/main/java/tui/Main.java
index d3d662b..45ee85c 100644
--- a/src/main/java/tui/Main.java
+++ b/src/main/java/tui/Main.java
@@ -1,10 +1,7 @@
package tui;
-import java.util.Scanner;
-
+ /** Program entry point for the text user interface. */
public class Main {
-
public static void main(String[]args){
- TUI tui = new TUI();
+ // The TUI constructor starts the menu loop itself, so no reference needs to be kept.
+ new TUI();
}
}
diff --git a/src/main/java/tui/TUI.java b/src/main/java/tui/TUI.java
index a74ab4b..d23d052 100644
--- a/src/main/java/tui/TUI.java
+++ b/src/main/java/tui/TUI.java
@@ -5,45 +5,97 @@ import facade.WordCloudManager;
import java.util.Scanner;
public class TUI {
- boolean isRunning;
+ private boolean isRunning;
+ private Scanner scan;
+ private int option;
+ private WordCloudManager wcm;
+ private boolean fMenu;
+ /** Creates the manager, sets both loop flags, opens a scanner on standard input and then runs the main menu. */
public TUI(){
+ wcm = new WordCloudManager();
isRunning = true;
+ fMenu = true;
+ scan = new Scanner(System.in);
+ // NOTE(review): tui() blocks in its menu loop, so this constructor only returns once the loop ends.
tui();
+
}
public void tui() {
- Scanner scan = new Scanner(System.in);
- WordCloudManager wcm = new WordCloudManager();
-// while(isRunning) {
- System.out.println("Welcome to Word Cloud.\nMenu:\n\n(0) Load File from main path\n(1) Load File with Gui" +
- "\n(2) Save File\n(3) Show Picture\n(4) Exit");
- int option = scan.nextInt();
+
+
+ while(isRunning) {
+ System.out.println("Welcome to Word Cloud.\nType number in the following Menu to access your targeted Option.\nMenu:\n\n(0) Load File\n(1) URL Path" +
+ "\n(2) Exit");
+ option = Integer.parseInt(scan.nextLine());
switch (option) {
- case (0):
- //Load File Path
- break;
- case (1):
+ case(0):
//Load File GUI
if(wcm.loadFileGUI()) {
System.out.println("File loaded successful!\n");
+ fileMenu();
} else {
System.out.println("File cannot be loaded!\n");
}
break;
- case (2):
- //Save Picture
+ case(1):
+ //URL Input
break;
- case (3):
- //Show Picture
- break;
- case (4):
+ case(2):
//Exit
isRunning = false;
System.out.println("Close Program!");
break;
}
-// }
+ }
scan.close();
}
+
+    /**
+     * Runs the submenu shown after a file was loaded: load stopwords, add a stopword, set
+     * the word limit, set stemming, create the word cloud, or leave the submenu. Creating
+     * the word cloud or choosing the exit entry ends the submenu. Invalid input is reported
+     * and the submenu is shown again.
+     */
+    public void fileMenu() {
+        // Re-arm the loop flag so the submenu also runs when it is entered a second time.
+        fMenu = true;
+        // The limit can only be applied once word counts exist, so just remember the choice.
+        boolean maxWordsChosen = false;
+        while (fMenu) {
+            System.out.println("(0) Load Stopwords\n(1) Add to Stopwords\n(2) Set Max Words in HTML\n" +
+                    "(3) Stemming? (only German available)\n(4) Create WordCloud\n(5) Exit FileMenu");
+            // End of input: leave the submenu instead of failing inside nextLine().
+            if (!scan.hasNextLine()) {
+                fMenu = false;
+                break;
+            }
+            try {
+                option = Integer.parseInt(scan.nextLine().trim());
+            } catch (NumberFormatException e) {
+                System.out.println("Please enter a number!\n");
+                continue;
+            }
+            switch (option) {
+                case (0):
+                    // Load stopwords file
+                    if (wcm.loadFileGUI()) {
+                        System.out.println("File loaded successful!\n");
+                        wcm.setStopWords();
+                    } else {
+                        System.out.println("File cannot be loaded!\n");
+                    }
+                    break;
+                case (1):
+                    // Add more stopwords
+                    System.out.println("Type your stopword:\n");
+                    String input = scan.nextLine();
+                    wcm.addToStopWords(input);
+                    break;
+                case (2):
+                    // Set number of max words
+                    System.out.println("Type the maximum number of words:\n");
+                    try {
+                        int number = Integer.parseInt(scan.nextLine().trim());
+                        if (number > 0) {
+                            wcm.maxWordsInList(number);
+                            maxWordsChosen = true;
+                        } else {
+                            System.out.println("Number must be greater than 0!\n");
+                        }
+                    } catch (NumberFormatException e) {
+                        System.out.println("Please enter a number!\n");
+                    }
+                    break;
+                case (3):
+                    // Set Stemming
+                    System.out.println("Set Stemming: Input 'yes' or 'no'");
+                    String stemmingOption = scan.nextLine();
+                    wcm.stemming(stemmingOption);
+                    break;
+                case (4):
+                    // Create WordCloud: tokenize first, then cut, so the limit acts on real
+                    // counts and is not overwritten by a later tokenizing run.
+                    wcm.tokenizingText();
+                    if (maxWordsChosen) {
+                        wcm.cutWordsList();
+                    }
+                    wcm.createWordCloud();
+                    fMenu = false;
+                    System.out.println("HTML File created!");
+                    break;
+                case (5):
+                    // Exit filemenu
+                    fMenu = false;
+                    System.out.println("Close Program!");
+                    break;
+                default:
+                    System.out.println("Unknown option!\n");
+                    break;
+            }
+        }
+    }
}