commit
8d518b8804
5
pom.xml
5
pom.xml
|
@ -34,6 +34,11 @@
|
||||||
<artifactId>log4j-core</artifactId>
|
<artifactId>log4j-core</artifactId>
|
||||||
<version>2.18.0</version>
|
<version>2.18.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.lucene</groupId>
|
||||||
|
<artifactId>lucene-analysis-common</artifactId>
|
||||||
|
<version>9.9.2</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
|
|
|
@ -4,9 +4,6 @@ import javax.swing.*;
|
||||||
import javax.swing.filechooser.FileNameExtensionFilter;
|
import javax.swing.filechooser.FileNameExtensionFilter;
|
||||||
import java.awt.*;
|
import java.awt.*;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class FileLoader {
|
public class FileLoader {
|
||||||
private File inputFile;
|
private File inputFile;
|
||||||
|
|
|
@ -8,11 +8,36 @@ import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||||
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
||||||
import org.apache.poi.xslf.usermodel.XSLFShape;
|
import org.apache.poi.xslf.usermodel.XSLFShape;
|
||||||
import org.apache.poi.xslf.usermodel.XSLFTextShape;
|
import org.apache.poi.xslf.usermodel.XSLFTextShape;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.de.GermanStemmer;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.*;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
public class TextProcessing {
|
public class TextProcessing {
|
||||||
|
private boolean stemming;
|
||||||
|
private int maxWords;
|
||||||
|
|
||||||
|
public boolean isStemming() {
|
||||||
|
return stemming;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMaxWords() {
|
||||||
|
return maxWords;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStemming(boolean stemming) {
|
||||||
|
this.stemming = stemming;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMaxWords(int maxWords) {
|
||||||
|
this.maxWords = maxWords;
|
||||||
|
}
|
||||||
|
|
||||||
public String formatToText(File file, String format) {
|
public String formatToText(File file, String format) {
|
||||||
try {
|
try {
|
||||||
|
@ -57,9 +82,9 @@ public class TextProcessing {
|
||||||
return "Nothing found!";
|
return "Nothing found!";
|
||||||
}
|
}
|
||||||
|
|
||||||
public HashMap maxShowWords(int number, HashMap<String, Integer> words) {
|
public Map<String, Integer> maxShowWords(Map<String, Integer> words) {
|
||||||
HashMap <String, Integer> cuttedHashmap = new HashMap<>();
|
HashMap <String, Integer> cuttedHashmap = new HashMap<>();
|
||||||
int index = number;
|
int index = maxWords;
|
||||||
for (String word : words.keySet()) {
|
for (String word : words.keySet()) {
|
||||||
if(index > 0) {
|
if(index > 0) {
|
||||||
cuttedHashmap.put(word, words.get(word));
|
cuttedHashmap.put(word, words.get(word));
|
||||||
|
@ -69,25 +94,74 @@ public class TextProcessing {
|
||||||
return cuttedHashmap;
|
return cuttedHashmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
public HashMap tokenizingText(String text){
|
//KI Methode die abgeändert wurde, damit sie in dieses Programm passt
|
||||||
HashMap<String, Integer> filteredWords = new HashMap<>();
|
public Map<String, Integer> tokenizingFile(String text, Set<String> stopwords) {
|
||||||
try {
|
Map<String, Integer> words = new HashMap<>();
|
||||||
if(!text.isEmpty()) {
|
|
||||||
//Tokenizing der Wörter
|
if (text == null || text.isBlank()) {
|
||||||
String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<\\-–>^°\"']";
|
return words;
|
||||||
String[] textWords = text.split(splitter);
|
}
|
||||||
for (String word : textWords) {
|
CharArraySet luceneStopwords =
|
||||||
if (filteredWords.containsKey(word)) {
|
stopwords != null ? new CharArraySet(stopwords, true) : CharArraySet.EMPTY_SET;
|
||||||
filteredWords.compute(word, (k, counter) -> counter + 1);
|
|
||||||
} else {
|
try (Analyzer analyzer = new StandardAnalyzer(luceneStopwords)) {
|
||||||
filteredWords.put(word, 1);
|
TokenStream tokenStream = analyzer.tokenStream(null, text);
|
||||||
|
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
|
||||||
|
|
||||||
|
tokenStream.reset();
|
||||||
|
while (tokenStream.incrementToken()) {
|
||||||
|
String word = charTermAttribute.toString();
|
||||||
|
if (words.containsKey(word)) {
|
||||||
|
words.compute(word, (k, counter) -> counter + 1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
words.put(word, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
tokenStream.end();
|
||||||
}
|
}
|
||||||
|
catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
catch (Exception ex) {
|
return words;
|
||||||
throw new RuntimeException(ex);
|
|
||||||
}
|
}
|
||||||
return filteredWords;
|
|
||||||
|
public Set<String> textToSetStopwords(Map<String, Integer> words) {
|
||||||
|
Set<String> stopwordList = new HashSet<>();
|
||||||
|
for (Map.Entry<String, Integer> entry : words.entrySet()) {
|
||||||
|
stopwordList.add(entry.getKey());
|
||||||
}
|
}
|
||||||
|
return stopwordList;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String fileToTextString(File path, String format) {
|
||||||
|
String text = formatToText(path, format);
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, Integer> sortList(Map<String, Integer> unsortedMap) {
|
||||||
|
List<Map.Entry<String, Integer>> entryList = new ArrayList<>(unsortedMap.entrySet());
|
||||||
|
|
||||||
|
entryList.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue())); //Ki erstellte Zeile
|
||||||
|
|
||||||
|
Map<String, Integer> sortedMap = new LinkedHashMap<>();
|
||||||
|
for (Map.Entry<String, Integer> entry : entryList) {
|
||||||
|
sortedMap.put(entry.getKey(), entry.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
return sortedMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
// public Map<String, Integer> stemming(Map<String, Integer> wordList) {
|
||||||
|
// Map<String, Integer> wordCounts = new HashMap<>();
|
||||||
|
// GermanStemmer stemmer = new GermanStemmer();
|
||||||
|
//
|
||||||
|
// for (String key: wordList.keySet()) {
|
||||||
|
// String stemmedWord = stemmer.stemWord(key);
|
||||||
|
// if (stemmedWord != null) {
|
||||||
|
// wordCounts.merge(stemmedWord, 1, Integer::sum);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// return wordCounts;
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,30 +1,94 @@
|
||||||
package facade;
|
package facade;
|
||||||
|
|
||||||
import domain.FileManager;
|
import domain.FileLoader;
|
||||||
import domain.PictureManager;
|
import domain.WordCloudCreator;
|
||||||
|
import domain.TextProcessing;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
public class WordCloudManager {
|
public class WordCloudManager {
|
||||||
FileManager fileManager;
|
private FileLoader fileLoader;
|
||||||
PictureManager pictureManager;
|
private TextProcessing processing;
|
||||||
|
private WordCloudCreator creator;
|
||||||
|
private File filePath;
|
||||||
|
private File stopwordsPath;
|
||||||
|
private String fileFormat;
|
||||||
|
private String fileFormathStopwords;
|
||||||
|
private String text;
|
||||||
|
Set<String> stopwordList = new HashSet<>();
|
||||||
|
private HashMap<String, Integer> wordMap;
|
||||||
|
|
||||||
public WordCloudManager() {
|
public WordCloudManager() {
|
||||||
fileManager = new FileManager();
|
fileLoader = new FileLoader();
|
||||||
pictureManager = new PictureManager();
|
processing = new TextProcessing();
|
||||||
|
creator = new WordCloudCreator();
|
||||||
|
fileFormat = "";
|
||||||
|
fileFormathStopwords = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean loadFileGUI() {
|
public boolean loadFileGUI() {
|
||||||
|
if (filePath == null) {
|
||||||
String fileText = fileManager.loadFile();
|
filePath = fileLoader.loadFileGUI();
|
||||||
HashMap wordMap = fileManager.tokenizingText(fileText);
|
fileFormat = fileLoader.getFileFormat(filePath);
|
||||||
if(wordMap != null) {
|
System.out.println("File: " + filePath);
|
||||||
return true;
|
System.out.println("File: " + stopwordsPath);
|
||||||
|
} else {
|
||||||
|
stopwordsPath = fileLoader.loadFileGUI();
|
||||||
|
fileFormathStopwords = fileLoader.getFileFormat(stopwordsPath);
|
||||||
|
System.out.println("File: " + filePath);
|
||||||
|
System.out.println("File: " + stopwordsPath);
|
||||||
}
|
}
|
||||||
else {
|
if (filePath.length() > 0) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addToStopWords(String extraStopword) {
|
||||||
|
stopwordList.add(extraStopword);
|
||||||
|
System.out.println(stopwordList);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStopWords() {
|
||||||
|
Set<String> stopwords = processing.textToSetStopwords(processing.tokenizingFile(processing.
|
||||||
|
formatToText(stopwordsPath, fileFormathStopwords), null));
|
||||||
|
stopwordList.addAll(stopwords);
|
||||||
|
System.out.println(stopwordList);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void stemming(String approval) {
|
||||||
|
if(approval.equals("yes")) {
|
||||||
|
processing.setStemming(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void maxWordsInList(int number) {
|
||||||
|
processing.setMaxWords(number);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// ab hier noch nicht fertig.
|
||||||
|
public void tokenizingText() {
|
||||||
|
wordMap = (HashMap<String, Integer>) processing.tokenizingFile(processing.fileToTextString(filePath, fileFormat)
|
||||||
|
, !stopwordList.isEmpty() ? stopwordList : null);
|
||||||
|
System.out.println(wordMap.keySet() + "\n" + wordMap.values());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void cutWordsList() {
|
||||||
|
|
||||||
|
|
||||||
|
wordMap = (HashMap<String, Integer>) processing.maxShowWords(processing.sortList(wordMap));
|
||||||
|
processing.sortList(wordMap);
|
||||||
|
System.out.println(wordMap.keySet() + "\n" + wordMap.values());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void createWordCloud() {
|
||||||
|
creator.insertWordsIntoTemplate(wordMap);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,7 @@
|
||||||
package tui;
|
package tui;
|
||||||
|
|
||||||
import java.util.Scanner;
|
|
||||||
|
|
||||||
public class Main {
|
public class Main {
|
||||||
|
|
||||||
public static void main(String[]args){
|
public static void main(String[]args){
|
||||||
TUI tui = new TUI();
|
new TUI();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,45 +5,97 @@ import facade.WordCloudManager;
|
||||||
import java.util.Scanner;
|
import java.util.Scanner;
|
||||||
|
|
||||||
public class TUI {
|
public class TUI {
|
||||||
boolean isRunning;
|
private boolean isRunning;
|
||||||
|
private Scanner scan;
|
||||||
|
private int option;
|
||||||
|
private WordCloudManager wcm;
|
||||||
|
private boolean fMenu;
|
||||||
|
|
||||||
public TUI(){
|
public TUI(){
|
||||||
|
wcm = new WordCloudManager();
|
||||||
isRunning = true;
|
isRunning = true;
|
||||||
|
fMenu = true;
|
||||||
|
scan = new Scanner(System.in);
|
||||||
tui();
|
tui();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void tui() {
|
public void tui() {
|
||||||
Scanner scan = new Scanner(System.in);
|
|
||||||
WordCloudManager wcm = new WordCloudManager();
|
|
||||||
// while(isRunning) {
|
while(isRunning) {
|
||||||
System.out.println("Welcome to Word Cloud.\nMenu:\n\n(0) Load File from main path\n(1) Load File with Gui" +
|
System.out.println("Welcome to Word Cloud.\nType number in the following Menu to access your targeted Option.\nMenu:\n\n(0) Load File\n(1) URL Path" +
|
||||||
"\n(2) Save File\n(3) Show Picture\n(4) Exit");
|
"\n(2) Exit");
|
||||||
int option = scan.nextInt();
|
option = Integer.parseInt(scan.nextLine());
|
||||||
switch (option) {
|
switch (option) {
|
||||||
case (0):
|
case(0):
|
||||||
//Load File Path
|
|
||||||
break;
|
|
||||||
case (1):
|
|
||||||
//Load File GUI
|
//Load File GUI
|
||||||
if(wcm.loadFileGUI()) {
|
if(wcm.loadFileGUI()) {
|
||||||
System.out.println("File loaded successful!\n");
|
System.out.println("File loaded successful!\n");
|
||||||
|
fileMenu();
|
||||||
} else {
|
} else {
|
||||||
System.out.println("File cannot be loaded!\n");
|
System.out.println("File cannot be loaded!\n");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case (2):
|
case(1):
|
||||||
//Save Picture
|
//URL Input
|
||||||
break;
|
break;
|
||||||
case (3):
|
case(2):
|
||||||
//Show Picture
|
|
||||||
break;
|
|
||||||
case (4):
|
|
||||||
//Exit
|
//Exit
|
||||||
isRunning = false;
|
isRunning = false;
|
||||||
System.out.println("Close Program!");
|
System.out.println("Close Program!");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// }
|
}
|
||||||
scan.close();
|
scan.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void fileMenu() {
|
||||||
|
while(fMenu) {
|
||||||
|
System.out.println("(0) Load Stopwords\n(1) Add to Stopwords\n(2) Set Max Words in HTML\n" +
|
||||||
|
"(3) Stemming? (only German available)\n(4) Create WordCloud\n(5) Exit FileMenu");
|
||||||
|
option = Integer.parseInt(scan.nextLine());
|
||||||
|
switch(option) {
|
||||||
|
case (0):
|
||||||
|
// Load stopwords file
|
||||||
|
if(wcm.loadFileGUI()) {
|
||||||
|
System.out.println("File loaded successful!\n");
|
||||||
|
wcm.setStopWords();
|
||||||
|
} else {
|
||||||
|
System.out.println("File cannot be loaded!\n");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case(1):
|
||||||
|
// Add more stopwords
|
||||||
|
System.out.println("Type your stopword:\n");
|
||||||
|
String input = scan.nextLine();
|
||||||
|
wcm.addToStopWords(input);
|
||||||
|
break;
|
||||||
|
case(2):
|
||||||
|
// Set number of max words
|
||||||
|
int number = Integer.parseInt(scan.nextLine());
|
||||||
|
wcm.maxWordsInList(number);
|
||||||
|
wcm.cutWordsList();
|
||||||
|
break;
|
||||||
|
case(3):
|
||||||
|
// Set Stemming
|
||||||
|
System.out.println("Set Stemming: Input 'yes' or 'no'");
|
||||||
|
String stemmingOption = scan.nextLine();
|
||||||
|
wcm.stemming(stemmingOption);
|
||||||
|
break;
|
||||||
|
case(4):
|
||||||
|
//Create WordCloud
|
||||||
|
wcm.tokenizingText();
|
||||||
|
wcm.createWordCloud();
|
||||||
|
fMenu = false;
|
||||||
|
System.out.println("HTML File created!");
|
||||||
|
break;
|
||||||
|
case(5):
|
||||||
|
//Exit filemenu
|
||||||
|
fMenu = false;
|
||||||
|
System.out.println("Close Program!");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue