refactored code
parent
40e0988a11
commit
4535b50da6
|
@ -6,13 +6,19 @@ import java.awt.*;
|
|||
import java.io.File;
|
||||
|
||||
public class FileLoader {
|
||||
private File inputFile;
|
||||
private File filePath;
|
||||
private File stopwordsPath;
|
||||
private String fileFormat;
|
||||
private String stopwordFormat;
|
||||
|
||||
public FileLoader() {
|
||||
this.inputFile = null;
|
||||
this.filePath = null;
|
||||
this.stopwordsPath = null;
|
||||
this.fileFormat = "";
|
||||
this.stopwordFormat = "";
|
||||
}
|
||||
//KI erstellte Methode mit anpassungen
|
||||
public File loadFileGUI() {
|
||||
//AI-generated method using the Swing JFileChooser, adapted to run in this program
|
||||
public void loadFileGUI() {
|
||||
try {
|
||||
JFileChooser fileChooser = new JFileChooser();
|
||||
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf"));
|
||||
|
@ -22,17 +28,21 @@ public class FileLoader {
|
|||
int result = fileChooser.showOpenDialog(null);
|
||||
|
||||
if (result == JFileChooser.APPROVE_OPTION) {
|
||||
inputFile = fileChooser.getSelectedFile();
|
||||
if (filePath == null) {
|
||||
filePath = fileChooser.getSelectedFile();
|
||||
fileFormat = getFileFormat(filePath);
|
||||
} else {
|
||||
stopwordsPath = fileChooser.getSelectedFile();
|
||||
stopwordFormat = getFileFormat(stopwordsPath);
|
||||
}
|
||||
}
|
||||
return inputFile;
|
||||
} catch (HeadlessException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
//Methode sucht das Datei Format für spätere Verwendung
|
||||
public String getFileFormat(File file) {
|
||||
String fileName = file.getName();
|
||||
public String getFileFormat(File path) {
|
||||
String fileName = path.getName();
|
||||
String fileFormat = fileName.contains(".") ? fileName.substring(fileName.lastIndexOf(".") + 1) : "";
|
||||
|
||||
switch (fileFormat.toLowerCase()) {
|
||||
|
@ -48,4 +58,19 @@ public class FileLoader {
|
|||
return "File format not supported";
|
||||
}
|
||||
}
|
||||
|
||||
public File getFilePath() {
|
||||
return filePath;
|
||||
}
|
||||
|
||||
public File getStopwordsPath() {
|
||||
return stopwordsPath;
|
||||
}
|
||||
|
||||
public String getFileFormat() {
|
||||
return fileFormat;
|
||||
}
|
||||
public String getStopwordFormat() {
|
||||
return stopwordFormat;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,25 +20,9 @@ import java.util.*;
|
|||
import java.io.*;
|
||||
|
||||
public class TextProcessing {
|
||||
private boolean stemming;
|
||||
private int maxWords;
|
||||
|
||||
// für spätere verwendung mit umfangreichen anpassungen
|
||||
// public boolean isStemming() {
|
||||
// return stemming;
|
||||
// }
|
||||
//
|
||||
// public int getMaxWords() {
|
||||
// return maxWords;
|
||||
// }
|
||||
//
|
||||
// public void setStemming(boolean stemming) {
|
||||
// this.stemming = stemming;
|
||||
// }
|
||||
//
|
||||
public void setMaxWords(int maxWords) {
|
||||
this.maxWords = maxWords;
|
||||
}
|
||||
private boolean stemming = false;
|
||||
private int maxWords = 0;
|
||||
private Set<String> stopwordList = new HashSet<>();
|
||||
|
||||
public String formatToText(File file, String format) {
|
||||
try {
|
||||
|
@ -83,7 +67,24 @@ public class TextProcessing {
|
|||
return "Nothing found!";
|
||||
}
|
||||
|
||||
public Map<String, Integer> maxShowWords(Map<String, Integer> words) {
|
||||
// public String fileToTextString(File path, String format) {
|
||||
// String text = formatToText(path, format);
|
||||
// return text;
|
||||
// }
|
||||
|
||||
public void textToSetStopwords(Map<String, Integer> words) {
|
||||
Set<String> stopwords = new HashSet<>();
|
||||
for (Map.Entry<String, Integer> entry : words.entrySet()) {
|
||||
stopwords.add(entry.getKey());
|
||||
}
|
||||
stopwordList.addAll(stopwords);
|
||||
}
|
||||
|
||||
public void addToStopWords(String stopword) {
|
||||
stopwordList.add(stopword);
|
||||
}
|
||||
|
||||
public Map<String, Integer> maxShowWords(Map<String, Integer> words, int maxWords) {
|
||||
HashMap <String, Integer> cuttedHashmap = new HashMap<>();
|
||||
int index = maxWords;
|
||||
for (String word : words.keySet()) {
|
||||
|
@ -95,15 +96,26 @@ public class TextProcessing {
|
|||
return cuttedHashmap;
|
||||
}
|
||||
|
||||
//KI Methode die abgeändert wurde, damit sie in dieses Programm passt
|
||||
public Map<String, Integer> tokenizingFile(String text, Set<String> stopwords) {
|
||||
public Map<String, Integer> sortList(Map<String, Integer> unsortedMap) {
|
||||
List<Map.Entry<String, Integer>> entryList = new ArrayList<>(unsortedMap.entrySet());
|
||||
|
||||
entryList.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue())); //Ki erstellte Zeile
|
||||
|
||||
Map<String, Integer> sortedMap = new TreeMap<>();
|
||||
for (Map.Entry<String, Integer> entry : entryList) {
|
||||
sortedMap.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
return sortedMap;
|
||||
}
|
||||
|
||||
public Map<String, Integer> tokenizingFile(String text) {
|
||||
Map<String, Integer> words = new HashMap<>();
|
||||
|
||||
if (text == null || text.isBlank()) {
|
||||
return words;
|
||||
}
|
||||
CharArraySet luceneStopwords =
|
||||
stopwords != null ? new CharArraySet(stopwords, true) : CharArraySet.EMPTY_SET;
|
||||
stopwordList != null ? new CharArraySet(stopwordList, true) : CharArraySet.EMPTY_SET;
|
||||
|
||||
try (Analyzer analyzer = new StandardAnalyzer(luceneStopwords)) {
|
||||
TokenStream tokenStream = analyzer.tokenStream(null, text);
|
||||
|
@ -114,12 +126,16 @@ public class TextProcessing {
|
|||
String word = charTermAttribute.toString();
|
||||
if (words.containsKey(word)) {
|
||||
words.compute(word, (k, counter) -> counter + 1);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
words.put(word, 1);
|
||||
}
|
||||
}
|
||||
tokenStream.end();
|
||||
if (maxWords > 0) {
|
||||
Map<String, Integer> sortedWords;
|
||||
sortedWords = maxShowWords(sortList(words), maxWords);
|
||||
return sortedWords;
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
|
@ -127,31 +143,15 @@ public class TextProcessing {
|
|||
return words;
|
||||
}
|
||||
|
||||
public Set<String> textToSetStopwords(Map<String, Integer> words) {
|
||||
Set<String> stopwordList = new HashSet<>();
|
||||
for (Map.Entry<String, Integer> entry : words.entrySet()) {
|
||||
stopwordList.add(entry.getKey());
|
||||
}
|
||||
return stopwordList;
|
||||
public void setStemming(boolean stemming) {
|
||||
this.stemming = stemming;
|
||||
}
|
||||
|
||||
public String fileToTextString(File path, String format) {
|
||||
String text = formatToText(path, format);
|
||||
return text;
|
||||
public void setMaxWords(int maxWords) {
|
||||
this.maxWords = maxWords;
|
||||
}
|
||||
|
||||
public Map<String, Integer> sortList(Map<String, Integer> unsortedMap) {
|
||||
List<Map.Entry<String, Integer>> entryList = new ArrayList<>(unsortedMap.entrySet());
|
||||
|
||||
entryList.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue())); //Ki erstellte Zeile
|
||||
|
||||
Map<String, Integer> sortedMap = new LinkedHashMap<>();
|
||||
for (Map.Entry<String, Integer> entry : entryList) {
|
||||
sortedMap.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
|
||||
return sortedMap;
|
||||
}
|
||||
|
||||
// public Map<String, Integer> stemming(Map<String, Integer> wordList) {
|
||||
// Map<String, Integer> wordCounts = new HashMap<>();
|
||||
|
|
|
@ -5,13 +5,13 @@ import java.util.Map;
|
|||
|
||||
public class WordCloudCreator {
|
||||
|
||||
//Ki erstellte Methode wegen Zeitgründen und Krankheitsgründen, dennoch anpassungen in großen Maße waren notwendig
|
||||
public void insertWordsIntoTemplate(Map<String, Integer> wordMap) {
|
||||
//AI-generated method, but with massive changes
|
||||
public boolean insertWordsIntoTemplate(Map<String, Integer> wordMap) {
|
||||
File templateFile = new File("wordcloud.html"); // Template in project directory
|
||||
File outputFile = new File("output.html"); // Output in project directory
|
||||
File outputFile = new File("createdHTML.html"); // Output in project directory
|
||||
|
||||
if (!templateFile.exists()) {
|
||||
throw new RuntimeException("Template file 'wordcloud.html' not found in project directory.");
|
||||
throw new RuntimeException("File not found!");
|
||||
}
|
||||
|
||||
try (BufferedReader reader = new BufferedReader(new FileReader(templateFile));
|
||||
|
@ -20,7 +20,6 @@ public class WordCloudCreator {
|
|||
StringBuilder htmlContent = new StringBuilder();
|
||||
String line;
|
||||
|
||||
// Read the HTML template
|
||||
while ((line = reader.readLine()) != null) {
|
||||
htmlContent.append(line).append("\n");
|
||||
}
|
||||
|
@ -55,5 +54,6 @@ public class WordCloudCreator {
|
|||
} catch (IOException e) {
|
||||
throw new RuntimeException("Error processing HTML template", e);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -4,87 +4,67 @@ import domain.FileLoader;
|
|||
import domain.WordCloudCreator;
|
||||
import domain.TextProcessing;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
|
||||
public class WordCloudManager {
|
||||
private FileLoader fileLoader;
|
||||
private TextProcessing processing;
|
||||
private WordCloudCreator creator;
|
||||
private File filePath;
|
||||
private File stopwordsPath;
|
||||
private String fileFormat;
|
||||
private String fileFormathStopwords;
|
||||
private String text;
|
||||
Set<String> stopwordList = new HashSet<>();
|
||||
private HashMap<String, Integer> wordMap;
|
||||
private FileLoader fl;
|
||||
private TextProcessing tp;
|
||||
private WordCloudCreator wcm;
|
||||
|
||||
public WordCloudManager() {
|
||||
fileLoader = new FileLoader();
|
||||
processing = new TextProcessing();
|
||||
creator = new WordCloudCreator();
|
||||
fileFormat = "";
|
||||
fileFormathStopwords = "";
|
||||
fl = new FileLoader();
|
||||
tp = new TextProcessing();
|
||||
wcm = new WordCloudCreator();
|
||||
}
|
||||
|
||||
public boolean loadFileGUI() {
|
||||
if (filePath == null) {
|
||||
filePath = fileLoader.loadFileGUI();
|
||||
fileFormat = fileLoader.getFileFormat(filePath);
|
||||
System.out.println("File: " + filePath);
|
||||
System.out.println("File: " + stopwordsPath);
|
||||
} else {
|
||||
stopwordsPath = fileLoader.loadFileGUI();
|
||||
fileFormathStopwords = fileLoader.getFileFormat(stopwordsPath);
|
||||
System.out.println("File: " + filePath);
|
||||
System.out.println("File: " + stopwordsPath);
|
||||
}
|
||||
if (filePath.length() > 0) {
|
||||
public boolean loadFile() {
|
||||
fl.loadFileGUI();
|
||||
if (fl.getFilePath().length() > 0) {
|
||||
System.out.println(fl.getFilePath());
|
||||
System.out.println(fl.getStopwordsPath());
|
||||
return true;
|
||||
} else {
|
||||
System.out.println(fl.getFilePath());
|
||||
System.out.println(fl.getStopwordsPath());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public void addToStopWords(String extraStopword) {
|
||||
stopwordList.add(extraStopword);
|
||||
System.out.println(stopwordList);
|
||||
public boolean loadStopwords() {
|
||||
if(fl.getStopwordsPath() != null) {
|
||||
String stopwordfile = tp.formatToText(fl.getStopwordsPath(), fl.getStopwordFormat());
|
||||
Map<String, Integer> tokinizedWords = tp.tokenizingFile(stopwordfile);
|
||||
tp.textToSetStopwords(tokinizedWords);
|
||||
return true;
|
||||
}
|
||||
|
||||
public void setStopWords() {
|
||||
Set<String> stopwords = processing.textToSetStopwords(processing.tokenizingFile(processing.
|
||||
formatToText(stopwordsPath, fileFormathStopwords), null));
|
||||
stopwordList.addAll(stopwords);
|
||||
System.out.println(stopwordList);
|
||||
}
|
||||
|
||||
// für spätere Verwendung mit umfangreichen Änderungen im Code
|
||||
// public void stemming(String approval) {
|
||||
// if(approval.equals("yes")) {
|
||||
// processing.setStemming(true);
|
||||
// }
|
||||
// }
|
||||
|
||||
public void maxWordsInList(int number) {
|
||||
processing.setMaxWords(number);
|
||||
}
|
||||
|
||||
public void tokenizingText() {
|
||||
wordMap = (HashMap<String, Integer>) processing.tokenizingFile(processing.fileToTextString(filePath, fileFormat)
|
||||
, !stopwordList.isEmpty() ? stopwordList : null);
|
||||
System.out.println(wordMap.keySet() + "\n" + wordMap.values());
|
||||
}
|
||||
|
||||
public void cutWordsList() {
|
||||
wordMap = (HashMap<String, Integer>) processing.maxShowWords(processing.sortList(wordMap));
|
||||
processing.sortList(wordMap);
|
||||
System.out.println(wordMap.keySet() + "\n" + wordMap.values());
|
||||
}
|
||||
|
||||
public void createWordCloud() {
|
||||
creator.insertWordsIntoTemplate(wordMap);
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public void addToStopWords(String stopword) {
|
||||
tp.addToStopWords(stopword);
|
||||
}
|
||||
|
||||
public void setMaxWords(int maxWords) {
|
||||
tp.setMaxWords(maxWords);
|
||||
}
|
||||
|
||||
public void setStemming(boolean stemming) {
|
||||
if(stemming) {
|
||||
tp.setStemming(true);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean createWordCloud() {
|
||||
if(wcm.insertWordsIntoTemplate(tp.tokenizingFile(tp.formatToText(fl.getFilePath(),fl.getFileFormat())))) {
|
||||
fl = null;
|
||||
tp = null;
|
||||
fl = new FileLoader();
|
||||
tp = new TextProcessing();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package tui;
|
||||
|
||||
public class Main {
|
||||
//startet die GUI
|
||||
public static void main(String[]args){
|
||||
new TUI();
|
||||
}
|
||||
|
|
|
@ -14,7 +14,6 @@ public class TUI {
|
|||
public TUI(){
|
||||
wcm = new WordCloudManager();
|
||||
isRunning = true;
|
||||
fMenu = true;
|
||||
scan = new Scanner(System.in);
|
||||
tui();
|
||||
|
||||
|
@ -28,8 +27,9 @@ public class TUI {
|
|||
switch (option) {
|
||||
case(0):
|
||||
//Load File GUI
|
||||
if(wcm.loadFileGUI()) {
|
||||
if(wcm.loadFile()) {
|
||||
System.out.println("File loaded successful!\n");
|
||||
fMenu = true;
|
||||
fileMenu();
|
||||
} else {
|
||||
System.out.println("File cannot be loaded!\n");
|
||||
|
@ -56,40 +56,49 @@ public class TUI {
|
|||
switch(option) {
|
||||
case (0):
|
||||
// Load stopwords file
|
||||
if(wcm.loadFileGUI()) {
|
||||
System.out.println("File loaded successful!\n");
|
||||
wcm.setStopWords();
|
||||
if(wcm.loadFile()) {
|
||||
if(wcm.loadStopwords()) {
|
||||
System.out.println("File loaded successful!");
|
||||
}
|
||||
else{
|
||||
System.out.println("Cannot load one more File!");
|
||||
}
|
||||
} else {
|
||||
System.out.println("File cannot be loaded!\n");
|
||||
System.out.println("File cannot be loaded!");
|
||||
}
|
||||
break;
|
||||
case(1):
|
||||
// Add more stopwords
|
||||
System.out.println("Type your stopword:\n");
|
||||
System.out.println("Type your stopword: ");
|
||||
String input = scan.nextLine();
|
||||
wcm.addToStopWords(input);
|
||||
break;
|
||||
case(2):
|
||||
// Set number of max words
|
||||
System.out.println("How much max words do you want? ");
|
||||
int number = Integer.parseInt(scan.nextLine());
|
||||
wcm.maxWordsInList(number);
|
||||
wcm.cutWordsList();
|
||||
wcm.setMaxWords(number);
|
||||
break;
|
||||
case(3):
|
||||
// Set Stemming
|
||||
System.out.println("Set Stemming: Input 'yes' or 'no'");
|
||||
// String stemmingOption = scan.nextLine();
|
||||
// wcm.stemming(stemmingOption);
|
||||
System.out.println("Stemming: Input 'yes' or 'no'? ");
|
||||
String stemmingOption = scan.nextLine();
|
||||
if(stemmingOption.equals("yes")) {
|
||||
wcm.setStemming(true);
|
||||
}
|
||||
break;
|
||||
case(4):
|
||||
//Create WordCloud
|
||||
wcm.tokenizingText();
|
||||
wcm.createWordCloud();
|
||||
fMenu = false;
|
||||
if(wcm.createWordCloud()) {
|
||||
System.out.println("HTML File created!");
|
||||
fMenu = false;
|
||||
}
|
||||
else {
|
||||
System.out.println("HTML FIle not created!");
|
||||
}
|
||||
break;
|
||||
case(5):
|
||||
//Exit filemenu
|
||||
//Exit Filemenu
|
||||
fMenu = false;
|
||||
System.out.println("Close Program!");
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue