diff --git a/pom.xml b/pom.xml index 0fb13ab..fb38baa 100644 --- a/pom.xml +++ b/pom.xml @@ -34,6 +34,11 @@ log4j-core 2.18.0 + + org.apache.lucene + lucene-analysis-common + 9.9.2 + diff --git a/src/main/java/domain/FileLoader.java b/src/main/java/domain/FileLoader.java index 9b0b100..fbde739 100644 --- a/src/main/java/domain/FileLoader.java +++ b/src/main/java/domain/FileLoader.java @@ -4,9 +4,6 @@ import javax.swing.*; import javax.swing.filechooser.FileNameExtensionFilter; import java.awt.*; import java.io.File; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; public class FileLoader { private File inputFile; diff --git a/src/main/java/domain/TextProcessing.java b/src/main/java/domain/TextProcessing.java index c6b2e20..133b145 100644 --- a/src/main/java/domain/TextProcessing.java +++ b/src/main/java/domain/TextProcessing.java @@ -8,11 +8,36 @@ import org.apache.poi.xslf.usermodel.XMLSlideShow; import org.apache.poi.xslf.usermodel.XSLFSlide; import org.apache.poi.xslf.usermodel.XSLFShape; import org.apache.poi.xslf.usermodel.XSLFTextShape; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.de.GermanStemmer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.CharArraySet; +import java.io.IOException; +import java.util.*; import java.io.*; -import java.util.HashMap; public class TextProcessing { + private boolean stemming; + private int maxWords; + + public boolean isStemming() { + return stemming; + } + + public int getMaxWords() { + return maxWords; + } + + public void setStemming(boolean stemming) { + this.stemming = stemming; + } + + public void setMaxWords(int maxWords) { + this.maxWords = maxWords; + } public String formatToText(File file, String format) { try { @@ -57,9 +82,9 @@ public class TextProcessing { return "Nothing found!"; } - public HashMap maxShowWords(int number, HashMap words) { + public Map maxShowWords(Map words) { HashMap cuttedHashmap = new HashMap<>(); - int index = number; + int index = maxWords; for (String word : words.keySet()) { if(index > 0) { cuttedHashmap.put(word, words.get(word)); @@ -69,25 +94,74 @@ public class TextProcessing { return cuttedHashmap; } - public HashMap tokenizingText(String text){ - HashMap filteredWords = new HashMap<>(); - try { - if(!text.isEmpty()) { - //Tokenizing der Wörter - String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<\\-–>^°\"']"; - String[] textWords = text.split(splitter); - for (String word : textWords) { - if (filteredWords.containsKey(word)) { - filteredWords.compute(word, (k, counter) -> counter + 1); - } else { - filteredWords.put(word, 1); + //KI Methode die abgeändert wurde, damit sie in dieses Programm passt + public Map tokenizingFile(String text, Set stopwords) { + Map words = new HashMap<>(); + + if (text == null || text.isBlank()) { + return words; + } + CharArraySet luceneStopwords = + stopwords != null ? new CharArraySet(stopwords, true) : CharArraySet.EMPTY_SET; + + try (Analyzer analyzer = new StandardAnalyzer(luceneStopwords)) { + TokenStream tokenStream = analyzer.tokenStream(null, text); + CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); + + tokenStream.reset(); + while (tokenStream.incrementToken()) { + String word = charTermAttribute.toString(); + if (words.containsKey(word)) { + words.compute(word, (k, counter) -> counter + 1); + } + else { + words.put(word, 1); } - } } + tokenStream.end(); } - catch (Exception ex) { - throw new RuntimeException(ex); + catch (IOException e) { + throw new RuntimeException(e); } - return filteredWords; + return words; } + + public Set textToSetStopwords(Map words) { + Set stopwordList = new HashSet<>(); + for (Map.Entry entry : words.entrySet()) { + stopwordList.add(entry.getKey()); + } + return stopwordList; + } + + public String fileToTextString(File path, String format) { + String text = formatToText(path, format); + return text; + } + + public Map sortList(Map unsortedMap) { + List> entryList = new ArrayList<>(unsortedMap.entrySet()); + + entryList.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue())); //Ki erstellte Zeile + + Map sortedMap = new LinkedHashMap<>(); + for (Map.Entry entry : entryList) { + sortedMap.put(entry.getKey(), entry.getValue()); + } + + return sortedMap; + } + +// public Map stemming(Map wordList) { +// Map wordCounts = new HashMap<>(); +// GermanStemmer stemmer = new GermanStemmer(); +// +// for (String key: wordList.keySet()) { +// String stemmedWord = stemmer.stemWord(key); +// if (stemmedWord != null) { +// wordCounts.merge(stemmedWord, 1, Integer::sum); +// } +// } +// return wordCounts; +// } } diff --git a/src/main/java/facade/WordCloudManager.java b/src/main/java/facade/WordCloudManager.java index 79976b3..540f7a6 100644 --- a/src/main/java/facade/WordCloudManager.java +++ b/src/main/java/facade/WordCloudManager.java @@ -1,30 +1,94 @@ package facade; -import domain.FileManager; -import domain.PictureManager; +import domain.FileLoader; +import domain.WordCloudCreator; +import domain.TextProcessing; import java.io.File; import java.util.HashMap; -import java.util.List; +import java.util.HashSet; +import java.util.Set; public class WordCloudManager { - FileManager fileManager; - PictureManager pictureManager; + private FileLoader fileLoader; + private TextProcessing processing; + private WordCloudCreator creator; + private File filePath; + private File stopwordsPath; + private String fileFormat; + private String fileFormathStopwords; + private String text; + Set stopwordList = new HashSet<>(); + private HashMap wordMap; public WordCloudManager() { - fileManager = new FileManager(); - pictureManager = new PictureManager(); + fileLoader = new FileLoader(); + processing = new TextProcessing(); + creator = new WordCloudCreator(); + fileFormat = ""; + fileFormathStopwords = ""; } public boolean loadFileGUI() { - - String fileText = fileManager.loadFile(); - HashMap wordMap = fileManager.tokenizingText(fileText); - if(wordMap != null) { - return true; + if (filePath == null) { + filePath = fileLoader.loadFileGUI(); + fileFormat = fileLoader.getFileFormat(filePath); + System.out.println("File: " + filePath); + System.out.println("File: " + stopwordsPath); + } else { + stopwordsPath = fileLoader.loadFileGUI(); + fileFormathStopwords = fileLoader.getFileFormat(stopwordsPath); + System.out.println("File: " + filePath); + System.out.println("File: " + stopwordsPath); } - else { + if (filePath.length() > 0) { + return true; + } else { return false; } } + + public void addToStopWords(String extraStopword) { + stopwordList.add(extraStopword); + System.out.println(stopwordList); + } + + public void setStopWords() { + Set stopwords = processing.textToSetStopwords(processing.tokenizingFile(processing. + formatToText(stopwordsPath, fileFormathStopwords), null)); + stopwordList.addAll(stopwords); + System.out.println(stopwordList); + } + + + public void stemming(String approval) { + if(approval.equals("yes")) { + processing.setStemming(true); + } + } + + public void maxWordsInList(int number) { + processing.setMaxWords(number); + } + + + // ab hier noch nicht fertig. + public void tokenizingText() { + wordMap = (HashMap) processing.tokenizingFile(processing.fileToTextString(filePath, fileFormat) + , !stopwordList.isEmpty() ? stopwordList : null); + System.out.println(wordMap.keySet() + "\n" + wordMap.values()); + } + + public void cutWordsList() { + + + wordMap = (HashMap) processing.maxShowWords(processing.sortList(wordMap)); + processing.sortList(wordMap); + System.out.println(wordMap.keySet() + "\n" + wordMap.values()); + } + + public void createWordCloud() { + creator.insertWordsIntoTemplate(wordMap); + } } + diff --git a/src/main/java/tui/Main.java b/src/main/java/tui/Main.java index d3d662b..45ee85c 100644 --- a/src/main/java/tui/Main.java +++ b/src/main/java/tui/Main.java @@ -1,10 +1,7 @@ package tui; -import java.util.Scanner; - public class Main { - public static void main(String[]args){ - TUI tui = new TUI(); + new TUI(); } } diff --git a/src/main/java/tui/TUI.java b/src/main/java/tui/TUI.java index a74ab4b..d23d052 100644 --- a/src/main/java/tui/TUI.java +++ b/src/main/java/tui/TUI.java @@ -5,45 +5,97 @@ import facade.WordCloudManager; import java.util.Scanner; public class TUI { - boolean isRunning; + private boolean isRunning; + private Scanner scan; + private int option; + private WordCloudManager wcm; + private boolean fMenu; public TUI(){ + wcm = new WordCloudManager(); isRunning = true; + fMenu = true; + scan = new Scanner(System.in); tui(); + } public void tui() { - Scanner scan = new Scanner(System.in); - WordCloudManager wcm = new WordCloudManager(); -// while(isRunning) { - System.out.println("Welcome to Word Cloud.\nMenu:\n\n(0) Load File from main path\n(1) Load File with Gui" + - "\n(2) Save File\n(3) Show Picture\n(4) Exit"); - int option = scan.nextInt(); + + + while(isRunning) { + System.out.println("Welcome to Word Cloud.\nType number in the following Menu to access your targeted Option.\nMenu:\n\n(0) Load File\n(1) URL Path" + + "\n(2) Exit"); + option = Integer.parseInt(scan.nextLine()); switch (option) { - case (0): - //Load File Path - break; - case (1): + case(0): //Load File GUI if(wcm.loadFileGUI()) { System.out.println("File loaded successful!\n"); + fileMenu(); } else { System.out.println("File cannot be loaded!\n"); } break; - case (2): - //Save Picture + case(1): + //URL Input break; - case (3): - //Show Picture - break; - case (4): + case(2): //Exit isRunning = false; System.out.println("Close Program!"); break; } -// } + } scan.close(); } + + public void fileMenu() { + while(fMenu) { + System.out.println("(0) Load Stopwords\n(1) Add to Stopwords\n(2) Set Max Words in HTML\n" + + "(3) Stemming? (only German available)\n(4) Create WordCloud\n(5) Exit FileMenu"); + option = Integer.parseInt(scan.nextLine()); + switch(option) { + case (0): + // Load stopwords file + if(wcm.loadFileGUI()) { + System.out.println("File loaded successful!\n"); + wcm.setStopWords(); + } else { + System.out.println("File cannot be loaded!\n"); + } + break; + case(1): + // Add more stopwords + System.out.println("Type your stopword:\n"); + String input = scan.nextLine(); + wcm.addToStopWords(input); + break; + case(2): + // Set number of max words + int number = Integer.parseInt(scan.nextLine()); + wcm.maxWordsInList(number); + wcm.cutWordsList(); + break; + case(3): + // Set Stemming + System.out.println("Set Stemming: Input 'yes' or 'no'"); + String stemmingOption = scan.nextLine(); + wcm.stemming(stemmingOption); + break; + case(4): + //Create WordCloud + wcm.tokenizingText(); + wcm.createWordCloud(); + fMenu = false; + System.out.println("HTML File created!"); + break; + case(5): + //Exit filemenu + fMenu = false; + System.out.println("Close Program!"); + break; + } + } + } }