From 19350fc80ce77ab7a5802516e1b1746cd8e26d43 Mon Sep 17 00:00:00 2001 From: Daniel Fromm <3015351@stud.hs-mannheim.de> Date: Tue, 13 May 2025 00:47:11 +0200 Subject: [PATCH] some refactoring and implement stemming --- src/main/java/domain/TextProcessing.java | 7 +++---- src/main/java/domain/WordCloudCreator.java | 4 ---- src/main/java/tui/TUI.java | 9 ++++++++- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/main/java/domain/TextProcessing.java b/src/main/java/domain/TextProcessing.java index 3b7c695..4f6c788 100644 --- a/src/main/java/domain/TextProcessing.java +++ b/src/main/java/domain/TextProcessing.java @@ -12,7 +12,6 @@ import org.apache.poi.xslf.usermodel.XSLFTextShape; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.de.GermanStemmer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.CharArraySet; @@ -26,8 +25,8 @@ public class TextProcessing { private Set stopwordList = new HashSet<>(); public String formatToText(File file, String format) { + StringBuilder text = new StringBuilder(); try { - StringBuilder text = new StringBuilder(); if (file != null) { switch (format) { case "txt": @@ -65,7 +64,7 @@ public class TextProcessing { catch (IOException e) { throw new RuntimeException(e); } - return "Nothing found!"; + return text.toString(); } public void textToSetStopwords(Map words) { @@ -113,7 +112,7 @@ public class TextProcessing { CharArraySet luceneStopwords = stopwordList != null ? new CharArraySet(stopwordList, true) : CharArraySet.EMPTY_SET; - try (Analyzer analyzer = new StandardAnalyzer(luceneStopwords)) { + try (Analyzer analyzer = stemming ? new GermanAnalyzer(luceneStopwords) : new StandardAnalyzer(luceneStopwords)) { TokenStream tokenStream = analyzer.tokenStream(null, text); CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); diff --git a/src/main/java/domain/WordCloudCreator.java b/src/main/java/domain/WordCloudCreator.java index 8c1da73..8929e8d 100644 --- a/src/main/java/domain/WordCloudCreator.java +++ b/src/main/java/domain/WordCloudCreator.java @@ -10,10 +10,6 @@ public class WordCloudCreator { File templateFile = new File("wordcloud.html"); // Template in project directory File outputFile = new File("createdHTML.html"); // Output in project directory - if (!templateFile.exists()) { - throw new RuntimeException("File not found!"); - } - try (BufferedReader reader = new BufferedReader(new FileReader(templateFile)); BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) { diff --git a/src/main/java/tui/TUI.java b/src/main/java/tui/TUI.java index 08bd504..8d0b176 100644 --- a/src/main/java/tui/TUI.java +++ b/src/main/java/tui/TUI.java @@ -54,6 +54,8 @@ public class TUI { scan.close(); System.out.println("Close Program!"); break; + default: + System.out.println("Unknown input!"); } } scan.close(); @@ -93,11 +95,14 @@ public class TUI { break; case(3): // Set Stemming - System.out.println("Stemming: Input 'yes' or 'no'? "); + System.out.println("Activate stemming? Type 'yes' or 'no'?\nOnly German stemming!"); String stemmingOption = scan.nextLine(); if(stemmingOption.equals("yes")) { wcm.setStemming(true); } + else { + System.out.println("Unknown Input!"); + } break; case(4): //Create WordCloud and exit program @@ -110,6 +115,8 @@ public class TUI { System.out.println("HTML FIle not created!\n"); } break; + default: + System.out.println("Unknown input!"); } } }