Refactor and implement stemming

main
Daniel Fromm 2025-05-13 00:47:11 +02:00
parent 8a74b2a7b5
commit 19350fc80c
3 changed files with 11 additions and 9 deletions

View File

@ -12,7 +12,6 @@ import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanStemmer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.CharArraySet;
@ -26,8 +25,8 @@ public class TextProcessing {
private Set<String> stopwordList = new HashSet<>(); private Set<String> stopwordList = new HashSet<>();
public String formatToText(File file, String format) { public String formatToText(File file, String format) {
StringBuilder text = new StringBuilder();
try { try {
StringBuilder text = new StringBuilder();
if (file != null) { if (file != null) {
switch (format) { switch (format) {
case "txt": case "txt":
@ -65,7 +64,7 @@ public class TextProcessing {
catch (IOException e) { catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
return "Nothing found!"; return text.toString();
} }
public void textToSetStopwords(Map<String, Integer> words) { public void textToSetStopwords(Map<String, Integer> words) {
@ -113,7 +112,7 @@ public class TextProcessing {
CharArraySet luceneStopwords = stopwordList != null ? new CharArraySet(stopwordList, CharArraySet luceneStopwords = stopwordList != null ? new CharArraySet(stopwordList,
true) : CharArraySet.EMPTY_SET; true) : CharArraySet.EMPTY_SET;
try (Analyzer analyzer = new StandardAnalyzer(luceneStopwords)) { try (Analyzer analyzer = stemming ? new GermanAnalyzer(luceneStopwords) : new StandardAnalyzer(luceneStopwords)) {
TokenStream tokenStream = analyzer.tokenStream(null, text); TokenStream tokenStream = analyzer.tokenStream(null, text);
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

View File

@ -10,10 +10,6 @@ public class WordCloudCreator {
File templateFile = new File("wordcloud.html"); // Template in project directory File templateFile = new File("wordcloud.html"); // Template in project directory
File outputFile = new File("createdHTML.html"); // Output in project directory File outputFile = new File("createdHTML.html"); // Output in project directory
if (!templateFile.exists()) {
throw new RuntimeException("File not found!");
}
try (BufferedReader reader = new BufferedReader(new FileReader(templateFile)); try (BufferedReader reader = new BufferedReader(new FileReader(templateFile));
BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) { BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) {

View File

@ -54,6 +54,8 @@ public class TUI {
scan.close(); scan.close();
System.out.println("Close Program!"); System.out.println("Close Program!");
break; break;
default:
System.out.println("Unknown input!");
} }
} }
scan.close(); scan.close();
@ -93,11 +95,14 @@ public class TUI {
break; break;
case(3): case(3):
// Set Stemming // Set Stemming
System.out.println("Stemming: Input 'yes' or 'no'? "); System.out.println("Activate stemming? Type 'yes' or 'no'?\nOnly German stemming!");
String stemmingOption = scan.nextLine(); String stemmingOption = scan.nextLine();
if(stemmingOption.equals("yes")) { if(stemmingOption.equals("yes")) {
wcm.setStemming(true); wcm.setStemming(true);
} }
else {
System.out.println("Unknown Input!");
}
break; break;
case(4): case(4):
//Create WordCloud and exit program //Create WordCloud and exit program
@ -110,6 +115,8 @@ public class TUI {
System.out.println("HTML FIle not created!\n"); System.out.println("HTML FIle not created!\n");
} }
break; break;
default:
System.out.println("Unknown input!");
} }
} }
} }