some refactoring and implement stemming

main
Daniel Fromm 2025-05-13 00:47:11 +02:00
parent 8a74b2a7b5
commit 19350fc80c
3 changed files with 11 additions and 9 deletions

View File

@@ -12,7 +12,6 @@ import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanStemmer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.CharArraySet;
@@ -26,8 +25,8 @@ public class TextProcessing {
private Set<String> stopwordList = new HashSet<>();
public String formatToText(File file, String format) {
StringBuilder text = new StringBuilder();
try {
StringBuilder text = new StringBuilder();
if (file != null) {
switch (format) {
case "txt":
@@ -65,7 +64,7 @@ public class TextProcessing {
catch (IOException e) {
throw new RuntimeException(e);
}
return "Nothing found!";
return text.toString();
}
public void textToSetStopwords(Map<String, Integer> words) {
@@ -113,7 +112,7 @@ public class TextProcessing {
CharArraySet luceneStopwords = stopwordList != null ? new CharArraySet(stopwordList,
true) : CharArraySet.EMPTY_SET;
try (Analyzer analyzer = new StandardAnalyzer(luceneStopwords)) {
try (Analyzer analyzer = stemming ? new GermanAnalyzer(luceneStopwords) : new StandardAnalyzer(luceneStopwords)) {
TokenStream tokenStream = analyzer.tokenStream(null, text);
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

View File

@@ -10,10 +10,6 @@ public class WordCloudCreator {
File templateFile = new File("wordcloud.html"); // Template in project directory
File outputFile = new File("createdHTML.html"); // Output in project directory
if (!templateFile.exists()) {
throw new RuntimeException("File not found!");
}
try (BufferedReader reader = new BufferedReader(new FileReader(templateFile));
BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) {

View File

@@ -54,6 +54,8 @@ public class TUI {
scan.close();
System.out.println("Close Program!");
break;
default:
System.out.println("Unknown input!");
}
}
scan.close();
@@ -93,11 +95,14 @@ public class TUI {
break;
case(3):
// Set Stemming
System.out.println("Stemming: Input 'yes' or 'no'? ");
System.out.println("Activate stemming? Type 'yes' or 'no'?\nOnly German stemming!");
String stemmingOption = scan.nextLine();
if(stemmingOption.equals("yes")) {
wcm.setStemming(true);
}
else {
System.out.println("Unknown Input!");
}
break;
case(4):
//Create WordCloud and exit program
@@ -110,6 +115,8 @@ public class TUI {
System.out.println("HTML FIle not created!\n");
}
break;
default:
System.out.println("Unknown input!");
}
}
}