Refactor code and implement stemming
parent
8a74b2a7b5
commit
19350fc80c
|
@ -12,7 +12,6 @@ import org.apache.poi.xslf.usermodel.XSLFTextShape;
|
|||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.de.GermanStemmer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
|
||||
|
@ -26,8 +25,8 @@ public class TextProcessing {
|
|||
private Set<String> stopwordList = new HashSet<>();
|
||||
|
||||
public String formatToText(File file, String format) {
|
||||
try {
|
||||
StringBuilder text = new StringBuilder();
|
||||
try {
|
||||
if (file != null) {
|
||||
switch (format) {
|
||||
case "txt":
|
||||
|
@ -65,7 +64,7 @@ public class TextProcessing {
|
|||
catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return "Nothing found!";
|
||||
return text.toString();
|
||||
}
|
||||
|
||||
public void textToSetStopwords(Map<String, Integer> words) {
|
||||
|
@ -113,7 +112,7 @@ public class TextProcessing {
|
|||
CharArraySet luceneStopwords = stopwordList != null ? new CharArraySet(stopwordList,
|
||||
true) : CharArraySet.EMPTY_SET;
|
||||
|
||||
try (Analyzer analyzer = new StandardAnalyzer(luceneStopwords)) {
|
||||
try (Analyzer analyzer = stemming ? new GermanAnalyzer(luceneStopwords) : new StandardAnalyzer(luceneStopwords)) {
|
||||
TokenStream tokenStream = analyzer.tokenStream(null, text);
|
||||
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
|
||||
|
||||
|
|
|
@ -10,10 +10,6 @@ public class WordCloudCreator {
|
|||
File templateFile = new File("wordcloud.html"); // Template in project directory
|
||||
File outputFile = new File("createdHTML.html"); // Output in project directory
|
||||
|
||||
if (!templateFile.exists()) {
|
||||
throw new RuntimeException("File not found!");
|
||||
}
|
||||
|
||||
try (BufferedReader reader = new BufferedReader(new FileReader(templateFile));
|
||||
BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) {
|
||||
|
||||
|
|
|
@ -54,6 +54,8 @@ public class TUI {
|
|||
scan.close();
|
||||
System.out.println("Close Program!");
|
||||
break;
|
||||
default:
|
||||
System.out.println("Unknown input!");
|
||||
}
|
||||
}
|
||||
scan.close();
|
||||
|
@ -93,11 +95,14 @@ public class TUI {
|
|||
break;
|
||||
case(3):
|
||||
// Set Stemming
|
||||
System.out.println("Stemming: Input 'yes' or 'no'? ");
|
||||
System.out.println("Activate stemming? Type 'yes' or 'no'?\nOnly German stemming!");
|
||||
String stemmingOption = scan.nextLine();
|
||||
if(stemmingOption.equals("yes")) {
|
||||
wcm.setStemming(true);
|
||||
}
|
||||
else {
|
||||
System.out.println("Unknown Input!");
|
||||
}
|
||||
break;
|
||||
case(4):
|
||||
//Create WordCloud and exit program
|
||||
|
@ -110,6 +115,8 @@ public class TUI {
|
|||
System.out.println("HTML FIle not created!\n");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
System.out.println("Unknown input!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue