Added a few methods for processing

pull/6/head
Daniel Fromm 2025-05-11 20:57:06 +02:00
parent d9ae97aea4
commit ffda6973d2
1 changed files with 93 additions and 19 deletions

View File

@ -8,11 +8,36 @@ import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlide; import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFShape; import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFTextShape; import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanStemmer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.CharArraySet;
import java.io.IOException;
import java.util.*;
import java.io.*; import java.io.*;
import java.util.HashMap;
public class TextProcessing { public class TextProcessing {
/** Stemming flag. No code visible in this section reads it besides {@link #isStemming()}. */
private boolean stemming;

/** Word limit; {@code maxShowWords} uses it as the starting value of its counter. */
private int maxWords;

/**
 * Returns the current stemming flag.
 *
 * @return the value last passed to {@link #setStemming(boolean)}, or {@code false} if never set
 */
public boolean isStemming() {
    return this.stemming;
}

/**
 * Stores the stemming flag.
 *
 * @param stemming the new flag value
 */
public void setStemming(boolean stemming) {
    this.stemming = stemming;
}

/**
 * Returns the current word limit.
 *
 * @return the value last passed to {@link #setMaxWords(int)}, or {@code 0} if never set
 */
public int getMaxWords() {
    return this.maxWords;
}

/**
 * Stores the word limit. The value is taken as-is; no range check is applied.
 *
 * @param maxWords the new word limit
 */
public void setMaxWords(int maxWords) {
    this.maxWords = maxWords;
}
public String formatToText(File file, String format) { public String formatToText(File file, String format) {
try { try {
@ -57,9 +82,9 @@ public class TextProcessing {
return "Nothing found!"; return "Nothing found!";
} }
public HashMap maxShowWords(int number, HashMap<String, Integer> words) { public Map<String, Integer> maxShowWords(Map<String, Integer> words) {
HashMap <String, Integer> cuttedHashmap = new HashMap<>(); HashMap <String, Integer> cuttedHashmap = new HashMap<>();
int index = number; int index = maxWords;
for (String word : words.keySet()) { for (String word : words.keySet()) {
if(index > 0) { if(index > 0) {
cuttedHashmap.put(word, words.get(word)); cuttedHashmap.put(word, words.get(word));
@ -69,25 +94,74 @@ public class TextProcessing {
return cuttedHashmap; return cuttedHashmap;
} }
public HashMap tokenizingText(String text){ //KI Methode die abgeändert wurde, damit sie in dieses Programm passt
HashMap<String, Integer> filteredWords = new HashMap<>(); public Map<String, Integer> tokenizingFile(String text, Set<String> stopwords) {
try { Map<String, Integer> words = new HashMap<>();
if(!text.isEmpty()) {
//Tokenizing der Wörter if (text == null || text.isBlank()) {
String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<\\->^°\"']"; return words;
String[] textWords = text.split(splitter); }
for (String word : textWords) { CharArraySet luceneStopwords =
if (filteredWords.containsKey(word)) { stopwords != null ? new CharArraySet(stopwords, true) : CharArraySet.EMPTY_SET;
filteredWords.compute(word, (k, counter) -> counter + 1);
} else { try (Analyzer analyzer = new StandardAnalyzer(luceneStopwords)) {
filteredWords.put(word, 1); TokenStream tokenStream = analyzer.tokenStream(null, text);
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
String word = charTermAttribute.toString();
if (words.containsKey(word)) {
words.compute(word, (k, counter) -> counter + 1);
}
else {
words.put(word, 1);
} }
}
} }
tokenStream.end();
} }
catch (Exception ex) { catch (IOException e) {
throw new RuntimeException(ex); throw new RuntimeException(e);
} }
return filteredWords; return words;
} }
/**
 * Builds a stopword set from the keys of a word-count map; the counts are ignored.
 *
 * @param words map whose keys become the stopwords; may be {@code null}
 * @return a new, mutable {@link HashSet} containing every key of {@code words};
 *         empty when {@code words} is {@code null} or has no entries
 */
public Set<String> textToSetStopwords(Map<String, Integer> words) {
    // A missing map means "no stopwords" rather than a NullPointerException,
    // matching tokenizingFile, which also accepts a null stopword set.
    if (words == null) {
        return new HashSet<>();
    }
    // Copy constructor instead of a manual loop; the result is independent of the map.
    return new HashSet<>(words.keySet());
}
/**
 * Returns the text of a file by delegating to {@code formatToText}.
 *
 * @param path   the file to read; passed through unchanged
 * @param format the format argument; passed through unchanged
 * @return whatever {@code formatToText(path, format)} returns
 */
public String fileToTextString(File path, String format) {
    return formatToText(path, format);
}
/**
 * Returns a copy of the given map whose iteration order is by value, highest first.
 *
 * <p>The input map is not modified. Entries with equal values keep the relative
 * order in which the input map iterates them, because {@link List#sort} is stable.
 *
 * @param unsortedMap the word counts to order
 * @return a new {@link LinkedHashMap} holding the same entries in descending value order
 */
public Map<String, Integer> sortList(Map<String, Integer> unsortedMap) {
    List<Map.Entry<String, Integer>> byCountDescending = new ArrayList<>(unsortedMap.entrySet());
    byCountDescending.sort(Map.Entry.<String, Integer>comparingByValue(Comparator.reverseOrder()));

    // LinkedHashMap keeps insertion order, so the sorted sequence survives in the result.
    Map<String, Integer> ordered = new LinkedHashMap<>();
    byCountDescending.forEach(e -> ordered.put(e.getKey(), e.getValue()));
    return ordered;
}
// public Map<String, Integer> stemming(Map<String, Integer> wordList) {
// Map<String, Integer> wordCounts = new HashMap<>();
// GermanStemmer stemmer = new GermanStemmer();
//
// for (String key: wordList.keySet()) {
// String stemmedWord = stemmer.stemWord(key);
// if (stemmedWord != null) {
// wordCounts.merge(stemmedWord, 1, Integer::sum);
// }
// }
// return wordCounts;
// }
} }