Some refactoring and fixed a bug that caused a warning when opening PDF files with PDDocument

main
Daniel Fromm 2025-05-13 01:16:01 +02:00
parent 19350fc80c
commit 59857d1173
5 changed files with 17 additions and 6 deletions

View File

@ -41,6 +41,7 @@ public class FileLoader {
} }
} }
//Detect the file format from the file name extension for further processing
public String getFileFormat(File path) { public String getFileFormat(File path) {
String fileName = path.getName(); String fileName = path.getName();
String fileFormat = fileName.contains(".") ? fileName.substring(fileName.lastIndexOf(".") + 1) : ""; String fileFormat = fileName.contains(".") ? fileName.substring(fileName.lastIndexOf(".") + 1) : "";

View File

@ -24,6 +24,7 @@ public class TextProcessing {
private int maxWords = 0; private int maxWords = 0;
private Set<String> stopwordList = new HashSet<>(); private Set<String> stopwordList = new HashSet<>();
//Extract text from file with supported format
public String formatToText(File file, String format) { public String formatToText(File file, String format) {
StringBuilder text = new StringBuilder(); StringBuilder text = new StringBuilder();
try { try {
@ -38,9 +39,10 @@ public class TextProcessing {
} }
return text.toString(); return text.toString();
case "pdf": case "pdf":
PDDocument document = PDDocument.load(file); try (PDDocument document = PDDocument.load(file)) {
PDFTextStripper pdfStripper = new PDFTextStripper(); PDFTextStripper pdfStripper = new PDFTextStripper();
return pdfStripper.getText(document); return pdfStripper.getText(document);
}
case "docx": case "docx":
XWPFDocument officeDocument = new XWPFDocument(new FileInputStream(file)); XWPFDocument officeDocument = new XWPFDocument(new FileInputStream(file));
@ -67,6 +69,7 @@ public class TextProcessing {
return text.toString(); return text.toString();
} }
//Add stopwords from the given word map to the stopword list
public void textToSetStopwords(Map<String, Integer> words) { public void textToSetStopwords(Map<String, Integer> words) {
Set<String> stopwords = new HashSet<>(); Set<String> stopwords = new HashSet<>();
for (Map.Entry<String, Integer> entry : words.entrySet()) { for (Map.Entry<String, Integer> entry : words.entrySet()) {
@ -75,10 +78,12 @@ public class TextProcessing {
stopwordList.addAll(stopwords); stopwordList.addAll(stopwords);
} }
//Add a single stopword to the stopword list
public void addToStopWords(String stopword) { public void addToStopWords(String stopword) {
stopwordList.add(stopword); stopwordList.add(stopword);
} }
//Set maxwords for html
public Map<String, Integer> maxShowWords(Map<String, Integer> words, int maxWords) { public Map<String, Integer> maxShowWords(Map<String, Integer> words, int maxWords) {
HashMap <String, Integer> cuttedHashmap = new HashMap<>(); HashMap <String, Integer> cuttedHashmap = new HashMap<>();
int index = maxWords; int index = maxWords;
@ -91,6 +96,7 @@ public class TextProcessing {
return cuttedHashmap; return cuttedHashmap;
} }
//Sort List for cutting the map with words
public Map<String, Integer> sortList(Map<String, Integer> unsortedMap) { public Map<String, Integer> sortList(Map<String, Integer> unsortedMap) {
List<Map.Entry<String, Integer>> entryList = new ArrayList<>(unsortedMap.entrySet()); List<Map.Entry<String, Integer>> entryList = new ArrayList<>(unsortedMap.entrySet());
@ -103,6 +109,7 @@ public class TextProcessing {
return sortedMap; return sortedMap;
} }
//Tokenizing, stemming, lowercasing and stopword filtering using Apache Lucene
public Map<String, Integer> tokenizingFile(String text) { public Map<String, Integer> tokenizingFile(String text) {
Map<String, Integer> words = new HashMap<>(); Map<String, Integer> words = new HashMap<>();

View File

@ -9,6 +9,7 @@ import java.net.URL;
public class URLContentLoader { public class URLContentLoader {
private String urlPath; private String urlPath;
//extract Content from URL
public String loadURLContent() { public String loadURLContent() {
StringBuilder text = new StringBuilder(); StringBuilder text = new StringBuilder();
try { try {

View File

@ -6,6 +6,7 @@ import java.util.Map;
public class WordCloudCreator { public class WordCloudCreator {
private int maxFontSize = 70; private int maxFontSize = 70;
//Create html file with clickable words
public boolean insertWordsIntoTemplate(Map<String, Integer> wordMap) { public boolean insertWordsIntoTemplate(Map<String, Integer> wordMap) {
File templateFile = new File("wordcloud.html"); // Template in project directory File templateFile = new File("wordcloud.html"); // Template in project directory
File outputFile = new File("createdHTML.html"); // Output in project directory File outputFile = new File("createdHTML.html"); // Output in project directory
@ -28,7 +29,7 @@ public class WordCloudCreator {
int frequency = entry.getValue(); int frequency = entry.getValue();
int fontSize = Math.min(10 + frequency * 2, maxFontSize); // Example: Base size 10px, increase by 2px per frequency int fontSize = Math.min(10 + frequency * 2, maxFontSize); // Example: Base size 10px, increase by 2px per frequency
wordEntries.append(String.format( wordEntries.append(String.format(
"<span id=\"%d\" class=\"wrd\" style=\"font-size:%dpx;\">" + "<span id=\"%d\" class=\"wrd\" style=\"font-size:%dpx; margin-right:10px\">" +
"<a href=\"https://www.google.com/search?q=%s\" target=\"_blank\">%s</a>" + "<a href=\"https://www.google.com/search?q=%s\" target=\"_blank\">%s</a>" +
"</span>\n", "</span>\n",
id++, fontSize, word, word id++, fontSize, word, word

View File

@ -64,7 +64,7 @@ public class TUI {
public void fileMenu() { public void fileMenu() {
while(fMenu) { while(fMenu) {
System.out.println("(0) Load Stopwords\n(1) Add to Stopwords\n(2) Set Max Words in HTML\n" + System.out.println("(0) Load Stopwords\n(1) Add to Stopwords\n(2) Set Max Words in HTML\n" +
"(3) Stemming not functioning!\n(4) Create WordCloud and Exit"); "(3) Activate German stemming\n(4) Create WordCloud and Exit");
option = Integer.parseInt(scan.nextLine()); option = Integer.parseInt(scan.nextLine());
switch(option) { switch(option) {
case (0): case (0):
@ -74,7 +74,8 @@ public class TUI {
System.out.println("File loaded successful!\n"); System.out.println("File loaded successful!\n");
} }
else{ else{
System.out.println("Cannot load one more File!\n"); System.out.println("Cannot load one more File! Please use for more stopwords words " +
"menu option (1).\n");
} }
} }
else { else {