Some refactoring and fixed a bug that caused a warning when opening PDF files (PDDocument is now closed after use)

main
Daniel Fromm 2025-05-13 01:16:01 +02:00
parent 19350fc80c
commit 59857d1173
5 changed files with 17 additions and 6 deletions

View File

@ -41,6 +41,7 @@ public class FileLoader {
}
}
//detect format from file for further processing
public String getFileFormat(File path) {
String fileName = path.getName();
String fileFormat = fileName.contains(".") ? fileName.substring(fileName.lastIndexOf(".") + 1) : "";

View File

@ -24,6 +24,7 @@ public class TextProcessing {
private int maxWords = 0;
private Set<String> stopwordList = new HashSet<>();
//Extract text from file with supported format
public String formatToText(File file, String format) {
StringBuilder text = new StringBuilder();
try {
@ -38,9 +39,10 @@ public class TextProcessing {
}
return text.toString();
case "pdf":
PDDocument document = PDDocument.load(file);
PDFTextStripper pdfStripper = new PDFTextStripper();
return pdfStripper.getText(document);
try (PDDocument document = PDDocument.load(file)) {
PDFTextStripper pdfStripper = new PDFTextStripper();
return pdfStripper.getText(document);
}
case "docx":
XWPFDocument officeDocument = new XWPFDocument(new FileInputStream(file));
@ -67,6 +69,7 @@ public class TextProcessing {
return text.toString();
}
//Set stopwords in list
public void textToSetStopwords(Map<String, Integer> words) {
Set<String> stopwords = new HashSet<>();
for (Map.Entry<String, Integer> entry : words.entrySet()) {
@ -75,10 +78,12 @@ public class TextProcessing {
stopwordList.addAll(stopwords);
}
//Add a single stopword to the stopword list
public void addToStopWords(String stopword) {
stopwordList.add(stopword);
}
//Set maxwords for html
public Map<String, Integer> maxShowWords(Map<String, Integer> words, int maxWords) {
HashMap <String, Integer> cuttedHashmap = new HashMap<>();
int index = maxWords;
@ -91,6 +96,7 @@ public class TextProcessing {
return cuttedHashmap;
}
//Sort List for cutting the map with words
public Map<String, Integer> sortList(Map<String, Integer> unsortedMap) {
List<Map.Entry<String, Integer>> entryList = new ArrayList<>(unsortedMap.entrySet());
@ -103,6 +109,7 @@ public class TextProcessing {
return sortedMap;
}
//Tokenizing, stemming, lowercasing and stopword filtering using Apache Lucene
public Map<String, Integer> tokenizingFile(String text) {
Map<String, Integer> words = new HashMap<>();

View File

@ -9,6 +9,7 @@ import java.net.URL;
public class URLContentLoader {
private String urlPath;
//extract Content from URL
public String loadURLContent() {
StringBuilder text = new StringBuilder();
try {

View File

@ -6,6 +6,7 @@ import java.util.Map;
public class WordCloudCreator {
private int maxFontSize = 70;
//Create html file with clickable words
public boolean insertWordsIntoTemplate(Map<String, Integer> wordMap) {
File templateFile = new File("wordcloud.html"); // Template in project directory
File outputFile = new File("createdHTML.html"); // Output in project directory
@ -28,7 +29,7 @@ public class WordCloudCreator {
int frequency = entry.getValue();
int fontSize = Math.min(10 + frequency * 2, maxFontSize); // Example: Base size 10px, increase by 2px per frequency
wordEntries.append(String.format(
"<span id=\"%d\" class=\"wrd\" style=\"font-size:%dpx;\">" +
"<span id=\"%d\" class=\"wrd\" style=\"font-size:%dpx; margin-right:10px\">" +
"<a href=\"https://www.google.com/search?q=%s\" target=\"_blank\">%s</a>" +
"</span>\n",
id++, fontSize, word, word

View File

@ -64,7 +64,7 @@ public class TUI {
public void fileMenu() {
while(fMenu) {
System.out.println("(0) Load Stopwords\n(1) Add to Stopwords\n(2) Set Max Words in HTML\n" +
"(3) Stemming not functioning!\n(4) Create WordCloud and Exit");
"(3) Activate German stemming\n(4) Create WordCloud and Exit");
option = Integer.parseInt(scan.nextLine());
switch(option) {
case (0):
@ -74,7 +74,8 @@ public class TUI {
System.out.println("File loaded successful!\n");
}
else{
System.out.println("Cannot load one more File!\n");
System.out.println("Cannot load one more File! Please use for more stopwords words " +
"menu option (1).\n");
}
}
else {