From 59857d117359544d936ba7f4836fb752a03b5a20 Mon Sep 17 00:00:00 2001 From: Daniel Fromm <3015351@stud.hs-mannheim.de> Date: Tue, 13 May 2025 01:16:01 +0200 Subject: [PATCH] some refactoring and cleared a bug with PDFDocument file opening warning --- src/main/java/domain/FileLoader.java | 1 + src/main/java/domain/TextProcessing.java | 13 ++++++++++--- src/main/java/domain/URLContentLoader.java | 1 + src/main/java/domain/WordCloudCreator.java | 3 ++- src/main/java/tui/TUI.java | 5 +++-- 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/main/java/domain/FileLoader.java b/src/main/java/domain/FileLoader.java index 1a33927..6e852eb 100644 --- a/src/main/java/domain/FileLoader.java +++ b/src/main/java/domain/FileLoader.java @@ -41,6 +41,7 @@ public class FileLoader { } } + //detect format from file for further processing public String getFileFormat(File path) { String fileName = path.getName(); String fileFormat = fileName.contains(".") ? fileName.substring(fileName.lastIndexOf(".") + 1) : ""; diff --git a/src/main/java/domain/TextProcessing.java b/src/main/java/domain/TextProcessing.java index 4f6c788..cd1499f 100644 --- a/src/main/java/domain/TextProcessing.java +++ b/src/main/java/domain/TextProcessing.java @@ -24,6 +24,7 @@ public class TextProcessing { private int maxWords = 0; private Set stopwordList = new HashSet<>(); + //Extract text from file with supported format public String formatToText(File file, String format) { StringBuilder text = new StringBuilder(); try { @@ -38,9 +39,10 @@ public class TextProcessing { } return text.toString(); case "pdf": - PDDocument document = PDDocument.load(file); - PDFTextStripper pdfStripper = new PDFTextStripper(); - return pdfStripper.getText(document); + try (PDDocument document = PDDocument.load(file)) { + PDFTextStripper pdfStripper = new PDFTextStripper(); + return pdfStripper.getText(document); + } case "docx": XWPFDocument officeDocument = new XWPFDocument(new FileInputStream(file)); @@ -67,6 +69,7 
@@ public class TextProcessing { return text.toString(); } + //Set stopwords in list public void textToSetStopwords(Map words) { Set stopwords = new HashSet<>(); for (Map.Entry entry : words.entrySet()) { @@ -75,10 +78,12 @@ public class TextProcessing { stopwordList.addAll(stopwords); } + //Set stopword in list public void addToStopWords(String stopword) { stopwordList.add(stopword); } + //Set maxwords for html public Map maxShowWords(Map words, int maxWords) { HashMap cuttedHashmap = new HashMap<>(); int index = maxWords; @@ -91,6 +96,7 @@ public class TextProcessing { return cuttedHashmap; } + //Sort List for cutting the map with words public Map sortList(Map unsortedMap) { List> entryList = new ArrayList<>(unsortedMap.entrySet()); @@ -103,6 +109,7 @@ public class TextProcessing { return sortedMap; } + //tokenizing, stemming, lowercasing, stopwordfiltering Method Apache Lucene public Map tokenizingFile(String text) { Map words = new HashMap<>(); diff --git a/src/main/java/domain/URLContentLoader.java b/src/main/java/domain/URLContentLoader.java index c448989..b0599fb 100644 --- a/src/main/java/domain/URLContentLoader.java +++ b/src/main/java/domain/URLContentLoader.java @@ -9,6 +9,7 @@ import java.net.URL; public class URLContentLoader { private String urlPath; + //extract Content from URL public String loadURLContent() { StringBuilder text = new StringBuilder(); try { diff --git a/src/main/java/domain/WordCloudCreator.java b/src/main/java/domain/WordCloudCreator.java index 8929e8d..931d67a 100644 --- a/src/main/java/domain/WordCloudCreator.java +++ b/src/main/java/domain/WordCloudCreator.java @@ -6,6 +6,7 @@ import java.util.Map; public class WordCloudCreator { private int maxFontSize = 70; + //Create html file with clickable words public boolean insertWordsIntoTemplate(Map wordMap) { File templateFile = new File("wordcloud.html"); // Template in project directory File outputFile = new File("createdHTML.html"); // Output in project directory @@ -28,7 +29,7 @@ 
public class WordCloudCreator { int frequency = entry.getValue(); int fontSize = Math.min(10 + frequency * 2, maxFontSize); // Example: Base size 10px, increase by 2px per frequency wordEntries.append(String.format( - "" + + "" + "%s" + "\n", id++, fontSize, word, word diff --git a/src/main/java/tui/TUI.java b/src/main/java/tui/TUI.java index 8d0b176..28f1877 100644 --- a/src/main/java/tui/TUI.java +++ b/src/main/java/tui/TUI.java @@ -64,7 +64,7 @@ public class TUI { public void fileMenu() { while(fMenu) { System.out.println("(0) Load Stopwords\n(1) Add to Stopwords\n(2) Set Max Words in HTML\n" + - "(3) Stemming not functioning!\n(4) Create WordCloud and Exit"); + "(3) Activate German stemming\n(4) Create WordCloud and Exit"); option = Integer.parseInt(scan.nextLine()); switch(option) { case (0): @@ -74,7 +74,8 @@ public class TUI { System.out.println("File loaded successful!\n"); } else{ - System.out.println("Cannot load one more File!\n"); + System.out.println("Cannot load one more File! Please use for more stopwords words " + + "menu option (1).\n"); } } else {