Some refactoring and fixed a bug that caused a warning when opening PDF files with PDDocument
parent
19350fc80c
commit
59857d1173
|
@ -41,6 +41,7 @@ public class FileLoader {
|
|||
}
|
||||
}
|
||||
|
||||
//Detect the file format from the file name extension for further processing
|
||||
public String getFileFormat(File path) {
|
||||
String fileName = path.getName();
|
||||
String fileFormat = fileName.contains(".") ? fileName.substring(fileName.lastIndexOf(".") + 1) : "";
|
||||
|
|
|
@ -24,6 +24,7 @@ public class TextProcessing {
|
|||
private int maxWords = 0;
|
||||
private Set<String> stopwordList = new HashSet<>();
|
||||
|
||||
//Extract text from file with supported format
|
||||
public String formatToText(File file, String format) {
|
||||
StringBuilder text = new StringBuilder();
|
||||
try {
|
||||
|
@ -38,9 +39,10 @@ public class TextProcessing {
|
|||
}
|
||||
return text.toString();
|
||||
case "pdf":
|
||||
PDDocument document = PDDocument.load(file);
|
||||
PDFTextStripper pdfStripper = new PDFTextStripper();
|
||||
return pdfStripper.getText(document);
|
||||
try (PDDocument document = PDDocument.load(file)) {
|
||||
PDFTextStripper pdfStripper = new PDFTextStripper();
|
||||
return pdfStripper.getText(document);
|
||||
}
|
||||
|
||||
case "docx":
|
||||
XWPFDocument officeDocument = new XWPFDocument(new FileInputStream(file));
|
||||
|
@ -67,6 +69,7 @@ public class TextProcessing {
|
|||
return text.toString();
|
||||
}
|
||||
|
||||
//Add stopwords taken from the given word map to the stopword list
|
||||
public void textToSetStopwords(Map<String, Integer> words) {
|
||||
Set<String> stopwords = new HashSet<>();
|
||||
for (Map.Entry<String, Integer> entry : words.entrySet()) {
|
||||
|
@ -75,10 +78,12 @@ public class TextProcessing {
|
|||
stopwordList.addAll(stopwords);
|
||||
}
|
||||
|
||||
//Add a single stopword to the stopword list (stopwordList)
|
||||
public void addToStopWords(String stopword) {
|
||||
stopwordList.add(stopword);
|
||||
}
|
||||
|
||||
//Set maxwords for html
|
||||
public Map<String, Integer> maxShowWords(Map<String, Integer> words, int maxWords) {
|
||||
HashMap <String, Integer> cuttedHashmap = new HashMap<>();
|
||||
int index = maxWords;
|
||||
|
@ -91,6 +96,7 @@ public class TextProcessing {
|
|||
return cuttedHashmap;
|
||||
}
|
||||
|
||||
//Sort the entries of the word map so that the map can be cut afterwards
|
||||
public Map<String, Integer> sortList(Map<String, Integer> unsortedMap) {
|
||||
List<Map.Entry<String, Integer>> entryList = new ArrayList<>(unsortedMap.entrySet());
|
||||
|
||||
|
@ -103,6 +109,7 @@ public class TextProcessing {
|
|||
return sortedMap;
|
||||
}
|
||||
|
||||
//Tokenizing, stemming, lowercasing and stopword filtering using Apache Lucene
|
||||
public Map<String, Integer> tokenizingFile(String text) {
|
||||
Map<String, Integer> words = new HashMap<>();
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ import java.net.URL;
|
|||
public class URLContentLoader {
|
||||
private String urlPath;
|
||||
|
||||
//extract Content from URL
|
||||
public String loadURLContent() {
|
||||
StringBuilder text = new StringBuilder();
|
||||
try {
|
||||
|
|
|
@ -6,6 +6,7 @@ import java.util.Map;
|
|||
public class WordCloudCreator {
|
||||
private int maxFontSize = 70;
|
||||
|
||||
//Create html file with clickable words
|
||||
public boolean insertWordsIntoTemplate(Map<String, Integer> wordMap) {
|
||||
File templateFile = new File("wordcloud.html"); // Template in project directory
|
||||
File outputFile = new File("createdHTML.html"); // Output in project directory
|
||||
|
@ -28,7 +29,7 @@ public class WordCloudCreator {
|
|||
int frequency = entry.getValue();
|
||||
int fontSize = Math.min(10 + frequency * 2, maxFontSize); // Example: Base size 10px, increase by 2px per frequency
|
||||
wordEntries.append(String.format(
|
||||
"<span id=\"%d\" class=\"wrd\" style=\"font-size:%dpx;\">" +
|
||||
"<span id=\"%d\" class=\"wrd\" style=\"font-size:%dpx; margin-right:10px\">" +
|
||||
"<a href=\"https://www.google.com/search?q=%s\" target=\"_blank\">%s</a>" +
|
||||
"</span>\n",
|
||||
id++, fontSize, word, word
|
||||
|
|
|
@ -64,7 +64,7 @@ public class TUI {
|
|||
public void fileMenu() {
|
||||
while(fMenu) {
|
||||
System.out.println("(0) Load Stopwords\n(1) Add to Stopwords\n(2) Set Max Words in HTML\n" +
|
||||
"(3) Stemming not functioning!\n(4) Create WordCloud and Exit");
|
||||
"(3) Activate German stemming\n(4) Create WordCloud and Exit");
|
||||
option = Integer.parseInt(scan.nextLine());
|
||||
switch(option) {
|
||||
case (0):
|
||||
|
@ -74,7 +74,8 @@ public class TUI {
|
|||
System.out.println("File loaded successful!\n");
|
||||
}
|
||||
else{
|
||||
System.out.println("Cannot load one more File!\n");
|
||||
System.out.println("Cannot load one more File! Please use for more stopwords words " +
|
||||
"menu option (1).\n");
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
|
Loading…
Reference in New Issue