diff --git a/pom.xml b/pom.xml
index 52adb1c..0fb13ab 100644
--- a/pom.xml
+++ b/pom.xml
@@ -14,6 +14,26 @@
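             <artifactId>pdfbox</artifactId>
             <version>2.0.29</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml</artifactId>
+            <version>5.2.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi</artifactId>
+            <version>5.2.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.xmlbeans</groupId>
+            <artifactId>xmlbeans</artifactId>
+            <version>5.1.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-core</artifactId>
+            <version>2.18.0</version>
+        </dependency>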
diff --git a/src/main/java/domain/FileLoader.java b/src/main/java/domain/FileLoader.java
new file mode 100644
index 0000000..9b0b100
--- /dev/null
+++ b/src/main/java/domain/FileLoader.java
@@ -0,0 +1,82 @@
+package domain;
+
+import javax.swing.*;
+import javax.swing.filechooser.FileNameExtensionFilter;
+import java.awt.*;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * Lets the user pick an input document through a Swing file chooser and
+ * classifies files by their name extension.
+ */
+public class FileLoader {
+    /** Returned by {@link #getFileFormat(File)} for anything it cannot classify. */
+    private static final String UNSUPPORTED_FORMAT = "File format not supported";
+
+    /** Most recently approved selection; {@code null} until a file has been chosen. */
+    private File inputFile;
+
+    public FileLoader() {
+        this.inputFile = null;
+    }
+
+    /**
+     * Opens a file chooser offering filters for PDF, text, Word and PowerPoint files.
+     * (Originally an AI-generated method; the txt/docx/pptx filters were added by hand.)
+     *
+     * @return the file approved in this dialog; if the dialog is dismissed, the
+     *         previously stored selection, which is {@code null} when nothing has
+     *         been chosen yet
+     * @throws RuntimeException wrapping the {@link HeadlessException} caught while
+     *         creating or showing the chooser
+     */
+    public File loadFileGUI() {
+        try {
+            JFileChooser fileChooser = new JFileChooser();
+            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf"));
+            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt"));
+            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx"));
+            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx"));
+            int result = fileChooser.showOpenDialog(null);
+
+            if (result == JFileChooser.APPROVE_OPTION) {
+                inputFile = fileChooser.getSelectedFile();
+            }
+            return inputFile;
+        } catch (HeadlessException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Derives the format key from the extension of a file name, ignoring case.
+     *
+     * @param file the file to classify; may be {@code null}, e.g. when no file was chosen
+     * @return {@code "pdf"}, {@code "txt"}, {@code "docx"} or {@code "pptx"}; otherwise
+     *         {@code "File format not supported"}
+     */
+    public String getFileFormat(File file) {
+        if (file == null) {
+            // loadFileGUI() returns null when nothing was selected; classify that as
+            // unsupported instead of failing with a NullPointerException.
+            return UNSUPPORTED_FORMAT;
+        }
+        String fileName = file.getName();
+        int dot = fileName.lastIndexOf('.');
+        String extension = dot >= 0 ? fileName.substring(dot + 1).toLowerCase(Locale.ROOT) : "";
+
+        switch (extension) {
+            case "pdf":
+            case "txt":
+            case "docx":
+            case "pptx":
+                return extension;
+            default:
+                return UNSUPPORTED_FORMAT;
+        }
+    }
+}
diff --git a/src/main/java/domain/FileManager.java b/src/main/java/domain/FileManager.java
index 2f9a635..5976a56 100644
--- a/src/main/java/domain/FileManager.java
+++ b/src/main/java/domain/FileManager.java
@@ -1,115 +1,26 @@
package domain;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.text.PDFTextStripper;
-
-import javax.swing.*;
-import javax.swing.filechooser.FileNameExtensionFilter;
-
-import java.awt.*;
-import java.io.*;
+import java.io.File;
import java.util.HashMap;
-import java.util.Map;
-
public class FileManager {
- File inputFile;
- String originalPath;
- String goalPath;
+ // Shows the file chooser and maps a file name to a format key.
+ FileLoader fileLoader = new FileLoader();
+ // Extracts text from a file and counts the words of a text.
+ TextProcessing textProcessing = new TextProcessing();
+ // File returned by fileLoader.loadFileGUI() in the most recent loadFile() call.
+ private File file;
- public FileManager() {
- originalPath = "quelle.pdf";
- goalPath = "ziel.txt";
- inputFile = null;
+    /**
+     * Asks the user for a file and returns its content as plain text.
+     *
+     * @return the text extracted by {@code textProcessing.formatToText}; when no file
+     *         was selected, whatever that method yields for a {@code null} file
+     */
+    public String loadFile() {
+        file = fileLoader.loadFileGUI();
+        // A dismissed dialog can yield null; skip the extension lookup, which would
+        // dereference the file, and let formatToText deal with the missing file.
+        String fileFormat = (file == null) ? "" : fileLoader.getFileFormat(file);
+        return textProcessing.formatToText(file, fileFormat);
     }
- public OutputStream loadFilePath() {
- InputStream in;
- OutputStream out = null;
-
- try {
- in = new FileInputStream(originalPath);
- out = new FileOutputStream(goalPath);
-
- byte[] buffer = new byte[1024];
- int gelesen;
-
- while ((gelesen = in.read(buffer)) > -1) {
- out.write(buffer, 0, gelesen);
- }
-
- in.close();
- out.close();
- return out;
- }
- catch (IOException e) {
- e.printStackTrace();
- }
- return out;
- }
-
- public File loadFileGUI() {
- try {
- JFileChooser fileChooser = new JFileChooser();
- fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf"));
- fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt"));
- fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx"));
- fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx"));
- int result = fileChooser.showOpenDialog(null);
-
- if (result == JFileChooser.APPROVE_OPTION) {
- inputFile = fileChooser.getSelectedFile();
- }
- return inputFile;
- } catch (HeadlessException e) {
- throw new RuntimeException(e);
- }
- }
-
- public HashMap tokenizingText(File inputFile){
- HashMap filteredWords = new HashMap<>();
- try {
- PDDocument document = null;
- if(inputFile != null) {
- document = PDDocument.load(inputFile);
- PDFTextStripper pdfStripper = new PDFTextStripper();
- String text = pdfStripper.getText(document);
-
- //Tokenizing der Wörter
- String splittedText = "[,\\s\\.:/!§$%&/()=?+*~#.;_<>^°\"']";
- String[] textWords = text.split(splittedText);
- for(String word : textWords){
- if (filteredWords.containsKey(word)) {
- filteredWords.compute(word, (k, counter) -> counter + 1);
- }
- else {
- filteredWords.put(word, 1);
- }
- }
- for(Map.Entry e : filteredWords.entrySet()){
- System.out.println(e.getKey() + " = " + e.getValue());
- }
- if (document != null) {
- document.close();
- }
- }
- } catch (Exception e){
- e.printStackTrace();
- }
- return filteredWords;
- }
-
- public HashMap maxShowWords(int number, HashMap words) {
- HashMap cuttedHashmap = new HashMap<>();
- int index = number;
- for (String word : words.keySet()) {
- if(index > 0) {
- cuttedHashmap.put(word, words.get(word));
- }
- index--;
- }
- return cuttedHashmap;
+    /**
+     * Delegates word counting to {@code textProcessing}.
+     *
+     * @param text the text to split into words
+     * @return the map produced by {@code textProcessing.tokenizingText(text)}
+     */
+    public HashMap tokenizingText(String text) {
+        return textProcessing.tokenizingText(text);
     }
+ // Placeholder: the body is empty, so calling this currently does nothing.
public void saveFile(){}
+
+
}
\ No newline at end of file
diff --git a/src/main/java/domain/TextProcessing.java b/src/main/java/domain/TextProcessing.java
new file mode 100644
index 0000000..c6b2e20
--- /dev/null
+++ b/src/main/java/domain/TextProcessing.java
@@ -0,0 +1,147 @@
+package domain;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFSlide;
+import org.apache.poi.xslf.usermodel.XSLFShape;
+import org.apache.poi.xslf.usermodel.XSLFTextShape;
+
+import java.io.*;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+/**
+ * Extracts plain text from txt, pdf, docx and pptx files and counts word occurrences.
+ */
+public class TextProcessing {
+
+    /** Result of {@link #formatToText} when there is no file or the format is not handled. */
+    private static final String NOTHING_FOUND = "Nothing found!";
+
+    /** Single delimiter character separating words: whitespace, punctuation and dashes. */
+    private static final Pattern WORD_DELIMITER =
+            Pattern.compile("[,\\s\\.:/!§$%&/()=?+*~#.;_<\\-–>^°\"']");
+
+    /**
+     * Reads the text content of {@code file} according to {@code format}.
+     *
+     * @param file   document to read; may be {@code null}
+     * @param format {@code "txt"}, {@code "pdf"}, {@code "docx"} or {@code "pptx"}
+     * @return the extracted text, or {@code "Nothing found!"} when {@code file} or
+     *         {@code format} is {@code null} or the format is none of the handled values
+     * @throws UncheckedIOException (a {@link RuntimeException}) when reading fails
+     */
+    public String formatToText(File file, String format) {
+        if (file == null || format == null) {
+            return NOTHING_FOUND;
+        }
+        try {
+            switch (format) {
+                case "txt":
+                    return readPlainText(file);
+                case "pdf":
+                    return readPdf(file);
+                case "docx":
+                    return readDocx(file);
+                case "pptx":
+                    return readPptx(file);
+                default:
+                    return NOTHING_FOUND;
+            }
+        } catch (IOException e) {
+            throw new UncheckedIOException("Could not read " + file, e);
+        }
+    }
+
+    /** Reads a text file line by line, ending every line with {@code '\n'}. */
+    private static String readPlainText(File file) throws IOException {
+        StringBuilder text = new StringBuilder();
+        // try-with-resources closes the reader (and the wrapped FileReader) on every path.
+        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                text.append(line).append("\n");
+            }
+        }
+        return text.toString();
+    }
+
+    /** Returns the text PDFTextStripper extracts from a PDF, closing the document afterwards. */
+    private static String readPdf(File file) throws IOException {
+        try (PDDocument document = PDDocument.load(file)) {
+            return new PDFTextStripper().getText(document);
+        }
+    }
+
+    /** Joins the paragraphs of a Word document, one paragraph per line. */
+    private static String readDocx(File file) throws IOException {
+        StringBuilder text = new StringBuilder();
+        try (InputStream in = new FileInputStream(file);
+             XWPFDocument document = new XWPFDocument(in)) {
+            for (XWPFParagraph paragraph : document.getParagraphs()) {
+                text.append(paragraph.getText()).append("\n");
+            }
+        }
+        return text.toString();
+    }
+
+    /** Joins the text of every text shape on every slide, one shape per line. */
+    private static String readPptx(File file) throws IOException {
+        StringBuilder text = new StringBuilder();
+        try (InputStream in = new FileInputStream(file);
+             XMLSlideShow slideShow = new XMLSlideShow(in)) {
+            for (XSLFSlide slide : slideShow.getSlides()) {
+                for (XSLFShape shape : slide.getShapes()) {
+                    if (shape instanceof XSLFTextShape) {
+                        text.append(((XSLFTextShape) shape).getText()).append("\n");
+                    }
+                }
+            }
+        }
+        return text.toString();
+    }
+
+    /**
+     * Copies at most {@code number} entries of {@code words} into a new map.
+     * NOTE(review): HashMap iteration order is unspecified, so which entries are kept
+     * is arbitrary; sort by count first if the most frequent words are intended.
+     *
+     * @param number maximum number of entries to keep; zero or less yields an empty map
+     * @param words  word counts to cut down
+     * @return a new map holding up to {@code number} entries of {@code words}
+     */
+    public HashMap<String, Integer> maxShowWords(int number, HashMap<String, Integer> words) {
+        HashMap<String, Integer> limited = new HashMap<>();
+        for (Map.Entry<String, Integer> entry : words.entrySet()) {
+            if (limited.size() >= number) {
+                break; // limit reached, no need to walk the rest of the map
+            }
+            limited.put(entry.getKey(), entry.getValue());
+        }
+        return limited;
+    }
+
+    /**
+     * Splits {@code text} at delimiter characters and counts every non-empty token.
+     *
+     * @param text text to tokenize; {@code null} is treated like an empty text
+     * @return map from word to its number of occurrences; empty when there are no words
+     */
+    public HashMap<String, Integer> tokenizingText(String text) {
+        HashMap<String, Integer> wordCounts = new HashMap<>();
+        if (text == null || text.isEmpty()) {
+            return wordCounts;
+        }
+        for (String word : WORD_DELIMITER.split(text)) {
+            // Neighbouring delimiters such as ", " produce empty tokens, which are not words.
+            if (!word.isEmpty()) {
+                wordCounts.merge(word, 1, Integer::sum);
+            }
+        }
+        return wordCounts;
+    }
+}
diff --git a/src/main/java/facade/WordCloudManager.java b/src/main/java/facade/WordCloudManager.java
index 5551908..79976b3 100644
--- a/src/main/java/facade/WordCloudManager.java
+++ b/src/main/java/facade/WordCloudManager.java
@@ -4,9 +4,8 @@ import domain.FileManager;
import domain.PictureManager;
import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.OutputStream;
import java.util.HashMap;
+import java.util.List;
public class WordCloudManager {
FileManager fileManager;
@@ -18,18 +17,14 @@ public class WordCloudManager {
}
+ /**
+ * Loads a file's text through fileManager.loadFile(), tokenizes it with
+ * fileManager.tokenizingText() and reports whether a word map came back.
+ * The map is only checked here; it is neither stored nor returned.
+ *
+ * @return true if the word map is non-null, false otherwise
+ */
public boolean loadFileGUI() {
- File inputFile = fileManager.loadFileGUI();
- HashMap wordMap = fileManager.tokenizingText(inputFile);
- if(wordMap == null) {
- return false;
- }
- else {
+
+ String fileText = fileManager.loadFile();
+ HashMap wordMap = fileManager.tokenizingText(fileText);
+ // NOTE(review): the tokenizingText implementation added in this patch always returns a map instance, so the else branch looks unreachable - confirm.
+ if(wordMap != null) {
return true;
}
- }
-
- public void loadFilePath() {
- OutputStream inputFile = fileManager.loadFilePath();
-// fileManager.processFile(null, inputFile);
+ else {
+ return false;
+ }
}
}
diff --git a/src/main/java/tui/TUI.java b/src/main/java/tui/TUI.java
index 7ef14f4..a74ab4b 100644
--- a/src/main/java/tui/TUI.java
+++ b/src/main/java/tui/TUI.java
@@ -1,10 +1,7 @@
package tui;
-import domain.FileManager;
import facade.WordCloudManager;
-import java.io.FileNotFoundException;
-import java.io.IOException;
import java.util.Scanner;
public class TUI {
@@ -17,16 +14,14 @@ public class TUI {
public void tui() {
Scanner scan = new Scanner(System.in);
- while(isRunning) {
+ WordCloudManager wcm = new WordCloudManager();
+// while(isRunning) {
System.out.println("Welcome to Word Cloud.\nMenu:\n\n(0) Load File from main path\n(1) Load File with Gui" +
"\n(2) Save File\n(3) Show Picture\n(4) Exit");
int option = scan.nextInt();
- WordCloudManager wcm = new WordCloudManager();
-
switch (option) {
case (0):
//Load File Path
- wcm.loadFilePath();
break;
case (1):
//Load File GUI
@@ -48,7 +43,7 @@ public class TUI {
System.out.println("Close Program!");
break;
}
- }
+// }
scan.close();
}
}