From af33d8a56681f583f7db382d89705e2b929691bc Mon Sep 17 00:00:00 2001
From: Daniel Fromm <3015351@stud.hs-mannheim.de>
Date: Thu, 8 May 2025 15:38:57 +0200
Subject: [PATCH 1/2] Refactored FileManager and added classes FileLoader and
 TextProcessing. PDF opening is functioning.

---
 src/main/java/domain/FileLoader.java       |  53 ++++++++++
 src/main/java/domain/FileManager.java      | 117 +++------------------
 src/main/java/domain/TextProcessing.java   |  72 +++++++++++++
 src/main/java/facade/WordCloudManager.java |  21 ++--
 src/main/java/tui/TUI.java                 |  11 +-
 5 files changed, 150 insertions(+), 124 deletions(-)
 create mode 100644 src/main/java/domain/FileLoader.java
 create mode 100644 src/main/java/domain/TextProcessing.java

diff --git a/src/main/java/domain/FileLoader.java b/src/main/java/domain/FileLoader.java
new file mode 100644
index 0000000..d98f24c
--- /dev/null
+++ b/src/main/java/domain/FileLoader.java
@@ -0,0 +1,53 @@
+package domain;
+
+import javax.swing.*;
+import javax.swing.filechooser.FileNameExtensionFilter;
+import java.awt.*;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class FileLoader {
+    private File inputFile;
+
+    public FileLoader() {
+        this.inputFile = null;
+    }
+
+    public File loadFileGUI() {
+        try {
+            JFileChooser fileChooser = new JFileChooser();
+            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf"));
+            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt"));
+            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx"));
+            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx"));
+            int result = fileChooser.showOpenDialog(null);
+
+            if (result == JFileChooser.APPROVE_OPTION) {
+                inputFile = fileChooser.getSelectedFile();
+            }
+            return inputFile;
+        } catch (HeadlessException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public String getFileFormat(File file) {
+        String fileName = file.getName();
+        String fileFormat = fileName.contains(".") ? fileName.substring(fileName.lastIndexOf(".") + 1) : "";
+
+        switch (fileFormat.toLowerCase()) {
+            case "pdf":
+                return "pdf";
+            case "txt":
+                return "txt";
+            case "docx":
+                return "docx";
+            case "pptx":
+                return "pptx";
+            default:
+                return "File format not supported";
+        }
+    }
+}
diff --git a/src/main/java/domain/FileManager.java b/src/main/java/domain/FileManager.java
index 2f9a635..5976a56 100644
--- a/src/main/java/domain/FileManager.java
+++ b/src/main/java/domain/FileManager.java
@@ -1,115 +1,26 @@
 package domain;
 
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.text.PDFTextStripper;
-
-import javax.swing.*;
-import javax.swing.filechooser.FileNameExtensionFilter;
-
-import java.awt.*;
-import java.io.*;
+import java.io.File;
 import java.util.HashMap;
-import java.util.Map;
-
 
 public class FileManager {
-    File inputFile;
-    String originalPath;
-    String goalPath;
+    FileLoader fileLoader = new FileLoader();
+    TextProcessing textProcessing = new TextProcessing();
+    private File file;
 
-    public FileManager() {
-        originalPath = "quelle.pdf";
-        goalPath = "ziel.txt";
-        inputFile = null;
+    public String loadFile() {
+        file = fileLoader.loadFileGUI();
+        String fileFormat = fileLoader.getFileFormat(file);
+        String text = textProcessing.formatToText(file, fileFormat);
+        return text;
     }
 
-    public OutputStream loadFilePath() {
-        InputStream in;
-        OutputStream out = null;
-        
-        try {
-            in = new FileInputStream(originalPath);
-            out = new FileOutputStream(goalPath);
-
-            byte[] buffer = new byte[1024];
-            int gelesen;
-
-            while ((gelesen = in.read(buffer)) > -1) {
-                out.write(buffer, 0, gelesen);
-            }
-
-            in.close();
-            out.close();
-            return out;
-        }
-        catch (IOException e) {
-            e.printStackTrace();
-        }
-        return out;
-    }
-
-    public File loadFileGUI() {
-        try {
-            JFileChooser fileChooser = new JFileChooser();
-            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf"));
-            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt"));
-            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx"));
-            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx"));
-            int result = fileChooser.showOpenDialog(null);
-
-            if (result == JFileChooser.APPROVE_OPTION) {
-                inputFile = fileChooser.getSelectedFile();
-            }
-            return inputFile;
-        } catch (HeadlessException e) {
-            throw new RuntimeException(e);
-        }
-    }
-
-    public HashMap tokenizingText(File inputFile){
-        HashMap<String, Integer> filteredWords = new HashMap<>();
-        try {
-            PDDocument document = null;
-            if(inputFile != null) {
-                document = PDDocument.load(inputFile);
-                PDFTextStripper pdfStripper = new PDFTextStripper();
-                String text = pdfStripper.getText(document);
-
-                //Tokenizing der Wörter
-                String splittedText = "[,\\s\\.:/!§$%&/()=?+*~#.;_<>^°\"']";
-                String[] textWords = text.split(splittedText);
-                for(String word : textWords){
-                        if (filteredWords.containsKey(word)) {
-                            filteredWords.compute(word, (k, counter) -> counter + 1);
-                        }
-                    else {
-                        filteredWords.put(word, 1);
-                    }
-                }
-                for(Map.Entry e : filteredWords.entrySet()){
-                    System.out.println(e.getKey() + " = " + e.getValue());
-                }
-                if (document != null) {
-                    document.close();
-                }
-            }
-        } catch (Exception e){
-            e.printStackTrace();
-        }
-        return filteredWords;
-    }
-
-    public HashMap maxShowWords(int number, HashMap<String, Integer> words) {
-        HashMap <String, Integer> cuttedHashmap = new HashMap<>();
-        int index = number;
-        for (String word : words.keySet()) {
-            if(index > 0) {
-                cuttedHashmap.put(word, words.get(word));
-            }
-            index--;
-        }
-        return cuttedHashmap;
+    public HashMap tokenizingText(String text) {
+        HashMap<String, Integer> wordMap = textProcessing.tokenizingText(text);
+        return wordMap;
     }
 
     public void saveFile(){}
+
+
 }
\ No newline at end of file
diff --git a/src/main/java/domain/TextProcessing.java b/src/main/java/domain/TextProcessing.java
new file mode 100644
index 0000000..ea4b6d5
--- /dev/null
+++ b/src/main/java/domain/TextProcessing.java
@@ -0,0 +1,72 @@
+package domain;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+
+import java.io.*;
+import java.util.HashMap;
+
+public class TextProcessing {
+
+    public String formatToText(File file, String format) {
+        PDDocument document;
+        try {
+            if (file != null) {
+                switch (format) {
+                    case "txt":
+
+                        break;
+
+                    case "pdf":
+                        document = PDDocument.load(file);
+                        PDFTextStripper pdfStripper = new PDFTextStripper();
+                        return pdfStripper.getText(document);
+
+                    case "docx":
+
+                        break;
+
+                    case "pptx":
+
+                        break;
+                }
+            }
+        }
+        catch (IOException ex) {
+            throw new RuntimeException(ex);
+        }
+        return "Nothing found!";
+    }
+
+    public HashMap maxShowWords(int number, HashMap<String, Integer> words) {
+        HashMap <String, Integer> cuttedHashmap = new HashMap<>();
+        int index = number;
+        for (String word : words.keySet()) {
+            if(index > 0) {
+                cuttedHashmap.put(word, words.get(word));
+            }
+            index--;
+        }
+        return cuttedHashmap;
+    }
+
+    public HashMap tokenizingText(String text){
+        HashMap<String, Integer> filteredWords = new HashMap<>();
+        try {
+                //Tokenizing der Wörter
+                String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<>^°\"']";
+                String[] textWords = text.split(splitter);
+                for(String word : textWords){
+                    if (filteredWords.containsKey(word)) {
+                        filteredWords.compute(word, (k, counter) -> counter + 1);
+                    }
+                    else {
+                        filteredWords.put(word, 1);
+                    }
+                }
+            } catch (Exception ex) {
+            throw new RuntimeException(ex);
+        }
+        return filteredWords;
+    }
+}
diff --git a/src/main/java/facade/WordCloudManager.java b/src/main/java/facade/WordCloudManager.java
index 5551908..79976b3 100644
--- a/src/main/java/facade/WordCloudManager.java
+++ b/src/main/java/facade/WordCloudManager.java
@@ -4,9 +4,8 @@ import domain.FileManager;
 import domain.PictureManager;
 
 import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.OutputStream;
 import java.util.HashMap;
+import java.util.List;
 
 public class WordCloudManager {
     FileManager fileManager;
@@ -18,18 +17,14 @@ public class WordCloudManager {
     }
 
     public boolean loadFileGUI() {
-        File inputFile = fileManager.loadFileGUI();
-        HashMap wordMap = fileManager.tokenizingText(inputFile);
-        if(wordMap == null) {
-            return false;
-        }
-        else {
+
+        String fileText = fileManager.loadFile();
+        HashMap wordMap = fileManager.tokenizingText(fileText);
+        if(wordMap != null) {
             return true;
         }
-    }
-
-    public void loadFilePath() {
-        OutputStream inputFile = fileManager.loadFilePath();
-//        fileManager.processFile(null, inputFile);
+        else {
+            return false;
+        }
     }
 }
diff --git a/src/main/java/tui/TUI.java b/src/main/java/tui/TUI.java
index 7ef14f4..a74ab4b 100644
--- a/src/main/java/tui/TUI.java
+++ b/src/main/java/tui/TUI.java
@@ -1,10 +1,7 @@
 package tui;
 
-import domain.FileManager;
 import facade.WordCloudManager;
 
-import java.io.FileNotFoundException;
-import java.io.IOException;
 import java.util.Scanner;
 
 public class TUI {
@@ -17,16 +14,14 @@ public class TUI {
 
     public void tui() {
         Scanner scan = new Scanner(System.in);
-        while(isRunning) {
+        WordCloudManager wcm = new WordCloudManager();
+//        while(isRunning) {
             System.out.println("Welcome to Word Cloud.\nMenu:\n\n(0) Load File from main path\n(1) Load File with Gui" +
                     "\n(2) Save File\n(3) Show Picture\n(4) Exit");
             int option = scan.nextInt();
-            WordCloudManager wcm = new WordCloudManager();
-
             switch (option) {
                 case (0):
                     //Load File Path
-                    wcm.loadFilePath();
                     break;
                 case (1):
                     //Load File GUI
@@ -48,7 +43,7 @@ public class TUI {
                     System.out.println("Close Program!");
                     break;
             }
-        }
+//        }
         scan.close();
     }
 }
-- 
2.43.0


From d9ae97aea42793fdb56846da5648db3d7cefeed0 Mon Sep 17 00:00:00 2001
From: Daniel Fromm <3015351@stud.hs-mannheim.de>
Date: Thu, 8 May 2025 22:09:30 +0200
Subject: [PATCH 2/2] Implement switch cases for building text from pptx, docx
 and txt files, and add comments to the loadFileGUI method

---
 pom.xml                                  | 20 +++++++++
 src/main/java/domain/FileLoader.java     |  8 ++--
 src/main/java/domain/TextProcessing.java | 55 ++++++++++++++++--------
 3 files changed, 62 insertions(+), 21 deletions(-)

diff --git a/pom.xml b/pom.xml
index 52adb1c..0fb13ab 100644
--- a/pom.xml
+++ b/pom.xml
@@ -14,6 +14,26 @@
         <artifactId>pdfbox</artifactId>
         <version>2.0.29</version>
     </dependency>
+    <dependency>
+        <groupId>org.apache.poi</groupId>
+        <artifactId>poi-ooxml</artifactId>
+        <version>5.2.3</version>
+    </dependency>
+    <dependency>
+        <groupId>org.apache.poi</groupId>
+        <artifactId>poi</artifactId>
+        <version>5.2.3</version>
+    </dependency>
+    <dependency>
+        <groupId>org.apache.xmlbeans</groupId>
+        <artifactId>xmlbeans</artifactId>
+        <version>5.1.1</version>
+    </dependency>
+    <dependency>
+        <groupId>org.apache.logging.log4j</groupId>
+        <artifactId>log4j-core</artifactId>
+        <version>2.18.0</version>
+    </dependency>
 </dependencies>
 
     <properties>
diff --git a/src/main/java/domain/FileLoader.java b/src/main/java/domain/FileLoader.java
index d98f24c..9b0b100 100644
--- a/src/main/java/domain/FileLoader.java
+++ b/src/main/java/domain/FileLoader.java
@@ -14,14 +14,14 @@ public class FileLoader {
     public FileLoader() {
         this.inputFile = null;
     }
-
+    //KI erstellte Methode
     public File loadFileGUI() {
         try {
             JFileChooser fileChooser = new JFileChooser();
             fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf"));
-            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt"));
-            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx"));
-            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx"));
+            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt")); //selbst hinzugefügt
+            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx")); //selbst hinzugefügt
+            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx")); //selbst hinzugefügt
             int result = fileChooser.showOpenDialog(null);
 
             if (result == JFileChooser.APPROVE_OPTION) {
diff --git a/src/main/java/domain/TextProcessing.java b/src/main/java/domain/TextProcessing.java
index ea4b6d5..c6b2e20 100644
--- a/src/main/java/domain/TextProcessing.java
+++ b/src/main/java/domain/TextProcessing.java
@@ -2,6 +2,12 @@ package domain;
 
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.text.PDFTextStripper;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFSlide;
+import org.apache.poi.xslf.usermodel.XSLFShape;
+import org.apache.poi.xslf.usermodel.XSLFTextShape;
 
 import java.io.*;
 import java.util.HashMap;
@@ -9,31 +15,44 @@ import java.util.HashMap;
 public class TextProcessing {
 
     public String formatToText(File file, String format) {
-        PDDocument document;
         try {
+            StringBuilder text = new StringBuilder();
             if (file != null) {
                 switch (format) {
                     case "txt":
-
-                        break;
-
+                        FileReader fileReader = new FileReader(file);
+                        BufferedReader reader = new BufferedReader(fileReader);
+                        String line;
+                        while((line = reader.readLine()) != null) {
+                            text.append(line).append("\n");
+                        }
+                        return text.toString();
                     case "pdf":
-                        document = PDDocument.load(file);
+                        PDDocument document = PDDocument.load(file);
                         PDFTextStripper pdfStripper = new PDFTextStripper();
                         return pdfStripper.getText(document);
 
                     case "docx":
-
-                        break;
-
+                        XWPFDocument officeDocument = new XWPFDocument(new FileInputStream(file));
+                        for(XWPFParagraph paragraph : officeDocument.getParagraphs()) {
+                            text.append(paragraph.getText()).append("\n");
+                        }
+                        return text.toString();
                     case "pptx":
-
-                        break;
+                        XMLSlideShow ppt = new XMLSlideShow(new FileInputStream(file));
+                        for (XSLFSlide slide : ppt.getSlides()) {
+                            for (XSLFShape shape : slide.getShapes()) {
+                                if (shape instanceof XSLFTextShape) {
+                                    text.append(((XSLFTextShape) shape).getText()).append("\n");
+                                }
+                            }
+                        }
+                        return text.toString();
                 }
             }
         }
-        catch (IOException ex) {
-            throw new RuntimeException(ex);
+        catch (IOException e) {
+            throw new RuntimeException(e);
         }
         return "Nothing found!";
     }
@@ -53,18 +72,20 @@ public class TextProcessing {
     public HashMap tokenizingText(String text){
         HashMap<String, Integer> filteredWords = new HashMap<>();
         try {
+            if(!text.isEmpty()) {
                 //Tokenizing der Wörter
-                String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<>^°\"']";
+                String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<\\-–>^°\"']";
                 String[] textWords = text.split(splitter);
-                for(String word : textWords){
+                for (String word : textWords) {
                     if (filteredWords.containsKey(word)) {
                         filteredWords.compute(word, (k, counter) -> counter + 1);
-                    }
-                    else {
+                    } else {
                         filteredWords.put(word, 1);
                     }
                 }
-            } catch (Exception ex) {
+            }
+        }
+        catch (Exception ex) {
             throw new RuntimeException(ex);
         }
         return filteredWords;
-- 
2.43.0