From af33d8a56681f583f7db382d89705e2b929691bc Mon Sep 17 00:00:00 2001 From: Daniel Fromm <3015351@stud.hs-mannheim.de> Date: Thu, 8 May 2025 15:38:57 +0200 Subject: [PATCH 1/2] refactored FileManager and added Classes FileLoader, TextProcessing. PDF opening is functioning. --- src/main/java/domain/FileLoader.java | 53 ++++++++++ src/main/java/domain/FileManager.java | 117 +++------------------ src/main/java/domain/TextProcessing.java | 72 +++++++++++++ src/main/java/facade/WordCloudManager.java | 21 ++-- src/main/java/tui/TUI.java | 11 +- 5 files changed, 150 insertions(+), 124 deletions(-) create mode 100644 src/main/java/domain/FileLoader.java create mode 100644 src/main/java/domain/TextProcessing.java diff --git a/src/main/java/domain/FileLoader.java b/src/main/java/domain/FileLoader.java new file mode 100644 index 0000000..d98f24c --- /dev/null +++ b/src/main/java/domain/FileLoader.java @@ -0,0 +1,53 @@ +package domain; + +import javax.swing.*; +import javax.swing.filechooser.FileNameExtensionFilter; +import java.awt.*; +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class FileLoader { + private File inputFile; + + public FileLoader() { + this.inputFile = null; + } + + public File loadFileGUI() { + try { + JFileChooser fileChooser = new JFileChooser(); + fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf")); + fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt")); + fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx")); + fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx")); + int result = fileChooser.showOpenDialog(null); + + if (result == JFileChooser.APPROVE_OPTION) { + inputFile = fileChooser.getSelectedFile(); + } + return inputFile; + } catch (HeadlessException e) { + throw new RuntimeException(e); + } + } + + public String getFileFormat(File file) { + String fileName = file.getName(); + String fileFormat = fileName.contains(".") ? fileName.substring(fileName.lastIndexOf(".") + 1) : ""; + + switch (fileFormat.toLowerCase()) { + case "pdf": + return "pdf"; + case "txt": + return "txt"; + case "docx": + return "docx"; + case "pptx": + return "pptx"; + default: + return "File format not supported"; + } + } +} diff --git a/src/main/java/domain/FileManager.java b/src/main/java/domain/FileManager.java index 2f9a635..5976a56 100644 --- a/src/main/java/domain/FileManager.java +++ b/src/main/java/domain/FileManager.java @@ -1,115 +1,26 @@ package domain; -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.text.PDFTextStripper; - -import javax.swing.*; -import javax.swing.filechooser.FileNameExtensionFilter; - -import java.awt.*; -import java.io.*; +import java.io.File; import java.util.HashMap; -import java.util.Map; - public class FileManager { - File inputFile; - String originalPath; - String goalPath; + FileLoader fileLoader = new FileLoader(); + TextProcessing textProcessing = new TextProcessing(); + private File file; - public FileManager() { - originalPath = "quelle.pdf"; - goalPath = "ziel.txt"; - inputFile = null; + public String loadFile() { + file = fileLoader.loadFileGUI(); + String fileFormat = fileLoader.getFileFormat(file); + String text = textProcessing.formatToText(file, fileFormat); + return text; } - public OutputStream loadFilePath() { - InputStream in; - OutputStream out = null; - - try { - in = new FileInputStream(originalPath); - out = new FileOutputStream(goalPath); - - byte[] buffer = new byte[1024]; - int gelesen; - - while ((gelesen = in.read(buffer)) > -1) { - out.write(buffer, 0, gelesen); - } - - in.close(); - out.close(); - return out; - } - catch (IOException e) { - e.printStackTrace(); - } - return out; - } - - public File loadFileGUI() { - try { - JFileChooser fileChooser = new JFileChooser(); - fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf")); - fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt")); - fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx")); - fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx")); - int result = fileChooser.showOpenDialog(null); - - if (result == JFileChooser.APPROVE_OPTION) { - inputFile = fileChooser.getSelectedFile(); - } - return inputFile; - } catch (HeadlessException e) { - throw new RuntimeException(e); - } - } - - public HashMap tokenizingText(File inputFile){ - HashMap filteredWords = new HashMap<>(); - try { - PDDocument document = null; - if(inputFile != null) { - document = PDDocument.load(inputFile); - PDFTextStripper pdfStripper = new PDFTextStripper(); - String text = pdfStripper.getText(document); - - //Tokenizing der Wörter - String splittedText = "[,\\s\\.:/!§$%&/()=?+*~#.;_<>^°\"']"; - String[] textWords = text.split(splittedText); - for(String word : textWords){ - if (filteredWords.containsKey(word)) { - filteredWords.compute(word, (k, counter) -> counter + 1); - } - else { - filteredWords.put(word, 1); - } - } - for(Map.Entry e : filteredWords.entrySet()){ - System.out.println(e.getKey() + " = " + e.getValue()); - } - if (document != null) { - document.close(); - } - } - } catch (Exception e){ - e.printStackTrace(); - } - return filteredWords; - } - - public HashMap maxShowWords(int number, HashMap words) { - HashMap cuttedHashmap = new HashMap<>(); - int index = number; - for (String word : words.keySet()) { - if(index > 0) { - cuttedHashmap.put(word, words.get(word)); - } - index--; - } - return cuttedHashmap; + public HashMap tokenizingText(String text) { + HashMap wordMap = textProcessing.tokenizingText(text); + return wordMap; } public void saveFile(){} + + } \ No newline at end of file diff --git a/src/main/java/domain/TextProcessing.java b/src/main/java/domain/TextProcessing.java new file mode 100644 index 0000000..ea4b6d5 --- /dev/null +++ b/src/main/java/domain/TextProcessing.java @@ -0,0 +1,72 @@ +package domain; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.text.PDFTextStripper; + +import java.io.*; +import java.util.HashMap; + +public class TextProcessing { + + public String formatToText(File file, String format) { + PDDocument document; + try { + if (file != null) { + switch (format) { + case "txt": + + break; + + case "pdf": + document = PDDocument.load(file); + PDFTextStripper pdfStripper = new PDFTextStripper(); + return pdfStripper.getText(document); + + case "docx": + + break; + + case "pptx": + + break; + } + } + } + catch (IOException ex) { + throw new RuntimeException(ex); + } + return "Nothing found!"; + } + + public HashMap maxShowWords(int number, HashMap words) { + HashMap cuttedHashmap = new HashMap<>(); + int index = number; + for (String word : words.keySet()) { + if(index > 0) { + cuttedHashmap.put(word, words.get(word)); + } + index--; + } + return cuttedHashmap; + } + + public HashMap tokenizingText(String text){ + HashMap filteredWords = new HashMap<>(); + try { + //Tokenizing der Wörter + String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<>^°\"']"; + String[] textWords = text.split(splitter); + for(String word : textWords){ + if (filteredWords.containsKey(word)) { + filteredWords.compute(word, (k, counter) -> counter + 1); + } + else { + filteredWords.put(word, 1); + } + } + } catch (Exception ex) { + throw new RuntimeException(ex); + } + return filteredWords; + } +} diff --git a/src/main/java/facade/WordCloudManager.java b/src/main/java/facade/WordCloudManager.java index 5551908..79976b3 100644 --- a/src/main/java/facade/WordCloudManager.java +++ b/src/main/java/facade/WordCloudManager.java @@ -4,9 +4,8 @@ import domain.FileManager; import domain.PictureManager; import java.io.File; -import java.io.FileNotFoundException; -import java.io.OutputStream; import java.util.HashMap; +import java.util.List; public class WordCloudManager { FileManager fileManager; @@ -18,18 +17,14 @@ public class WordCloudManager { } public boolean loadFileGUI() { - File inputFile = fileManager.loadFileGUI(); - HashMap wordMap = fileManager.tokenizingText(inputFile); - if(wordMap == null) { - return false; - } - else { + + String fileText = fileManager.loadFile(); + HashMap wordMap = fileManager.tokenizingText(fileText); + if(wordMap != null) { return true; } - } - - public void loadFilePath() { - OutputStream inputFile = fileManager.loadFilePath(); -// fileManager.processFile(null, inputFile); + else { + return false; + } } } diff --git a/src/main/java/tui/TUI.java b/src/main/java/tui/TUI.java index 7ef14f4..a74ab4b 100644 --- a/src/main/java/tui/TUI.java +++ b/src/main/java/tui/TUI.java @@ -1,10 +1,7 @@ package tui; -import domain.FileManager; import facade.WordCloudManager; -import java.io.FileNotFoundException; -import java.io.IOException; import java.util.Scanner; public class TUI { @@ -17,16 +14,14 @@ public class TUI { public void tui() { Scanner scan = new Scanner(System.in); - while(isRunning) { + WordCloudManager wcm = new WordCloudManager(); +// while(isRunning) { System.out.println("Welcome to Word Cloud.\nMenu:\n\n(0) Load File from main path\n(1) Load File with Gui" + "\n(2) Save File\n(3) Show Picture\n(4) Exit"); int option = scan.nextInt(); - WordCloudManager wcm = new WordCloudManager(); - switch (option) { case (0): //Load File Path - wcm.loadFilePath(); break; case (1): //Load File GUI @@ -48,7 +43,7 @@ public class TUI { System.out.println("Close Program!"); break; } - } +// } scan.close(); } } -- 2.43.0 From d9ae97aea42793fdb56846da5648db3d7cefeed0 Mon Sep 17 00:00:00 2001 From: Daniel Fromm <3015351@stud.hs-mannheim.de> Date: Thu, 8 May 2025 22:09:30 +0200 Subject: [PATCH 2/2] implement switch case for textbuilding from pptx, docx, txt and add comments for Method loadFileGUI --- pom.xml | 20 +++++++++ src/main/java/domain/FileLoader.java | 8 ++-- src/main/java/domain/TextProcessing.java | 55 ++++++++++++++++-------- 3 files changed, 62 insertions(+), 21 deletions(-) diff --git a/pom.xml b/pom.xml index 52adb1c..0fb13ab 100644 --- a/pom.xml +++ b/pom.xml @@ -14,6 +14,26 @@ pdfbox 2.0.29 + + org.apache.poi + poi-ooxml + 5.2.3 + + + org.apache.poi + poi + 5.2.3 + + + org.apache.xmlbeans + xmlbeans + 5.1.1 + + + org.apache.logging.log4j + log4j-core + 2.18.0 + diff --git a/src/main/java/domain/FileLoader.java b/src/main/java/domain/FileLoader.java index d98f24c..9b0b100 100644 --- a/src/main/java/domain/FileLoader.java +++ b/src/main/java/domain/FileLoader.java @@ -14,14 +14,14 @@ public class FileLoader { public FileLoader() { this.inputFile = null; } - + //KI erstellte Methode public File loadFileGUI() { try { JFileChooser fileChooser = new JFileChooser(); fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf")); - fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt")); - fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx")); - fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx")); + fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt")); //selbst hinzugefügt + fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx")); //selbst hinzugefügt + fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx")); //selbst hinzugefügt int result = fileChooser.showOpenDialog(null); if (result == JFileChooser.APPROVE_OPTION) { diff --git a/src/main/java/domain/TextProcessing.java b/src/main/java/domain/TextProcessing.java index ea4b6d5..c6b2e20 100644 --- a/src/main/java/domain/TextProcessing.java +++ b/src/main/java/domain/TextProcessing.java @@ -2,6 +2,12 @@ package domain; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; +import org.apache.poi.xwpf.usermodel.XWPFDocument; +import org.apache.poi.xwpf.usermodel.XWPFParagraph; +import org.apache.poi.xslf.usermodel.XMLSlideShow; +import org.apache.poi.xslf.usermodel.XSLFSlide; +import org.apache.poi.xslf.usermodel.XSLFShape; +import org.apache.poi.xslf.usermodel.XSLFTextShape; import java.io.*; import java.util.HashMap; @@ -9,31 +15,44 @@ import java.util.HashMap; public class TextProcessing { public String formatToText(File file, String format) { - PDDocument document; try { + StringBuilder text = new StringBuilder(); if (file != null) { switch (format) { case "txt": - - break; - + FileReader fileReader = new FileReader(file); + BufferedReader reader = new BufferedReader(fileReader); + String line; + while((line = reader.readLine()) != null) { + text.append(line).append("\n"); + } + return text.toString(); case "pdf": - document = PDDocument.load(file); + PDDocument document = PDDocument.load(file); PDFTextStripper pdfStripper = new PDFTextStripper(); return pdfStripper.getText(document); case "docx": - - break; - + XWPFDocument officeDocument = new XWPFDocument(new FileInputStream(file)); + for(XWPFParagraph paragraph : officeDocument.getParagraphs()) { + text.append(paragraph.getText()).append("\n"); + } + return text.toString(); case "pptx": - - break; + XMLSlideShow ppt = new XMLSlideShow(new FileInputStream(file)); + for (XSLFSlide slide : ppt.getSlides()) { + for (XSLFShape shape : slide.getShapes()) { + if (shape instanceof XSLFTextShape) { + text.append(((XSLFTextShape) shape).getText()).append("\n"); + } + } + } + return text.toString(); } } } - catch (IOException ex) { - throw new RuntimeException(ex); + catch (IOException e) { + throw new RuntimeException(e); } return "Nothing found!"; } @@ -53,18 +72,20 @@ public class TextProcessing { public HashMap tokenizingText(String text){ HashMap filteredWords = new HashMap<>(); try { + if(!text.isEmpty()) { //Tokenizing der Wörter - String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<>^°\"']"; + String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<\\-–>^°\"']"; String[] textWords = text.split(splitter); - for(String word : textWords){ + for (String word : textWords) { if (filteredWords.containsKey(word)) { filteredWords.compute(word, (k, counter) -> counter + 1); - } - else { + } else { filteredWords.put(word, 1); } } - } catch (Exception ex) { + } + } + catch (Exception ex) { throw new RuntimeException(ex); } return filteredWords; -- 2.43.0