commit
06c821ad8d
20
pom.xml
20
pom.xml
|
@ -14,6 +14,26 @@
|
|||
<artifactId>pdfbox</artifactId>
|
||||
<version>2.0.29</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-ooxml</artifactId>
|
||||
<version>5.2.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi</artifactId>
|
||||
<version>5.2.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.xmlbeans</groupId>
|
||||
<artifactId>xmlbeans</artifactId>
|
||||
<version>5.1.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-core</artifactId>
|
||||
<version>2.18.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<properties>
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
package domain;
|
||||
|
||||
import javax.swing.*;
|
||||
import javax.swing.filechooser.FileNameExtensionFilter;
|
||||
import java.awt.*;
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Lets the user pick an input document through a Swing file chooser and
 * reports which of the supported formats (pdf, txt, docx, pptx) a file has.
 */
public class FileLoader {
    /** Returned by {@link #getFileFormat(File)} for every file that is not pdf/txt/docx/pptx. */
    private static final String UNSUPPORTED_FORMAT = "File format not supported";

    /** Most recently confirmed selection; {@code null} until the user approves a file. */
    private File inputFile;

    /** Creates a loader with no file selected. */
    public FileLoader() {
        this.inputFile = null;
    }

    // AI-generated method
    /**
     * Shows an "open file" dialog offering filters for the supported formats.
     *
     * <p>If the dialog is not approved, the previously stored selection is
     * returned unchanged, which is {@code null} when nothing was ever chosen.
     *
     * @return the selected file, or the previous selection / {@code null} on cancel
     * @throws RuntimeException wrapping the {@link HeadlessException} raised when
     *         no display is available
     */
    public File loadFileGUI() {
        try {
            JFileChooser fileChooser = new JFileChooser();
            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf"));
            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt")); // added by hand
            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx")); // added by hand
            fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx")); // added by hand
            int result = fileChooser.showOpenDialog(null);

            if (result == JFileChooser.APPROVE_OPTION) {
                inputFile = fileChooser.getSelectedFile();
            }
            return inputFile;
        } catch (HeadlessException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Derives the document format from the file name's extension (case-insensitive).
     *
     * @param file the file to inspect; may be {@code null}, e.g. after a cancelled dialog
     * @return {@code "pdf"}, {@code "txt"}, {@code "docx"} or {@code "pptx"}; otherwise
     *         the text {@code "File format not supported"} (also for {@code null} or a
     *         name without an extension)
     */
    public String getFileFormat(File file) {
        // loadFileGUI() hands back null when the user cancels; treat that as "unsupported"
        // instead of failing with a NullPointerException.
        if (file == null) {
            return UNSUPPORTED_FORMAT;
        }

        String fileName = file.getName();
        int lastDot = fileName.lastIndexOf('.');
        // Locale.ROOT keeps the lower-casing independent of the user's default locale.
        String extension = lastDot >= 0
                ? fileName.substring(lastDot + 1).toLowerCase(java.util.Locale.ROOT)
                : "";

        switch (extension) {
            case "pdf":
            case "txt":
            case "docx":
            case "pptx":
                return extension;
            default:
                return UNSUPPORTED_FORMAT;
        }
    }
}
|
|
@ -1,115 +1,26 @@
|
|||
package domain;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
|
||||
import javax.swing.*;
|
||||
import javax.swing.filechooser.FileNameExtensionFilter;
|
||||
|
||||
import java.awt.*;
|
||||
import java.io.*;
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class FileManager {
|
||||
File inputFile;
|
||||
String originalPath;
|
||||
String goalPath;
|
||||
FileLoader fileLoader = new FileLoader();
|
||||
TextProcessing textProcessing = new TextProcessing();
|
||||
private File file;
|
||||
|
||||
public FileManager() {
|
||||
originalPath = "quelle.pdf";
|
||||
goalPath = "ziel.txt";
|
||||
inputFile = null;
|
||||
public String loadFile() {
|
||||
file = fileLoader.loadFileGUI();
|
||||
String fileFormat = fileLoader.getFileFormat(file);
|
||||
String text = textProcessing.formatToText(file, fileFormat);
|
||||
return text;
|
||||
}
|
||||
|
||||
public OutputStream loadFilePath() {
|
||||
InputStream in;
|
||||
OutputStream out = null;
|
||||
|
||||
try {
|
||||
in = new FileInputStream(originalPath);
|
||||
out = new FileOutputStream(goalPath);
|
||||
|
||||
byte[] buffer = new byte[1024];
|
||||
int gelesen;
|
||||
|
||||
while ((gelesen = in.read(buffer)) > -1) {
|
||||
out.write(buffer, 0, gelesen);
|
||||
}
|
||||
|
||||
in.close();
|
||||
out.close();
|
||||
return out;
|
||||
}
|
||||
catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
public File loadFileGUI() {
|
||||
try {
|
||||
JFileChooser fileChooser = new JFileChooser();
|
||||
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf"));
|
||||
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt"));
|
||||
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx"));
|
||||
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx"));
|
||||
int result = fileChooser.showOpenDialog(null);
|
||||
|
||||
if (result == JFileChooser.APPROVE_OPTION) {
|
||||
inputFile = fileChooser.getSelectedFile();
|
||||
}
|
||||
return inputFile;
|
||||
} catch (HeadlessException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public HashMap tokenizingText(File inputFile){
|
||||
HashMap<String, Integer> filteredWords = new HashMap<>();
|
||||
try {
|
||||
PDDocument document = null;
|
||||
if(inputFile != null) {
|
||||
document = PDDocument.load(inputFile);
|
||||
PDFTextStripper pdfStripper = new PDFTextStripper();
|
||||
String text = pdfStripper.getText(document);
|
||||
|
||||
//Tokenizing der Wörter
|
||||
String splittedText = "[,\\s\\.:/!§$%&/()=?+*~#.;_<>^°\"']";
|
||||
String[] textWords = text.split(splittedText);
|
||||
for(String word : textWords){
|
||||
if (filteredWords.containsKey(word)) {
|
||||
filteredWords.compute(word, (k, counter) -> counter + 1);
|
||||
}
|
||||
else {
|
||||
filteredWords.put(word, 1);
|
||||
}
|
||||
}
|
||||
for(Map.Entry e : filteredWords.entrySet()){
|
||||
System.out.println(e.getKey() + " = " + e.getValue());
|
||||
}
|
||||
if (document != null) {
|
||||
document.close();
|
||||
}
|
||||
}
|
||||
} catch (Exception e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
return filteredWords;
|
||||
}
|
||||
|
||||
public HashMap maxShowWords(int number, HashMap<String, Integer> words) {
|
||||
HashMap <String, Integer> cuttedHashmap = new HashMap<>();
|
||||
int index = number;
|
||||
for (String word : words.keySet()) {
|
||||
if(index > 0) {
|
||||
cuttedHashmap.put(word, words.get(word));
|
||||
}
|
||||
index--;
|
||||
}
|
||||
return cuttedHashmap;
|
||||
public HashMap tokenizingText(String text) {
|
||||
HashMap<String, Integer> wordMap = textProcessing.tokenizingText(text);
|
||||
return wordMap;
|
||||
}
|
||||
|
||||
// Currently a no-op: the method body is empty, so calling it has no effect.
public void saveFile(){}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
package domain;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
||||
import org.apache.poi.xslf.usermodel.XSLFShape;
|
||||
import org.apache.poi.xslf.usermodel.XSLFTextShape;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.HashMap;
|
||||
|
||||
public class TextProcessing {
|
||||
|
||||
public String formatToText(File file, String format) {
|
||||
try {
|
||||
StringBuilder text = new StringBuilder();
|
||||
if (file != null) {
|
||||
switch (format) {
|
||||
case "txt":
|
||||
FileReader fileReader = new FileReader(file);
|
||||
BufferedReader reader = new BufferedReader(fileReader);
|
||||
String line;
|
||||
while((line = reader.readLine()) != null) {
|
||||
text.append(line).append("\n");
|
||||
}
|
||||
return text.toString();
|
||||
case "pdf":
|
||||
PDDocument document = PDDocument.load(file);
|
||||
PDFTextStripper pdfStripper = new PDFTextStripper();
|
||||
return pdfStripper.getText(document);
|
||||
|
||||
case "docx":
|
||||
XWPFDocument officeDocument = new XWPFDocument(new FileInputStream(file));
|
||||
for(XWPFParagraph paragraph : officeDocument.getParagraphs()) {
|
||||
text.append(paragraph.getText()).append("\n");
|
||||
}
|
||||
return text.toString();
|
||||
case "pptx":
|
||||
XMLSlideShow ppt = new XMLSlideShow(new FileInputStream(file));
|
||||
for (XSLFSlide slide : ppt.getSlides()) {
|
||||
for (XSLFShape shape : slide.getShapes()) {
|
||||
if (shape instanceof XSLFTextShape) {
|
||||
text.append(((XSLFTextShape) shape).getText()).append("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
return text.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return "Nothing found!";
|
||||
}
|
||||
|
||||
public HashMap maxShowWords(int number, HashMap<String, Integer> words) {
|
||||
HashMap <String, Integer> cuttedHashmap = new HashMap<>();
|
||||
int index = number;
|
||||
for (String word : words.keySet()) {
|
||||
if(index > 0) {
|
||||
cuttedHashmap.put(word, words.get(word));
|
||||
}
|
||||
index--;
|
||||
}
|
||||
return cuttedHashmap;
|
||||
}
|
||||
|
||||
public HashMap tokenizingText(String text){
|
||||
HashMap<String, Integer> filteredWords = new HashMap<>();
|
||||
try {
|
||||
if(!text.isEmpty()) {
|
||||
//Tokenizing der Wörter
|
||||
String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<\\-–>^°\"']";
|
||||
String[] textWords = text.split(splitter);
|
||||
for (String word : textWords) {
|
||||
if (filteredWords.containsKey(word)) {
|
||||
filteredWords.compute(word, (k, counter) -> counter + 1);
|
||||
} else {
|
||||
filteredWords.put(word, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
return filteredWords;
|
||||
}
|
||||
}
|
|
@ -4,9 +4,8 @@ import domain.FileManager;
|
|||
import domain.PictureManager;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
public class WordCloudManager {
|
||||
FileManager fileManager;
|
||||
|
@ -18,18 +17,14 @@ public class WordCloudManager {
|
|||
}
|
||||
|
||||
public boolean loadFileGUI() {
|
||||
File inputFile = fileManager.loadFileGUI();
|
||||
HashMap wordMap = fileManager.tokenizingText(inputFile);
|
||||
if(wordMap == null) {
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
|
||||
String fileText = fileManager.loadFile();
|
||||
HashMap wordMap = fileManager.tokenizingText(fileText);
|
||||
if(wordMap != null) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
public void loadFilePath() {
|
||||
OutputStream inputFile = fileManager.loadFilePath();
|
||||
// fileManager.processFile(null, inputFile);
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,7 @@
|
|||
package tui;
|
||||
|
||||
import domain.FileManager;
|
||||
import facade.WordCloudManager;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Scanner;
|
||||
|
||||
public class TUI {
|
||||
|
@ -17,16 +14,14 @@ public class TUI {
|
|||
|
||||
public void tui() {
|
||||
Scanner scan = new Scanner(System.in);
|
||||
while(isRunning) {
|
||||
WordCloudManager wcm = new WordCloudManager();
|
||||
// while(isRunning) {
|
||||
System.out.println("Welcome to Word Cloud.\nMenu:\n\n(0) Load File from main path\n(1) Load File with Gui" +
|
||||
"\n(2) Save File\n(3) Show Picture\n(4) Exit");
|
||||
int option = scan.nextInt();
|
||||
WordCloudManager wcm = new WordCloudManager();
|
||||
|
||||
switch (option) {
|
||||
case (0):
|
||||
//Load File Path
|
||||
wcm.loadFilePath();
|
||||
break;
|
||||
case (1):
|
||||
//Load File GUI
|
||||
|
@ -48,7 +43,7 @@ public class TUI {
|
|||
System.out.println("Close Program!");
|
||||
break;
|
||||
}
|
||||
}
|
||||
// }
|
||||
scan.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue