Implement switch cases for extracting text from pptx, docx, and txt files, and add comments to the loadFileGUI method
parent
af33d8a566
commit
d9ae97aea4
20
pom.xml
20
pom.xml
|
|
@ -14,6 +14,26 @@
|
||||||
<artifactId>pdfbox</artifactId>
|
<artifactId>pdfbox</artifactId>
|
||||||
<version>2.0.29</version>
|
<version>2.0.29</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.poi</groupId>
|
||||||
|
<artifactId>poi-ooxml</artifactId>
|
||||||
|
<version>5.2.3</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.poi</groupId>
|
||||||
|
<artifactId>poi</artifactId>
|
||||||
|
<version>5.2.3</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.xmlbeans</groupId>
|
||||||
|
<artifactId>xmlbeans</artifactId>
|
||||||
|
<version>5.1.1</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.logging.log4j</groupId>
|
||||||
|
<artifactId>log4j-core</artifactId>
|
||||||
|
<version>2.18.0</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
|
|
|
||||||
|
|
@ -14,14 +14,14 @@ public class FileLoader {
|
||||||
public FileLoader() {
|
public FileLoader() {
|
||||||
this.inputFile = null;
|
this.inputFile = null;
|
||||||
}
|
}
|
||||||
|
// AI-generated method
|
||||||
public File loadFileGUI() {
|
public File loadFileGUI() {
|
||||||
try {
|
try {
|
||||||
JFileChooser fileChooser = new JFileChooser();
|
JFileChooser fileChooser = new JFileChooser();
|
||||||
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf"));
|
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PDF Files", "pdf"));
|
||||||
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt"));
|
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Text Files", "txt")); //selbst hinzugefügt
|
||||||
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx"));
|
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("Word Documents", "docx")); //selbst hinzugefügt
|
||||||
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx"));
|
fileChooser.addChoosableFileFilter(new FileNameExtensionFilter("PowerPoint Presentations", "pptx")); //selbst hinzugefügt
|
||||||
int result = fileChooser.showOpenDialog(null);
|
int result = fileChooser.showOpenDialog(null);
|
||||||
|
|
||||||
if (result == JFileChooser.APPROVE_OPTION) {
|
if (result == JFileChooser.APPROVE_OPTION) {
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,12 @@ package domain;
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.apache.pdfbox.text.PDFTextStripper;
|
import org.apache.pdfbox.text.PDFTextStripper;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
|
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||||
|
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
||||||
|
import org.apache.poi.xslf.usermodel.XSLFShape;
|
||||||
|
import org.apache.poi.xslf.usermodel.XSLFTextShape;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
@ -9,31 +15,44 @@ import java.util.HashMap;
|
||||||
public class TextProcessing {
|
public class TextProcessing {
|
||||||
|
|
||||||
public String formatToText(File file, String format) {
|
public String formatToText(File file, String format) {
|
||||||
PDDocument document;
|
|
||||||
try {
|
try {
|
||||||
|
StringBuilder text = new StringBuilder();
|
||||||
if (file != null) {
|
if (file != null) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case "txt":
|
case "txt":
|
||||||
|
FileReader fileReader = new FileReader(file);
|
||||||
break;
|
BufferedReader reader = new BufferedReader(fileReader);
|
||||||
|
String line;
|
||||||
|
while((line = reader.readLine()) != null) {
|
||||||
|
text.append(line).append("\n");
|
||||||
|
}
|
||||||
|
return text.toString();
|
||||||
case "pdf":
|
case "pdf":
|
||||||
document = PDDocument.load(file);
|
PDDocument document = PDDocument.load(file);
|
||||||
PDFTextStripper pdfStripper = new PDFTextStripper();
|
PDFTextStripper pdfStripper = new PDFTextStripper();
|
||||||
return pdfStripper.getText(document);
|
return pdfStripper.getText(document);
|
||||||
|
|
||||||
case "docx":
|
case "docx":
|
||||||
|
XWPFDocument officeDocument = new XWPFDocument(new FileInputStream(file));
|
||||||
break;
|
for(XWPFParagraph paragraph : officeDocument.getParagraphs()) {
|
||||||
|
text.append(paragraph.getText()).append("\n");
|
||||||
|
}
|
||||||
|
return text.toString();
|
||||||
case "pptx":
|
case "pptx":
|
||||||
|
XMLSlideShow ppt = new XMLSlideShow(new FileInputStream(file));
|
||||||
break;
|
for (XSLFSlide slide : ppt.getSlides()) {
|
||||||
|
for (XSLFShape shape : slide.getShapes()) {
|
||||||
|
if (shape instanceof XSLFTextShape) {
|
||||||
|
text.append(((XSLFTextShape) shape).getText()).append("\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (IOException ex) {
|
return text.toString();
|
||||||
throw new RuntimeException(ex);
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
return "Nothing found!";
|
return "Nothing found!";
|
||||||
}
|
}
|
||||||
|
|
@ -53,18 +72,20 @@ public class TextProcessing {
|
||||||
public HashMap tokenizingText(String text){
|
public HashMap tokenizingText(String text){
|
||||||
HashMap<String, Integer> filteredWords = new HashMap<>();
|
HashMap<String, Integer> filteredWords = new HashMap<>();
|
||||||
try {
|
try {
|
||||||
|
if(!text.isEmpty()) {
|
||||||
//Tokenizing der Wörter
|
//Tokenizing der Wörter
|
||||||
String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<>^°\"']";
|
String splitter = "[,\\s\\.:/!§$%&/()=?+*~#.;_<\\-–>^°\"']";
|
||||||
String[] textWords = text.split(splitter);
|
String[] textWords = text.split(splitter);
|
||||||
for (String word : textWords) {
|
for (String word : textWords) {
|
||||||
if (filteredWords.containsKey(word)) {
|
if (filteredWords.containsKey(word)) {
|
||||||
filteredWords.compute(word, (k, counter) -> counter + 1);
|
filteredWords.compute(word, (k, counter) -> counter + 1);
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
filteredWords.put(word, 1);
|
filteredWords.put(word, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception ex) {
|
}
|
||||||
|
}
|
||||||
|
catch (Exception ex) {
|
||||||
throw new RuntimeException(ex);
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
return filteredWords;
|
return filteredWords;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue