gitignore

branch: main
SoftwareObservatorium committed 2023-04-04 10:59:29 +02:00
parent 017680ce25, commit a093f2f92f
6 changed files with 137 additions and 56 deletions

.gitignore (new file, mode 100644)

@@ -0,0 +1,36 @@
ignite/
activemq-data/
.idea/
target/
.classpath
.project
.settings/
*.aux
*.glo
*.idx
*.log
*.toc
*.ist
*.acn
*.acr
*.alg
*.bbl
*.blg
*.dvi
*.glg
*.gls
*.ilg
*.ind
*.lof
*.lot
*.maf
*.mtc
*.mtc1
*.out
*.synctex.gz
*.scg
#IntelliJ
*.iml
/.idea/

HadoopSS23/.gitignore (deleted)

@@ -1,55 +0,0 @@
# ---> Eclipse
*.pydevproject
.metadata
.gradle
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.settings/
.loadpath
# Eclipse Core
.project
# External tool builders
.externalToolBuilders/
# Locally stored "Eclipse launch configurations"
*.launch
# CDT-specific
.cproject
# JDT-specific (Eclipse Java Development Tools)
.classpath
# Java annotation processor (APT)
.factorypath
# PDT-specific
.buildpath
# sbteclipse plugin
.target
# TeXlipse plugin
.texlipse
# ---> Java
*.class
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.jar
*.war
*.ear
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*

HadoopSS23/.idea/modules.xml

@@ -2,6 +2,7 @@
 <project version="4">
   <component name="ProjectModuleManager">
     <modules>
+      <module fileurl="file://$PROJECT_DIR$/Hadoop/Hadoop.iml" filepath="$PROJECT_DIR$/Hadoop/Hadoop.iml" />
       <module fileurl="file://$PROJECT_DIR$/.idea/HadoopSS23.iml" filepath="$PROJECT_DIR$/.idea/HadoopSS23.iml" />
     </modules>
   </component>

(deleted file; path not shown)

@@ -1 +0,0 @@
/target/

HadoopSS23/.idea/vcs.xml

@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
     <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
   </component>
 </project>

WordCountVLCounter.java (new file)

@@ -0,0 +1,99 @@
package de.hsma.bdea;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.log4j.BasicConfigurator;

import java.io.IOException;

public class WordCountVLCounter {

    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String[] woerter = value.toString().split("\\W+");
            for (String wort : woerter) {
                word.set(wort);
                context.write(word, one);
            }
        }
    }

    public static class IntSumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    // we could also inherit from IntSumCombiner to avoid the functional clone
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
            // count words: one increment per reduce() call, i.e. per distinct key
            context.getCounter("mygroup", "words").increment(1);
        }
    }

    public static void main(String[] args) throws Exception {
        BasicConfigurator.configure(); // Log4j config; alternatively a config file in the resources folder
        System.setProperty("hadoop.home.dir", "/"); // required for Hadoop 3.3.0

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountVLCounter.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumCombiner.class); // local reduce on the mapper side
        job.setReducerClass(IntSumReducer.class);   // reduce after the shuffle, on the reducer side
        job.setNumReduceTasks(4);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path("resources/klassiker"));
        String output1 = "resources/wordcount-output1-" + System.currentTimeMillis();
        FileOutputFormat.setOutputPath(job, new Path(output1));
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.waitForCompletion(true);

        // -- Hadoop counters --
        // built-in counter (see TaskCounter.* for others)
        long outputRecords = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue();
        // custom counter (important: increment only in the Reducer, not in the Combiner,
        // because the Combiner may run any number of times, including zero)
        long words = job.getCounters().findCounter("mygroup", "words").getValue();

        // both counts are the same: one output record per distinct word
        System.out.println("#Records => " + outputRecords);
        System.out.println("#Words => " + words);
    }
}
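
Since the job writes its results with SequenceFileOutputFormat, the part-r-* files under resources/wordcount-output1-* are binary SequenceFiles rather than plain text. Below is a minimal sketch for dumping one of them, assuming the Text/IntWritable pairs written by the reducer above; the class name SequenceFileDump and the path argument are illustrative and not part of this commit.

package de.hsma.bdea;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Hypothetical helper (not part of the commit): prints a word-count SequenceFile to stdout.
public class SequenceFileDump {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // e.g. "resources/wordcount-output1-<timestamp>/part-r-00000"
        Path part = new Path(args[0]);
        try (SequenceFile.Reader reader =
                 new SequenceFile.Reader(conf, SequenceFile.Reader.file(part))) {
            Text key = new Text();                 // matches the job's output key class
            IntWritable value = new IntWritable(); // matches the job's output value class
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value.get());
            }
        }
    }
}

With four reduce tasks, each word is routed to exactly one of the four part files, so dumping all of them together lists every distinct word exactly once.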