gitignore
parent 017680ce25
commit a093f2f92f
@@ -0,0 +1,36 @@
+ignite/
+activemq-data/
+.idea/
+target/
+.classpath
+.project
+.classpath
+.settings/
+*.aux
+*.glo
+*.idx
+*.log
+*.toc
+*.ist
+*.acn
+*.acr
+*.alg
+*.bbl
+*.blg
+*.dvi
+*.glg
+*.gls
+*.ilg
+*.ind
+*.lof
+*.lot
+*.maf
+*.mtc
+*.mtc1
+*.out
+*.synctex.gz
+*.scg
+
+#IntelliJ
+*.iml
+/.idea/
@@ -1,55 +0,0 @@
-# ---> Eclipse
-*.pydevproject
-.metadata
-.gradle
-bin/
-tmp/
-*.tmp
-*.bak
-*.swp
-*~.nib
-local.properties
-.settings/
-.loadpath
-
-# Eclipse Core
-.project
-
-# External tool builders
-.externalToolBuilders/
-
-# Locally stored "Eclipse launch configurations"
-*.launch
-
-# CDT-specific
-.cproject
-
-# JDT-specific (Eclipse Java Development Tools)
-.classpath
-
-# Java annotation processor (APT)
-.factorypath
-
-# PDT-specific
-.buildpath
-
-# sbteclipse plugin
-.target
-
-# TeXlipse plugin
-.texlipse
-
-# ---> Java
-*.class
-
-# Mobile Tools for Java (J2ME)
-.mtj.tmp/
-
-# Package Files #
-*.jar
-*.war
-*.ear
-
-# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
-hs_err_pid*
-
@@ -2,6 +2,7 @@
 <project version="4">
   <component name="ProjectModuleManager">
     <modules>
+      <module fileurl="file://$PROJECT_DIR$/Hadoop/Hadoop.iml" filepath="$PROJECT_DIR$/Hadoop/Hadoop.iml" />
       <module fileurl="file://$PROJECT_DIR$/.idea/HadoopSS23.iml" filepath="$PROJECT_DIR$/.idea/HadoopSS23.iml" />
     </modules>
   </component>
@@ -1 +0,0 @@
-/target/
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
     <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
   </component>
 </project>
@@ -0,0 +1,99 @@
+package de.hsma.bdea;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskCounter;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.log4j.BasicConfigurator;
+
+import java.io.IOException;
+
+public class WordCountVLCounter {
+
+    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
+        private final static IntWritable one = new IntWritable(1);
+        private Text word = new Text();
+
+        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
+            String[] woerter = value.toString().split("\\W+");
+
+            for (String wort: woerter) {
+                word.set(wort);
+                context.write(word, one);
+            }
+        }
+    }
+
+    public static class IntSumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
+        private IntWritable result = new IntWritable();
+
+        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
+            int sum = 0;
+            for (IntWritable val : values) {
+                sum += val.get();
+            }
+            result.set(sum);
+            context.write(key, result);
+        }
+    }
+
+    // we could also extend IntSumCombiner here to avoid this functional clone
+    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
+        private IntWritable result = new IntWritable();
+
+        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
+            int sum = 0;
+            for (IntWritable val : values) {
+                sum += val.get();
+            }
+            result.set(sum);
+            context.write(key, result);
+
+            // count the words (one increment per reduce key, i.e. per distinct word)
+            context.getCounter("mygroup", "words").increment(1);
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        BasicConfigurator.configure(); // Log4j configuration, or put a config file into the resources folder
+        System.setProperty("hadoop.home.dir", "/"); // required for Hadoop 3.3.0
+
+        Configuration conf = new Configuration();
+
+        Job job = Job.getInstance(conf, "word count");
+        job.setJarByClass(WordCountVLCounter.class);
+        job.setMapperClass(TokenizerMapper.class);
+        job.setCombinerClass(IntSumCombiner.class); // local reduce on the mapper side
+        job.setReducerClass(IntSumReducer.class); // reduce on the reducer side, after the shuffle
+        job.setNumReduceTasks(4);
+        job.setOutputKeyClass(Text.class);
+        job.setOutputValueClass(IntWritable.class);
+
+        FileInputFormat.addInputPath(job, new Path("resources/klassiker"));
+
+        String output1 = "resources/wordcount-output1-" + System.currentTimeMillis();
+        FileOutputFormat.setOutputPath(job, new Path(output1));
+        job.setOutputFormatClass(SequenceFileOutputFormat.class);
+
+        job.waitForCompletion(true);
+
+        // -- Hadoop counters
+        // built-in counter (see TaskCounter.* for others)
+        long outputRecords = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue();
+        //job.getCounters().findCounter(TaskCounter.).setValue();
+
+        // custom counter (important: incremented only in the reducer, not in the combiner)
+        long words = job.getCounters().findCounter("mygroup", "words").getValue();
+
+        // both counts are the same
+        System.out.println("#Records => " + outputRecords);
+        System.out.println("#Words => " + words);
+    }
+}
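Because the job above writes its result with SequenceFileOutputFormat, the output files are binary Text/IntWritable pairs rather than plain text. A minimal sketch, not part of the commit, of how such a part file could be read back with Hadoop's standard SequenceFile.Reader; the class name SequenceFileDump and the path passed in args[0] (e.g. resources/wordcount-output1-<timestamp>/part-r-00000) are assumptions for illustration:

package de.hsma.bdea;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Sketch: dumps the Text/IntWritable pairs written by WordCountVLCounter.
// args[0] is a part file of one job run (hypothetical path, since the
// output directory name contains a timestamp chosen at run time).
public class SequenceFileDump {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path(args[0]);

        try (SequenceFile.Reader reader =
                     new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
            Text key = new Text();                  // matches job.setOutputKeyClass(Text.class)
            IntWritable value = new IntWritable();  // matches job.setOutputValueClass(IntWritable.class)
            while (reader.next(key, value)) {       // returns false at end of file
                System.out.println(key + "\t" + value.get());
            }
        }
    }
}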