gitignore
parent
017680ce25
commit
a093f2f92f
|
@ -0,0 +1,36 @@
|
|||
ignite/
|
||||
activemq-data/
|
||||
.idea/
|
||||
target/
|
||||
.classpath
|
||||
.project
|
||||
.classpath
|
||||
.settings/
|
||||
*.aux
|
||||
*.glo
|
||||
*.idx
|
||||
*.log
|
||||
*.toc
|
||||
*.ist
|
||||
*.acn
|
||||
*.acr
|
||||
*.alg
|
||||
*.bbl
|
||||
*.blg
|
||||
*.dvi
|
||||
*.glg
|
||||
*.gls
|
||||
*.ilg
|
||||
*.ind
|
||||
*.lof
|
||||
*.lot
|
||||
*.maf
|
||||
*.mtc
|
||||
*.mtc1
|
||||
*.out
|
||||
*.synctex.gz
|
||||
*.scg
|
||||
|
||||
#IntelliJ
|
||||
*.iml
|
||||
/.idea/
|
|
@ -1,55 +0,0 @@
|
|||
# ---> Eclipse
|
||||
*.pydevproject
|
||||
.metadata
|
||||
.gradle
|
||||
bin/
|
||||
tmp/
|
||||
*.tmp
|
||||
*.bak
|
||||
*.swp
|
||||
*~.nib
|
||||
local.properties
|
||||
.settings/
|
||||
.loadpath
|
||||
|
||||
# Eclipse Core
|
||||
.project
|
||||
|
||||
# External tool builders
|
||||
.externalToolBuilders/
|
||||
|
||||
# Locally stored "Eclipse launch configurations"
|
||||
*.launch
|
||||
|
||||
# CDT-specific
|
||||
.cproject
|
||||
|
||||
# JDT-specific (Eclipse Java Development Tools)
|
||||
.classpath
|
||||
|
||||
# Java annotation processor (APT)
|
||||
.factorypath
|
||||
|
||||
# PDT-specific
|
||||
.buildpath
|
||||
|
||||
# sbteclipse plugin
|
||||
.target
|
||||
|
||||
# TeXlipse plugin
|
||||
.texlipse
|
||||
|
||||
# ---> Java
|
||||
*.class
|
||||
|
||||
# Mobile Tools for Java (J2ME)
|
||||
.mtj.tmp/
|
||||
|
||||
# Package Files #
|
||||
*.jar
|
||||
*.war
|
||||
*.ear
|
||||
|
||||
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
|
||||
hs_err_pid*
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/Hadoop/Hadoop.iml" filepath="$PROJECT_DIR$/Hadoop/Hadoop.iml" />
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/HadoopSS23.iml" filepath="$PROJECT_DIR$/.idea/HadoopSS23.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
/target/
|
|
@ -1,6 +1,7 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
|
||||
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,99 @@
|
|||
package de.hsma.bdea;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.Reducer;
|
||||
import org.apache.hadoop.mapreduce.TaskCounter;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
|
||||
import org.apache.log4j.BasicConfigurator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class WordCountVLCounter {
|
||||
|
||||
public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
|
||||
private final static IntWritable one = new IntWritable(1);
|
||||
private Text word = new Text();
|
||||
|
||||
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
|
||||
String[] woerter = value.toString().split("\\W+");
|
||||
|
||||
for (String wort: woerter) {
|
||||
word.set(wort);
|
||||
context.write(word, one);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class IntSumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
|
||||
private IntWritable result = new IntWritable();
|
||||
|
||||
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
|
||||
int sum = 0;
|
||||
for (IntWritable val : values) {
|
||||
sum += val.get();
|
||||
}
|
||||
result.set(sum);
|
||||
context.write(key, result);
|
||||
}
|
||||
}
|
||||
|
||||
// wir könnten auch von IntSumCombiner erben um funktionale Klone zu reduzieren
|
||||
public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
|
||||
private IntWritable result = new IntWritable();
|
||||
|
||||
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
|
||||
int sum = 0;
|
||||
for (IntWritable val : values) {
|
||||
sum += val.get();
|
||||
}
|
||||
result.set(sum);
|
||||
context.write(key, result);
|
||||
|
||||
// sum up words
|
||||
context.getCounter("mygroup", "words").increment(1);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
BasicConfigurator.configure(); // Log4j Config oder ConfigFile in Resources Folder
|
||||
System.setProperty("hadoop.home.dir", "/"); // zwingend für Hadoop 3.3.0
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
|
||||
Job job = Job.getInstance(conf, "word count");
|
||||
job.setJarByClass(WordCountVLCounter.class);
|
||||
job.setMapperClass(TokenizerMapper.class);
|
||||
job.setCombinerClass(IntSumCombiner.class); // reduce lokal bei den Mappern
|
||||
job.setReducerClass(IntSumReducer.class); // reduce nach Verteilung bei den Reducern
|
||||
job.setNumReduceTasks(4);
|
||||
job.setOutputKeyClass(Text.class);
|
||||
job.setOutputValueClass(IntWritable.class);
|
||||
|
||||
FileInputFormat.addInputPath(job, new Path("resources/klassiker"));
|
||||
|
||||
String output1 = "resources/wordcount-output1-" + System.currentTimeMillis();
|
||||
FileOutputFormat.setOutputPath(job, new Path(output1));
|
||||
job.setOutputFormatClass(SequenceFileOutputFormat.class);
|
||||
|
||||
job.waitForCompletion(true);
|
||||
|
||||
// -- Hadoop Counter
|
||||
// default counter (siehe TaskCounter.* für andere)
|
||||
long outputRecords = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue();
|
||||
//job.getCounters().findCounter(TaskCounter.).setValue();
|
||||
|
||||
// custom counter (Wichtig: nur in Reducer und nicht in Combiner
|
||||
long words = job.getCounters().findCounter("mygroup", "words").getValue();
|
||||
|
||||
// Anzahl ist dieselbe
|
||||
System.out.println("#Records => " + outputRecords);
|
||||
System.out.println("#Words => " + words);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue