gitignore
parent 017680ce25
commit a093f2f92f
@@ -0,0 +1,36 @@
+ignite/
+activemq-data/
+.idea/
+target/
+.classpath
+.project
+.classpath
+.settings/
+*.aux
+*.glo
+*.idx
+*.log
+*.toc
+*.ist
+*.acn
+*.acr
+*.alg
+*.bbl
+*.blg
+*.dvi
+*.glg
+*.gls
+*.ilg
+*.ind
+*.lof
+*.lot
+*.maf
+*.mtc
+*.mtc1
+*.out
+*.synctex.gz
+*.scg
+
+#IntelliJ
+*.iml
+/.idea/
@@ -1,55 +0,0 @@
-# ---> Eclipse
-*.pydevproject
-.metadata
-.gradle
-bin/
-tmp/
-*.tmp
-*.bak
-*.swp
-*~.nib
-local.properties
-.settings/
-.loadpath
-
-# Eclipse Core
-.project
-
-# External tool builders
-.externalToolBuilders/
-
-# Locally stored "Eclipse launch configurations"
-*.launch
-
-# CDT-specific
-.cproject
-
-# JDT-specific (Eclipse Java Development Tools)
-.classpath
-
-# Java annotation processor (APT)
-.factorypath
-
-# PDT-specific
-.buildpath
-
-# sbteclipse plugin
-.target
-
-# TeXlipse plugin
-.texlipse
-
-# ---> Java
-*.class
-
-# Mobile Tools for Java (J2ME)
-.mtj.tmp/
-
-# Package Files #
-*.jar
-*.war
-*.ear
-
-# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
-hs_err_pid*
-
@@ -2,6 +2,7 @@
 <project version="4">
   <component name="ProjectModuleManager">
     <modules>
+      <module fileurl="file://$PROJECT_DIR$/Hadoop/Hadoop.iml" filepath="$PROJECT_DIR$/Hadoop/Hadoop.iml" />
       <module fileurl="file://$PROJECT_DIR$/.idea/HadoopSS23.iml" filepath="$PROJECT_DIR$/.idea/HadoopSS23.iml" />
     </modules>
   </component>
@@ -1 +0,0 @@
-/target/
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
     <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
   </component>
 </project>
@@ -0,0 +1,99 @@
+package de.hsma.bdea;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskCounter;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.log4j.BasicConfigurator;
+
+import java.io.IOException;
+
+public class WordCountVLCounter {
+
+    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
+        private final static IntWritable one = new IntWritable(1);
+        private Text word = new Text();
+
+        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
+            String[] woerter = value.toString().split("\\W+");
+
+            for (String wort: woerter) {
+                word.set(wort);
+                context.write(word, one);
+            }
+        }
+    }
+
+    public static class IntSumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
+        private IntWritable result = new IntWritable();
+
+        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
+            int sum = 0;
+            for (IntWritable val : values) {
+                sum += val.get();
+            }
+            result.set(sum);
+            context.write(key, result);
+        }
+    }
+
+    // we could also extend IntSumCombiner here to avoid this functional clone
+    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
+        private IntWritable result = new IntWritable();
+
+        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
+            int sum = 0;
+            for (IntWritable val : values) {
+                sum += val.get();
+            }
+            result.set(sum);
+            context.write(key, result);
+
+            // count the words (one increment per reduce key, i.e. per distinct word)
+            context.getCounter("mygroup", "words").increment(1);
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        BasicConfigurator.configure(); // Log4j configuration, or put a config file into the resources folder
+        System.setProperty("hadoop.home.dir", "/"); // required for Hadoop 3.3.0
+
+        Configuration conf = new Configuration();
+
+        Job job = Job.getInstance(conf, "word count");
+        job.setJarByClass(WordCountVLCounter.class);
+        job.setMapperClass(TokenizerMapper.class);
+        job.setCombinerClass(IntSumCombiner.class); // local reduce on the mapper side
+        job.setReducerClass(IntSumReducer.class); // reduce on the reducer side, after the shuffle
+        job.setNumReduceTasks(4);
+        job.setOutputKeyClass(Text.class);
+        job.setOutputValueClass(IntWritable.class);
+
+        FileInputFormat.addInputPath(job, new Path("resources/klassiker"));
+
+        String output1 = "resources/wordcount-output1-" + System.currentTimeMillis();
+        FileOutputFormat.setOutputPath(job, new Path(output1));
+        job.setOutputFormatClass(SequenceFileOutputFormat.class);
+
+        job.waitForCompletion(true);
+
+        // -- Hadoop counters
+        // built-in counter (see TaskCounter.* for others)
+        long outputRecords = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue();
+        //job.getCounters().findCounter(TaskCounter.).setValue();
+
+        // custom counter (important: incremented only in the reducer, not in the combiner)
+        long words = job.getCounters().findCounter("mygroup", "words").getValue();
+
+        // both counts are the same
+        System.out.println("#Records => " + outputRecords);
+        System.out.println("#Words => " + words);
+    }
+}
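Because the job above writes its result with SequenceFileOutputFormat, the output files are binary Text/IntWritable pairs rather than plain text. A minimal sketch, not part of the commit, of how such a part file could be read back with Hadoop's standard SequenceFile.Reader; the class name SequenceFileDump and the path passed in args[0] (e.g. resources/wordcount-output1-<timestamp>/part-r-00000) are assumptions for illustration:

package de.hsma.bdea;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Sketch: dumps the Text/IntWritable pairs written by WordCountVLCounter.
// args[0] is a part file of one job run (hypothetical path, since the
// output directory name contains a timestamp chosen at run time).
public class SequenceFileDump {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path(args[0]);

        try (SequenceFile.Reader reader =
                     new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
            Text key = new Text();                  // matches job.setOutputKeyClass(Text.class)
            IntWritable value = new IntWritable();  // matches job.setOutputValueClass(IntWritable.class)
            while (reader.next(key, value)) {       // returns false at end of file
                System.out.println(key + "\t" + value.get());
            }
        }
    }
}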