gitignore

2023-04-04 10:59:29 +02:00 · 2023-04-04 10:59:29 +02:00 · a093f2f92f
parent 017680ce25
commit a093f2f92f
6 changed files with 137 additions and 56 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,36 @@
+ignite/
+activemq-data/
+.idea/
+target/
+.classpath
+.project
+.classpath
+.settings/
+*.aux
+*.glo
+*.idx
+*.log
+*.toc
+*.ist
+*.acn
+*.acr
+*.alg
+*.bbl
+*.blg
+*.dvi
+*.glg
+*.gls
+*.ilg
+*.ind
+*.lof
+*.lot
+*.maf
+*.mtc
+*.mtc1
+*.out
+*.synctex.gz
+*.scg
+
+#IntelliJ
+*.iml
+/.idea/
--- a/HadoopSS23/.gitignore
+++ b/HadoopSS23/.gitignore
@ -1,55 +0,0 @@
-# ---> Eclipse
-*.pydevproject
-.metadata
-.gradle
-bin/
-tmp/
-*.tmp
-*.bak
-*.swp
-*~.nib
-local.properties
-.settings/
-.loadpath
-
-# Eclipse Core
-.project
-
-# External tool builders
-.externalToolBuilders/
-
-# Locally stored "Eclipse launch configurations"
-*.launch
-
-# CDT-specific
-.cproject
-
-# JDT-specific (Eclipse Java Development Tools)
-.classpath
-
-# Java annotation processor (APT)
-.factorypath
-
-# PDT-specific
-.buildpath
-
-# sbteclipse plugin
-.target
-
-# TeXlipse plugin
-.texlipse
-
-# ---> Java
-*.class
-
-# Mobile Tools for Java (J2ME)
-.mtj.tmp/
-
-# Package Files #
-*.jar
-*.war
-*.ear
-
-# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
-hs_err_pid*
-
--- a/HadoopSS23/.idea/modules.xml
+++ b/HadoopSS23/.idea/modules.xml
@ -2,6 +2,7 @@
 <project version="4">
  <component name="ProjectModuleManager">
    <modules>
+      <module fileurl="file://$PROJECT_DIR$/Hadoop/Hadoop.iml" filepath="$PROJECT_DIR$/Hadoop/Hadoop.iml" />
      <module fileurl="file://$PROJECT_DIR$/.idea/HadoopSS23.iml" filepath="$PROJECT_DIR$/.idea/HadoopSS23.iml" />
    </modules>
  </component>
--- a/HadoopSS23/Hadoop/.gitignore
+++ b/HadoopSS23/Hadoop/.gitignore
@ -1 +0,0 @@
-/target/
--- a/HadoopSS23/Hadoop/.idea/vcs.xml
+++ b/HadoopSS23/Hadoop/.idea/vcs.xml
@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
  </component>
 </project>
--- a/HadoopSS23/Hadoop/src/main/java/de/hsma/bdea/WordCountVLCounter.java
+++ b/HadoopSS23/Hadoop/src/main/java/de/hsma/bdea/WordCountVLCounter.java
@ -0,0 +1,99 @@
+package de.hsma.bdea;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskCounter;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.log4j.BasicConfigurator;
+
+import java.io.IOException;
+
+public class WordCountVLCounter {
+
+	public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
+		private final static IntWritable one = new IntWritable(1);
+		private Text word = new Text();
+
+		public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
+			String[] woerter = value.toString().split("\\W+");
+
+			for (String wort: woerter) {
+				word.set(wort);
+				context.write(word, one); 
+			}
+		}
+	}
+
+	public static class IntSumCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
+		private IntWritable result = new IntWritable();
+
+		public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
+			int sum = 0;
+			for (IntWritable val : values) {
+				sum += val.get();
+			}
+			result.set(sum);
+			context.write(key, result);
+		}
+	}
+
+	// wir könnten auch von IntSumCombiner erben um funktionale Klone zu reduzieren
+	public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
+		private IntWritable result = new IntWritable();
+
+		public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
+			int sum = 0;
+			for (IntWritable val : values) {
+				sum += val.get();
+			}
+			result.set(sum);
+			context.write(key, result);
+
+			// sum up words
+			context.getCounter("mygroup", "words").increment(1);
+		}
+	}
+
+	public static void main(String[] args) throws Exception {
+		BasicConfigurator.configure(); 					// Log4j Config oder ConfigFile in Resources Folder
+		System.setProperty("hadoop.home.dir", "/");  	// zwingend für Hadoop 3.3.0
+
+		Configuration conf = new Configuration();
+
+		Job job = Job.getInstance(conf, "word count");
+		job.setJarByClass(WordCountVLCounter.class);
+		job.setMapperClass(TokenizerMapper.class);
+		job.setCombinerClass(IntSumCombiner.class);	// reduce lokal bei den Mappern
+		job.setReducerClass(IntSumReducer.class);	// reduce nach Verteilung bei den Reducern
+		job.setNumReduceTasks(4);
+		job.setOutputKeyClass(Text.class);
+		job.setOutputValueClass(IntWritable.class);
+
+		FileInputFormat.addInputPath(job, new Path("resources/klassiker"));
+
+		String output1 = "resources/wordcount-output1-" + System.currentTimeMillis();
+		FileOutputFormat.setOutputPath(job, new Path(output1));
+		job.setOutputFormatClass(SequenceFileOutputFormat.class);
+
+		job.waitForCompletion(true);
+
+		// -- Hadoop Counter
+		// default counter (siehe TaskCounter.* für andere)
+		long outputRecords = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue();
+		//job.getCounters().findCounter(TaskCounter.).setValue();
+
+		// custom counter (Wichtig: nur in Reducer und nicht in Combiner
+		long words = job.getCounters().findCounter("mygroup", "words").getValue();
+
+		// Anzahl ist dieselbe
+		System.out.println("#Records => " + outputRecords);
+		System.out.println("#Words => " + words);
+	}
+}