Kiran Dalvi
- 20 Dec, 2021
MapReduce WordCount Program
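This post walks through the classic Hadoop WordCount job, written against the older org.apache.hadoop.mapred API: a mapper emits a (word, 1) pair for every word it sees, a reducer sums those counts per word, and a driver wires the two together and submits the job.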
Mapper Code
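The mapper receives each line of the input as a Text value, splits it on spaces, and emits a (word, 1) pair for every non-empty token.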
// Importing libraries
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WCMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {

    // Map function
    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output,
                    Reporter rep) throws IOException {

        String line = value.toString();

        // Splitting the line on spaces
        for (String word : line.split(" ")) {
            if (word.length() > 0) {
                output.collect(new Text(word), new IntWritable(1));
            }
        }
    }
}
Reducer Code
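The reducer receives each word together with an iterator over all the 1s emitted for it and adds them up to produce the word's total count.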
// Importing libraries
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class WCReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {

    // Reduce function
    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output,
                       Reporter rep) throws IOException {

        int count = 0;

        // Counting the frequency of each word
        while (values.hasNext()) {
            IntWritable i = values.next();
            count += i.get();
        }

        output.collect(key, new IntWritable(count));
    }
}
Driver Code
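The driver reads the input and output paths from the command line, registers the mapper and reducer classes along with their key/value types, and submits the job via JobClient.runJob.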
// Importing libraries
import java.io.IOException;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WCDriver extends Configured implements Tool {

    public int run(String args[]) throws IOException {
        if (args.length < 2) {
            System.out.println("Please give valid inputs");
            return -1;
        }

        JobConf conf = new JobConf(WCDriver.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        conf.setMapperClass(WCMapper.class);
        conf.setReducerClass(WCReducer.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        JobClient.runJob(conf);
        return 0;
    }

    // Main method
    public static void main(String args[]) throws Exception {
        int exitCode = ToolRunner.run(new WCDriver(), args);
        System.out.println(exitCode);
    }
}
Add External JAR Files
Compiling the job requires the following Hadoop JARs on the classpath:
$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.2.0.jar
$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.2.0.jar
$HADOOP_HOME/share/hadoop/common/hadoop-common-2.2.0.jar
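With these JARs on the classpath, the classes can be compiled and packaged into the jar used below. A minimal sketch, assuming the three source files sit in the current directory and the compiled classes go into a classes directory (both assumptions, not from the original post):

mkdir -p classes

# Compile against the Hadoop client and common JARs listed above
javac -classpath "$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.2.0.jar:$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.2.0.jar:$HADOOP_HOME/share/hadoop/common/hadoop-common-2.2.0.jar" -d classes WCMapper.java WCReducer.java WCDriver.java

# Package the compiled classes into wordcount.jar
jar -cvf wordcount.jar -C classes .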
Input File
hdfs dfs -cat /user/hive/input/wordcount.txt
hi how are you
I am learning mapreduce programme
mapreduce programme is good
learning is always good
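If the input file is not in HDFS yet, it can be uploaded first. A quick sketch, assuming a local wordcount.txt containing the lines above:

hdfs dfs -mkdir -p /user/hive/input
hdfs dfs -put wordcount.txt /user/hive/input/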
Run the MapReduce Program
hadoop jar wordcount.jar WCDriver /user/hive/input/wordcount.txt /user/hive/output/wordcount
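Note that Hadoop refuses to run if the output directory already exists, so delete it before rerunning the job:

hdfs dfs -rm -r /user/hive/output/wordcount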
MapReduce WordCount Output
hdoop@hadoop:~$ hdfs dfs -cat /user/hive/output/wordcount/part-00000
I	1
always	1
am	1
are	1
good	2
hi	1
how	1
is	2
learning	2
mapreduce	2
programme	2
you	1
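All results land in a single part-00000 file because the job runs with one reducer (the default), and the words appear in sorted order because keys are sorted during the shuffle phase between map and reduce.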