- ANKUSH THAVALI
- 20 Dec, 2021
MapReduce WordCount Program
Mapper Code
// Importing libraries
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WCMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {

    // Map function: emits (word, 1) for every word in the input line
    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter rep)
            throws IOException {
        String line = value.toString();

        // Splitting the line on spaces
        for (String word : line.split(" ")) {
            if (word.length() > 0) {
                output.collect(new Text(word), new IntWritable(1));
            }
        }
    }
}
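The mapper above uses the classic org.apache.hadoop.mapred API. Purely for comparison, here is a minimal sketch of the same logic against the newer org.apache.hadoop.mapreduce API; the class name NewApiWCMapper is illustrative and not part of the original listing.

// A sketch of the same mapper using the newer org.apache.hadoop.mapreduce API
// (class name NewApiWCMapper is illustrative only)
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class NewApiWCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split each input line on spaces and emit a (word, 1) pair per token
        for (String token : value.toString().split(" ")) {
            if (token.length() > 0) {
                word.set(token);
                context.write(word, ONE);
            }
        }
    }
}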
Reducer Code
// Importing libraries
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class WCReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {

    // Reduce function: sums the counts emitted for each word
    public void reduce(Text key, Iterator<IntWritable> value, OutputCollector<Text, IntWritable> output, Reporter rep)
            throws IOException {
        int count = 0;

        // Counting the frequency of each word
        while (value.hasNext()) {
            IntWritable i = value.next();
            count += i.get();
        }

        output.collect(key, new IntWritable(count));
    }
}
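Since addition is associative and commutative, the same reducer class can also act as a combiner, pre-aggregating counts on the map side to cut down shuffle traffic. This is an optional one-line addition to the driver, not part of the original listing:

// Optional: reuse the reducer as a map-side combiner (goes in WCDriver.run)
conf.setCombinerClass(WCReducer.class);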
Driver Code
// Importing libraries
import java.io.IOException;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WCDriver extends Configured implements Tool {

    public int run(String[] args) throws IOException {
        if (args.length < 2) {
            System.out.println("Usage: WCDriver <input path> <output path>");
            return -1;
        }

        // Passing getConf() keeps any generic options supplied via ToolRunner
        JobConf conf = new JobConf(getConf(), WCDriver.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        conf.setMapperClass(WCMapper.class);
        conf.setReducerClass(WCReducer.class);

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // Submits the job and blocks until it finishes
        JobClient.runJob(conf);
        return 0;
    }

    // Main method
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new WCDriver(), args);
        System.exit(exitCode);
    }
}
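Because the driver implements Tool and runs through ToolRunner, generic Hadoop options can be passed on the command line without recompiling. For example, a hypothetical invocation requesting two reducers (using the jar packaged in the run step below) would be:

hadoop jar wordcount.jar WCDriver -D mapreduce.job.reduces=2 /user/hive/input/wordcount.txt /user/hive/output/wordcount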
Add External JAR Files
$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.2.0.jar
$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.2.0.jar
$HADOOP_HOME/share/hadoop/common/hadoop-common-2.2.0.jar
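The versions above assume a Hadoop 2.2.0 installation under $HADOOP_HOME; adjust them to match your cluster. If you are not using an IDE, one way to compile and package the three classes from the command line (a sketch, assuming the .java files sit in the current directory) is:

# Compile against the full Hadoop classpath and package into wordcount.jar
mkdir -p classes
javac -classpath "$(hadoop classpath)" -d classes WCMapper.java WCReducer.java WCDriver.java
jar -cvf wordcount.jar -C classes .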
Input file
hdfs dfs -cat /user/hive/input/wordcount.txt
hi how are you
I am learning mapreduce programme
mapreduce programme is good
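If the sample file exists only on the local filesystem, copy it into HDFS first (assuming a local wordcount.txt):

# Create the input directory in HDFS and upload the local file
hdfs dfs -mkdir -p /user/hive/input
hdfs dfs -put wordcount.txt /user/hive/input/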
Run the MapReduce Program
hadoop jar wordcount.jar WCDriver /user/hive/input/wordcount.txt /user/hive/output/wordcount
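Note that Hadoop will refuse to start a job whose output directory already exists, so delete it before re-running:

# Remove the output directory left over from a previous run, if any
hdfs dfs -rm -r /user/hive/output/wordcount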
MapReduce WordCount Output
hdoop@hadoop:~$ hdfs dfs -cat /user/hive/output/wordcount/part-00000
I	1
always	1
am	1
are	1
good	2
hi	1
how	1
is	2
learning	2
mapreduce	2
programme	2
you	1
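Along with part-00000, the output directory also contains an empty _SUCCESS marker file written when the job completes successfully; both can be listed with:

hdfs dfs -ls /user/hive/output/wordcount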