MapReduce WordCount Program

Breadcrumb Abstract Shape
Breadcrumb Abstract Shape
Breadcrumb Abstract Shape
Breadcrumb Abstract Shape
Breadcrumb Abstract Shape
Breadcrumb Abstract Shape
  • User AvatarANKUSH THAVALI
  • 20 Dec, 2021
  • 0 Comments
  • 2 Mins Read

MapReduce WordCount Program

Mapper Code

// Importing libraries
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WCMapper extends MapReduceBase implements Mapper<LongWritable,
												Text, Text, IntWritable> {

	/**
	 * Map function: tokenizes one line of input on single spaces and emits
	 * a (token, 1) pair for every non-empty token.
	 *
	 * @param key    byte offset of the line within the input split (unused)
	 * @param value  the line of text to tokenize
	 * @param output collector receiving the (word, 1) pairs
	 * @param rep    progress reporter (unused)
	 * @throws IOException if the collector fails
	 */
	public void map(LongWritable key, Text value, OutputCollector<Text,
				IntWritable> output, Reporter rep) throws IOException
	{
		// Split strictly on single spaces; adjacent or leading spaces
		// produce empty fragments, which are skipped below.
		String[] tokens = value.toString().split(" ");

		for (int i = 0; i < tokens.length; i++)
		{
			if (tokens[i].isEmpty())
			{
				continue; // skip empty fragments from repeated spaces
			}
			output.collect(new Text(tokens[i]), new IntWritable(1));
		}
	}
}

Reducer Code


// Importing libraries
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class WCReducer extends MapReduceBase implements Reducer<Text,
									IntWritable, Text, IntWritable> {

	/**
	 * Reduce function: sums the partial counts emitted by the mappers for
	 * one word and emits the word together with its total frequency.
	 *
	 * @param key    the word being aggregated
	 * @param value  iterator over the partial counts for this word
	 * @param output collector receiving the (word, total) pair
	 * @param rep    progress reporter (unused)
	 * @throws IOException if the collector fails
	 */
	public void reduce(Text key, Iterator<IntWritable> value,
				OutputCollector<Text, IntWritable> output,
							Reporter rep) throws IOException
	{
		int total = 0;

		// Accumulate every partial count delivered for this key.
		for (Iterator<IntWritable> it = value; it.hasNext(); )
		{
			total += it.next().get();
		}

		output.collect(key, new IntWritable(total));
	}
}

Driver Code


// Importing libraries
import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WCDriver extends Configured implements Tool {

public int run(String args[]) throws IOException
{
if (args.length &lt; 2)
{
System.out.println("Please give valid inputs");
return -1;
}

JobConf conf = new JobConf(WCDriver.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
conf.setMapperClass(WCMapper.class);
conf.setReducerClass(WCReducer.class);
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(IntWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
JobClient.runJob(conf);
return 0;
}

// Main Method
public static void main(String args[]) throws Exception
{
int exitCode = ToolRunner.run(new WCDriver(), args);
System.out.println(exitCode);
}
}

Add external Jar Files

$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.2.0.jar
$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.2.0.jar
$HADOOP_HOME/share/hadoop/common/hadoop-common-2.2.0.jar

Input file

hdfs dfs -cat /user/hive/input/wordcount.txt

hi how are you
I am learning mapreduce programme
mapreduce programme is good

Run the MapReduce program

hadoop jar wordcount.jar WCDriver /user/hive/input/wordcount.txt /user/hive/output/wordcount

Mapreduce Wordcount output


hdoop@hadoop:~$ hdfs dfs -cat /user/hive/output/wordcount/part-00000

I       1
am      1
are     1
good    1
hi      1
how     1
is      1
learning        1
mapreduce       2
programme       2
you     1