MapReduce WordCount Program

Breadcrumb Abstract Shape
Breadcrumb Abstract Shape
Breadcrumb Abstract Shape
Breadcrumb Abstract Shape
Breadcrumb Abstract Shape
Breadcrumb Abstract Shape
  • User AvatarANKUSH THAVALI
  • 20 Dec, 2021
  • 0 Comments
  • 2 Mins Read

MapReduce WordCount Program

Mapper Code

// Importing libraries
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WCMapper extends MapReduceBase implements Mapper<LongWritable,
												Text, Text, IntWritable> {

	/**
	 * Map function: tokenizes one line of input on single spaces and emits
	 * a (token, 1) pair for every non-empty token.
	 *
	 * @param key    byte offset of the line within the input split (unused)
	 * @param value  the line of text to tokenize
	 * @param output collector receiving the (word, 1) pairs
	 * @param rep    progress reporter (unused)
	 * @throws IOException if the collector fails
	 */
	public void map(LongWritable key, Text value, OutputCollector<Text,
				IntWritable> output, Reporter rep) throws IOException
	{
		// Split strictly on single spaces; adjacent or leading spaces
		// produce empty fragments, which are skipped below.
		String[] tokens = value.toString().split(" ");

		for (int i = 0; i < tokens.length; i++)
		{
			if (tokens[i].isEmpty())
			{
				continue; // skip empty fragments from repeated spaces
			}
			output.collect(new Text(tokens[i]), new IntWritable(1));
		}
	}
}

Reducer Code


// Importing libraries
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class WCReducer extends MapReduceBase implements Reducer<Text,
									IntWritable, Text, IntWritable> {

	/**
	 * Reduce function: sums the partial counts emitted by the mappers for
	 * one word and emits the word together with its total frequency.
	 *
	 * @param key    the word being aggregated
	 * @param value  iterator over the partial counts for this word
	 * @param output collector receiving the (word, total) pair
	 * @param rep    progress reporter (unused)
	 * @throws IOException if the collector fails
	 */
	public void reduce(Text key, Iterator<IntWritable> value,
				OutputCollector<Text, IntWritable> output,
							Reporter rep) throws IOException
	{
		int total = 0;

		// Accumulate every partial count delivered for this key.
		for (Iterator<IntWritable> it = value; it.hasNext(); )
		{
			total += it.next().get();
		}

		output.collect(key, new IntWritable(total));
	}
}

Driver Code


// Importing libraries
import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WCDriver extends Configured implements Tool {

public int run(String args[]) throws IOException
{
if (args.length &lt; 2)
{
System.out.println("Please give valid inputs");
return -1;
}

JobConf conf = new JobConf(WCDriver.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
conf.setMapperClass(WCMapper.class);
conf.setReducerClass(WCReducer.class);
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(IntWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
JobClient.runJob(conf);
return 0;
}

// Main Method
public static void main(String args[]) throws Exception
{
int exitCode = ToolRunner.run(new WCDriver(), args);
System.out.println(exitCode);
}
}

Add external Jar Files

$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.2.0.jar
$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.2.0.jar
$HADOOP_HOME/share/hadoop/common/hadoop-common-2.2.0.jar

Input file

hdfs dfs -cat /user/hive/input/wordcount.txt

hi how are you
I am learning mapreduce programme
mapreduce programme is good

Run the MapReduce program

hadoop jar wordcount.jar WCDriver /user/hive/input/wordcount.txt /user/hive/output/wordcount

Mapreduce Wordcount output


hdoop@hadoop:~$ hdfs dfs -cat /user/hive/output/wordcount/part-00000

I       1
am      1
are     1
good    1
hi      1
how     1
is      1
learning        1
mapreduce       2
programme       2
you     1