Blog

Mapper Code

// Importing libraries
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WCMapper extends MapReduceBase implements Mapper<LongWritable,
												Text, Text, IntWritable> {

	// Map function
	public void map(LongWritable key, Text value, OutputCollector<Text,
				IntWritable> output, Reporter rep) throws IOException
	{

		String line = value.toString();

		// Splitting the line on spaces
		for (String word : line.split(" "))
		{
			if (word.length() > 0)
			{
				output.collect(new Text(word), new IntWritable(1));
			}
		}
	}
}

Reducer Code


// Importing libraries
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

/**
 * Word-count reducer: adds up the counts received for a key and emits
 * a single {@code (key, total)} pair.
 */
public class WCReducer extends MapReduceBase implements Reducer<Text,
									IntWritable, Text, IntWritable> {

	/**
	 * Sums every value supplied for {@code key} and collects the total.
	 *
	 * @param key    the key whose values are being aggregated
	 * @param value  iterator over the counts associated with {@code key}
	 * @param output collector receiving the {@code (key, total)} pair
	 * @param rep    progress reporter (not used here)
	 * @throws IOException if the collector fails to accept the pair
	 */
	public void reduce(Text key, Iterator<IntWritable> value,
				OutputCollector<Text, IntWritable> output,
							Reporter rep) throws IOException
	{
		int total = 0;

		// Drain the iterator, accumulating each count into the running total.
		for (; value.hasNext(); )
		{
			total += value.next().get();
		}

		IntWritable result = new IntWritable(total);
		output.collect(key, result);
	}
}

Driver Code


// Importing libraries
import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WCDriver extends Configured implements Tool {

public int run(String args[]) throws IOException
{
if (args.length &lt; 2)
{
System.out.println("Please give valid inputs");
return -1;
}

JobConf conf = new JobConf(WCDriver.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
conf.setMapperClass(WCMapper.class);
conf.setReducerClass(WCReducer.class);
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(IntWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
JobClient.runJob(conf);
return 0;
}

// Main Method
public static void main(String args[]) throws Exception
{
int exitCode = ToolRunner.run(new WCDriver(), args);
System.out.println(exitCode);
}
}

Add external Jar Files

$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.2.0.jar
$HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.2.0.jar
$HADOOP_HOME/share/hadoop/common/hadoop-common-2.2.0.jar

Input file

hdfs dfs -cat /user/hive/input/wordcount.txt

hi how are you
I am learning mapreduce programme
mapreduce programme is good
learning is always good

Run the MapReduce program

hadoop jar wordcount.jar WCDriver /user/hive/input/wordcount.txt /user/hive/output/wordcount

Mapreduce Wordcount output


hdoop@hadoop:~$ hdfs dfs -cat /user/hive/output/wordcount/part-00000

I       1
always  1
am      1
are     1
good    2
hi      1
how     1
is      2
learning        2
mapreduce       2
programme       2
you     1


Follow me

Contact us for Training/ Job Support

Caution: Your use of any information or materials on this website is entirely at your own risk. They are provided for educational purposes only. The code has been tested internally; however, we do not guarantee that it will work for you. Make sure you run it in a test environment before using it anywhere else.