Km algorithm hadoop | Computer Science homework help

The current program runs only one iteration of the KMeans algorithm. Please revise it (in the main function) to implement iterative processing, paste your code here, and briefly describe how it works. Were you able to successfully compile and run your program (yes/no)?

import java.util.StringTokenizer;

Don't use plagiarized sources. Get Your Custom Essay on
Km algorithm hadoop | Computer Science homework help
Just from $13/Page
Order Essay

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class KMeans {

 public static class KMMapper
      extends Mapper<Object, Text, IntWritable, Text>{

   private double [][] _centroids;
   private IntWritable cid = new IntWritable();

   public void setup(Mapper.Context context){
     Configuration conf = context.getConfiguration();
     String filename = conf.get(“Centroids-file”);
     _centroids = loadCentroids(filename, conf);

   public void map(Object key, Text value, Context context
                   ) throws IOException, InterruptedException {
     double [] vec = parseVector(value.toString());
     context.write(cid, value);

   private int closest(double [] v){
     double mindist = dist(v, _centroids[0]);
     int label =0;
     for (int i=1; i<_centroids.length; i++){
       double t = dist(v, _centroids[i]);
       if (mindist>t){
         mindist = t;
         label = i;
     return label;


 public static class KMReducer
      extends Reducer<IntWritable, Text, IntWritable, Text> {
   // write output: cid t centroid_vector
   private Text result = new Text();

   public void reduce(IntWritable key, Iterable<Text> vectors,
                      Context context
                      ) throws IOException, InterruptedException {
     double [] sum = null;
     int n=0;
     for (Text vec : vectors) {
       double [] v = parseVector(vec.toString());
       if (sum == null) sum = v;
         for (int i = 0; i < v.length; i++)
           sum[i] += v[i];
       n ++;
     String out = Double.toString(sum[0]/n);
     for (int i = 1; i < sum.length; i ++ ){
       out +=  “,” + Double.toString(sum[i]/n); // csv output
     context.write(key, result);

 // compute square Euclidean distance between two vectors v1 and v2
 public static double dist(double [] v1, double [] v2){
   double sum=0;
   for (int i=0; i< v1.length; i++){
     double d = v1[i]-v2[i];
     sum += d*d;
   return Math.sqrt(sum);

 // check convergence condition
 // max{dist(c1[i], c2[i]), i=1..numClusters < threshold
 private boolean converge(double [][] c1, double [][] c2, double threshold){
   // c1 and c2 are two sets of centroids
   double maxv = 0;
   for (int i=0; i< c1.length; i++){
       double d= dist(c1[i], c2[i]);
       if (maxv<d)
           maxv = d;

   if (maxv <threshold)
     return true;
     return false;

 public static double [][] loadCentroids(String filename, Configuration conf){

   double [][] centroids=null;
   Path p = new Path(filename);  // Path is used for opening the file.
     FileSystem fs = FileSystem.get(conf);//determines local or HDFS
     FSDataInputStream file =;
     byte[] bs = new byte[file.available()];;
     String [] lines = (new String(bs)).split(“n”); //lines are separated by n
     for (String line:lines)
     centroids = new double[lines.length][];
     for (int i = 0; i < lines.length; i++){
       // cid t centroid
       String [] parts = lines[i].split(“t”);
       int cid = Integer.parseInt(parts[0]);
       centroids[cid] = parseVector(parts[1]);
   }catch(Exception e){
   return centroids;

 public static double [] parseVector(String s){
   String [] itr = s.split(“,”); // comma separated
   double [] v = new double[itr.length];
   for (int i = 0; i < itr.length; i++)
     v[i] = Double.parseDouble(itr[i]);
   return v;
 public static void main(String[] args) throws Exception {
   // usage: hadoop jar km.jar hdfs://localhost:9000/user/your_home_directory/centroids data.hdfs output
   Configuration conf = new Configuration();
   conf.set(“Centroids-file”, args[0]);
   Job job = Job.getInstance(conf, “KMeans”);
   FileInputFormat.addInputPath(job, new Path(args[1]));
   FileOutputFormat.setOutputPath(job, new Path(args[2]));    
   System.exit(job.waitForCompletion(true) ? 0 : 1);


Calculate the price of your paper

Total price:$26
Our features

We've got everything to become your favourite writing service

Need a better grade?
We've got you covered.

Order your paper