Java Program to Implement Lloyd’s Algorithm

This is a Java Program to implement Lloyd’s Algorithm. The LBG-algorithm or Lloyd’s algorithm allows clustering of vectors of any dimension. This is helpful for example for image classification when using the SIFT or SURF algorithms. It might be also useful if you want to cluster a large amount of points on a map.

Here is the source code of the Java Program to Implement Lloyd’s Algorithm. The Java program is successfully compiled and run on a Windows system. The program output is also shown below.

  1. //This is a java program to implement Lloyd’s Algorithm
  2. import java.util.ArrayList;
  3.  
  4. public class GenLloyd
  5. {
  6.     protected double[][] samplePoints;
  7.     protected double[][] clusterPoints;
  8.  
  9.     int[] pointApproxIndices;
  10.     int pointDimension = 0;
  11.     protected double epsilon = 0.0005;
  12.     protected double avgDistortion = 0.0;
  13.  
  14.     /**
  15.      * Create Generalized Lloyd object with an array of sample points
  16.      */
  17.     public GenLloyd(double[][] samplePoints)
  18.     {
  19.         this.setSamplePoints(samplePoints);
  20.     }
  21.  
  22.     /**
  23.      * Return epsilon parameter (accuracy)
  24.      */
  25.     public double getEpsilon()
  26.     {
  27.         return epsilon;
  28.     }
  29.  
  30.     /**
  31.      * Set epsilon parameter (accuracy). Should be a small number 0.0 < epsilon
  32.      * < 0.1
  33.      */
  34.     public void setEpsilon(double epsilon)
  35.     {
  36.         this.epsilon = epsilon;
  37.     }
  38.  
  39.     /**
  40.      * Set array of sample points
  41.      */
  42.     public void setSamplePoints(double[][] samplePoints)
  43.     {
  44.         if (samplePoints.length > 0)
  45.         {
  46.             this.samplePoints = samplePoints;
  47.             this.pointDimension = samplePoints[0].length;
  48.         }
  49.     }
  50.  
  51.     /**
  52.      * Get array of sample points
  53.      */
  54.     public double[][] getSamplePoints()
  55.     {
  56.         return samplePoints;
  57.     }
  58.  
  59.     /**
  60.      * Get calculated cluster points. <numClusters> cluster points will be
  61.      * calculated and returned
  62.      */
  63.     public double[][] getClusterPoints(int numClusters)
  64.     {
  65.         this.calcClusters(numClusters);
  66.  
  67.         return clusterPoints;
  68.     }
  69.  
  70.     protected void calcClusters(int numClusters)
  71.     {
  72.         // initialize with first cluster
  73.         clusterPoints = new double[1][pointDimension];
  74.  
  75.         double[] newClusterPoint = initializeClusterPoint(samplePoints);
  76.         clusterPoints[0] = newClusterPoint;
  77.  
  78.         if (numClusters > 1)
  79.         {
  80.             // calculate initial average distortion
  81.             avgDistortion = 0.0;
  82.             for (double[] samplePoint : samplePoints)
  83.             {
  84.                 avgDistortion += calcDist(samplePoint, newClusterPoint);
  85.             }
  86.  
  87.             avgDistortion /= (double) (samplePoints.length * pointDimension);
  88.  
  89.             // set up array of point approximization indices
  90.             pointApproxIndices = new int[samplePoints.length];
  91.  
  92.             // split the clusters
  93.             int i = 1;
  94.             do
  95.             {
  96.                 i = splitClusters();
  97.             } while (i < numClusters);
  98.         }
  99.     }
  100.  
  101.     protected int splitClusters()
  102.     {
  103.         int newClusterPointSize = 2;
  104.         if (clusterPoints.length != 1)
  105.         {
  106.             newClusterPointSize = clusterPoints.length * 2;
  107.         }
  108.  
  109.         // split clusters
  110.         double[][] newClusterPoints = new double[newClusterPointSize][pointDimension];
  111.         int newClusterPointIdx = 0;
  112.         for (double[] clusterPoint : clusterPoints)
  113.         {
  114.             newClusterPoints[newClusterPointIdx] = createNewClusterPoint(
  115.                     clusterPoint, -1);
  116.             newClusterPoints[newClusterPointIdx + 1] = createNewClusterPoint(
  117.                     clusterPoint, +1);
  118.  
  119.             newClusterPointIdx += 2;
  120.         }
  121.  
  122.         clusterPoints = newClusterPoints;
  123.  
  124.         // iterate to approximate cluster points
  125.         // int iteration = 0;
  126.         double curAvgDistortion = 0.0;
  127.         do
  128.         {
  129.             curAvgDistortion = avgDistortion;
  130.  
  131.             // find the min values
  132.             for (int pointIdx = 0; pointIdx < samplePoints.length; pointIdx++)
  133.             {
  134.                 double minDist = Double.MAX_VALUE;
  135.                 for (int clusterPointIdx = 0; clusterPointIdx < clusterPoints.length; clusterPointIdx++)
  136.                 {
  137.                     double newMinDist = calcDist(samplePoints[pointIdx],
  138.                             clusterPoints[clusterPointIdx]);
  139.                     if (newMinDist < minDist)
  140.                     {
  141.                         minDist = newMinDist;
  142.                         pointApproxIndices[pointIdx] = clusterPointIdx;
  143.                     }
  144.                 }
  145.             }
  146.  
  147.             // update codebook
  148.             for (int clusterPointIdx = 0; clusterPointIdx < clusterPoints.length; clusterPointIdx++)
  149.             {
  150.                 double[] newClusterPoint = new double[pointDimension];
  151.                 int num = 0;
  152.                 for (int pointIdx = 0; pointIdx < samplePoints.length; pointIdx++)
  153.                 {
  154.                     if (pointApproxIndices[pointIdx] == clusterPointIdx)
  155.                     {
  156.                         addPointValues(newClusterPoint, samplePoints[pointIdx]);
  157.                         num++;
  158.                     }
  159.                 }
  160.  
  161.                 if (num > 0)
  162.                 {
  163.                     multiplyPointValues(newClusterPoint, 1.0 / (double) num);
  164.                     clusterPoints[clusterPointIdx] = newClusterPoint;
  165.                 }
  166.             }
  167.  
  168.             // update average distortion
  169.             avgDistortion = 0.0;
  170.             for (int pointIdx = 0; pointIdx < samplePoints.length; pointIdx++)
  171.             {
  172.                 avgDistortion += calcDist(samplePoints[pointIdx],
  173.                         clusterPoints[pointApproxIndices[pointIdx]]);
  174.             }
  175.  
  176.             avgDistortion /= (double) (samplePoints.length * pointDimension);
  177.  
  178.         } while (((curAvgDistortion - avgDistortion) / curAvgDistortion) > epsilon);
  179.  
  180.         return clusterPoints.length;
  181.     }
  182.  
  183.     protected double[] initializeClusterPoint(double[][] pointsInCluster)
  184.     {
  185.         // calculate point sum
  186.         double[] clusterPoint = new double[pointDimension];
  187.         for (int numPoint = 0; numPoint < pointsInCluster.length; numPoint++)
  188.         {
  189.             addPointValues(clusterPoint, pointsInCluster[numPoint]);
  190.         }
  191.  
  192.         // calculate average
  193.         multiplyPointValues(clusterPoint, 1.0 / (double) pointsInCluster.length);
  194.  
  195.         return clusterPoint;
  196.     }
  197.  
  198.     protected double[] createNewClusterPoint(double[] clusterPoint,
  199.             int epsilonFactor)
  200.     {
  201.         double[] newClusterPoint = new double[pointDimension];
  202.         addPointValues(newClusterPoint, clusterPoint);
  203.         multiplyPointValues(newClusterPoint, 1.0 + (double) epsilonFactor
  204.                 * epsilon);
  205.  
  206.         return newClusterPoint;
  207.     }
  208.  
  209.     protected double calcDist(double[] v1, double[] v2)
  210.     {
  211.         double distSum = 0.0;
  212.         for (int pointIdx = 0; pointIdx < v1.length; pointIdx++)
  213.         {
  214.             double absDist = Math.abs(v1[pointIdx] - v2[pointIdx]);
  215.             distSum += absDist * absDist;
  216.         }
  217.  
  218.         return distSum;
  219.     }
  220.  
  221.     protected void addPointValues(double[] v1, double[] v2)
  222.     {
  223.         for (int pointIdx = 0; pointIdx < v1.length; pointIdx++)
  224.         {
  225.             v1[pointIdx] += v2[pointIdx];
  226.         }
  227.     }
  228.  
  229.     protected void multiplyPointValues(double[] v1, double f)
  230.     {
  231.         for (int pointIdx = 0; pointIdx < v1.length; pointIdx++)
  232.         {
  233.             v1[pointIdx] *= f;
  234.         }
  235.     }
  236.  
  237.     public static void main(String[] args)
  238.     {
  239.         ArrayList<double[]> points = new ArrayList<double[]>();
  240.  
  241.         // points.add(arrayOf(-1.5, -1.5));
  242.         points.add(arrayOf(-1.5, 2.0, 5.0));
  243.         points.add(arrayOf(-2.0, -2.0, 0.0));
  244.         points.add(arrayOf(1.0, 1.0, 2.0));
  245.         points.add(arrayOf(1.5, 1.5, 1.2));
  246.         points.add(arrayOf(1.0, 2.0, 5.6));
  247.         points.add(arrayOf(1.0, -2.0, -2.0));
  248.         points.add(arrayOf(1.0, -3.0, -2.0));
  249.         points.add(arrayOf(1.0, -2.5, -4.5));
  250.  
  251.         GenLloyd gl = new GenLloyd(points.toArray(new double[points.size()][2]));
  252.  
  253.         double[][] results = gl.getClusterPoints(4);
  254.         for (double[] point : results)
  255.         {
  256.             System.out.println("Cluster " + point[0] + ", " + point[1] + ", "
  257.                     + point[2]);
  258.         }
  259.     }
  260.  
  261.     private static double[] arrayOf(double x, double y, double z)
  262.     {
  263.         double[] a = new double[3];
  264.         a[0] = x;
  265.         a[1] = y;
  266.         a[2] = z;
  267.  
  268.         return a;
  269.     }
  270. }

Output:

$ javac GenLloyd.java
$ java GenLloyd
 
Cluster -2.0, -2.0, 0.0
Cluster 1.0, -2.5, -2.833333333333333
Cluster 1.25, 1.25, 1.6
Cluster -0.25, 2.0, 5.3

Sanfoundry Global Education & Learning Series – 1000 Java Programs.

advertisement
advertisement

Here’s the list of Best Books in Java Programming, Data Structures and Algorithms.

If you find any mistake above, kindly email to [email protected]

advertisement
advertisement
Subscribe to our Newsletters (Subject-wise). Participate in the Sanfoundry Certification contest to get free Certificate of Merit. Join our social networks below and stay updated with latest contests, videos, internships and jobs!

Youtube | Telegram | LinkedIn | Instagram | Facebook | Twitter | Pinterest
Manish Bhojasia - Founder & CTO at Sanfoundry
Manish Bhojasia, a technology veteran with 20+ years @ Cisco & Wipro, is Founder and CTO at Sanfoundry. He lives in Bangalore, and focuses on development of Linux Kernel, SAN Technologies, Advanced C, Data Structures & Alogrithms. Stay connected with him at LinkedIn.

Subscribe to his free Masterclasses at Youtube & discussions at Telegram SanfoundryClasses.