K-means clustering in Java (JDK)

import java.util.*;

/**
 * Interactive k-means clustering demo.
 *
 * <p>Reads the number of data points, the number of features, the data points
 * themselves and the number of clusters {@code k} from standard input, runs
 * k-means until no point changes cluster, and prints the final cluster
 * assignments and centroid positions to standard output.
 */
public class KMeansDynamic {

    /**
     * Program entry point: prompts for the input, clusters it and prints the result.
     *
     * <p>If the number of points or features is not positive, or {@code k} is not in
     * {@code [1, n]}, a message is written to standard error and the method returns
     * without clustering.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Deliberately not closed: closing the Scanner would also close System.in.
        Scanner sc = new Scanner(System.in);

        System.out.print("Enter number of data points: ");
        int n = sc.nextInt();
        System.out.print("Enter number of features: ");
        int f = sc.nextInt();
        if (n <= 0 || f <= 0) {
            System.err.println("Number of data points and number of features must both be positive.");
            return;
        }

        double[][] data = new double[n][f];
        for (int i = 0; i < n; i++) {
            System.out.print("Enter features for data point " + (i + 1) + " separated by space: ");
            for (int j = 0; j < f; j++) {
                data[i][j] = sc.nextDouble();
            }
        }

        System.out.print("Enter number of clusters (k): ");
        int k = sc.nextInt();
        if (k <= 0 || k > n) {
            System.err.println("k must be between 1 and the number of data points (" + n + ").");
            return;
        }

        double[][] centroids = pickInitialCentroids(data, k, new Random());

        // Start with "no cluster" (-1) so the first assignment pass always registers
        // as a change; otherwise an all-zero initial labelling could be mistaken for
        // convergence before the centroids have been moved to the cluster means.
        int[] labels = new int[n];
        Arrays.fill(labels, -1);

        boolean changed;
        do {
            changed = assignToNearest(data, centroids, labels);
            centroids = recomputeCentroids(data, labels, centroids);
        } while (changed);

        System.out.println("\nCluster assignments:");
        for (int i = 0; i < n; i++) {
            System.out.println("Data point " + (i + 1) + " -> Cluster " + (labels[i] + 1));
        }

        System.out.println("\nCentroid positions:");
        for (int i = 0; i < k; i++) {
            System.out.print("Cluster " + (i + 1) + ": ");
            for (int j = 0; j < f; j++) {
                System.out.print(centroids[i][j] + " ");
            }
            System.out.println();
        }
    }

    /**
     * Picks {@code k} initial centroids as copies of {@code k} distinct data points,
     * chosen with a partial Fisher-Yates shuffle of the point indices.
     *
     * @param data the data points; must contain at least {@code k} rows
     * @param k    number of centroids to pick
     * @param rand source of randomness
     * @return a new {@code k}-row array of centroid coordinates
     */
    private static double[][] pickInitialCentroids(double[][] data, int k, Random rand) {
        int n = data.length;
        int[] order = new int[n];
        for (int i = 0; i < n; i++) {
            order[i] = i;
        }

        double[][] centroids = new double[k][];
        for (int i = 0; i < k; i++) {
            // Swap a random not-yet-chosen index into slot i, so no index repeats.
            int pick = i + rand.nextInt(n - i);
            int tmp = order[i];
            order[i] = order[pick];
            order[pick] = tmp;
            centroids[i] = data[order[i]].clone();
        }
        return centroids;
    }

    /**
     * Assigns every data point to its nearest centroid, updating {@code labels} in place.
     * Ties go to the lowest-numbered centroid.
     *
     * @param data      the data points
     * @param centroids current centroid positions (at least one)
     * @param labels    current cluster index per point; overwritten with the new assignment
     * @return {@code true} if at least one label changed
     */
    private static boolean assignToNearest(double[][] data, double[][] centroids, int[] labels) {
        boolean changed = false;
        for (int i = 0; i < data.length; i++) {
            int nearest = 0;
            double minDist = distance(data[i], centroids[0]);
            for (int j = 1; j < centroids.length; j++) {
                double dist = distance(data[i], centroids[j]);
                if (dist < minDist) {
                    minDist = dist;
                    nearest = j;
                }
            }
            if (labels[i] != nearest) {
                labels[i] = nearest;
                changed = true;
            }
        }
        return changed;
    }

    /**
     * Computes each cluster's centroid as the mean of the points assigned to it.
     * A cluster with no points keeps its previous centroid instead of collapsing
     * to the origin.
     *
     * @param data     the data points
     * @param labels   cluster index of each point
     * @param previous centroid positions from the previous iteration (at least one)
     * @return a new array of centroid positions
     */
    private static double[][] recomputeCentroids(double[][] data, int[] labels, double[][] previous) {
        int k = previous.length;
        int f = previous[0].length;
        double[][] sums = new double[k][f];
        int[] count = new int[k];

        for (int i = 0; i < data.length; i++) {
            int cluster = labels[i];
            for (int j = 0; j < f; j++) {
                sums[cluster][j] += data[i][j];
            }
            count[cluster]++;
        }

        for (int i = 0; i < k; i++) {
            if (count[i] == 0) {
                sums[i] = previous[i].clone();
                continue;
            }
            for (int j = 0; j < f; j++) {
                sums[i][j] /= count[i];
            }
        }
        return sums;
    }

    /**
     * Returns the Euclidean distance between two points.
     *
     * @param a first point
     * @param b second point; must have at least {@code a.length} coordinates
     * @return the square root of the sum of squared coordinate differences
     */
    static double distance(double[] a, double[] b) {
        double sum = 0;
        for (int i = 0; i < a.length; i++) {
            double diff = a[i] - b[i];
            sum += diff * diff;
        }
        return Math.sqrt(sum);
    }
}

OUTPUT:

Enter number of data points: 6
Enter number of features: 2
Enter features for data point 1 separated by space: 1 1
Enter features for data point 2 separated by space: 2 1
Enter features for data point 3 separated by space: 1 2
Enter features for data point 4 separated by space: 8 8
Enter features for data point 5 separated by space: 9 8
Enter features for data point 6 separated by space: 8 9
Enter number of clusters (k): 2

Cluster assignments:
Data point 1 -> Cluster 2
Data point 2 -> Cluster 2
Data point 3 -> Cluster 2
Data point 4 -> Cluster 1
Data point 5 -> Cluster 1
Data point 6 -> Cluster 1

Centroid positions:
Cluster 1: 8.333333333333334 8.333333333333334
Cluster 2: 1.3333333333333333 1.3333333333333333

Comments

Popular posts from this blog

Dijkstra's Algorithm Matrix

Chi square test