java apriori

import java.util.*;

/**
 * Minimal Apriori frequent-itemset miner over an in-memory list of transactions.
 *
 * <p>The support of an itemset is the fraction of transactions that contain every
 * one of its items. {@link #apriori} starts from the frequent single items and grows
 * itemsets one item at a time, stopping at the first size for which no itemset
 * reaches the minimum support.
 */
public class Apriori {
    /** Sample transactions mined by {@link #main}. */
    static List<Set<String>> dataset = Arrays.asList(
            new HashSet<>(Arrays.asList("Milk", "Bread", "Eggs")),
            new HashSet<>(Arrays.asList("Milk", "Bread")),
            new HashSet<>(Arrays.asList("Milk", "Eggs")),
            new HashSet<>(Arrays.asList("Bread", "Eggs")),
            new HashSet<>(Arrays.asList("Milk", "Bread", "Butter"))
    );

    /** Minimum support, as a fraction of all transactions, used by {@link #main}. */
    static double minSupport = 0.4;

    /**
     * Collects every distinct item that occurs in at least one transaction.
     *
     * @param dataset the transactions to scan
     * @return a new mutable set holding the union of all transactions
     */
    public static Set<String> getAllItems(List<Set<String>> dataset) {
        Set<String> items = new HashSet<>();
        for (Set<String> transaction : dataset) {
            items.addAll(transaction);
        }
        return items;
    }

    /**
     * Finds the single items whose support is at least {@code minSupport}.
     *
     * @param dataset    the transactions to scan
     * @param minSupport minimum fraction of transactions an item must appear in
     * @return a map from each frequent one-element itemset to its support
     */
    public static Map<Set<String>, Double> getFrequent1Itemsets(List<Set<String>> dataset, double minSupport) {
        Map<Set<String>, Double> freqItems = new HashMap<>();
        // An empty dataset yields no items, so the loop (and the division inside
        // supportOf) is never reached in that case.
        for (String item : getAllItems(dataset)) {
            Set<String> itemset = new HashSet<>();
            itemset.add(item);
            double support = supportOf(dataset, itemset);
            if (support >= minSupport) {
                freqItems.put(itemset, support);
            }
        }
        return freqItems;
    }

    /**
     * Builds candidate itemsets of size {@code k} by taking the union of every pair of
     * itemsets in {@code prevFreqItemsets} and keeping the unions that have exactly
     * {@code k} items.
     *
     * @param prevFreqItemsets the itemsets to join pairwise
     * @param k                the required size of each candidate
     * @return the distinct candidates, in the order they were first produced
     */
    public static List<Set<String>> generateCandidates(List<Set<String>> prevFreqItemsets, int k) {
        // LinkedHashSet de-duplicates in constant time per candidate while keeping
        // first-seen order, instead of a linear List.contains scan for every pair.
        Set<Set<String>> unique = new LinkedHashSet<>();
        int size = prevFreqItemsets.size();
        for (int i = 0; i < size; i++) {
            for (int j = i + 1; j < size; j++) {
                Set<String> candidate = new HashSet<>(prevFreqItemsets.get(i));
                candidate.addAll(prevFreqItemsets.get(j));
                if (candidate.size() == k) {
                    unique.add(candidate);
                }
            }
        }
        return new ArrayList<>(unique);
    }

    /**
     * Keeps the candidates whose support is at least {@code minSupport}.
     *
     * @param dataset    the transactions to count against
     * @param candidates the itemsets to test
     * @param minSupport minimum fraction of transactions that must contain a candidate
     * @return a map from each surviving candidate to its support; empty when
     *         {@code dataset} is empty
     */
    public static Map<Set<String>, Double> filterCandidates(List<Set<String>> dataset, List<Set<String>> candidates, double minSupport) {
        Map<Set<String>, Double> freqItems = new HashMap<>();
        if (dataset.isEmpty()) {
            // Support is undefined (0/0) without transactions; nothing can be frequent.
            return freqItems;
        }
        for (Set<String> candidate : candidates) {
            double support = supportOf(dataset, candidate);
            if (support >= minSupport) {
                freqItems.put(candidate, support);
            }
        }
        return freqItems;
    }

    /**
     * Runs the level-wise search: frequent 1-itemsets first, then candidates of size
     * 2, 3, ... built from the previous level, until a level has no frequent itemset.
     *
     * @param dataset    the transactions to mine
     * @param minSupport minimum fraction of transactions that must contain an itemset
     * @return a map from every frequent itemset to its support; iteration visits
     *         smaller itemsets before larger ones
     */
    public static Map<Set<String>, Double> apriori(List<Set<String>> dataset, double minSupport) {
        // LinkedHashMap keeps the levels in the order they were discovered.
        Map<Set<String>, Double> allFreqItemsets = new LinkedHashMap<>();
        Map<Set<String>, Double> currentFreqItemsets = getFrequent1Itemsets(dataset, minSupport);
        allFreqItemsets.putAll(currentFreqItemsets);
        int k = 2;
        while (!currentFreqItemsets.isEmpty()) {
            Set<Set<String>> prevFrequent = currentFreqItemsets.keySet();
            List<Set<String>> candidates = new ArrayList<>();
            for (Set<String> candidate : generateCandidates(new ArrayList<>(prevFrequent), k)) {
                // Prune step: a superset can never be more frequent than any of its
                // subsets, so a candidate with an infrequent (k-1)-subset cannot pass
                // the support threshold and need not be counted against the dataset.
                if (!hasInfrequentSubset(candidate, prevFrequent)) {
                    candidates.add(candidate);
                }
            }
            currentFreqItemsets = filterCandidates(dataset, candidates, minSupport);
            allFreqItemsets.putAll(currentFreqItemsets);
            k++;
        }
        return allFreqItemsets;
    }

    /**
     * Mines the sample {@link #dataset} with {@link #minSupport} and prints each
     * frequent itemset with its support formatted to two decimals.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Map<Set<String>, Double> frequentItemsets = apriori(dataset, minSupport);

        System.out.println("Frequent Itemsets with Support:");
        for (Map.Entry<Set<String>, Double> entry : frequentItemsets.entrySet()) {
            System.out.println(entry.getKey() + ": " + String.format("%.2f", entry.getValue()));
        }
    }

    /** Returns the fraction of transactions containing every item of {@code itemset}; callers ensure a non-empty list. */
    private static double supportOf(List<Set<String>> transactions, Set<String> itemset) {
        int count = 0;
        for (Set<String> transaction : transactions) {
            if (transaction.containsAll(itemset)) {
                count++;
            }
        }
        return (double) count / transactions.size();
    }

    /** Returns true when removing some single item from {@code candidate} gives a set absent from {@code prevFrequent}. */
    private static boolean hasInfrequentSubset(Set<String> candidate, Set<Set<String>> prevFrequent) {
        // One scratch copy is reused: drop an item, test the subset, put the item back.
        Set<String> subset = new HashSet<>(candidate);
        for (String item : candidate) {
            subset.remove(item);
            if (!prevFrequent.contains(subset)) {
                return true;
            }
            subset.add(item);
        }
        return false;
    }
}

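Output (the order of entries, and of items within each set, may vary because the itemsets are stored in hash-based collections):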
Frequent Itemsets with Support:
[Milk]: 0.80
[Bread]: 0.80
[Eggs]: 0.60
[Milk, Bread]: 0.60
[Milk, Eggs]: 0.40
[Bread, Eggs]: 0.40

Comments

Popular posts from this blog

Dijkstra's Algorithm Matrix

k means JDK java

Chi square test