Apriori algorithm in Java
import java.util.*;
/**
 * Minimal implementation of the Apriori frequent-itemset mining algorithm.
 *
 * <p>An itemset is <em>frequent</em> when the fraction of transactions that contain all
 * of its items (its support) is at least {@code minSupport}. Frequent itemsets are built
 * level by level: frequent k-itemsets are derived from the frequent (k-1)-itemsets.
 *
 * <p>Results are kept in insertion-ordered collections, so itemsets are reported level
 * by level (1-itemsets first) and, within a level, in the order in which they were
 * generated.
 */
public class Apriori {
    /**
     * Demo transactions used by {@link #main(String[])}. Insertion-ordered sets are used
     * so that items are discovered in the order in which they are written here.
     */
    static List<Set<String>> dataset = Arrays.asList(
            new LinkedHashSet<>(Arrays.asList("Milk", "Bread", "Eggs")),
            new LinkedHashSet<>(Arrays.asList("Milk", "Bread")),
            new LinkedHashSet<>(Arrays.asList("Milk", "Eggs")),
            new LinkedHashSet<>(Arrays.asList("Bread", "Eggs")),
            new LinkedHashSet<>(Arrays.asList("Milk", "Bread", "Butter"))
    );

    /** Demo support threshold used by {@link #main(String[])}, as a fraction of all transactions. */
    static double minSupport = 0.4;

    /**
     * Collects every distinct item that occurs in any transaction.
     *
     * @param dataset the transactions to scan
     * @return the distinct items, in the order in which they are first encountered
     */
    public static Set<String> getAllItems(List<Set<String>> dataset) {
        Set<String> items = new LinkedHashSet<>();
        for (Set<String> transaction : dataset) {
            items.addAll(transaction);
        }
        return items;
    }

    /**
     * Finds the frequent itemsets of size one.
     *
     * @param dataset    the transactions to scan
     * @param minSupport minimum support, as a fraction of the number of transactions
     * @return each frequent single-item set mapped to its support
     */
    public static Map<Set<String>, Double> getFrequent1Itemsets(List<Set<String>> dataset, double minSupport) {
        List<Set<String>> singletons = new ArrayList<>();
        for (String item : getAllItems(dataset)) {
            Set<String> itemset = new LinkedHashSet<>();
            itemset.add(item);
            singletons.add(itemset);
        }
        // A single item is just a candidate of size one, so the counting logic is shared.
        return filterCandidates(dataset, singletons, minSupport);
    }

    /**
     * Join step: builds candidate k-itemsets as unions of pairs of the previous level's
     * frequent itemsets, keeping only unions that contain exactly {@code k} items.
     *
     * @param prevFreqItemsets the frequent itemsets of the previous level
     * @param k                the size of the candidates to generate
     * @return the distinct candidates, in the order in which they are first generated
     */
    public static List<Set<String>> generateCandidates(List<Set<String>> prevFreqItemsets, int k) {
        // A hashed, insertion-ordered set removes duplicates without a linear scan per candidate.
        Set<Set<String>> unique = new LinkedHashSet<>();
        int n = prevFreqItemsets.size();
        for (int i = 0; i < n; i++) {
            for (int j = i + 1; j < n; j++) {
                Set<String> candidate = new LinkedHashSet<>(prevFreqItemsets.get(i));
                candidate.addAll(prevFreqItemsets.get(j));
                if (candidate.size() == k) {
                    unique.add(candidate);
                }
            }
        }
        return new ArrayList<>(unique);
    }

    /**
     * Counts the support of each candidate and keeps those that reach the threshold.
     *
     * <p>For an empty dataset the support is {@code NaN}, which never reaches the
     * threshold, so the result is empty.
     *
     * @param dataset    the transactions to scan
     * @param candidates the itemsets whose support is to be measured
     * @param minSupport minimum support, as a fraction of the number of transactions
     * @return each candidate whose support is at least {@code minSupport}, mapped to its support
     */
    public static Map<Set<String>, Double> filterCandidates(List<Set<String>> dataset, List<Set<String>> candidates, double minSupport) {
        Map<Set<String>, Double> freqItems = new LinkedHashMap<>();
        int totalTransactions = dataset.size();
        for (Set<String> candidate : candidates) {
            int count = 0;
            for (Set<String> transaction : dataset) {
                if (transaction.containsAll(candidate)) {
                    count++;
                }
            }
            double support = (double) count / totalTransactions;
            if (support >= minSupport) {
                freqItems.put(candidate, support);
            }
        }
        return freqItems;
    }

    /**
     * Runs Apriori over the dataset.
     *
     * @param dataset    the transactions to mine
     * @param minSupport minimum support, as a fraction of the number of transactions
     * @return every frequent itemset mapped to its support, ordered level by level
     */
    public static Map<Set<String>, Double> apriori(List<Set<String>> dataset, double minSupport) {
        Map<Set<String>, Double> allFreqItemsets = new LinkedHashMap<>();
        Map<Set<String>, Double> currentFreqItemsets = getFrequent1Itemsets(dataset, minSupport);
        allFreqItemsets.putAll(currentFreqItemsets);
        for (int k = 2; !currentFreqItemsets.isEmpty(); k++) {
            List<Set<String>> joined = generateCandidates(new ArrayList<>(currentFreqItemsets.keySet()), k);
            // Prune before counting: this saves a full pass over the dataset per discarded candidate.
            List<Set<String>> candidates = pruneCandidates(joined, currentFreqItemsets.keySet());
            currentFreqItemsets = filterCandidates(dataset, candidates, minSupport);
            allFreqItemsets.putAll(currentFreqItemsets);
        }
        return allFreqItemsets;
    }

    /**
     * Prune step: drops candidates that have at least one infrequent (k-1)-subset. Such a
     * candidate cannot be frequent, because an itemset's support never exceeds the support
     * of any of its subsets.
     */
    private static List<Set<String>> pruneCandidates(List<Set<String>> candidates, Set<Set<String>> prevFrequent) {
        List<Set<String>> kept = new ArrayList<>();
        for (Set<String> candidate : candidates) {
            if (allSubsetsFrequent(candidate, prevFrequent)) {
                kept.add(candidate);
            }
        }
        return kept;
    }

    /** Returns whether every subset obtained by removing one item from the candidate is in {@code prevFrequent}. */
    private static boolean allSubsetsFrequent(Set<String> candidate, Set<Set<String>> prevFrequent) {
        for (String item : candidate) {
            Set<String> subset = new HashSet<>(candidate);
            subset.remove(item);
            if (!prevFrequent.contains(subset)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Mines the demo dataset and prints every frequent itemset with its support.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Map<Set<String>, Double> frequentItemsets = apriori(dataset, minSupport);
        System.out.println("Frequent Itemsets with Support:");
        for (Map.Entry<Set<String>, Double> entry : frequentItemsets.entrySet()) {
            // Locale.ROOT keeps the decimal point stable regardless of the default locale.
            System.out.println(entry.getKey() + ": " + String.format(Locale.ROOT, "%.2f", entry.getValue()));
        }
    }
}
Output
Frequent Itemsets with Support:
[Milk]: 0.80
[Bread]: 0.80
[Eggs]: 0.60
[Milk, Bread]: 0.60
[Milk, Eggs]: 0.40
[Bread, Eggs]: 0.40
Comments
Post a Comment