
exp:16 k-means clustering with scatter plot

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

n = int(input("Enter number of data points: "))
m = int(input("Enter number of features for each data point: "))

data = []
print("\nEnter the data points:")
for i in range(n):
    point = []
    for j in range(m):
        value = float(input(f"  Value {j+1} of point {i+1}: "))
        point.append(value)
    data.append(point)

X = np.array(data)
k = int(input("\nEnter number of clusters (k): "))

kmeans = KMeans(n_clusters=k, random_state=0)
kmeans.fit(X)
labels = kmeans.labels_
centroids = kmeans.cluster_centers_

print("\nCluster Centers:")
print(centroids)
print("\nCluster Labels:")
for i, label in enumerate(labels):
    print(f"Point {i+1}: Cluster {label}")

# Plotting only makes sense for 2-D data
if m == 2:
    plt.figure(figsize=(6, 5))
    plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='rainbow', s=80)
    # The source was truncated here; marking the centroids is the usual
    # final step, so the rest is a standard completion (assumption).
    plt.scatter(centroids[:, 0], centroids[:, 1], c='black', marker='X', s=200, label='Centroids')
    plt.title("K-Means Clustering")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.legend()
    plt.show()
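The script above is interactive; for a quick non-interactive check of the same sklearn API, a minimal sketch (the sample points are made up):

import numpy as np
from sklearn.cluster import KMeans

# Hypothetical sample: six 2-D points forming two obvious groups
X = np.array([[1.0, 2.0], [1.5, 1.8], [1.2, 2.2],
              [8.0, 8.0], [8.5, 7.5], [7.8, 8.3]])

kmeans = KMeans(n_clusters=2, random_state=0, n_init=10).fit(X)
print(kmeans.labels_)           # cluster index of each point
print(kmeans.cluster_centers_)  # one centroid per cluster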

k-means in Java (JDK)

import java.util.*;

public class KMeansDynamic {
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        System.out.print("Enter number of data points: ");
        int n = sc.nextInt();
        System.out.print("Enter number of features: ");
        int f = sc.nextInt();
        double[][] data = new double[n][f];
        for (int i = 0; i < n; i++) {
            System.out.print("Enter features for data point " + (i + 1) + " separated by space: ");
            for (int j = 0; j < f; j++) {
                data[i][j] = sc.nextDouble();
            }
        }
        System.out.print("Enter number of clusters (k): ");
        int k = sc.nextInt();

        // The source was truncated after reading k; the rest is a standard
        // k-means completion (assumption): seed centroids with the first k
        // points (assumes k <= n), then alternate assignment and update steps.
        double[][] cent = new double[k][f];
        for (int c = 0; c < k; c++) cent[c] = data[c].clone();
        int[] label = new int[n];
        for (int iter = 0; iter < 100; iter++) {
            // Assignment step: nearest centroid by squared Euclidean distance
            for (int i = 0; i < n; i++) {
                double best = Double.MAX_VALUE;
                for (int c = 0; c < k; c++) {
                    double d = 0;
                    for (int j = 0; j < f; j++) d += (data[i][j] - cent[c][j]) * (data[i][j] - cent[c][j]);
                    if (d < best) { best = d; label[i] = c; }
                }
            }
            // Update step: move each centroid to the mean of its cluster
            for (int c = 0; c < k; c++) {
                double[] sum = new double[f];
                int cnt = 0;
                for (int i = 0; i < n; i++) {
                    if (label[i] != c) continue;
                    cnt++;
                    for (int j = 0; j < f; j++) sum[j] += data[i][j];
                }
                if (cnt > 0) for (int j = 0; j < f; j++) cent[c][j] = sum[j] / cnt;
            }
        }
        for (int i = 0; i < n; i++) System.out.println("Point " + (i + 1) + " -> Cluster " + label[i]);
        System.out.println("Centroids: " + Arrays.deepToString(cent));
        sc.close();
    }
}

python apriori algorithm

from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd

# Item names are normalised here; the source mixed 'I1' and ' I1'
# (with a leading space), which would have produced duplicate columns.
dataset = [['I1', 'I2', 'I3', 'I4'],
           ['I1', 'I2', 'I3'],
           ['I1', 'I2'],
           ['I1', 'I4'],
           ['I2', 'I4']]

te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
print("Transaction Data:")
print(df)

frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)
print("\nFrequent Itemsets:")
print(frequent_itemsets)

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
print("\nAssociation Rules:")
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

Output (truncated in the source):

Transaction Data:
      I1     I2  ...
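As a sanity check on what apriori and association_rules report, support and confidence can be computed by hand from the five transactions above. A minimal sketch in plain Python (no mlxtend), using the same dataset:

dataset = [['I1', 'I2', 'I3', 'I4'],
           ['I1', 'I2', 'I3'],
           ['I1', 'I2'],
           ['I1', 'I4'],
           ['I2', 'I4']]

# support(X) = fraction of transactions that contain every item of X
def support(items):
    return sum(all(i in t for i in items) for t in dataset) / len(dataset)

print(support({'I1', 'I2'}))                    # 3 of 5 transactions -> 0.6
# confidence(I1 -> I2) = support({I1, I2}) / support({I1}) = 0.6 / 0.8
print(support({'I1', 'I2'}) / support({'I1'}))  # 0.75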

simulated dataset with unique instances

Write a Java program to prepare a simulated dataset with unique instances.

import java.util.*;

class Student {
    String id, firstname, lastname, city;
    Student(String firstname, String lastname, String city) {
        this.id = UUID.randomUUID().toString();  // UUID gives every record a unique id
        this.firstname = firstname;
        this.lastname = lastname;
        this.city = city;
    }
}

public class UniqInstances {
    public static void main(String[] args) {
        ArrayList<Student> student = new ArrayList<>();
        Scanner sc = new Scanner(System.in);
        System.out.println("Enter no. of records: ");
        int n = sc.nextInt();
        sc.nextLine();
        System.out.println("Enter the record details:");
        int count = 0;
        // The source was truncated here; a plausible completion (assumption):
        // read records and reject duplicates so every stored instance is unique.
        Set<String> seen = new HashSet<>();
        while (count < n) {
            System.out.print("firstname lastname city: ");
            String[] p = sc.nextLine().trim().split("\\s+");
            if (p.length < 3) continue;                     // re-prompt on malformed input
            if (seen.add(p[0] + "|" + p[1] + "|" + p[2])) { // add() is false for duplicates
                student.add(new Student(p[0], p[1], p[2]));
                count++;
            } else {
                System.out.println("Duplicate record, enter a different one.");
            }
        }
        System.out.println("\nSimulated dataset (unique instances):");
        for (Student s : student)
            System.out.println(s.id + " " + s.firstname + " " + s.lastname + " " + s.city);
        sc.close();
    }
}

Dissimilarity Matrix

import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform

data = {
    'Attribute1': [2, 4, 3, 5],
    'Attribute2': [6, 8, 5, 9]
}
df = pd.DataFrame(data, index=['Instance1', 'Instance2', 'Instance3', 'Instance4'])
print("Dataset:\n")
print(df)

# pdist returns the condensed (upper-triangle) distances;
# squareform expands them into the full symmetric matrix.
dissimilarity = pdist(df.values, metric='euclidean')
dissimilarity_matrix = squareform(dissimilarity)
dissimilarity_df = pd.DataFrame(
    dissimilarity_matrix,
    index=df.index,
    columns=df.index
)
print("\nDissimilarity Matrix (Euclidean Distance):\n")
print(dissimilarity_df.round(3))

Output:

Dataset:

           Attribute1  Attribute2
Instance1           2           6
Instance2           4           8
Instance3           3           5
Instance4           5           9

Dissimilarity Matrix (Euclidean Distance):

           Instance1  Instance2  Instance3  Instance4
Instance1      0.000      2.828      1.414      4.243
Instance2      2.828      0.000      3.162      1.414
Instance3      1.414      3.162      0.000      4.472
Instance4      4.243      1.414      4.472      0.000
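Each entry is just the Euclidean distance between the two rows; a worked check of one entry (Instance1 = (2, 6), Instance2 = (4, 8)):

import numpy as np

d = np.sqrt((2 - 4)**2 + (6 - 8)**2)  # sqrt(4 + 4) = sqrt(8)
print(round(d, 3))                    # 2.828, matching the matrix above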

simple k-means in python

import numpy as np
import pandas as pd

num_instances = int(input("Enter number of instances (rows): "))
num_attributes = int(input("Enter number of attributes (columns): "))

data = []
for i in range(num_instances):
    print(f"\nEnter values for Instance {i+1}:")
    row = []
    for j in range(num_attributes):
        value = float(input(f" Attribute {j+1}: "))
        row.append(value)
    data.append(row)

df = pd.DataFrame(data, columns=[f'Attribute{j+1}' for j in range(num_attributes)])
print("\nDataset:")
print(df)

k = int(input("\nEnter number of clusters: "))

np.random.seed(42)
centroids = df.sample(n=k).to_numpy()  # random rows as initial centroids

for iteration in range(100):
    # Distance of every instance to every centroid (see the broadcasting sketch below)
    distances = np.linalg.norm(df.values[:, np.newaxis] - centroids, axis=2)
    cluster_labels = np.argmin(distances, axis=1)
    # Recompute each centroid as the mean of its assigned instances
    # (assumes no cluster ends up empty)
    new_centroids = np.array([df.values[cluster_labels == i].mean(axis=0) for i in range(k)])
    # The source was truncated here; the usual ending (assumption):
    # stop once the centroids no longer move.
    if np.allclose(new_centroids, centroids):
        break
    centroids = new_centroids

print("\nFinal Centroids:")
print(centroids)
print("\nCluster Assignments:")
for i, label in enumerate(cluster_labels):
    print(f"Instance {i+1}: Cluster {label}")
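The distance line relies on NumPy broadcasting: the data of shape (n, f) gains a new axis to become (n, 1, f), the centroids are (k, f), subtraction broadcasts to (n, k, f), and the norm over axis 2 leaves an (n, k) table of point-to-centroid distances. A minimal sketch with made-up numbers:

import numpy as np

points = np.array([[1.0, 2.0], [5.0, 6.0], [9.0, 1.0]])  # (3, 2) hypothetical data
centroids = np.array([[1.0, 1.0], [8.0, 2.0]])           # (2, 2) hypothetical centroids

diff = points[:, np.newaxis] - centroids     # broadcasts to shape (3, 2, 2)
distances = np.linalg.norm(diff, axis=2)     # shape (3, 2): one distance per (point, centroid)
print(distances)
print(np.argmin(distances, axis=1))          # nearest centroid per point: [0 1 1]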

naive bayes

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

n = int(input("Enter number of samples: "))
f = int(input("Enter number of features for each sample: "))

x = []
for i in range(n):
    features = []
    for j in range(f):
        val = float(input(f"Enter feature {j+1} value for sample {i+1}: "))
        features.append(val)
    x.append(features)

y = []
for i in range(n):
    label = input(f"Enter label for sample {i+1}: ")
    y.append(label)

test_size = float(input("Enter test size (e.g. 0.2 for 20% test data): "))
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size, random_state=42)

model = GaussianNB()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

print("Accuracy Score:", round(accuracy_score(y_test, y_pred) * 100, 2), "%")
# The source was truncated here; the imports point to the remaining
# reports, so this ending is a reasonable completion (assumption).
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
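The same sklearn calls work end-to-end without prompts; a minimal non-interactive sketch (the sample values are invented):

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Hypothetical toy data: one feature, two well-separated classes
X = [[1.0], [1.2], [0.9], [5.0], [5.2], [4.8], [1.1], [5.1]]
y = ['A', 'A', 'A', 'B', 'B', 'B', 'A', 'B']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
model = GaussianNB().fit(X_train, y_train)
print(accuracy_score(y_test, model.predict(X_test)))  # should be 1.0 on this separable set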