Expt No: 9 Clustering Algorithms
Date:
Aim: To write a program to demonstrate clustering algorithms.
Program
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
# Read 'Mall_Customers.csv' and discard the fields that are not used as
# clustering features, leaving only the remaining columns in `data`.
unused_fields = ['CustomerID', 'Genre', 'Spending Score']
data = pd.read_csv('Mall_Customers.csv').drop(columns=unused_fields)
# Use the Silhouette Score method to find the optimal number of clusters.
# Each candidate cluster count from 2 to 10 is fitted with KMeans and the
# mean silhouette score of the resulting labelling is recorded.
cluster_range = range(2, 11)
silhouette_scores = []
for n_clusters in cluster_range:
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(data)
    silhouette_avg = silhouette_score(data, cluster_labels)
    silhouette_scores.append(silhouette_avg)

# Display the optimal number of clusters: the candidate with the highest
# score (the first one on ties, because list.index returns the first match).
# Indexing the candidate range avoids a hand-maintained "+ 2" offset.
best_index = silhouette_scores.index(max(silhouette_scores))
optimal_clusters = cluster_range[best_index]
print("Optimal number of clusters:", optimal_clusters)
# Plot the silhouette score recorded for each candidate cluster count (2..10)
# on the current axes, then show the figure.
score_axes = plt.gca()
score_axes.plot(range(2, 11), silhouette_scores, marker='o')
score_axes.set_xlabel('Number of Clusters')
score_axes.set_ylabel('Silhouette Score')
score_axes.set_title('Silhouette Score vs Number of Clusters')
plt.show()
# Display KMeans clusters: refit KMeans with the selected cluster count and
# label every row of `data` with its cluster.
optimal_kmeans = KMeans(n_clusters=optimal_clusters, random_state=42)
optimal_cluster_labels = optimal_kmeans.fit_predict(data)

# Plot the first two feature columns coloured by cluster, with the cluster
# centroids overlaid as large red markers.
centroids = optimal_kmeans.cluster_centers_
plt.scatter(data.iloc[:, 0], data.iloc[:, 1], c=optimal_cluster_labels,
            cmap='viridis')
plt.scatter(centroids[:, 0], centroids[:, 1], s=150, c='red', marker='o')
# Label each axis with the name of the column actually plotted on it, so the
# labels cannot disagree with the data (the previous hard-coded labels put
# 'Annual Income' on column 0 and 'Age' on column 1).
plt.xlabel(data.columns[0])
plt.ylabel(data.columns[1])
plt.title('KMeans Clustering with {} clusters'.format(optimal_clusters))
plt.show()
# Fit a Gaussian Mixture Model with the same number of components as the
# selected cluster count, then assign each row of `data` to a component.
gmm = GaussianMixture(n_components=optimal_clusters, random_state=42)
gmm.fit(data)
gmm_cluster_labels = gmm.predict(data)

# Display GMM clusters: the first two feature columns coloured by component,
# with the component means overlaid as large red markers.
component_means = gmm.means_
plt.scatter(data.iloc[:, 0], data.iloc[:, 1], c=gmm_cluster_labels,
            cmap='viridis')
plt.scatter(component_means[:, 0], component_means[:, 1], s=150, c='red',
            marker='o')
# Label each axis with the name of the column actually plotted on it, so the
# labels cannot disagree with the data (the previous hard-coded labels put
# 'Annual Income' on column 0 and 'Age' on column 1).
plt.xlabel(data.columns[0])
plt.ylabel(data.columns[1])
# The title literal was previously split across two lines, which left the
# string unterminated; it is kept on one logical line here.
plt.title('Gaussian Mixture Model Clustering with {} clusters'
          .format(optimal_clusters))
plt.show()
Result: Thus, the program to demonstrate clustering algorithms was written and executed.
Sample Output
Optimal number of clusters: 4
No comments:
Post a Comment
Don't be a silent reader...
Leave your comments...
Anu