Expt No: 8 Ensembling Techniques
Date:
Aim: To write a program to demonstrate ensembling techniques using bagging with a decision tree classifier.
Program
# Ensembling: Bagging with Decision Tree Classifier
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
# Load the dataset
data = pd.read_csv('BreastCancer.csv')
# Split the data into features and target variable
X = data.drop(['id', 'diagnosis'], axis=1) # Features
y = data['diagnosis']  # Target variable
# Split Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)
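# Optional: the diagnosis classes are imbalanced (more B than M), so passing
# stratify=y keeps the class ratio identical in both splits. An alternative
# call, shown commented out so the split above is left unchanged:
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42, stratify=y)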
# Ensemble Technique - Bagging with Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
# Instantiate a Decision Tree Classifier
dt_classifier = DecisionTreeClassifier(random_state=42)
# Instantiate a Bagging Classifier with Decision Tree as the base estimator
bagging_classifier = BaggingClassifier(estimator=dt_classifier, n_estimators=10, random_state=42)
# Train the Bagging Classifier
bagging_classifier.fit(X_train, y_train)
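# Optional: bagging also supports out-of-bag (OOB) evaluation, scoring each
# training sample using only the trees whose bootstrap samples omitted it.
# A minimal sketch with the same data and base estimator as above
# (n_estimators raised to 50 so nearly every sample is OOB for some tree):
bagging_oob = BaggingClassifier(estimator=dt_classifier, n_estimators=50,
                                oob_score=True, random_state=42)
bagging_oob.fit(X_train, y_train)
print("OOB score:", bagging_oob.oob_score_)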
# Model Evaluation
from sklearn.metrics import classification_report, confusion_matrix
# Predict on the testing data
y_pred = bagging_classifier.predict(X_test)
# Evaluate model performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
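# For comparison, a single (unbagged) decision tree on the same split shows
# what the ensemble vote adds; a small sketch reusing X_train/y_train/X_test:
single_dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
print("Single tree accuracy:", accuracy_score(y_test, single_dt.predict(X_test)))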
# Display Decision Tree
from sklearn import tree
# Extract one of the fitted decision trees from the bagging classifier
individual_dt = bagging_classifier.estimators_[0]
# Plot the decision tree
plt.figure(figsize=(20,10))
tree.plot_tree(individual_dt, feature_names=X.columns, class_names=['B', 'M'], filled=True)  # classes are sorted alphabetically, so 'B' comes first
plt.title('Decision Tree Example')
plt.show()
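# Optional sketch: accuracies of the individual bootstrap trees illustrate
# why averaging helps: each tree alone is noisier than the ensemble vote.
# The inner trees were fitted on integer-encoded labels, so predictions are
# mapped back through bagging_classifier.classes_ (an assumption about
# sklearn's internal label encoding).
for i, est in enumerate(bagging_classifier.estimators_):
    tree_pred = bagging_classifier.classes_[est.predict(X_test.values)]
    print(f"Tree {i} accuracy: {accuracy_score(y_test, tree_pred):.3f}")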
# Lists to store training and validation accuracies
train_accuracy = []
val_accuracy = []
# Range of training set fractions (10 points from 10% to 90% of X_train)
train_sizes = np.linspace(0.1, 0.9, 10)
for size in train_sizes:
    # Split the data, keeping the current fraction of the training set
    X_train_subset, _, y_train_subset, _ = train_test_split(
        X_train, y_train, train_size=size, random_state=42)
    # Train the Bagging Classifier on the subset of training data
    bagging_classifier.fit(X_train_subset, y_train_subset)
    # Predict on training and validation sets
    # (the held-out test split doubles as the validation set here)
    y_train_pred = bagging_classifier.predict(X_train_subset)
    y_val_pred = bagging_classifier.predict(X_test)
    # Calculate accuracy on training and validation sets
    train_accuracy.append(accuracy_score(y_train_subset, y_train_pred))
    val_accuracy.append(accuracy_score(y_test, y_val_pred))
# Plotting the training and validation accuracies
plt.figure(figsize=(10, 6))
plt.plot(train_sizes, train_accuracy, label='Training Accuracy')
plt.plot(train_sizes, val_accuracy, label='Validation Accuracy')
plt.title('Training and Validation Accuracy vs. Training Set Fraction')
plt.xlabel('Training Set Fraction')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()
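For reference, the bootstrap-and-vote procedure that BaggingClassifier automates can also be written out by hand. The sketch below is illustrative only; it reuses X_train, y_train, X_test and y_test from the program above along with the same imports.
# Manual bagging: bootstrap resampling + majority vote
rng = np.random.RandomState(42)
n = len(X_train)
all_preds = []
for _ in range(10):
    # Draw a bootstrap sample of the training set (same size, with replacement)
    idx = rng.randint(0, n, size=n)
    t = DecisionTreeClassifier(random_state=42)
    t.fit(X_train.iloc[idx], y_train.iloc[idx])
    all_preds.append(t.predict(X_test))
# Majority vote across the 10 trees for each test sample
majority = pd.DataFrame(all_preds).mode(axis=0).iloc[0].values
print("Manual bagging accuracy:", accuracy_score(y_test, majority))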
Result: Thus the program to demonstrate ensembling techniques was written and executed.
Sample Output
Accuracy: 0.956140350877193
Classification Report:
              precision    recall  f1-score   support

           B       0.96      0.97      0.97        71
           M       0.95      0.93      0.94        43

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Confusion Matrix:
[[69  2]
 [ 3 40]]