Project 1: PRML
Project Details: These datasets contain around 70,000 images that need to be separated into 10 categories. The goal is to develop a machine learning model, using logistic regression, that predicts the correct category with the highest possible accuracy. There are 10 labels, numbered 0 to 9 (T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot).
Language Used: Python
Libraries used :
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import numpy as np
Code for the Above Project
@author: kkabi
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import numpy as np
# Locations of the two Fashion-MNIST CSV files (test file and train file).
Data_MIST_Fashion_Test = r"C:\Users\kkabi\OneDrive\Desktop\DAta science UC\patterntutorial\fashion-mnist_test.csv"
Data_MIST_Fashion_Train = r"C:\Users\kkabi\OneDrive\Desktop\DAta science UC\patterntutorial\fashion-mnist_train.csv"
# Read both CSVs into DataFrames: the test file first, then the train file.
Data_MIST_Fashion_Test_df, Data_MIST_Fashion_Train_df = (
    pd.read_csv(csv_path)
    for csv_path in (Data_MIST_Fashion_Test, Data_MIST_Fashion_Train)
)
# Combine test and train datasets into a single frame (test rows first).
# ignore_index=True renumbers the rows 0..N-1. Without it each input keeps
# its own row labels, so the combined frame would carry duplicated labels
# and any label-based lookup (.loc) could match a row from each file.
Test_train_combined_df_new = pd.concat(
    [Data_MIST_Fashion_Test_df, Data_MIST_Fashion_Train_df],
    axis=0,
    ignore_index=True,
)
# Display column names
keys = Test_train_combined_df_new.columns
print("Keys:", keys)
# Show first 5 labels
labels = Test_train_combined_df_new['label'].head(5)
print("Labels:", labels)
# Preview the first few rows of the combined dataset as images, each titled
# with its label.
number_images = 5
plt.figure(figsize=(10, 2))
for i in range(number_images):
    # Columns 1..784 are taken as the pixel values of one 28x28 image
    # (assumes column 0 is the label -- TODO confirm against the CSV header).
    image_data = Test_train_combined_df_new.iloc[i, 1:785].values
    # Read the label through its column so it keeps the column's own dtype
    # instead of the dtype of a whole mixed row.
    label = Test_train_combined_df_new['label'].iloc[i]
    image_data = image_data.reshape(28, 28)
    plt.subplot(1, number_images, i + 1)
    plt.imshow(image_data, cmap='gray')
    plt.title(f"Label: {label}")
    plt.axis('off')
# Render once, after every subplot has been added to the figure.
plt.show()
# Separate the target column (y) from the remaining feature columns (X).
X = Test_train_combined_df_new.drop(columns='label')
y = Test_train_combined_df_new['label']
# 80% of the rows go to training and the rest to testing; random_state pins
# the shuffle so the split is reproducible.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, train_size=0.8, random_state=0)
# Fit a logistic-regression classifier on the training split.
# fit() returns the estimator itself, so construction and training chain.
# NOTE(review): X is passed unscaled; if lbfgs reports a ConvergenceWarning,
# scaling the features or raising max_iter are the usual remedies -- confirm.
lr = LogisticRegression(solver="lbfgs", max_iter=100).fit(X_train, y_train)
# Predict the held-out rows and compute the mean accuracy on them.
y_pred = lr.predict(X=X_test)
score_result = lr.score(X=X_test, y=y_test)
print("Score:", score_result)
# Error breakdown: confusion matrix first, then the per-class text report.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", conf_matrix)
class_report = classification_report(y_true=y_test, y_pred=y_pred)
print("Classification Report:\n", class_report)
# Display the first 5 test images, each titled with the model's prediction.
plt.figure(figsize=(10, 2))
for idx in range(5):
    # Row idx of X_test is reshaped to a 28x28 image; y_pred[idx] is the
    # prediction for that same row.
    image_data = X_test.iloc[idx, :].values
    prediction = y_pred[idx]
    plt.subplot(1, 5, idx + 1)
    plt.axis("off")
    plt.imshow(image_data.reshape(28, 28), cmap=plt.cm.gray_r, interpolation="nearest")
    plt.title(f"Prediction: {int(prediction)}")
# Render once, after every subplot has been added to the figure.
plt.show()
# Identify misclassified indexes: the 0-based positions (in y_test / y_pred
# order) where the predicted class differs from the true label.
# One vectorized comparison replaces the element-by-element append loop;
# .tolist() keeps the result a plain list of Python ints.
misclassifiedIndexes = np.flatnonzero(
    np.asarray(y_test) != np.asarray(y_pred)
).tolist()
# Display the first 5 misclassified test images, each titled with the
# predicted and the actual label.
plt.figure(figsize=(20, 3))
for plotIndex, badIndex in enumerate(misclassifiedIndexes[0:5]):
    # badIndex is a position, so use .iloc for both the features and the
    # true label.
    pixels = X_test.iloc[badIndex, :].to_numpy().reshape(28, 28)
    plt.subplot(1, 5, plotIndex + 1)
    plt.axis("off")
    plt.imshow(pixels, cmap=plt.cm.gray, interpolation='nearest')
    plt.title(f'Predicted: {y_pred[badIndex]}, Actual: {y_test.iloc[badIndex]}', fontsize=12)
# Render once, after every subplot has been added to the figure.
plt.show()
Project 2: Programming of Data Science
Project Details:
Question 1a: Implement a Python program for a Nearest Neighbour Classifier that can
classify an unknown data sample into one of the given classes.
Question 1b: Implement a Python program for K-Means Clustering that can group data
samples into clusters.
