University Projects

Project 1: PRML

Project Details: These datasets contain around 70,000 images that need to be separated into 10 categories. The target is to develop a machine learning model, using logistic regression, that achieves the maximum accuracy in predicting the correct category. There are 10 labels, numbered 0 to 9 (T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot).

Language Used: Python

Libraries used :

    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import confusion_matrix, classification_report
    import matplotlib.pyplot as plt
    import numpy as np

    Code for the Above Project

    
    """
    @author: kkabi
    """
    
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import confusion_matrix, classification_report
    import matplotlib.pyplot as plt
    import numpy as np
    
    # Locations of the two Fashion-MNIST CSV files on the local machine
    Data_MIST_Fashion_Test = r"C:\Users\kkabi\OneDrive\Desktop\DAta science UC\patterntutorial\fashion-mnist_test.csv"
    Data_MIST_Fashion_Train = r"C:\Users\kkabi\OneDrive\Desktop\DAta science UC\patterntutorial\fashion-mnist_train.csv"

    # Read each CSV file into its own DataFrame
    Data_MIST_Fashion_Test_df = pd.read_csv(Data_MIST_Fashion_Test)
    Data_MIST_Fashion_Train_df = pd.read_csv(Data_MIST_Fashion_Train)

    # Stack the test rows on top of the train rows to form a single dataset
    # (it is re-split into train/test portions later in the script)
    Test_train_combined_df_new = pd.concat(
        [Data_MIST_Fashion_Test_df, Data_MIST_Fashion_Train_df],
        axis=0,
    )

    # Print the column names of the combined dataset
    keys = Test_train_combined_df_new.columns
    print("Keys:", keys)

    # Print the labels of the first 5 rows
    labels = Test_train_combined_df_new['label'].head(5)
    print("Labels:", labels)
    
    
    # Show the first few images of the combined dataset together with their labels
    number_images = 5
    plt.figure(figsize=(10, 2))
    for i in range(number_images):
        # Columns 1..784 hold the flattened pixel values of row i
        # (column 0 is presumably the label column -- confirm against the CSV header)
        image_data = Test_train_combined_df_new.iloc[i, 1:785].values
        label = Test_train_combined_df_new.iloc[i]['label']

        # Restore the flat 784-value vector to a 28x28 image before plotting
        image_data = image_data.reshape(28, 28)
        plt.subplot(1, number_images, i + 1)
        plt.imshow(image_data, cmap='gray')
        plt.title(f"Label: {label}")
        plt.axis('off')
    plt.show()
    
    # Separate the feature columns (X) from the target column (y)
    X = Test_train_combined_df_new.drop(columns='label')
    y = Test_train_combined_df_new['label']

    # Keep 80% of the rows for training; the fixed seed makes the split repeatable
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        train_size=0.8,
        random_state=0,
    )

    # Fit a logistic-regression classifier on the training portion
    lr = LogisticRegression(solver="lbfgs", max_iter=100)
    lr.fit(X_train, y_train)

    # Predict the held-out rows and print the model's score on them
    y_pred = lr.predict(X_test)
    score_result = lr.score(X_test, y_test)
    print("Score:", score_result)

    # Confusion matrix of true labels against predicted labels
    conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
    print("Confusion Matrix:\n", conf_matrix)

    # Text summary of the per-class classification metrics
    class_report = classification_report(y_true=y_test, y_pred=y_pred)
    print("Classification Report:\n", class_report)
    
    
    # Show the first 5 held-out images, each titled with the model's prediction
    plt.figure(figsize=(10, 2))
    for idx in range(5):
        # Row idx of X_test is one flattened image; y_pred[idx] is its predicted label
        image_data = X_test.iloc[idx, :].values
        prediction = y_pred[idx]
        plt.subplot(1, 5, idx + 1)
        plt.axis("off")
        # Reshape the flat pixel vector to 28x28 and draw it with the reversed gray colormap
        plt.imshow(image_data.reshape(28, 28), cmap=plt.cm.gray_r, interpolation="nearest")
        plt.title(f"Prediction: {int(prediction)}")
    plt.show()
    
    # Collect the positions (within the test split) where the prediction
    # disagrees with the true label
    misclassifiedIndexes = [
        position
        for position, (actual, predicted) in enumerate(zip(y_test, y_pred))
        if actual != predicted
    ]
    
    # Plot up to 5 of the misclassified test images, titled with the
    # predicted and the actual label
    plt.figure(figsize=(20, 3))
    for slot, wrong_pos in enumerate(misclassifiedIndexes[:5]):
        # Flattened pixel row of the misclassified sample, reshaped to 28x28
        pixels = X_test.iloc[wrong_pos, :].to_numpy().reshape(28, 28)
        plt.subplot(1, 5, slot + 1)
        plt.axis("off")
        plt.imshow(pixels, cmap=plt.cm.gray, interpolation='nearest')
        plt.title(f'Predicted: {y_pred[wrong_pos]}, Actual: {y_test.iloc[wrong_pos]}', fontsize=12)
    plt.show()
    

    Project 2: Programming of Data Science

    Project Details:

    Question 1a: Implement a Python program for Nearest Neighbour Classifier that can
    classify an unknown data sample to one of the given classes.

    Question 1b: Implement a Python program for K-Means Clustering that can group data
    samples to clusters.