Contents

\(\newcommand{\vect}[1]{\boldsymbol{#1}}\) \(\newcommand{\transp}{^{\text{T}}}\) \(\newcommand{\mat}[1]{\boldsymbol{\mathcal{#1}}}\) \(\newcommand{\sign}{\text{sign}}\)

In this practical we will carry out some basic EDA and analysis of some popular ML Datasets that will be used in the course.

1 Fashion MNIST

1.1 Load the data from Keras package

  • Install Keras R package
install.packages("keras")
library(keras)
fashion <- dataset_fashion_mnist()
class(fashion$train)
[1] "list"
names(fashion$train)
[1] "x" "y"
class(fashion$train$x)
[1] "array"
class(fashion$train$y)
[1] "array"
dim(fashion$train$x)
[1] 60000    28    28

1.2 Plot image

  • Here is the first image of the training set
# Rotate a matrix 90 degrees so image() (which draws column-by-column from the
# bottom left) displays the picture upright.
rotate <- function(x) t(apply(x, 2, rev))
# Plot a greyscale image from matrix x, with an optional coloured title.
#   x           : numeric matrix of pixel intensities
#   title       : main title text
#   title.color : colour of the title text
plot_image <- function(x, title = "", title.color = "black") {
  image(rotate(x), axes = FALSE,
        # spell out length.out -- `length =` only worked via partial matching
        col = grey(seq(0, 1, length.out = 255)),
        main = list(title, col = title.color))
}
plot_image(fashion$train$x[1,,])

- Is it a shoe?

Class labels are stored in the \(y\) outcomes but coded from 0 to 9

# Human-readable Fashion-MNIST class names; integer label k (0-9) maps to
# clothes.labels[k + 1] because R vectors are 1-indexed.
clothes.labels <-c( "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
                    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot")
clothes.labels[as.numeric(fashion$train$y[1])+1]
[1] "Ankle boot"
plot_image(fashion$train$x[1,,],clothes.labels[as.numeric(fashion$train$y[1])+1])

1.3 Task 1: Present the second image of the training set in the same way as done for the first.

1.3.1 Lets see if the data is balanced

table(fashion$train$y)

   0    1    2    3    4    5    6    7    8    9 
6000 6000 6000 6000 6000 6000 6000 6000 6000 6000 

1.4 Task 2: Do the same for the test set labels.


   0    1    2    3    4    5    6    7    8    9 
1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 

2 MNIST DATA

2.1 Is it the same for MNIST?

  • load the data
MNIST <- dataset_mnist()
dim(MNIST$train$x)
[1] 60000    28    28
dim(MNIST$train$y)
[1] 60000
dim(MNIST$test$x)
[1] 10000    28    28
dim(MNIST$test$y)
[1] 10000
  • Scale to [0,1] values
MNIST$train$x <- MNIST$train$x/255
MNIST$test$x <- MNIST$test$x/255
table(MNIST$train$y)

   0    1    2    3    4    5    6    7    8    9 
5923 6742 5958 6131 5842 5421 5918 6265 5851 5949 
table(MNIST$test$y)

   0    1    2    3    4    5    6    7    8    9 
 980 1135 1032 1010  982  892  958 1028  974 1009 

The class counts are not exactly equal, but they are close enough that we can still treat the data as balanced.

2.2 A rough comparison of the training and test set

  • Training set
# Mean pixel intensity per digit class (0-9) over the training set.
# vapply with a typed result replaces the original grow-with-c() loop,
# which copies the vector on every iteration.
res.mean <- vapply(0:9,
                   function(d) mean(MNIST$train$x[which(MNIST$train$y == d), , ]),
                   numeric(1))
res.mean.train <- res.mean
res.mean.train
 [1] 0.17339933 0.07599864 0.14897513 0.14153014 0.12136559 0.12874939
 [7] 0.13730178 0.11452770 0.15015598 0.12258994
  • Test set
# Mean pixel intensity per digit class (0-9) over the test set, computed the
# same way as for the training set (vapply instead of growing with c()).
res.mean <- vapply(0:9,
                   function(d) mean(MNIST$test$x[which(MNIST$test$y == d), , ]),
                   numeric(1))
res.mean.test <- res.mean
res.mean.test
 [1] 0.1723103 0.0767375 0.1501850 0.1433053 0.1226675 0.1320539 0.1435761
 [8] 0.1149012 0.1531272 0.1252664
library(ggplot2)
data.res <- data.frame(meanPixel=c(res.mean.train,res.mean.test),set=rep(c("train","test"),each=10),digit=rep(0:9,2))
data.res$digit<-as.factor(data.res$digit)
ggplot(data=data.res,aes(x=digit,y=meanPixel,fill=set))+
  geom_bar(stat="identity", position=position_dodge())

2.3 Task 3: Do the same for Fashion MNIST

  • Scale to [0,1] values
 [1] 0.3256078 0.2229053 0.3767010 0.2588977 0.3853255 0.1367355 0.3317848
 [8] 0.1676944 0.3535575 0.3011965
  • Test set
 [1] 0.3279359 0.2234565 0.3739321 0.2603726 0.3911572 0.1363033 0.3327781
 [8] 0.1687051 0.3534861 0.3003660

2.4 Some linear binary classification

Start with digit ‘3’ being ‘positive’ and digit ‘8’ being ‘negative’.

postiveTrain <- MNIST$train$x[which(MNIST$train$y==3),,]
dim(postiveTrain)[1]
[1] 6131
negativeTrain <- MNIST$train$x[which(MNIST$train$y==8),,]
dim(negativeTrain)[1]
[1] 5851
  • making a vector out of an image
vec.image1 <- c(postiveTrain[1,,])
vec.image1[1:20]
 [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
length(vec.image1)
[1] 784
  • A data matrix of positive samples
dim(postiveTrain)
[1] 6131   28   28
# Flatten each 28x28 positive image into one 784-length row (column-major
# order, matching c(x)). Preallocate as NA_real_ so the matrix is numeric
# from the start instead of being coerced from logical on first assignment.
matPosTrain <- matrix(NA_real_, nrow = dim(postiveTrain)[1],
                      ncol = dim(postiveTrain)[2] * dim(postiveTrain)[3])
for (i in seq_len(dim(postiveTrain)[1])) {  # seq_len is safe if the set is empty
  matPosTrain[i, ] <- c(postiveTrain[i, , ])
}
dim(matPosTrain)
[1] 6131  784
image(rotate(matPosTrain))

  • A data matrix of negative samples
dim(negativeTrain)
[1] 5851   28   28
# Flatten each 28x28 negative image into one 784-length row, exactly as done
# for the positive samples. NA_real_ + seq_len for type safety and an
# empty-set-safe loop index.
matNegTrain <- matrix(NA_real_, nrow = dim(negativeTrain)[1],
                      ncol = dim(negativeTrain)[2] * dim(negativeTrain)[3])
for (i in seq_len(dim(negativeTrain)[1])) {
  matNegTrain[i, ] <- c(negativeTrain[i, , ])
}
dim(matNegTrain)
[1] 5851  784

2.5 The Data Matrix

A <- rbind(matPosTrain,matNegTrain)
dim(A)
[1] 11982   784

We add a column of ones for the bias (intercept)

A <- cbind(rep(1,dim(A)[1]),A)
dim(A)
[1] 11982   785

We now minimize

\[\|y-A\beta\|^2 \quad \text{with solution} \quad \hat{\beta} = A^\dagger y,\]

\[\hat{\beta}=(A^TA)^{-1}A^Ty, \qquad A^\dagger=(A^TA)^{-1}A^T,\]
where in practice \(A^\dagger\) is computed as the Moore–Penrose pseudoinverse (via `MASS::ginv`), which also handles the case where \(A^TA\) is singular.

library(MASS)
# Targets: +1 for each positive ('3') row of A, -1 for each negative ('8')
# row, stacked in the same order as rbind(matPosTrain, matNegTrain).
y <- matrix(rep(c(1,-1),times=c(dim(matPosTrain)[1],dim(matNegTrain)[1])),ncol=1)
# MASS::ginv computes the Moore-Penrose pseudoinverse A^+, so
# beta = A^+ y is the minimum-norm least-squares solution (this also covers
# the case where A^T A is singular, e.g. all-zero border pixel columns).
# NOTE(review): ginv() on an 11982 x 785 matrix is expensive; qr or lm.fit
# would be cheaper -- presumably kept for pedagogy.
A.inv <- ginv(A)
beta <- A.inv%*%y
beta[1:20]
 [1]  2.987433e-01  1.649403e-10 -2.743291e-11  7.066158e-11 -6.348332e-11
 [6] -5.295066e-11  1.643006e-10 -2.041703e-11  2.634341e-10  5.118827e-10
[11]  5.996431e-10 -1.034161e-10 -5.919315e-11  2.355582e-11  4.622277e-11
[16]  6.345416e-11 -2.690877e-11 -3.176165e-12  1.572036e-12 -5.945032e-11
plot(beta,xlab="Index",ylab="value",type="h")

2.6 Create our classifier in one code line

classsify <- function(x,beta) ifelse(sum(c(1,x)*beta)>0,1,-1)
classsify(postiveTrain[1,,],beta)
[1] 1
classsify(negativeTrain[1,,],beta)
[1] -1

2.7 Evaluate on the test set

positiveTest <- MNIST$test$x[which(MNIST$test$y==3),,]
(nPosTest <- dim(positiveTest)[1])
[1] 1010
negativeTest <- MNIST$test$x[which(MNIST$test$y==8),,]
(nNegTest <- dim(negativeTest)[1])
[1] 974
  • True Positive
# Classify every positive ('3') test image: +1 = correct, -1 = misclassified.
# vapply with a preallocated typed result replaces growing res.pos with c().
res.pos <- vapply(seq_len(dim(positiveTest)[1]),
                  function(i) classsify(positiveTest[i, , ], beta),
                  numeric(1))
table(res.pos)
res.pos
 -1   1 
 42 968 
truePositives <- table(res.pos)[2]
truePositives
  1 
968 

2.7.1 True Negative

# Classify every negative ('8') test image: -1 = correct, +1 = misclassified.
# Same vapply pattern as for the positive test samples.
res.neg <- vapply(seq_len(dim(negativeTest)[1]),
                  function(i) classsify(negativeTest[i, , ], beta),
                  numeric(1))
table(res.neg)
res.neg
 -1   1 
934  40 
trueNegatives <- table(res.neg)[1]
trueNegatives
 -1 
934 

2.8 Task 4: What is the accuracy? What is the precision and recall? What is the \(F_1\) score ?

Reminder:

\[\text{Precision} = \frac{\big|\text{true positive}\big|}{\big|\text{true positive}\big| + \big|\text{false positive}\big|}, \qquad \text{Recall} = \frac{\big|\text{true positive}\big|}{\big|\text{true positive}\big| + \big|\text{false negative}\big|}.\]

-1 
40 
 1 
42 

2.8.1 Precision

        1 
0.9603175 

2.8.2 Recall

        1 
0.9584158 

2.8.3 Harmonic mean

[1] 0.9593657

2.9 Task 5: Repeat the above to make a classifier that distinguishes between the digit 0 and the digit 3

  • Task5 solution

  • Accuracy, precision, recall, and F1 score will be very high

3 CIFAR10

3.1 Load the data

CIFAR10 <- dataset_cifar10()
# Scale pixel values from [0, 255] down to [0, 1]
x_train <- CIFAR10$train$x/255
x_test  <- CIFAR10$test$x/255
# One-hot encode the integer class labels (0-9 -> 10 indicator columns)
y_train <- keras::to_categorical(CIFAR10$train$y, num_classes = 10)
y_test  <- keras::to_categorical(CIFAR10$test$y,  num_classes = 10)
# NOTE(review): the standard CIFAR-10 names for classes 0 and 1 are
# "airplane" and "automobile" (not "flyer"/"car"), and "frog " carries a
# trailing space -- consider fixing; the echoed outputs below would change too.
label_name = c("flyer", "car", "bird", "cat", "deer", "dog", "frog ", "horse", "ship", "truck")
label_name
 [1] "flyer" "car"   "bird"  "cat"   "deer"  "dog"   "frog " "horse" "ship" 
[10] "truck"
dim(x_train)
[1] 50000    32    32     3
dim(x_test)
[1] 10000    32    32     3
table(CIFAR10$train$y)

   0    1    2    3    4    5    6    7    8    9 
5000 5000 5000 5000 5000 5000 5000 5000 5000 5000 

The image data is a 4-tensor: samples × height × width × colour channels.

3.2 Display the first image

library(EBImage)

Attaching package: 'EBImage'
The following object is masked _by_ '.GlobalEnv':

    rotate
# Hand-picked training-set indices, presumably one example image per class.
pictures = c(9802, 5, 7, 10, 4, 28,1, 8, 9, 2)

# Build a list of EBImage Image objects from the raw (unscaled) pixel arrays.
# NOTE(review): transpose() swaps the spatial axes for EBImage's x/y
# convention and normalize() rescales intensities -- confirm against the
# EBImage docs if reusing this elsewhere.
fig_img  = list()
for (i in 1:10 ) {
  fig_mat  = CIFAR10$train$x[pictures[i], , , ]
  fig_img[[i]]  = normalize(Image(transpose(fig_mat), dim=c(32,32,3), colormode='Color'))
}
# Combine the 10 images into one stack and tile them 5 per row.
# NOTE(review): prefer TRUE over the reassignable shorthand T.
fig_img_comb = combine(fig_img[1:10])
fig_img_obj = tile(fig_img_comb,5)
plot(fig_img_obj, all=T)

## First image from CIFAR10

CIFAR10$train$y[1]
[1] 6
label_name[CIFAR10$train$y[1]+1]
[1] "frog "
plot(normalize(Image(transpose(x_train[1,,,]), dim=c(32,32,3), colormode='Color')))

3.3 Image for each channel

# x_train was already scaled to [0, 1] when it was loaded, so do NOT divide
# by 255 again -- the original double scaling went unnoticed only because
# image() rescales its input to the colour range anyway.
temp <- x_train[1,,,]
# Red channel
image(rotate(temp[,,1]),axes=FALSE,col = rgb((0:10)/10,0,0))

# Green channel
image(rotate(temp[,,2]),axes=FALSE,col = rgb(0,(0:10)/10,0))

# Blue channel
image(rotate(temp[,,3]),axes=FALSE,col = rgb(0,0,(0:10)/10))

3.4 Select the first 5 frogs

ind.frog <- which(CIFAR10$train$y==6)
length(ind.frog)
[1] 5000

3.5 Show the first 5 frogs

# Plot the first five frog images, overlaying the class label as red text.
for (i in ind.frog[1:5]){
 plot(normalize(Image(transpose(x_train[i,,,]), dim=c(32,32,3), colormode='Color')))
  # NOTE(review): lty and lwd are line-drawing parameters and have no effect
  # in text(); they could be dropped.
  text(20,20,label_name[CIFAR10$train$y[i]+1],col="red",lty=2,lwd=10,cex=5)
  }

3.6 Task 6: Create an animation that goes through the first 100 images and presents the label of each image as red text.