\(\newcommand{\vect}[1]{\boldsymbol{#1}}\) \(\newcommand{\transp}{^{\text{T}}}\) \(\newcommand{\mat}[1]{\boldsymbol{\mathcal{#1}}}\) \(\newcommand{\sign}{\text{sign}}\)
In this practical we deal with a reduced version of the CIFAR10 dataset and train convolutional neural networks. We use a small subset of the dataset to be able to compute things in a sensible time within the tutorial.
The main focus is to see how “to assemble” different convolutional layers.
# Load the keras interface and fetch the CIFAR10 data (train and test parts).
library(keras)
CIFAR10 <- dataset_cifar10()

# Divide the image arrays by 255 (presumably rescaling 8-bit pixel intensities
# to [0, 1] -- confirm against the dataset documentation).
x_train <- CIFAR10$train$x / 255
x_test <- CIFAR10$test$x / 255

# One-hot encode the integer class labels into 10 indicator columns.
y_train <- keras::to_categorical(CIFAR10$train$y, num_classes = 10)
y_test <- keras::to_categorical(CIFAR10$test$y, num_classes = 10)

# Human-readable names for the ten classes.
# NOTE(review): "flyer" is called "airplane" elsewhere in this document and
# "frog " carries a trailing space; both are kept unchanged so that the printed
# output below stays valid.
label_name <- c("flyer", "car", "bird", "cat", "deer", "dog", "frog ", "horse", "ship", "truck")
label_name
[1] "flyer" "car" "bird" "cat" "deer" "dog" "frog " "horse" "ship"
[10] "truck"
dim(x_train)
[1] 50000 32 32 3
dim(x_test)
[1] 10000 32 32 3
table(CIFAR10$train$y)
0 1 2 3 4 5 6 7 8 9
5000 5000 5000 5000 5000 5000 5000 5000 5000 5000
We consider only 3 classes - not all 10
We use only “airplane”, “cat” and “truck” to make the problem “easier and quicker” for the practical
# Positions of the images whose label is 0, 3 or 9 (the three classes kept
# for this practical), for the training and the test data respectively.
ind.select.train <- which(CIFAR10$train$y %in% c(0, 3, 9))
ind.select.test <- which(CIFAR10$test$y %in% c(0, 3, 9))
length(ind.select.train)
[1] 15000
length(ind.select.test)
[1] 3000
# Keep only the images belonging to the selected classes.
x_train.filter <- x_train[ind.select.train, , , ]
x_test.filter <- x_test[ind.select.test, , , ]

# Integer labels of the kept images (values 0, 3 and 9).
y_train.filter <- CIFAR10$train$y[ind.select.train, ]
y_test.filter <- CIFAR10$test$y[ind.select.test, ]

# One-hot targets reduced to the three kept classes: label k sits in
# column k + 1 of the 10-column encoding, i.e. columns 1, 4 and 10.
y_train.filter.cat <- y_train[ind.select.train, c(0, 3, 9) + 1]
y_test.filter.cat <- y_test[ind.select.test, c(0, 3, 9) + 1]
We stay with a small training data size and validation data size - also due to time constraints. For testing we could use the full test set. We will also use a batch size of 50.
Here I have chosen 8000 for the train set and 4000 for the validation set, but I have a “good” laptop. You might try a smaller data size, such as 2000 for the train set and 1000 for the validation set.
# Number of filtered training images used for fitting and for validation.
trainLength <- 8000
validateLength <- 4000

# Two consecutive, non-overlapping row ranges of the filtered training data:
# the first `trainLength` rows, then the next `validateLength` rows.
trainRange <- seq_len(trainLength)
validateRange <- (trainLength + 1):(trainLength + validateLength)

# Training subset (images and one-hot targets).
x_train.filter.small <- x_train.filter[trainRange, , , ]
y_train.filter.small <- y_train.filter.cat[trainRange, ]

# Validation subset (images and one-hot targets).
x_val.filter.small <- x_train.filter[validateRange, , , ]
y_val.filter.small <- y_train.filter.cat[validateRange, ]

# Image size passed as `input_shape` to the first convolutional layer.
img_width <- 32
img_height <- 32
# First CNN: two (convolution -> batch normalisation -> 2x2 max pooling)
# stages followed by a dense classifier with dropout and a 3-way softmax.
# The `filters` argument is spelled out in full; the abbreviated `filter`
# only worked through R's partial argument matching.
model <- keras_model_sequential()
model %>%
  # Stage 1: 8 filters of size 3x3, "same" padding keeps the 32x32 size.
  layer_conv_2d(
    filters = 8, kernel_size = c(3, 3), strides = c(1, 1),
    padding = "same", activation = "relu",
    input_shape = c(img_height, img_width, 3)
  ) %>%
  layer_batch_normalization() %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  # Stage 2: 4 filters of size 3x3, "valid" padding (no padding).
  layer_conv_2d(
    filters = 4, kernel_size = c(3, 3), strides = c(1, 1),
    padding = "valid", activation = "relu"
  ) %>%
  layer_batch_normalization() %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  # Classifier head: flatten, two dense layers with dropout, 3 outputs.
  layer_flatten() %>%
  layer_dense(units = 80, activation = "relu") %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 40, activation = "relu") %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 3, activation = "softmax")
summary(model)
Model: "sequential"
________________________________________________________________________________
Layer (type) Output Shape Param #
================================================================================
conv2d_1 (Conv2D) (None, 32, 32, 8) 224
________________________________________________________________________________
batch_normalization_1 (BatchNormali (None, 32, 32, 8) 32
________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D) (None, 16, 16, 8) 0
________________________________________________________________________________
conv2d (Conv2D) (None, 14, 14, 4) 292
________________________________________________________________________________
batch_normalization (BatchNormaliza (None, 14, 14, 4) 16
________________________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 7, 7, 4) 0
________________________________________________________________________________
flatten (Flatten) (None, 196) 0
________________________________________________________________________________
dense_2 (Dense) (None, 80) 15760
________________________________________________________________________________
dropout_1 (Dropout) (None, 80) 0
________________________________________________________________________________
dense_1 (Dense) (None, 40) 3240
________________________________________________________________________________
dropout (Dropout) (None, 40) 0
________________________________________________________________________________
dense (Dense) (None, 3) 123
================================================================================
Total params: 19,687
Trainable params: 19,663
Non-trainable params: 24
________________________________________________________________________________
# deepviz is installed from GitHub; only install it when it is missing so
# that re-running the document does not reinstall the package every time.
if (!requireNamespace("deepviz", quietly = TRUE)) {
  devtools::install_github("andrie/deepviz")
}
library(deepviz)

# Draw the architecture of `model` as a graph.
model %>% plot_model()
Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
Using compatibility `.name_repair`.
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
Modify the model to have (\(5\times 5\)) convolutions in the first layer and no max pooling layer after the second convolutional layer. Use no padding for the convolutional layers.
You’ll need to update the number of neurons in the dense layers.
Call this model, model2.
Model: "sequential_1"
________________________________________________________________________________
Layer (type) Output Shape Param #
================================================================================
conv2d_3 (Conv2D) (None, 28, 28, 8) 608
________________________________________________________________________________
batch_normalization_3 (BatchNormali (None, 28, 28, 8) 32
________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D) (None, 14, 14, 8) 0
________________________________________________________________________________
conv2d_2 (Conv2D) (None, 12, 12, 4) 292
________________________________________________________________________________
batch_normalization_2 (BatchNormali (None, 12, 12, 4) 16
________________________________________________________________________________
flatten_1 (Flatten) (None, 576) 0
________________________________________________________________________________
dense_5 (Dense) (None, 80) 46160
________________________________________________________________________________
dropout_3 (Dropout) (None, 80) 0
________________________________________________________________________________
dense_4 (Dense) (None, 40) 3240
________________________________________________________________________________
dropout_2 (Dropout) (None, 40) 0
________________________________________________________________________________
dense_3 (Dense) (None, 3) 123
================================================================================
Total params: 50,471
Trainable params: 50,447
Non-trainable params: 24
________________________________________________________________________________
# Configure model2 for training: Adam optimiser (learning rate 0.001),
# categorical cross-entropy loss, accuracy reported as metric.
model2 %>% compile(
  optimizer = optimizer_adam(lr = 0.001),
  loss = "categorical_crossentropy",
  metrics = "accuracy"
)

# Training hyper-parameters.
epochs <- 20
batch.size <- 50

# Fit on the small training subset, monitoring the validation subset, and
# measure the wall-clock time taken by the fit.
start.time <- Sys.time()
history.model2 <- fit(
  model2,
  x = x_train.filter.small,
  y = y_train.filter.small,
  validation_data = list(x_val.filter.small, y_val.filter.small),
  epochs = epochs,
  batch_size = batch.size,
  verbose = FALSE
)
end.time <- Sys.time()
(running.time <- end.time - start.time)
Time difference of 38.08836 secs
plot(history.model2 )
`geom_smooth()` using formula 'y ~ x'
# Loss and accuracy of model2 on the filtered test set.
model.acc <- model2 %>% tensorflow::evaluate(x_test.filter, y_test.filter.cat)

class.name <- c("airplane", "cat", "truck")

# Predicted class = 0-based index of the largest of the three softmax outputs.
# This replaces predict_classes(), which was removed from recent Keras
# releases; ties go to the first maximum, as with an arg-max.
pred <- max.col(predict(model2, x_test.filter), ties.method = "first") - 1L

# Explicit levels keep the label mapping fixed even when a class is never
# predicted: outputs 0/1/2 and original labels 0/3/9 both map to
# airplane/cat/truck.
pred.class <- factor(pred, levels = 0:2, labels = class.name)
true.class <- factor(y_test.filter, levels = c(0, 3, 9), labels = class.name)

# Confusion table: rows are predictions, columns are true classes.
table(pred.class, true.class)
true.class
pred.class airplane cat truck
airplane 551 16 23
cat 251 934 132
truck 198 50 845
# Test accuracy = correctly classified images (diagonal of the confusion
# table) divided by the number of classified images, taken from the table
# itself rather than hard-coded.
confusion <- table(pred.class, true.class)
(ACC <- sum(diag(confusion)) / sum(confusion))
[1] 0.7766667
and we’ll see in class who gets the best test accuracy (you can only check the test accuracy once).
Time difference of 1.812771 mins
plot(history.model3 )
`geom_smooth()` using formula 'y ~ x'
(model.acc <- model3 %>% tensorflow::evaluate(x_test.filter,y_test.filter.cat))
loss accuracy
0.5492241 0.8166667
class.name <- c("airplane", "cat", "truck")

# Predicted class = 0-based index of the largest of the three softmax outputs.
# This replaces predict_classes(), which was removed from recent Keras
# releases; ties go to the first maximum, as with an arg-max.
pred <- max.col(predict(model3, x_test.filter), ties.method = "first") - 1L

# Explicit levels keep the label mapping fixed even when a class is never
# predicted: outputs 0/1/2 and original labels 0/3/9 both map to
# airplane/cat/truck.
pred.class <- factor(pred, levels = 0:2, labels = class.name)
true.class <- factor(y_test.filter, levels = c(0, 3, 9), labels = class.name)

# Confusion table: rows are predictions, columns are true classes.
table(pred.class, true.class)
true.class
pred.class airplane cat truck
airplane 934 154 240
cat 38 822 66
truck 28 24 694
# Test accuracy = correctly classified images (diagonal of the confusion
# table) divided by the number of classified images, taken from the table
# itself rather than hard-coded.
confusion <- table(pred.class, true.class)
(ACC <- sum(diag(confusion)) / sum(confusion))
[1] 0.8166667