A Neural Network Illustration

A short illustration of training a shallow neural network with the neuralnet R package, published in The R Journal: neuralnet: Training of Neural Networks.
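
If the neuralnet package is not installed yet, a one-time setup is needed (standard CRAN install, shown here for completeness):

install.packages("neuralnet")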

set.seed(500)
library(MASS)

# Boston housing data; check that there are no missing values
data <- Boston
any(is.na(data))
[1] FALSE
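
Boston contains 506 census tracts described by 14 variables; the response we model is medv, the median home value in $1000s.

dim(data)
[1] 506  14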

Split the data and run a linear model

# 75/25 train-test split
index <- sample(1:nrow(data), round(0.75*nrow(data)))
train <- data[index,]
test <- data[-index,]

# glm() with the default gaussian family fits the same linear model as lm()
lm.fit <- glm(medv ~ ., data = train)
summary(lm.fit)

Call:
glm(formula = medv ~ ., data = train)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-15.2113   -2.5587   -0.6552    1.8275   29.7110  

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  31.111702   5.459811   5.698 2.49e-08 ***
crim         -0.111372   0.033256  -3.349 0.000895 ***
zn            0.042633   0.014307   2.980 0.003077 ** 
indus         0.001483   0.067455   0.022 0.982473    
chas          1.756844   0.981087   1.791 0.074166 .  
nox         -18.184847   4.471572  -4.067 5.84e-05 ***
rm            4.760341   0.480472   9.908  < 2e-16 ***
age          -0.013439   0.014101  -0.953 0.341190    
dis          -1.553748   0.218929  -7.097 6.65e-12 ***
rad           0.288181   0.072017   4.002 7.62e-05 ***
tax          -0.013739   0.004060  -3.384 0.000791 ***
ptratio      -0.947549   0.140120  -6.762 5.38e-11 ***
black         0.009502   0.002901   3.276 0.001154 ** 
lstat        -0.388902   0.059733  -6.511 2.47e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 20.23806)

    Null deviance: 32463.5  on 379  degrees of freedom
Residual deviance:  7407.1  on 366  degrees of freedom
AIC: 2237

Number of Fisher Scoring iterations: 2

# Predict on the test set and compute the test mean squared error
pr.lm <- predict(lm.fit, test)
(MSE.lm <- sum((pr.lm - test$medv)^2)/nrow(test))
[1] 31.26302

A shallow neural network

Before fitting the network we normalize the data: min-max scaling maps every variable to [0, 1], which generally makes neural network training easier and more stable.

library(neuralnet)

# Min-max scale every column to [0, 1]
maxs <- apply(data, 2, max)
mins <- apply(data, 2, min)
scaled <- as.data.frame(scale(data, center = mins, scale = maxs - mins))

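A quick sanity check (a minimal sketch): after min-max scaling, every column should run from 0 to 1.

sapply(scaled, range)  # 2 x 14 matrix of per-column min and max
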
train_ <- scaled[index,]
test_ <- scaled[-index,]

# neuralnet does not accept the medv ~ . shorthand, so spell the formula out
n <- names(train_)
f <- as.formula(paste("medv ~", paste(n[!n %in% "medv"], collapse = " + ")))

# One hidden layer with 3 units; linear.output = TRUE for regression output
nn <- neuralnet(f, data = train_, hidden = 3, linear.output = TRUE)
plot(nn, rep="best")
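
The plotted network has a 13-3-1 architecture. As a quick back-of-the-envelope check of model size, count its weights and biases (simple arithmetic, not part of the original code):

(13 + 1) * 3 + (3 + 1) * 1  # 13 inputs -> 3 hidden units -> 1 output
[1] 46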

Performance

# compute() evaluates the network on the scaled test predictors (columns 1:13)
pr.nn <- compute(nn, test_[,1:13])

# Rescale predictions and observations back to the original medv units
pr.nn_ <- pr.nn$net.result * (max(data$medv) - min(data$medv)) + min(data$medv)
test.r <- (test_$medv) * (max(data$medv) - min(data$medv)) + min(data$medv)

MSE.nn <- sum((test.r - pr.nn_)^2)/nrow(test_)
(res <- c(MSE.lm, MSE.nn))
[1] 31.26302 19.61860
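
Numbers aside, a visual check is useful. The sketch below (plain base R, reusing the objects already in the workspace) plots predicted against observed medv for both models:

plot(test$medv, pr.nn_, col = "red", pch = 1,
     xlab = "Observed medv", ylab = "Predicted medv")
points(test$medv, pr.lm, col = "blue", pch = 2)
abline(0, 1, lty = 2)  # perfect predictions would fall on this line
legend("topleft", legend = c("NN", "LM"), col = c("red", "blue"), pch = c(1, 2))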

Should we add more units?

# Same pipeline, now with 8 hidden units
nn8 <- neuralnet(f, data = train_, hidden = 8, linear.output = TRUE)
plot(nn8, rep = "best")

pr.nn <- compute(nn8, test_[,1:13])
pr.nn_ <- pr.nn$net.result * (max(data$medv) - min(data$medv)) + min(data$medv)
test.r <- (test_$medv) * (max(data$medv) - min(data$medv)) + min(data$medv)

MSE.nn8 <- sum((test.r - pr.nn_)^2)/nrow(test_)
res <- c(res,MSE.nn8)
res
[1] 31.26302 19.61860 13.16040
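
With extra units the network can start to memorize the training set. A minimal overfitting check (a sketch reusing the objects above): compare the training-set MSE of nn8 with its test-set MSE.

pr.train <- compute(nn8, train_[,1:13])$net.result *
  (max(data$medv) - min(data$medv)) + min(data$medv)
train.r <- train_$medv * (max(data$medv) - min(data$medv)) + min(data$medv)
mean((train.r - pr.train)^2)  # a value far below MSE.nn8 signals overfitting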

Should we add a layer?

# Two hidden layers: 8 units, then 4
nn8.4 <- neuralnet(f, data = train_, hidden = c(8, 4), linear.output = TRUE)
plot(nn8.4, rep = "best")

pr.nn <- compute(nn8.4, test_[,1:13])
pr.nn_ <- pr.nn$net.result * (max(data$medv) - min(data$medv)) + min(data$medv)
test.r <- (test_$medv) * (max(data$medv) - min(data$medv)) + min(data$medv)

# Distinct name so the 8-unit result is not overwritten
MSE.nn8.4 <- sum((test.r - pr.nn_)^2)/nrow(test_)
res <- c(res, MSE.nn8.4)
res
[1] 31.263022 19.618598 13.160401  8.079056
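
All of these MSEs come from a single random split and a single random weight initialization, so they fluctuate from run to run. A sturdier comparison would repeat the experiment over several splits; a minimal sketch, assuming the scaled data and formula f from above (each fit may take a moment and can occasionally fail to converge with the default threshold):

set.seed(450)
mse.cv <- sapply(1:10, function(i) {
  idx <- sample(1:nrow(data), round(0.9 * nrow(data)))
  fit <- neuralnet(f, data = scaled[idx,], hidden = c(8, 4), linear.output = TRUE)
  pr <- compute(fit, scaled[-idx, 1:13])$net.result *
    (max(data$medv) - min(data$medv)) + min(data$medv)
  obs <- scaled[-idx, "medv"] * (max(data$medv) - min(data$medv)) + min(data$medv)
  mean((obs - pr)^2)  # test MSE for this split
})
mean(mse.cv)  # average test MSE across the 10 splits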