Generate some data
# Simulate a K-class spiral data set. Each class is one arm of N points whose
# radius grows linearly from 0.05 to 1 while the angle sweeps a 4.7-radian
# window (offset per class) with Gaussian jitter (sd = 0.3).
N <- 400 # number of points per class
D <- 2 # dimensionality
K <- 2 # number of classes
set.seed(308)

# Build one arm per class. lapply() visits the classes in order, so rnorm()
# is called in the same sequence as a plain for loop over the classes would.
arms <- lapply(seq_len(K), function(j) {
  r <- seq(0.05, 1, length.out = N) # radius
  t <- seq((j - 1) * 4.7, j * 4.7, length.out = N) +
    rnorm(N, sd = 0.3) # theta
  data.frame(x = r * sin(t), y = r * cos(t))
})

# Stack the raw coordinates once, outside the loop. The points must not be
# standardised while they are being accumulated: rescaling the rows gathered
# so far would put earlier classes on a different scale from later ones.
X <- do.call(rbind, arms) # data matrix (each row = single example)
Y <- data.frame(label = rep(seq_len(K), each = N)) # class labels

data <- cbind(X, Y)
colnames(data) <- c(colnames(X), "label")

# Plot limits: data range padded by 0.2 on each side.
x_min <- min(X[, 1]) - 0.2
x_max <- max(X[, 1]) + 0.2
y_min <- min(X[, 2]) - 0.2
y_max <- max(X[, 2]) + 0.2
Visualisation
library(ggplot2)
# Scatter plot of the simulated points, coloured by class label, on a bare
# canvas: no ticks, grid lines, axis text, axis titles or legend.
ggplot(data, aes(x = x, y = y)) +
  geom_point(aes(colour = as.character(label)), size = 1) +
  coord_fixed(ratio = 0.8) +
  xlim(x_min, x_max) +
  ylim(y_min, y_max) +
  theme_bw(base_size = 15) +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
Split the data
# Random train/test split: three quarters of the rows go to the training set,
# the remainder to the test set. Sizes are derived from the data so the split
# stays valid if the number of simulated points changes (800 rows -> 600).
n_obs <- nrow(data)
n_train <- round(0.75 * n_obs)
indexes <- sample(seq_len(n_obs), n_train)
train_data <- data[indexes, ]
test_data <- data[-indexes, ]

# Separate the two coordinates (features) from the class label.
trainX <- train_data[, c("x", "y")]
trainY <- train_data[["label"]]
testX <- test_data[, c("x", "y")]
testY <- test_data[["label"]]

# Recode the labels as a 0/1 response: class 1 -> 0, any other class -> 1.
trainY <- ifelse(trainY == 1, 0, 1)
testY <- ifelse(testY == 1, 0, 1)
Neural Network as logistic model
# Training frame shared by the model fits: 0/1 response Y and the two
# coordinates as predictors X1 and X2.
data.glm <- data.frame(
  Y = trainY,
  X1 = trainX[, 1],
  X2 = trainX[, 2]
)
# NOTE(review): no `family` is supplied, so glm() uses its default (gaussian,
# identity link), i.e. a least-squares fit on the 0/1 response rather than a
# logistic regression. Confirm this is intended; switching to binomial would
# also require response-scale predictions wherever a 0.5 cut-off is applied.
NN1 <- glm(Y ~ X1 + X2, data = data.glm)
Decision boundary
# Decision regions of the glm fit: predict on a fine regular grid covering
# the training points (padded by 0.2) and shade each grid cell by the
# predicted class, with the training points drawn on top.
step <- 0.01
x_min <- min(trainX[, 1]) - 0.2
x_max <- max(trainX[, 1]) + 0.2
y_min <- min(trainX[, 2]) - 0.2
y_max <- max(trainX[, 2]) + 0.2
grid <- as.matrix(expand.grid(
  seq(x_min, x_max, by = step),
  seq(y_min, y_max, by = step)
))
data.grid <- data.frame(X1 = grid[, 1], X2 = grid[, 2])

# Ask explicitly for response-scale predictions so the 0.5 cut-off below is
# meaningful whatever family/link the model was fitted with.
Z <- predict(NN1, newdata = data.grid, type = "response")
Z <- ifelse(Z < 0.5, 1, 2) # region code used only for the fill colour

# One row per grid cell, so the tile layer reads its aesthetics from a data
# frame instead of from free-standing vectors.
tiles <- data.frame(
  x = grid[, 1],
  y = grid[, 2],
  region = as.character(Z)
)
g1 <- ggplot() +
  geom_tile(
    data = tiles,
    aes(x = x, y = y, fill = region),
    alpha = 0.3,
    show.legend = FALSE
  ) +
  geom_point(
    data = train_data,
    aes(x = x, y = y, color = as.character(trainY)),
    size = 1
  ) +
  theme_bw(base_size = 15) +
  coord_fixed(ratio = 0.8) +
  theme(
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    legend.position = "none"
  )
g1
Should we add a layer?
library(neuralnet)
# Fit a network with a single hidden layer of 4 units on the training frame.
# linear.output = FALSE means the activation function is also applied to the
# output neuron.
nn1 <- neuralnet(
  Y ~ X1 + X2,
  data = data.glm,
  hidden = 4,
  linear.output = FALSE
)
# Draw the fitted network (best repetition).
plot(nn1, rep = "best")
# Network output for every point of the prediction grid.
pr.nn <- compute(nn1, data.grid)
Is the boundary still linear?
# Decision regions of the one-hidden-layer network: threshold its output on
# the grid at 0.5 and shade each grid cell by the resulting region, with the
# training points drawn on top.
Z <- pr.nn$net.result
Z.NN1 <- ifelse(Z < 0.5, 1, 2) # region code used only for the fill colour

# One row per grid cell, so the tile layer reads its aesthetics from a data
# frame instead of from free-standing vectors.
tiles.nn1 <- data.frame(
  x = grid[, 1],
  y = grid[, 2],
  region = as.character(Z.NN1)
)
g1 <- ggplot() +
  geom_tile(
    data = tiles.nn1,
    aes(x = x, y = y, fill = region),
    alpha = 0.3,
    show.legend = FALSE
  ) +
  geom_point(
    data = train_data,
    aes(x = x, y = y, color = as.character(trainY)),
    size = 1
  ) +
  theme_bw(base_size = 15) +
  coord_fixed(ratio = 0.8) +
  theme(
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    legend.position = "none"
  )
g1
Should we add a second layer?
# Fit a deeper network: two hidden layers with 4 and 2 units respectively.
# linear.output = FALSE means the activation function is also applied to the
# output neuron.
nn2 <- neuralnet(
  Y ~ X1 + X2,
  data = data.glm,
  hidden = c(4, 2),
  linear.output = FALSE
)
# Draw the fitted network (best repetition).
plot(nn2, rep = "best")
# Network output for every point of the prediction grid.
pr.nn2 <- compute(nn2, data.grid)
# Decision regions of the two-hidden-layer network: threshold its output on
# the grid at 0.5 and shade each grid cell by the resulting region, with the
# training points drawn on top.
Z2 <- pr.nn2$net.result
Z.NN2 <- ifelse(Z2 < 0.5, 1, 2) # region code used only for the fill colour

# One row per grid cell, so the tile layer reads its aesthetics from a data
# frame instead of from free-standing vectors.
tiles.nn2 <- data.frame(
  x = grid[, 1],
  y = grid[, 2],
  region = as.character(Z.NN2)
)
g1 <- ggplot() +
  geom_tile(
    data = tiles.nn2,
    aes(x = x, y = y, fill = region),
    alpha = 0.3,
    show.legend = FALSE
  ) +
  geom_point(
    data = train_data,
    aes(x = x, y = y, color = as.character(trainY)),
    size = 1
  ) +
  theme_bw(base_size = 15) +
  coord_fixed(ratio = 0.8) +
  theme(
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    legend.position = "none"
  )
g1