library(keras)
dataset <- dataset_boston_housing()
c(c(train_data,train_targets),c(test_data,test_targets)) %<-% dataset
str(train_data)
## num [1:404, 1:13] 1.2325 0.0218 4.8982 0.0396 3.6931 ...
str(train_targets) # median hojse price in $k
## num [1:404(1d)] 15.2 42.3 50 21.1 17.7 18.5 11.3 15.6 15.6 14.4 ...
#first normalize the data
mean <- apply(train_data,2, mean)
std <- apply(train_data,2,sd)
train_data <- scale(train_data, center = mean, scale=std)
test_data <- scale(test_data, center = mean, scale=std)
build_model <- function(){
model<-keras_model_sequential() %>%
layer_dense (units=64, activation="relu", input_shape = dim(train_data)[[2]]) %>% # 13 predictors
layer_dense (units=64, activation="relu") %>%
layer_dense(units=1)
model %>% compile (
optimizer="rmsprop",
loss="mse",
metric=c("mae")
)
}
K-hold validation is used since the number of records is small
k <- 4
indices <- sample(1:nrow(train_data))
folds <- cut(indices, breaks = k, labels = FALSE)
num_epochs <- 100
all_scores <- c()
for (i in 1:k) {
cat("processing fold #", i, "\n")
val_indices <- which(folds == i, arr.ind = TRUE)
val_data <- train_data[val_indices,]
val_targets <- train_targets[val_indices]
partial_train_data <- train_data[-val_indices,]
partial_train_targets <- train_targets[-val_indices]
model <- build_model()
model %>% fit(partial_train_data, partial_train_targets,
epochs = num_epochs, batch_size = 1)
results <- model %>% evaluate(val_data, val_targets)
all_scores <- c(all_scores, results$mean_absolute_error)
}
## processing fold # 1
## processing fold # 2
## processing fold # 3
## processing fold # 4
##saving the validation logs at each fold
num_epochs <- 500
all_mae_histories <- NULL
for (i in 1:k) {
cat("processing fold #", i, "\n")
val_indices <- which(folds == i, arr.ind = TRUE)
val_data <- train_data[val_indices,]
val_targets <- train_targets[val_indices]
partial_train_data <- train_data[-val_indices,]
partial_train_targets <- train_targets[-val_indices]
model <- build_model()
history <- model %>% fit(
partial_train_data, partial_train_targets,
validation_data = list(val_data, val_targets),
epochs = num_epochs, batch_size = 1, verbose = 0
)
mae_history <- history$metrics$val_mean_absolute_error
all_mae_histories <- rbind(all_mae_histories, mae_history)
}
## processing fold # 1
## processing fold # 2
## processing fold # 3
## processing fold # 4
average_mae_history <- data.frame(
epoch = seq(1:ncol(all_mae_histories)),
validation_mae = apply(all_mae_histories, 2, mean)
)
#plotting validation scores
library(ggplot2)
ggplot(average_mae_history, aes(x=epoch, y=validation_mae)) + geom_line()
ggplot(average_mae_history, aes(x=epoch, y=validation_mae)) + geom_smooth()
## `geom_smooth()` using method = 'loess'
model <- build_model()
model %>% fit(train_data, train_targets,
epochs = 80, batch_size = 16, verbose = 0)
result <- model %>% evaluate(test_data, test_targets)