# NOTE(review): setwd() ties the script to one machine's drive layout; it is
# kept because later steps may rely on the working directory.
setwd("J:/AIiHS/Chap03")
# Load the data set from the working directory.
dat <- read.csv("TAHIR_rwd1.csv")
# Number of rows in each highway class.
table(dat$HwyClass)
# Preview the first rows. The recorded output below contains this preview, but
# the call that produces it was missing from the script.
head(dat)
##
## Rural Interstate Rural Multi-lane Div. Rural Multi-lane Undiv.
## 356 519 158
## Rural Two-Lane Urban Interstate Urban Multi-lane Div.
## 8461 529 789
## Urban Multi-lane Undiv. Urban Two-lane
## 739 2482
## NewSegID Urban_Rur HwyClass Length AADT Lanes LaneWidth
## 1 001-01_0_2 Rural Rural Two-Lane 2.000 2920 2 12
## 2 001-01_2.53_4.349 Rural Rural Two-Lane 1.819 2920 2 12
## 3 001-01_2_2.53 Rural Rural Two-Lane 0.530 2920 2 12
## 4 001-01_4.349_6.3 Urban Urban Two-lane 1.951 4320 2 12
## 5 001-01_6.3_8.3 Urban Urban Two-lane 2.000 4320 2 12
## 6 001-01_8.3_9.297 Urban Urban Two-lane 0.997 4320 2 12
## ShWidth Curve MinPSL Total_Crash KABC_Crash
## 1 6 8 55 12 4
## 2 6 4 45 6 3
## 3 6 4 55 1 1
## 4 6 7 35 9 4
## 5 6 5 45 3 1
## 6 6 2 45 2 2
# Keep only the "Rural Two-Lane" rows. which() discards rows whose class is NA,
# matching what subset() does with a condition that evaluates to NA.
dat1 <- dat[which(dat$HwyClass == "Rural Two-Lane"), , drop = FALSE]
# Rows and columns left after filtering.
dim(dat1)
## [1] 8461 12
# Training set size: 75% of the rows, rounded down.
smp_size <- floor(0.75 * nrow(dat1))
# Fix R's RNG seed so the same rows are drawn on every run.
set.seed(123)
# Row positions of the training sample, drawn without replacement.
train_ind <- sample(seq_len(nrow(dat1)), size = smp_size)
train <- dat1[train_ind, ]
# Negative indexing keeps every row that was not sampled.
test <- dat1[-train_ind, ]
dim(train)
# The recorded output lists a second dimension line (the held-out rows), but
# the call that prints it was missing.
dim(test)
## [1] 6345 12
## [1] 2116 12
# Modelling columns, selected by name rather than by position (formerly
# c(4, 5, 7:11)), so a change in the CSV column order raises an error instead
# of silently picking the wrong variables. Total_Crash is the response.
model_cols <- c(
  "Length", "AADT", "LaneWidth", "ShWidth", "Curve", "MinPSL", "Total_Crash"
)
train_df <- train[, model_cols]
test_df <- test[, model_cols]
library(keras)
library(tfdatasets)
library(tensorflow)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
# Feature spec: Total_Crash is the response and every other column a predictor.
# All numeric columns become numeric feature columns normalised with
# scaler_standard(); fit() then fits the spec on train_df.
spec <- train_df %>%
  feature_spec(Total_Crash ~ .) %>%
  step_numeric_column(
    all_numeric(),
    normalizer_fn = scaler_standard()
  ) %>%
  fit()
# Print the fitted spec.
spec
## -- Feature Spec ----------------------------------------------------------------
## A feature_spec with 6 steps.
## Fitted: TRUE
## -- Steps -----------------------------------------------------------------------
## The feature_spec has 1 dense features.
## StepNumericColumn: Length, AADT, LaneWidth, ShWidth, Curve, MinPSL
## -- Dense features --------------------------------------------------------------
# Model inputs derived from the predictor columns (response column removed).
input <- layer_input_from_dataset(select(train_df, -Total_Crash))
# Dense-features layer driven by the fitted spec, two 64-unit ReLU hidden
# layers, and a single-unit output layer with no activation argument.
output <- input %>%
  layer_dense_features(dense_features(spec)) %>%
  layer_dense(
    units = 64,
    activation = "relu"
  ) %>%
  layer_dense(
    units = 64,
    activation = "relu"
  ) %>%
  layer_dense(units = 1)
# Tie inputs and outputs together and show the layer/parameter summary.
model <- keras_model(input, output)
summary(model)
## Model: "model"
## ________________________________________________________________________________
## Layer (type) Output Shape Param # Connected to
## ================================================================================
## AADT (InputLayer) [(None,)] 0
## ________________________________________________________________________________
## Curve (InputLayer) [(None,)] 0
## ________________________________________________________________________________
## LaneWidth (InputLayer) [(None,)] 0
## ________________________________________________________________________________
## Length (InputLayer) [(None,)] 0
## ________________________________________________________________________________
## MinPSL (InputLayer) [(None,)] 0
## ________________________________________________________________________________
## ShWidth (InputLayer) [(None,)] 0
## ________________________________________________________________________________
## dense_features (DenseFeat (None, 6) 0 AADT[0][0]
## Curve[0][0]
## LaneWidth[0][0]
## Length[0][0]
## MinPSL[0][0]
## ShWidth[0][0]
## ________________________________________________________________________________
## dense_2 (Dense) (None, 64) 448 dense_features[0][0]
## ________________________________________________________________________________
## dense_1 (Dense) (None, 64) 4160 dense_2[0][0]
## ________________________________________________________________________________
## dense (Dense) (None, 1) 65 dense_1[0][0]
## ================================================================================
## Total params: 4,673
## Trainable params: 4,673
## Non-trainable params: 0
## ________________________________________________________________________________
# Configure training: mean squared error loss, RMSprop optimizer, and mean
# absolute error reported as an additional metric.
compile(
  model,
  loss = "mse",
  optimizer = optimizer_rmsprop(),
  metrics = list("mean_absolute_error")
)
# Build and compile the feed-forward regression network.
#
# Args:
#   data:     Data frame holding the predictor columns and `Total_Crash`.
#             Defaults to the global `train_df`, which the function previously
#             read directly.
#   spec_fit: Fitted feature spec that supplies the dense feature columns.
#             Defaults to the global `spec`.
#   units:    Number of units in each of the two hidden layers.
#
# Returns:
#   The compiled keras model (MSE loss, RMSprop optimizer, MAE metric).
build_model <- function(data = train_df, spec_fit = spec, units = 64) {
  # Inputs are derived from the predictors only; the response is dropped.
  input <- layer_input_from_dataset(data %>% select(-Total_Crash))
  output <- input %>%
    layer_dense_features(dense_features(spec_fit)) %>%
    layer_dense(units = units, activation = "relu") %>%
    layer_dense(units = units, activation = "relu") %>%
    layer_dense(units = 1)
  model <- keras_model(input, output)
  model %>%
    compile(
      loss = "mse",
      optimizer = optimizer_rmsprop(),
      metrics = list("mean_absolute_error")
    )
  # compile() is used for its effect on `model`, so return the model itself.
  model
}
# Progress indicator for silent training runs: emits one dot per finished
# epoch and starts a new line whenever the epoch index is a multiple of 80.
print_dot_callback <- callback_lambda(
  on_epoch_end = function(epoch, logs) {
    starts_new_row <- (epoch %% 80) == 0
    if (starts_new_row) {
      cat("\n")
    }
    cat(".")
  }
)
# Rebuild the model through build_model(), then train it for 200 epochs.
# The last 20% of the training rows are held back for validation; keras'
# own progress output is silenced in favour of the dot callback.
model <- build_model()
history <- fit(
  model,
  x = select(train_df, -Total_Crash),
  y = train_df[["Total_Crash"]],
  epochs = 200,
  validation_split = 0.2,
  verbose = 0,
  callbacks = list(print_dot_callback)
)
##
## ................................................................................
## ................................................................................
## ........................................
library(ggplot2)
# Plot the recorded training history using the black-and-white ggplot2 theme
# with a 16 pt base font.
plot(history) +
  theme_bw(base_size = 16)
## `geom_smooth()` using formula 'y ~ x'

# Predict on the held-out rows and store the predictions beside the observed
# counts. predict() returns a one-column matrix, hence the [, 1].
test_predictions <- model %>% predict(test_df %>% select(-Total_Crash))
test_df$pred <- test_predictions[, 1]
# Observed total on the test set.
sum(test_df$Total_Crash)
## [1] 6020
# Predicted total on the test set. The transcript previously repeated the
# observed total (6020) here instead; the predicted value depends on the
# fitted weights.
sum(test_df$pred)
# Same comparison on the training rows.
train_predictions <- model %>% predict(train_df %>% select(-Total_Crash))
train_df$pred <- train_predictions[, 1]
# Observed total on the training set.
sum(train_df$Total_Crash)
## [1] 17615
# Predicted total on the training set. The script previously summed
# Total_Crash a second time (17615) and never used the `pred` column.
sum(train_df$pred)