---
title: "Brood Tree Prediction"
output:
flexdashboard::flex_dashboard:
theme:
bootswatch: solar
version: 4
logo: C:/Users/aland/OneDrive/Desktop/HGEN612/Projects/Assignment_2/tree_image.jpg
orientation: rows
vertical_layout: scroll
source_code: embed
---
```{r setup, include=FALSE}
library(flexdashboard)
library(tidymodels)
library(vip)
library(tidyverse)
library(plotly)
library(readxl)
library(broom)
library(car)
library(ggfortify)
library(GGally)
library(performance)
library(corrr)
library(caret)
library(bslib)
library(cowplot)
library(reshape2)
library(htmltools)
```
```{r Data, include= FALSE}
trees <- read_excel("C:/Users/aland/OneDrive/Desktop/HGEN612/hgen-612/data/Data_1993.xlsx", sheet = 1)
trees %>%
as_tibble()
trees_cor <- trees %>%
as_tibble() %>%
select(where(is.numeric)) %>%
correlate()
trees_cor %>%
rearrange() %>%
fashion() %>%
knitr::kable()
```
```{r Recipe, include= FALSE}
tree_rec <- trees %>%
recipe(DeadDist ~ TreeDiam + Infest_Serv1 + Neigh_1 + BA_20th) %>% step_sqrt(all_outcomes()) %>%
step_corr(all_predictors())
tree_rec %>%
prep() %>%
bake(new_data = NULL)
```
```{r Model, include=FALSE}
lm_mod <-
linear_reg() %>%
set_engine("lm")
```
```{r Linear Workflow, include=FALSE}
tree_wflow <-
workflow() %>%
add_model(lm_mod) %>%
add_recipe(tree_rec)
tree_fit <-
fit(tree_wflow , data = trees)
tree_fit %>%
extract_fit_parsnip() %>%
tidy()
```
```{r, include=FALSE}
tree_fit %>%
extract_fit_parsnip() %>%
glance()
```
```{r, include=FALSE}
tree_fit %>%
extract_preprocessor()
```
```{r, include=FALSE}
tree_fit %>%
extract_spec_parsnip()
```
```{r, include=FALSE}
trees_lasso <- trees %>%
mutate(DeadDist_sqrt = sqrt(DeadDist))
tree_split <- initial_split(trees_lasso)
tree_train <- training(tree_split)
tree_test <- testing(tree_split)
tree_boot <- bootstraps(tree_train)
lambda_grid <- grid_regular(penalty(), levels = 50)
lasso_rec <- tree_train %>%
recipe(DeadDist ~ TreeDiam + Infest_Serv1 + Neigh_1 + BA_20th) %>%
step_corr(all_predictors())
lasso_rec %>%
prep() %>%
bake(new_data = NULL)
lasso_mod <-
linear_reg(mixture = 1, penalty = tune()) %>%
set_engine("glmnet")
```
```{r Lasso Workflow, include=FALSE}
tree_lasso_wflow <-
workflow() %>%
add_model(lasso_mod) %>%
add_recipe(lasso_rec)
set.seed(1234)
lasso_grid <- tune_grid(tree_lasso_wflow,
resamples = tree_boot,
grid = lambda_grid)
```
```{r lowest RMSE, include=FALSE}
lowest_rmse <- lasso_grid %>%
select_best()
# update model with lowest rmse
final_lasso <-
finalize_workflow(tree_lasso_wflow,
lowest_rmse)
final_lasso %>%
fit(tree_train) %>%
extract_fit_parsnip() %>%
tidy()
```
Page 1
=====================================================
Row 1
-----
### **Raw Data Table**
#### *Overview of the variables*
```{r Data Plot}
rmarkdown::paged_table(trees)
```
Row 2
-------------
### **Correlation Heat Map**
#### *Variables most strongly associated with DeadDist*
```{r Correlation Plot, fig.height= 10, fig.width= 15}
(trees %>%
select(where(is.numeric)) %>%
correlate() %>%
rearrange() %>%
rplot(shape = 15,
colors = c("#B58900", "#859900")) +
theme(axis.text.x = element_text(angle = 45,
hjust = 1,
size = 10),
axis.text.y = element_text(size = 10),
plot.margin = margin(30, 15, 30, 15),
plot.background = element_rect(fill = "#eee8d5", color = NA),
panel.background = element_rect(fill = "#fdf6e9", color = NA))) %>%
ggplotly()
```
Page 2
=====================================================
### **Prediction vs Actual **
#### *Evaluation of linear model performance*
```{r Linear Evaluation, fig.height= 10, fig.width= 15}
(tree_fit %>%
augment(new_data = trees) %>%
ggplot(aes(x = .pred, y = DeadDist)) +
geom_point(alpha = 0.7,
colour = "#859900") +
geom_abline(slope = 1, intercept = 0, color = "#cb4b16", linetype = "dashed") +
theme(plot.background = element_rect(fill = "#eee8d5", color = NA),
panel.background = element_rect(fill = "#fdf6e9", color = NA),
legend.position = "none") +
labs(x = "Predicted Squared DeadDist",
y = "Actual Squared DeadDist")) %>%
ggplotly()
```
### **Prediction vs Actual **
#### *Evaluation of lasso model performance*
```{r Lasso Evaluation, fig.height= 10, fig.width= 15}
lasso_fit <-
final_lasso %>%
fit(data = trees)
(predict(lasso_fit, new_data =trees) %>%
bind_cols(trees) %>%
ggplot(aes(x = .pred, y = DeadDist)) +
geom_point(alpha = 0.7,
colour = "#B58900") +
geom_abline(slope = 1, intercept = 0, color = "#2aa198", linetype = "dashed") +
theme(plot.background = element_rect(fill = "#eee8d5", color = NA),
panel.background = element_rect(fill = "#fdf6e9", color = NA),
strip.background = element_rect(fill = "#fdf6e9", color = NA),
legend.position = "none") +
labs(x = "Predicted Squared DeadDist",
y = "Actual Squared DeadDist")) %>%
ggplotly()
```
Page 3
=====================================================
```{r Comparison Code, include=FALSE}
linear_R2 <- tree_fit %>%
augment(new_data = trees) %>% # Original scale
mutate(.pred_orig = (.pred)^2) %>%
metrics(truth = DeadDist, estimate = .pred_orig) %>%
filter(.metric == "rsq")
linear_rmse <- tree_fit %>% # Original scale
augment(new_data = trees) %>%
mutate(.pred_orig = (.pred)^2) %>%
metrics(truth = DeadDist, estimate = .pred_orig) %>%
filter(.metric == "rmse")
lasso_R2 <- lasso_fit %>%
augment(new_data = trees) %>%
metrics(truth = DeadDist, estimate = .pred) %>%
filter(.metric == "rsq")
lasso_rmse <- lasso_grid %>%
show_best(metric = "rmse", n = 1) %>%
pull(mean)
```
Row 1 {data-height=550}
-------------
### **Accuracy Comparison Across Models**
#### *Similar performances with minor differences in fit*
```{r Comparing accuracies, fig.width = 15}
comparison_data <- data.frame(Model = rep(c("Linear", "Regression"), each = 2),
Metric = rep(c("R2", "RMSE"), times = 2),
Value = c(linear_R2$.estimate,
linear_rmse$.estimate,
lasso_R2$.estimate,
lasso_rmse))
comparison_data %>%
ggplot(aes(x = Model,
y = Value,
fill = Model)) +
geom_bar(stat = "identity",
position = "dodge") +
scale_fill_manual(values = c("#859900",
"#B58900")) +
facet_wrap(~Metric,
scales = "free_y") +
ggthemes::theme_wsj() +
theme(plot.background = element_rect(fill = "#eee8d5", color = NA),
panel.background = element_rect(fill = "#eee8d5", color = NA),
strip.background = element_rect(fill = "#fdf6e9", color = NA),
legend.position = "none")
```
Row 2
-------------
### **Linear VIP Plot**
#### *Top predictors in the model*
```{r Linear Predictors plot, fig.width = 7}
tree_fit %>%
extract_fit_parsnip() %>%
vip::vip() +
geom_col(aes(fill = Variable)) +
scale_fill_manual(values = c("#859900",
"#B58900",
"#cb4b16",
"#2aa198")) +
ggthemes::theme_wsj() +
theme(plot.background = element_rect(fill = "#eee8d5", color = NA),
panel.background = element_rect(fill = "#fdf6e9", color = NA),
legend.position = "none")
```
### **Lasso VIP Plot**
#### *Top predictors in the model*
```{r Lasso Predictors plot, fig.width = 7}
final_lasso %>%
fit(tree_train) %>%
extract_fit_parsnip() %>%
vip::vip() +
geom_col(aes(fill = Variable)) +
scale_fill_manual(values = c("#859900",
"#B58900",
"#cb4b16",
"#2aa198")) +
ggthemes::theme_wsj() +
theme(plot.background = element_rect(fill = "#eee8d5", color = NA),
panel.background = element_rect(fill = "#fdf6e9", color = NA),
legend.position = "none")
```
Page 4
=====================================================
### **Linear Regression Check**
#### *Model captures general trend but shows heteroscedasticity*
```{r Linear Model Check, fig.height= 10, fig.width= 15}
tree_fit %>%
extract_fit_parsnip() %>%
check_model(theme = ggthemes::theme_wsj(),
colors = c("#859900","#B58900","#cb4b16"))
```