Page 1

Row 1

Raw Data Table

Overview of the variables

Row 2

Correlation Heat Map

Variables most strongly associated with DeadDist

Page 2

Prediction vs Actual

Evaluation of linear model performance

Prediction vs Actual

Evaluation of lasso model performance

Page 3

Row 1

Accuracy Comparison Across Models

Similar performances with minor differences in fit

Row 2

Linear VIP Plot

Top predictors in the model

Lasso VIP Plot

Top predictors in the model

Page 4

Linear Regression Check

Model captures general trend but shows heteroscedasticity

---
title: "Brood Tree Prediction"
output: 
  flexdashboard::flex_dashboard:
    theme:
      bootswatch: solar
      version: 4
    logo: C:/Users/aland/OneDrive/Desktop/HGEN612/Projects/Assignment_2/tree_image.jpg
    orientation: rows
    vertical_layout: scroll
    source_code: embed
---


```{r setup, include=FALSE}
library(flexdashboard)
library(tidymodels)
library(vip)
library(tidyverse)
library(plotly)
library(readxl)
library(broom)
library(car)
library(ggfortify)
library(GGally)
library(performance)
library(corrr)
library(caret)
library(bslib)
library(cowplot)
library(reshape2)
library(htmltools)

```


```{r Data, include= FALSE}
trees <- read_excel("C:/Users/aland/OneDrive/Desktop/HGEN612/hgen-612/data/Data_1993.xlsx", sheet = 1)

trees %>% 
  as_tibble()

trees_cor <- trees %>% 
  as_tibble() %>% 
  select(where(is.numeric)) %>% 
  correlate() 

trees_cor %>% 
  rearrange() %>% 
  fashion() %>% 
  knitr::kable()

```


```{r Recipe, include= FALSE}
tree_rec <- trees %>% 
  recipe(DeadDist ~ TreeDiam + Infest_Serv1 +  Neigh_1 + BA_20th) %>%   step_sqrt(all_outcomes()) %>% 
  step_corr(all_predictors())

tree_rec %>% 
  prep() %>% 
  bake(new_data = NULL)

```


```{r Model, include=FALSE}
lm_mod <- 
  linear_reg() %>% 
  set_engine("lm")

```


```{r Linear Workflow, include=FALSE}
tree_wflow <- 
  workflow() %>% 
  add_model(lm_mod) %>% 
  add_recipe(tree_rec)

tree_fit <-
  fit(tree_wflow , data = trees)

tree_fit %>% 
  extract_fit_parsnip() %>% 
  tidy()

```


```{r, include=FALSE}
tree_fit %>% 
  extract_fit_parsnip() %>% 
  glance()

```


```{r, include=FALSE}
tree_fit %>% 
  extract_preprocessor()

```


```{r, include=FALSE}
tree_fit %>% 
  extract_spec_parsnip()

```


```{r, include=FALSE}
trees_lasso <- trees %>%
  mutate(DeadDist_sqrt = sqrt(DeadDist))

tree_split <- initial_split(trees_lasso)
tree_train <- training(tree_split)
tree_test <- testing(tree_split)

tree_boot <- bootstraps(tree_train)

lambda_grid <- grid_regular(penalty(), levels = 50)

lasso_rec <- tree_train %>% 
  recipe(DeadDist ~ TreeDiam + Infest_Serv1 +  Neigh_1 + BA_20th) %>%
  step_corr(all_predictors())

lasso_rec %>% 
  prep() %>% 
  bake(new_data = NULL)

lasso_mod <-
  linear_reg(mixture = 1, penalty = tune()) %>% 
  set_engine("glmnet")

```


```{r Lasso Workflow, include=FALSE}
tree_lasso_wflow <- 
  workflow() %>% 
  add_model(lasso_mod) %>% 
  add_recipe(lasso_rec)

set.seed(1234)
lasso_grid <- tune_grid(tree_lasso_wflow,
  resamples = tree_boot,
  grid = lambda_grid)

```


```{r lowest RMSE, include=FALSE}
lowest_rmse <- lasso_grid %>%
  select_best()

# update model with lowest rmse
final_lasso <-
  finalize_workflow(tree_lasso_wflow,
  lowest_rmse)


final_lasso %>% 
  fit(tree_train) %>%
  extract_fit_parsnip() %>% 
  tidy()

```


Page 1
=====================================================

Row 1
-----

### **Raw Data Table**
#### *Overview of the variables*

```{r Data Plot}
rmarkdown::paged_table(trees)

```


Row 2
-------------

### **Correlation Heat Map**
#### *Variables most strongly associated with DeadDist*

```{r Correlation Plot, fig.height= 10, fig.width= 15}
(trees %>%
  select(where(is.numeric)) %>%
  correlate() %>%
  rearrange() %>%
  rplot(shape = 15,
        colors = c("#B58900", "#859900")) +
  theme(axis.text.x = element_text(angle = 45,
                                   hjust = 1,
                                   size = 10),
        axis.text.y = element_text(size = 10),
        plot.margin = margin(30, 15, 30, 15),
        plot.background = element_rect(fill = "#eee8d5", color = NA),
        panel.background = element_rect(fill = "#fdf6e9", color = NA))) %>% 
  ggplotly()

```


Page 2
=====================================================

### **Prediction vs Actual **
#### *Evaluation of linear model performance*

```{r Linear Evaluation, fig.height= 10, fig.width= 15}
(tree_fit %>% 
  augment(new_data = trees) %>% 
  ggplot(aes(x = .pred, y = DeadDist)) +
  geom_point(alpha = 0.7,
             colour = "#859900") +
  geom_abline(slope = 1, intercept = 0, color = "#cb4b16", linetype = "dashed") +
  theme(plot.background = element_rect(fill = "#eee8d5", color = NA),
        panel.background = element_rect(fill = "#fdf6e9", color = NA),
        legend.position = "none") +
  labs(x = "Predicted Squared DeadDist",
       y = "Actual Squared DeadDist")) %>% 
  ggplotly()

```


### **Prediction vs Actual **
#### *Evaluation of lasso model performance*

```{r Lasso Evaluation, fig.height= 10, fig.width= 15}
lasso_fit <-
  final_lasso %>% 
  fit(data = trees)

(predict(lasso_fit, new_data =trees) %>%
     bind_cols(trees) %>% 
    ggplot(aes(x = .pred, y = DeadDist)) +
    geom_point(alpha = 0.7,
              colour = "#B58900") +
    geom_abline(slope = 1, intercept = 0, color = "#2aa198", linetype = "dashed") +
    theme(plot.background = element_rect(fill = "#eee8d5", color = NA),
          panel.background = element_rect(fill = "#fdf6e9", color = NA),
          strip.background = element_rect(fill = "#fdf6e9", color = NA),
          legend.position = "none") +
    labs(x = "Predicted Squared DeadDist",
         y = "Actual Squared DeadDist")) %>%
  ggplotly()

```


Page 3
=====================================================


```{r Comparison Code, include=FALSE}
linear_R2 <- tree_fit %>% 
  augment(new_data = trees) %>%   # Original scale
  mutate(.pred_orig = (.pred)^2) %>%
  metrics(truth = DeadDist, estimate = .pred_orig) %>%
  filter(.metric == "rsq")

linear_rmse <- tree_fit %>%  # Original scale
  augment(new_data = trees) %>%
  mutate(.pred_orig = (.pred)^2) %>%
  metrics(truth = DeadDist, estimate = .pred_orig) %>%
  filter(.metric == "rmse")

lasso_R2 <- lasso_fit %>%
  augment(new_data = trees) %>% 
  metrics(truth = DeadDist, estimate = .pred) %>%
  filter(.metric == "rsq")

lasso_rmse <- lasso_grid %>%
  show_best(metric = "rmse", n = 1) %>%
  pull(mean)
```

Row 1 {data-height=550}
-------------

### **Accuracy Comparison Across Models**
#### *Similar performances with minor differences in fit*

```{r Comparing accuracies, fig.width = 15}
comparison_data <- data.frame(Model = rep(c("Linear", "Regression"), each = 2),
                              Metric = rep(c("R2", "RMSE"), times = 2),
                              Value = c(linear_R2$.estimate,
                                        linear_rmse$.estimate,
                                        lasso_R2$.estimate,
                                        lasso_rmse))

comparison_data %>% 
  ggplot(aes(x = Model, 
             y = Value,
             fill = Model)) +
  geom_bar(stat = "identity",
           position = "dodge") +
  scale_fill_manual(values = c("#859900",
                              "#B58900")) +
  facet_wrap(~Metric,
             scales = "free_y") +
  ggthemes::theme_wsj() +
  theme(plot.background = element_rect(fill = "#eee8d5", color = NA),
        panel.background = element_rect(fill = "#eee8d5", color = NA),
        strip.background = element_rect(fill = "#fdf6e9", color = NA),
        legend.position = "none")

```


Row 2
-------------

### **Linear VIP Plot**
#### *Top predictors in the model*

```{r Linear Predictors plot, fig.width = 7}
tree_fit %>% 
  extract_fit_parsnip() %>%
  vip::vip() +
  geom_col(aes(fill = Variable)) +
  scale_fill_manual(values = c("#859900",
                               "#B58900",
                               "#cb4b16",
                               "#2aa198")) +
  ggthemes::theme_wsj() +
  theme(plot.background = element_rect(fill = "#eee8d5", color = NA),
        panel.background = element_rect(fill = "#fdf6e9", color = NA),
        legend.position = "none")

```


### **Lasso VIP Plot**
#### *Top predictors in the model*

```{r Lasso Predictors plot, fig.width = 7}
final_lasso %>% 
  fit(tree_train) %>% 
  extract_fit_parsnip() %>%
  vip::vip() +
  geom_col(aes(fill = Variable)) +
  scale_fill_manual(values = c("#859900",
                               "#B58900",
                               "#cb4b16",
                               "#2aa198")) +
  ggthemes::theme_wsj() +
  theme(plot.background = element_rect(fill = "#eee8d5", color = NA),
        panel.background = element_rect(fill = "#fdf6e9", color = NA),
        legend.position = "none")

```


Page 4
=====================================================

### **Linear Regression Check**
#### *Model captures general trend but shows heteroscedasticity*

```{r Linear Model Check, fig.height= 10, fig.width= 15}
tree_fit %>% 
  extract_fit_parsnip() %>% 
  check_model(theme = ggthemes::theme_wsj(),
              colors = c("#859900","#B58900","#cb4b16"))

```