---
title: "Jeffrey Pine Beatle Outbreak 1991-1996"
output:
flexdashboard::flex_dashboard:
orientation: rows
vertical_layout: fill
theme: "sandstone"
logo: "img/beetle-logo-resized.jpg"
source_code: embed
---
```{r setup, include=FALSE}
# Libraries --------------------------------------------------------------------
library(flexdashboard)
library(tidyverse)
library(readxl)
library(broom)
library(car)
library(ggfortify)
library(tidymodels)
library(performance)
library(ggplot2)
library(glmnet)
# Data -------------------------------------------------------------------------
beetle_tbl <-
read_excel("data/Data_1993.xlsx")
# Linear Regression ------------------------------------------------------------
# Create training/testing data
beetle_split <- initial_split(beetle_tbl)
beetle_train <- training(beetle_split)
beetle_test <- testing(beetle_split)
beetle_rec <- beetle_tbl %>%
recipe(DeadDist ~ TreeDiam + Infest_Serv1 + SDI_20th + BA_20th) %>%
# step_sqrt(all_outcomes()) %>%
step_corr(all_predictors())
# View feature engineered data
beetle_rec %>%
prep() %>%
bake(new_data = NULL)
# Create model
lm_mod <-
linear_reg() %>%
set_engine("lm")
# Create workflow
beetle_wflow <-
workflow() %>%
add_model(lm_mod) %>%
add_recipe(beetle_rec)
beetle_fit <-
beetle_wflow %>%
fit(data = beetle_tbl)
# Ridge Regression -------------------------------------------------------------
# Create training/testing data
beetle_split <- initial_split(beetle_tbl)
beetle_train <- training(beetle_split)
beetle_test <- testing(beetle_split)
# Use Dr. Smirnova's best lambda estimate
ridge_mod <-
linear_reg(mixture = 0, penalty = 0.1629751) %>%
set_engine("glmnet")
# Verify what we are doing
ridge_mod %>%
translate()
# Create a new recipe
beetle_rec <- beetle_train %>%
recipe(DeadDist ~ TreeDiam + Infest_Serv1 + SDI_20th + BA_20th) %>%
# step_sqrt(all_outcomes()) %>%
step_corr(all_predictors()) %>%
step_normalize(all_numeric(), -all_outcomes()) %>%
step_zv(all_numeric(), -all_outcomes())
beetle_ridge_wflow <-
workflow() %>%
add_model(ridge_mod) %>%
add_recipe(beetle_rec)
beetle_ridge_wflow
beetle_ridge_fit <-
beetle_ridge_wflow %>%
fit(data = beetle_train)
```
Data Overview {data-orientation=columns}
=====================================
Column {data-width=500}
-----------------------------------------------------------------------
### Project Description
```
* From 1991-1996, Jeffrey pine beetles (JPB) caused tree mortality throughout
the Lake Tahoe Basin during a severe drought.
* Census data were collected annually on 10,721 trees to assess patterns of
JPB-caused mortality.
* The motivation for this analysis is to predict the minimum linear distance
to the nearest brood tree (DeadDist).
* This dashboard provides a visual overview of the analysis by communicating
key features of the project.
```
### Table of Selected Data Variables
```{r}
data <- data.frame(
Groups = c("Tree Diameter", "Infestation Severity", "Nearest brood tree", "Forest density", "Beetle population pressure"),
Variables = c("TreeDiam", "Infest_Serv1", "DeadDist", "SDI_20th", "BA_20th"),
Description = c("Tree diameter/size", "Infestation severity nearest to response", "Minimum linear distance to nearest brood tree", "Stand Density Index @ 1/20th-acre neighborhood surrounding response", "Basal area total for all infested trees within 1/20th-acre neighborhood")
)
knitr::kable(data)
```
Column {data-width=500}
-----------------------------------------------------------------------
### JPB-Attacked Trees
```{r}
p.all <-ggplot(data = beetle_tbl,aes(x = Easting, y = Northing)) +
geom_point(aes(color =factor(Response), alpha =factor(Response))) +
scale_alpha_discrete(range =c(0.5, 1), guide=FALSE) +
scale_color_manual("",values=c("yellowgreen", "red"),
labels =c("Alive", "JPB-attacked")) + theme_bw() + xlab("UTM X")+ ylab("UTM Y") +
theme(legend.position =c(0,1), legend.text=element_text(size=15))
p.all
```
Models {data-orientation=columns}
=====================================
Column {data-width=500}
-------------------------------------
### Linear Regression Model Assumptions
```{r}
beetle_fit %>%
extract_fit_parsnip() %>%
check_model()
```
Column {data-width=500}
-------------------------------------
### Ridge Regression Model Tuning Parameters
```{r}
# Convert x into a matrix of predictors and
# Transform any qualitative variables into a dummy variable
fm <- as.formula("DeadDist~TreeDiam + Infest_Serv1+ SDI_20th +BA_20th")
x=model.matrix(fm, beetle_tbl)[,-1]
y = beetle_tbl$DeadDist
# Set possible values of tuning parameter lambda
grid=10^seq(10,-2,length=100)
# Fit the model
ridge.mod=glmnet(x,y,alpha=0,lambda=grid)
cv.fit <- cv.glmnet(x,y,alpha=0, lambda=grid)
plot(cv.fit)
```
Results {data-orientation=rows}
=====================================
Row
-------------------------------------
### Linear Regression Coefficient Summary
```{r}
beetle_fit %>%
extract_fit_parsnip() %>%
tidy()
```
### Linear Regression Residual Plot
```{r}
beetle_pred <-
predict(beetle_fit, beetle_test, type = "numeric") %>%
bind_cols(beetle_test %>% select(DeadDist, TreeDiam, Infest_Serv1, SDI_20th, BA_20th))
ggplot(beetle_pred, aes(x = .pred, y = DeadDist - .pred)) +
geom_point(alpha = 0.6, color = "blue") +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
labs(title = "Residual Plot",
x = "Predicted Values",
y = "Residuals (Actual - Predicted)") +
theme_minimal()
```
Row
-------------------------------------
### Ridge Regression Coefficient Summary
```{r}
beetle_ridge_fit %>%
extract_fit_parsnip() %>%
tidy()
```
### Ridge Regression Predictions
```{r}
beetle_pred <-
predict(beetle_ridge_fit, beetle_test, type = "numeric") %>%
bind_cols(beetle_test %>% select(TreeDiam, Infest_Serv1, SDI_20th, BA_20th))
knitr::kable(head(beetle_pred, 9))
```