---
title: "Pine Distance from Dead Tree Models"
output:
flexdashboard::flex_dashboard:
orientation: rows
vertical_layout: scroll
theme: cosmo
source_code: embed
---
```{r setup, include=FALSE}
#libraries and data import
library(flexdashboard)
library(tidymodels)
library(dplyr)
library(ggplot2)
library(readxl)
library(knitr)
library(vip)
library(plotly)
library(DT)
theme_set(theme_minimal(base_size = 16))
pine_tbl <- read_excel("Data_1993 (2).xlsx")
```
```{r model_creation, include=FALSE}
set.seed(123)
# training/testing split
pine_split <- initial_split(pine_tbl, prop = 0.8)
pine_train <- training(pine_split)
pine_test <- testing(pine_split)
# your exact recipe, now using training data
pine_rec <- pine_train %>%
recipe(DeadDist ~ TreeDiam + Infest_Serv1 + SDI_20th + BA_20th) %>%
step_sqrt(all_outcomes(), skip = TRUE) %>%
step_corr(all_predictors())
# preview feature-engineered training data
baked_train <- pine_rec %>%
prep() %>%
bake(new_data = NULL)
# MODEL 1: your exact linear model
lm_mod <-
linear_reg() %>%
set_engine("lm")
lm_wflow <-
workflow() %>%
add_model(lm_mod) %>%
add_recipe(pine_rec)
lm_fit <-
lm_wflow %>%
fit(data = pine_train)
# MODEL 2: random forest using same recipe
rf_mod <-
rand_forest(trees = 500, mode = "regression") %>%
set_engine("ranger", importance = "permutation")
rf_wflow <-
workflow() %>%
add_model(rf_mod) %>%
add_recipe(pine_rec)
rf_fit <-
rf_wflow %>%
fit(data = pine_train)
# predictions on test set
lm_preds <- predict(lm_fit, new_data = pine_test) %>%
bind_cols(pine_test %>% select(DeadDist)) %>%
mutate(.pred = .pred^2)
rf_preds <- predict(rf_fit, new_data = pine_test) %>%
bind_cols(pine_test %>% select(DeadDist)) %>%
mutate(.pred = .pred^2)
# evaluation metrics
lm_metrics <- lm_preds %>%
metrics(truth = DeadDist, estimate = .pred) %>%
mutate(Model = "Linear Regression")
rf_metrics <- rf_preds %>%
metrics(truth = DeadDist, estimate = .pred) %>%
mutate(Model = "Random Forest")
all_metrics <- bind_rows(lm_metrics, rf_metrics)
# model result tables
lm_tidy <- lm_fit %>%
extract_fit_parsnip() %>%
tidy()
lm_glance <- lm_fit %>%
extract_fit_parsnip() %>%
glance()
predictor_tbl <- tibble(
Predictor = c("TreeDiam", "Infest_Serv1", "SDI_20th", "BA_20th"),
Used_in_Model_1 = "Yes",
Used_in_Model_2 = "Yes"
)
```
Row {data-height=400}
-----------------------------------------------------------------------
### Data Preview
``` {r inputs}
datatable(
head(pine_tbl, 10),
options = list(pageLength = 5, scrollX = TRUE)
)
```
### Outcome Distribution
```{r predicted_dead_distance}
ggplot(pine_tbl, aes(x = DeadDist)) +
geom_histogram(bins = 20, fill = "steelblue", color = "black") +
labs(
title = "Distribution of DeadDist",
x = "DeadDist",
y = "Count"
) +
theme(
plot.title = element_text(size = 16, face = "bold"),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12)
)
```
Row
-----------------------------------------------------------------------
### Predictors Used in Both Models
```{r predictors}
kable(predictor_tbl, caption = "Predictors used in both models")
```
### Feature-Engineered Data Preview
```{r feature}
kable(
head(baked_train, 10),
caption = "Preview of training data after recipe steps"
)
```
Row
-----------------------------------------------------------------------
### Model 1: Linear Regression Results
```{r model 1 results}
kable(lm_tidy, digits = 3, caption = "Linear regression coefficients")
```
### Model 2: Random Forest Variable Importance
```{r model 2 results}
rf_fit %>%
extract_fit_parsnip() %>%
vip(num_features = 10)
```
Row
-----------------------------------------------------------------------
### Model Evaluation on Test Data
```{r model eval on test}
kable(all_metrics, digits = 3, caption = "Test-set evaluation metrics")
```
### Linear Model Summary Statistics
```{r linear model stats}
kable(lm_glance, digits = 3, caption = "Linear model summary")
```
Row {data-height=500}
-----------------------------------------------------------------------
### Observed vs Predicted: Linear Regression
```{r observed vs predicted linear}
ggplot(lm_preds, aes(x = .pred, y = DeadDist)) +
geom_point(alpha = 0.5, size = 1.8) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", linewidth = 1) +
labs(
title = "Linear Regression: Observed vs Predicted",
x = "Predicted DeadDist",
y = "Observed DeadDist"
) +
theme(
plot.title = element_text(size = 16, face = "bold"),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12)
)
```
### Observed vs Predicted: Random Forest
```{r observed vs pred random forest}
ggplot(rf_preds, aes(x = .pred, y = DeadDist)) +
geom_point(alpha = 0.5, size = 1.8) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", linewidth = 1) +
labs(
title = "Random Forest: Observed vs Predicted",
x = "Predicted DeadDist",
y = "Observed DeadDist"
) +
theme(
plot.title = element_text(size = 16, face = "bold"),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12)
)
```