---
title: "Project_2_code_check"
author: "Skyler Pearse"
date: "2026-03-18"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: fill
    theme: cosmo
    source_code: embed
---
```{r setup, include=FALSE}
#libraries and data import
library(flexdashboard)
library(tidymodels)
library(dplyr)
library(ggplot2)
library(readxl)
library(knitr)
library(vip)
library(plotly)
library(DT)
library(ranger)
# Make the minimal ggplot2 theme the default for every plot drawn below.
theme_set(new = theme_minimal())
# Read the Excel workbook that supplies the data used throughout the dashboard.
pine_tbl <- read_excel(path = "Data_1993 (2).xlsx")
```
```{r model_creation, include=FALSE}
# Fix the RNG seed so the train/test split (and any random procedure run
# after it in this chunk) is reproducible between renders.
set.seed(123)

# Training/testing split: 80% of the rows for fitting, 20% held out.
pine_split <- initial_split(pine_tbl, prop = 0.8)
pine_train <- training(pine_split)
pine_test <- testing(pine_split)

# Preprocessing recipe shared by both models, defined on the training data.
#  - step_sqrt(): models DeadDist on the square-root scale. Because
#    skip = TRUE, the step runs when the recipe is prepped for fitting but is
#    skipped when new data are baked at predict() time; predictions therefore
#    come back on the sqrt scale and must be back-transformed by the caller.
#  - step_corr(): filters out highly correlated predictors. It only accepts
#    numeric columns, so they are selected explicitly with
#    all_numeric_predictors() rather than all_predictors(), which would also
#    hand it any non-numeric predictor.
pine_rec <- pine_train %>%
  recipe(DeadDist ~ TreeDiam + Infest_Serv1 + SDI_20th + BA_20th) %>%
  step_sqrt(all_outcomes(), skip = TRUE) %>%
  step_corr(all_numeric_predictors())

# Feature-engineered training data: prep() estimates the recipe on the
# training set and bake(new_data = NULL) returns that processed training set.
baked_train <- pine_rec %>%
  prep() %>%
  bake(new_data = NULL)
# Model 1: linear regression fitted with the "lm" engine.
lm_mod <- set_engine(linear_reg(), "lm")

# Bundle the shared recipe with the linear model and fit on the training set.
lm_wflow <- workflow() %>%
  add_recipe(pine_rec) %>%
  add_model(lm_mod)
lm_fit <- fit(lm_wflow, data = pine_train)

# Model 2: random forest regression (500 trees) on the "ranger" engine, with
# permutation importance requested from the engine.
rf_mod <- rand_forest(mode = "regression", trees = 500) %>%
  set_engine("ranger", importance = "permutation")

# Same recipe as Model 1, so both models see identical preprocessing.
rf_wflow <- workflow() %>%
  add_recipe(pine_rec) %>%
  add_model(rf_mod)
rf_fit <- fit(rf_wflow, data = pine_train)
# Predict DeadDist for `new_data` with a fitted workflow and return the
# predictions next to the observed DeadDist, both on the original scale.
#
# The recipe's sqrt step on the outcome is skipped at predict() time, so the
# workflow returns predictions on the square-root scale. Squaring undoes the
# transform, but only for non-negative values: a negative sqrt-scale
# prediction (possible with the linear model) would otherwise be squared into
# a spurious positive distance, so it is clamped to 0 first.
predict_dead_dist <- function(fitted_wflow, new_data) {
  predict(fitted_wflow, new_data = new_data) %>%
    bind_cols(new_data %>% select(DeadDist)) %>%
    mutate(.pred = pmax(.pred, 0)^2)
}

# Compute the default yardstick metrics for one prediction table and tag the
# rows with a human-readable model name.
score_model <- function(preds, model_label) {
  preds %>%
    metrics(truth = DeadDist, estimate = .pred) %>%
    mutate(Model = model_label)
}

# Predictions on the held-out test set
lm_preds <- predict_dead_dist(lm_fit, pine_test)
rf_preds <- predict_dead_dist(rf_fit, pine_test)

# Evaluation metrics, one table per model plus a combined table
lm_metrics <- score_model(lm_preds, "Linear Regression")
rf_metrics <- score_model(rf_preds, "Random Forest")
all_metrics <- bind_rows(lm_metrics, rf_metrics)
# Linear-model result tables: pull the underlying parsnip fit out of the
# workflow once, then summarise it per term (tidy) and per model (glance).
lm_engine_fit <- extract_fit_parsnip(lm_fit)
lm_tidy <- tidy(lm_engine_fit)
lm_glance <- glance(lm_engine_fit)

# Predictor overview table.
# Both workflows share one recipe, and its step_corr() may drop predictors, so
# "used" is derived from the columns that survive in the baked training data
# instead of being hard-coded. When nothing is dropped every row reads "Yes".
candidate_predictors <- c("TreeDiam", "Infest_Serv1", "SDI_20th", "BA_20th")
predictor_used <- if_else(
  candidate_predictors %in% names(baked_train),
  "Yes",
  "No"
)
predictor_tbl <- tibble(
  Predictor = candidate_predictors,
  Used_in_Model_1 = predictor_used,
  Used_in_Model_2 = predictor_used
)
```
Row
-----------------------------------------------------------------------
### Data Preview
``` {r inputs}
# Interactive preview of the first ten raw rows, five per page, with
# horizontal scrolling enabled for wide tables.
pine_tbl %>%
  head(10) %>%
  datatable(options = list(pageLength = 5, scrollX = TRUE))
```
### Outcome Distribution
```{r predicted_dead_distance}
# Histogram (20 bins) of the observed outcome across the full data set.
pine_tbl %>%
  ggplot(mapping = aes(x = DeadDist)) +
  geom_histogram(bins = 20) +
  labs(title = "Distribution of DeadDist", x = "DeadDist", y = "Count")
```
Row
-----------------------------------------------------------------------
### Predictors Used in Both Models
```{r predictors}
# Static table listing the predictors and whether each model uses them.
predictor_tbl %>%
  kable(caption = "Predictors used in both models")
```
### Feature-Engineered Data Preview
```{r feature}
# First ten rows of the training data as it looks after the recipe steps.
baked_train %>%
  head(10) %>%
  kable(caption = "Preview of training data after recipe steps")
```