Project_2_code

Predictors used in both models
Predictor	Used_in_Model_1	Used_in_Model_2
TreeDiam	Yes	Yes
Infest_Serv1	Yes	Yes
SDI_20th	Yes	Yes
BA_20th	Yes	Yes
Preview of training data after recipe steps
TreeDiam	Infest_Serv1	BA_20th	DeadDist
6	1	17.807310	4.295720
7	1	6.828408	5.435764
9	2	7.259274	3.430652
9	1	14.763978	3.938582
13	24	1.188972	4.123634
8	2	30.400596	3.096326
19	2	20.190708	3.630596
9	1	13.313214	5.458032
16	15	12.336948	3.645128
26	1	11.878812	5.184964
---
title: "Project_2_code_check"
author: "Skyler Pearse"
date: "2026-03-18"
output:  
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: fill
    theme: cosmo
    source_code: embed
---



```{r setup, include=FALSE}

#libraries and data import
library(flexdashboard)
library(tidymodels)
library(dplyr)
library(ggplot2)
library(readxl)
library(knitr)
library(vip)
library(plotly)
library(DT)
library(ranger)

# Apply a minimal ggplot2 theme to every plot in the dashboard
ggplot2::theme_set(ggplot2::theme_minimal())

# Read the pine data from the Excel workbook (path is relative to this file)
pine_tbl <- readxl::read_excel(path = "Data_1993 (2).xlsx")


```


```{r model_creation, include=FALSE}

# Seed the RNG first so the random train/test partition is reproducible
set.seed(123)

# Partition the data: 80% of rows for fitting, the rest held out for testing
pine_split <- pine_tbl %>%
  initial_split(prop = 0.8)
pine_train <- pine_split %>% training()
pine_test <- pine_split %>% testing()

# Preprocessing recipe, defined on the training data only:
#   - model DeadDist from the four candidate predictors
#   - square-root transform the outcome; skip = TRUE means this step is
#     not applied when the recipe is baked on new data
#   - step_corr() then filters the predictors for high pairwise
#     correlation, so it may drop one or more of the candidates
pine_rec <- recipe(
  DeadDist ~ TreeDiam + Infest_Serv1 + SDI_20th + BA_20th,
  data = pine_train
) %>%
  step_sqrt(all_outcomes(), skip = TRUE) %>%
  step_corr(all_predictors())

# Feature-engineered training data, kept for the dashboard preview
# (new_data = NULL asks bake() for the processed training set)
baked_train <- bake(prep(pine_rec), new_data = NULL)

# MODEL 1: linear regression specification using the "lm" engine
lm_mod <- set_engine(linear_reg(), "lm")

# Bundle the shared preprocessing recipe with the model specification
lm_wflow <- workflow() %>%
  add_recipe(pine_rec) %>%
  add_model(lm_mod)

# Fit the whole workflow (recipe + model) on the training rows
lm_fit <- fit(lm_wflow, data = pine_train)

# MODEL 2: 500-tree regression forest on the "ranger" engine;
# importance = "permutation" is forwarded to the engine
rf_mod <- set_engine(
  rand_forest(mode = "regression", trees = 500),
  "ranger",
  importance = "permutation"
)

# Same recipe as Model 1, paired with the forest specification
rf_wflow <- workflow() %>%
  add_recipe(pine_rec) %>%
  add_model(rf_mod)

# Fit the whole workflow (recipe + model) on the training rows
rf_fit <- fit(rf_wflow, data = pine_train)

# Predict DeadDist on the original scale for a fitted workflow.
#
# The shared recipe square-root transforms the outcome before fitting, so
# predict() returns values on the sqrt scale and they are squared here to
# get back to DeadDist units. A sqrt-scale prediction below zero (possible
# with the linear model) has no valid back-transform; squaring it directly
# would turn it into a spuriously positive distance, so predictions are
# clamped at 0 first.
#
# fitted_wflow: a fitted workflow (lm_fit or rf_fit)
# new_data:     data frame with the predictors and the observed DeadDist
# Returns a tibble with .pred (original scale) and the observed DeadDist.
predict_dead_dist <- function(fitted_wflow, new_data) {
  predict(fitted_wflow, new_data = new_data) %>%
    bind_cols(new_data %>% select(DeadDist)) %>%
    mutate(.pred = pmax(.pred, 0)^2)
}

# predictions on test set
lm_preds <- predict_dead_dist(lm_fit, pine_test)
rf_preds <- predict_dead_dist(rf_fit, pine_test)

# Test-set performance for each model, tagged with a Model label so the
# two sets of results can be stacked into one table
lm_metrics <- mutate(
  metrics(lm_preds, truth = DeadDist, estimate = .pred),
  Model = "Linear Regression"
)
rf_metrics <- mutate(
  metrics(rf_preds, truth = DeadDist, estimate = .pred),
  Model = "Random Forest"
)

# One long table holding both models' metrics
all_metrics <- bind_rows(lm_metrics, rf_metrics)

# Pull the parsnip fit out of the linear-model workflow once, then
# summarise it two ways for the result tables
lm_parsnip_fit <- extract_fit_parsnip(lm_fit)

lm_tidy <- tidy(lm_parsnip_fit)
lm_glance <- glance(lm_parsnip_fit)

# Predictor summary shown on the dashboard.
# The recipe's step_corr() can remove candidate predictors, so whether a
# predictor is actually used is read off the columns of the processed
# training data (baked_train) instead of being hard-coded to "Yes".
# Both workflows share pine_rec and pine_train, so the same flag applies
# to Model 1 and Model 2.
candidate_predictors <- c("TreeDiam", "Infest_Serv1", "SDI_20th", "BA_20th")
retained_predictors <- setdiff(names(baked_train), "DeadDist")
predictor_used <- ifelse(
  candidate_predictors %in% retained_predictors, "Yes", "No"
)

predictor_tbl <- tibble(
  Predictor = candidate_predictors,
  Used_in_Model_1 = predictor_used,
  Used_in_Model_2 = predictor_used
)

```



Row 
-----------------------------------------------------------------------
### Data Preview

```{r inputs}
# Interactive preview of the first 10 raw rows, 5 per page, with
# horizontal scrolling enabled
pine_tbl %>%
  head(10) %>%
  datatable(options = list(pageLength = 5, scrollX = TRUE))

```

### Outcome Distribution
```{r predicted_dead_distance}

# Histogram of the raw (untransformed) outcome over the full data set
pine_tbl %>%
  ggplot(mapping = aes(x = DeadDist)) +
  geom_histogram(bins = 20) +
  labs(title = "Distribution of DeadDist", x = "DeadDist", y = "Count")
```

Row
-----------------------------------------------------------------------
### Predictors Used in Both Models

```{r predictors}

# Static table of the predictor summary built in the modelling chunk
predictor_tbl %>%
  kable(caption = "Predictors used in both models")
```

### Feature-Engineered Data Preview

```{r feature}
# First 10 rows of the training data as it looks after the recipe steps
baked_train %>%
  head(10) %>%
  kable(caption = "Preview of training data after recipe steps")

```