Row

Data Preview

Outcome Distribution

Row

Predictors Used in Both Models

Predictors used in both models
Predictor Used_in_Model_1 Used_in_Model_2
TreeDiam Yes Yes
Infest_Serv1 Yes Yes
SDI_20th Yes Yes
BA_20th Yes Yes

Feature-Engineered Data Preview

Preview of training data after recipe steps
TreeDiam Infest_Serv1 BA_20th DeadDist
6 1 17.807310 4.295720
7 1 6.828408 5.435764
9 2 7.259274 3.430652
9 1 14.763978 3.938582
13 24 1.188972 4.123634
8 2 30.400596 3.096326
19 2 20.190708 3.630596
9 1 13.313214 5.458032
16 15 12.336948 3.645128
26 1 11.878812 5.184964

Row

Model 1: Linear Regression Results

Linear regression coefficients
term estimate std.error statistic p.value
(Intercept) 6.128 0.062 99.498 0.000
TreeDiam 0.004 0.003 1.342 0.179
Infest_Serv1 -0.009 0.001 -8.181 0.000
BA_20th -0.095 0.002 -38.478 0.000

Model 2: Random Forest Variable Importance

Row

Model Evaluation on Test Data

Test-set evaluation metrics
.metric .estimator .estimate Model
rmse standard 18.423 Linear Regression
rsq standard 0.130 Linear Regression
mae standard 13.616 Linear Regression
rmse standard 17.064 Random Forest
rsq standard 0.252 Random Forest
mae standard 12.565 Random Forest

Linear Model Summary Statistics

Linear model summary
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
0.163 0.163 1.733 503.38 0 3 -15253.56 30517.12 30551.9 23256.8 7745 7749

Row

Observed vs Predicted: Linear Regression

Observed vs Predicted: Random Forest

---
title: "Pine Distance from Dead Tree Models"
output: 
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    theme: cosmo
    source_code: embed
---


```{r setup, include=FALSE}

#libraries and data import
library(flexdashboard)
library(tidymodels)
library(dplyr)
library(ggplot2)
library(readxl)
library(knitr)
library(vip)
library(plotly)
library(DT)

theme_set(theme_minimal(base_size = 16))

pine_tbl <- read_excel("Data_1993 (2).xlsx")


```


```{r model_creation, include=FALSE}

set.seed(123)

# training/testing split
pine_split <- initial_split(pine_tbl, prop = 0.8)
pine_train <- training(pine_split)
pine_test  <- testing(pine_split)

# your exact recipe, now using training data
pine_rec <- pine_train %>% 
  recipe(DeadDist ~ TreeDiam + Infest_Serv1 + SDI_20th + BA_20th) %>% 
  step_sqrt(all_outcomes(), skip = TRUE) %>% 
  step_corr(all_predictors())

# preview feature-engineered training data
baked_train <- pine_rec %>% 
  prep() %>% 
  bake(new_data = NULL)

# MODEL 1: your exact linear model
lm_mod <- 
  linear_reg() %>% 
  set_engine("lm")

lm_wflow <- 
  workflow() %>% 
  add_model(lm_mod) %>% 
  add_recipe(pine_rec)

lm_fit <- 
  lm_wflow %>% 
  fit(data = pine_train)

# MODEL 2: random forest using same recipe
rf_mod <- 
  rand_forest(trees = 500, mode = "regression") %>% 
  set_engine("ranger", importance = "permutation")

rf_wflow <- 
  workflow() %>% 
  add_model(rf_mod) %>% 
  add_recipe(pine_rec)

rf_fit <- 
  rf_wflow %>% 
  fit(data = pine_train)

# predictions on test set
lm_preds <- predict(lm_fit, new_data = pine_test) %>% 
  bind_cols(pine_test %>% select(DeadDist)) %>% 
  mutate(.pred = .pred^2)

rf_preds <- predict(rf_fit, new_data = pine_test) %>% 
  bind_cols(pine_test %>% select(DeadDist)) %>% 
  mutate(.pred = .pred^2)

# evaluation metrics
lm_metrics <- lm_preds %>% 
  metrics(truth = DeadDist, estimate = .pred) %>% 
  mutate(Model = "Linear Regression")

rf_metrics <- rf_preds %>% 
  metrics(truth = DeadDist, estimate = .pred) %>% 
  mutate(Model = "Random Forest")

all_metrics <- bind_rows(lm_metrics, rf_metrics)

# model result tables
lm_tidy <- lm_fit %>% 
  extract_fit_parsnip() %>% 
  tidy()

lm_glance <- lm_fit %>% 
  extract_fit_parsnip() %>% 
  glance()

predictor_tbl <- tibble(
  Predictor = c("TreeDiam", "Infest_Serv1", "SDI_20th", "BA_20th"),
  Used_in_Model_1 = "Yes",
  Used_in_Model_2 = "Yes"
)

```



Row {data-height=400}
-----------------------------------------------------------------------
### Data Preview

``` {r inputs}
datatable(
  head(pine_tbl, 10),
  options = list(pageLength = 5, scrollX = TRUE)
)

```

### Outcome Distribution
```{r predicted_dead_distance}

ggplot(pine_tbl, aes(x = DeadDist)) +
  geom_histogram(bins = 20, fill = "steelblue", color = "black") +
  labs(
    title = "Distribution of DeadDist",
    x = "DeadDist",
    y = "Count"
  ) +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  )
```

Row
-----------------------------------------------------------------------
### Predictors Used in Both Models

```{r predictors}

kable(predictor_tbl, caption = "Predictors used in both models")
```

### Feature-Engineered Data Preview

```{r feature}
kable(
  head(baked_train, 10),
  caption = "Preview of training data after recipe steps"
)

```

Row
-----------------------------------------------------------------------
### Model 1: Linear Regression Results

```{r model 1 results}
kable(lm_tidy, digits = 3, caption = "Linear regression coefficients")
```

### Model 2: Random Forest Variable Importance
```{r model 2 results}
rf_fit %>% 
  extract_fit_parsnip() %>% 
  vip(num_features = 10)
```

Row
-----------------------------------------------------------------------
### Model Evaluation on Test Data
```{r model eval on test}
kable(all_metrics, digits = 3, caption = "Test-set evaluation metrics")
```

### Linear Model Summary Statistics
```{r linear model stats}
kable(lm_glance, digits = 3, caption = "Linear model summary")
```

Row {data-height=500}
-----------------------------------------------------------------------
### Observed vs Predicted: Linear Regression
```{r observed vs predicted linear}
ggplot(lm_preds, aes(x = .pred, y = DeadDist)) +
  geom_point(alpha = 0.5, size = 1.8) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", linewidth = 1) +
  labs(
    title = "Linear Regression: Observed vs Predicted",
    x = "Predicted DeadDist",
    y = "Observed DeadDist"
  ) +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  )
```

### Observed vs Predicted: Random Forest
```{r observed vs pred random forest}
ggplot(rf_preds, aes(x = .pred, y = DeadDist)) +
  geom_point(alpha = 0.5, size = 1.8) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", linewidth = 1) +
  labs(
    title = "Random Forest: Observed vs Predicted",
    x = "Predicted DeadDist",
    y = "Observed DeadDist"
  ) +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  )
```