shiny app sep

Author

Sean Lee

library(shiny)
library(caret)
Warning: package 'caret' was built under R version 4.5.2
Loading required package: ggplot2
Warning: package 'ggplot2' was built under R version 4.5.2
Loading required package: lattice
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.2
✔ lubridate 1.9.4     ✔ tibble    3.3.0
✔ purrr     1.1.0     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ purrr::lift()   masks caret::lift()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fastshap)
Warning: package 'fastshap' was built under R version 4.5.2

Attaching package: 'fastshap'

The following object is masked from 'package:dplyr':

    explain
library(shapviz)
Warning: package 'shapviz' was built under R version 4.5.2
library(sf)
Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
set.seed(42)
tracts <- st_read("data/variables_sf.gpkg")
Reading layer `variables_sf' from data source 
  `C:\FA 2025\11.218 Data Science\Team Prj\data\variables_sf.gpkg' 
  using driver `GPKG'
Simple feature collection with 325 features and 26 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 2986587 ymin: 13766650 xmax: 3245120 ymax: 13992550
Projected CRS: NAD83 / Texas South Central (ftUS)
variables <- tracts |>
  select(-c("severity_index.x"), everything())

variables_final <- variables |>
  filter(!is.na(severity_index))

vars_to_fix  <- c("elev_mean", "slope_mean", "median_year_structure_built")
vars_to_zero <- c("lu_open_water", "lu_wetlands", "lu_developed_open_space")

variables_final_clean <- variables_final |>
  mutate(across(
    all_of(vars_to_fix),
    ~ ifelse(is.na(.), mean(., na.rm = TRUE), .))) |>
  mutate(across(
    all_of(vars_to_zero),
    ~ replace(., is.na(.), 0))) |>
  select(-c(GEOID, lu_wetlands))

df_env <- variables_final_clean |>
  select(severity_index, elev_mean, slope_mean, canopy_mean) |>
  st_drop_geometry() |>
  drop_na()

df_soc <- variables_final_clean |>
  select(
    severity_index,
    med_income, pop_density, burdened_household,
    pct_white, pct_black, pct_asian,
    owner_occupied, renter_occupied
  ) |>
  st_drop_geometry() |>
  drop_na()

df_inf <- variables_final_clean |>
  select(
    severity_index,
    lu_open_water, lu_developed_high_intensity,
    lu_developed_medium_intensity, lu_developed_low_intensity,
    lu_developed_open_space, median_year_structure_built,
    single_fam, small_multi, med_multi, large_multi, pct_mobile_home
  ) |>
  st_drop_geometry() |>
  drop_na()
fit_rf_shap <- function(df) {
  idx   <- createDataPartition(df$severity_index, p = 0.8, list = FALSE)
  train <- df[idx, ]

  rf_model <- train(
    severity_index ~ .,
    data = train,
    method = "rf",
    trControl = trainControl(method = "cv", number = 5)
  )

  pred_wrapper <- function(object, newdata) {
    predict(object, newdata = newdata)
  }

  X_train <- train |> select(-severity_index)

  shap_vals <- fastshap::explain(
    object       = rf_model$finalModel,
    X            = X_train,
    pred_wrapper = pred_wrapper,
    nsim         = 100
  )

  sv <- shapviz(shap_vals, X_train)

  list(
    rf = rf_model,
    X  = X_train,
    sv = sv
  )
}

mod_env <- fit_rf_shap(df_env)
note: only 2 unique complexity parameters in default grid. Truncating the grid to 2 .
mod_soc <- fit_rf_shap(df_soc)
mod_inf <- fit_rf_shap(df_inf)
theme_aes <- function() {
  ggplot2::theme_minimal(base_size = 13) +
    ggplot2::theme(
      plot.title = element_text(
        face = "bold",
        size = 15,
        hjust = 0.5,
        color = "#303030"
      ),
      axis.title = element_text(
        face = "bold",
        size = 12,
        color = "#333333"
      ),
      panel.grid.minor = element_blank(),
      panel.grid.major = element_line(linewidth = 0.2, color = "#dddddd"),
      axis.text = element_text(color = "#4a4a4a"),
      strip.background = element_rect(fill = "#f3f3f0", color = NA),
      strip.text = element_text(face = "bold", color = "#303030")
    )
}

make_scatter <- function(df, xvar, xlab) {
  ggplot(df, aes_string(x = xvar, y = "severity_index")) +
    geom_point(alpha = 0.55, color = "#A3B18A") +
    geom_smooth(
      method = "lm",
      se = TRUE,
      color = "#7F5539",
      fill  = "#DDE5D2",
      linewidth = 1
    ) +
    labs(
      title = paste("Relationship Between", xlab, "and Severity Index"),
      x = xlab,
      y = "Severity Index"
    ) +
    theme_aes()
}

make_box_canopy <- function(df) {
  df_binned <- df |>
    mutate(canopy_group = cut(
      canopy_mean,
      breaks = quantile(canopy_mean, probs = seq(0, 1, by = 0.25), na.rm = TRUE),
      include.lowest = TRUE
    ))

  ggplot(df_binned,
         aes(x = canopy_group, y = severity_index, fill = canopy_group)) +
    geom_boxplot(alpha = 0.9, color = "#7F5539", linewidth = 0.4) +
    scale_fill_manual(
      values = c(
        "#DDE5D2",   # light sage
        "#A3B18A",   # sage
        "#588157",   # olive
        "#7F5539"    # brown
      )
    ) +
    labs(
      title = "Severity Index Across Canopy Quartiles",
      x = "Canopy Mean (Quartile Groups)",
      y = "Severity Index",
      fill = "Canopy\nQuartile"
    ) +
    theme_aes() +
    theme(legend.position = "right")
}

ui <- fluidPage(
  titlePanel("Severity Index – SHAP & Exploratory Plots"),

  fluidRow(
    column(
      width = 6,
      h3("SHAP Analysis"),

      wellPanel(
        style = "min-height: 360px; overflow-y: auto;",
        radioButtons(
          "model_type",
          "Variable group:",
          choices = c(
            "Environmental (elev, slope, canopy)" = "env",
            "Social"                               = "soc",
            "Infrastructure"                       = "inf"
          )
        ),
        radioButtons(
          "shap_plot",
          "SHAP plot type:",
          choices = c(
            "Beeswarm"           = "beeswarm",
            "Bar importance"     = "bar",
            "Waterfall (1 obs.)" = "waterfall"
          )
        ),
        sliderInput(
          "row_id",
          "Observation for waterfall plot:",
          min   = 1,
          max   = nrow(mod_env$X),  # will be updated reactively
          value = 1,
          step  = 1
        )
      ),

      plotOutput("shap_plot", height = "550px")
    ),

    column(
      width = 6,
      h3("Exploratory Plots"),

      wellPanel(
         style = "min-height: 360px; overflow-y: auto;",
         radioButtons(
          "eda_plot",
          "Exploratory plot:",
          choices = c(
            "Elevation vs Severity"                  = "elev",
            "Slope vs Severity"                      = "slope",
            "Canopy vs Severity"                     = "canopy",
            "Boxplot: Severity by Canopy Quartiles"  = "box"
          )
        )
      ),

      plotOutput("eda_plot", height = "550px")
    )
  )
)

server <- function(input, output, session) {
  current_mod <- reactive({
    switch(input$model_type,
           "env" = mod_env,
           "soc" = mod_soc,
           "inf" = mod_inf)
  })

  observe({
    n_obs <- nrow(current_mod()$X)
    updateSliderInput(
      session,
      "row_id",
      max   = n_obs,
      value = min(input$row_id, n_obs)
    )
  })

  output$shap_plot <- renderPlot({
    sv_obj <- current_mod()$sv

    if (input$shap_plot == "beeswarm") {
      sv_importance(
        sv_obj,
        kind = "beeswarm",
        viridis_args = list(option = "D", begin = 1, end = 0),
        color_bar_title = "Feature value"
      )

    } else if (input$shap_plot == "bar") {
      sv_importance(sv_obj, kind = "bar")

    } else if (input$shap_plot == "waterfall") {
      sv_waterfall(sv_obj, row_id = input$row_id)
    }
  })

  output$eda_plot <- renderPlot({
    if (input$eda_plot == "elev") {
      make_scatter(df_env, "elev_mean", "Mean Elevation")

    } else if (input$eda_plot == "slope") {
      make_scatter(df_env, "slope_mean", "Mean Slope")

    } else if (input$eda_plot == "canopy") {
      make_scatter(df_env, "canopy_mean", "Mean Canopy Cover")

    } else if (input$eda_plot == "box") {
      make_box_canopy(df_env)
    }
  })
}

shinyApp(ui = ui, server = server)

Shiny applications not supported in static R Markdown documents