ShinyShowcaseRound2

library(shiny)
library(shinyWidgets)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.1     ✔ tibble    3.3.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(thematic)
library(moderndive)
library(infer)

Attaching package: 'infer'

The following object is masked from 'package:shiny':

    observe
library(bslib)

Attaching package: 'bslib'

The following object is masked from 'package:shinyWidgets':

    show_toast

The following object is masked from 'package:utils':

    page
thematic_shiny()

ui <- fluidPage(
  theme = bs_theme(version = 5),
  setBackgroundColor(
    color = c("#355E3B", "grey"),
    gradient = "radial",
    direction = "bottom"
  ),
  
  titlePanel(h1("Colorado Housing Exploration", align = "center")),
  wellPanel(
    h3("About"),
    tags$p(
      "This app explores what drives home sale prices in Boulder County, Colorado
      using 2020 residential sales data. Users can fit regression models to see
      how property features like square footage and building value relate to
      sale price, test whether those relationships are statistically significant,
      and check whether the model assumptions hold."),

    tags$p(
      tags$strong("Reference: "),
      "Ismay, C. & Kim, A.Y. (2024). ",
      tags$em("Statistical Inference via Data Science: A ModernDive into R and the Tidyverse. "),
    )
  ),
  
  sidebarLayout(
    sidebarPanel(
      input_dark_mode(label = "dark_mode_toggle"),
      
      h4("Exploratory Data Analysis"),
      
      selectInput(
        "edaPlotType",
        "Choose EDA Plot:",
        choices = c(
          "Histogram" = "hist",
          "Bar Chart (City vs Design)" = "bar",
          "Scatterplot" = "scatter"
        )
      ),
      
      selectInput(
        "eda_x",
        "Select X Variable:",
        choices = c("Total SqFt" = "total_sqft",
                    "Building Value" = "BLDG_VALUE",
                    "Sale Price" = "SALE_PRICE")
      ),
      
      selectInput(
        "eda_fill",
        "Color / Group By:",
        choices = c("None" = "none",
                    "City" = "LOCCITY",
                    "Decade" = "DECADE")
      ),
      
      hr(),
      
      h4("Regression Model Settings"),
      
      checkboxGroupInput(
        "predictors", label = "Select predictors:",
        choices = c("Total SqFt" = "total_sqft",
                    "Building Value" = "BLDG_VALUE"),
        selected = "total_sqft"
      ),
      
      checkboxInput("interaction", "Include Interaction Term", FALSE),
      
      hr(),
      
      h4("Inference Controls"),
      
      numericInput(
        inputId = "reps",
        label = "Number of Bootstrap Samples",
        value = 1000, min = 100, max = 100000
      ),
      
      checkboxGroupInput(
        "typeofCI", label = "What type of CI?",
        choices = c("Percentile" = "pct", "Standard Error" = "se")
      ),
      
      radioButtons(
        "alpha", label = "Confidence Level:",
        choices = c("95%" = 0.95, "97%" = 0.97, "99%" = 0.99)
      ),
      
      selectInput(
        "diagCol", "Column for Independence & Variance Plots",
        choices = c("Observation Order" = "order",
                    "Total SqFt" = "total_sqft",
                    "Building Value" = "BLDG_VALUE",
                    "Decade Built" = "DECADE",
                    "City" = "LOCCITY")
      )
    ),
    
    mainPanel(
      
      fluidRow(column(width = 6, tableOutput("coefTable"))),
      br(),
      
      h3("Coefficient Interpretation"),
      verbatimTextOutput("coefExplain"),
      br(),
      
      h3("Coefficient Significance Testing"),
      verbatimTextOutput("sigText"),
      br(),
      
      h3("Simple Regression"),
      verbatimTextOutput("simple_text"),
      hr(),
      
      h3("Exploratory Data Analysis"),
      plotOutput("edaPlot"),
      br(),
      
      h3("Model vs Baseline"),
      plotOutput("scatterPlot"),
      br(),
      
      fluidRow(
        column(width = 6, plotOutput("ciPlot")),
        column(width = 6, plotOutput("nullPlot"))
      ),
      br(),
      
      h3("LINE Diagnostic Plots"),
      fluidRow(
        column(width = 6, plotOutput("linePlot")),
        column(width = 6, plotOutput("indepPlot"))
      ),
      fluidRow(
        column(width = 6, plotOutput("normPlot")),
        column(width = 6, plotOutput("eqvarPlot"))
      )
    )
  )
)

server <- function(input, output) {
  
  # --- Data ---
  housing <- reactive({
    read_csv('/Users/phillipvogt/Desktop/Data Sci/boulder-2020-residential_sales.csv') |>
      mutate(
        BLDG_VALUE  = as.numeric(str_remove_all(BLDG_VALUE, "\\$|,")),
        LAND_VALUE  = as.numeric(str_remove_all(LAND_VALUE, "\\$|,")),
        SALE_PRICE  = as.numeric(str_remove_all(SALE_PRICE, "\\$|,")),
        DECADE      = floor(BLDG1_YEAR_BUILT / 10) * 10,
        total_baths = FULL_BATHS + THREE_QTR_BATHS * 0.75 + HALF_BATHS * 0.5,
        total_sqft  = STUDIO_SQFT + GARAGE_SQFT + ABOVE_GROUND_SQFT +
          FINISHED_BSMT_SQFT + FINISHED_GARAGE_SQFT + UNFINISHED_BSMT_SQFT,
        BULD_SQFT      = BLDG_VALUE / total_sqft,
        PRICE_PER_SQFT = ifelse(total_sqft == 0, NA, SALE_PRICE / total_sqft)
      )
  })
  
  # --- Model ---
  current_model <- reactive({
    req(input$predictors)
    
    if (input$interaction && length(input$predictors) > 1) {
      predictors <- paste(input$predictors, collapse = " * ")
    } else {
      predictors <- paste(input$predictors, collapse = " + ")
    }
    
    lm(as.formula(paste("SALE_PRICE ~", predictors)), data = housing())
  })
  
  # --- Simple model (first selected predictor only) ---
  simple_model <- reactive({
    req(input$predictors)
    x <- input$predictors[1]
    lm(as.formula(paste("SALE_PRICE ~", x)), data = housing())
  })
  
  # --- Observed slope for bootstrap / null dist ---
  obs_slope <- reactive({
    req(input$predictors)
    x <- input$predictors[1]
    m <- lm(as.formula(paste("SALE_PRICE ~", x)), data = housing())
    coef(m)[[x]]
  })
  
  #Regression table
  output$coefTable <- renderTable({
    get_regression_table(current_model())
  })
  
  #  Coefficient interpretation in context
  output$coefExplain <- renderText({
    m <- current_model()
    coefs <- coef(m)
    
    txt <- ""
    
    if ("total_sqft" %in% names(coefs)) {
      txt <- paste0(
        txt,
        "Total SqFt: Holding other variables constant, a 1 unit increase in total square footage is associated with a change of $",
        round(coefs["total_sqft"], 2),
        " in sale price.\n\n"
      )
    }
    
    if ("BLDG_VALUE" %in% names(coefs)) {
      txt <- paste0(
        txt,
        "Building Value: Holding other variables constant, a $1 increase in building value is associated with a change of $",
        round(coefs["BLDG_VALUE"], 2),
        " in sale price.\n\n"
      )
    }
    
    if ("BLDG_VALUE:total_sqft" %in% names(coefs)) {
      txt <- paste0(
        txt,
        "Interaction: The effect of square footage on sale price depends on building value (and vice versa).\n\n"
      )
    }
    
    txt
  })
  
  # --- Simple regression interpretation ---
  output$simple_text <- renderText({
    m <- simple_model()
    coefs <- coef(m)
    
    intercept <- round(coefs[1], 2)
    slope <- round(coefs[2], 2)
    x <- input$predictors[1]
    
    paste(
      "Model Interpretation:\n",
      "This model predicts SALE_PRICE using", x, ".\n",
      "Intercept:", intercept, "\n",
      "When", x, "is 0, predicted price is", intercept, ".\n",
      "Slope:", slope, "\n",
      "For every 1 unit increase in", x,
      "SALE_PRICE increases by", slope, "on average."
    )
  })
  
  # Coefficient significance testing 
  output$sigText <- renderText({
    m <- current_model()
    coef_df <- as.data.frame(summary(m)$coefficients)
    coef_df$term <- rownames(coef_df)
    
    alpha <- 1 - as.numeric(input$alpha)
    coef_df <- coef_df[coef_df$term != "(Intercept)", ]
    
    txt <- paste(
      "Null Hypothesis (H0): Each predictor coefficient equals 0.",
      "Alternative Hypothesis (Ha): Each predictor coefficient does not equal 0.",
      "A t-test is used for each regression coefficient.",
      "", sep = "\n"
    )
    
    for (i in seq_len(nrow(coef_df))) {
      term <- coef_df$term[i]
      pval <- coef_df[i, 4]
      if (pval < alpha) {
        txt <- paste0(
          txt, term, ": p-value = ", round(pval, 5),
          " < alpha = ", alpha,
          ". Statistically significant. Reject H0.\n\n"
        )
      } else {
        txt <- paste0(
          txt, term, ": p-value = ", round(pval, 5),
          " >= alpha = ", alpha,
          ". Not statistically significant. Fail to reject H0.\n\n"
        )
      }
    }
    txt
  })
  
  #  Interactive EDA 
  output$edaPlot <- renderPlot({
    df <- housing()
    x_var <- input$eda_x
    fill_var <- input$eda_fill
    
    if (input$edaPlotType == "hist") {
      
      if (fill_var == "none") {
        ggplot(df, aes(x = .data[[x_var]])) +
          geom_histogram(color = "white", bins = 30) +
          labs(title = paste("Distribution of", x_var), x = x_var)
      } else {
        ggplot(df, aes(x = .data[[x_var]], fill = .data[[fill_var]])) +
          geom_histogram(color = "white", bins = 30, alpha = 0.7) +
          labs(title = paste("Distribution of", x_var), x = x_var)
      }
      
    } else if (input$edaPlotType == "bar") {
      
      ggplot(df, aes(x = fct_infreq(BLDG1_DESIGN), fill = LOCCITY)) +
        geom_bar() +
        coord_flip() +
        labs(title = "Building Design Distribution by City")
      
    } else if (input$edaPlotType == "scatter") {
      
      p <- ggplot(df, aes(x = .data[[x_var]], y = SALE_PRICE))
      
      if (fill_var != "none") {
        p <- p + aes(color = .data[[fill_var]])
      }
      
      p +
        geom_point(alpha = 0.5) +
        geom_smooth(method = "lm", se = FALSE) +
        labs(title = paste("Sale Price vs", x_var))
    }
  })
  
  # --- Model vs Baseline scatterplot ---
  output$scatterPlot <- renderPlot({
    m <- current_model()
    
    housing() |>
      mutate(pred = predict(m)) |>
      ggplot(aes(x = pred, y = SALE_PRICE)) +
      geom_point(alpha = 0.4) +
      geom_hline(yintercept = mean(housing()$SALE_PRICE), color = "red") +
      geom_smooth(method = "lm", color = "blue", se = TRUE) +
      labs(title = "Model vs Baseline Comparison")
  })
  
  # Null Distribution 
  output$nullPlot <- renderPlot({
    req(input$predictors)
    x <- input$predictors[1]
    
    housing() |>
      specify(as.formula(paste("SALE_PRICE ~", x))) |>
      hypothesize(null = "independence") |>
      generate(reps = input$reps, type = "permute") |>
      calculate(stat = "slope") |>
      visualize() +
      shade_p_value(obs_stat = obs_slope(), direction = "both") +
      labs(title = paste("Null Distribution of Slope for", x))
  })
  
  # --- Bootstrap CI ---
  output$ciPlot <- renderPlot({
    req(input$predictors)
    x <- input$predictors[1]
    
    boot <- housing() |>
      specify(as.formula(paste("SALE_PRICE ~", x))) |>
      generate(reps = input$reps, type = "bootstrap") |>
      calculate(stat = "slope")
    
    pct_ci <- boot |>
      get_confidence_interval(type = "percentile", level = as.numeric(input$alpha))
    se_ci <- boot |>
      get_confidence_interval(
        level = as.numeric(input$alpha), type = "se",
        point_estimate = obs_slope()
      )
    
    p <- boot |> visualize() +
      labs(title = paste("Bootstrap Distribution of Slope for", x))
    
    if ("pct" %in% input$typeofCI) {
      p <- p + shade_confidence_interval(
        endpoints = pct_ci, linetype = "solid", color = "cyan", fill = NULL
      )
    }
    if ("se" %in% input$typeofCI) {
      p <- p + shade_confidence_interval(
        endpoints = se_ci, linetype = "dashed", color = "purple", fill = NULL
      )
    }
    p
  })
  
  # --- Diagnostics ---
  output$linePlot <- renderPlot({
    m <- current_model()
    ggplot(data.frame(fitted = fitted(m), resid = residuals(m)),
           aes(fitted, resid)) +
      geom_point(alpha = 0.4) +
      geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
      labs(title = "Linearity (Residuals vs Fitted)",
           x = "Fitted Values", y = "Residuals")
  })
  
  output$indepPlot <- renderPlot({
    m <- current_model()
    df <- housing() |> mutate(.resid = residuals(m))
    col <- input$diagCol
    
    if (col == "order") {
      df <- df |> mutate(.order = row_number())
      ggplot(df, aes(x = .order, y = .resid)) +
        geom_point(alpha = 0.4) +
        geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
        labs(title = "Independence of Errors",
             x = "Observation Order", y = "Residuals")
    } else {
      ggplot(df, aes(x = .data[[col]], y = .resid)) +
        geom_point(alpha = 0.4) +
        geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
        labs(title = paste("Independence of Errors vs", col),
             x = col, y = "Residuals")
    }
  })
  
  output$normPlot <- renderPlot({
    m <- current_model()
    ggplot(data.frame(resid = residuals(m)), aes(sample = resid)) +
      stat_qq(alpha = 0.4) +
      stat_qq_line(color = "red") +
      labs(title = "Normality of Residuals (Q-Q Plot)")
  })
  
  output$eqvarPlot <- renderPlot({
    m <- current_model()
    df <- housing() |> mutate(.resid = residuals(m))
    col <- input$diagCol
    
    if (col == "order") {
      df <- df |> mutate(.order = row_number())
      ggplot(df, aes(x = .order, y = abs(.resid))) +
        geom_point(alpha = 0.4) +
        geom_smooth(method = "loess", se = FALSE, color = "red") +
        labs(title = "Equality of Variance",
             x = "Observation Order", y = "|Residuals|")
    } else {
      ggplot(df, aes(x = .data[[col]], y = abs(.resid))) +
        geom_point(alpha = 0.4) +
        geom_smooth(method = "loess", se = FALSE, color = "red") +
        labs(title = paste("Equality of Variance vs", col),
             x = col, y = "|Residuals|")
    }
  })
}

shinyApp(ui = ui, server = server)

Shiny applications not supported in static R Markdown documents