Models of Neighborhood Change in San Diego, CA

---
title: "Models of Neighborhood Change in San Diego, CA"
output: 
  flexdashboard::flex_dashboard:
    theme: bootstrap
    source: embed
    smart: false
# The dashboard uses Shiny inputs (radioButtons, checkboxGroupInput) and
# render*() outputs, so it has to be served with the Shiny runtime.
# `runtime` is a top-level key, not an option of the output format.
runtime: shiny
---


```{r global, echo=FALSE}

library(flexdashboard)
library(shiny)
library(rsconnect)

library(dplyr)
library(tidyr)
library(stringr)
library(purrr)

library(sf)
library(sp)
library(geojsonio)
library(cartogram)
library(tmap)
tmap_mode("view")  
library(leaflet)

library(viridis)
library(pals)
library(ggplot2)

library(mclust)
library(DT)
library(pander)
library(knitr)
library(stargazer)
library(tidycensus)


```



```{r data, include=FALSE}
# 1. Load San Diego dorling cartogram from local file

sd <- geojson_read("data/sd_dorling.geojson", what = "sp")

# 2. Reproject the map to EPSG:3395

sd2 <- spTransform(sd, CRS("+init=epsg:3395"))

# 3. Make sure cluster is a factor.
#    This is done BEFORE sd.sf and d are derived so that every object built
#    from sd2 carries the same factor column.

if ("cluster" %in% names(sd2@data)) {
  sd2$cluster <- as.factor(sd2$cluster)
}

# 4. Convert to sf for ggplot / tmap

sd.sf <- st_as_sf(sd2)

# 5. Separate out the attribute table (for histograms, regression, etc.).
#    The geometry list-column is dropped: d is only used for tabular work.

d <- st_drop_geometry(sd.sf)

# 6. Bounding box for San Diego map in projected CRS

bb <- st_bbox(sd.sf)

# Shared ggplot coordinate layer: clips every map to the bounding box `bb`
# and suppresses the graticule (datum = NA).

coord_sd <- function() {
  x.range <- c(bb["xmin"], bb["xmax"])
  y.range <- c(bb["ymin"], bb["ymax"])
  coord_sf(xlim = x.range, ylim = y.range, datum = NA)
}

```

Community Demographics  
=====================================  
This section allows you to explore census characteristics across San Diego census tracts.  
Select a variable from the sidebar, and the map and histogram will update automatically.

Inputs {.sidebar}
-------------------------------------
```{r}
# 1. Column names of the census variables offered in the sidebar
these.variables <- c(
  "pnhwht12", "pnhblk12", "phisp12", "pntv12", "pfb12", "polang12",
  "phs12", "pcol12", "punemp12", "pflabf12", "pprof12", "pmanuf12",
  "pvet12", "psemp12", "ppov12", "pown12", "pvac12", "pmulti12",
  "p30old12", "p10yrs12", "p18und12", "p60up12", "p75up12",
  "pmar12", "pwds12", "pfhh12"
)

# 2. Display labels, keyed by variable name.
#    The names are required: the plots look a label up with
#    demo.labels[input$demographics], which returns NA on an unnamed vector.
demo.labels <- paste("Variable:", these.variables)
names(demo.labels) <- these.variables

# 3. Sidebar selector; choiceNames gets the bare labels, choiceValues the
#    column names that end up in input$demographics.
radioButtons(
  inputId      = "demographics",
  label        = h3("Census Variables"),
  choiceNames  = unname(demo.labels),
  choiceValues = these.variables,
  selected     = "pnhwht12"
)


```

Row {.tabset}
-------------------------------------


### Choropleth Map

```{r}
# Decile choropleth of the census variable selected in the sidebar.
renderPlot({

  req(input$demographics)
  var <- input$demographics

  # Look the label up by position rather than by name: indexing an unnamed
  # label vector with a string yields NA and the title would read "...: NA".
  # Fall back to the raw column name if the variable is not in the list.
  idx <- match(var, these.variables)
  var.label <- if (is.na(idx)) var else demo.labels[[idx]]

  # create deciles of the selected variable
  sd_plot <- sd.sf |>
    mutate(q = ntile(.data[[var]], 10))

  ggplot(sd_plot) +
    geom_sf(aes(fill = q), color = NA) +
    coord_sd() +
    labs(
      title   = paste0("Choropleth of: ", var.label),
      caption = "Source: Harmonized Census Files (LTDB)",
      fill    = "Decile"
    ) +
    scale_fill_gradientn(
      colours = rev(ocean.balance(10)),
      guide   = "colourbar"
    )

})

```

### Variable Distribution 

```{r}
# Histogram of the selected census variable with decile cut points overlaid.
renderPlot({

  req(input$demographics)
  var <- input$demographics

  # Look the label up by position rather than by name: indexing an unnamed
  # label vector with a string yields NA and the title would read "... NA".
  # Fall back to the raw column name if the variable is not in the list.
  idx <- match(var, these.variables)
  var.label <- if (is.na(idx)) var else demo.labels[[idx]]

  # keep finite values only so NA / Inf cannot break hist() or quantile()
  x <- d[[var]] |> unlist() |> as.numeric()
  x <- x[is.finite(x)]

  cut.points <- quantile(x, seq(0, 1, 0.1), na.rm = TRUE)

  hist(
    x,
    breaks = 50,
    col    = "gray",
    border = "white",
    yaxt   = "n",
    main   = paste0("Histogram of ", var.label),
    xlab   = "Red lines represent decile cut points"
  )

  abline(v = cut.points, col = "darkred", lty = 3, lwd = 2)

})

```

Neighborhoods  
===================================== 

This interactive map shows the neighborhood cluster types identified from the model.
Each color represents a different community pattern, such as affluent coastal areas or 
working-class neighborhoods.

### Clusters 

```{r}
# sf copy of the reprojected Spatial object, used by the cluster map below
sd2.sf <- sf::st_as_sf(sd2)

```

```{r}
# Human-readable names for the cluster ids "1" to "6"

cluster.labels <- setNames(
  c(
    "Affluent Coastal & Uptown",
    "Stable Middle-Income Suburbs",
    "Working-Class Family Neighborhoods",
    "Lower-Income, High-Rent Areas",
    "Transitional / Mixed Demographic Areas",
    "Older, Lower-Density Neighborhoods"
  ),
  as.character(1:6)
)

# attach the descriptive label of each tract's cluster id
sd2.sf$cluster_lab <- cluster.labels[as.character(sd2.sf$cluster)]

# interactive (leaflet) rendering
tmap_mode("view")

# Cluster map on a light basemap, limited to the San Diego bounding box
renderTmap({
  base.layer <- tm_basemap("CartoDB.Positron")

  base.layer +
    tm_shape(sd2.sf, bbox = bb) +
    tm_polygons(
      col     = "cluster_lab",
      palette = "brewer.accent",
      title   = "Community Types"
    )
})


```




NH Change 2000-2010  
===================================== 

These maps show how home values changed across San Diego between 2000 and 2010.
Choose a home value measure in the sidebar to see its map and its full distribution.


Inputs {.sidebar}
-------------------------------------

```{r}

# Home value measures: written as column name = display label pairs, then
# split into the two parallel vectors that radioButtons() expects.
hv.names <- c(
  mhv00_adj  = "Median Home Value (Adj. 2000)",
  mhv10      = "Median Home Value (2010)",
  mhv_change = "Value Change (2000–2010)",
  mhv_growth = "Growth in Home Value (%)"
)
hv.values <- names(hv.names)   # actual column names
hv.names  <- unname(hv.names)  # what users see

radioButtons(
  "home.value",
  label        = h3("Home Values"),
  choiceNames  = hv.names,
  choiceValues = hv.values,
  selected     = "mhv00_adj"
)


```

Row {.tabset}
-------------------------------------



### Median Home Values

```{r}
# Decile choropleth of the home value measure selected in the sidebar.
renderPlot({

  req(input$home.value)
  hv.var <- input$home.value

  # Plot titles keyed by column name; anything else gets a generic title.
  map.titles <- c(
    mhv00_adj  = "Adjusted Median Home Value (2000, 2012 Dollars)",
    mhv10      = "Median Home Value (2010)",
    mhv_change = "Change in Median Home Value (2000–2010)",
    mhv_growth = "Growth in Median Home Value (%) (2000–2010)"
  )
  if (hv.var %in% names(map.titles)) {
    plot.title <- map.titles[[hv.var]]
  } else {
    plot.title <- paste("Home Value Measure:", hv.var)
  }

  # bin tracts into deciles of the chosen measure
  tract.deciles <- mutate(sd.sf, q = ntile(.data[[hv.var]], 10))

  decile.palette <- rev(ocean.balance(10))

  ggplot(tract.deciles) +
    geom_sf(aes(fill = q), color = NA) +
    coord_sd() +
    labs(
      title   = plot.title,
      caption = "Source: LTDB 2000 & 2010 (Harmonized)",
      fill    = "Decile"
    ) +
    scale_fill_gradientn(colours = decile.palette, guide = "colourbar")

})

```

### Variable Distribution 

```{r}
# Histogram of the selected home value measure with decile cut points.
renderPlot({

  req(input$home.value)
  hv.var <- input$home.value

  # Histogram titles keyed by column name; anything else gets a generic one.
  hist.titles <- c(
    mhv00_adj  = "Histogram of Adjusted 2000 Home Values",
    mhv10      = "Histogram of 2010 Home Values",
    mhv_change = "Histogram of Home Value Change (2000–2010)",
    mhv_growth = "Histogram of Home Value Growth (%)"
  )
  if (hv.var %in% names(hist.titles)) {
    hist.title <- hist.titles[[hv.var]]
  } else {
    hist.title <- paste("Histogram of", hv.var)
  }

  # numeric values of the chosen column, finite entries only
  values <- as.numeric(unlist(d[[hv.var]]))
  values <- values[is.finite(values)]

  decile.cuts <- quantile(values, seq(0, 1, 0.1), na.rm = TRUE)

  hist(
    values,
    breaks = 50,
    col    = "gray",
    border = "white",
    yaxt   = "n",
    main   = hist.title,
    xlab   = "Red lines represent decile cut points"
  )
  abline(v = decile.cuts, col = "darkred", lty = 3, lwd = 2)

})

```


Drivers of Change   
===================================== 

Select a dependent variable and covariates to estimate models predicting 
neighborhood change. Regression results update automatically.



Inputs {.sidebar}
-------------------------------------

```{r}
# Dependent variable: written as column name = display label pairs, then
# split into the two parallel vectors that radioButtons() expects.
dv.names <- c(
  mhv00_adj  = "Median Home Value (Adj. 2000)",
  mhv10      = "Median Home Value (2010)",
  mhv_change = "Value Change (2000–2010)",
  mhv_growth = "Growth in Home Value (%)"
)
dv.values <- names(dv.names)   # real variable names
dv.names  <- unname(dv.names)  # text the user sees

radioButtons(
  "dv",
  label        = h3("Select Dependent Variable"),
  choiceNames  = dv.names,
  choiceValues = dv.values,
  selected     = "mhv_change"
)

# Candidate covariates offered as check boxes
covariates <- c(
  "pnhwht12", "pnhblk12", "phisp12", "pntv12", "pfb12", "polang12",
  "phs12", "pcol12", "punemp12", "pflabf12", "pprof12", "pmanuf12",
  "pvet12", "psemp12", "hinc12", "incpc12", "ppov12", "pown12",
  "pvac12", "pmulti12", "mrent12", "mhmval12", "p30old12", "p10yrs12",
  "p18und12", "p60up12", "p75up12", "pmar12", "pwds12", "pfhh12"
)

checkboxGroupInput(
  "covariates",
  label    = h3("Select Variables for Your Model"),
  choices  = covariates,
  selected = c("pnhwht12", "pprof12", "pvac12")
)

```


Row {.tabset}
-------------------------------------



### Predicting Change 

```{r}
# Reactive accessor for the covariates ticked in the sidebar.
get_covariates <- reactive({ input$covariates })

# Fit an OLS model of the selected dependent variable on the selected
# covariates and render the stargazer table as HTML.
renderUI({

  req(input$dv)

  covars <- get_covariates()

  # need at least one covariate
  validate(
    need(length(covars) > 0, "Please select at least one covariate.")
  )

  # Build formula: DV ~ x1 + x2 + ...
  fo <- reformulate(covars, response = input$dv)

  # Keep only rows that are usable on the DV and on every covariate.
  # complete.cases() alone would let Inf / -Inf through, and lm() stops
  # with "NA/NaN/Inf" errors on those, so numeric columns are checked with
  # is.finite() (which is also FALSE for NA and NaN).
  d.model <- d[, c(input$dv, covars), drop = FALSE]
  is_usable <- function(column) {
    if (is.numeric(column)) is.finite(column) else !is.na(column)
  }
  keep <- Reduce(`&`, lapply(d.model, is_usable))
  d.model <- d.model[keep, , drop = FALSE]

  # The model needs more observations than coefficients (covariates plus
  # intercept); otherwise show a message instead of a degenerate fit.
  validate(
    need(
      nrow(d.model) > length(covars) + 1,
      "Not enough complete observations to estimate this model."
    )
  )

  # Run regression
  m <- lm(fo, data = d.model)

  HTML(
    c(
      "<br><br><br>",
      "<div type='regression' style='width: 80%; margin: 0px auto;'>",
      stargazer(m, type = "html", omit.stat = c("rsq", "f")),
      "</div>",
      "<br><br><br>"
    )
  )

})

```

### Correlation Plots 

```{r}
# Scatterplot matrix of the covariates currently selected in the sidebar.
renderPlot({

  chosen <- input$covariates

  # a pairs plot needs at least two variables
  req(length(chosen) >= 2)

  # restrict to the chosen columns and to rows observed on all of them
  cor.data  <- d[, chosen, drop = FALSE]
  full.rows <- complete.cases(cor.data)

  pairs(cor.data[full.rows, ])

})

```

Across San Diego, the greatest home-value gains between 2000 and 2010 occurred in
Affluent Coastal & Uptown neighborhoods, while Working-Class Family Neighborhoods
experienced more modest increases. Demographic factors—including percent White,
professional employment rates, and vacancy rates—were consistently influential
predictors in the regression models. These patterns highlight how socioeconomic
composition and housing stock shape neighborhood trajectories.