title: "Models of Neighborhood Change in TUSCALOOSA" output: flexdashboard::flex_dashboard: theme: bootstrap source: embed smart: false runtime: shiny ---

Part 1-3: Create Your Dorling Cartogram

```{r, echo=FALSE, warning = FALSE, message = FALSE}

library( geojsonio ) # read shapefiles library( sp ) # work with shapefiles library( sf ) # work with shapefiles - simple features format library( mclust ) # cluster analysis library( tmap ) # theme maps library( ggplot2 ) # graphing library( ggthemes ) # nice formats for ggplots library( dplyr ) # data wrangling library( pander ) # formatting RMD tables library( tidycensus ) library( cartogram ) # spatial maps w/ tract size bias reduction

library( maptools ) # spatial object manipulation

library( corrplot ) # correlation plots

PACKAGES

dashboard layout, widgets, and uploading

library( flexdashboard ) library( shiny ) library( rsconnect )

data wrangling

library( dplyr )

formatting output

library( DT ) library( pander ) library( knitr ) library( stargazer )

maps

library( ggmap ) library( leaflet ) library( viridis ) library( geojsonio ) library( sp ) library( sf ) library( tmap ) library( pals )

library( rgdal )

crosswalk <- read.csv( "https://raw.githubusercontent.com/DS4PS/cpp-529-master/master/data/cbsatocountycrosswalk.csv", stringsAsFactors=F, colClasses="character" )

grep( "^TUSCALOOSA, AL", crosswalk$msaname, value=TRUE )

these.msp <- crosswalk$msaname == grep( "^TUSCALOOSA, AL", crosswalk$msaname, value=TRUE )

these.fips <- crosswalk$fipscounty[ these.msp ] these.fips <- na.omit( these.fips )

set the api key

censusapikey( "d8d14b8cfd5b57dcffe5582cedf37a27c23749f6" )

state.fips <- substr( these.fips, 1, 2 ) county.fips <- substr( these.fips, 3, 5 )

cbind( these.fips, state.fips, county.fips ) %>% pander()

May need to install and load in pak

pak::pak("walkerke/tigris@ftp-patch")

tus.pop <- getacs( geography = "tract", variables = "B01003001", state = "01", county = "125", geometry = TRUE ) %>% select( GEOID, estimate ) %>% rename( POP=estimate )

Recode GEOID to match Census formatting (remove leading zero)

tus.pop$GEOID <- substr(tus.pop$GEOID, 2, nchar(tus.pop$GEOID))

Save the dataframe 'sd.pop' to an RData file:

save(sdd.pop, file = "sdd.pop.RData")

URL <- "https://github.com/DS4PS/cpp-529-master/raw/master/data/ltdbstd2010_sample.rds" census.dat <- readRDS(gzcon(url( URL )))

Merge the downloaded population data with the additional data

tus <- merge(tus.pop, census.dat, by.x="GEOID", by.y="tractid")

make sure there are no empty polygons

tus <- tus[ ! stisempty( tus ) , ]

Convert the simple features object (sf) to SpatialPolygonsDataFrame (sp)

tus.sp <- as_Spatial(tus)

Check the class of the object to confirm conversion

class(tus.sp)

Plot spatial data to visually inspect it

plot(tus.sp)

Transform map projection to Mercator (EPSG:3395) and remove empty or zero population tracts

tus.sp <- spTransform(tus.sp, CRS("+init=epsg:3395")) tus.sp <- tus.sp[tus.sp$POP != 0 & (!is.na(tus.sp$POP)), ]

Create Dorling cartogram (circles sized by population)

Weight (pop.w) is normalized by the maximum population

tus.sp$pop.w <- tus.sp$POP / 9000 # normalization factor tusdorling <- cartogramdorling(x = tus.sp, weight = "pop.w", k = 0.05)

Save Dorling cartogram to GeoJSON file (optional)

geojsonwrite(sddorling, file = "San_Diego.geojson", geometry = "polygon")

plot( tus_dorling )

tusdorlingsf <- stassf(tus_dorling)

tmshape( tusdorlingsf ) + tmpolygons( size="POP", col="hinc12", n=7, style="quantile", palette="Spectral" ) + tm_layout( "Dorling Cartogram \nof Household Income \nfor Tuscaloosa", title.position=c( "right","top" ) )

Save Dorling

data frame and polygon ID standardization in case a tract was dropped and IDs don't match

row.ids <- sapply( slot( tusdorling, "polygons" ), function(x) slot( x, "ID" ) ) row.names( tusdorling ) <- row.ids

project to standard lat-lon coordinate system

tusdorling <- spTransform( tusdorling, CRS("+proj=longlat +datum=WGS84") )

write to file

geojsonwrite( tusdorling, file="tus_dorling.geojson", geometry="polygon" ) ```

```{r, echo=FALSE, warning = FALSE, message = FALSE}

keep.these <- c("pnhwht12", "pnhblk12", "phisp12", "pntv12", "pfb12", "polang12", "phs12", "pcol12", "punemp12", "pflabf12", "pprof12", "pmanuf12", "pvet12", "psemp12", "hinc12", "incpc12", "ppov12", "pown12", "pvac12", "pmulti12", "mrent12", "mhmval12", "p30old12", "p10yrs12", "p18und12", "p60up12", "p75up12", "pmar12", "pwds12", "pfhh12")

d1 <- tus_dorling@data d2 <- select( d1, keep.these ) d3 <- apply( d2, 2, scale )

set.seed( 1234 ) fit <- Mclust( d3 ) tusdorlingsf$cluster <- as.factor( fit$classification ) summary( fit )

data.dictionary <- structure( list( LABEL = c( "pnhwht12", "pnhblk12", "phisp12", "pntv12", "pfb12", "polang12", "phs12", "pcol12", "punemp12", "pflabf12", "pprof12", "pmanuf12", "pvet12", "psemp12", "hinc12", "incpc12", "ppov12", "pown12", "pvac12", "pmulti12", "mrent12", "mhmval12", "p30old12", "p10yrs12", "p18und12", "p60up12", "p75up12", "pmar12", "pwds12", "pfhh12" ), VARIABLE = c( "Percent white, non-Hispanic", "Percent black, non-Hispanic", "Percent Hispanic", "Percent Native American race", "Percent foreign born", "Percent speaking other language at home, age 5 plus", "Percent with high school degree or less", "Percent with 4-year college degree or more", "Percent unemployed", "Percent female labor force participation", "Percent professional employees", "Percent manufacturing employees", "Percent veteran", "Percent self-employed", "Median HH income, total", "Per capita income", "Percent in poverty, total", "Percent owner-occupied units", "Percent vacant units", "Percent multi-family units", "Median rent", "Median home value", "Percent structures more than 30 years old", "Percent HH in neighborhood 10 years or less", "Percent 17 and under, total", "Percent 60 and older, total", "Percent 75 and older, total", "Percent currently married, not separated", "Percent widowed, divorced and separated", "Percent female-headed families with children" ) ), class = "data.frame", row.names = c( NA, -30L ) )

df.pct <- sapply( d2, ntile, 100 ) d4 <- as.data.frame( df.pct ) d4$cluster <- as.factor( paste0("GROUP-",fit$classification) )

cluster <- length( unique( fit$classification ) )

stats <- d4 %>% groupby( cluster ) %>% summariseeach( funs( mean ) )

t <- data.frame( t( stats ), stringsAsFactors=FALSE ) names( t ) <- paste0( "GROUP.", 1:cluster ) t <- t[-1,]

name individual cluster

cluster_names <- c("GROUP-1" = "Middle Ground", "GROUP-2" = "Old Homeowners", "GROUP-3" = "Young and Educated", "GROUP-4" = "Young and Female", "GROUP-5" = "Multi-Lingual Families", "GROUP-6" = "Black Homeowners")

intergrate into dataset

d4$clusternamed <- clusternames[d4$cluster]

for( i in 1:cluster ) { z <- t[,i] plot( rep(1,30), 1:30, bty="n", xlim=c(-75,100), type="n", xaxt="n", yaxt="n", xlab="Percentile", ylab="", main=cluster_names[paste0("GROUP-", i)] ) # Use named group labels abline( v=seq( 0, 100, 25 ), lty=3, lwd=1.5, col="gray90" ) segments( y0=1:30, x0=0, x1=100, col="gray70", lwd=2 ) text( -0.2, 1:30, data.dictionary$VARIABLE[-1], cex=0.85, pos=2 ) points( z, 1:30, pch=19, col="firebrick", cex=1.5 ) axis( side=1, at=c( 0,50,100 ), col.axis="gray", col="gray" ) }

```

```{r, echo=FALSE, warning = FALSE, message = FALSE} URL1 <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-2000.rds" d1 <- readRDS( gzcon( url( URL1 ) ) )

URL2 <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-2010.rds" d2 <- readRDS( gzcon( url( URL2 ) ) )

URLmd <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-META-DATA.rds" md <- readRDS( gzcon( url( URLmd ) ) )

d1 <- select( d1, - year ) d2 <- select( d2, - year )

d <- merge( d1, d2, by="tractid" ) d <- merge( d, md, by="tractid" )

STANDARDIZE GEO IDs

note the current geoid format for the LTDB census data:

FIPS-STATE-COUNTY-TRACT: fips-01-001-020100

x <- d$tractid

head( x )

[1] "fips-01-001-020100" "fips-01-001-020200" "fips-01-001-020300"

[4] "fips-01-001-020400" "fips-01-001-020500" "fips-01-001-020600"

remove non-numeric strings

x <- gsub( "fips", "", x ) x <- gsub( "-", "", x )

head( x )

[1] "01001020100" "01001020200" "01001020300" "01001020400" "01001020500"

[6] "01001020600"

drop leading zeros

x <- as.numeric( x )

remember to add the variable back to the census dataset

d$tractid2 <- x

tus <- merge( tus, d, by.x="GEOID", by.y="tractid", all.x=T )

```

Part 4: Add median home value and change in median home value

```{r, echo=FALSE, warning = FALSE, message = FALSE}

clear the workspace

rm( list = ls() )

libraries

library( dplyr ) library( knitr ) library( pander ) library( stargazer ) library( scales )

set seed for reproducible results

set.seed( 1234 )

stargazer settings

s.type <- "text"

s.type <- "html"

helper function

panel.cor <- function(x, y, digits=2, prefix="", cex.cor) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- abs(cor(x, y)) txt <- format(c(r, 0.123456789), digits=digits)[1] txt <- paste(prefix, txt, sep="") if(missing(cex.cor)) cex <- 0.8/strwidth(txt)

test <- cor.test(x,y) # borrowed from printCoefmat Signif <- symnum(test$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("", "", "", ".", " "))

text(0.5, 0.5, txt, cex = 1.5 ) text(.7, .8, Signif, cex=cex, col=2) }

panel.smooth <- function (x, y, col = par("col"), bg = NA, pch = par("pch"), cex = 0.5, col.smooth = "red", span = 2/3, iter = 3, ...) { points(x, y, pch = 19, col = gray(0.7,0.2), bg = bg, cex = cex) ok <- is.finite(x) & is.finite(y) if (any(ok)) lines(stats::lowess(x[ok], y[ok], f = span, iter = iter), col = col.smooth, lwd=2, ...) }

custom plot

jplot <- function( x1, x2, lab1="", lab2="", draw.line=T, ... ) {

plot( x1, x2, pch=19, col=gray(0.6, alpha = 0.2), cex=0.5,
bty = "n", xlab=lab1, ylab=lab2, cex.lab=1.5, ... )

if( draw.line==T ){ ok <- is.finite(x1) & is.finite(x2) lines( lowess(x2[ok]~x1[ok]), col="red", lwd=3 ) }

}

URL1 <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-2000.rds" d1 <- readRDS( gzcon( url( URL1 ) ) )

URL2 <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-2010.rds" d2 <- readRDS( gzcon( url( URL2 ) ) )

URLmd <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-META-DATA.rds" md <- readRDS( gzcon( url( URLmd ) ) )

d1 <- select( d1, - year ) d2 <- select( d2, - year )

d <- merge( d1, d2, by="tractid" ) d <- merge( d, md, by="tractid" )

d <- filter( d, urban == "urban" )

d <- select( d, tractid, mhmval00, mhmval12, unemp00, clf00, vac00,hu00, npov00, dpov00, cbsa, cbsaname )

d <- d %>% mutate( p.unemp = 100 * unemp00 / clf00, p.vacant = 100 * vac00 / hu00, pov.rate = 100 * npov00 / dpov00 )

adjust 2000 home values for inflation

mhv.00 <- d$mhmval00 * 1.28855
mhv.10 <- d$mhmval12

change in MHV in dollars

mhv.change <- mhv.10 - mhv.00

mhv.00[ mhv.00 < 1000 ] <- NA

change in MHV in percent

mhv.growth <- 100 * ( mhv.change / mhv.00 )

Omit cases with growth rates above 200%

mhv.growth[ mhv.growth > 200 ] <- NA

d$mhv.00 <- mhv.00 d$mhv.10 <- mhv.10 d$mhv.change <- mhv.change d$mhv.growth <- mhv.growth

average growth in median home value for the city

d <- d %>% group_by( cbsaname ) %>% mutate( metro.mhv.change = median( mhv.change, na.rm=T ), metro.mhv.growth = 100 * median( mhv.growth, na.rm=T ) ) %>% ungroup()

2000

hist( mhv.00, breaks=200, xlim=c(0,500000), col="gray20", border="white", axes=F, xlab="MHV", ylab="", main="Median Home Value in 2000 (2010 US dollars)" )

axis( side=1, at=seq(0,500000,100000), labels=c("$0","$100k","$200k","$300k","$400k","$500k") )

abline( v=median( mhv.00, na.rm=T ), col="orange", lwd=3 )

2010

hist( mhv.10, breaks=200, xlim=c( 0, max( mhv.10, na.rm = TRUE ) ), col="gray20", border="white", axes=F, xlab="MHV", ylab="", main="Median Home Value in 2010" )

axis( side=1, at=seq(0,1000000,250000), labels=c("$0","$250k","$500k","$750k","$1mil") )

abline( v=median( mhv.10, na.rm=T ), col="lightblue", lwd=3 )

hist( mhv.change/1000, breaks=500, xlim=c(-100,500), yaxt="n", xaxt="n", xlab="Thousand of US Dollars (adjusted to 2010)", cex.lab=1.5, ylab="", main="Change in Median Home Value 2000 to 2010", col="gray20", border="white" )

axis( side=1, at=seq( from=-100, to=500, by=100 ), labels=paste0( "$", seq( from=-100, to=500, by=100 ), "k" ) )

mean.x <- mean( mhv.change/1000, na.rm=T ) abline( v=mean.x, col="darkorange", lwd=2, lty=2 ) text( x=200, y=1500, labels=paste0( "Mean = ", dollar( round(1000*mean.x,0)) ), col="darkorange", cex=1.8, pos=3 )

median.x <- median( mhv.change/1000, na.rm=T ) abline( v=median.x, col="dodgerblue", lwd=2, lty=2 ) text( x=200, y=2000, labels=paste0( "Median = ", dollar( round(1000*median.x,0)) ), col="dodgerblue", cex=1.8, pos=3 )

hg <- hist( mhv.growth, breaks=5000, xlim=c(-100,200), yaxt="n", xaxt="n", xlab="", cex.main=1.5, ylab="", main="Growth in Home Value by\n Census Tract 2000 to 2010", col="gray40" )

axis( side=1, at=seq( from=-100, to=200, by=50 ), labels=paste0( seq( from=-100, to=200, by=50 ), "%" ) )

ymax <- max( hg$count )

mean.x <- mean( mhv.growth, na.rm=T ) abline( v=mean.x, col="firebrick", lwd=2, lty=2 ) text( x=75, y=(0.5*ymax), labels=paste0( "Mean = ", round(mean.x,0), "%"), col="firebrick", cex=1.2, pos=4 )

median.x <- median( mhv.growth, na.rm=T ) abline( v=median.x, col="darkgreen", lwd=2, lty=2 ) text( x=75, y=(0.6*ymax), labels=paste0( "Median = ", round(median.x,0), "%"), col="darkgreen", cex=1.2, pos=4 ) ```

Community Demographics

Inputs {.sidebar}

```{r} these.variables <- c( "pnhwht12", "pnhblk12", "phisp12", "pntv12", "pfb12", "polang12", "phs12", "pcol12", "punemp12", "pflabf12", "pprof12", "pmanuf12", "pvet12", "psemp12", "hinc12", "incpc12", "ppov12", "pown12", "pvac12", "pmulti12", "mrent12", "mhmval12", "p30old12", "p10yrs12", "p18und12", "p60up12", "p75up12", "pmar12", "pwds12", "pfhh12" )

Matching variable labels (descriptive names)

descriptive.labels <- c( "Percent white, non-Hispanic", "Percent black, non-Hispanic", "Percent Hispanic", "Percent Native American race", "Percent foreign born", "Percent speaking other language at home, age 5 plus", "Percent with high school degree or less", "Percent with 4-year college degree or more", "Percent unemployed", "Percent female labor force participation", "Percent professional employees", "Percent manufacturing employees", "Percent veteran", "Percent self-employed", "Median HH income, total", "Per capita income", "Percent in poverty, total", "Percent owner-occupied units", "Percent vacant units", "Percent multi-family units", "Median rent", "Median home value", "Percent structures more than 30 years old", "Percent HH in neighborhood 10 years or less", "Percent 17 and under, total", "Percent 60 and older, total", "Percent 75 and older, total", "Percent currently married, not separated", "Percent widowed, divorced and separated", "Percent female-headed families with children" )

Use descriptive labels as choiceNames and variable names as choiceValues

radioButtons( inputId = "demographics", label = h3("Census Variables"), choiceNames = descriptive.labels, choiceValues = these.variables, selected = "pnhwht12" )

```

Row {.tabset}

Choropleth Map

```{r}

tus <- geojsonread( "~/Community Analytics/tusdorling.geojson", what="sp" )

tus2 <- spTransform( tus, CRS("+init=epsg:3395") )

convert the sp map format to

an sf (simple features) format:

ggmap requires the sf format

tus.sf <- stassf( tus2 )

separate out the data frame from the map

d <- as.data.frame( tus.sf )

renderPlot({

split the selected variable into deciles

get_data <- reactive({ tus.sf <- tus.sf %>% mutate( q = ntile( get(input$demographics), 10 ) )
})

ggplot( getdata() ) + geomsf( aes( fill = q ), color=NA ) + coordsf( datum=NA ) + labs( title = paste0( "Choropleth of Select Demographics: ", toupper(input$demographics) ), caption = "Source: Harmonized Census Files", fill = "Population Deciles" ) + scalefill_gradientn( colours=rev(ocean.balance(10)), guide = "colourbar" ) + xlim( xmin = -12519146, xmax = -12421368 ) + ylim( ymin = 3899074, ymax = 3965924 )

})

```

Variable Distribution

```{r} renderPlot({

extract vector x from the data frame

x <- d[ "pnhwht12" ] %>% unlist()

getvariablex <- reactive({ d[ input$demographics ] })

x <- getvariablex() %>% unlist()

cut.points <- quantile( x, seq( 0, 1, 0.1 ) )

hist( x, breaks=50, col="gray", border="white", yaxt="n", main=paste0( "Histogram of variable ", toupper( input$demographics ) ), xlab="red lines represent decile cut points" )

abline( v=cut.points, col="darkred", lty=3, lwd=2 )

}) ```

Neighborhoods

Clusters

```{r}

define the bounding box corners

bb <- stbbox( c( xmin = -12519146, xmax = -12421368, ymax = 3965924, ymin = 3899074 ), crs = stcrs("+init=epsg:3395"))

apply labels

tus2$cluster <- as.character(tus2$cluster)

cluster names

cluster_names <- c(

"1" = "Middle Ground",

# "2" = "Old Homeowners", # "3" = "Young and Educated",

"4" = "Young and Female",

# "5" = "Multi-Lingual Families", # "6" = "Black Homeowners"

)

combine cluster and label

tus2$clusternamed <- clusternames[tus2$cluster]

tmap

renderTmap({

tmap_mode("view")

tm_basemap("CartoDB.Positron") +

# tmshape(tus2, bbox = bb) + # tmpolygons(col = "cluster_named", palette = "Accent", title = "Community Types")

})

```

NH Change 2000-2010

Inputs {.sidebar}

```{r}

button.labels <- c("Median Home Value 2000","Median Home Value 2010","Value Change 2000-2010","Growth in Home Value") button.values <- c("mhv.2000","mhv.2010","mhv.change","mhv.growth")

radioButtons( inputId="home.value", label = h3("Home Values"), # choices = these.variables, choiceNames=button.labels, choiceValues=button.values, selected="mhv.2000")

```

Row {.tabset}

Median Home Values

```{r}

renderPlot({

split the selected variable into deciles

get_data <- reactive({ phx.sf <- phx.sf %>% mutate( q = ntile( get(input$home.value), 10 ) )
})

ggplot( getdata() ) + geomsf( aes( fill = q ), color=NA ) + coordsf( datum=NA ) + labs( title = paste0( "Spatial Distribution of Home Values: ", toupper(input$demographics) ), caption = "Source: Harmonized Census Files", fill = "Home Value Deciles" ) + scalefill_gradientn( colours=rev(ocean.balance(10)), guide = "colourbar" ) + xlim( xmin = -12519146, xmax = -12421368 ) + ylim( ymin = 3899074, ymax = 3965924 )

})

```

Variable Distribution

```{r} renderPlot({

extract vector x from the data frame

x <- d[ "pnhwht12" ] %>% unlist()

getvariablex <- reactive({ d[ input$home.value ] })

x <- getvariablex() %>% unlist() %>% as.numeric()

cut.points <- quantile( x, seq( 0, 1, 0.1 ) )

hist( x, breaks=50, col="gray", border="white", yaxt="n", main=paste0( "Histogram of ", toupper( input$home.value ) ), xlab="red lines represent decile cut points" )

abline( v=cut.points, col="darkred", lty=3, lwd=2 )

}) ```

Drivers of Change

Inputs {.sidebar}

```{r}

button.labels <- c("Median Home Value 2000","Median Home Value 2010","Value Change 2000-2010","Growth in Home Value") button.values <- c("mhv.2000","mhv.2010","mhv.change","mhv.growth")

radioButtons( inputId="dv", label = h3("Select Your Dependent Variable"), choiceNames=button.labels, choiceValues=button.values, selected="mhv.change")

covariates <- c("pnhwht12", "pnhblk12", "phisp12", "pntv12", "pfb12", "polang12", "phs12", "pcol12", "punemp12", "pflabf12", "pprof12", "pmanuf12", "pvet12", "psemp12", "hinc12", "incpc12", "ppov12", "pown12", "pvac12", "pmulti12", "mrent12", "mhmval12", "p30old12", "p10yrs12", "p18und12", "p60up12", "p75up12", "pmar12", "pwds12", "pfhh12")

covariate.labels <- c( ... )

checkboxGroupInput( inputId="covariates", label = h3("Select Variables for Your Model"), choices = covariates, # choiceNames=covariate.labels, # choiceValues=covariates, selected=c("pnhwht12","pprof12","pvac12") )

```

Row {.tabset}

Predicting Change

```{r, results="asis"}

RUNNING A REGRESSION WITH USER INPUTS

create a formula object

by constructing the formula from user selections

as a string then casting as a formula object

x.s <- c("x1","x2","x3" )

formula.text <- paste( "y", " ~ ", paste0( x.s, collapse=" + ") )

formula.object <- as.formula( formula.text )

lm( formula.object, data=d )

make sure all variables are in d

check boxes return vectors

get_covariates <- reactive({ input$covariates })

renderUI({

covariates <- get_covariates()

formula.text <- paste0( "mhmval12", " ~ ", paste( covariates, collapse=" + " ) ) fo <- as.formula( formula.text )

m <- lm( fo, data=d )

HTML( "
" )

HTML(

c("


", "

", stargazer( m, type="html", omit.stat=c("rsq","f") ), "
", "


" )

)

})

HTML( reg.table )

```

Correlation Plots

{r} pairs( iris )