title: "Models of Neighborhood Change in TUSCALOOSA" output: flexdashboard::flex_dashboard: theme: bootstrap source: embed smart: false runtime: shiny ---
```{r, echo=FALSE, warning = FALSE, message = FALSE}
library( geojsonio ) # read shapefiles library( sp ) # work with shapefiles library( sf ) # work with shapefiles - simple features format library( mclust ) # cluster analysis library( tmap ) # theme maps library( ggplot2 ) # graphing library( ggthemes ) # nice formats for ggplots library( dplyr ) # data wrangling library( pander ) # formatting RMD tables library( tidycensus ) library( cartogram ) # spatial maps w/ tract size bias reduction
library( corrplot ) # correlation plots
library( flexdashboard ) library( shiny ) library( rsconnect )
library( dplyr )
library( DT ) library( pander ) library( knitr ) library( stargazer )
library( ggmap ) library( leaflet ) library( viridis ) library( geojsonio ) library( sp ) library( sf ) library( tmap ) library( pals )
crosswalk <- read.csv( "https://raw.githubusercontent.com/DS4PS/cpp-529-master/master/data/cbsatocountycrosswalk.csv", stringsAsFactors=F, colClasses="character" )
grep( "^TUSCALOOSA, AL", crosswalk$msaname, value=TRUE )
these.msp <- crosswalk$msaname == grep( "^TUSCALOOSA, AL", crosswalk$msaname, value=TRUE )
these.fips <- crosswalk$fipscounty[ these.msp ] these.fips <- na.omit( these.fips )
censusapikey( "d8d14b8cfd5b57dcffe5582cedf37a27c23749f6" )
state.fips <- substr( these.fips, 1, 2 ) county.fips <- substr( these.fips, 3, 5 )
cbind( these.fips, state.fips, county.fips ) %>% pander()
tus.pop <- getacs( geography = "tract", variables = "B01003001", state = "01", county = "125", geometry = TRUE ) %>% select( GEOID, estimate ) %>% rename( POP=estimate )
tus.pop$GEOID <- substr(tus.pop$GEOID, 2, nchar(tus.pop$GEOID))
URL <- "https://github.com/DS4PS/cpp-529-master/raw/master/data/ltdbstd2010_sample.rds" census.dat <- readRDS(gzcon(url( URL )))
tus <- merge(tus.pop, census.dat, by.x="GEOID", by.y="tractid")
tus <- tus[ ! stisempty( tus ) , ]
tus.sp <- as_Spatial(tus)
class(tus.sp)
plot(tus.sp)
tus.sp <- spTransform(tus.sp, CRS("+init=epsg:3395")) tus.sp <- tus.sp[tus.sp$POP != 0 & (!is.na(tus.sp$POP)), ]
tus.sp$pop.w <- tus.sp$POP / 9000 # normalization factor tusdorling <- cartogramdorling(x = tus.sp, weight = "pop.w", k = 0.05)
plot( tus_dorling )
tusdorlingsf <- stassf(tus_dorling)
tmshape( tusdorlingsf ) + tmpolygons( size="POP", col="hinc12", n=7, style="quantile", palette="Spectral" ) + tm_layout( "Dorling Cartogram \nof Household Income \nfor Tuscaloosa", title.position=c( "right","top" ) )
row.ids <- sapply( slot( tusdorling, "polygons" ), function(x) slot( x, "ID" ) ) row.names( tusdorling ) <- row.ids
tusdorling <- spTransform( tusdorling, CRS("+proj=longlat +datum=WGS84") )
geojsonwrite( tusdorling, file="tus_dorling.geojson", geometry="polygon" ) ```
```{r, echo=FALSE, warning = FALSE, message = FALSE}
keep.these <- c("pnhwht12", "pnhblk12", "phisp12", "pntv12", "pfb12", "polang12", "phs12", "pcol12", "punemp12", "pflabf12", "pprof12", "pmanuf12", "pvet12", "psemp12", "hinc12", "incpc12", "ppov12", "pown12", "pvac12", "pmulti12", "mrent12", "mhmval12", "p30old12", "p10yrs12", "p18und12", "p60up12", "p75up12", "pmar12", "pwds12", "pfhh12")
d1 <- tus_dorling@data d2 <- select( d1, keep.these ) d3 <- apply( d2, 2, scale )
set.seed( 1234 ) fit <- Mclust( d3 ) tusdorlingsf$cluster <- as.factor( fit$classification ) summary( fit )
data.dictionary <- structure( list( LABEL = c( "pnhwht12", "pnhblk12", "phisp12", "pntv12", "pfb12", "polang12", "phs12", "pcol12", "punemp12", "pflabf12", "pprof12", "pmanuf12", "pvet12", "psemp12", "hinc12", "incpc12", "ppov12", "pown12", "pvac12", "pmulti12", "mrent12", "mhmval12", "p30old12", "p10yrs12", "p18und12", "p60up12", "p75up12", "pmar12", "pwds12", "pfhh12" ), VARIABLE = c( "Percent white, non-Hispanic", "Percent black, non-Hispanic", "Percent Hispanic", "Percent Native American race", "Percent foreign born", "Percent speaking other language at home, age 5 plus", "Percent with high school degree or less", "Percent with 4-year college degree or more", "Percent unemployed", "Percent female labor force participation", "Percent professional employees", "Percent manufacturing employees", "Percent veteran", "Percent self-employed", "Median HH income, total", "Per capita income", "Percent in poverty, total", "Percent owner-occupied units", "Percent vacant units", "Percent multi-family units", "Median rent", "Median home value", "Percent structures more than 30 years old", "Percent HH in neighborhood 10 years or less", "Percent 17 and under, total", "Percent 60 and older, total", "Percent 75 and older, total", "Percent currently married, not separated", "Percent widowed, divorced and separated", "Percent female-headed families with children" ) ), class = "data.frame", row.names = c( NA, -30L ) )
df.pct <- sapply( d2, ntile, 100 ) d4 <- as.data.frame( df.pct ) d4$cluster <- as.factor( paste0("GROUP-",fit$classification) )
cluster <- length( unique( fit$classification ) )
stats <- d4 %>% groupby( cluster ) %>% summariseeach( funs( mean ) )
t <- data.frame( t( stats ), stringsAsFactors=FALSE ) names( t ) <- paste0( "GROUP.", 1:cluster ) t <- t[-1,]
cluster_names <- c("GROUP-1" = "Middle Ground", "GROUP-2" = "Old Homeowners", "GROUP-3" = "Young and Educated", "GROUP-4" = "Young and Female", "GROUP-5" = "Multi-Lingual Families", "GROUP-6" = "Black Homeowners")
d4$clusternamed <- clusternames[d4$cluster]
for( i in 1:cluster ) { z <- t[,i] plot( rep(1,30), 1:30, bty="n", xlim=c(-75,100), type="n", xaxt="n", yaxt="n", xlab="Percentile", ylab="", main=cluster_names[paste0("GROUP-", i)] ) # Use named group labels abline( v=seq( 0, 100, 25 ), lty=3, lwd=1.5, col="gray90" ) segments( y0=1:30, x0=0, x1=100, col="gray70", lwd=2 ) text( -0.2, 1:30, data.dictionary$VARIABLE[-1], cex=0.85, pos=2 ) points( z, 1:30, pch=19, col="firebrick", cex=1.5 ) axis( side=1, at=c( 0,50,100 ), col.axis="gray", col="gray" ) }
```
```{r, echo=FALSE, warning = FALSE, message = FALSE} URL1 <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-2000.rds" d1 <- readRDS( gzcon( url( URL1 ) ) )
URL2 <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-2010.rds" d2 <- readRDS( gzcon( url( URL2 ) ) )
URLmd <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-META-DATA.rds" md <- readRDS( gzcon( url( URLmd ) ) )
d1 <- select( d1, - year ) d2 <- select( d2, - year )
d <- merge( d1, d2, by="tractid" ) d <- merge( d, md, by="tractid" )
x <- d$tractid
x <- gsub( "fips", "", x ) x <- gsub( "-", "", x )
x <- as.numeric( x )
d$tractid2 <- x
tus <- merge( tus, d, by.x="GEOID", by.y="tractid", all.x=T )
```
```{r, echo=FALSE, warning = FALSE, message = FALSE}
rm( list = ls() )
library( dplyr ) library( knitr ) library( pander ) library( stargazer ) library( scales )
set.seed( 1234 )
s.type <- "text"
panel.cor <- function(x, y, digits=2, prefix="", cex.cor) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- abs(cor(x, y)) txt <- format(c(r, 0.123456789), digits=digits)[1] txt <- paste(prefix, txt, sep="") if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
test <- cor.test(x,y) # borrowed from printCoefmat Signif <- symnum(test$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("", "", "", ".", " "))
text(0.5, 0.5, txt, cex = 1.5 ) text(.7, .8, Signif, cex=cex, col=2) }
panel.smooth <- function (x, y, col = par("col"), bg = NA, pch = par("pch"), cex = 0.5, col.smooth = "red", span = 2/3, iter = 3, ...) { points(x, y, pch = 19, col = gray(0.7,0.2), bg = bg, cex = cex) ok <- is.finite(x) & is.finite(y) if (any(ok)) lines(stats::lowess(x[ok], y[ok], f = span, iter = iter), col = col.smooth, lwd=2, ...) }
jplot <- function( x1, x2, lab1="", lab2="", draw.line=T, ... ) {
plot( x1, x2, pch=19, col=gray(0.6, alpha = 0.2), cex=0.5,
bty = "n", xlab=lab1, ylab=lab2, cex.lab=1.5, ... )
if( draw.line==T ){ ok <- is.finite(x1) & is.finite(x2) lines( lowess(x2[ok]~x1[ok]), col="red", lwd=3 ) }
}
URL1 <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-2000.rds" d1 <- readRDS( gzcon( url( URL1 ) ) )
URL2 <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-2010.rds" d2 <- readRDS( gzcon( url( URL2 ) ) )
URLmd <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-META-DATA.rds" md <- readRDS( gzcon( url( URLmd ) ) )
d1 <- select( d1, - year ) d2 <- select( d2, - year )
d <- merge( d1, d2, by="tractid" ) d <- merge( d, md, by="tractid" )
d <- filter( d, urban == "urban" )
d <- select( d, tractid, mhmval00, mhmval12, unemp00, clf00, vac00,hu00, npov00, dpov00, cbsa, cbsaname )
d <- d %>% mutate( p.unemp = 100 * unemp00 / clf00, p.vacant = 100 * vac00 / hu00, pov.rate = 100 * npov00 / dpov00 )
mhv.00 <- d$mhmval00 * 1.28855
mhv.10 <- d$mhmval12
mhv.change <- mhv.10 - mhv.00
mhv.00[ mhv.00 < 1000 ] <- NA
mhv.growth <- 100 * ( mhv.change / mhv.00 )
mhv.growth[ mhv.growth > 200 ] <- NA
d$mhv.00 <- mhv.00 d$mhv.10 <- mhv.10 d$mhv.change <- mhv.change d$mhv.growth <- mhv.growth
d <- d %>% group_by( cbsaname ) %>% mutate( metro.mhv.change = median( mhv.change, na.rm=T ), metro.mhv.growth = 100 * median( mhv.growth, na.rm=T ) ) %>% ungroup()
hist( mhv.00, breaks=200, xlim=c(0,500000), col="gray20", border="white", axes=F, xlab="MHV", ylab="", main="Median Home Value in 2000 (2010 US dollars)" )
axis( side=1, at=seq(0,500000,100000), labels=c("$0","$100k","$200k","$300k","$400k","$500k") )
abline( v=median( mhv.00, na.rm=T ), col="orange", lwd=3 )
hist( mhv.10, breaks=200, xlim=c( 0, max( mhv.10, na.rm = TRUE ) ), col="gray20", border="white", axes=F, xlab="MHV", ylab="", main="Median Home Value in 2010" )
axis( side=1, at=seq(0,1000000,250000), labels=c("$0","$250k","$500k","$750k","$1mil") )
abline( v=median( mhv.10, na.rm=T ), col="lightblue", lwd=3 )
hist( mhv.change/1000, breaks=500, xlim=c(-100,500), yaxt="n", xaxt="n", xlab="Thousand of US Dollars (adjusted to 2010)", cex.lab=1.5, ylab="", main="Change in Median Home Value 2000 to 2010", col="gray20", border="white" )
axis( side=1, at=seq( from=-100, to=500, by=100 ), labels=paste0( "$", seq( from=-100, to=500, by=100 ), "k" ) )
mean.x <- mean( mhv.change/1000, na.rm=T ) abline( v=mean.x, col="darkorange", lwd=2, lty=2 ) text( x=200, y=1500, labels=paste0( "Mean = ", dollar( round(1000*mean.x,0)) ), col="darkorange", cex=1.8, pos=3 )
median.x <- median( mhv.change/1000, na.rm=T ) abline( v=median.x, col="dodgerblue", lwd=2, lty=2 ) text( x=200, y=2000, labels=paste0( "Median = ", dollar( round(1000*median.x,0)) ), col="dodgerblue", cex=1.8, pos=3 )
hg <- hist( mhv.growth, breaks=5000, xlim=c(-100,200), yaxt="n", xaxt="n", xlab="", cex.main=1.5, ylab="", main="Growth in Home Value by\n Census Tract 2000 to 2010", col="gray40" )
axis( side=1, at=seq( from=-100, to=200, by=50 ), labels=paste0( seq( from=-100, to=200, by=50 ), "%" ) )
ymax <- max( hg$count )
mean.x <- mean( mhv.growth, na.rm=T ) abline( v=mean.x, col="firebrick", lwd=2, lty=2 ) text( x=75, y=(0.5*ymax), labels=paste0( "Mean = ", round(mean.x,0), "%"), col="firebrick", cex=1.2, pos=4 )
median.x <- median( mhv.growth, na.rm=T ) abline( v=median.x, col="darkgreen", lwd=2, lty=2 ) text( x=75, y=(0.6*ymax), labels=paste0( "Median = ", round(median.x,0), "%"), col="darkgreen", cex=1.2, pos=4 ) ```
```{r} these.variables <- c( "pnhwht12", "pnhblk12", "phisp12", "pntv12", "pfb12", "polang12", "phs12", "pcol12", "punemp12", "pflabf12", "pprof12", "pmanuf12", "pvet12", "psemp12", "hinc12", "incpc12", "ppov12", "pown12", "pvac12", "pmulti12", "mrent12", "mhmval12", "p30old12", "p10yrs12", "p18und12", "p60up12", "p75up12", "pmar12", "pwds12", "pfhh12" )
descriptive.labels <- c( "Percent white, non-Hispanic", "Percent black, non-Hispanic", "Percent Hispanic", "Percent Native American race", "Percent foreign born", "Percent speaking other language at home, age 5 plus", "Percent with high school degree or less", "Percent with 4-year college degree or more", "Percent unemployed", "Percent female labor force participation", "Percent professional employees", "Percent manufacturing employees", "Percent veteran", "Percent self-employed", "Median HH income, total", "Per capita income", "Percent in poverty, total", "Percent owner-occupied units", "Percent vacant units", "Percent multi-family units", "Median rent", "Median home value", "Percent structures more than 30 years old", "Percent HH in neighborhood 10 years or less", "Percent 17 and under, total", "Percent 60 and older, total", "Percent 75 and older, total", "Percent currently married, not separated", "Percent widowed, divorced and separated", "Percent female-headed families with children" )
choiceNames and variable
names as choiceValuesradioButtons( inputId = "demographics", label = h3("Census Variables"), choiceNames = descriptive.labels, choiceValues = these.variables, selected = "pnhwht12" )
```
```{r}
tus <- geojsonread( "~/Community Analytics/tusdorling.geojson", what="sp" )
tus2 <- spTransform( tus, CRS("+init=epsg:3395") )
tus.sf <- stassf( tus2 )
d <- as.data.frame( tus.sf )
renderPlot({
get_data <- reactive({ tus.sf <- tus.sf %>% mutate( q =
ntile( get(input$demographics), 10 ) )
})
ggplot( getdata() ) + geomsf( aes( fill = q ), color=NA ) + coordsf( datum=NA ) + labs( title = paste0( "Choropleth of Select Demographics: ", toupper(input$demographics) ), caption = "Source: Harmonized Census Files", fill = "Population Deciles" ) + scalefill_gradientn( colours=rev(ocean.balance(10)), guide = "colourbar" ) + xlim( xmin = -12519146, xmax = -12421368 ) + ylim( ymin = 3899074, ymax = 3965924 )
})
```
```{r} renderPlot({
x <- d[ "pnhwht12" ] %>% unlist()
getvariablex <- reactive({ d[ input$demographics ] })
x <- getvariablex() %>% unlist()
cut.points <- quantile( x, seq( 0, 1, 0.1 ) )
hist( x, breaks=50, col="gray", border="white", yaxt="n", main=paste0( "Histogram of variable ", toupper( input$demographics ) ), xlab="red lines represent decile cut points" )
abline( v=cut.points, col="darkred", lty=3, lwd=2 )
}) ```
```{r}
bb <- stbbox( c( xmin = -12519146, xmax = -12421368, ymax = 3965924, ymin = 3899074 ), crs = stcrs("+init=epsg:3395"))
# "2" = "Old Homeowners", # "3" = "Young and Educated",
# "5" = "Multi-Lingual Families", # "6" = "Black Homeowners"
# tmshape(tus2, bbox = bb) + # tmpolygons(col = "cluster_named", palette = "Accent", title = "Community Types")
```
```{r}
button.labels <- c("Median Home Value 2000","Median Home Value 2010","Value Change 2000-2010","Growth in Home Value") button.values <- c("mhv.2000","mhv.2010","mhv.change","mhv.growth")
radioButtons( inputId="home.value", label = h3("Home Values"), # choices = these.variables, choiceNames=button.labels, choiceValues=button.values, selected="mhv.2000")
```
```{r}
renderPlot({
get_data <- reactive({ phx.sf <- phx.sf %>% mutate( q =
ntile( get(input$home.value), 10 ) )
})
ggplot( getdata() ) + geomsf( aes( fill = q ), color=NA ) + coordsf( datum=NA ) + labs( title = paste0( "Spatial Distribution of Home Values: ", toupper(input$demographics) ), caption = "Source: Harmonized Census Files", fill = "Home Value Deciles" ) + scalefill_gradientn( colours=rev(ocean.balance(10)), guide = "colourbar" ) + xlim( xmin = -12519146, xmax = -12421368 ) + ylim( ymin = 3899074, ymax = 3965924 )
})
```
```{r} renderPlot({
getvariablex <- reactive({ d[ input$home.value ] })
x <- getvariablex() %>% unlist() %>% as.numeric()
cut.points <- quantile( x, seq( 0, 1, 0.1 ) )
hist( x, breaks=50, col="gray", border="white", yaxt="n", main=paste0( "Histogram of ", toupper( input$home.value ) ), xlab="red lines represent decile cut points" )
abline( v=cut.points, col="darkred", lty=3, lwd=2 )
}) ```
```{r}
button.labels <- c("Median Home Value 2000","Median Home Value 2010","Value Change 2000-2010","Growth in Home Value") button.values <- c("mhv.2000","mhv.2010","mhv.change","mhv.growth")
radioButtons( inputId="dv", label = h3("Select Your Dependent Variable"), choiceNames=button.labels, choiceValues=button.values, selected="mhv.change")
covariates <- c("pnhwht12", "pnhblk12", "phisp12", "pntv12", "pfb12", "polang12", "phs12", "pcol12", "punemp12", "pflabf12", "pprof12", "pmanuf12", "pvet12", "psemp12", "hinc12", "incpc12", "ppov12", "pown12", "pvac12", "pmulti12", "mrent12", "mhmval12", "p30old12", "p10yrs12", "p18und12", "p60up12", "p75up12", "pmar12", "pwds12", "pfhh12")
checkboxGroupInput( inputId="covariates", label = h3("Select Variables for Your Model"), choices = covariates, # choiceNames=covariate.labels, # choiceValues=covariates, selected=c("pnhwht12","pprof12","pvac12") )
```
```{r, results="asis"}
get_covariates <- reactive({ input$covariates })
renderUI({
covariates <- get_covariates()
formula.text <- paste0( "mhmval12", " ~ ", paste( covariates, collapse=" + " ) ) fo <- as.formula( formula.text )
m <- lm( fo, data=d )
HTML(
c("
",
"
)
})
```
{r} pairs( iris )