San Antonio is one of the fastest growing cities in Texas. Between 2015 and 2020 the SA metro area added 300,000 new residents reaching a total of 2.6 million, and is expected to gain an additional 1 million residents by 2035 and another million by 2050 reaching a total of 4.4 million residents1. With population growth, an increased demand for land is expected which will in turn create neighborhood change pressures in more desirable locations throughout the SA metro area. [context]
Population growth –(+)–> Housing Demand –(+)–> Housing Prices –(+)–> Neighborhood Change
Assessing neighborhood change is key identify sings of potential gentrification and displacement and ultimately the loss of cultural value in urban communities. This research main question is:
Population growth –(+)–> Income –(+)–> Housing Demand –(+)–> Housing Prices –(+)–> Neighborhood Change
This empirical exercise will test and operationalize the following hypothesis:
This research will use American Community Survey (ACS) data from the
United States Census Bureau gathered using the tidycensus R
Package. A census tract will be categorized as having disproportionate
growth if it’s growth value is over the county median value. Each of the
hypotheses previously stated will be operationalized as:
B25077_001 variable (Median Home Value in Dollars).#downloading data using ACS API
bexar_homevalue_17 <- get_acs(geography = "tract", variables = "B25077_001",
state = "TX", county = "Bexar", geometry = TRUE,year = 2017)
bexar_homevalue_22 <- get_acs(geography = "tract", variables = "B25077_001",
state = "TX", county = "Bexar", geometry = FALSE,year = 2022)
bexar_medianincome_17 <- get_acs(geography = "tract", variables = "B25099_001",
state = "TX", county = "Bexar", geometry = TRUE,year = 2017)
bexar_medianincome_22 <- get_acs(geography = "tract", variables = "B25099_001",
state = "TX", county = "Bexar", geometry = FALSE,year = 2022)
Disproportionate home value growth is calculated as:
\[ Disp_{c,t_f-t_i} =(\frac{value_{c,t_f}}{value_{c,t_i}})-1 \] where for any census tract \(c\), percent change values are calculated using the final year \(t_f\) and the initial year \(t_i\) values of the same variable (\(value\))
#Fixing variable names
names(bexar_homevalue_17)[names(bexar_homevalue_17)%in%c("estimate","moe")] <-c("estimate_mhv_17","moe_mhv_17")
names(bexar_homevalue_22)[names(bexar_homevalue_22)%in%c("estimate","moe")] <-c("estimate_mhv_22","moe_mhv_22")
names(bexar_medianincome_17)[names(bexar_medianincome_17)%in%c("estimate","moe")] <-c("estimate_mhi_17","moe_mhi_17")
names(bexar_medianincome_22)[names(bexar_medianincome_22)%in%c("estimate","moe")] <-c("estimate_mhi_22","moe_mhi_22")
#Merging data
bexar_mhv<-merge(bexar_homevalue_17,bexar_homevalue_22,by="GEOID",sort = F)
bexar_mhi<-merge(bexar_medianincome_17,bexar_medianincome_22,by="GEOID",sort = F)
#Calculating the percentage change
bexar_mhv$mhv_per_change<-round(((bexar_mhv$estimate_mhv_22/bexar_mhv$estimate_mhv_17)-1),2)
bexar_mhi$mhi_per_change<-round(((bexar_mhi$estimate_mhi_22/bexar_mhi$estimate_mhi_17)-1),2)
#Calculating the indicator variable of which neighborhood is disproportionate (over the median of the county) or not
county_median_mhv<-quantile(bexar_mhv$mhv_per_change,0.5,na.rm=T)
county_median_mhi<-quantile(bexar_mhi$mhi_per_change,0.5,na.rm=T)
bexar_mhv$disp_mhv_pc<-as.numeric(bexar_mhv$mhv_per_change>=county_median_mhv)
bexar_mhi$disp_mhi_pc<-as.numeric(bexar_mhi$mhi_per_change>=county_median_mhi)
Descriptive Statistic Table for Median Home Values (mhv) in Bexar County
`mhv 2017`<-summary(bexar_mhv$estimate_mhv_17) # 2017 summary statistics
`mhv 2022`<-summary(bexar_mhv$estimate_mhv_22) # 2022 summary statistics
`Per. Change mhv` <-summary(bexar_mhv$mhv_per_change)
`mhi 2017`<-summary(bexar_mhi$estimate_mhi_17) # 2017 summary statistics
`mhi 2022`<-summary(bexar_mhi$estimate_mhi_22) # 2022 summary statistics
`Per. Change mhi` <-summary(bexar_mhv$mhi_per_change)
rbind(`mhv 2017`,`mhv 2022`,`Per. Change mhv`)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## mhv 2017 46400.0 7.865e+04 1.211e+05 1.481041e+05 177850.00 675600.00
## mhv 2022 66800.0 1.333e+05 1.908e+05 2.205041e+05 270750.00 845400.00
## Per. Change mhv -0.2 4.125e-01 5.150e-01 5.659942e-01 0.67 3.82
## NA's
## mhv 2017 8
## mhv 2022 9
## Per. Change mhv 9
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
bexar_data<-st_join(bexar_mhv,bexar_mhi)
library(classInt)
#defining breaks
bbins_mhv<-classIntervals(var = bexar_mhv$mhv_per_change,n = 5,style = "jenks")
bbins_mhi<-classIntervals(var = bexar_mhi$mhi_per_change,n = 5,style = "jenks")
library(mapview)
mapviewOptions(fgb = F)
mapview(bexar_mhv,zcol="mhv_per_change",at=bbins_mhv$brks)
mapview(bexar_mhi,zcol="mhi_per_change",at=bbins_mhi$brks)
library(graphics)
boxplot(bexar_data$mhi_per_change, bexar_data$disp_mhi_pc)
# Chi-square test
library(data.table)
library(foreign)
disp_table<-table(bexar_data$disp_mhi_pc,bexar_data$disp_mhv_pc)
disp.cs<-chisq.test(x = bexar_data$disp_mhi_pc, y = bexar_data$disp_mhv_pc,correct = F)
disp.cs
##
## Pearson's Chi-squared test
##
## data: bexar_data$disp_mhi_pc and bexar_data$disp_mhv_pc
## X-squared = 4.8052, df = 1, p-value = 0.02837
# the p-value is less than 0.05 so we can reject the Null Hypothesis so there is an association between the number of census tracts classified as having disproportionate income growth and disproportionate home value growth
# Cramer's V for a Chi-Square test
library(rcompanion)
disp_table2<-table(bexar_data$disp_mhi_pc,bexar_data$mhv_per_change)
cramerV(disp_table2)
## Cramer V
## 0.1926
#There is a small association between those variables
#Spearman correlation
cor.test(bexar_data$mhv_per_change, bexar_data$mhi_per_change, method="spearman")
## Warning in cor.test.default(bexar_data$mhv_per_change,
## bexar_data$mhi_per_change, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: bexar_data$mhv_per_change and bexar_data$mhi_per_change
## S = 1985762504, p-value = 0.0004466
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.07249136
#p value is less than 0.05 so there is some correlation between the variables
#ANOVA
CD<-read.csv("/Users/gabbyrodriguez/myrepo/midterm-exam-gabrielle-rodriguezbailon/data/council_districts.csv")
colnames(bexar_data)[1]="GEOID"
bexar_data<-merge(bexar_data,CD,by="GEOID",sort = T)
library(tidyr)
bexar_data %>%
drop_na(District)
## Simple feature collection with 3744 features and 24 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -98.80593 ymin: 29.11444 xmax: -98.18632 ymax: 29.76071
## Geodetic CRS: NAD83
## First 10 features:
## GEOID NAME.x.x variable.x.x
## 1 48029110100 Census Tract 1101, Bexar County, Texas B25077_001
## 2 48029110100 Census Tract 1101, Bexar County, Texas B25077_001
## 3 48029110100 Census Tract 1101, Bexar County, Texas B25077_001
## 4 48029110100 Census Tract 1101, Bexar County, Texas B25077_001
## 5 48029110100 Census Tract 1101, Bexar County, Texas B25077_001
## 6 48029110100 Census Tract 1101, Bexar County, Texas B25077_001
## 7 48029110100 Census Tract 1101, Bexar County, Texas B25077_001
## 8 48029110100 Census Tract 1101, Bexar County, Texas B25077_001
## 9 48029110100 Census Tract 1101, Bexar County, Texas B25077_001
## 10 48029110100 Census Tract 1101, Bexar County, Texas B25077_001
## estimate_mhv_17 moe_mhv_17 NAME.y.x
## 1 343500 67090 Census Tract 1101; Bexar County; Texas
## 2 343500 67090 Census Tract 1101; Bexar County; Texas
## 3 343500 67090 Census Tract 1101; Bexar County; Texas
## 4 343500 67090 Census Tract 1101; Bexar County; Texas
## 5 343500 67090 Census Tract 1101; Bexar County; Texas
## 6 343500 67090 Census Tract 1101; Bexar County; Texas
## 7 343500 67090 Census Tract 1101; Bexar County; Texas
## 8 343500 67090 Census Tract 1101; Bexar County; Texas
## 9 343500 67090 Census Tract 1101; Bexar County; Texas
## 10 343500 67090 Census Tract 1101; Bexar County; Texas
## variable.y.x estimate_mhv_22 moe_mhv_22 mhv_per_change disp_mhv_pc
## 1 B25077_001 590300 68863 0.72 1
## 2 B25077_001 590300 68863 0.72 1
## 3 B25077_001 590300 68863 0.72 1
## 4 B25077_001 590300 68863 0.72 1
## 5 B25077_001 590300 68863 0.72 1
## 6 B25077_001 590300 68863 0.72 1
## 7 B25077_001 590300 68863 0.72 1
## 8 B25077_001 590300 68863 0.72 1
## 9 B25077_001 590300 68863 0.72 1
## 10 B25077_001 590300 68863 0.72 1
## GEOID.y NAME.x.y variable.x.y
## 1 48029110700 Census Tract 1107, Bexar County, Texas B25099_001
## 2 48029110700 Census Tract 1107, Bexar County, Texas B25099_001
## 3 48029110700 Census Tract 1107, Bexar County, Texas B25099_001
## 4 48029192100 Census Tract 1921, Bexar County, Texas B25099_001
## 5 48029192100 Census Tract 1921, Bexar County, Texas B25099_001
## 6 48029192100 Census Tract 1921, Bexar County, Texas B25099_001
## 7 48029110600 Census Tract 1106, Bexar County, Texas B25099_001
## 8 48029110600 Census Tract 1106, Bexar County, Texas B25099_001
## 9 48029110600 Census Tract 1106, Bexar County, Texas B25099_001
## 10 48029110300 Census Tract 1103, Bexar County, Texas B25099_001
## estimate_mhi_17 moe_mhi_17 NAME.y.y
## 1 38259 10726 Census Tract 1107; Bexar County; Texas
## 2 38259 10726 Census Tract 1107; Bexar County; Texas
## 3 38259 10726 Census Tract 1107; Bexar County; Texas
## 4 93616 12354 Census Tract 1921; Bexar County; Texas
## 5 93616 12354 Census Tract 1921; Bexar County; Texas
## 6 93616 12354 Census Tract 1921; Bexar County; Texas
## 7 41157 24270 Census Tract 1106; Bexar County; Texas
## 8 41157 24270 Census Tract 1106; Bexar County; Texas
## 9 41157 24270 Census Tract 1106; Bexar County; Texas
## 10 75000 30295 Census Tract 1103; Bexar County; Texas
## variable.y.y estimate_mhi_22 moe_mhi_22 mhi_per_change disp_mhi_pc District
## 1 B25099_001 60938 21436 0.59 1 2
## 2 B25099_001 60938 21436 0.59 1 1
## 3 B25099_001 60938 21436 0.59 1 5
## 4 B25099_001 96042 21924 0.03 0 2
## 5 B25099_001 96042 21924 0.03 0 1
## 6 B25099_001 96042 21924 0.03 0 5
## 7 B25099_001 46250 42323 0.12 0 2
## 8 B25099_001 46250 42323 0.12 0 1
## 9 B25099_001 46250 42323 0.12 0 5
## 10 B25099_001 75938 6825 0.01 0 2
## Name geometry
## 1 Jalen McKee-Rodriguez MULTIPOLYGON (((-98.50172 2...
## 2 Mario Bravo MULTIPOLYGON (((-98.50172 2...
## 3 Teri Castillo MULTIPOLYGON (((-98.50172 2...
## 4 Jalen McKee-Rodriguez MULTIPOLYGON (((-98.50172 2...
## 5 Mario Bravo MULTIPOLYGON (((-98.50172 2...
## 6 Teri Castillo MULTIPOLYGON (((-98.50172 2...
## 7 Jalen McKee-Rodriguez MULTIPOLYGON (((-98.50172 2...
## 8 Mario Bravo MULTIPOLYGON (((-98.50172 2...
## 9 Teri Castillo MULTIPOLYGON (((-98.50172 2...
## 10 Jalen McKee-Rodriguez MULTIPOLYGON (((-98.50172 2...
library(data.table)
setDT(bexar_data)
bexar_data[,.N,by=District]
## District N
## <int> <int>
## 1: 2 399
## 2: 1 427
## 3: 5 383
## 4: 10 305
## 5: 9 361
## 6: 3 365
## 7: 4 348
## 8: 6 330
## 9: 7 442
## 10: 8 384
bexar_data<-bexar_data[District%in%c(1,2,3,4,5,6,7,8,9,10)]
library(ggplot2)
ggplot(data=bexar_data[,.(Mean_mhv=mean(mhv_per_change)),by=District], aes(x=District,y=Mean_mhv))+
geom_point()
## Warning: Removed 8 rows containing missing values (`geom_point()`).
hist(bexar_data$mhv_per_change)
ggplot(data=bexar_data, aes(x=District, y=mhv_per_change))+
geom_boxplot()
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## Warning: Removed 89 rows containing non-finite values (`stat_boxplot()`).
bexar_data_bp<-boxplot(bexar_data$mhv_per_change~bexar_data$District)
outliers <- bexar_data_bp$out
bexar_data2<-bexar_data[!mhv_per_change%in%outliers,]
boxplot(bexar_data2$mhv_per_change~bexar_data2$District)
bartlett.test(mhv_per_change ~ District, data=bexar_data2)
##
## Bartlett test of homogeneity of variances
##
## data: mhv_per_change by District
## Bartlett's K-squared = 417.08, df = 9, p-value < 2.2e-16
fit<-aov(bexar_data2$mhv_per_change~bexar_data2$District)
summary(fit)
## Df Sum Sq Mean Sq F value Pr(>F)
## bexar_data2$District 1 12.9 12.904 382.9 <2e-16 ***
## Residuals 3259 109.8 0.034
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 89 observations deleted due to missingness
library(ggplot2)
ggplot(data = bexar_data, mapping = aes(x=mhv_per_change, y=mhi_per_change))+
geom_point()
## Warning: Removed 240 rows containing missing values (`geom_point()`).
# The lower the change in median household values the lower the change in median household income
2050 demographic projections. Alamo Area Metropolitan Planning Organization https://aampo-mobility-2050-atginc.hub.arcgis.com↩︎