1. Introduction

San Antonio is one of the fastest growing cities in Texas. Between 2015 and 2020 the SA metro area added 300,000 new residents reaching a total of 2.6 million, and is expected to gain an additional 1 million residents by 2035 and another million by 2050 reaching a total of 4.4 million residents1. With population growth, an increased demand for land is expected which will in turn create neighborhood change pressures in more desirable locations throughout the SA metro area. [context]

1.1 Conceptual framework

Population growth –(+)–> Housing Demand –(+)–> Housing Prices –(+)–> Neighborhood Change

Assessing neighborhood change is key identify sings of potential gentrification and displacement and ultimately the loss of cultural value in urban communities. This research main question is:

Population growth –(+)–> Income –(+)–> Housing Demand –(+)–> Housing Prices –(+)–> Neighborhood Change

1.2 Hypotheses

This empirical exercise will test and operationalize the following hypothesis:

1.3 Data & Variables of interest (operationalization)

This research will use American Community Survey (ACS) data from the United States Census Bureau gathered using the tidycensus R Package. A census tract will be categorized as having disproportionate growth if it’s growth value is over the county median value. Each of the hypotheses previously stated will be operationalized as:

#downloading data using ACS API
bexar_homevalue_17 <- get_acs(geography = "tract", variables = "B25077_001",
                           state = "TX", county = "Bexar", geometry = TRUE,year = 2017)
bexar_homevalue_22 <- get_acs(geography = "tract", variables = "B25077_001",
                           state = "TX", county = "Bexar", geometry = FALSE,year = 2022)
bexar_medianincome_17 <- get_acs(geography = "tract", variables = "B25099_001",
                           state = "TX", county = "Bexar", geometry = TRUE,year = 2017)
bexar_medianincome_22 <- get_acs(geography = "tract", variables = "B25099_001",
                           state = "TX", county = "Bexar", geometry = FALSE,year = 2022)
1.4 Calculating disproportionate home value value growth

Disproportionate home value growth is calculated as:

\[ Disp_{c,t_f-t_i} =(\frac{value_{c,t_f}}{value_{c,t_i}})-1 \] where for any census tract \(c\), percent change values are calculated using the final year \(t_f\) and the initial year \(t_i\) values of the same variable (\(value\))

#Fixing variable names 
names(bexar_homevalue_17)[names(bexar_homevalue_17)%in%c("estimate","moe")] <-c("estimate_mhv_17","moe_mhv_17")
names(bexar_homevalue_22)[names(bexar_homevalue_22)%in%c("estimate","moe")] <-c("estimate_mhv_22","moe_mhv_22")

names(bexar_medianincome_17)[names(bexar_medianincome_17)%in%c("estimate","moe")] <-c("estimate_mhi_17","moe_mhi_17")
names(bexar_medianincome_22)[names(bexar_medianincome_22)%in%c("estimate","moe")] <-c("estimate_mhi_22","moe_mhi_22")

#Merging data
bexar_mhv<-merge(bexar_homevalue_17,bexar_homevalue_22,by="GEOID",sort = F)
bexar_mhi<-merge(bexar_medianincome_17,bexar_medianincome_22,by="GEOID",sort = F)


#Calculating the percentage change
bexar_mhv$mhv_per_change<-round(((bexar_mhv$estimate_mhv_22/bexar_mhv$estimate_mhv_17)-1),2)
bexar_mhi$mhi_per_change<-round(((bexar_mhi$estimate_mhi_22/bexar_mhi$estimate_mhi_17)-1),2)

#Calculating the indicator variable of which neighborhood is disproportionate (over the median of the county) or not
county_median_mhv<-quantile(bexar_mhv$mhv_per_change,0.5,na.rm=T)
county_median_mhi<-quantile(bexar_mhi$mhi_per_change,0.5,na.rm=T)

bexar_mhv$disp_mhv_pc<-as.numeric(bexar_mhv$mhv_per_change>=county_median_mhv)
bexar_mhi$disp_mhi_pc<-as.numeric(bexar_mhi$mhi_per_change>=county_median_mhi)

Descriptive Statistic Table for Median Home Values (mhv) in Bexar County

`mhv 2017`<-summary(bexar_mhv$estimate_mhv_17) # 2017 summary statistics
`mhv 2022`<-summary(bexar_mhv$estimate_mhv_22) # 2022 summary statistics
`Per. Change mhv` <-summary(bexar_mhv$mhv_per_change)

`mhi 2017`<-summary(bexar_mhi$estimate_mhi_17) # 2017 summary statistics
`mhi 2022`<-summary(bexar_mhi$estimate_mhi_22) # 2022 summary statistics
`Per. Change mhi` <-summary(bexar_mhv$mhi_per_change)

rbind(`mhv 2017`,`mhv 2022`,`Per. Change mhv`)
##                    Min.   1st Qu.    Median         Mean   3rd Qu.      Max.
## mhv 2017        46400.0 7.865e+04 1.211e+05 1.481041e+05 177850.00 675600.00
## mhv 2022        66800.0 1.333e+05 1.908e+05 2.205041e+05 270750.00 845400.00
## Per. Change mhv    -0.2 4.125e-01 5.150e-01 5.659942e-01      0.67      3.82
##                 NA's
## mhv 2017           8
## mhv 2022           9
## Per. Change mhv    9
1.5 Merge the home value and income data
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
bexar_data<-st_join(bexar_mhv,bexar_mhi)
1.6 Visualizing Data: Choropleth map
library(classInt)


#defining breaks
bbins_mhv<-classIntervals(var = bexar_mhv$mhv_per_change,n = 5,style = "jenks")
bbins_mhi<-classIntervals(var = bexar_mhi$mhi_per_change,n = 5,style = "jenks")

library(mapview)
mapviewOptions(fgb = F)

mapview(bexar_mhv,zcol="mhv_per_change",at=bbins_mhv$brks)
mapview(bexar_mhi,zcol="mhi_per_change",at=bbins_mhi$brks)
1.7 Visualizing Data: Boxplot
library(graphics)

boxplot(bexar_data$mhi_per_change, bexar_data$disp_mhi_pc)

1.8 Statistical Analysis:
# Chi-square test

library(data.table)
library(foreign)

disp_table<-table(bexar_data$disp_mhi_pc,bexar_data$disp_mhv_pc)

disp.cs<-chisq.test(x = bexar_data$disp_mhi_pc, y = bexar_data$disp_mhv_pc,correct = F) 
disp.cs
## 
##  Pearson's Chi-squared test
## 
## data:  bexar_data$disp_mhi_pc and bexar_data$disp_mhv_pc
## X-squared = 4.8052, df = 1, p-value = 0.02837
# the p-value is less than 0.05 so we can reject the Null Hypothesis so there is an association between the number of census tracts classified as having disproportionate income growth and disproportionate home value growth
1.9 Statistical Analysis:
# Cramer's V for a Chi-Square test

library(rcompanion)
disp_table2<-table(bexar_data$disp_mhi_pc,bexar_data$mhv_per_change)

cramerV(disp_table2)
## Cramer V 
##   0.1926
#There is a small association between those variables
1.10 Statistical Analysis:
#Spearman correlation

cor.test(bexar_data$mhv_per_change, bexar_data$mhi_per_change, method="spearman")
## Warning in cor.test.default(bexar_data$mhv_per_change,
## bexar_data$mhi_per_change, : Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  bexar_data$mhv_per_change and bexar_data$mhi_per_change
## S = 1985762504, p-value = 0.0004466
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.07249136
#p value is less than 0.05 so there is some correlation between the variables
1.11 Statistical Analysis:
#ANOVA

CD<-read.csv("/Users/gabbyrodriguez/myrepo/midterm-exam-gabrielle-rodriguezbailon/data/council_districts.csv")
colnames(bexar_data)[1]="GEOID"
bexar_data<-merge(bexar_data,CD,by="GEOID",sort = T)

library(tidyr)
bexar_data %>%
  drop_na(District)
## Simple feature collection with 3744 features and 24 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -98.80593 ymin: 29.11444 xmax: -98.18632 ymax: 29.76071
## Geodetic CRS:  NAD83
## First 10 features:
##          GEOID                               NAME.x.x variable.x.x
## 1  48029110100 Census Tract 1101, Bexar County, Texas   B25077_001
## 2  48029110100 Census Tract 1101, Bexar County, Texas   B25077_001
## 3  48029110100 Census Tract 1101, Bexar County, Texas   B25077_001
## 4  48029110100 Census Tract 1101, Bexar County, Texas   B25077_001
## 5  48029110100 Census Tract 1101, Bexar County, Texas   B25077_001
## 6  48029110100 Census Tract 1101, Bexar County, Texas   B25077_001
## 7  48029110100 Census Tract 1101, Bexar County, Texas   B25077_001
## 8  48029110100 Census Tract 1101, Bexar County, Texas   B25077_001
## 9  48029110100 Census Tract 1101, Bexar County, Texas   B25077_001
## 10 48029110100 Census Tract 1101, Bexar County, Texas   B25077_001
##    estimate_mhv_17 moe_mhv_17                               NAME.y.x
## 1           343500      67090 Census Tract 1101; Bexar County; Texas
## 2           343500      67090 Census Tract 1101; Bexar County; Texas
## 3           343500      67090 Census Tract 1101; Bexar County; Texas
## 4           343500      67090 Census Tract 1101; Bexar County; Texas
## 5           343500      67090 Census Tract 1101; Bexar County; Texas
## 6           343500      67090 Census Tract 1101; Bexar County; Texas
## 7           343500      67090 Census Tract 1101; Bexar County; Texas
## 8           343500      67090 Census Tract 1101; Bexar County; Texas
## 9           343500      67090 Census Tract 1101; Bexar County; Texas
## 10          343500      67090 Census Tract 1101; Bexar County; Texas
##    variable.y.x estimate_mhv_22 moe_mhv_22 mhv_per_change disp_mhv_pc
## 1    B25077_001          590300      68863           0.72           1
## 2    B25077_001          590300      68863           0.72           1
## 3    B25077_001          590300      68863           0.72           1
## 4    B25077_001          590300      68863           0.72           1
## 5    B25077_001          590300      68863           0.72           1
## 6    B25077_001          590300      68863           0.72           1
## 7    B25077_001          590300      68863           0.72           1
## 8    B25077_001          590300      68863           0.72           1
## 9    B25077_001          590300      68863           0.72           1
## 10   B25077_001          590300      68863           0.72           1
##        GEOID.y                               NAME.x.y variable.x.y
## 1  48029110700 Census Tract 1107, Bexar County, Texas   B25099_001
## 2  48029110700 Census Tract 1107, Bexar County, Texas   B25099_001
## 3  48029110700 Census Tract 1107, Bexar County, Texas   B25099_001
## 4  48029192100 Census Tract 1921, Bexar County, Texas   B25099_001
## 5  48029192100 Census Tract 1921, Bexar County, Texas   B25099_001
## 6  48029192100 Census Tract 1921, Bexar County, Texas   B25099_001
## 7  48029110600 Census Tract 1106, Bexar County, Texas   B25099_001
## 8  48029110600 Census Tract 1106, Bexar County, Texas   B25099_001
## 9  48029110600 Census Tract 1106, Bexar County, Texas   B25099_001
## 10 48029110300 Census Tract 1103, Bexar County, Texas   B25099_001
##    estimate_mhi_17 moe_mhi_17                               NAME.y.y
## 1            38259      10726 Census Tract 1107; Bexar County; Texas
## 2            38259      10726 Census Tract 1107; Bexar County; Texas
## 3            38259      10726 Census Tract 1107; Bexar County; Texas
## 4            93616      12354 Census Tract 1921; Bexar County; Texas
## 5            93616      12354 Census Tract 1921; Bexar County; Texas
## 6            93616      12354 Census Tract 1921; Bexar County; Texas
## 7            41157      24270 Census Tract 1106; Bexar County; Texas
## 8            41157      24270 Census Tract 1106; Bexar County; Texas
## 9            41157      24270 Census Tract 1106; Bexar County; Texas
## 10           75000      30295 Census Tract 1103; Bexar County; Texas
##    variable.y.y estimate_mhi_22 moe_mhi_22 mhi_per_change disp_mhi_pc District
## 1    B25099_001           60938      21436           0.59           1        2
## 2    B25099_001           60938      21436           0.59           1        1
## 3    B25099_001           60938      21436           0.59           1        5
## 4    B25099_001           96042      21924           0.03           0        2
## 5    B25099_001           96042      21924           0.03           0        1
## 6    B25099_001           96042      21924           0.03           0        5
## 7    B25099_001           46250      42323           0.12           0        2
## 8    B25099_001           46250      42323           0.12           0        1
## 9    B25099_001           46250      42323           0.12           0        5
## 10   B25099_001           75938       6825           0.01           0        2
##                     Name                       geometry
## 1  Jalen McKee-Rodriguez MULTIPOLYGON (((-98.50172 2...
## 2            Mario Bravo MULTIPOLYGON (((-98.50172 2...
## 3          Teri Castillo MULTIPOLYGON (((-98.50172 2...
## 4  Jalen McKee-Rodriguez MULTIPOLYGON (((-98.50172 2...
## 5            Mario Bravo MULTIPOLYGON (((-98.50172 2...
## 6          Teri Castillo MULTIPOLYGON (((-98.50172 2...
## 7  Jalen McKee-Rodriguez MULTIPOLYGON (((-98.50172 2...
## 8            Mario Bravo MULTIPOLYGON (((-98.50172 2...
## 9          Teri Castillo MULTIPOLYGON (((-98.50172 2...
## 10 Jalen McKee-Rodriguez MULTIPOLYGON (((-98.50172 2...
library(data.table)
setDT(bexar_data)
bexar_data[,.N,by=District]
##     District     N
##        <int> <int>
##  1:        2   399
##  2:        1   427
##  3:        5   383
##  4:       10   305
##  5:        9   361
##  6:        3   365
##  7:        4   348
##  8:        6   330
##  9:        7   442
## 10:        8   384
bexar_data<-bexar_data[District%in%c(1,2,3,4,5,6,7,8,9,10)]

library(ggplot2)
ggplot(data=bexar_data[,.(Mean_mhv=mean(mhv_per_change)),by=District], aes(x=District,y=Mean_mhv))+
  geom_point()
## Warning: Removed 8 rows containing missing values (`geom_point()`).

hist(bexar_data$mhv_per_change) 

ggplot(data=bexar_data, aes(x=District, y=mhv_per_change))+
  geom_boxplot()
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## Warning: Removed 89 rows containing non-finite values (`stat_boxplot()`).

bexar_data_bp<-boxplot(bexar_data$mhv_per_change~bexar_data$District)

outliers <- bexar_data_bp$out

bexar_data2<-bexar_data[!mhv_per_change%in%outliers,]

boxplot(bexar_data2$mhv_per_change~bexar_data2$District)

bartlett.test(mhv_per_change ~ District, data=bexar_data2)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  mhv_per_change by District
## Bartlett's K-squared = 417.08, df = 9, p-value < 2.2e-16
fit<-aov(bexar_data2$mhv_per_change~bexar_data2$District)
summary(fit)
##                        Df Sum Sq Mean Sq F value Pr(>F)    
## bexar_data2$District    1   12.9  12.904   382.9 <2e-16 ***
## Residuals            3259  109.8   0.034                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 89 observations deleted due to missingness
1.12 Statistical Analysis: Scatter Plot
library(ggplot2)

ggplot(data = bexar_data, mapping = aes(x=mhv_per_change, y=mhi_per_change))+
  geom_point()
## Warning: Removed 240 rows containing missing values (`geom_point()`).

# The lower the change in median household values the lower the change in median household income

References


  1. 2050 demographic projections. Alamo Area Metropolitan Planning Organization https://aampo-mobility-2050-atginc.hub.arcgis.com↩︎