Loosely based off methodology from this paper: https://journals-sagepub-com.libweb.lib.utsa.edu/doi/10.1177/2332649218755178
Central question: how much of a single family home’s value in Bexar county can be attributed to it’s location within a certain school distric, or in school districts of a certain quality?
Parcels of single-family homes from Bexar County Appraisl District (BCAD) Data for 2019 downloaded from TNRIS (https://tnris.org/)
School District level data from 2019 ACS downloaded through tidycensus below + School District Quality data obtained from “Texas School District Report Cards” through the Texas Education Agency (TEA): https://txschools.gov/
Dependent Variable:
Independent Variables (All Z scored):
Parcel-level variables: Land Area (Square Feet)
School District variables:
Percent of Householders who are White, Non-Hispanic Homeowners
TEA School Rating (% rating)
#Use this to search for variables
#acs2019 <- load_variables(2019 , "acs5", cache = TRUE)
#cog2019 <- load_variables(2019 , "census of governments", cache = TRUE)
tx_sd<-get_acs(geography = "school district (unified)",
state="TX",
year = 2019,
variables=c("DP05_0001E",
"B11001_001E",
"B11001H_001E",
"B11001B_001E",
"B11001I_001E",
"B25107_001E",
"B25002_003E",
"B05003_014E",
"B05003_003E",
"DP03_0119PE",
"B25003_003E",
"B25003_002E",
"B19113_001E",
"DP03_0062E",
"B23025_005E",
"B23025_003E",
"B25002_001E",
"B25003_001E",
"B25021_001E",
"B25035_001E",
"B25003H_002E"),
geometry = T, output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Fetching data by table type ("B/C", "S", "DP") and combining the result.
#rename variables and filter missing cases
tx_sd<-tx_sd%>%
mutate(totpop=DP05_0001E,
totHH = B11001_001E,
whiteHH = B11001H_001E,
pwhiteHH = whiteHH/(totHH+0.000001),
blackHH =B11001B_001E,
pblackHH = blackHH/(totHH+0.000001),
hispHH =B11001I_001E,
phispHH =hispHH/(totHH+0.000001),
totRent = B25003_003E,
totOwn = B25003_002E,
medHVal=B25107_001E,
occHU=B25003_001E,
OwnOcc_rt=totOwn/occHU,
vacantHU = B25002_003E,
totHUs =B25002_001E,
vacn_rt = vacantHU/totHUs,
med_rms = B25021_001E,
med_yrblt = B25035_001E,
totUnder18 = B05003_014E + B05003_003E,
ppov=DP03_0119PE,
MFI=B19113_001E,
MHI=DP03_0062E,
unemp =B23025_005E,
laborf =B23025_003E,
unemp_rt=unemp/laborf,
totOwn = B25003_002E,
whiteOwn = B25003H_002E,
totRentOwn = totRent + totOwn,
pRent = totRent/totRentOwn,
pOwn = totOwn/totRentOwn,
pwhiteOwn = whiteOwn/totRentOwn)
#pull bexar County shapefile for context
bexar<-get_acs(geography = "county", variables=c("DP05_0001E","B25003_003E", "B25003_002E","B25003H_002E"), county="Bexar", state="TX", year=2019, geometry = T, output = "wide")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## Fetching data by table type ("B/C", "S", "DP") and combining the result.
bexar<-bexar%>%
mutate(totRent = B25003_003E,
totOwn = B25003_002E,
whiteOwn = B25003H_002E,
totRentOwn = totRent + totOwn,
pwhiteOwn = whiteOwn/totRentOwn)
#Spatial join to filter out only the school districts that intersect Bexar County (20 total)
bexar_sd<-st_join(tx_sd, left = TRUE, join=st_intersects, largest=TRUE, bexar["NAME"])
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
bexar_sd <- subset(bexar_sd,NAME.y == "Bexar County, Texas")
#import TEA district score data for 2019 and join to bexar school districts - https://txschools.gov/districts
library(readxl)
tea_sd_scores <- read_excel("~/Desktop/UTSA/5_Fall21/Spatial Dem/Final/bexar_sd_TEA_Grades.xlsx", sheet = "bexar_sd")
bexar_sd <- merge(bexar_sd, tea_sd_scores, by='GEOID')
bexar_sd<-bexar_sd%>%
mutate(SD_Name=NAME.x)
#pull in centroids of BCAD Parcel data (already filtered for sigle family parcels only - local land use = 1)
bcad <- st_read("~/Desktop/UTSA/5_Fall21/Spatial Dem/Final/TNRIS_Bexar_SingleFam_Centroids.shp")
#pull in polygons of BCAD Parcel data
#bcad_poly <- st_read("~/Desktop/UTSA/5_Fall21/Spatial Dem/Final/TNRIS_Bexar_LocLU1_SingleFam.shp")
#Spatial Join to get parcels by school district
#bcad_proj<-st_transform(bcad,4269)
bcad_sd_join <- st_join(bcad, left = TRUE, join=st_within, bexar_sd)
# There are a large number of parcels without "legal area" data, which would be data from deeds as the legally recognized area of the parcel. (344,629 out of 355,318 variables did not have legal data). For this reason, I will use the GIS data. This is not a legal measure of the area, but can be used for our purposes to make approximations.
#bcad_sd_join<-bcad_sd_join%>% mutate(lot_sf=GIS_AREA*43560)
#Subseting to get only single family (Property Use Code = 1) since some exempt/rural areas remained in the data. And removing data where Year built = 210 (removes zeros and two values that are 210)
bcad_sd_join <- subset(bcad_sd_join, YEAR_BUILT>210 & MKT_VALUE > 0 & STAT_LAND_ == 'A1' & LAND_VALUE >0)
summary(bcad_sd_join)
#Preview Maps
library(tmap)
## Registered S3 methods overwritten by 'stars':
## method from
## st_bbox.SpatRaster sf
## st_crs.SpatRaster sf
tm_shape(bexar_sd)+
tm_polygons("SD_Name",title="School District",border.alpha = 0.5)+
tm_shape(bexar)+
tm_polygons(alpha=0, col = "MAP_COLORS",border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
## Legend labels were too wide. The labels have been resized to 0.44, 0.51, 0.52, 0.46, 0.35, 0.48, 0.41, 0.47, 0.51, 0.49, 0.44, 0.48, 0.26, 0.43, 0.46, 0.34, 0.49, 0.40, 0.48, 0.48. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
tm_shape(bexar_sd)+
tm_polygons("TEA_Grade",title="TEA School District Letter Grade 2019",border.alpha = 0.5)+
tm_shape(bexar)+
tm_polygons(alpha=0, col = "MAP_COLORS",border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(bexar_sd)+
tm_polygons("TEA_Rating",title="TEA School District Rating (out of 100) 2019",border.alpha = 0.5)+
tm_shape(bexar)+
tm_polygons(alpha=0, col = "MAP_COLORS",border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(bexar_sd)+
tm_polygons("MFI",title="Median Family Income, 2019",border.alpha = 0.5)+
tm_shape(bexar)+
tm_polygons(alpha=0, col = "MAP_COLORS",border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(bexar_sd)+
tm_polygons("MHI",title="Median Household Income, 2019",border.alpha = 0.5)+
tm_shape(bexar)+
tm_polygons(alpha=0, col = "MAP_COLORS",border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(bexar_sd)+
tm_polygons("medHVal",title="Median Home Value, 2019",border.alpha = 0.5)+
tm_shape(bexar)+
tm_polygons(alpha=0, col = "MAP_COLORS",border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(bexar_sd)+
tm_polygons("med_rms",title="Median Rooms per Dwelling Unit (owned and rented), 2019",border.alpha = 0.5)+
tm_shape(bexar)+
tm_polygons(alpha=0, col = "MAP_COLORS",border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(bexar_sd)+
tm_polygons("ppov",title="Percent Poverty Rate, 2019",border.alpha = 0.5)+
tm_shape(bexar)+
tm_polygons(alpha=0,border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(bexar_sd)+
tm_polygons("pwhiteHH", title="Percent of White HHs", border.alpha = 0.5)+
tm_shape(bexar)+
tm_polygons(alpha=0, border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(bexar_sd)+
tm_polygons("pblackHH", title="Percent of Black HHs", border.alpha = 0.5)+
tm_shape(bexar)+
tm_polygons(alpha=0, border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(bexar_sd)+
tm_polygons("phispHH", title="Percent of Hispanic HHs", border.alpha = 0.5)+
tm_shape(bexar)+
tm_polygons(alpha=0, border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(bexar_sd)+
tm_polygons("pwhiteOwn", title="Percent of White, NonHispanic Homeowner Households", border.alpha = 0.5, style="quantile")+
tm_shape(bexar)+
tm_polygons(alpha=0, border.col = "black",border.alpha=1)+
tm_format("World", title="Bexar County School Districts", legend.outside=T)+
tm_scale_bar(position="LEFT")+
tm_compass()
#bcad_sd_join$TEA_Grade <- as.factor(bcad_sd_join$TEA_Grade)
#bcad_sd_join$TEA_Grade <- ordered(bcad_sd_join$TEA_Grade, levels = c("A", "B", "C"))
bcad_sd_join$TEA_A <- ifelse(bcad_sd_join$TEA_Grade == 'A', 1, 0)
bcad_sd_join$TEA_B <- ifelse(bcad_sd_join$TEA_Grade == 'B', 1, 0)
bcad_sd_join$TEA_C <- ifelse(bcad_sd_join$TEA_Grade == 'C', 1, 0)
bcad_sd_join$YearBuilt_c <-Recode(bcad_sd_join$YEAR_BUILT, recodes="0:1960='Before 1960'; 1961:1985='1961-1985'; 1986:2000='1985-2000'; 2000:2017='After 2000'", as.factor=T)
bcad_sd_join<-bcad_sd_join%>%
mutate(log_mktval = log(MKT_VALUE),
mktval_10k= MKT_VALUE/10000,
pct_nonWhtHH = 1-pwhiteHH,
mktlogpsf = log(dollarsPSF),
landPSF = LAND_VALUE/land_sf,
log_landPSF = log(landPSF),
logland = log(LAND_VALUE),
imprvRATIO = LAND_VALUE/(LAND_VALUE + IMP_VALUE))
bcad_sd_join$PctNW_c <-Recode(bcad_sd_join$pct_nonWhtHH, recodes="0:0.5='Less than 50%'; 0.5:1='More than 50%'", as.factor=T)
quantile(bcad_sd_join$pct_nonWhtHH)
## 0% 25% 50% 75% 100%
## 0.2016372 0.5017621 0.6406382 0.8279033 0.9711224
#Independent vars
##Parcel-level variables: Lot square footage, Year Built
#School District Racial/Ethnic Composition: Percent White NH Households, Percent Hispanic/Latino Households, Percent Black NH Households
#School District Socioeconomic Attributes: Owner Occupancy Rate, Poverty Rate, Unemployment Rate
#School District Housing Stock: Median Number of Rooms, Median Year Built, Vacancy Rate
#School District Quality: TEA School Ratings (letter grade and % rating)
bcad_dat<-dplyr::select(bcad_sd_join, c('PROP_ID', 'log_mktval', 'mktval_10k', 'MKT_VALUE', 'LAND_VALUE', 'imprvRATIO', 'logland', 'dollarsPSF', 'mktlogpsf', 'landPSF', 'log_landPSF', 'land_sf','YEAR_BUILT', 'YearBuilt_c', 'SD_Name','totpop','totHH','whiteHH','pwhiteHH', 'pct_nonWhtHH', 'PctNW_c', 'blackHH','pblackHH','hispHH','phispHH', 'OwnOcc_rt', 'ppov', ,'pwhiteOwn','unemp_rt', 'med_rms', 'med_yrblt', 'vacn_rt', 'TEA_Rating', 'TEA_Grade', 'TEA_A', 'TEA_B', 'TEA_C',))
bcad_dat<-na.omit(bcad_dat)
# Examining Improvement Ratio
histogram(bcad_dat$imprvRATIO)
summary(bcad_dat$imprvRATIO)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.007964 0.153844 0.178981 0.197305 0.211341 1.000000
histogram(scale(bcad_dat$imprvRATIO))
bcad_dat$impvRt_Z <- scale(bcad_dat$imprvRATIO)
# Removing outliers with Z scores (for land SF, land price per square foot, log of land price psf, and Year built), and removing two school districts that have less than 10 parcels (all other districts have more than 1,000). Leaving us with 15 districts total to examine with single family parcels out of the 20 that cross Bexar County.
bcad_dat$land_Z <- scale(bcad_dat$land_sf)
bcad_dat$land_psf_Z <- scale(bcad_dat$landPSF)
bcad_dat$log_land_psf_Z <- scale(bcad_dat$log_landPSF)
bcad_dat$year_Z <- scale(bcad_dat$YEAR_BUILT)
bcad_dat <- subset(bcad_dat, land_Z > -3 & land_Z < 3 & year_Z > -3 & year_Z < 3 & land_psf_Z > -3 & land_psf_Z < 3 & log_land_psf_Z > -3 & log_land_psf_Z < 3 & SD_Name != "Fort Sam Houston Independent School District, Texas" & SD_Name != "Somerset Independent School District, Texas")
quantile(bcad_dat$YEAR_BUILT)
## 0% 25% 50% 75% 100%
## 1906 1962 1984 2003 2017
summary(bcad_dat$land_sf)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 40 8262 10104 12208 12926 79709
histogram(bcad_dat$log_landPSF)
summary(bcad_dat$log_landPSF)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.8453 0.5532 0.9625 0.9917 1.4269 2.7673
bcad_df <- as.data.frame(bcad_dat)
bcad_df <- bcad_df %>% group_by(SD_Name)
table <- bcad_df %>% summarise(
meanval = mean(MKT_VALUE),
n = n(),
pct_nonWhtHH = mean(pct_nonWhtHH),
mean_sf = mean(land_sf),
med_yrblt = median(YEAR_BUILT),
mean_mktvalue_sf = mean(dollarsPSF),
mean_landvalue_sf = mean(landPSF),
md_sf = median(land_sf),
md_mktvalue_sf = median(dollarsPSF),
md_landvalue_sf = median(landPSF),
TEA_Rating = median(TEA_Rating),
TEA_Grade = max(TEA_Grade),
pwhiteOwn = mean(pwhiteOwn))
plot(log(table$md_landvalue_sf),table$pwhiteOwn)
plot(log(table$mean_landvalue_sf),table$TEA_Rating)
plot(log(table$mean_landvalue_sf),table$med_yrblt)
plot(table$pwhiteOwn,table$TEA_Rating)
boxplot(pwhiteOwn~TEA_Grade,data=bcad_dat, main="Exploratory",
xlab="TEA District Grade", ylab="Percent White Homeowners in District")
boxplot(landPSF~TEA_Grade,data=bcad_dat, main="Exploratory",
xlab="TEA District Grade", ylab="Land Value")
boxplot(log_landPSF~pwhiteOwn,data=bcad_dat, main="x",
xlab="Pct White Homeowners in District", ylab="Log(LandValue Per Square Foot)")
plot(bcad_dat$log_landPSF,bcad_dat$pwhiteOwn)
boxplot(logland~SD_Name,data=bcad_dat, main="Box Plot of Land Value by School District",
xlab="School District ID", ylab="Log of Land Value by Parcel")
boxplot(logland~TEA_Grade,data=bcad_dat, main="Box Plot of Land Value by School District Grade",
xlab="School District Letter Grade", ylab="Log of Land Value by Parcel")
plot(scale(bcad_dat$pwhiteOwn),bcad_dat$log_landPSF)
Reference:
CSparks Stats II: https://rpubs.com/corey_sparks/376202
How to interpret + report significance: https://jontalle.web.engr.illinois.edu/MISC/lme4/bw_LME_tutorial.pdf
It took me about a day to realize that I was interpreting the significance wrong. I thought my model was just really miserable. But, I am still unsure how to test for significance unfortunately.
# Dummy variable for pct white homeowners (if higher than top quantile/Bexar county average = 27%)
bcad_dat$HighWhiteOwn <- ifelse(bcad_dat$pwhiteOwn > '0.3', 1, 0)
# Dummy variable for TEA Score (higher than 89)
bcad_dat$TEA_Top <- ifelse(bcad_dat$TEA_Rating > '88', 1, 0)
plot(scale(bcad_dat$TEA_Rating),scale(bcad_dat$pwhiteOwn))
#Dummy variable to create index of high performing and high pct white homeowners
bcad_dat$Top_TEA_WhiteOwn <- ifelse((bcad_dat$HighWhiteOwn + bcad_dat$TEA_Top) == '2', 1, 0)
# Model 1: Examining effect of just school district rating on land values
fit1<- lmer(logland~scale(land_sf)+scale(bcad_dat$TEA_Rating)+scale(bcad_dat$pwhiteOwn)+(1|SD_Name), data=bcad_dat)
arm::display(fit1, detail=T)
## lmer(formula = logland ~ scale(land_sf) + scale(bcad_dat$TEA_Rating) +
## scale(bcad_dat$pwhiteOwn) + (1 | SD_Name), data = bcad_dat)
## coef.est coef.se t value
## (Intercept) 10.24 0.09 108.54
## scale(land_sf) 0.22 0.00 324.62
## scale(bcad_dat$TEA_Rating) 0.06 0.09 0.68
## scale(bcad_dat$pwhiteOwn) 0.19 0.09 2.23
##
## Error terms:
## Groups Name Std.Dev.
## SD_Name (Intercept) 0.32
## Residual 0.45
## ---
## number of obs: 465911, groups: SD_Name, 15
## AIC = 584865, DIC = 584806.6
## deviance = 584830.0
summary(fit1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## logland ~ scale(land_sf) + scale(bcad_dat$TEA_Rating) + scale(bcad_dat$pwhiteOwn) +
## (1 | SD_Name)
## Data: bcad_dat
##
## REML criterion at convergence: 584853.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.0376 -0.6646 -0.0270 0.5985 5.6054
##
## Random effects:
## Groups Name Variance Std.Dev.
## SD_Name (Intercept) 0.1023 0.3198
## Residual 0.2054 0.4532
## Number of obs: 465911, groups: SD_Name, 15
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.024e+01 9.430e-02 1.199e+01 108.542 <2e-16 ***
## scale(land_sf) 2.218e-01 6.833e-04 4.659e+05 324.624 <2e-16 ***
## scale(bcad_dat$TEA_Rating) 5.800e-02 8.503e-02 1.201e+01 0.682 0.508
## scale(bcad_dat$pwhiteOwn) 1.936e-01 8.700e-02 1.201e+01 2.225 0.046 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scl(_) s(_$TE
## scl(lnd_sf) 0.000
## sc(_$TEA_R) 0.434 0.000
## scl(bcd_$O) -0.475 -0.002 -0.809
# Check for multicolinearity
vif(fit1)
## scale(land_sf) scale(bcad_dat$TEA_Rating)
## 1.000011 2.889795
## scale(bcad_dat$pwhiteOwn)
## 2.889802