#WRANGLING
###Why I pivoted
st_crs(census.1960.sf)
## Coordinate Reference System:
## User input: USA_Contiguous_Albers_Equal_Area_Conic
## wkt:
## PROJCRS["USA_Contiguous_Albers_Equal_Area_Conic",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["Degree",0.0174532925199433]]],
## CONVERSION["USA_Contiguous_Albers_Equal_Area_Conic",
## METHOD["Albers Equal Area",
## ID["EPSG",9822]],
## PARAMETER["Latitude of false origin",37.5,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-96,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",29.5,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",45.5,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["Not known."],
## AREA["United States (USA) - CONUS onshore - Alabama; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana; Maine; Maryland; Massachusetts; Michigan; Minnesota; Mississippi; Missouri; Montana; Nebraska; Nevada; New Hampshire; New Jersey; New Mexico; New York; North Carolina; North Dakota; Ohio; Oklahoma; Oregon; Pennsylvania; Rhode Island; South Carolina; South Dakota; Tennessee; Texas; Utah; Vermont; Virginia; Washington; West Virginia; Wisconsin; Wyoming."],
## BBOX[24.41,-124.79,49.38,-66.91]],
## ID["ESRI",102003]]
# using sf
census_cent_1960 <- st_centroid(census.1960.sf)
## Warning in st_centroid.sf(census.1960.sf): st_centroid assumes attributes are
## constant over geometries of x
st_crs(census_cent_1960)
## Coordinate Reference System:
## User input: USA_Contiguous_Albers_Equal_Area_Conic
## wkt:
## PROJCRS["USA_Contiguous_Albers_Equal_Area_Conic",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["Degree",0.0174532925199433]]],
## CONVERSION["USA_Contiguous_Albers_Equal_Area_Conic",
## METHOD["Albers Equal Area",
## ID["EPSG",9822]],
## PARAMETER["Latitude of false origin",37.5,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-96,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",29.5,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",45.5,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["Not known."],
## AREA["United States (USA) - CONUS onshore - Alabama; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana; Maine; Maryland; Massachusetts; Michigan; Minnesota; Mississippi; Missouri; Montana; Nebraska; Nevada; New Hampshire; New Jersey; New Mexico; New York; North Carolina; North Dakota; Ohio; Oklahoma; Oregon; Pennsylvania; Rhode Island; South Carolina; South Dakota; Tennessee; Texas; Utah; Vermont; Virginia; Washington; West Virginia; Wisconsin; Wyoming."],
## BBOX[24.41,-124.79,49.38,-66.91]],
## ID["ESRI",102003]]
nrow(census.1960.sf)
## [1] 1984
nrow(census_cent_2018)
## [1] 3217
ggplot() +
geom_sf(data = census.1960.sf, fill = 'white') +
labs(subtitle = "1960 Decennial Census Tracts")
ggplot() +
geom_sf(data = census.2018.sf, fill = 'white') +
labs(subtitle = "2018 ACS-5 Years Census Tracts")
ggplot() +
geom_sf(data = census.1960.sf, fill = 'white') +
geom_sf(data = census_cent_2018, color = 'red', size = 0.25, alpha = 0.3) +
labs(subtitle = "Centroids of 2018 Census Tracts on top of 1960 Decennial Census Tracts")
ggplot() +
geom_sf(data = census.2018.sf, fill = 'white') +
geom_sf(data = census_cent_1960, color = 'red', size = 0.25, alpha = 0.3) +
labs(subtitle = "Centroids of 1960 Census Tracts on top of 2018 ACS 5 Year Census Tracts")
#reading in sf
pa_churches <- st_read("./data/PA_Churches_geom.shp")
## Reading layer `PA_Churches_geom' from data source
## `/Users/matthewpalagyi/Documents/R Folder/'22 Folder/Term_Project/Data/PA_Churches_geom.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1923 features and 36 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -1.797693e+308 ymin: -1.797693e+308 xmax: -74.77794 ymax: 42.17825
## Geodetic CRS: WGS 84
pa_churches_1 <- pa_churches %>%
filter(Region != "Alabama")
catholic.sf <- pa_churches_1 %>%
select("City",
"Subregion",
"Postal",
"USER_CHURC",
"USER_ADDRE")
catholic.sf <- st_transform(catholic.sf, crs = st_crs(tracts_2010))
catholic.sf2 <- pa_churches_1 %>%
select(!1:36)
catholic.sf2 <- st_transform(catholic.sf2, crs = st_crs(tracts_2010))
#reading in sf
TALE_3CITIES <- st_read("./data/tale_of_3_cities.shp")
## Reading layer `tale_of_3_cities' from data source
## `/Users/matthewpalagyi/Documents/R Folder/'22 Folder/Term_Project/Data/tale_of_3_cities.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 3 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -79.99742 ymin: 39.94985 xmax: -74.00422 ymax: 40.70763
## Geodetic CRS: WGS 84
TALE.sf <- st_transform(TALE_3CITIES, crs = st_crs(tracts_2010))
#changing ID column to match the reality of the names of these cities
TALE.sf$Id[c(1, 2, 3)] <- c("Pittsburgh", "Philadelphia", "New York")
church density thing
#
# # EDA Graphics & last bits of wrangling new vars
# catholic.sf %>%
# st_transform('ESRI:102286') %>%
# distinct()
#
# PA_County <-
# st_read("./data/PaCounty2022_04.geojson")
# st_crs(PA_County)
#
# PA_County.sf <- st_transform(PA_County, crs = st_crs(catholic.sf))
#
# philly.sf <- PA_County %>%
# filter(COUNTY_NAM == "PHILADELPHIA")
#
# philly.sf <- st_transform(philly.sf, crs = st_crs(catholic.sf))
#
# pittsburgh.sf <- PA_County %>%
# filter(COUNTY_NAM == "ALLEGHENY")
#
# pittsburgh.sf <- st_transform(pittsburgh.sf, crs = st_crs(catholic.sf))
#
# ggplot() + geom_sf(data = PA_County.sf, fill = "grey40") +
# stat_density2d(data = data.frame(st_coordinates(catholic.sf)),
# aes(X, Y, fill = ..level.., alpha = ..level..),
# size = 0.01, bins = 40, geom = 'polygon') +
# scale_fill_gradient(low = "#25CB10", high = "#FA7800",
# breaks=c(0.000000003,0.00000003),
# labels=c("Minimum","Maximum"), name = "Density") +
# scale_alpha(range = c(0.00, 0.35), guide = "none") +
# labs(title = "Density of Catholic Churches, Pennsylvania") +
# mapTheme()
#
# # st_crs(tract_distinct)
# p1 <- ggplot() +
# geom_sf(data = PA_County.sf, fill = 'white') +
# # geom_sf(data = PA_tracts_1, fill = 'orange') +
# geom_sf(data = catholic.sf, color = 'red')
# Plot polygon + points
ggplot() +
geom_sf(data = tracts_2010, fill = 'white') +
geom_sf(data = catholic.sf, color = 'red', size = 0.25, alpha = 0.3) +
labs(subtitle = "Catholic Churches on top of 2010 Decennial Census Tracts")
# Intersection between polygon and points ---------------------------------
#how many catholic churches exist in each census tract
tracts_2010$Cath_Chur <- lengths(st_intersects(tracts_2010, catholic.sf))
tracts_2010 <- tracts_2010 %>%
relocate("Cath_Chur", .after = "PCT_FB_1970")
tracts_1960$Cath_Chur <- lengths(st_intersects(tracts_1960, catholic.sf))
tracts_1960 <- tracts_1960 %>%
relocate("Cath_Chur", .after = "PCT_FB_1960")
# This was found to be too small and arbitrary to use as an indicator - multiple census tracts should be allowed to share the same Catholic church
# Catholic Buffer (A Better Option Theoretically) ---------------------------------
tracts_1960$CATH_BUFF <- st_buffer(tracts_1960, 800) %>%
aggregate(mutate(catholic.sf2, counter = 1), ., sum) %>%
pull(counter)
tracts_1960 <- tracts_1960 %>%
relocate("CATH_BUFF", .after = "Cath_Chur")
tracts_1960$CATH_BUFF <- tracts_1960$CATH_BUFF %>% replace(is.na(.), 0)
tracts_2010$CATH_BUFF <- st_buffer(tracts_2010, 800) %>%
aggregate(mutate(catholic.sf2, counter = 1), ., sum) %>%
pull(counter)
tracts_2010 <- tracts_2010 %>%
relocate("CATH_BUFF", .after = "Cath_Chur")
tracts_2010$CATH_BUFF <- tracts_2010$CATH_BUFF %>% replace(is.na(.), 0)
###BIG METROs BUFFER
#Big Metro Buffer (Similar to above, but making these categorical variables for each city)
city_points_utm <- st_transform(TALE.sf, crs = st_crs(tracts_2010)) # a double check
#25 mile buffer - kinda arbitrary - but used typical distance from interstate beltway to the center of the city
#1960 DATA FIRST
##PITT
Pittsburgh_buffer <- city_points_utm %>%
filter(city_points_utm$Id == "Pittsburgh")
Pittsburgh_buffer <- st_buffer(Pittsburgh_buffer, 40233.6) #25 miles
tracts_1960$PITT_BUFF <- lengths(st_intersects(tracts_1960, Pittsburgh_buffer)) # done
tracts_1960 <- tracts_1960 %>%
relocate("PITT_BUFF", .after = "Cath_Chur")
##PHILLY
Philly_buffer <- city_points_utm %>%
filter(city_points_utm$Id == "Philadelphia")
Philly_buffer <- st_buffer(Philly_buffer, 40233.6) #25 miles
tracts_1960$PHIL_BUFF <- lengths(st_intersects(tracts_1960, Philly_buffer)) # done
tracts_1960 <- tracts_1960 %>%
relocate("PHIL_BUFF", .after = "PITT_BUFF")
##BIG APPLE
NY_buffer <- city_points_utm %>%
filter(city_points_utm$Id == "New York")
NY_buffer <- st_buffer(NY_buffer, 160934) #100 miles baby (NY is a diff beast and this also accounts for New Jersey)
tracts_1960$NY_BUFF_setup <- lengths(st_intersects(tracts_1960, NY_buffer)) # done
tracts_1960 <- tracts_1960 %>%
relocate("NY_BUFF_setup", .after = "PHIL_BUFF")
#filtering out New York Buffer that overlaps Philly's
tracts_1960 <- tracts_1960 %>%
mutate(NY_BUFF = if_else(PHIL_BUFF == NY_BUFF_setup | NY_BUFF_setup == 0, 0, 1))
tracts_1960 <- tracts_1960 %>%
select(-"NY_BUFF_setup") %>%
relocate("NY_BUFF", .after = "PHIL_BUFF")
#2010 DATA SECOND
##PITT
Pittsburgh_buffer <- city_points_utm %>%
filter(city_points_utm$Id == "Pittsburgh")
Pittsburgh_buffer <- st_buffer(Pittsburgh_buffer, 40233.6)
tracts_2010$PITT_BUFF <- lengths(st_intersects(tracts_2010, Pittsburgh_buffer)) # done
tracts_2010 <- tracts_2010 %>%
relocate("PITT_BUFF", .after = "Cath_Chur")
##PHILLY
Philly_buffer <- city_points_utm %>%
filter(city_points_utm$Id == "Philadelphia")
Philly_buffer <- st_buffer(Philly_buffer, 40233.6)
tracts_2010$PHIL_BUFF <- lengths(st_intersects(tracts_2010, Philly_buffer)) # done
tracts_2010 <- tracts_2010 %>%
relocate("PHIL_BUFF", .after = "PITT_BUFF")
##BIG APPLE
NY_buffer <- city_points_utm %>%
filter(city_points_utm$Id == "New York")
NY_buffer <- st_buffer(NY_buffer, 160934) #100 miles baby (to account for New Jersey)
tracts_2010$NY_BUFF_setup <- lengths(st_intersects(tracts_2010, NY_buffer)) # done
tracts_2010 <- tracts_2010 %>%
relocate("NY_BUFF_setup", .after = "PHIL_BUFF")
#filtering out New York Buffer that overlaps Philly's
tracts_2010 <- tracts_2010 %>%
mutate(NY_BUFF = if_else(PHIL_BUFF == NY_BUFF_setup | NY_BUFF_setup == 0, 0, 1))
tracts_2010 <- tracts_2010 %>%
select(-"NY_BUFF_setup") %>%
relocate("NY_BUFF", .after = "PHIL_BUFF")
##DONE
## LAST TOUCHES - Converting SHAPE_AREA to Miles
tracts_1960 <- tracts_1960 %>%
dplyr::mutate("AREA_SQMI" = SHAPE_AREA / 1609) %>%
relocate("AREA_SQMI", .after = "SHAPE_AREA")
tracts_2010 <- tracts_2010 %>%
dplyr::mutate("AREA_SQMI" = Shape_area / 1609) %>%
relocate("AREA_SQMI", .after = "Shape_area")
#TMAP 1960
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(tracts_1960, name = "GISJOIN") +
tm_borders(col = "dark gray", lwd = 0.25) +
tm_fill(col = "PCT_FB_1960", palette = "PuRd", id = "NHGISNAM",
colorNA = "gray",
textNA = "Data unavailable",
title = "Percent Foreign Born by Census Tract 1960",
popup.vars = c("Number of Catholic Churches within Half Mile: " = "CATH_BUFF"
, "Total population: " = "POP_1960"
, "AREA in Sq Miles: " = "AREA_SQMI")) +
#, "County: " = "slavePercentage")) +
tm_shape(catholic.sf, name = "USER_CHURC") +
tm_dots(col = "dodgerblue", alpha = 0.1
, id =""
, title = "Catholic Churches"
, popup.vars = c("Name: " = "USER_CHURC"
, "City: " = "City"
, "County: " = "Subregion"
, "Zip: " = "Postal")) +
tm_layout(title = "Catholic Churches & Percent Foreign Born by Census Tract 1960", frame = FALSE) +
tm_basemap(c("Esri.WorldGrayCanvas", "CartoDB.Positron", "Esri.WorldTopoMap")) +
tmap_options(check.and.fix = TRUE)
## Warning: The shape GISJOIN is invalid (after reprojection). See sf::st_is_valid
#tm_layout(title = "Confederate Monuments and Enslaved Population in 1860") +
#tm_basemap(c("Esri.WorldGrayCanvas", "CartoDB.Positron", "Esri.WorldTopoMap", "CartoDB.DarkMatter"))