# Define ACS variables
vars <- c(
income = "B19013_001",
education = "B15003_022",
total_edu = "B15003_001",
white = "B02001_002",
black = "B02001_003",
hispanic = "B03003_003",
total_pop = "B03003_001"
)
# Download ACS data for Hennepin and Ramsey counties
mn_data <- get_acs(
geography = "tract",
variables = vars,
state = "MN",
county = c("Hennepin", "Ramsey"),
geometry = TRUE,
year = 2021
)
## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 13% | |========== | 14% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================= | 25% | |================== | 26% | |==================== | 28% | |===================== | 30% | |====================== | 31% | |======================= | 33% | |======================== | 34% | |========================= | 36% | |========================== | 38% | |=========================== | 39% | |============================ | 41% | |============================== | 42% | |=============================== | 44% | |================================ | 45% | |================================= | 47% | |================================== | 49% | |=================================== | 50% | |==================================== | 52% | |===================================== | 53% | |======================================= | 55% | |======================================== | 57% | |========================================= | 58% | |========================================== | 60% | |=========================================== | 61% | |============================================ | 63% | |============================================= | 65% | |============================================== | 66% | |=============================================== | 68% | |================================================= | 69% | |================================================== | 71% | |=================================================== | 73% | |==================================================== | 74% | |===================================================== | 76% | |====================================================== | 77% | |======================================================= | 79% | |======================================================== | 81% | |========================================================= | 82% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================= | 87% | |============================================================== | 88% | |=============================================================== | 90% | |================================================================ | 92% | |================================================================= | 93% | |================================================================== | 95% | |=================================================================== | 96% | |===================================================================== | 98% | |======================================================================| 100%
mn_wide <- mn_data %>%
select(GEOID, NAME, variable, estimate) %>%
pivot_wider(names_from = variable, values_from = estimate) %>%
mutate(
pct_white = white / total_pop,
pct_black = black / total_pop,
pct_hispanic = hispanic / total_pop,
pct_bachelors = education / total_edu
) %>%
drop_na(pct_white, pct_black, pct_hispanic, pct_bachelors, income)
mn_sf_proj <- st_transform(mn_wide, crs = 3857)
mn_sf_proj$income_w <- mn_sf_proj$income / 10000
mn_dorling <- cartogram_dorling(mn_sf_proj, "income_w", k = 0.8)
cluster_df <- st_drop_geometry(mn_sf_proj) %>%
select(income, pct_white, pct_black, pct_hispanic, pct_bachelors)
cluster_scaled <- scale(cluster_df)
pam_result <- pam(cluster_scaled, k = 5)
mn_dorling$cluster <- factor(pam_result$clustering)
# Load harmonized LTDB data
URL1 <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-2000.rds"
URL2 <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-2010.rds"
URLmd <- "https://github.com/DS4PS/cpp-529-fall-2020/raw/main/LABS/data/rodeo/LTDB-META-DATA.rds"
d1 <- readRDS(gzcon(url(URL1))) %>% select(-year)
d2 <- readRDS(gzcon(url(URL2))) %>% select(-year)
md <- readRDS(gzcon(url(URLmd)))
d <- merge(d1, d2, by = "tractid") %>%
merge(md, by = "tractid")
# Standardize GEOID
d$tractid2 <- as.numeric(gsub("-", "", gsub("fips", "", d$tractid)))
# Merge with spatial object
mn_dorling$GEOID_num <- as.numeric(mn_dorling$GEOID)
mn_dorling <- merge(mn_dorling, d, by.x = "GEOID_num", by.y = "tractid2", all.x = TRUE)
mn_dorling <- mn_dorling %>%
mutate(
p.col = 100 * col00 / ag25up00,
p.vacant = 100 * vac00 / hu00,
p.prof = 100 * prof00 / empclf00,
mhv.00 = mhmval00 * 1.28855,
mhv.10 = mhmval12,
mhv.change = mhv.10 - mhv.00,
mhv.growth = 100 * (mhv.change / mhv.00),
log.p.col = log(p.col + 1),
log.p.vacant = log(p.vacant + 1),
log.p.prof = log(p.prof + 1)
)
# Convert to sp object
mn_sp <- as_Spatial(mn_dorling)
row.ids <- sapply(slot(mn_sp, "polygons"), function(x) slot(x, "ID"))
row.names(mn_sp) <- row.ids
# Reproject to lat-long
mn_sp <- spTransform(mn_sp, CRS("+proj=longlat +datum=WGS84"))
# Save to file
geojson_write(mn_sp, file = "mn_final.geojson", geometry = "polygon")
## <geojson-file>
## Path: mn_final.geojson
## From class: SpatialPolygonsDataFrame