options(repos = c(CRAN = "https://cloud.r-project.org"))
# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
# Install tidycensus only when it is not already available, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("tidycensus", quietly = TRUE)) {
  install.packages("tidycensus")
}
##
## The downloaded binary packages are in
## /var/folders/4b/1p0sp0rs33xg19wq8_5j5lm40000gn/T//RtmpblDu57/downloaded_packages
library(tidycensus)
# Read the CA_MSA file into `df` and list its column names
df <- data.table::fread("/Users/jayjay/Downloads/CA_MSA.csv")
print(names(df))
## [1] "GEOID.x" "tpop" "tpopr" "pnhwhite" "pnhasn" "pnhblk"
## [7] "phisp" "nhwhite" "nhasn" "nhblk" "hisp" "nonwhite"
## [13] "pnonwhite" "oth" "poth" "CSAFP" "CBSAFP" "GEOID.y"
## [19] "NAME" "NAMELSAD" "LSAD" "geometry" "nhwhitec" "nonwhitec"
## [25] "nhasnc" "nhblkc" "othc" "hispc" "tpoprc" "wb"
## [31] "wa" "wh"
# Sum `tpop` within each metropolitan area (one row per NAME)
total_population <- summarise(
  group_by(df, NAME),
  total_pop = sum(tpop)
)
# Per-row contribution to the Asian-White dissimilarity index:
# |nhasn / nhasnc - nhwhite / nhwhitec|
df$asian_white_diff <- with(
  df,
  abs(nhasn / nhasnc - nhwhite / nhwhitec)
)
# Dissimilarity index per metropolitan area: half the sum of the
# contributions, skipping rows where the contribution is missing
dissimilarity_results <- summarise(
  group_by(df, NAME),
  AWD = sum(asian_white_diff, na.rm = TRUE) / 2
)
# Show the index for each metropolitan area
print(dissimilarity_results)
## # A tibble: 6 × 2
## NAME AWD
## <chr> <dbl>
## 1 Fresno, CA 0.378
## 2 Los Angeles-Long Beach-Anaheim, CA 0.476
## 3 Riverside-San Bernardino-Ontario, CA 0.421
## 4 San Diego-Chula Vista-Carlsbad, CA 0.480
## 5 San Francisco-Oakland-Berkeley, CA 0.455
## 6 San Jose-Sunnyvale-Santa Clara, CA 0.428
# Read the HOLC census-tract file into `HOLC` and list its column names
HOLC <- data.table::fread("/Users/jayjay/Downloads/holc_census_tracts.csv")
print(names(HOLC))
## [1] "holc_id" "holc_grade" "id" "polygon_id" "sheets"
## [6] "name" "municipali" "holc_area" "year" "msamd"
## [11] "state_code" "county_cod" "census_tra" "geoid" "tract_prop"
## [16] "holc_prop" "map_id" "st_name" "state"
# Keep only the HOLC records whose st_name is "San Antonio"
san_antonio_holc <- filter(HOLC, st_name == "San Antonio")
# Preview the first six rows
head(san_antonio_holc, n = 6L)
## holc_id holc_grade id polygon_id sheets name municipali holc_area year
## <char> <char> <int> <int> <int> <char> <lgcl> <num> <int>
## 1: C 160 8046 0 NA 0.6275395 2019
## 2: B 160 8044 0 NA 4.8224292 2019
## 3: A 160 8035 0 NA 2.7200901 2019
## 4: B 160 8038 0 NA 0.9772639 2019
## 5: B 160 8038 0 NA 0.9772639 2019
## 6: B 160 8039 0 NA 2.1680394 2019
## msamd state_code county_cod census_tra geoid tract_prop holc_prop
## <int> <int> <int> <int> <i64> <num> <num>
## 1: 41700 48 29 190604 48029190604 1.129017e-04 1.131153e-04
## 2: 41700 48 29 140100 48029140100 3.453003e-03 2.728566e-04
## 3: 41700 48 29 170600 48029170600 3.639832e-03 1.003530e-03
## 4: 41700 48 29 150100 48029150100 3.711024e-05 3.709674e-05
## 5: 41700 48 29 160901 48029160901 7.666279e-04 7.522779e-04
## 6: 41700 48 29 130300 48029130300 1.137713e-02 2.450554e-03
## map_id st_name state
## <num> <char> <char>
## 1: 160 San Antonio TX
## 2: 160 San Antonio TX
## 3: 160 San Antonio TX
## 4: 160 San Antonio TX
## 5: 160 San Antonio TX
## 6: 160 San Antonio TX
# Mean of holc_area within each state, ignoring missing areas
average_holc_area <- summarise(
  group_by(HOLC, state),
  avg_area = mean(holc_area, na.rm = TRUE)
)
# Show the per-state averages
print(average_holc_area)
## # A tibble: 38 × 2
## state avg_area
## <chr> <dbl>
## 1 AL 1.43
## 2 AR 0.626
## 3 AZ 0.773
## 4 CA 1.30
## 5 CO 0.704
## 6 CT 0.763
## 7 FL 1.38
## 8 GA 0.507
## 9 IA 1.99
## 10 IL 0.575
## # ℹ 28 more rows
# One boxplot of holc_area per state, with the state labels rotated
# vertically on the x axis
library(ggplot2)
ggplot(data = HOLC, mapping = aes(x = state, y = holc_area)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "HOLC Area Distribution by State",
    x = "State",
    y = "HOLC Area"
  )
# Texas records with HOLC grade "D"
TX_HOLC <- filter(HOLC, state == "TX", holc_grade == "D")
# Number of grade-D records per Texas city (st_name)
holc_grade_d_summary <- TX_HOLC %>%
  group_by(st_name) %>%
  summarise(count = n())
# Show the per-city counts
print(holc_grade_d_summary)
## # A tibble: 8 × 2
## st_name count
## <chr> <int>
## 1 Austin 29
## 2 Dallas 41
## 3 El Paso 28
## 4 Fort Worth 74
## 5 Galveston 6
## 6 Houston 42
## 7 San Antonio 49
## 8 Waco 4
# One bar per Texas city; bar height is the number of grade-D records,
# and each city gets its own fill colour
ggplot(data = TX_HOLC, mapping = aes(x = st_name, fill = st_name)) +
  geom_bar(stat = "count")
# San Antonio HOLC records, reduced to the tract id, area and grade
SA <- HOLC %>%
  filter(st_name == "San Antonio") %>%
  select(geoid, holc_area, holc_grade)
# Add a character copy of the tract id under the name GEOID.
# The selected column is `geoid` (lower case); SA has no `GEOID` column, so
# the id must be read from `geoid` rather than from `SA$GEOID`.
SA$GEOID <- as.character(SA$geoid)
# ACS variables to request, keyed by the short names used for them below
var <- c(
  poptotal = "B03002_001E",
  hispanic = "B03002_012E",
  white = "B03002_003E",
  black = "B03002_004E",
  asian = "B03002_006E",
  poptotal2 = "B17017_001E",
  poverty = "B17017_002E"
)
# Tract-level ACS data (with geometry) for Bexar County, TX, one column per
# variable
ct <- get_acs(
  geography = "tract",
  variables = var,
  state = "TX",
  county = "Bexar",
  year = 2021,
  output = "wide",
  geometry = TRUE
)
## Getting data from the 2017-2021 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## (download progress bar output omitted)
# Black share of each tract's population, and poverty share
# (poverty / poptotal2).
# NOTE(review): assumes get_acs() returned columns named exactly `black`,
# `poptotal`, `poverty` and `poptotal2` -- confirm against names(ct).
ct$black_percentage <- ct$black / ct$poptotal
ct$poverty_percentage <- ct$poverty / ct$poptotal2
# Merge census and HOLC data on the tract id.
# ct$GEOID is character while HOLC$geoid is read by fread as integer64, so
# the HOLC key is converted to character on a plain data.frame copy first;
# this keeps the join from depending on implicit matching between the two
# types.
holc_for_merge <- as.data.frame(HOLC)
holc_for_merge$geoid <- as.character(holc_for_merge$geoid)
merging_data <- merge(ct, holc_for_merge, by.x = "GEOID", by.y = "geoid")
# Average black population percentage by HOLC grade.
# na.rm = TRUE: a tract with zero total population gives 0 / 0 = NaN, which
# would otherwise turn the whole grade's mean into NaN.
average_of_black_percentage_by_grade <- merging_data %>%
  group_by(holc_grade) %>%
  summarize(avg_black_percentage = mean(black_percentage, na.rm = TRUE))
# Bar plot of average black population percentage by HOLC grade
# (geom_col() draws bars whose heights are the y values)
ggplot(
  average_of_black_percentage_by_grade,
  aes(x = holc_grade, y = avg_black_percentage)
) +
  geom_col()
# Boxplot of HOLC area by grade
ggplot(merging_data, aes(x = holc_grade, y = holc_area)) +
  geom_boxplot() +
  theme_minimal()