library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
dataframe <- fread("C:/Users/cruzs/Downloads/New folder/CA_MSA.csv")
# Per-row terms of the dissimilarity index: the absolute gap between a group's
# share and the non-Hispanic white share, for black (wb), Hispanic (wh) and
# Asian (wa) residents.
# NOTE(review): assumes each row is one census tract and that the *c columns
# (nhblkc, hispc, nhasnc, nhwhitec) hold the metro-wide total for the group --
# confirm against the CA_MSA.csv codebook.
dataframe$wb <- abs(dataframe$nhblk / dataframe$nhblkc - dataframe$nhwhite / dataframe$nhwhitec)
dataframe$wh <- abs(dataframe$hisp / dataframe$hispc - dataframe$nhwhite / dataframe$nhwhitec)
dataframe$wa <- abs(dataframe$nhasn / dataframe$nhasnc - dataframe$nhwhite / dataframe$nhwhitec)
# Calculate the asian-white dissimilarity index for each metropolitan area in
# CA_MSA.csv using the chain approach (%>%).
# D = 0.5 * sum(|a_i / A - w_i / W|); the 0.5 factor keeps the index in [0, 1].
# The per-row columns above are left unscaled so existing uses of wb/wh/wa
# keep their values.
dataframe %>%
  group_by(NAME) %>%
  summarize(diss_w_a = 0.5 * sum(wa))
# NOTE(review): any result table echoed after this statement that was produced
# before the 0.5 factor was added shows values twice the index; re-knit the
# document to refresh it.
## # A tibble: 6 × 2
## NAME diss_w_a
## <chr> <dbl>
## 1 Fresno, CA 0.755
## 2 Los Angeles-Long Beach-Anaheim, CA 0.952
## 3 Riverside-San Bernardino-Ontario, CA 0.842
## 4 San Diego-Chula Vista-Carlsbad, CA 0.960
## 5 San Francisco-Oakland-Berkeley, CA 0.911
## 6 San Jose-Sunnyvale-Santa Clara, CA 0.855
# Calculate the average HOLC area for each state
HOLC <- fread("C:/Users/cruzs/Downloads/New folder/holc_census_tracts.csv")
# One row per state, holding the mean of holc_area within that state.
avg_holc_by_state <- summarize(
  group_by(HOLC, state),
  avg_holc_area = mean(holc_area)
)
# Boxplot of holc_area per state: states on the x axis, the holc_area
# distribution on the y axis.
library(ggplot2)
ggplot(data = HOLC, mapping = aes(x = state, y = holc_area)) +
  geom_boxplot(colour = "black", fill = "blue") +
  ggtitle("Distribution of HOLC Area by State") +
  xlab("50 States") +
  ylab("HOLC Area")
# Create a dataframe that summarizes the count of HOLC grade D for each Texas city
# TX_HOLC_D keeps every grade D row located in Texas.
TX_HOLC_D <- HOLC[HOLC$state == "TX" & HOLC$holc_grade == "D", ]
# Per-city summary: one row per city with the number of grade D rows.
# NOTE(review): st_name is treated as the city column because this script
# filters st_name == "San Antonio" elsewhere -- confirm against the codebook.
TX_HOLC_D_by_city <- TX_HOLC_D %>%
  dplyr::count(st_name, name = "n_grade_d")
# Use Census API to get the poverty and black percentage of each census tract in San Antonio
tx_san_antonio <- HOLC[HOLC$st_name == "San Antonio", ]
# Columns are selected by position. The statements below rely on the second
# kept column (source column 14) being `geoid`.
# NOTE(review): positional selection breaks silently if the CSV layout
# changes; consider selecting by name once the column names are confirmed.
tx_san_antonio <- tx_san_antonio[, c(2, 14, 18)]
# GEOID is stored as character so it can be matched against the tract
# identifiers returned by tidycensus.
tx_san_antonio$geoid <- as.character(tx_san_antonio$geoid)
names(tx_san_antonio)[2] <- "GEOID"
library(tidycensus)
# The Census API key must not live in source control. tidycensus picks it up
# from the CENSUS_API_KEY environment variable; store it once, interactively,
# with census_api_key("<your key>", install = TRUE).
# NOTE(review): the key that used to be hard-coded here was published with
# this script and should be treated as compromised and replaced.
if (!nzchar(Sys.getenv("CENSUS_API_KEY"))) {
  stop(
    "CENSUS_API_KEY is not set; add it to ~/.Renviron and restart R.",
    call. = FALSE
  )
}
# ACS variables to request. The trailing "E" (estimate) is kept on purpose:
# the column names used later in this script (total_pop, black_pop, ...) are
# the names given here.
vars <- c(
  total_pop = "B03002_001E",
  black_pop = "B03002_004E",
  totalpop_poverty = "B17017_001E",
  poverty_pop = "B17017_002E"
)
st <- "TX"
# Tract-level 2017-2021 5-year ACS estimates for Bexar County, one row per
# tract (wide output) with tract geometry attached. `county` is spelled out in
# full rather than relying on partial matching of `count`.
ct <- get_acs(
  geography = "tract",
  variables = vars,
  state = st,
  county = "Bexar",
  output = "wide",
  year = 2021,
  geometry = TRUE
)
## Getting data from the 2017-2021 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## |======================================================================| 100%
# Merge the census data with HOLC data (1'), and make a bar chart showing the average black percentage for different HOLC grades for all San Antonio tracts
# Tract-level shares; tracts with a zero denominator yield NaN and are
# dropped from the averages below via na.rm = TRUE.
ct$black_pct <- ct$black_pop / ct$total_pop
ct$poverty_pct <- ct$poverty_pop / ct$totalpop_poverty
# Keep only tracts present in both tables (merge() defaults to an inner join).
blk_avg_holc <- merge(tx_san_antonio, ct, by = "GEOID")
# Average black share per HOLC grade, expressed in percent so that it matches
# the chart title. Plotting black_pop with stat = "identity" would instead
# stack raw population counts per grade.
blk_pct_by_grade <- blk_avg_holc %>%
  group_by(holc_grade) %>%
  summarize(avg_black_pct = 100 * mean(black_pct, na.rm = TRUE))
ggplot(blk_pct_by_grade, aes(x = holc_grade, y = avg_black_pct)) +
  geom_col(fill = "lightblue") +
  labs(
    title = "Average Black Percentage by HOLC Grade in San Antonio",
    y = "Average Black Percentage",
    x = "HOLC Grade"
  ) +
  theme_minimal()
# Make a boxplot showing the HOLC_area data structure (y axis) of each grade (x axis) for San Antonio
# All HOLC rows whose st_name is "San Antonio".
SA <- HOLC[HOLC$st_name == "San Antonio", ]
# One box per HOLC grade showing the distribution of holc_area.
ggplot(SA, aes(x = holc_grade, y = holc_area)) +
  geom_boxplot(fill = "navyblue", color = "black") +
  labs(
    title = "Distribution of HOLC Area by HOLC Grade in San Antonio",
    y = "HOLC Area",
    x = "HOLC Grade"
  )