options(repos = c(CRAN = "https://cloud.r-project.org"))
# Load necessary libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
# Install tidycensus only when it is not already available, so that
# re-running the script does not download the package again each time.
if (!requireNamespace("tidycensus", quietly = TRUE)) {
  install.packages("tidycensus")
}
library(tidycensus)


# Read the CA_MSA table from disk and list its column names
df <- fread(file = "/Users/jayjay/Downloads/CA_MSA.csv")
colnames(df)
##  [1] "GEOID.x"   "tpop"      "tpopr"     "pnhwhite"  "pnhasn"    "pnhblk"   
##  [7] "phisp"     "nhwhite"   "nhasn"     "nhblk"     "hisp"      "nonwhite" 
## [13] "pnonwhite" "oth"       "poth"      "CSAFP"     "CBSAFP"    "GEOID.y"  
## [19] "NAME"      "NAMELSAD"  "LSAD"      "geometry"  "nhwhitec"  "nonwhitec"
## [25] "nhasnc"    "nhblkc"    "othc"      "hispc"     "tpoprc"    "wb"       
## [31] "wa"        "wh"
# Total population per metropolitan area: sum of tpop over the rows
# that share the same NAME
total_population <- summarise(
  group_by(df, NAME),
  total_pop = sum(tpop)
)

# Per-row absolute difference between nhasn / nhasnc and nhwhite / nhwhitec
# (presumably each row's share of its metro's Asian and White totals;
# confirm against the data dictionary)
df$asian_white_diff <- with(df, abs(nhasn / nhasnc - nhwhite / nhwhitec))

# Asian-White dissimilarity index per metropolitan area: half of the summed
# absolute differences, with missing values dropped from the sum
dissimilarity_results <- summarise(
  group_by(df, NAME),
  AWD = 0.5 * sum(asian_white_diff, na.rm = TRUE)
)

# Show the index for every metropolitan area
print(dissimilarity_results)
## # A tibble: 6 × 2
##   NAME                                   AWD
##   <chr>                                <dbl>
## 1 Fresno, CA                           0.378
## 2 Los Angeles-Long Beach-Anaheim, CA   0.476
## 3 Riverside-San Bernardino-Ontario, CA 0.421
## 4 San Diego-Chula Vista-Carlsbad, CA   0.480
## 5 San Francisco-Oakland-Berkeley, CA   0.455
## 6 San Jose-Sunnyvale-Santa Clara, CA   0.428
# Read the HOLC census-tract table from disk and list its column names
HOLC <- fread(file = "/Users/jayjay/Downloads/holc_census_tracts.csv")
colnames(HOLC)
##  [1] "holc_id"    "holc_grade" "id"         "polygon_id" "sheets"    
##  [6] "name"       "municipali" "holc_area"  "year"       "msamd"     
## [11] "state_code" "county_cod" "census_tra" "geoid"      "tract_prop"
## [16] "holc_prop"  "map_id"     "st_name"    "state"
# Keep only the HOLC rows whose st_name is "San Antonio"
san_antonio_holc <- filter(HOLC, st_name == "San Antonio")

# Preview the first rows of the subset
head(san_antonio_holc)
##    holc_id holc_grade    id polygon_id sheets   name municipali holc_area  year
##     <char>     <char> <int>      <int>  <int> <char>     <lgcl>     <num> <int>
## 1:                  C   160       8046      0                NA 0.6275395  2019
## 2:                  B   160       8044      0                NA 4.8224292  2019
## 3:                  A   160       8035      0                NA 2.7200901  2019
## 4:                  B   160       8038      0                NA 0.9772639  2019
## 5:                  B   160       8038      0                NA 0.9772639  2019
## 6:                  B   160       8039      0                NA 2.1680394  2019
##    msamd state_code county_cod census_tra       geoid   tract_prop    holc_prop
##    <int>      <int>      <int>      <int>       <i64>        <num>        <num>
## 1: 41700         48         29     190604 48029190604 1.129017e-04 1.131153e-04
## 2: 41700         48         29     140100 48029140100 3.453003e-03 2.728566e-04
## 3: 41700         48         29     170600 48029170600 3.639832e-03 1.003530e-03
## 4: 41700         48         29     150100 48029150100 3.711024e-05 3.709674e-05
## 5: 41700         48         29     160901 48029160901 7.666279e-04 7.522779e-04
## 6: 41700         48         29     130300 48029130300 1.137713e-02 2.450554e-03
##    map_id     st_name  state
##     <num>      <char> <char>
## 1:    160 San Antonio     TX
## 2:    160 San Antonio     TX
## 3:    160 San Antonio     TX
## 4:    160 San Antonio     TX
## 5:    160 San Antonio     TX
## 6:    160 San Antonio     TX
# Mean holc_area per state, skipping missing areas
average_holc_area <- summarise(
  group_by(HOLC, state),
  avg_area = mean(holc_area, na.rm = TRUE)
)

# Show the per-state averages
print(average_holc_area)
## # A tibble: 38 × 2
##    state avg_area
##    <chr>    <dbl>
##  1 AL       1.43 
##  2 AR       0.626
##  3 AZ       0.773
##  4 CA       1.30 
##  5 CO       0.704
##  6 CT       0.763
##  7 FL       1.38 
##  8 GA       0.507
##  9 IA       1.99 
## 10 IL       0.575
## # ℹ 28 more rows
# Box plot: distribution of holc_area within each state
library(ggplot2)

ggplot(data = HOLC, mapping = aes(x = state, y = holc_area)) +
  geom_boxplot() +
  labs(
    title = "HOLC Area Distribution by State",
    x = "State",
    y = "HOLC Area"
  ) +
  # Rotate the state labels to vertical
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Grade D HOLC rows in Texas
TX_HOLC <- filter(HOLC, state == "TX", holc_grade == "D")

# Number of Grade D rows per Texas city, computed from the filtered table
holc_grade_d_summary <- TX_HOLC %>%
  group_by(st_name) %>%
  summarise(count = n())

# Show the per-city counts
print(holc_grade_d_summary)
## # A tibble: 8 × 2
##   st_name     count
##   <chr>       <int>
## 1 Austin         29
## 2 Dallas         41
## 3 El Paso        28
## 4 Fort Worth     74
## 5 Galveston       6
## 6 Houston        42
## 7 San Antonio    49
## 8 Waco            4
# Bar chart: one bar per Texas city, bar height = number of Grade D rows
ggplot(data = TX_HOLC, mapping = aes(x = st_name, fill = st_name)) +
  geom_bar(stat = "count")

# San Antonio HOLC rows, reduced to the tract id, area and grade
SA <- HOLC %>%
  filter(st_name == "San Antonio") %>%
  select(geoid, holc_area, holc_grade)

# Store the tract id as text in GEOID.
# select() above keeps only the lower-case `geoid` column, so reading
# SA$GEOID would return NULL and nothing would be converted; the source
# column must be `geoid`.
SA$GEOID <- as.character(SA$geoid)

# ACS variable codes to request for Bexar County, TX; the vector names are
# the short labels later used as column names on the result
var <- c(
  poptotal = "B03002_001E",
  hispanic = "B03002_012E",
  white = "B03002_003E",
  black = "B03002_004E",
  asian = "B03002_006E",
  poptotal2 = "B17017_001E",
  poverty = "B17017_002E"
)

# Tract-level ACS request in wide format, with tract geometry attached
ct <- get_acs(
  geography = "tract",
  variables = var,
  state = "TX",
  county = "Bexar",
  year = 2021,
  output = "wide",
  geometry = TRUE
)
## Getting data from the 2017-2021 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
##   |======================================================================| 100%
# Black share of the population and poverty share for each tract
# (stored as proportions: the ratios are not multiplied by 100)
ct$black_percentage <- with(ct, black / poptotal)
ct$poverty_percentage <- with(ct, poverty / poptotal2)

# Merge census and HOLC data on the tract id.
# The census GEOID is text, while fread() reads HOLC's geoid as a 64-bit
# integer. Convert the key to character on a copy of HOLC so both sides of
# the merge are compared as the same type; HOLC itself is left unchanged.
holc_keyed <- HOLC %>%
  mutate(geoid = as.character(geoid))
merging_data <- merge(ct, holc_keyed, by.x = "GEOID", by.y = "geoid")

# Average black population percentage by HOLC grade.
# na.rm = TRUE keeps one tract with a missing share (e.g. 0 / 0 = NaN for a
# tract with no population) from turning a whole grade's average into NA,
# matching the other aggregates in this script.
average_of_black_percentage_by_grade <- merging_data %>%
  group_by(holc_grade) %>%
  summarize(avg_black_percentage = mean(black_percentage, na.rm = TRUE))

# Bar plot of average black population percentage by HOLC grade
ggplot(
  average_of_black_percentage_by_grade,
  aes(x = holc_grade, y = avg_black_percentage)
) +
  geom_bar(stat = "identity")

# Box plot: distribution of holc_area within each HOLC grade, minimal theme
ggplot(data = merging_data, mapping = aes(x = holc_grade, y = holc_area)) +
  geom_boxplot() +
  theme_minimal()