library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
# Read the California metro-area table and work on a copy (GG) so the raw
# import (CA_MSA) is left untouched.
CA_MSA <- read.csv("Desktop/Methods /CA_MSA.csv")
GG <- CA_MSA

# Total population per metro area: sum of tpop within each NAME.
total_population <- summarize(group_by(GG, NAME), total_pop = sum(tpop))

# Per-row term |nhasn / nhasnc - nhwhite / nhwhitec|.
# NOTE(review): nhasnc / nhwhitec are presumably the metro-wide Asian and
# White totals repeated on every row -- confirm against the data dictionary.
GG <- GG %>%
  mutate(asian_white_diff = abs(nhasn / nhasnc - nhwhite / nhwhitec))

# Asian-White dissimilarity (AWD) per metro area: half the sum of the row
# terms, skipping rows where the term is NA.
dissimilarity_results <- GG %>%
  group_by(NAME) %>%
  summarize(AWD = sum(asian_white_diff, na.rm = TRUE) / 2)
print(dissimilarity_results)
## # A tibble: 6 × 2
##   NAME                                   AWD
##   <chr>                                <dbl>
## 1 Fresno, CA                           0.378
## 2 Los Angeles-Long Beach-Anaheim, CA   0.476
## 3 Riverside-San Bernardino-Ontario, CA 0.421
## 4 San Diego-Chula Vista-Carlsbad, CA   0.480
## 5 San Francisco-Oakland-Berkeley, CA   0.455
## 6 San Jose-Sunnyvale-Santa Clara, CA   0.428
# Read the HOLC table and keep a working copy (GG2) alongside the raw import.
holc_census_tracts <- read.csv("Desktop/Methods /holc_census_tracts.csv")
GG2 <- holc_census_tracts

# Mean holc_area within each state; rows with a missing area are ignored.
average_holc_area <- summarize(
  group_by(GG2, state),
  avg_area = mean(holc_area, na.rm = TRUE)
)
print(average_holc_area)
## # A tibble: 38 × 2
##    state avg_area
##    <chr>    <dbl>
##  1 AL       1.43 
##  2 AR       0.626
##  3 AZ       0.773
##  4 CA       1.30 
##  5 CO       0.704
##  6 CT       0.763
##  7 FL       1.38 
##  8 GA       0.507
##  9 IA       1.99 
## 10 IL       0.575
## # ℹ 28 more rows
library(ggplot2)
# Box plot of holc_area for every state present in GG2.
ggplot(data = GG2, mapping = aes(x = state, y = holc_area)) +
  geom_boxplot() +
  ggtitle("HOLC Area Distribution by State") +
  xlab("State") +
  ylab("HOLC Area")

# Number of grade-D rows per city (st_name) among the Texas records.
# Separate filter() conditions are combined with AND.
TX_HOLC <- GG2 %>%
  filter(state == "TX", holc_grade == "D") %>%
  group_by(st_name) %>%
  summarize(grade_D_count = n())
print(TX_HOLC)
## # A tibble: 8 × 2
##   st_name     grade_D_count
##   <chr>               <int>
## 1 Austin                 29
## 2 Dallas                 41
## 3 El Paso                28
## 4 Fort Worth             74
## 5 Galveston               6
## 6 Houston                42
## 7 San Antonio            49
## 8 Waco                    4
library(tidycensus)
# ACS estimate variables requested for each tract (names become column names):
#   poptotal     - B03002_001, total population
#   black        - B03002_004 (non-Hispanic Black alone per the ACS table
#                  shell -- confirm if the definition matters downstream)
#   poverty      - B17017_002, households with income below the poverty level
#   pov_universe - B17017_001, all households (the universe of table B17017);
#                  needed only as the denominator for poverty_pct
vars <- c(
  poptotal = "B03002_001E",
  black = "B03002_004E",
  poverty = "B17017_002E",
  pov_universe = "B17017_001E"
)

# Tract-level 2017-2021 5-year ACS estimates for Bexar County, TX, one row per
# tract with one column per variable (plus its margin of error).
sa_data <- get_acs(
  geography = "tract",
  state = "TX",
  county = "Bexar",
  year = 2021,
  output = "wide",
  variables = vars
)
## Getting data from the 2017-2021 5-year ACS

# Percent shares. Each numerator is divided by the total of its own table:
# `poverty` counts households, so it is divided by the household universe
# rather than by the person count in `poptotal`.
sa_data <- sa_data %>%
  mutate(
    black_pct = black / poptotal * 100,
    poverty_pct = poverty / pov_universe * 100
  )

# Drop the helper denominator (and its margin-of-error column) so sa_data keeps
# the same columns it had before the denominator was introduced.
sa_data <- sa_data[setdiff(names(sa_data), c("pov_universe", "B17017_001M"))]
print(sa_data)
## # A tibble: 375 × 10
##    GEOID       NAME   poptotal B03002_001M black B03002_004M poverty B17017_002M
##    <chr>       <chr>     <dbl>       <dbl> <dbl>       <dbl>   <dbl>       <dbl>
##  1 48029110100 Censu…     2934         565    58          47     400         147
##  2 48029110300 Censu…     2930         652    49          48     342         132
##  3 48029110500 Censu…     2201         309   152         111     638         131
##  4 48029110600 Censu…     5384        1620   862         490     683         177
##  5 48029110700 Censu…      982         246    24          29     254          88
##  6 48029111000 Censu…     2709         404    86          83     241          74
##  7 48029111100 Censu…     3717         518   125          61     300          94
##  8 48029120100 Censu…     4655         751   609         216      69          40
##  9 48029120301 Censu…     3294         480    46          67      93          80
## 10 48029120302 Censu…     4066         478     8          15     121         101
## # ℹ 365 more rows
## # ℹ 2 more variables: black_pct <dbl>, poverty_pct <dbl>
# San Antonio HOLC records, with the tract id stored as text so it can be
# matched against the ACS GEOID.
SA_HOLC <- GG2 %>%
  filter(st_name == "San Antonio") %>%
  mutate(geoid = as.character(geoid))
sa_data <- sa_data %>%
  mutate(GEOID = as.character(GEOID))

# Inner merge on tract id: only tracts present in both tables are kept.
SA_HOLC_demo <- merge(SA_HOLC, sa_data, by.x = "geoid", by.y = "GEOID")

# Mean black_pct within each HOLC grade, ignoring missing values.
avg_black_pct <- summarize(
  group_by(SA_HOLC_demo, holc_grade),
  avg_black_pct = mean(black_pct, na.rm = TRUE)
)

# One bar per grade; bar height is the precomputed mean (stat = "identity").
ggplot(
  data = avg_black_pct,
  mapping = aes(x = holc_grade, y = avg_black_pct, fill = holc_grade)
) +
  geom_bar(stat = "identity") +
  ggtitle("Average Black Percentage by HOLC Grade in San Antonio") +
  xlab("HOLC Grade") +
  ylab("Average Black Percentage")

# Box plot of holc_area by HOLC grade for the San Antonio records, with each
# grade given its own fill colour.
ggplot(
  data = SA_HOLC,
  mapping = aes(x = holc_grade, y = holc_area, fill = holc_grade)
) +
  geom_boxplot() +
  ggtitle("HOLC Area Distribution by Grade in San Antonio") +
  xlab("HOLC Grade") +
  ylab("HOLC Area")