library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
# Load the CA_MSA table. The path is relative, so it is resolved against the
# current working directory.
CA_MSA <- read.csv(file = "Desktop/Methods /CA_MSA.csv")
# GG is the working copy that the following steps read and modify.
GG <- CA_MSA

# One row per NAME holding the sum of `tpop` over that group's rows.
# There is no NA handling here: a missing `tpop` makes the group total NA.
total_population <- summarise(
  group_by(GG, NAME),
  total_pop = sum(tpop)
)
# Per-row term of the dissimilarity index:
#   | nhasn / nhasnc - nhwhite / nhwhitec |
# NOTE(review): presumably `nhasnc` and `nhwhitec` hold the group-wide totals
# for the row's NAME, repeated on every row — confirm against the data source.
GG[["asian_white_diff"]] <- with(
  GG,
  abs(nhasn / nhasnc - nhwhite / nhwhitec)
)

# Asian-White dissimilarity (AWD) per NAME: half the sum of the per-row terms.
# Rows whose term is NA are dropped from the sum rather than propagating.
dissimilarity_results <- GG %>%
  group_by(NAME) %>%
  summarise(AWD = 0.5 * sum(asian_white_diff, na.rm = TRUE))
print(dissimilarity_results)
## # A tibble: 6 × 2
## NAME AWD
## <chr> <dbl>
## 1 Fresno, CA 0.378
## 2 Los Angeles-Long Beach-Anaheim, CA 0.476
## 3 Riverside-San Bernardino-Ontario, CA 0.421
## 4 San Diego-Chula Vista-Carlsbad, CA 0.480
## 5 San Francisco-Oakland-Berkeley, CA 0.455
## 6 San Jose-Sunnyvale-Santa Clara, CA 0.428
# Load the HOLC table. The path is relative, so it is resolved against the
# current working directory.
holc_census_tracts <- read.csv(file = "Desktop/Methods /holc_census_tracts.csv")
# GG2 is the working copy used by every later HOLC step.
GG2 <- holc_census_tracts

# Mean `holc_area` per state, ignoring rows where the area is missing.
average_holc_area <- summarise(
  group_by(GG2, state),
  avg_area = mean(holc_area, na.rm = TRUE)
)
print(average_holc_area)
## # A tibble: 38 × 2
## state avg_area
## <chr> <dbl>
## 1 AL 1.43
## 2 AR 0.626
## 3 AZ 0.773
## 4 CA 1.30
## 5 CO 0.704
## 6 CT 0.763
## 7 FL 1.38
## 8 GA 0.507
## 9 IA 1.99
## 10 IL 0.575
## # ℹ 28 more rows
library(ggplot2)
# Box plot of `holc_area` for each state in the HOLC table. The plot is drawn
# through top-level auto-printing; it is not stored in a variable.
ggplot(data = GG2, mapping = aes(x = state, y = holc_area)) +
  labs(
    title = "HOLC Area Distribution by State",
    x = "State",
    y = "HOLC Area"
  ) +
  geom_boxplot()

# Number of grade-"D" rows per `st_name` within Texas.
# Both conditions must hold; rows where either comparison is NA are dropped.
TX_HOLC <- GG2 %>%
  filter(state == "TX", holc_grade == "D") %>%
  group_by(st_name) %>%
  summarise(grade_D_count = n())
print(TX_HOLC)
## # A tibble: 8 × 2
## st_name grade_D_count
## <chr> <int>
## 1 Austin 29
## 2 Dallas 41
## 3 El Paso 28
## 4 Fort Worth 74
## 5 Galveston 6
## 6 Houston 42
## 7 San Antonio 49
## 8 Waco 4
library(tidycensus)
# ACS variable IDs to request. The names become the estimate column names in
# the wide output (`poptotal`, `black`, `poverty`); the printed result shows
# the margin-of-error columns keep their raw IDs (e.g. `B03002_001M`).
vars <- c(
  poptotal = "B03002_001E",
  black = "B03002_004E",
  poverty = "B17017_002E"
)

# Tract-level ACS pull for Bexar County, TX, for year = 2021, one row per
# tract (wide output). Requires network access to the Census API.
# The stray trailing comma after `variables = vars` was removed: it passed an
# empty argument that only worked because the next formal has a default.
sa_data <- get_acs(
  geography = "tract",
  variables = vars,
  state = "TX",
  county = "Bexar",
  year = 2021,
  output = "wide"
)
## Getting data from the 2017-2021 5-year ACS
# Add percentage columns, each expressed relative to `poptotal`.
# A tract with poptotal == 0 yields NaN (0 / 0) for both columns.
# NOTE(review): `poverty` is requested as B17017_002, which looks like a
# household-level count; dividing it by a person count (`poptotal`) would mix
# units. Confirm the intended denominator (possibly B17017_001).
sa_data <- mutate(
  sa_data,
  black_pct = black / poptotal * 100,
  poverty_pct = poverty / poptotal * 100
)
print(sa_data)
## # A tibble: 375 × 10
## GEOID NAME poptotal B03002_001M black B03002_004M poverty B17017_002M
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 48029110100 Censu… 2934 565 58 47 400 147
## 2 48029110300 Censu… 2930 652 49 48 342 132
## 3 48029110500 Censu… 2201 309 152 111 638 131
## 4 48029110600 Censu… 5384 1620 862 490 683 177
## 5 48029110700 Censu… 982 246 24 29 254 88
## 6 48029111000 Censu… 2709 404 86 83 241 74
## 7 48029111100 Censu… 3717 518 125 61 300 94
## 8 48029120100 Censu… 4655 751 609 216 69 40
## 9 48029120301 Censu… 3294 480 46 67 93 80
## 10 48029120302 Censu… 4066 478 8 15 121 101
## # ℹ 365 more rows
## # ℹ 2 more variables: black_pct <dbl>, poverty_pct <dbl>
# HOLC rows for San Antonio, with the tract identifier coerced to character so
# it can be matched against the ACS GEOID.
# NOTE(review): if `geoid` was read from the CSV as a number, any leading zero
# is already lost; that does not affect Texas IDs (they start with 48), but
# confirm before reusing this for other states.
SA_HOLC <- GG2 %>%
  filter(st_name == "San Antonio") %>%
  mutate(geoid = as.character(geoid))

# Same coercion on the ACS side so both join keys share a type.
sa_data <- mutate(sa_data, GEOID = as.character(GEOID))

# Inner join (merge() defaults): only tracts present in both tables are kept,
# and unmatched HOLC rows are dropped silently.
SA_HOLC_demo <- merge(
  x = SA_HOLC,
  y = sa_data,
  by.x = "geoid",
  by.y = "GEOID"
)
# Unweighted mean of the tract-level `black_pct` within each HOLC grade;
# tracts with a missing or NaN percentage are ignored.
avg_black_pct <- summarise(
  group_by(SA_HOLC_demo, holc_grade),
  avg_black_pct = mean(black_pct, na.rm = TRUE)
)

# Bar chart of those means, one bar per grade. geom_col() draws bar heights
# directly from the y values, the same as geom_bar(stat = "identity").
ggplot(
  data = avg_black_pct,
  mapping = aes(x = holc_grade, y = avg_black_pct, fill = holc_grade)
) +
  geom_col() +
  labs(
    title = "Average Black Percentage by HOLC Grade in San Antonio",
    x = "HOLC Grade",
    y = "Average Black Percentage"
  )

# Box plot of `holc_area` by HOLC grade for the San Antonio subset, drawn from
# SA_HOLC (the HOLC rows before they are merged with the ACS data).
ggplot(
  data = SA_HOLC,
  mapping = aes(x = holc_grade, y = holc_area, fill = holc_grade)
) +
  labs(
    title = "HOLC Area Distribution by Grade in San Antonio",
    x = "HOLC Grade",
    y = "HOLC Area"
  ) +
  geom_boxplot()
