The article that was used: https://projects.fivethirtyeight.com/redlining/
The article I chose is about redlining which is practice that was known to be discriminatory. It classified areas at different levels of risk in order to determine if the neighborhoods were “bad” to investments. What I am going to attempt to do is show the different population diversity based on the HOLC grades. In order to determine if there was a trend toward more diverse areas being at a lower Score “D” compared to the others “A”, “B”, “C”.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(ggplot2)
pollist = read.table(file="https://raw.githubusercontent.com/fivethirtyeight/data/master/redlining/metro-grades.csv",comment.char = "", header=TRUE,sep=",")
head(pollist)
## metro_area holc_grade white_pop black_pop hisp_pop asian_pop
## 1 Akron, OH A 24702 8624 956 688
## 2 Akron, OH B 41531 16499 2208 3367
## 3 Akron, OH C 73105 22847 3149 6291
## 4 Akron, OH D 6179 6921 567 455
## 5 Albany-Schenectady-Troy, NY A 16989 1818 1317 1998
## 6 Albany-Schenectady-Troy, NY B 26644 7094 4334 2509
## other_pop total_pop pct_white pct_black pct_hisp pct_asian pct_other lq_white
## 1 1993 36963 66.83 23.33 2.59 1.86 5.39 0.94
## 2 4211 67816 61.24 24.33 3.26 4.96 6.21 0.86
## 3 7302 112694 64.87 20.27 2.79 5.58 6.48 0.91
## 4 1022 15144 40.80 45.70 3.75 3.00 6.75 0.57
## 5 1182 23303 72.91 7.80 5.65 8.57 5.07 1.09
## 6 4650 45230 58.91 15.68 9.58 5.55 10.28 0.88
## lq_black lq_hisp lq_asian lq_other surr_area_white_pop surr_area_black_pop
## 1 1.41 1.00 0.46 0.97 304399 70692
## 2 1.47 1.26 1.23 1.11 304399 70692
## 3 1.23 1.08 1.38 1.16 304399 70692
## 4 2.76 1.45 0.74 1.21 304399 70692
## 5 0.66 0.77 1.21 0.72 387016 68371
## 6 1.33 1.30 0.78 1.47 387016 68371
## surr_area_hisp_pop surr_area_asian_pop surr_area_other_pop
## 1 11037 17295 23839
## 2 11037 17295 23839
## 3 11037 17295 23839
## 4 11037 17295 23839
## 5 42699 41112 40596
## 6 42699 41112 40596
## surr_area_pct_white surr_area_pct_black surr_area_pct_hisp
## 1 71.24 16.55 2.58
## 2 71.24 16.55 2.58
## 3 71.24 16.55 2.58
## 4 71.24 16.55 2.58
## 5 66.75 11.79 7.36
## 6 66.75 11.79 7.36
## surr_area_pct_asian surr_area_pct_other
## 1 4.05 5.58
## 2 4.05 5.58
## 3 4.05 5.58
## 4 4.05 5.58
## 5 7.09 7.00
## 6 7.09 7.00
pollist_3 <- subset(pollist, metro_area != 'Scranton--Wilkes-Barre, PA') #Scranton--Wilkes-Barre, PA only has Grades A,B,D no C
pollist_2 <- pollist_3[,-14:-28]
#Example of replacing values
#pollist_1$Grade <- str_replace(string=pollist_1$Grade, pattern="A", replacement="Best"
pollist_1 <- pollist_2 %>% rename(Area = metro_area, Total = total_pop, Grade = holc_grade)
head(pollist_1)
## Area Grade white_pop black_pop hisp_pop asian_pop
## 1 Akron, OH A 24702 8624 956 688
## 2 Akron, OH B 41531 16499 2208 3367
## 3 Akron, OH C 73105 22847 3149 6291
## 4 Akron, OH D 6179 6921 567 455
## 5 Albany-Schenectady-Troy, NY A 16989 1818 1317 1998
## 6 Albany-Schenectady-Troy, NY B 26644 7094 4334 2509
## other_pop Total pct_white pct_black pct_hisp pct_asian pct_other
## 1 1993 36963 66.83 23.33 2.59 1.86 5.39
## 2 4211 67816 61.24 24.33 3.26 4.96 6.21
## 3 7302 112694 64.87 20.27 2.79 5.58 6.48
## 4 1022 15144 40.80 45.70 3.75 3.00 6.75
## 5 1182 23303 72.91 7.80 5.65 8.57 5.07
## 6 4650 45230 58.91 15.68 9.58 5.55 10.28
A <- subset(pollist_1,Grade == 'A')
B <- subset(pollist_1,Grade == 'B')
C <- subset(pollist_1,Grade == 'C')
D <- subset(pollist_1,Grade == 'D')
A_Sum <- data.frame(A$pct_white,
A$pct_black,
A$pct_hisp,
A$pct_asian,
A$pct_other)
B_Sum <- data.frame(B$pct_white,
B$pct_black,
B$pct_hisp,
B$pct_asian,
B$pct_other)
C_Sum <- data.frame(C$pct_white,
C$pct_black,
C$pct_hisp,
C$pct_asian,
C$pct_other)
D_Sum <- data.frame(D$pct_white,
D$pct_black,
D$pct_hisp,
D$pct_asian,
D$pct_other)
#Population Percentage Diversity for HOLC Grade A
summary(A_Sum)
## A.pct_white A.pct_black A.pct_hisp A.pct_asian
## Min. :11.29 Min. : 0.310 Min. : 1.540 Min. : 0.31
## 1st Qu.:68.69 1st Qu.: 2.360 1st Qu.: 3.760 1st Qu.: 1.23
## Median :77.79 Median : 5.080 Median : 5.260 Median : 1.94
## Mean :73.86 Mean : 8.873 Mean : 8.858 Mean : 3.11
## 3rd Qu.:83.06 3rd Qu.:11.560 3rd Qu.:10.120 3rd Qu.: 3.92
## Max. :94.12 Max. :65.930 Max. :74.850 Max. :17.70
## A.pct_other
## Min. : 1.780
## 1st Qu.: 4.110
## Median : 5.100
## Mean : 5.294
## 3rd Qu.: 6.140
## Max. :13.280
#Population Percentage Diversity for HOLC Grade B
summary(B_Sum)
## B.pct_white B.pct_black B.pct_hisp B.pct_asian
## Min. : 6.63 Min. : 1.19 Min. : 1.60 Min. : 0.180
## 1st Qu.:49.95 1st Qu.: 6.62 1st Qu.: 4.78 1st Qu.: 1.070
## Median :62.79 Median :12.51 Median : 7.76 Median : 2.120
## Mean :59.81 Mean :16.68 Mean :14.31 Mean : 3.315
## 3rd Qu.:71.96 3rd Qu.:23.73 3rd Qu.:17.55 3rd Qu.: 4.330
## Max. :90.97 Max. :76.27 Max. :90.77 Max. :31.390
## B.pct_other
## Min. : 1.040
## 1st Qu.: 4.620
## Median : 5.600
## Mean : 5.885
## 3rd Qu.: 7.020
## Max. :15.460
#Population Percentage Diversity for HOLC Grade C
summary(C_Sum)
## C.pct_white C.pct_black C.pct_hisp C.pct_asian
## Min. : 6.99 Min. : 1.85 Min. : 1.83 Min. : 0.140
## 1st Qu.:33.69 1st Qu.: 8.70 1st Qu.: 6.33 1st Qu.: 0.950
## Median :48.43 Median :19.96 Median :11.26 Median : 2.220
## Mean :48.65 Mean :23.01 Mean :18.89 Mean : 3.427
## 3rd Qu.:63.34 3rd Qu.:31.32 3rd Qu.:27.13 3rd Qu.: 3.750
## Max. :87.65 Max. :83.36 Max. :88.33 Max. :24.380
## C.pct_other
## Min. : 1.190
## 1st Qu.: 4.680
## Median : 5.790
## Mean : 6.025
## 3rd Qu.: 7.280
## Max. :15.220
#Population Percentage Diversity for HOLC Grade D
summary(D_Sum)
## D.pct_white D.pct_black D.pct_hisp D.pct_asian
## Min. : 3.77 Min. : 1.21 Min. : 1.10 Min. : 0.090
## 1st Qu.:22.46 1st Qu.:11.99 1st Qu.: 5.98 1st Qu.: 0.630
## Median :39.81 Median :28.64 Median :11.74 Median : 1.720
## Mean :39.05 Mean :31.65 Mean :20.18 Mean : 3.118
## 3rd Qu.:52.94 3rd Qu.:43.41 3rd Qu.:28.36 3rd Qu.: 3.790
## Max. :86.14 Max. :85.40 Max. :93.90 Max. :23.710
## D.pct_other
## Min. : 0.880
## 1st Qu.: 4.440
## Median : 5.610
## Mean : 6.001
## 3rd Qu.: 7.390
## Max. :17.730
#Comparison Side by Side
ggplot(pollist_1,aes(Grade,pct_white)) + geom_boxplot() + labs(y = 'Percentage of White',title = "Comparing the Percentage Of White with HOLC Grades") + theme(plot.title = element_text(hjust = 0.5))
ggplot(pollist_1,aes(Grade,pct_black)) + geom_boxplot() + labs(y = 'Percentage of African American',title = "Comparing the Percentage Of African Americans with HOLC Grades") + theme(plot.title = element_text(hjust = 0.5))
ggplot(pollist_1,aes(Grade,pct_hisp)) + geom_boxplot() + labs(y = 'Percentage of Hispanics/Latino',title = "Comparing the Percentage of Hispanics/Latino with HOLC Grades") + theme(plot.title = element_text(hjust = 0.5))
What was found is a generalize trend that from A to D there was an increasing percentage of Hispanics/Latinos and African American as the grade got lower. As it seems that lower grades were directly aligned with a lower white percentage. For the Decreasing of the Grades, the mean of percentage of white is 73.8, 59.8, 48.6, and 39.0. As for Hispanics/Latinos and African Americans were 8.8 and 8.8 for class A, 14.3 and 16.6 for class B, 18.8 and 23.0 for Class C, 28.3 and 31.6 for Class D respectively.