The article that was used: https://projects.fivethirtyeight.com/redlining/

Introduction

The article I chose is about redlining which is practice that was known to be discriminatory. It classified areas at different levels of risk in order to determine if the neighborhoods were “bad” to investments. What I am going to attempt to do is show the different population diversity based on the HOLC grades. In order to determine if there was a trend toward more diverse areas being at a lower Score “D” compared to the others “A”, “B”, “C”.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(ggplot2)

pollist = read.table(file="https://raw.githubusercontent.com/fivethirtyeight/data/master/redlining/metro-grades.csv",comment.char = "", header=TRUE,sep=",")
head(pollist)
##                    metro_area holc_grade white_pop black_pop hisp_pop asian_pop
## 1                   Akron, OH          A     24702      8624      956       688
## 2                   Akron, OH          B     41531     16499     2208      3367
## 3                   Akron, OH          C     73105     22847     3149      6291
## 4                   Akron, OH          D      6179      6921      567       455
## 5 Albany-Schenectady-Troy, NY          A     16989      1818     1317      1998
## 6 Albany-Schenectady-Troy, NY          B     26644      7094     4334      2509
##   other_pop total_pop pct_white pct_black pct_hisp pct_asian pct_other lq_white
## 1      1993     36963     66.83     23.33     2.59      1.86      5.39     0.94
## 2      4211     67816     61.24     24.33     3.26      4.96      6.21     0.86
## 3      7302    112694     64.87     20.27     2.79      5.58      6.48     0.91
## 4      1022     15144     40.80     45.70     3.75      3.00      6.75     0.57
## 5      1182     23303     72.91      7.80     5.65      8.57      5.07     1.09
## 6      4650     45230     58.91     15.68     9.58      5.55     10.28     0.88
##   lq_black lq_hisp lq_asian lq_other surr_area_white_pop surr_area_black_pop
## 1     1.41    1.00     0.46     0.97              304399               70692
## 2     1.47    1.26     1.23     1.11              304399               70692
## 3     1.23    1.08     1.38     1.16              304399               70692
## 4     2.76    1.45     0.74     1.21              304399               70692
## 5     0.66    0.77     1.21     0.72              387016               68371
## 6     1.33    1.30     0.78     1.47              387016               68371
##   surr_area_hisp_pop surr_area_asian_pop surr_area_other_pop
## 1              11037               17295               23839
## 2              11037               17295               23839
## 3              11037               17295               23839
## 4              11037               17295               23839
## 5              42699               41112               40596
## 6              42699               41112               40596
##   surr_area_pct_white surr_area_pct_black surr_area_pct_hisp
## 1               71.24               16.55               2.58
## 2               71.24               16.55               2.58
## 3               71.24               16.55               2.58
## 4               71.24               16.55               2.58
## 5               66.75               11.79               7.36
## 6               66.75               11.79               7.36
##   surr_area_pct_asian surr_area_pct_other
## 1                4.05                5.58
## 2                4.05                5.58
## 3                4.05                5.58
## 4                4.05                5.58
## 5                7.09                7.00
## 6                7.09                7.00
pollist_3 <- subset(pollist, metro_area != 'Scranton--Wilkes-Barre, PA') #Scranton--Wilkes-Barre, PA only has Grades A,B,D no C
pollist_2 <- pollist_3[,-14:-28]

#Example of replacing values 
#pollist_1$Grade <- str_replace(string=pollist_1$Grade, pattern="A", replacement="Best"

pollist_1 <- pollist_2  %>% rename(Area = metro_area, Total = total_pop, Grade = holc_grade)
head(pollist_1)
##                          Area Grade white_pop black_pop hisp_pop asian_pop
## 1                   Akron, OH     A     24702      8624      956       688
## 2                   Akron, OH     B     41531     16499     2208      3367
## 3                   Akron, OH     C     73105     22847     3149      6291
## 4                   Akron, OH     D      6179      6921      567       455
## 5 Albany-Schenectady-Troy, NY     A     16989      1818     1317      1998
## 6 Albany-Schenectady-Troy, NY     B     26644      7094     4334      2509
##   other_pop  Total pct_white pct_black pct_hisp pct_asian pct_other
## 1      1993  36963     66.83     23.33     2.59      1.86      5.39
## 2      4211  67816     61.24     24.33     3.26      4.96      6.21
## 3      7302 112694     64.87     20.27     2.79      5.58      6.48
## 4      1022  15144     40.80     45.70     3.75      3.00      6.75
## 5      1182  23303     72.91      7.80     5.65      8.57      5.07
## 6      4650  45230     58.91     15.68     9.58      5.55     10.28
A <- subset(pollist_1,Grade == 'A')
B <- subset(pollist_1,Grade == 'B')
C <- subset(pollist_1,Grade == 'C')
D <- subset(pollist_1,Grade == 'D')

A_Sum <- data.frame(A$pct_white,
                    A$pct_black,
                    A$pct_hisp,
                    A$pct_asian,
                    A$pct_other)

B_Sum <- data.frame(B$pct_white,
                    B$pct_black,
                    B$pct_hisp,
                    B$pct_asian,
                    B$pct_other)

C_Sum <- data.frame(C$pct_white,
                    C$pct_black,
                    C$pct_hisp,
                    C$pct_asian,
                    C$pct_other)

D_Sum <- data.frame(D$pct_white,
                    D$pct_black,
                    D$pct_hisp,
                    D$pct_asian,
                    D$pct_other)

#Population Percentage Diversity for HOLC Grade A
summary(A_Sum)
##   A.pct_white     A.pct_black       A.pct_hisp      A.pct_asian   
##  Min.   :11.29   Min.   : 0.310   Min.   : 1.540   Min.   : 0.31  
##  1st Qu.:68.69   1st Qu.: 2.360   1st Qu.: 3.760   1st Qu.: 1.23  
##  Median :77.79   Median : 5.080   Median : 5.260   Median : 1.94  
##  Mean   :73.86   Mean   : 8.873   Mean   : 8.858   Mean   : 3.11  
##  3rd Qu.:83.06   3rd Qu.:11.560   3rd Qu.:10.120   3rd Qu.: 3.92  
##  Max.   :94.12   Max.   :65.930   Max.   :74.850   Max.   :17.70  
##   A.pct_other    
##  Min.   : 1.780  
##  1st Qu.: 4.110  
##  Median : 5.100  
##  Mean   : 5.294  
##  3rd Qu.: 6.140  
##  Max.   :13.280
#Population Percentage Diversity for HOLC Grade B
summary(B_Sum)
##   B.pct_white     B.pct_black      B.pct_hisp     B.pct_asian    
##  Min.   : 6.63   Min.   : 1.19   Min.   : 1.60   Min.   : 0.180  
##  1st Qu.:49.95   1st Qu.: 6.62   1st Qu.: 4.78   1st Qu.: 1.070  
##  Median :62.79   Median :12.51   Median : 7.76   Median : 2.120  
##  Mean   :59.81   Mean   :16.68   Mean   :14.31   Mean   : 3.315  
##  3rd Qu.:71.96   3rd Qu.:23.73   3rd Qu.:17.55   3rd Qu.: 4.330  
##  Max.   :90.97   Max.   :76.27   Max.   :90.77   Max.   :31.390  
##   B.pct_other    
##  Min.   : 1.040  
##  1st Qu.: 4.620  
##  Median : 5.600  
##  Mean   : 5.885  
##  3rd Qu.: 7.020  
##  Max.   :15.460
#Population Percentage Diversity for HOLC Grade C
summary(C_Sum)
##   C.pct_white     C.pct_black      C.pct_hisp     C.pct_asian    
##  Min.   : 6.99   Min.   : 1.85   Min.   : 1.83   Min.   : 0.140  
##  1st Qu.:33.69   1st Qu.: 8.70   1st Qu.: 6.33   1st Qu.: 0.950  
##  Median :48.43   Median :19.96   Median :11.26   Median : 2.220  
##  Mean   :48.65   Mean   :23.01   Mean   :18.89   Mean   : 3.427  
##  3rd Qu.:63.34   3rd Qu.:31.32   3rd Qu.:27.13   3rd Qu.: 3.750  
##  Max.   :87.65   Max.   :83.36   Max.   :88.33   Max.   :24.380  
##   C.pct_other    
##  Min.   : 1.190  
##  1st Qu.: 4.680  
##  Median : 5.790  
##  Mean   : 6.025  
##  3rd Qu.: 7.280  
##  Max.   :15.220
#Population Percentage Diversity for HOLC Grade D
summary(D_Sum)
##   D.pct_white     D.pct_black      D.pct_hisp     D.pct_asian    
##  Min.   : 3.77   Min.   : 1.21   Min.   : 1.10   Min.   : 0.090  
##  1st Qu.:22.46   1st Qu.:11.99   1st Qu.: 5.98   1st Qu.: 0.630  
##  Median :39.81   Median :28.64   Median :11.74   Median : 1.720  
##  Mean   :39.05   Mean   :31.65   Mean   :20.18   Mean   : 3.118  
##  3rd Qu.:52.94   3rd Qu.:43.41   3rd Qu.:28.36   3rd Qu.: 3.790  
##  Max.   :86.14   Max.   :85.40   Max.   :93.90   Max.   :23.710  
##   D.pct_other    
##  Min.   : 0.880  
##  1st Qu.: 4.440  
##  Median : 5.610  
##  Mean   : 6.001  
##  3rd Qu.: 7.390  
##  Max.   :17.730
#Comparison Side by Side
ggplot(pollist_1,aes(Grade,pct_white)) + geom_boxplot() + labs(y = 'Percentage of White',title = "Comparing the Percentage Of White with HOLC Grades") +   theme(plot.title = element_text(hjust = 0.5))

ggplot(pollist_1,aes(Grade,pct_black)) + geom_boxplot() + labs(y = 'Percentage of African American',title = "Comparing the Percentage Of African Americans with HOLC Grades") +   theme(plot.title = element_text(hjust = 0.5))

ggplot(pollist_1,aes(Grade,pct_hisp)) + geom_boxplot() + labs(y = 'Percentage of Hispanics/Latino',title = "Comparing the Percentage of Hispanics/Latino with HOLC Grades") +   theme(plot.title = element_text(hjust = 0.5))

Conclusions

What was found is a generalize trend that from A to D there was an increasing percentage of Hispanics/Latinos and African American as the grade got lower. As it seems that lower grades were directly aligned with a lower white percentage. For the Decreasing of the Grades, the mean of percentage of white is 73.8, 59.8, 48.6, and 39.0. As for Hispanics/Latinos and African Americans were 8.8 and 8.8 for class A, 14.3 and 16.6 for class B, 18.8 and 23.0 for Class C, 28.3 and 31.6 for Class D respectively.