library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
# Load the CA MSA table from disk.
df <- fread("D:/Documents/RFiles/Datasets/CA_MSA.csv")
# Per-row absolute gap between the nhasn/nhasnc share and the nhwhite/nhwhitec
# share; these terms are summed per NAME and halved further down.
# NOTE(review): presumably nhasn/nhwhite are unit-level counts and
# nhasnc/nhwhitec the matching area totals -- confirm against the codebook.
df$wa <- with(df, abs(nhasn / nhasnc - nhwhite / nhwhitec))
#1
# tpop_all = total population of all: sum of tpop within each NAME.
tpop_all <- summarise(
  group_by(df, NAME),
  `Total Population` = sum(tpop)
)
#2
# wad = white-asian dissimilarity index: half of the summed wa terms per NAME.
wad <- summarise(
  group_by(df, NAME),
  `White-Asian Dissimilarity` = sum(wa) / 2
)
#3
# Load the HOLC census-tract table from disk.
HOLC <- fread("D:/Documents/RFiles/Datasets/holc_census_tracts.csv")
# HOLC_aarea = HOLC Average Area: mean of holc_area within each state.
HOLC_aarea <- summarise(
  group_by(HOLC, state),
  `HOLC Average Area` = mean(holc_area)
)
#4
library(ggplot2)
# Box plot of holc_area for every state; boxes are filled by state.
HOLC %>%
  ggplot(aes(x = state, y = holc_area, fill = state)) +
  geom_boxplot() +
  labs(x = "State", y = "HOLC Area", fill = "State")
#5
# Count the rows with state "TX" and holc_grade "D", tallied per st_name.
# Rows are narrowed first and grouped afterwards; an st_name with no matching
# rows does not appear in the result.
TX_HOLC_D <- HOLC %>%
  filter(state == "TX", holc_grade == "D") %>%
  group_by(st_name) %>%
  summarise(count = n())
#6
# Download tract-level 2017-2021 ACS 5-year estimates (with geometry) for
# Bexar County, TX, then derive percentage columns.
library(tidycensus)
#census_var <- load_variables(2021, 'acs5', cache = TRUE)
# ACS variables to request; the names on the left are the column names the
# code below reads from the wide result.
#   poptotal / black : table B03002 (counts of persons)
#   poverty / hhtotal: table B17017 (counts of households); hhtotal is the
#                      table total and serves as the poverty denominator.
var <- c(poptotal='B03002_001E',
         black='B03002_004E',
         poverty='B17017_002E',
         hhtotal='B17017_001E')
st <-"TX"
ct <-"Bexar"
TX_poverty_b <- get_acs(geography = "tract", variables = var, county = ct,
                        state = st, output = "wide", year = 2021,
                        geometry = TRUE)
# Knitted console output (download progress bar removed):
## Getting data from the 2017-2021 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
# Share of the tract population counted in B03002_004, in percent.
TX_poverty_b$black_pct <- TX_poverty_b$black / TX_poverty_b$poptotal * 100
# Share of households below the poverty level, in percent. B17017_002 counts
# households, so it is divided by the household total (B17017_001) rather than
# by the person count in poptotal, which would mix two different universes.
TX_poverty_b$poverty_pct <- TX_poverty_b$poverty / TX_poverty_b$hhtotal * 100
# Tracts with a zero denominator yield NaN in these columns.
#TX_poverty_b<-TX_poverty_b[,c(1,12,13)]
#7
# Keep only the HOLC rows whose st_name is "San Antonio".
HOLC_SA <- HOLC[HOLC[["st_name"]] == "San Antonio", ]
#HOLC_SA <- HOLC_SA[,c(2,8,14,18)]
# Convert the tract id to character before joining it to the ACS GEOID column.
# NOTE(review): presumably GEOID comes back from get_acs() as character --
# confirm.
HOLC_SA$geoid <- as.character(HOLC_SA[["geoid"]])
#names(HOLC_SA)[3] <-"GEOID"
# Join the San Antonio HOLC rows to the ACS table on the tract id.
SA_HOLC_b <- merge(
  x = HOLC_SA,
  y = TX_poverty_b,
  by.x = "geoid",
  by.y = "GEOID"
)
# Mean black_pct per HOLC grade, ignoring missing values.
avg_black_pct <- summarise(
  group_by(SA_HOLC_b, holc_grade),
  avg_black_pct = mean(black_pct, na.rm = TRUE)
)
# Bar chart of those means; geom_col() draws bar heights straight from y.
avg_black_pct %>%
  ggplot(aes(x = holc_grade, y = avg_black_pct, fill = holc_grade)) +
  geom_col() +
  labs(
    title = "Average Black Percentage by HOLC Grade",
    x = "HOLC Grade",
    y = "Black Percentage"
  )
#8
# Box plot of holc_area by HOLC grade for the San Antonio rows; boxes are
# filled by grade.
HOLC_SA %>%
  ggplot(aes(x = holc_grade, y = holc_area, fill = holc_grade)) +
  geom_boxplot() +
  labs(x = "HOLC Grade", y = "HOLC Area")