This creates datasets for a shiny app: https://isquared.shinyapps.io/OurCommunities_Maryland/. This process will speed up the app performance by skipping API access each time when the app is loaded each time.
Data come from US Census Bureau data tables, based on decennial censuses or American Community Surveys.
*** Year = 2020
*** All ACS estimates are five-year data (i.e., 2016-2020)
See ACS tables here:
* https://api.census.gov/data/2020/acs/acs5/variables
* https://api.census.gov/data/2020/acs/acs1/variables
dta <- get_decennial(
geography = "county",
variables = "P1_001N", #Total pop
year = 2020,
state = "MD",
geometry = FALSE
)
colnames(dta)<-tolower(colnames(dta))
md_co_totalpop <- dta %>%
mutate(
county = name,
county = gsub(", Maryland", "", county),
county = gsub(" County", "", county),
totalpop = value
)%>%
select(-variable, -value)
dta <- get_decennial(
geography = "tract",
variables = "P1_001N", #Total pop
year = 2020,
state = "MD",
geometry = FALSE
)
colnames(dta)<-tolower(colnames(dta))
md_tr_totalpop <- dta %>%
mutate(
county = name,
county = gsub(", Maryland", "", county),
county = gsub(" County", "", county),
county = sub("^.*?, ", "", county),
totalpop = value
)%>%
select(-variable, -value)
race_vars <- c(
NH_White = "B03002_003", #NH White
NH_Black = "B03002_004",
NH_AIAN = "B03002_005",
NH_Asian = "B03002_006",
NH_NHPI = "B03002_007",
NH_Other = "B03002_008",
NH_Multiple = "B03002_009",
Hispanic = "B03002_012"
)
dta <- get_acs(
state = "MD",
geography = "county",
variables = race_vars, #8 mutually exclusive groups
summary_var = "B03002_001", #Total population
year = 2020
)
colnames(dta)<-tolower(colnames(dta))
md_co_racelong <- dta %>%
mutate(
county=name,
county = gsub(", Maryland", "", county),
county = gsub(" County", "", county),
size = estimate,
percent = round( 100 * (estimate / summary_est), 2) ) %>%
select(geoid, county, variable, size, percent)%>%
rename(race=variable)%>%
mutate(race = ifelse(race=="NH_AIAN", "NH_AIAN/NHPI/Other",
ifelse(race=="NH_NHPI", "NH_AIAN/NHPI/Other",
ifelse(race=="NH_Other", "NH_AIAN/NHPI/Other",
race))))%>%
group_by(geoid, county, race)%>%
summarize(
size = sum(size),
percent = sum(percent) )%>%
mutate(group="race")
dta <- get_acs(
state = "MD",
geography = "county",
variable = "B06002_001", #Median age
year = 2020
)
colnames(dta)<-tolower(colnames(dta))
md_co_medage <- dta %>%
select(geoid, estimate, moe) %>%
mutate(medageround=round(estimate)) %>%
rename(
medage = estimate,
medage_moe = moe)
dta <- get_acs(
state = "MD",
geography = "tract",
variable = "B06002_001", #Median age
year = 2020,
geometry = FALSE
)
colnames(dta)<-tolower(colnames(dta))
md_tr_medage <- dta %>%
select(geoid, estimate, moe) %>%
mutate(medageround=round(estimate)) %>%
rename(
medage = estimate,
medage_moe = moe)
maleage_vars <- c(
age_0_4 = "B01001_003",
age_5_9 = "B01001_004",
age_10_14 = "B01001_005",
age_15_17 = "B01001_006",
age_18_19 = "B01001_007",
age_20 = "B01001_008",
age_21 = "B01001_009",
age_22_24 = "B01001_010",
age_25_29 = "B01001_011",
age_30_34 = "B01001_012",
age_35_39 = "B01001_013",
age_40_44 = "B01001_014",
age_45_49 = "B01001_015",
age_50_54 = "B01001_016",
age_55_59 = "B01001_017",
age_60_61 = "B01001_018",
age_62_64 = "B01001_019",
age_65_66 = "B01001_020",
age_67_69 = "B01001_021",
age_70_74 = "B01001_022",
age_75_79 = "B01001_023",
age_80_84 = "B01001_024",
age_85_and_over = "B01001_025"
)
femaleage_vars <- c(
age_0_4 = "B01001_027",
age_5_9 = "B01001_028",
age_10_14 = "B01001_029",
age_15_17 = "B01001_030",
age_18_19 = "B01001_031",
age_20 = "B01001_032",
age_21 = "B01001_033",
age_22_24 = "B01001_034",
age_25_29 = "B01001_035",
age_30_34 = "B01001_036",
age_35_39 = "B01001_037",
age_40_44 = "B01001_038",
age_45_49 = "B01001_039",
age_50_54 = "B01001_040",
age_55_59 = "B01001_041",
age_60_61 = "B01001_042",
age_62_64 = "B01001_043",
age_65_66 = "B01001_044",
age_67_69 = "B01001_045",
age_70_74 = "B01001_046",
age_75_79 = "B01001_047",
age_80_84 = "B01001_048",
age_85_and_over = "B01001_049"
)
dtamale <- get_acs(
state = "MD",
geography = "county",
variables = maleage_vars,
summary_var = "B01001_002", #Total population
year = 2020
)
dtafemale <- get_acs(
state = "MD",
geography = "county",
variables = femaleage_vars,
summary_var = "B01001_026", #Total population
year = 2020
)
colnames(dtamale)<-tolower(colnames(dtamale))
colnames(dtafemale)<-tolower(colnames(dtafemale))
dta<-left_join(dtamale, dtafemale,
by=c("geoid", "variable"))
md_co_agelong <- dta %>%
rename(
agegroup=variable,
name = name.x)%>%
mutate(
estimate = estimate.x + estimate.y,
summary_est = summary_est.x + summary_est.y,
percent = round( 100 * (estimate / summary_est), 2) )%>%
select(geoid, name, agegroup, estimate, summary_est, percent)%>%
mutate(
#beginning age for the age interval
age=as.numeric(sapply(strsplit(agegroup,"_"), `[`, 2)),
age=floor(age/5)*5
)%>%
group_by(geoid, name, age)%>%
summarize(percent = sum(percent))%>%
mutate(
agegroup=paste0(age, "-", age+4),
county = name,
county = gsub(", Maryland", "", county),
county = gsub(" County", "", county))
md_co_pop25over <- md_co_agelong %>%
filter(age>=25)%>%
group_by(geoid)%>%
summarize_at(vars(percent), funs(sum(.)))%>%
rename(percent25over = percent)
md_co_pop25over <-left_join(md_co_totalpop, md_co_pop25over,
by=c("geoid"))%>%
mutate(pop25over=round(totalpop*percent25over/100, 0))%>%
select(geoid, pop25over)
dtamale <- get_acs(
state = "MD",
geography = "tract",
variables = maleage_vars,
summary_var = "B01001_002", #Total population
year = 2020
)
dtafemale <- get_acs(
state = "MD",
geography = "tract",
variables = femaleage_vars,
summary_var = "B01001_026", #Total population
year = 2020
)
colnames(dtamale)<-tolower(colnames(dtamale))
colnames(dtafemale)<-tolower(colnames(dtafemale))
dta<-left_join(dtamale, dtafemale,
by=c("geoid", "variable"))
md_tr_agelong <- dta %>%
rename(
agegroup=variable,
name = name.x)%>%
mutate(
estimate = estimate.x + estimate.y,
summary_est = summary_est.x + summary_est.y,
percent = round( 100 * (estimate / summary_est), 2) )%>%
select(geoid, name, agegroup, estimate, summary_est, percent)%>%
mutate(
#beginning age for the age interval
age=as.numeric(sapply(strsplit(agegroup,"_"), `[`, 2)),
age=floor(age/5)*5
)%>%
group_by(geoid, name, age)%>%
summarize(percent = sum(percent))%>%
mutate(
agegroup=paste0(age, "-", age+4),
county = name,
county = gsub(", Maryland", "", county),
county = gsub(" County", "", county),
county = sub("^.*?, ", "", county))
md_tr_pop25over <- md_tr_agelong %>%
filter(age>=25)%>%
group_by(geoid)%>%
summarize_at(vars(percent), funs(sum(.)))%>%
rename(percent25over = percent)
md_tr_pop25over <-left_join(md_tr_totalpop, md_tr_pop25over,
by=c("geoid"))%>%
mutate(pop25over=round(totalpop*percent25over/100, 0))%>%
select(geoid, pop25over)
ses_vars <- c(
NH_Black = "B03002_004",
NH_AIAN = "B03002_005"
)
dta <- get_acs(
state = "MD",
geography = "county",
variables = "B19013_001", #Median household income
year = 2020
)
colnames(dta)<-tolower(colnames(dta))
md_co_hhmedincome <- dta %>%
mutate(
hhmedincome = round(estimate/1000, 1) , #now converted to thousands
hhmedincomemoe = round(moe/1000, 1),
hhmedincomeround = round(estimate/1000, 0)) %>%
select(geoid, starts_with("hhmedincome"))
#EDUCATION
#https://api.census.gov/data/2019/acs/acs1/groups/B15003.html
dta <- get_acs(
state = "MD",
geography = "county",
variables = c("B15003_025E", "B15003_024E", "B15003_023E", "B15003_022E",
"B15003_021E", "B15003_020E", "B15003_019E"),
year = 2020
)
colnames(dta)<-tolower(colnames(dta))
md_co_edu <- dta %>%
select(-moe, -name)%>%
arrange(geoid, variable)%>%
spread(key = "variable",
value = "estimate", convert = TRUE)%>%
rename(
edu_doc = "B15003_025", #Doctorate degree","EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND
edu_prof = "B15003_024", #Professional school degree","EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER"],
edu_master = "B15003_023", #Master's degree","EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER"],
edu_bachelor = "B15003_022", #Bachelor's degree",
edu_somelessthan1 = "B15003_019", #Some college, less than 1 year
edu_somemorethan1 = "B15003_020", #Some college, 1 or more years, no degree
edu_someassoc = "B15003_021" #Associate's degree
)%>%
mutate(
edu_profdoc = edu_doc + edu_prof,
edu_somecollege = edu_somelessthan1 + edu_somemorethan1 + edu_someassoc
)
md_co_edu<-left_join(md_co_pop25over, md_co_edu, by=c("geoid"))%>%
select(-ends_with(".x"), -ends_with(".y"))%>%
mutate(
pctedu_profdoc = round(100 * edu_profdoc / pop25over, 0),
pctedu_master = round(100 * edu_master / pop25over, 0),
pctedu_bachelor = round(100 * edu_bachelor / pop25over, 0),
pctedu_somecollege = round(100 * edu_somecollege / pop25over, 0),
pctedu_lesscollege = 100 - (pctedu_profdoc + pctedu_master + pctedu_bachelor + pctedu_somecollege),
pctedu_mastermore = pctedu_master + pctedu_profdoc,
pctedu_bachelormore = pctedu_bachelor + pctedu_master + pctedu_profdoc
)
dta <- get_acs(
state = "MD",
geography = "tract",
variables = "B19013_001", #Median household income
year = 2020
)
colnames(dta)<-tolower(colnames(dta))
md_tr_hhmedincome <- dta %>%
mutate(
hhmedincome = round(estimate/1000, 1) , #now converted to thousands
hhmedincomemoe = round(moe/1000, 1),
hhmedincomeround = round(estimate/1000, 0)) %>%
select(geoid, starts_with("hhmedincome"))
dta <- get_acs(
state = "MD",
geography = "tract",
variables = c("B15003_025E", "B15003_024E", "B15003_023E", "B15003_022E",
"B15003_021E", "B15003_020E", "B15003_019E"),
year = 2020
)
colnames(dta)<-tolower(colnames(dta))
md_tr_edu <- dta %>%
select(-moe, -name)%>%
arrange(geoid, variable)%>%
spread(key = "variable",
value = "estimate", convert = TRUE)%>%
rename(
edu_doc = "B15003_025", #Doctorate degree","EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND
edu_prof = "B15003_024", #Professional school degree","EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER"],
edu_master = "B15003_023", #Master's degree","EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER"],
edu_bachelor = "B15003_022", #Bachelor's degree",
edu_somelessthan1 = "B15003_019", #Some college, less than 1 year
edu_somemorethan1 = "B15003_020", #Some college, 1 or more years, no degree
edu_someassoc = "B15003_021" #Associate's degree
)%>%
mutate(
edu_profdoc = edu_doc + edu_prof,
edu_somecollege = edu_somelessthan1 + edu_somemorethan1 + edu_someassoc
)
md_tr_edu<-left_join(md_tr_pop25over, md_tr_edu, by=c("geoid"))%>%
select(-ends_with(".x"), -ends_with(".y"))%>%
mutate(
pctedu_profdoc = round(100 * edu_profdoc / pop25over, 0),
pctedu_master = round(100 * edu_master / pop25over, 0),
pctedu_bachelor = round(100 * edu_bachelor / pop25over, 0),
pctedu_somecollege = round(100 * edu_somecollege / pop25over, 0),
pctedu_lesscollege = 100 - (pctedu_profdoc + pctedu_master + pctedu_bachelor + pctedu_somecollege),
pctedu_mastermore = pctedu_master + pctedu_profdoc,
pctedu_bachelormore = pctedu_bachelor + pctedu_master + pctedu_profdoc
)
[“B06001_001E”,“Estimate!!Total:”,“PLACE OF BIRTH BY AGE IN THE UNITED STATES”], [“B06007_001E”,“Estimate!!Total:”,“PLACE OF BIRTH BY LANGUAGE SPOKEN AT HOME AND ABILITY TO SPEAK ENGLISH IN THE UNITED STATES”],
#https://api.census.gov/data/2010/dec/sf1/variables.html
dta2010 <- get_decennial(
geography = "county",
variables = "P001001", #Total pop in 2010. Note different var name.
year = 2010,
state = "MD",
geometry = FALSE)%>%
mutate(geoid = GEOID, totalpop2010 = value)%>%
select(geoid, starts_with("totalpop"))
#https://api.census.gov/data/2000/dec/sf1/variables.html
dta2000 <- get_decennial(
geography = "county",
variables = "P001001", #Total pop in 2000. Note different var name.
year = 2000,
state = "MD",
geometry = FALSE)%>%
mutate(geoid = GEOID, totalpop2000 = value)%>%
select(geoid, starts_with("totalpop"))
md_co_totalpop <- left_join(md_co_totalpop, dta2010, by=c("geoid"))
md_co_totalpop <- left_join(md_co_totalpop, dta2000, by=c("geoid"))
dfcolist <-list(
md_co_totalpop,
#md_co_pop25over,
md_co_medage,
md_co_hhmedincome,
md_co_edu
)
for(df in dfcolist) {
print(class(df))
print(dim(df))
print(colnames(df))
}
[1] “tbl_df” “tbl” “data.frame” [1] 24 6 [1] “geoid” “name” “county” “totalpop” “totalpop2010” [6] “totalpop2000” [1] “tbl_df” “tbl” “data.frame” [1] 24 4 [1] “geoid” “medage” “medage_moe” “medageround” [1] “tbl_df” “tbl” “data.frame” [1] 24 4 [1] “geoid” “hhmedincome” “hhmedincomemoe” “hhmedincomeround” [1] “tbl_df” “tbl” “data.frame” [1] 24 18 [1] “geoid” “pop25over” “edu_somelessthan1”
[4] “edu_somemorethan1” “edu_someassoc” “edu_bachelor”
[7] “edu_master” “edu_prof” “edu_doc”
[10] “edu_profdoc” “edu_somecollege” “pctedu_profdoc”
[13] “pctedu_master” “pctedu_bachelor” “pctedu_somecollege” [16] “pctedu_lesscollege” “pctedu_mastermore” “pctedu_bachelormore”
md_co <- left_join(md_co_totalpop, md_co_medage, by=c("geoid"))
md_co <- left_join(md_co, md_co_hhmedincome, by=c("geoid"))
md_co <- left_join(md_co, md_co_edu, by=c("geoid"))
md_co <- md_co %>% mutate(geoid = as.character(geoid))
dim(md_co)
[1] 24 29
colnames(md_co)
[1] “geoid” “name” “county”
[4] “totalpop” “totalpop2010” “totalpop2000”
[7] “medage” “medage_moe” “medageround”
[10] “hhmedincome” “hhmedincomemoe” “hhmedincomeround”
[13] “pop25over” “edu_somelessthan1” “edu_somemorethan1”
[16] “edu_someassoc” “edu_bachelor” “edu_master”
[19] “edu_prof” “edu_doc” “edu_profdoc”
[22] “edu_somecollege” “pctedu_profdoc” “pctedu_master”
[25] “pctedu_bachelor” “pctedu_somecollege” “pctedu_lesscollege” [28] “pctedu_mastermore” “pctedu_bachelormore”
dftrlist <-list(
md_tr_totalpop,
#md_tr_pop25over,
md_tr_medage,
md_tr_hhmedincome,
md_tr_edu
)
for(df in dftrlist) {
print(class(df))
print(dim(df))
print(colnames(df))
}
[1] “tbl_df” “tbl” “data.frame” [1] 1475 4 [1] “geoid” “name” “county” “totalpop” [1] “tbl_df” “tbl” “data.frame” [1] 1475 4 [1] “geoid” “medage” “medage_moe” “medageround” [1] “tbl_df” “tbl” “data.frame” [1] 1475 4 [1] “geoid” “hhmedincome” “hhmedincomemoe” “hhmedincomeround” [1] “tbl_df” “tbl” “data.frame” [1] 1475 18 [1] “geoid” “pop25over” “edu_somelessthan1”
[4] “edu_somemorethan1” “edu_someassoc” “edu_bachelor”
[7] “edu_master” “edu_prof” “edu_doc”
[10] “edu_profdoc” “edu_somecollege” “pctedu_profdoc”
[13] “pctedu_master” “pctedu_bachelor” “pctedu_somecollege” [16] “pctedu_lesscollege” “pctedu_mastermore” “pctedu_bachelormore”
md_tr <- left_join(md_tr_totalpop, md_tr_medage, by=c("geoid"))
md_tr <- left_join(md_tr, md_tr_hhmedincome, by=c("geoid"))
md_tr <- left_join(md_tr, md_tr_edu, by=c("geoid"))
md_tr <- md_tr %>% mutate(geoid = as.character(geoid))
dim(md_tr)
[1] 1475 27
colnames(md_tr)
[1] “geoid” “name” “county”
[4] “totalpop” “medage” “medage_moe”
[7] “medageround” “hhmedincome” “hhmedincomemoe”
[10] “hhmedincomeround” “pop25over” “edu_somelessthan1”
[13] “edu_somemorethan1” “edu_someassoc” “edu_bachelor”
[16] “edu_master” “edu_prof” “edu_doc”
[19] “edu_profdoc” “edu_somecollege” “pctedu_profdoc”
[22] “pctedu_master” “pctedu_bachelor” “pctedu_somecollege” [25] “pctedu_lesscollege” “pctedu_mastermore” “pctedu_bachelormore”
colnames(md_co_racelong)
[1] “geoid” “county” “race” “size” “percent” “group”
colnames(md_co_agelong)
[1] “geoid” “name” “age” “percent” “agegroup” “county”
colnames(md_tr_agelong)
[1] “geoid” “name” “age” “percent” “agegroup” “county”
write.csv (md_co, "Shiny_OurCommunities_MD/md_co.csv", na='.')
write.csv (md_co_racelong, "Shiny_OurCommunities_MD/md_co_racelong.csv", na='.')
write.csv (md_co_agelong, "Shiny_OurCommunities_MD/md_co_agelong.csv", na='.')
write.csv (md_tr, "Shiny_OurCommunities_MD/md_tr.csv", na='.')
write.csv (md_tr_agelong, "Shiny_OurCommunities_MD/md_tr_agelong.csv", na='.')