Exported all PUMAs for state of Texas, kept as data_raw
library(ipumsr)
# Note to self: Must set wd to where all files are, then open ddi and feed into read_ipums_micro
setwd("~/Desktop/Every Texan")
ddi <- read_ipums_ddi("usa_00027.xml")
data_raw <- read_ipums_micro(ddi)
## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.
data <- data_raw
names(data)
## [1] "YEAR" "SAMPLE" "SERIAL" "CBSERIAL" "HHWT" "CLUSTER"
## [7] "STATEFIP" "PUMA" "STRATA" "GQ" "OWNERSHP" "OWNERSHPD"
## [13] "OWNCOST" "RENT" "HHINCOME" "CINETHH" "VEHICLES" "PERNUM"
## [19] "PERWT" "AGE" "RACE" "RACED" "HISPAN" "HISPAND"
## [25] "HCOVANY" "SCHOOL" "EDUC" "EDUCD" "GRADEATT" "GRADEATTD"
## [31] "EMPSTAT" "EMPSTATD" "LABFORCE" "CLASSWKR" "CLASSWKRD" "OCCSOC"
## [37] "WKSWORK1" "UHRSWORK" "INCTOT" "INCWAGE" "POVERTY" "TRANTIME"
library(stringr)
data$PUMA_4<-str_pad(data$PUMA, 5, pad = "0") #add leading zeros
data$PUMAID <- paste(data$STATEFIP,data$PUMA_4,sep = "") #concatenate state fips and puma fips
data$PUMAID_num <- as.numeric(data$PUMAID)
ids <- plyr::count(data,'PUMAID')
dal_codes<-c(4802304,4802305,4802306,4802307,4802309,4802310,4802311,4802312,4802313,4802314,4802315,4802316,4802319,4801901,4802001)
dal_codes_string<-c("4802304","4802305","4802306","4802307","4802309","4802310","4802311","4802312","4802313","4802314","4802315","4802316","4802319","4801901","4802001")
data$DalPumas <- ifelse(data$PUMAID %in% dal_codes_string,1,0)
data<-subset(data, DalPumas == 1)
dal_ids <- plyr::count(data,'PUMAID')
dal_ids
Note: Frequencies in these initial test tables are unweighted counts of records, not population/households.
# Recoding Hispanic/Latino ethnicity
data$HISPAN <- as.numeric(data$HISPAN)
data$hisp <- Recode(data$HISPAN, recodes = "0='Not Hispanic'; 1:4='Hispanic/Latino'; else = NA", as.factor=T)
# Recoding Race
data$RACE <- as.numeric(data$RACE)
data$race_c <- Recode(data$RACE, recodes = "1='White'; 2='Black'; 3='Native American'; 4:6='Asian/PI'; else = 'Other or 2+'", as.factor=T)
#Creating Interaction Term for Race/Ethnicity to create simplified categories
data$race_eth <- interaction(data$hisp, data$race_c, sep = "_")
data$race_eth <- as.factor(ifelse(substr(as.character(data$race_eth),1,8) == "Hispanic", "Hispanic, Any Race", as.character(data$race_eth)))
data$race_eth <-Recode(data$race_eth, recodes="'Not Hispanic_Other or 2+'='Other NH'; 'Not Hispanic_White'='White NH';'Not Hispanic_Black'='Black NH';'Not Hispanic_Native American'='AIAN NH'; 'Not Hispanic_Asian/PI'='Asian/PI NH'", as.factor=T)
# Coding Age
data$AGE <- as.numeric(data$AGE)
data$age_c <- data$age_c <-Recode(data$AGE, recodes="0:17='< 18';18:24='18-24'; 25:44='25-44';45:64='45-64';65:100='65+'", as.factor=T)
Below I’m creating mostly dummy variables (binary 1,0) to use to calculate overall totals and percentages using the survey design/weights in next section. The ratios will be calculated in Excel or using “svymean.”
#INDICATOR 2 -- Business Ownership: adults aged 25-64 who are self-employed (i.e. own an incorporated business business).
data$work_age <- ifelse(data$AGE>=25 & data$AGE<65,1,0) #working age = 1
data$self_emp <- ifelse(data$CLASSWKRD==14,1,0) #incorporated owner = 1
#Incorporated Owner-> CLASSWRKD=14. Unincorporated owner = 13
data$bus_own <- ifelse(data$work_age+data$self_emp==2,1,0) #work age + incorporated owner = 1
data$self_emp_broad <- ifelse(data$CLASSWKR==1,1,0) #broader "self-employed category regardless of incorporation
#INDICATOR 4 -- Labor Force Non-Participation: adults aged 25-64 who are not in the labor force.
data$EMPSTAT <- as.numeric(data$EMPSTAT)
data$empl_c <-Recode(data$EMPSTAT, recodes="0=NA; 1='Employed'; 2='Unemployed'; 3='Not in LF'", as.factor=T)
data$not_LF <- ifelse(data$work_age==1 & data$EMPSTAT==3,1,0) # working age + not in LF = 1
#INDICATOR 5 -- Unemployment: adults aged 25-64 who are unemployed
data$unemp <- ifelse(data$work_age==1 & data$EMPSTAT==2,1,0) # working age + unemployed = 1
#INDICATOR 6 -- High Growth High Paying Jobs: adults aged 25-64 in high-growth, high-paying occupations.
#OCCSOC field has occupation codes. First 2 digits are broad cross-industry categories of occupations, the same 7 codes that were high earning/growth in the 2019 report are the ones above $70k today as well (lowest is $73k).
Hwage_occ <- c(11, 23, 15, 17, 29, 13, 19) # 7 codes from MSA BLS download
data$soc2dig <- substr(data$OCCSOC, 1, 2) # grabbing first two digits
data$Hwage_occ <- ifelse(data$soc2dig %in% Hwage_occ,1,0) #if high wage/high growth = 1
data$wrkage_HwageOcc <- ifelse(data$Hwage_occ+data$work_age==2,1,0) #if hw/hg + working age = 1
#INDICATOR 7 -- Median Full-Time Income: median annual incomes for currently employed adults aged 25-64 working 30+ hours per week.
data$wrk30hr <- ifelse(data$UHRSWORK>=30,1,0) #works more than 30 hrs = 1
data$workingdef <- ifelse(data$work_age+data$wrk30hr==2 & data$empl_c=='Employed',1,0) # if works 30+ hours + working age + employed = 1
data$cpi <-Recode(data$YEAR, recodes="2019=1.05 ; 2021 = 1") #adjust for inflation: CPI Calculator $1 in 2019 = $1.05 in 2021.
#Using "INCWAGE" (income from wages) variable instead of "INCTOT" (total income) Variable since "INCWAGE" was used in 2019 report notes.
data$wage_inc_adj <- data$INCWAGE * data$cpi #adjusting for inflation to 2021 dollars
data$ind7_medinc_fulltime <- ifelse(data$workingdef == 1, data$wage_inc_adj,NA) #filtering for only those defined as working 30+ hours a week so we can get the correct median value in the export table.
#INDICATOR 8 -- Median Hourly Wage: median hourly wages for White and Hispanic adults aged 25-64 employed part-time or full-time.
# 2019 report STATA notes describe using the weeks worked per year to calculate this value, instead of the average hours worked per week. Here we can use the WKSWORK1 variable, which has the direct number of weeks worked (in 2019 they had to use WKSWORK2 which had the weeks worked as intervals). WKSWORK1 is available for both 2019 and 2021.
data$HRS_per_year <- data$WKSWORK1 * data$UHRSWORK #calculating the number of hours worked per year
data$hourly_wage <- data$wage_inc_adj / data$HRS_per_year #calculating the average hourly wage based on wage income and hours worked per year.
data$ind8_hrlywage <- ifelse(data$work_age==1,data$hourly_wage,NA) #filtering for only those defined as adults aged 25-64.
#INDICATOR 9 -- Median Household Income:
data<- data %>% mutate_at(c('HHINCOME'), ~na_if(.,9999999)) #code NAs
data$HHINCOME <- as.numeric(data$HHINCOME)
data$HHINCOME_adj <- data$HHINCOME * data$cpi #convert $2019 to $2021
hist(data$HHINCOME_adj) #see negative values
data$HHINCOME_adj <-ifelse(data$HHINCOME_adj<0,0,data$HHINCOME_adj) #remove negative values, code to $0 income
#INDICATOR 10 -- Child Poverty: children living at or below 100% of the poverty threshold.
data$child <- ifelse(data$AGE<18,1,0) # if child = 1
data$pov_c <- Recode(as.numeric(data$POVERTY), recodes="0=NA; 1:100='At or Below Poverty Threshold'; else='Over Poverty Threshold'", as.factor=T)
data$pov_c_100 <- Recode(as.numeric(data$POVERTY), recodes="0=NA; 1:100=1; else=0", as.factor=F)
data$povuniv = Recode(data$pov_c, recodes="NA=0; else=1") #poverty calculations often exclude some of the population for which it is not tabulated, so this is creating the poverty universe
data$childpov <- ifelse(data$child + data$pov_c_100==2 ,1,0)
#INDICATOR 11 -- Senior Poverty: adults aged 65+ living at or below 100% of the poverty threshold.
data$senior <- ifelse(data$AGE>=65,1,0)
data$seniorpov <- ifelse(data$senior + data$pov_c_100==2 ,1,0)
#INDICATOR 12 -- Working Poverty: adults aged 25-64 currently employed 30+ hours per week and living at or below 200% of the poverty threshold.
data$pov_c_200 <- Recode(as.numeric(data$POVERTY), recodes="0=NA; 1:200=1; else=0", as.factor=F)
data$work_defFT <- ifelse(data$work_age+data$wrk30hr==2,1,0) # working age adults working 30+ hrs/week = full time workers
data$workpov <- ifelse(data$work_defFT+data$pov_c_200==2,1,0) # full time workers in poverty
iPUMS ACS 1 yr indicators: 13, 14, 22, 23, 24
Income Group Definitions from Dallas Report for Indicator 22 “Socioeconomic classifications are based on the poverty threshold defined by the U.S. Census Bureau, resulting in three categories: households with income less than 100% of the poverty threshold, households with income equal to 100%-185% of the poverty threshold, or households with income greater than 185% of the poverty threshold. For this report, we refer to these groups as the lowest income group, the middle income group, and the highest income group, respectively.”
#13 Early Education Enrollment by Race: Ratio between the percentages of White and Hispanic three- and four-year-olds enrolled in pre-K.
data$age_3YO_4YO <-Recode(data$AGE, recodes="3:4=1; else=0", as.factor=T)
data$SCHOOL <- as.numeric(data$SCHOOL)
data$inschool <-Recode(data$SCHOOL, recodes="1='Not in school'; 2='In school'; else=NA", as.factor=T)
#plyr::count(data,'SCHOOL') # Examined the "SCHOOL" and "GRADEATT" Variables, decided SCHOOL is adequate (same totals from each). So I am just looking at whether each child aged 3 or 4 attends school
#plyr::count(data,'GRADEATTD')
#table(data$age_3YO_4YO, data$SCHOOL)
#table(data$age_3YO_4YO, data$GRADEATTD)
#14 Early Education Enrollment by Income: Ratio between the percentages of three- and four-year-olds in the top and middle income groups enrolled in pre-K
data$pov_income_groups <- Recode(as.numeric(data$POVERTY), recodes="0=NA; 1:100='Lowest Income Group'; 101:185 ='Middle Income Group'; else ='Highest Income Group'", as.factor=T)
data$pov_income_groups <- ordered(data$pov_income_groups, levels = c("Lowest Income Group", "Middle Income Group", "Highest Income Group"))
table(data$race_eth, data$pov_income_groups)
##
## Lowest Income Group Middle Income Group
## AIAN NH 12 7
## Asian/PI NH 230 178
## Black NH 1070 942
## Hispanic, Any Race 1677 2462
## Other NH 78 113
## White NH 851 969
##
## Highest Income Group
## AIAN NH 66
## Asian/PI NH 1593
## Black NH 2797
## Hispanic, Any Race 6208
## Other NH 720
## White NH 12553
#22 Adults with No High School Diploma: Ratio between the percentages of Hispanic and White adults aged 25-64 with no high school diploma
data$EDUCD <- as.numeric(data$EDUCD)
data$edu_c <-Recode(data$EDUCD, recodes="1:61='Less than High School'; 62:64='High School Diploma or GED'; 65:90='Some College or Associates Degree'; 101='Bachelors Degree'; 114:116='Higher Degree'; else=NA", as.factor=T)
data$edu_c <- ordered(data$edu_c, levels = c("Less than High School", "High School Diploma or GED", "Some College or Associates Degree","Bachelors Degree","Higher Degree"))
data$noHSdipl <-Recode(data$EDUCD, recodes="1:61=1; NA=NA; else=0", as.factor=T)
#23 High School Graduates Living in Poverty: Ratio between the percentages of Black and White adults aged 25-64 with at least a high school diploma who are living below 100% of the poverty threshold.
data$yesHSdipl <-Recode(data$EDUCD, recodes="62:116=1; NA=NA; else=0", as.factor=T)
table(data$yesHSdipl,data$race_eth)
##
## AIAN NH Asian/PI NH Black NH Hispanic, Any Race Other NH White NH
## 0 19 513 1414 5625 339 2626
## 1 76 1536 3634 4889 595 12178
#24 College-Educated Adults: Ratio between the percentages of Asian and Hispanic adults aged 25-64 with a bachelor’s degree or higher.
data$yesCollegeBS <- Recode(data$EDUCD, recodes="101:116=1; NA=NA; else=0", as.factor=T)
table(data$yesCollegeBS,data$race_eth)
##
## AIAN NH Asian/PI NH Black NH Hispanic, Any Race Other NH White NH
## 0 73 989 4097 9259 618 7119
## 1 22 1060 951 1255 316 7685
iPUMS ACS 1 yr indicators: 25, 28, 29, 34, & 35
#25 Homeownership: Ratio between the percentages of White and Black households who own their home.
data$hhder <-Recode(data$PERNUM, recodes="1='Householder'; else='Not Householder'", as.factor=T) #defining householder (PERNUM = 1)
data$OWNERSHP <- as.numeric(data$OWNERSHP)
data$tenure <- Recode(data$OWNERSHP, recodes="1='Owned/Mortgaged'; 2='Rented'; else=NA", as.factor=T)
data$ownocc <- ifelse(data$tenure=='Owned/Mortgaged' & data$hhder == 'Householder',1,0)
#28 Housing Cost Burden: Ratio between the percentages of Black and White households with housing costs exceeding 30% of income.
#the previous analysis removed those who owned their homes outright (i.e. no mortgage) - I don't think this is necessarily the right approach, since they could, in theory, have much more paid in owner costs for upkeep/taxes/etc. I just use the two ipums variables OWNCOST and RENT and compare this to HHINCOME.
data$owncost_c <- ifelse(data$OWNCOST==99999,NA,data$OWNCOST*data$cpi*12) #flagging NA values, adjusting for inflation + multiplying by 12 for annual amount paid
data$rent_c <- ifelse(data$RENT==0,NA,ifelse(data$RENT==1,0,data$RENT*data$cpi*12)) #flagging NA values, 1 = $0 in rent in the ipums codebook, adjusting for inflation + multiplying by 12 for annual amount paid
data$hous_pctINC <- ifelse(is.na(data$owncost_c), (data$rent_c)/data$HHINCOME_adj, (data$owncost_c)/data$HHINCOME_adj)
summary(data$hous_pctINC)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.1146 0.1879 Inf 0.3097 Inf 1393
#recode for greater than or less than 30% of income
data$HCB <- ifelse(data$hous_pctINC>0.3,"HCB",ifelse(is.na(data$hous_pctINC),NA,"Not HCB"))
#29 Internet Access: Ratio between the percentages of Black and White households without access to the internet.
data$CINETHH <- as.numeric(data$CINETHH)
data$int_acc <- Recode(data$CINETHH, recodes="0=NA; 1:2='Internet Access in Home'; 3='No Internet Access in Home'", as.factor=T)
#34 Private Vehicle Availability: Ratio between the average number of vehicles available per person aged 16+ in White and Black households.
data$num_vehicles <- Recode(data$VEHICLES, recodes="0=NA; 1=1; 2=2; 3=3; 4=4; 5=5; 6=6; 9=0", as.factor=F) #changing iPUMS coding to match numeric number of vehicles available
data$pers_16up <- ifelse(data$AGE>=16,1,0) #persons above 16 = 1
#the "SERIAL" number for each record is the household number. summing the people 16+ by serial gives us the number of people 16+ by household
agg_pers16up <- data %>%
group_by(SERIAL) %>%
summarise(HH_16up = sum(pers_16up))
#rejoin back to full data table to calculate cars per person 16up
data_temp <- left_join(data, agg_pers16up, by = 'SERIAL')
data <- data_temp
#divide by cars in HH
data$veh_pp <- data$num_vehicles / data$HH_16up
#35 Commute Time: Ratio between the average time spent commuting one way to work (in minutes) for Hispanic and White adults aged 25-64.
data$trantime_c <-ifelse(data$TRANTIME==0,NA,data$TRANTIME)
#50 Health Insurance: Ratio between the percentages of Hispanic and White residents without health insurance.
data$HCOVANY <- as.numeric(data$HCOVANY)
data$hcov_c <-Recode(data$HCOVANY, recodes="1='No Health Insurance'; 2='Yes Health Insurance'", as.factor=T)
From ipums: “By default, the extract system rectangularizes the data: it puts the household information on the person records and drops the separate household record. This can distort analyses at the household level. The number of observations will be inflated to the number of person records. You can either select the first person in each household (PERNUM) or select the”hierarchical” box in the extract system to get the proper number of household observations. The rectangularizing feature also drops any vacant households, which are otherwise available in some samples. Despite these complications, the great majority of researchers prefer the rectangularized format, which is why it is the default output of our system.”
## SURVEY DESIGN
#install.packages("survey")
library(survey)
options(survey.lonely.psu = "adjust")
#PERSON WEIGHTS DESIGN
des_p <- svydesign(id=~CLUSTER, strata=~interaction(STRATA,YEAR), weights=~PERWT, data=data)
#HOUSEHOLD WEIGHTS DESIGN
data_hh <- filter(data, PERNUM==1)
des_hh <- svydesign(id=~CLUSTER, strata=~interaction(STRATA, YEAR), weights=~HHWT, data=data_hh)
The svyby function allows us to take the survey design objects (above by person (des_p) and household (des_hh)) and create statistics by subsets of these objects with corresponding error calculations and weights. Each table number corresponds to the indicator number.
names(data)
## [1] "YEAR" "SAMPLE" "SERIAL"
## [4] "CBSERIAL" "HHWT" "CLUSTER"
## [7] "STATEFIP" "PUMA" "STRATA"
## [10] "GQ" "OWNERSHP" "OWNERSHPD"
## [13] "OWNCOST" "RENT" "HHINCOME"
## [16] "CINETHH" "VEHICLES" "PERNUM"
## [19] "PERWT" "AGE" "RACE"
## [22] "RACED" "HISPAN" "HISPAND"
## [25] "HCOVANY" "SCHOOL" "EDUC"
## [28] "EDUCD" "GRADEATT" "GRADEATTD"
## [31] "EMPSTAT" "EMPSTATD" "LABFORCE"
## [34] "CLASSWKR" "CLASSWKRD" "OCCSOC"
## [37] "WKSWORK1" "UHRSWORK" "INCTOT"
## [40] "INCWAGE" "POVERTY" "TRANTIME"
## [43] "PUMA_4" "PUMAID" "PUMAID_num"
## [46] "DalPumas" "hisp" "race_c"
## [49] "race_eth" "age_c" "work_age"
## [52] "self_emp" "bus_own" "self_emp_broad"
## [55] "empl_c" "not_LF" "unemp"
## [58] "soc2dig" "Hwage_occ" "wrkage_HwageOcc"
## [61] "wrk30hr" "workingdef" "cpi"
## [64] "wage_inc_adj" "ind7_medinc_fulltime" "HRS_per_year"
## [67] "hourly_wage" "ind8_hrlywage" "HHINCOME_adj"
## [70] "child" "pov_c" "pov_c_100"
## [73] "povuniv" "childpov" "senior"
## [76] "seniorpov" "pov_c_200" "work_defFT"
## [79] "workpov" "age_3YO_4YO" "inschool"
## [82] "pov_income_groups" "edu_c" "noHSdipl"
## [85] "yesHSdipl" "yesCollegeBS" "hhder"
## [88] "tenure" "ownocc" "owncost_c"
## [91] "rent_c" "hous_pctINC" "HCB"
## [94] "int_acc" "num_vehicles" "pers_16up"
## [97] "HH_16up" "veh_pp" "trantime_c"
## [100] "hcov_c"
table_ptot <- svyby(formula = ~age_c, by = ~YEAR+race_eth, des_p, svytotal, vartype=c("se"), ci=FALSE, na.rm=FALSE)
table_hhtot <- svyby(formula = ~age_c, by = ~YEAR+race_eth, des_hh, svytotal, vartype=c("se"), ci=FALSE, na.rm=FALSE)
table2 <- svyby(formula = ~self_emp+self_emp_broad, by = ~YEAR+race_eth, subset(des_p, work_age==1), svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE) #self empl only incorporated and broad (inc and uninc)
table4 <- svyby(formula = ~not_LF+work_age, by = ~YEAR+race_eth, des_p, svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table5 <- svyby(formula = ~not_LF+work_age+unemp, by = ~YEAR+race_eth, des_p, svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table6 <- svyby(formula = ~wrkage_HwageOcc+work_age, by = ~YEAR+race_eth, des_p, svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table7 <- svyby(formula = ~ind7_medinc_fulltime, by = ~YEAR+race_eth, des_p, svyquantile, quantiles=c(0.5), method="constant",ties="rounded",vartype=c("se","ci"),na.rm=TRUE)
table7_tot <- svyby(formula = ~ind7_medinc_fulltime, by = ~YEAR, des_p, svyquantile, quantiles=c(0.5), method="constant", ties="rounded",vartype=c("se","ci"), na.rm=TRUE)
table8 <- svyby(formula = ~ind8_hrlywage, by = ~YEAR+race_eth, des_p, svyquantile, quantiles=c(0.5), method="constant",ties="rounded", na.rm=TRUE)
table8_tot <- svyby(formula = ~ind8_hrlywage, by = ~YEAR, des_p, svyquantile, quantiles=c(0.5), method="constant", ties="rounded", na.rm=TRUE)
table9 <- svyby(formula = ~HHINCOME_adj, by = ~YEAR+race_eth, des_hh, svyquantile, quantiles=c(0.5), method="constant",ties="rounded", na.rm=TRUE)
table9_tot <- svyby(formula = ~HHINCOME_adj, by = ~YEAR, des_hh, svyquantile, quantiles=c(0.5), method="constant", ties="rounded", na.rm=TRUE)
table10 <- svyby(formula = ~childpov+child, by = ~YEAR+race_eth+povuniv, des_p, svytotal, vartype=c("se"), ci=FALSE, na.rm=FALSE)
table11 <- svyby(formula = ~seniorpov+senior, by = ~YEAR+race_eth, des_p, svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table12 <- svyby(formula = ~work_defFT+workpov, by = ~YEAR+race_eth, des_p, svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table13 <- svyby(formula = ~age_3YO_4YO+inschool, by = ~YEAR+race_eth, subset(des_p, age_3YO_4YO==1), svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE) #subset makes this only for 3 and 4 yos
table14 <- svyby(formula = ~age_3YO_4YO+inschool, by = ~YEAR+pov_income_groups+povuniv, subset(des_p, age_3YO_4YO==1), svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE) #subset makes this only for 3 and 4 yos
table22 <- svyby(formula = ~noHSdipl, by = ~YEAR+race_eth, subset(des_p, work_age==1), svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE) #subset makes this for only the working age (25-64) pop
table23 <- svyby(formula = ~pov_c_100, by = ~YEAR+race_eth, subset(des_p, work_age==1 & yesHSdipl==1), svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE) #subset makes this for only the working age (25-64) pop
table24 <- svyby(formula = ~yesCollegeBS, by = ~YEAR+race_eth, subset(des_p, work_age==1), svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE) #subset makes this for only the working age (25-64) pop
table25 <- svyby(formula = ~tenure/hhder, by = ~YEAR+race_eth, des_hh, svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table25_tot <- svyby(formula = ~tenure/hhder, by = ~YEAR, des_hh, svymean, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table28 <- svyby(formula = ~HCB, by = ~YEAR+race_eth, des_hh, svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table28_mean <- svyby(formula = ~HCB, by = ~YEAR+race_eth, des_hh, svymean, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table29 <- svyby(formula = ~int_acc, by = ~YEAR+race_eth, des_hh, svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table34 <- svyby(formula=~veh_pp, by=~YEAR+race_eth, des_hh, svymean, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table34_tot <- svyby(formula=~veh_pp, by=~YEAR, des_hh, svymean, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table35 <- svyby(formula=~trantime_c, by=~YEAR+race_eth, subset(des_p, work_age==1), svymean, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table35_tot <- svyby(formula=~trantime_c, by=~YEAR+work_age, subset(des_p, work_age==1), svymean, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table50 <- svyby(formula = ~hcov_c, by = ~YEAR+race_eth, des_p, svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE)
table50_tot <- svyby(formula = ~hcov_c, by = ~YEAR, des_p, svytotal, vartype=c("se"), ci=FALSE, na.rm=TRUE)
From p. 13 of 2019 Report: “Although most of the indicators that compare outcomes by race/ethnicity use individual-level data, this data was unavailable in some instances. For those indicators, neighborhoods (defined by census tracts or zip codes) are used as a proxy. In this report, we compare neighborhoods according to their majority (more than 50%) racial/ethnic makeup. If no majority exists, we label that neighborhood “racially diverse.” In Dallas, the following majority racial/ethnic groups for neighborhoods are used: White, Black, and Hispanic. No neighborhoods were majority-Asian in either year.”
Note: for tract and zip code data, we have to use 5 year ACS. This will be used for both the 2019 and 2021 samples.
#NOTE: 5 Year ACS for tracts in Dallas' intersecting Counties (Dallas, Collin, Denton, Kaufman, Rockwall)
dal_counties <- df <- data.frame (CountyName = c("Dallas", "Collin", "Denton", "Kaufman", "Rockwall"),
CountyFIPS3 = c("113","085","121","257","397"),
CountyFIPS5 = c("48113","48085","48121","48257","48397"))
dal_tract<-get_acs(geography = "tract",
state="TX",
county = dal_counties$CountyFIPS3,
year = 2021,
survey = "acs5",
variables=c("B03002_001E",
"B03002_001E",
"B03002_003E",
"B03002_004E",
"B03002_006E",
"B03002_007E",
"B03002_005E",
"B03002_008E",
"B03002_009E",
"B03002_012"),
geometry = T, output = "wide")
##
|
| | 0%
|
|= | 1%
|
|== | 2%
|
|===== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======= | 11%
|
|========== | 15%
|
|============ | 18%
|
|============= | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|=================== | 27%
|
|=================== | 28%
|
|===================== | 30%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|================================ | 46%
|
|===================================== | 53%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 59%
|
|========================================== | 60%
|
|============================================= | 65%
|
|============================================== | 66%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|=========================================================== | 85%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================= | 93%
|
|======================================================================| 99%
|
|======================================================================| 100%
#select tracts that have nonzero population
#dal_tract <- filter(dal_tract, B03002_001E>0)
#rename variables
dal_tract<-dal_tract%>%
mutate(totpop=B03002_001E,
whiteNH = B03002_003E,
blackNH = B03002_004E,
aapiNH = B03002_006E + B03002_007E,
aianNH = B03002_005E,
other2moreNH = B03002_008E + B03002_009E,
HispLat = B03002_012E,
pctwhiteNH = whiteNH/totpop,
pctblackNH = blackNH/totpop,
pctaapiNH = aapiNH/ totpop,
pctaianNH = aianNH/totpop,
pctother2moreNH = other2moreNH/ totpop,
pctHispLat = HispLat / totpop)
dal_tract$nonzero_pop <- ifelse(dal_tract$totpop>0,1,0)
dal_tract$neighb_race <- ifelse(dal_tract$pctwhiteNH>0.5,"Majority White NH",(ifelse(dal_tract$pctblackNH>0.5,"Majority Black NH",(ifelse(dal_tract$pctaapiNH>0.5,"Majority AAPI NH",(ifelse(dal_tract$pctaapiNH>0.5,"Majority Other or 2+ NH",(ifelse(dal_tract$pctHispLat>0.5,"Majority Hispanic/Latino",(ifelse(dal_tract$nonzero_pop==1,"Racially Diverse",0)))))))))))
table(dal_tract$neighb_race)
##
## Majority AAPI NH Majority Black NH Majority Hispanic/Latino
## 21 75 221
## Majority White NH Racially Diverse
## 442 352
install.packages("writexl")
##
## The downloaded binary packages are in
## /var/folders/x9/scwlftds0vn0c3vfyyrprz7r0000gn/T//Rtmpl7BuWl/downloaded_packages
library("writexl")
write_xlsx(dal_tract,"~/Desktop/Temp/table.xlsx")
tx_places<-get_acs(geography = "place", state="Texas", variables=c("DP05_0001E"), year=2021, geometry = T, output = "wide")
dallas_place<-tx_places[tx_places$GEOID=="4819000",]
library(tmap)
tm_shape(dal_tract,bbox = dallas_bb)+
tm_polygons("neighb_race",title="Tracts",border.alpha = 0.25, palette="Set2", border.col = "black",style="pretty",legend.hist=TRUE)+
tm_format("World", title="Tract Categorization, 2017-2021 5 YR ACS", legend.outside=T)+
tm_shape(dallas_place)+
tm_polygons(alpha=0,border.col="black")+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(dal_tract,bbox = dallas_bb)+
tm_polygons("pctwhiteNH",title="Tracts",border.alpha = 0.25, palette="YlGnBu", border.col = "black",style="pretty",legend.hist=TRUE)+
tm_format("World", title="Percent White NH, 2017-2021 5 YR ACS", legend.outside=T)+
tm_shape(dallas_place)+
tm_polygons(alpha=0,border.col="black")+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(dal_tract,bbox = dallas_bb)+
tm_polygons("pctblackNH",title="Tracts",border.alpha = 0.25, palette="YlGnBu", border.col = "black",style="pretty",legend.hist=TRUE)+
tm_format("World", title="Percent Black NH, 2017-2021 5 YR ACS", legend.outside=T)+
tm_shape(dallas_place)+
tm_polygons(alpha=0,border.col="black")+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(dal_tract,bbox = dallas_bb)+
tm_polygons("pctHispLat",title="Tracts",border.alpha = 0.25, palette="YlGnBu", border.col = "black",style="pretty",legend.hist=TRUE)+
tm_format("World", title="Percent Hispanic/Latino NH, 2017-2021 5 YR ACS", legend.outside=T)+
tm_shape(dallas_place)+
tm_polygons(alpha=0,border.col="black")+
tm_scale_bar(position="LEFT")+
tm_compass()
tm_shape(dal_tract,bbox = dallas_bb)+
tm_polygons("pctaapiNH",title="Tracts",border.alpha = 0.25, palette="YlGnBu", border.col = "black",style="pretty",legend.hist=TRUE)+
tm_format("World", title="Percent AAPI NH, 2017-2021 5 YR ACS", legend.outside=T)+
tm_shape(dallas_place)+
tm_polygons(alpha=0,border.col="black")+
tm_scale_bar(position="LEFT")+
tm_compass()
#Percent Other or 2+ of all tracts is below 25% across board.
tm_shape(dal_tract,bbox = dallas_bb)+
tm_polygons("pctother2moreNH",title="Tracts",border.alpha = 0.25, palette="YlGnBu", border.col = "black",style="pretty",legend.hist=TRUE)+
tm_format("World", title="Percent Other/2+ NH, 2017-2021 5 YR ACS", legend.outside=T)+
tm_shape(dallas_place)+
tm_polygons(alpha=0,border.col="black")+
tm_scale_bar(position="LEFT")+
tm_compass()
#Percent AIAN of all tracts is below 8% across board.
tm_shape(dal_tract,bbox = dallas_bb)+
tm_polygons("pctaianNH",title="Tracts",border.alpha = 0.25, palette="YlGnBu", border.col = "black",style="pretty",legend.hist=TRUE)+
tm_format("World", title="Percent AIAN NH, 2017-2021 5 YR ACS", legend.outside=T)+
tm_shape(dallas_place)+
tm_polygons(alpha=0,border.col="black")+
tm_scale_bar(position="LEFT")+
tm_compass()
#Below to explore color palettes via Color Brewer
#library (tmaptools)
# install.packages("shinyjs")
# palette_explorer()
# get_brewer_pal("Set2", n = 5)
dal_counties_geo<-get_acs(geography = "county",
year = 2021,
state = "TX",
county = dal_counties$CountyFIPS3,
survey = "acs5",
variables=c("B03002_001E"),
geometry = T, output = "wide")
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============= | 19%
|
|================ | 23%
|
|================== | 26%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================== | 34%
|
|======================== | 35%
|
|=========================== | 38%
|
|============================= | 42%
|
|================================ | 45%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|=================================================== | 73%
|
|===================================================== | 76%
|
|======================================================== | 80%
|
|========================================================== | 82%
|
|============================================================ | 86%
|
|============================================================== | 89%
|
|================================================================ | 91%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|==================================================================== | 97%
|
|===================================================================== | 99%
|
|======================================================================| 100%
us_zip<-get_acs(geography = "zip code tabulation area",
year = 2021,
survey = "acs5",
variables=c("B03002_001E",
"B03002_001E",
"B03002_003E",
"B03002_004E",
"B03002_006E",
"B03002_007E",
"B03002_005E",
"B03002_008E",
"B03002_009E",
"B03002_012"),
geometry = T, output = "wide")
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
dal_zip <- us_zip[dal_counties_geo, , op = st_intersects]
#select tracts that have nonzero population
#dal_tract <- filter(dal_tract, B03002_001E>0)
#rename variables
dal_zip<-dal_zip%>%
mutate(totpop=B03002_001E,
whiteNH = B03002_003E,
blackNH = B03002_004E,
aapiNH = B03002_006E + B03002_007E,
aianNH = B03002_005E,
other2moreNH = B03002_008E + B03002_009E,
HispLat = B03002_012E,
pctwhiteNH = whiteNH/totpop,
pctblackNH = blackNH/totpop,
pctaapiNH = aapiNH/ totpop,
pctaianNH = aianNH/totpop,
pctother2moreNH = other2moreNH/ totpop,
pctHispLat = HispLat / totpop)
dal_zip$nonzero_pop <- ifelse(dal_zip$totpop>0,1,0)
dal_zip$neighb_race <- ifelse(dal_zip$pctwhiteNH>0.5,"Majority White NH",(ifelse(dal_zip$pctblackNH>0.5,"Majority Black NH",(ifelse(dal_zip$pctaapiNH>0.5,"Majority AAPI NH",(ifelse(dal_zip$pctother2moreNH >0.5,"Majority Other or 2+ NH",(ifelse(dal_zip$pctHispLat>0.5,"Majority Hispanic/Latino",(ifelse(dal_zip$nonzero_pop==1,"Racially Diverse",0)))))))))))
table(dal_zip$neighb_race)
##
## Majority AAPI NH Majority Black NH Majority Hispanic/Latino
## 1 9 22
## Majority White NH Racially Diverse
## 96 50
tx_places<-get_acs(geography = "place", state="Texas", variables=c("DP05_0001E"), year=2021, geometry = T, output = "wide")
dallas_place<-tx_places[tx_places$GEOID=="4819000",]
library(tmap)
tm_shape(dal_zip,bbox = dallas_bb)+
tm_polygons("neighb_race",title="ZCTAs",border.alpha = 0.25, palette="Set2", border.col = "black",legend.hist=TRUE)+
tm_format("World", title="ZCTA Categorization, 2017-2021 5 YR ACS", legend.outside=T)+
tm_shape(dallas_place)+
tm_polygons(alpha=0,border.col="black")+
tm_scale_bar(position="LEFT")+
tm_compass()