library(survey, quietly = T)
library(dplyr, quietly = T)
library(car, quietly = T)
library(ggplot2, quietly = T)
library(tigris, quietly = T)
library(classInt, quietly = T)
library(srvyr, quietly = T)
library(reldist, quietly = T)
## Warning: package 'reldist' was built under R version 4.1.3
library(ipumsr, quietly = T)
## Warning: package 'ipumsr' was built under R version 4.1.3
library(sf, quietly = T)
library(mapview, quietly = T)
library(janitor, quietly = T)
## Warning: package 'janitor' was built under R version 4.1.3
library(survey, quietly = T)
library(tidyverse, quietly = T)
library(car, quietly = T)
library(ggplot2, quietly = T)
library(tigris, quietly = T)
library(classInt, quietly = T)
library(tmap, quietly = T)
## Warning: package 'tmap' was built under R version 4.1.3
library(reldist, quietly = T)
library(haven)
# Read the IPUMS USA extract: the DDI codebook first, then the microdata it
# describes.
ddi <- read_ipums_ddi("C:/Users/spara/OneDrive/Desktop/Gis Project/usa_00005.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.

# Strip value labels; necessary to avoid problems with the "labelled" data
# class in the recodes below.
data <- haven::zap_labels(data)

# Normalise column names: remove underscores and lower-case everything.
names(data) <- tolower(gsub(pattern = "_", replacement = "", x = names(data)))

# Keep respondents aged 25 or older living in one of the five study counties.
# `&` binds tighter than `|`, so the previous chain of `==` comparisons joined
# by `|` applied the age restriction to county 29 only and let every age
# through for the other four counties. `%in%` makes the county test a single
# condition, so the age filter now applies to all of them.
study_counties <- c(29, 113, 201, 439, 453)
data2 <- data %>%
  filter(age >= 25 & countyfip %in% study_counties)
## Prepare variables
# All analysis variables are derived in a single mutate(); later entries may
# use columns created by earlier ones.
data2 <- data2 %>%
  mutate(
    # Weight variables.
    # NOTE(review): the division by 100 presumably undoes implied decimals in
    # the raw weights -- confirm that ipumsr has not already applied them.
    pwt = perwt / 100,
    hwt = hhwt / 100,

    # Race and ethnicity variables: Hispanic origin (code 9 becomes NA) and a
    # collapsed race grouping.
    hisp = Recode(
      hispan,
      recodes = "9=NA; 1:4='Hispanic'; 0='NonHispanic'"
    ),
    race_rec = Recode(
      race,
      recodes = "1='White'; 2='Black'; 3='Other'; 4:6='Asian'; 7:9='Other'"
    ),

    # Cross ethnicity with race, then fold every "Hispanic_*" combination into
    # one "Hispanic" level; NonHispanic_White is the reference level.
    race_eth = interaction(hisp, race_rec, sep = "_"),
    race_eth = as.factor(
      ifelse(
        startsWith(as.character(race_eth), "Hispanic"),
        "Hispanic",
        as.character(race_eth)
      )
    ),
    race_eth = relevel(race_eth, ref = "NonHispanic_White"),

    # Sex: any code other than 1 or 2 becomes NA.
    sex = Recode(
      sex,
      recodes = "1 ='Male'; 2 ='Female'; else=NA",
      as.factor = TRUE
    ),

    # Education: detailed codes 2-61 map to '0', 62-116 map to '1' (the group
    # later reported as "high school or more"); anything else becomes NA.
    educ2 = Recode(
      educd,
      recodes = "2:61='0' ;62:116='1'; else=NA",
      as.factor = TRUE
    ),

    # Age groups; include.lowest keeps age 25 inside the first interval.
    agecat = cut(
      age,
      breaks = c(25, 30, 40, 50, 65, 120),
      include.lowest = TRUE
    )
  )
Here we identify the person weights and the survey design variables.
# Complex-survey design object: `cluster` as the sampling unit, `strata` as
# the strata, and the person weight `pwt` as the weight.
des <- svydesign(
  ids = ~cluster,
  strata = ~strata,
  weights = ~pwt,
  data = data2
)

# Texas county polygons (cartographic boundary file, 2019), with a numeric
# county code to join on.
cos <- counties(cb = TRUE, state = "TX", year = 2019)
cos$countyfip <- as.numeric(cos$COUNTYFP)

# Survey-weighted share of people with educ2 == 1 (and its complement) in
# each county, expressed as percentages rounded to one decimal. Stderr is the
# standard error of the educ2 == 1 proportion, left on the proportion scale.
cos_est_educ2 <- svyby(
  ~I(educ2 == 1),
  by = ~countyfip,
  design = des,
  FUN = svymean,
  na.rm = TRUE
) %>%
  clean_names() %>%
  mutate(
    Pctsh = round((i_educ2_1_true * 100), 1),
    Pctnsh = round((i_educ2_1_false * 100), 1)
  ) %>%
  select(countyfip, Pctsh, Pctnsh, Stderr = se_i_educ2_1_true)

# Attach the estimates to the county geometries; counties without an
# estimate keep NA values.
geo3 <- left_join(cos, cos_est_educ2, by = "countyfip")
## Figure 1: Educational Attainment in Texas Counties, 2015-2019
# Static choropleth of the county-level percentage with educ2 == 1 (Pctsh),
# classed into five k-means bins on a blue palette, with a histogram in the
# legend.
tmap_mode("plot")
## tmap mode set to plotting
# NOTE(review): tm_basemap() presumably only has an effect in "view" mode --
# confirm whether it is needed for this static plot.
tm_basemap("OpenStreetMap.Mapnik") +
  tm_shape(geo3) +
  tm_polygons(
    "Pctsh",
    style = "kmeans",
    title = "% of people with high school or more",
    palette = "Blues",
    n = 5,
    legend.hist = TRUE
  ) +
  # The dangling empty argument (trailing comma) that used to follow
  # legend.frame has been removed.
  tm_layout(
    legend.outside = TRUE,
    title = "Educational Attainment in Texas Counties \n 2015-2019",
    title.size = 1.5,
    legend.frame = TRUE
  ) +
  tm_compass(position = c("left", "top")) +
  tm_format(
    "World",
    legend.position = c("left", "bottom"),
    main.title.position = c("center")
  ) +
  tm_scale_bar(position = c("left", "bottom"))
library(table1)
## Warning: package 'table1' was built under R version 4.1.3
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Attach display labels to the columns shown in the descriptive table.
var_labels <- c(sex = "Sex", race_eth = "Race/Ethnicity", agecat = "Age")
for (v in names(var_labels)) {
  label(data2[[v]]) <- var_labels[[v]]
}

# Unweighted descriptive table of sex, race/ethnicity and age group,
# stratified by the education indicator educ2.
tbl1 <- table1(
  ~ sex + race_eth + agecat | educ2,
  data = data2,
  na.rm = TRUE
)
tbl1
| | 0 (N=149365) | 1 (N=345372) | Overall (N=510886) |
|---|---|---|---|
| Sex | |||
| Female | 73513 (49.2%) | 180725 (52.3%) | 262170 (51.3%) |
| Male | 75852 (50.8%) | 164647 (47.7%) | 248716 (48.7%) |
| Race/Ethnicity | |||
| NonHispanic_White | 35906 (24.0%) | 177305 (51.3%) | 218609 (42.8%) |
| Hispanic | 82403 (55.2%) | 91035 (26.4%) | 180415 (35.3%) |
| NonHispanic_Asian | 8474 (5.7%) | 22790 (6.6%) | 32247 (6.3%) |
| NonHispanic_Black | 18693 (12.5%) | 47555 (13.8%) | 68312 (13.4%) |
| NonHispanic_Other | 3889 (2.6%) | 6687 (1.9%) | 11303 (2.2%) |
| Age | |||
| [25,30] | 4707 (3.2%) | 42339 (12.3%) | 47046 (9.2%) |
| (30,40] | 10656 (7.1%) | 64025 (18.5%) | 74681 (14.6%) |
| (40,50] | 11583 (7.8%) | 57435 (16.6%) | 69018 (13.5%) |
| (50,65] | 15905 (10.6%) | 87360 (25.3%) | 103265 (20.2%) |
| (65,120] | 12731 (8.5%) | 60151 (17.4%) | 72882 (14.3%) |
| Missing | 93783 (62.8%) | 34062 (9.9%) | 143994 (28.2%) |
library(gtsummary)
## #Uighur
# Survey-weighted summary of sex, race/ethnicity and age group by the
# education indicator educ2, with p-values and an overall column added, then
# converted to a gt table for the title and compact formatting.
tbl_svysummary(
  des,
  by = educ2,
  include = c(sex, race_eth, agecat, educ2),
  label = list(
    sex ~ "Sex",
    race_eth ~ "Race/Ethnicity",
    agecat ~ "Age"
  )
) %>%
  add_p() %>%
  add_overall() %>%
  modify_spanning_header(
    c("stat_1", "stat_2") ~ "**Educational Attainment in Texas**"
  ) %>%
  as_gt() %>%
  # From here on the object is a gt table, so gt functions apply.
  gt::tab_header("Table 2: Weighted Demographic Proporties of People By Educational Attainment, IPUMS, 2015-2019") %>%
  gt::tab_options(
    table.font.size = "small",
    data_row.padding = gt::px(1)
  )
## 16149 observations missing `educ2` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `educ2` column before passing to `tbl_svysummary()`.
| Table 2: Weighted Demographic Proporties of People By Educational Attainment, IPUMS, 2015-2019 | ||||
|---|---|---|---|---|
| Characteristic | Overall, N = 113,168¹ | Educational Attainment in Texas | p-value² | |
| 0, N = 37,949¹ | 1, N = 75,219¹ | |||
| Sex | <0.001 | |||
| Female | 57,316 (51%) | 18,508 (49%) | 38,808 (52%) | |
| Male | 55,852 (49%) | 19,441 (51%) | 36,411 (48%) | |
| Race/Ethnicity | <0.001 | |||
| NonHispanic_White | 39,705 (35%) | 7,090 (19%) | 32,615 (43%) | |
| Hispanic | 45,253 (40%) | 22,541 (59%) | 22,711 (30%) | |
| NonHispanic_Asian | 6,939 (6.1%) | 1,892 (5.0%) | 5,048 (6.7%) | |
| NonHispanic_Black | 18,839 (17%) | 5,526 (15%) | 13,313 (18%) | |
| NonHispanic_Other | 2,432 (2.1%) | 900 (2.4%) | 1,531 (2.0%) | |
| Age | <0.001 | |||
| [25,30] | 12,568 (16%) | 1,378 (10%) | 11,190 (17%) | |
| (30,40] | 18,899 (24%) | 3,080 (22%) | 15,819 (24%) | |
| (40,50] | 16,322 (20%) | 3,138 (23%) | 13,184 (20%) | |
| (50,65] | 20,460 (25%) | 3,683 (27%) | 16,777 (25%) | |
| (65,120] | 12,112 (15%) | 2,415 (18%) | 9,697 (15%) | |
| Unknown | 32,807 | 24,255 | 8,552 | |
¹ n (%)

² chi-squared test with Rao & Scott's second-order correction
## Figure 2: Educational Attainment by Race/Ethnicity, 2015-2019
# Stacked bar chart, scaled to proportions, of race/ethnicity within each
# level of the education indicator. It is built from the unweighted rows of
# data2.
Fig1 <- ggplot(data2, aes(x = educ2, fill = race_eth)) +
  geom_bar(position = "fill") +
  labs(
    title = "Educational Attainment by Race and Ethnicity in Texas, 2015-2019",
    x = "Education",
    y = "Population Proportion",
    fill = "Legend"
  ) +
  theme(legend.position = "right")
Fig1