setwd("~/Downloads")
library(survey)
library(tidyverse)
library(dplyr)
library(car)
library(ggplot2)
library(tigris)
library(classInt)
library(mapview)
library(srvyr)
library(tmap)
library(classInt)
library(janitor)
library(ipumsr)
ddi <- read_ipums_ddi("usa_00007.xml")
gis <- read_ipums_micro(ddi)
## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.
gis<- haven::zap_labels(gis)
names(gis) <- tolower(gsub(pattern = "_",replacement = "",x = names(gis)))
#weight variables
gis$pwt <- gis$perwt
gis$hwt <- gis$hhwt
gis$hisp <- Recode(gis$hispan, recodes = "9=NA; 1:4='Hispanic'; 0='NonHispanic'")
gis$race_rec <- Recode(gis$race, recodes = "1='White'; 2='Black'; 3='Other'; 4:6='Asian'; 7:9='Other'")
gis$race_eth <- interaction(gis$hisp, gis$race_rec, sep = "_")
gis$race_eth <- as.factor(ifelse(substr(as.character(gis$race_eth),1,8) == "Hispanic", "Hispanic", as.character(gis$race_eth)))
gis$race_eth <- relevel(gis$race_eth, ref = "NonHispanic_White")
gis$male <- ifelse(gis$sex == 1,1,0)
gis$sex <- Recode(gis$sex, recodes = "1='Male'; 2='Female';else=NA")
gis$insuremp<- ifelse(gis$hinsemp == 2,1,0)
gis$insurany<- ifelse(gis$hcovany == 2,1,0)
gis$insurany2<- Recode(gis$insurany, recodes = "1='Yes';0='No'; else=NA")
gis$insurany2<- as.factor(gis$insurany2)
gis$insurany2<-relevel(gis$insurany2, ref='Yes')
gis$educ_level<- Recode(gis$educd, recodes = "2:61='1_LT/HS';62:64='1_LT/HS';65:80='2_somcol';90:100='2_somcol'; 81:83='3_AsDeg';101='4_BA+Grd'; 110:116='4_BA+Grd'; else=NA")
#gis$educ_level<- Recode(gis$educd, recodes = "2:61='0LT_HS';62:64='1_HSD/GED';65:80='2_somecoll';90:100='2_somecoll'; 81:83='3_AssocDegree';101='4_bachelordegree'; 110:116='4_BAplus_GradDegree'; else=NA")
#data$employed <- Recode(data$wrklstwk, recodes = "1=0;2=1; else=NA")
gis$metrostatus <- Recode(gis$metro, recodes = "0='indeterminable'; 2:4='Metro'; 1='Non-metro'")
gis$familyincome<-car::Recode(gis$ftotinc, recodes = "-22905:0= '0Loss'; 1:24999= '1Less_than_$25,000'; 25000:34999 = '2$25,000 - $34,999 '; 35000:49999 = '3$35,000 - $49,999 '; 50000:74999 = '4$50,000 - $74,999 '; 75000:99999= '5$75,000 - $99,999 '; 100000:149999= '6$100,000 - $149,999 '; 150000:199999= '7$150,000 - 3$199,999 '; 200000:9999997= '8$200,000 and above ';else=NA")
gis$personalincome<-car::Recode(gis$incwage, recodes = "-1:24999= '1Less_than_$25,000'; 25000:34999 = '2$25,000 - $34,999 '; 35000:49999 = '3$35,000 - $49,999 '; 50000:74999 = '4$50,000 - $74,999 '; 75000:99999= '5$75,000 - $99,999 '; 100000:149999= '6$100,000 - $149,999 '; 150000:199999= '7$150,000 - 3$199,999 '; 200000:999996= '8$200,000 and above ';else=NA")
#data$ind_group<-Recode(data$ind, recodes = "170:490='ag_extract'; 770='construction'; 1070:3990='manufac'; 4070:5790='whole_retail'; 6070:6390='trans'; 6470:6780='information'; 6870:7190= 'fire'; 7270=7790='prof/sci/manage'; 7860:8470='edu/social'; 8560:8690='arts'; 8770:9290='other'; 9370:9590='public_adm'; 9670:9870='military'; else=NA ")
#data$proftech <- Recode(data$ind, recodes = "7270:7490=1; 0=NA; else=0")
gis$agecat<-cut(gis$age, breaks = c(16, 20, 30, 40, 50, 65), include.lowest = T)
#data$edu_scal_inc <- ave(data$incwage, data$male, FUN = scale)
gis2 <- gis%>%
filter(age >=16 & age<=64)
gis2 <- gis2%>%
filter(empstat ==1)
gis2 <- gis2%>%
filter(statefip ==48)
des<-svydesign(ids = ~cluster,
strata = ~strata,
weights = ~pwt,
data = gis2)
#Estimate by Texas
# Get estimate, percent and standard error for people that migrate or not by PUMA, taking into account the survey design
puma_est_insurany<-svyby( ~I(insurany==1),
by = ~puma,
design=des,
FUN=svymean,
na.rm = TRUE ) %>%
clean_names() %>%
mutate(Pctsh=round((i_insurany_1_true*100),1),Pctnsh=round((i_insurany_1_false*100),1)) %>%
rename(propnsh=i_insurany_1_false,
propsh=i_insurany_1_true,
Stderr=se_i_insurany_1_true) %>%
select(puma,Pctsh,Pctnsh,Stderr)
pumas<-pumas(state = "TX",
year = 2019,
cb = T) %>%
mutate(puma=as.numeric(PUMACE10))
#geo1<-left_join(pumas, puma_est_insurany, "puma", "puma")
geo1<-left_join(pumas, puma_est_insurany, by =c("puma"= "puma"))
tmap_mode("plot")
## tmap mode set to plotting
tm_basemap("OpenStreetMap.Mapnik")+
tm_shape(geo1)+
tm_polygons("Pctsh",
style="kmeans",
title=c("Rate of Insurance"),
palette="Blues",
n=5,
legend.hist = TRUE) +
tm_layout(legend.outside = TRUE,
title = "Workers Insurance rate in Texas \n 2015-2019",
title.size =1.5,
legend.frame = TRUE,
) + tm_compass(position = c("left","top")) + tm_format("World",
legend.position = c("left", "bottom"),
main.title.position =c("center")) + tm_scale_bar(position = c("left","bottom"))
tmap_mode("view")
## tmap mode set to interactive viewing
geo1 %>%
tm_shape () +
tm_polygons("Pctsh",
style= "kmeans",
n=8,
legend.hist= TRUE) +
tm_layout(legend.outside= TRUE,
title= "Insurance rate in Texas \ 2015-2019")
tm_shape(geo1)+
tm_polygons(c("Pctsh"),
title=c("% in Insurance rate"),
palette="Blues",
style="quantile",
n=5)+
#tm_format("World", legend.outside=T, title.size =4)+
tm_scale_bar()+
tm_layout(title="Texas Insurace rate in 2019- Quantile Breaks",
title.size =1.5,
legend.frame = TRUE,
title.position = c('right', 'top'))+
tm_compass()+
tm_format("World",
legend.position = c("left", "bottom"),
main.title.position =c("center"))
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
#Estimate by Metro Area
# Get estimate, percent and standard error for people that migrate or not by PUMA, taking into account the survey design
met_est_insurany<-svyby( ~I(insurany==1),
by = ~met2013,
design=des,
FUN=svymean,
na.rm = TRUE ) %>%
clean_names() %>%
mutate(Pctsh=round((i_insurany_1_true*100),1),Pctnsh=round((i_insurany_1_false*100),1)) %>%
rename(propnsh=i_insurany_1_false,
propsh=i_insurany_1_true,
Stderr=se_i_insurany_1_true) %>%
select(met2013,Pctsh,Pctnsh,Stderr)
# Get California Boundary File from Tigris and merge the migrate estimates with it .
#geo1<-left_join(pumas, puma_est_insurany, "puma", "puma")
mets<-core_based_statistical_areas(cb = T, year = 2018)
mets<-mets[grep(mets$NAME,pattern = "TX"),]
mets$met2013<- as.numeric(mets$GEOID)
geo2<-left_join(mets, met_est_insurany, by =c("met2013"= "met2013"))
tmap_mode("plot")
## tmap mode set to plotting
tm_basemap("OpenStreetMap.Mapnik")+
tm_shape(geo2)+
tm_polygons("Pctsh",
style="kmeans",
title=c("Rate of Insurance"),
palette="Blues",
n=5,
legend.hist = TRUE) +
tm_layout(legend.outside = TRUE,
title = "Workers Insurance rate in Texas Metro \n 2015-2019",
title.size =1.5,
legend.frame = TRUE,
) + tm_compass(position = c("left","top")) + tm_format("World",
legend.position = c("left", "bottom"),
main.title.position =c("center")) + tm_scale_bar(position = c("left","bottom"))
tmap_mode("view")
## tmap mode set to interactive viewing
geo2 %>%
tm_shape () +
tm_polygons("Pctsh",
style= "kmeans",
n=8,
legend.hist= TRUE) +
tm_layout(legend.outside= TRUE,
title= "Insurance rate in Texas Metro Area \ 2015-2019")
sts<-states(cb=T,year = 2019)
sts<-sts%>%
filter(GEOID==48)
cos<-counties(cb= T,state = "TX", year =2019)
cos$countyfip <- as.numeric(cos$COUNTYFP)
# Get estimate, percent and standard error for people that migrate or not by PUMA, taking into account the survey design
cos_est_insurany<-svyby( ~I(insurany==1),
by = ~countyfip,
design=des,
FUN=svymean,
na.rm = TRUE ) %>%
clean_names() %>%
mutate(Pctsh=round((i_insurany_1_true*100),1),Pctnsh=round((i_insurany_1_false*100),1)) %>%
rename(propnsh=i_insurany_1_false,
propsh=i_insurany_1_true,
Stderr=se_i_insurany_1_true) %>%
select(countyfip,Pctsh,Pctnsh,Stderr)
# Get California Boundary File from Tigris and merge the migrate estimates with it .
#geo1<-left_join(pumas, puma_est_insurany, "puma", "puma")
geo3<-left_join(cos, cos_est_insurany, by =c("countyfip"= "countyfip"))
tmap_mode("plot")
## tmap mode set to plotting
tm_basemap("OpenStreetMap.Mapnik")+
tm_shape(geo3)+
tm_polygons("Pctsh",
style="kmeans",
title=c("Rate of Insurance"),
palette="Blues",
n=5,
legend.hist = TRUE) +
tm_layout(legend.outside = TRUE,
title = "Workers Insurance rate in Texas County \n 2015-2019",
title.size =1.5,
legend.frame = TRUE,
) + tm_compass(position = c("left","top")) + tm_format("World",
legend.position = c("left", "bottom"),
main.title.position =c("center")) + tm_scale_bar(position = c("left","bottom"))
tmap_mode("view")
## tmap mode set to interactive viewing
geo3 %>%
tm_shape () +
tm_polygons("Pctsh",
style= "kmeans",
n=8,
legend.hist= TRUE) +
tm_layout(legend.outside= TRUE,
title= "Insurance rate in Texas County Area \ 2015-2019")
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
label(gis2$sex)<-"Sex"
label(gis2$race_eth)<-"Race/Ethnicity"
label(gis2$agecat)<-"Age"
label(gis2$metrostatus)<-"Metropolitan Status"
label(gis2$personalincome)<-"Personal Income"
tbl1<- table1(~ sex + race_eth + agecat + educ_level + metrostatus + personalincome | insurany2, data=gis2)
table1(~ sex + race_eth + agecat + educ_level + metrostatus + personalincome | insurany2, data=gis2)
Yes (N=471078) |
No (N=96409) |
Overall (N=567487) |
|
---|---|---|---|
Sex | |||
Female | 224661 (47.7%) | 39800 (41.3%) | 264461 (46.6%) |
Male | 246417 (52.3%) | 56609 (58.7%) | 303026 (53.4%) |
Race/Ethnicity | |||
NonHispanic_White | 261239 (55.5%) | 29068 (30.2%) | 290307 (51.2%) |
Hispanic | 130962 (27.8%) | 55350 (57.4%) | 186312 (32.8%) |
NonHispanic_Asian | 26992 (5.7%) | 2863 (3.0%) | 29855 (5.3%) |
NonHispanic_Black | 42874 (9.1%) | 7598 (7.9%) | 50472 (8.9%) |
NonHispanic_Other | 9011 (1.9%) | 1530 (1.6%) | 10541 (1.9%) |
Age | |||
[16,20] | 23215 (4.9%) | 6648 (6.9%) | 29863 (5.3%) |
(20,30] | 90999 (19.3%) | 26245 (27.2%) | 117244 (20.7%) |
(30,40] | 104176 (22.1%) | 24236 (25.1%) | 128412 (22.6%) |
(40,50] | 107691 (22.9%) | 20542 (21.3%) | 128233 (22.6%) |
(50,65] | 144997 (30.8%) | 18738 (19.4%) | 163735 (28.9%) |
Educational attainment [detailed version] | |||
1_LT/HS | 136715 (29.0%) | 59401 (61.6%) | 196116 (34.6%) |
2_somcol | 111020 (23.6%) | 21765 (22.6%) | 132785 (23.4%) |
3_AsDeg | 39660 (8.4%) | 5317 (5.5%) | 44977 (7.9%) |
4_BA+Grd | 183683 (39.0%) | 9926 (10.3%) | 193609 (34.1%) |
Metropolitan Status | |||
indeterminable | 61646 (13.1%) | 13724 (14.2%) | 75370 (13.3%) |
Metro | 393132 (83.5%) | 78002 (80.9%) | 471134 (83.0%) |
Non-metro | 16300 (3.5%) | 4683 (4.9%) | 20983 (3.7%) |
Personal Income | |||
1Less_than_$25,000 | 137739 (29.2%) | 59989 (62.2%) | 197728 (34.8%) |
2$25,000 - $34,999 | 57002 (12.1%) | 14890 (15.4%) | 71892 (12.7%) |
3$35,000 - $49,999 | 71739 (15.2%) | 10651 (11.0%) | 82390 (14.5%) |
4$50,000 - $74,999 | 92558 (19.6%) | 6984 (7.2%) | 99542 (17.5%) |
5$75,000 - $99,999 | 43934 (9.3%) | 2029 (2.1%) | 45963 (8.1%) |
6$100,000 - $149,999 | 39659 (8.4%) | 1216 (1.3%) | 40875 (7.2%) |
7$150,000 - 3$199,999 | 12812 (2.7%) | 321 (0.3%) | 13133 (2.3%) |
8$200,000 and above | 15635 (3.3%) | 329 (0.3%) | 15964 (2.8%) |
##With survey
library(gtsummary)
des %>% tbl_svysummary(by= insurany,
include=c(sex,race_eth,agecat,educ_level,metrostatus,personalincome),
label = list(educ_level~ "Educantion Level",
race_eth~ "Race/Ethnicity" ,
agecat~"Age",
metrostatus~"Metropolitan Status",
personalincome~"Personal Income",
sex~"Sex" ))%>%
add_p() %>%
add_overall() %>%
modify_spanning_header(c("stat_1", "stat_2") ~ "**Workers Insurance rate in Texas**") %>%
as_gt() %>%
# modify with gt functions
gt::tab_header("Table 1: Table 1 Characteristics of Workers By Insurance rate, IPUMS, 2015-2019") %>%
gt::tab_options(
table.font.size = "small",
data_row.padding = gt::px(1))
Table 1: Table 1 Characteristics of Workers By Insurance rate, IPUMS, 2015-2019 | ||||
---|---|---|---|---|
Characteristic | Overall, N = 12,680,7661 | Workers Insurance rate in Texas | p-value2 | |
0, N = 2,597,1411 | 1, N = 10,083,6251 | |||
Sex | <0.001 | |||
Female | 5,773,196 (46%) | 1,025,138 (39%) | 4,748,058 (47%) | |
Male | 6,907,570 (54%) | 1,572,003 (61%) | 5,335,567 (53%) | |
Race/Ethnicity | <0.001 | |||
NonHispanic_White | 5,466,905 (43%) | 615,436 (24%) | 4,851,469 (48%) | |
Hispanic | 4,804,038 (38%) | 1,623,234 (63%) | 3,180,804 (32%) | |
NonHispanic_Asian | 671,089 (5.3%) | 71,183 (2.7%) | 599,906 (5.9%) | |
NonHispanic_Black | 1,509,528 (12%) | 250,480 (9.6%) | 1,259,048 (12%) | |
NonHispanic_Other | 229,206 (1.8%) | 36,808 (1.4%) | 192,398 (1.9%) | |
Age | <0.001 | |||
[16,20] | 708,202 (5.6%) | 177,452 (6.8%) | 530,750 (5.3%) | |
(20,30] | 3,036,986 (24%) | 774,274 (30%) | 2,262,712 (22%) | |
(30,40] | 3,085,392 (24%) | 690,309 (27%) | 2,395,083 (24%) | |
(40,50] | 2,832,379 (22%) | 542,522 (21%) | 2,289,857 (23%) | |
(50,65] | 3,017,807 (24%) | 412,584 (16%) | 2,605,223 (26%) | |
Educantion Level | <0.001 | |||
1_LT/HS | 4,764,392 (38%) | 1,661,002 (64%) | 3,103,390 (31%) | |
2_somcol | 3,027,912 (24%) | 569,278 (22%) | 2,458,634 (24%) | |
3_AsDeg | 976,007 (7.7%) | 129,503 (5.0%) | 846,504 (8.4%) | |
4_BA+Grd | 3,912,455 (31%) | 237,358 (9.1%) | 3,675,097 (36%) | |
Metropolitan Status | <0.001 | |||
indeterminable | 1,258,503 (9.9%) | 270,776 (10%) | 987,727 (9.8%) | |
Metro | 10,999,414 (87%) | 2,219,791 (85%) | 8,779,623 (87%) | |
Non-metro | 422,849 (3.3%) | 106,574 (4.1%) | 316,275 (3.1%) | |
Personal Income | <0.001 | |||
1Less_than_$25,000 | 4,653,579 (37%) | 1,602,402 (62%) | 3,051,177 (30%) | |
2$25,000 - $34,999 | 1,729,207 (14%) | 418,757 (16%) | 1,310,450 (13%) | |
3$35,000 - $49,999 | 1,860,388 (15%) | 289,178 (11%) | 1,571,210 (16%) | |
4$50,000 - $74,999 | 2,129,361 (17%) | 183,618 (7.1%) | 1,945,743 (19%) | |
5$75,000 - $99,999 | 949,723 (7.5%) | 55,346 (2.1%) | 894,377 (8.9%) | |
6$100,000 - $149,999 | 813,839 (6.4%) | 31,782 (1.2%) | 782,057 (7.8%) | |
7$150,000 - 3$199,999 | 255,135 (2.0%) | 8,713 (0.3%) | 246,422 (2.4%) | |
8$200,000 and above | 289,534 (2.3%) | 7,345 (0.3%) | 282,189 (2.8%) | |
1
n (%)
2
chi-squared test with Rao & Scott's second-order correction
|
METRO<-svyby(formula = ~insurany,
by = ~metrostatus,
design = des,
FUN = svymean,
na.rm=T)
svychisq(~insurany+metrostatus,
design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~insurany + metrostatus, design = des)
## F = 67.615, ndf = 1.9986e+00, ddf = 7.1103e+05, p-value < 2.2e-16
METRO%>%
mutate(metro = fct_relevel(metrostatus,
"Metro", "Non-metro", "Overweight", "indeterminable")) %>%
ggplot()+
geom_point(aes(x=metro,y=insurany))+
geom_errorbar(aes(x=metro, ymin = insurany-1.96*se,
ymax= insurany+1.96*se),
width=.30)+
labs(title = "Percent % of Texas Workers Insured by Metropolitan Area",
caption = "Source: IPUMS USA Data, 2015-2019 \n Calculations by Joseph Jaiyeola ",
x = "Metropolitan Categories",
y = "% Insured")+
theme_minimal()+ theme(
plot.title = element_text(size= 10, face = "bold"))
## Warning: Unknown levels in `f`: Overweight
RACE<-svyby(formula = ~insurany,
by = ~race_eth,
design = des,
FUN = svymean,
na.rm=T)
svychisq(~insurany+race_eth,
design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~insurany + race_eth, design = des)
## F = 3782.9, ndf = 3.9653e+00, ddf = 1.4107e+06, p-value < 2.2e-16
RACE%>%
#mutate(metro = fct_relevel(race_eth,
# "Metro", "Non-metro", "Overweight", "indeterminable")) %>%
ggplot()+
geom_point(aes(x=race_eth,y=insurany))+
geom_errorbar(aes(x=race_eth, ymin = insurany-1.96*se,
ymax= insurany+1.96*se),
width=.30)+
labs(title = "Percent % of Texas Workers Insured by Race/Ethnicity",
caption = "Source: IPUMS USA Data, 2015-2019 \n Calculations by Joseph Jaiyeola ",
x = "Race/Ethnicity Categories",
y = "% Insured")+
theme_minimal()+ theme(
plot.title = element_text(size= 10, face = "bold"))
crossre<-svyby(formula = ~insurany,
by = ~race_eth+educ_level,
design = des,
FUN = svymean,
na.rm=T)
crossre%>%
#mutate(educa = fct_relevel(educa,
# "<HS", "HS", ">HS")) %>%
ggplot()+
geom_point(aes(x=educ_level, y = insurany))+
geom_errorbar(aes(x=educ_level, ymin = insurany-1.96*se,
ymax=insurany+1.96*se),
width=.25)+
facet_wrap(~ race_eth, nrow = 3)+
labs(title = "Percent % of Texas Workers Insured by Race/Ethnicity and Education",
caption = "Source: IPUMS USA Data, 2015-2019 \n Calculations by Joseph Jaiyeola",
x = "Education",
y = "% Insured")+
theme_minimal()+ theme(
plot.title = element_text(size= 10, face = "bold"))