setwd("~/Downloads")

library(survey)
library(tidyverse)
library(dplyr)
library(car)
library(ggplot2)
library(tigris)
library(classInt)
library(mapview)
library(srvyr)
library(tmap)
library(classInt)
library(janitor)

library(ipumsr)

ddi <- read_ipums_ddi("usa_00007.xml")
gis <- read_ipums_micro(ddi)

## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.

gis<- haven::zap_labels(gis)

names(gis) <- tolower(gsub(pattern = "_",replacement =  "",x =  names(gis)))

#weight variables
gis$pwt <- gis$perwt
gis$hwt <- gis$hhwt

gis$hisp <- Recode(gis$hispan, recodes = "9=NA; 1:4='Hispanic'; 0='NonHispanic'")
gis$race_rec <- Recode(gis$race, recodes = "1='White'; 2='Black'; 3='Other'; 4:6='Asian'; 7:9='Other'")
gis$race_eth <- interaction(gis$hisp, gis$race_rec, sep = "_")
gis$race_eth  <- as.factor(ifelse(substr(as.character(gis$race_eth),1,8) == "Hispanic", "Hispanic", as.character(gis$race_eth)))
gis$race_eth <- relevel(gis$race_eth, ref = "NonHispanic_White")

gis$male <- ifelse(gis$sex == 1,1,0)


gis$sex <- Recode(gis$sex, recodes = "1='Male'; 2='Female';else=NA")

gis$insuremp<- ifelse(gis$hinsemp == 2,1,0)

gis$insurany<- ifelse(gis$hcovany == 2,1,0)

gis$insurany2<-  Recode(gis$insurany, recodes = "1='Yes';0='No'; else=NA")

gis$insurany2<- as.factor(gis$insurany2)
gis$insurany2<-relevel(gis$insurany2, ref='Yes')


gis$educ_level<- Recode(gis$educd, recodes = "2:61='1_LT/HS';62:64='1_LT/HS';65:80='2_somcol';90:100='2_somcol'; 81:83='3_AsDeg';101='4_BA+Grd'; 110:116='4_BA+Grd'; else=NA")


#gis$educ_level<- Recode(gis$educd, recodes = "2:61='0LT_HS';62:64='1_HSD/GED';65:80='2_somecoll';90:100='2_somecoll'; 81:83='3_AssocDegree';101='4_bachelordegree'; 110:116='4_BAplus_GradDegree'; else=NA")

#data$employed <- Recode(data$wrklstwk, recodes = "1=0;2=1; else=NA")

gis$metrostatus <- Recode(gis$metro, recodes = "0='indeterminable'; 2:4='Metro'; 1='Non-metro'")

gis$familyincome<-car::Recode(gis$ftotinc, recodes = "-22905:0= '0Loss'; 1:24999= '1Less_than_$25,000'; 25000:34999 = '2$25,000 - $34,999 '; 35000:49999 = '3$35,000 - $49,999 '; 50000:74999 = '4$50,000 - $74,999  '; 75000:99999= '5$75,000 - $99,999 '; 100000:149999= '6$100,000 - $149,999 '; 150000:199999= '7$150,000 - 3$199,999 '; 200000:9999997= '8$200,000 and above ';else=NA")


gis$personalincome<-car::Recode(gis$incwage, recodes = "-1:24999= '1Less_than_$25,000'; 25000:34999 = '2$25,000 - $34,999 '; 35000:49999 = '3$35,000 - $49,999 '; 50000:74999 = '4$50,000 - $74,999  '; 75000:99999= '5$75,000 - $99,999 '; 100000:149999= '6$100,000 - $149,999 '; 150000:199999= '7$150,000 - 3$199,999 '; 200000:999996= '8$200,000 and above ';else=NA")



#data$ind_group<-Recode(data$ind, recodes = "170:490='ag_extract'; 770='construction'; 1070:3990='manufac'; 4070:5790='whole_retail'; 6070:6390='trans'; 6470:6780='information'; 6870:7190= 'fire'; 7270=7790='prof/sci/manage'; 7860:8470='edu/social'; 8560:8690='arts'; 8770:9290='other'; 9370:9590='public_adm'; 9670:9870='military'; else=NA ")

#data$proftech <- Recode(data$ind, recodes = "7270:7490=1; 0=NA; else=0")

gis$agecat<-cut(gis$age, breaks = c(16, 20, 30, 40, 50, 65), include.lowest = T)


#data$edu_scal_inc <- ave(data$incwage, data$male, FUN = scale)

You most likely might not need this section

KINDLY NOTE THAT MY MAIN OUTCOME VARIABLE IS “INSURANY”- THAT IS IF THEY HAVE INSURACE, LOOK HOW I DID CODE IT IN MY LINE 72

gis2 <- gis%>%
filter(age >=16 & age<=64)

gis2 <- gis2%>%
filter(empstat ==1)

gis2 <- gis2%>%
filter(statefip ==48)

des<-svydesign(ids = ~cluster,
               strata = ~strata,
               weights = ~pwt,
               data = gis2)

#Estimate by Texas

# Get estimate, percent and standard error for people that migrate or not by PUMA, taking into account the survey design
puma_est_insurany<-svyby( ~I(insurany==1),
                       by = ~puma,
                       design=des,
                       FUN=svymean,
                       na.rm = TRUE ) %>% 
  clean_names() %>% 
  mutate(Pctsh=round((i_insurany_1_true*100),1),Pctnsh=round((i_insurany_1_false*100),1)) %>% 
  rename(propnsh=i_insurany_1_false,
         propsh=i_insurany_1_true,
         Stderr=se_i_insurany_1_true) %>% 
  select(puma,Pctsh,Pctnsh,Stderr)

pumas<-pumas(state = "TX",
             year = 2019,
             cb = T) %>% 
  mutate(puma=as.numeric(PUMACE10))

#geo1<-left_join(pumas, puma_est_insurany, "puma", "puma")

geo1<-left_join(pumas, puma_est_insurany, by =c("puma"= "puma"))

Use this particular map alone for geo1(all texas)

tmap_mode("plot")

## tmap mode set to plotting

tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo1)+
  tm_polygons("Pctsh",
              style="kmeans",
              title=c("Rate of Insurance"),
              palette="Blues",
              n=5,
              legend.hist = TRUE) +
  tm_layout(legend.outside = TRUE,
            title = "Workers Insurance rate in Texas \n 2015-2019",
            title.size =1.5,
            legend.frame = TRUE,
            ) +  tm_compass(position = c("left","top")) + tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center")) + tm_scale_bar(position = c("left","bottom"))

You dont need this

tmap_mode("view")

## tmap mode set to interactive viewing

geo1 %>%
  tm_shape () +
  tm_polygons("Pctsh",
              style= "kmeans",
              n=8,
              legend.hist= TRUE) +
  tm_layout(legend.outside= TRUE,
            title= "Insurance rate in Texas \ 2015-2019")

You dont need this

tm_shape(geo1)+
  tm_polygons(c("Pctsh"),
              title=c("% in Insurance rate"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Texas Insurace rate in 2019- Quantile Breaks",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))

## Compass not supported in view mode.

## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.

#Estimate by Metro Area

# Get estimate, percent and standard error for people that migrate or not by PUMA, taking into account the survey design
met_est_insurany<-svyby( ~I(insurany==1),
                       by = ~met2013,
                       design=des,
                       FUN=svymean,
                       na.rm = TRUE ) %>% 
  clean_names() %>% 
  mutate(Pctsh=round((i_insurany_1_true*100),1),Pctnsh=round((i_insurany_1_false*100),1)) %>% 
  rename(propnsh=i_insurany_1_false,
         propsh=i_insurany_1_true,
         Stderr=se_i_insurany_1_true) %>% 
  select(met2013,Pctsh,Pctnsh,Stderr)

# Get California Boundary File from Tigris and merge the migrate estimates with it .

#geo1<-left_join(pumas, puma_est_insurany, "puma", "puma")

mets<-core_based_statistical_areas(cb = T, year = 2018)
mets<-mets[grep(mets$NAME,pattern =  "TX"),]
mets$met2013<- as.numeric(mets$GEOID)



geo2<-left_join(mets, met_est_insurany, by =c("met2013"= "met2013"))

Use this particular map alone for geo2( metro area)

tmap_mode("plot")

## tmap mode set to plotting

tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo2)+
  tm_polygons("Pctsh",
              style="kmeans",
              title=c("Rate of Insurance"),
              palette="Blues",
              n=5,
              legend.hist = TRUE) +
  tm_layout(legend.outside = TRUE,
            title = "Workers Insurance rate in Texas Metro \n 2015-2019",
            title.size =1.5,
            legend.frame = TRUE,
            ) +  tm_compass(position = c("left","top")) + tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center")) + tm_scale_bar(position = c("left","bottom"))

You dont need this

tmap_mode("view")

## tmap mode set to interactive viewing

geo2 %>%
  tm_shape () +
  tm_polygons("Pctsh",
              style= "kmeans",
              n=8,
              legend.hist= TRUE) +
  tm_layout(legend.outside= TRUE,
            title= "Insurance rate in Texas Metro Area \ 2015-2019")

sts<-states(cb=T,year = 2019)
sts<-sts%>%
  filter(GEOID==48)

Estimates for counties

cos<-counties(cb= T,state = "TX", year =2019)
cos$countyfip <- as.numeric(cos$COUNTYFP)

# Get estimate, percent and standard error for people that migrate or not by PUMA, taking into account the survey design
cos_est_insurany<-svyby( ~I(insurany==1),
                       by = ~countyfip,
                       design=des,
                       FUN=svymean,
                       na.rm = TRUE ) %>% 
  clean_names() %>% 
  mutate(Pctsh=round((i_insurany_1_true*100),1),Pctnsh=round((i_insurany_1_false*100),1)) %>% 
  rename(propnsh=i_insurany_1_false,
         propsh=i_insurany_1_true,
         Stderr=se_i_insurany_1_true) %>% 
  select(countyfip,Pctsh,Pctnsh,Stderr)

# Get California Boundary File from Tigris and merge the migrate estimates with it .

#geo1<-left_join(pumas, puma_est_insurany, "puma", "puma")

geo3<-left_join(cos, cos_est_insurany, by =c("countyfip"= "countyfip"))

Use this particular map alone for geo3( counties)

tmap_mode("plot")

## tmap mode set to plotting

tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo3)+
  tm_polygons("Pctsh",
              style="kmeans",
              title=c("Rate of Insurance"),
              palette="Blues",
              n=5,
              legend.hist = TRUE) +
  tm_layout(legend.outside = TRUE,
            title = "Workers Insurance rate in Texas County \n 2015-2019",
            title.size =1.5,
            legend.frame = TRUE,
            ) +  tm_compass(position = c("left","top")) + tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center")) + tm_scale_bar(position = c("left","bottom"))

You dont neeed this

tmap_mode("view")

## tmap mode set to interactive viewing

geo3 %>%
  tm_shape () +
  tm_polygons("Pctsh",
              style= "kmeans",
              n=8,
              legend.hist= TRUE) +
  tm_layout(legend.outside= TRUE,
            title= "Insurance rate in Texas County Area \ 2015-2019")

DESCRIPTIVE SECTION

without survey

library(table1)

## 
## Attaching package: 'table1'

## The following objects are masked from 'package:base':
## 
##     units, units<-

label(gis2$sex)<-"Sex"
label(gis2$race_eth)<-"Race/Ethnicity"
label(gis2$agecat)<-"Age"
label(gis2$metrostatus)<-"Metropolitan Status"
label(gis2$personalincome)<-"Personal Income"
tbl1<- table1(~ sex + race_eth + agecat + educ_level + metrostatus + personalincome | insurany2, data=gis2)
table1(~ sex + race_eth + agecat + educ_level + metrostatus + personalincome | insurany2, data=gis2)

	Yes (N=471078)	No (N=96409)	Overall (N=567487)
Sex
Female	224661 (47.7%)	39800 (41.3%)	264461 (46.6%)
Male	246417 (52.3%)	56609 (58.7%)	303026 (53.4%)
Race/Ethnicity
NonHispanic_White	261239 (55.5%)	29068 (30.2%)	290307 (51.2%)
Hispanic	130962 (27.8%)	55350 (57.4%)	186312 (32.8%)
NonHispanic_Asian	26992 (5.7%)	2863 (3.0%)	29855 (5.3%)
NonHispanic_Black	42874 (9.1%)	7598 (7.9%)	50472 (8.9%)
NonHispanic_Other	9011 (1.9%)	1530 (1.6%)	10541 (1.9%)
Age
[16,20]	23215 (4.9%)	6648 (6.9%)	29863 (5.3%)
(20,30]	90999 (19.3%)	26245 (27.2%)	117244 (20.7%)
(30,40]	104176 (22.1%)	24236 (25.1%)	128412 (22.6%)
(40,50]	107691 (22.9%)	20542 (21.3%)	128233 (22.6%)
(50,65]	144997 (30.8%)	18738 (19.4%)	163735 (28.9%)
Educational attainment [detailed version]
1_LT/HS	136715 (29.0%)	59401 (61.6%)	196116 (34.6%)
2_somcol	111020 (23.6%)	21765 (22.6%)	132785 (23.4%)
3_AsDeg	39660 (8.4%)	5317 (5.5%)	44977 (7.9%)
4_BA+Grd	183683 (39.0%)	9926 (10.3%)	193609 (34.1%)
Metropolitan Status
indeterminable	61646 (13.1%)	13724 (14.2%)	75370 (13.3%)
Metro	393132 (83.5%)	78002 (80.9%)	471134 (83.0%)
Non-metro	16300 (3.5%)	4683 (4.9%)	20983 (3.7%)
Personal Income
1Less_than_$25,000	137739 (29.2%)	59989 (62.2%)	197728 (34.8%)
2$25,000 - $34,999	57002 (12.1%)	14890 (15.4%)	71892 (12.7%)
3$35,000 - $49,999	71739 (15.2%)	10651 (11.0%)	82390 (14.5%)
4$50,000 - $74,999	92558 (19.6%)	6984 (7.2%)	99542 (17.5%)
5$75,000 - $99,999	43934 (9.3%)	2029 (2.1%)	45963 (8.1%)
6$100,000 - $149,999	39659 (8.4%)	1216 (1.3%)	40875 (7.2%)
7$150,000 - 3$199,999	12812 (2.7%)	321 (0.3%)	13133 (2.3%)
8$200,000 and above	15635 (3.3%)	329 (0.3%)	15964 (2.8%)

##With survey

library(gtsummary)
 des %>% tbl_svysummary(by= insurany, 
                       include=c(sex,race_eth,agecat,educ_level,metrostatus,personalincome),
                       label = list(educ_level~ "Educantion Level",
                                    race_eth~ "Race/Ethnicity" ,
                                   agecat~"Age",
                                   metrostatus~"Metropolitan Status",
                                   personalincome~"Personal Income",
                                  sex~"Sex" ))%>%
  add_p() %>%
add_overall() %>%
  modify_spanning_header(c("stat_1", "stat_2") ~ "**Workers Insurance rate in Texas**") %>% 
as_gt() %>%
  # modify with gt functions
  gt::tab_header("Table 1: Table 1 Characteristics of Workers By Insurance rate, IPUMS, 2015-2019") %>% 
  gt::tab_options(
    table.font.size = "small",
    data_row.padding = gt::px(1))

Table 1: Table 1 Characteristics of Workers By Insurance rate, IPUMS, 2015-2019
Characteristic	Overall, N = 12,680,766¹	Workers Insurance rate in Texas		p-value²
Characteristic	Overall, N = 12,680,766¹	0, N = 2,597,141¹	1, N = 10,083,625¹	p-value²
Sex				<0.001
Female	5,773,196 (46%)	1,025,138 (39%)	4,748,058 (47%)
Male	6,907,570 (54%)	1,572,003 (61%)	5,335,567 (53%)
Race/Ethnicity				<0.001
NonHispanic_White	5,466,905 (43%)	615,436 (24%)	4,851,469 (48%)
Hispanic	4,804,038 (38%)	1,623,234 (63%)	3,180,804 (32%)
NonHispanic_Asian	671,089 (5.3%)	71,183 (2.7%)	599,906 (5.9%)
NonHispanic_Black	1,509,528 (12%)	250,480 (9.6%)	1,259,048 (12%)
NonHispanic_Other	229,206 (1.8%)	36,808 (1.4%)	192,398 (1.9%)
Age				<0.001
[16,20]	708,202 (5.6%)	177,452 (6.8%)	530,750 (5.3%)
(20,30]	3,036,986 (24%)	774,274 (30%)	2,262,712 (22%)
(30,40]	3,085,392 (24%)	690,309 (27%)	2,395,083 (24%)
(40,50]	2,832,379 (22%)	542,522 (21%)	2,289,857 (23%)
(50,65]	3,017,807 (24%)	412,584 (16%)	2,605,223 (26%)
Educantion Level				<0.001
1_LT/HS	4,764,392 (38%)	1,661,002 (64%)	3,103,390 (31%)
2_somcol	3,027,912 (24%)	569,278 (22%)	2,458,634 (24%)
3_AsDeg	976,007 (7.7%)	129,503 (5.0%)	846,504 (8.4%)
4_BA+Grd	3,912,455 (31%)	237,358 (9.1%)	3,675,097 (36%)
Metropolitan Status				<0.001
indeterminable	1,258,503 (9.9%)	270,776 (10%)	987,727 (9.8%)
Metro	10,999,414 (87%)	2,219,791 (85%)	8,779,623 (87%)
Non-metro	422,849 (3.3%)	106,574 (4.1%)	316,275 (3.1%)
Personal Income				<0.001
1Less_than_$25,000	4,653,579 (37%)	1,602,402 (62%)	3,051,177 (30%)
2$25,000 - $34,999	1,729,207 (14%)	418,757 (16%)	1,310,450 (13%)
3$35,000 - $49,999	1,860,388 (15%)	289,178 (11%)	1,571,210 (16%)
4$50,000 - $74,999	2,129,361 (17%)	183,618 (7.1%)	1,945,743 (19%)
5$75,000 - $99,999	949,723 (7.5%)	55,346 (2.1%)	894,377 (8.9%)
6$100,000 - $149,999	813,839 (6.4%)	31,782 (1.2%)	782,057 (7.8%)
7$150,000 - 3$199,999	255,135 (2.0%)	8,713 (0.3%)	246,422 (2.4%)
8$200,000 and above	289,534 (2.3%)	7,345 (0.3%)	282,189 (2.8%)
¹ n (%) ² chi-squared test with Rao & Scott's second-order correction

METRO<-svyby(formula = ~insurany,
           by = ~metrostatus,
           design = des,
           FUN = svymean,
           na.rm=T)

svychisq(~insurany+metrostatus,
         design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~insurany + metrostatus, design = des)
## F = 67.615, ndf = 1.9986e+00, ddf = 7.1103e+05, p-value < 2.2e-16

METRO%>%
   mutate(metro = fct_relevel(metrostatus, 
            "Metro", "Non-metro", "Overweight", "indeterminable")) %>%
  ggplot()+
  geom_point(aes(x=metro,y=insurany))+
  geom_errorbar(aes(x=metro, ymin = insurany-1.96*se, 
                    ymax= insurany+1.96*se),
                width=.30)+
   labs(title = "Percent % of Texas Workers Insured by Metropolitan Area", 
        caption = "Source: IPUMS USA Data, 2015-2019 \n Calculations by Joseph Jaiyeola ",
       x = "Metropolitan Categories",
       y = "%  Insured")+
  theme_minimal()+ theme(
  plot.title = element_text(size= 10, face = "bold"))

## Warning: Unknown levels in `f`: Overweight

RACE<-svyby(formula = ~insurany,
           by = ~race_eth,
           design = des,
           FUN = svymean,
           na.rm=T)

svychisq(~insurany+race_eth,
         design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~insurany + race_eth, design = des)
## F = 3782.9, ndf = 3.9653e+00, ddf = 1.4107e+06, p-value < 2.2e-16

RACE%>%
  #mutate(metro = fct_relevel(race_eth, 
          #  "Metro", "Non-metro", "Overweight", "indeterminable")) %>%
  ggplot()+
  geom_point(aes(x=race_eth,y=insurany))+
  geom_errorbar(aes(x=race_eth, ymin = insurany-1.96*se, 
                    ymax= insurany+1.96*se),
                width=.30)+
   labs(title = "Percent % of Texas Workers Insured by Race/Ethnicity", 
        caption = "Source: IPUMS USA Data, 2015-2019 \n Calculations by Joseph Jaiyeola ",
       x = "Race/Ethnicity Categories",
       y = "%  Insured")+
  theme_minimal()+ theme(
  plot.title = element_text(size= 10, face = "bold"))

crossre<-svyby(formula = ~insurany,
              by = ~race_eth+educ_level,
              design = des,
              FUN = svymean,
              na.rm=T)

crossre%>%
   #mutate(educa = fct_relevel(educa, 
           # "<HS", "HS", ">HS")) %>%
  ggplot()+
  geom_point(aes(x=educ_level, y = insurany))+ 
  geom_errorbar(aes(x=educ_level, ymin = insurany-1.96*se, 
                    ymax=insurany+1.96*se),
                width=.25)+
  facet_wrap(~ race_eth, nrow = 3)+
  labs(title = "Percent % of Texas Workers Insured by Race/Ethnicity and Education", 
        caption = "Source: IPUMS USA Data, 2015-2019 \n Calculations by Joseph Jaiyeola",
       x = "Education",
       y = "%  Insured")+
  theme_minimal()+ theme(
  plot.title = element_text(size= 10, face = "bold"))

newday

Joseph Jaiyeola

4/6/2022

You most likely might not need this section

KINDLY NOTE THAT MY MAIN OUTCOME VARIABLE IS “INSURANY”- THAT IS IF THEY HAVE INSURACE, LOOK HOW I DID CODE IT IN MY LINE 72

Use this particular map alone for geo1(all texas)

You dont need this

You dont need this

Use this particular map alone for geo2( metro area)

You dont need this

Estimates for counties

Use this particular map alone for geo3( counties)

You dont neeed this

DESCRIPTIVE SECTION

without survey