setwd("~/Downloads")
library(survey)
library(tidyverse)
library(dplyr)
library(car)
library(ggplot2)
library(tigris)
library(classInt)
library(mapview)
library(srvyr)
library(tmap)
library(classInt)
library(janitor)
library(ipumsr)
ddi <- read_ipums_ddi("usa_00007.xml")
gis <- read_ipums_micro(ddi)
## Use of data from IPUMS USA is subject to conditions including that users should
## cite the data appropriately. Use command `ipums_conditions()` for more details.
gis<- haven::zap_labels(gis)
names(gis) <- tolower(gsub(pattern = "_",replacement =  "",x =  names(gis)))
#weight variables
gis$pwt <- gis$perwt
gis$hwt <- gis$hhwt

gis$hisp <- Recode(gis$hispan, recodes = "9=NA; 1:4='Hispanic'; 0='NonHispanic'")
gis$race_rec <- Recode(gis$race, recodes = "1='White'; 2='Black'; 3='Other'; 4:6='Asian'; 7:9='Other'")
gis$race_eth <- interaction(gis$hisp, gis$race_rec, sep = "_")
gis$race_eth  <- as.factor(ifelse(substr(as.character(gis$race_eth),1,8) == "Hispanic", "Hispanic", as.character(gis$race_eth)))
gis$race_eth <- relevel(gis$race_eth, ref = "NonHispanic_White")

gis$male <- ifelse(gis$sex == 1,1,0)


gis$sex <- Recode(gis$sex, recodes = "1='Male'; 2='Female';else=NA")

gis$insuremp<- ifelse(gis$hinsemp == 2,1,0)

gis$insurany<- ifelse(gis$hcovany == 2,1,0)

gis$insurany2<-  Recode(gis$insurany, recodes = "1='Yes';0='No'; else=NA")

gis$insurany2<- as.factor(gis$insurany2)
gis$insurany2<-relevel(gis$insurany2, ref='Yes')


gis$educ_level<- Recode(gis$educd, recodes = "2:61='1_LT/HS';62:64='1_LT/HS';65:80='2_somcol';90:100='2_somcol'; 81:83='3_AsDeg';101='4_BA+Grd'; 110:116='4_BA+Grd'; else=NA")


#gis$educ_level<- Recode(gis$educd, recodes = "2:61='0LT_HS';62:64='1_HSD/GED';65:80='2_somecoll';90:100='2_somecoll'; 81:83='3_AssocDegree';101='4_bachelordegree'; 110:116='4_BAplus_GradDegree'; else=NA")

#data$employed <- Recode(data$wrklstwk, recodes = "1=0;2=1; else=NA")

gis$metrostatus <- Recode(gis$metro, recodes = "0='indeterminable'; 2:4='Metro'; 1='Non-metro'")

gis$familyincome<-car::Recode(gis$ftotinc, recodes = "-22905:0= '0Loss'; 1:24999= '1Less_than_$25,000'; 25000:34999 = '2$25,000 - $34,999 '; 35000:49999 = '3$35,000 - $49,999 '; 50000:74999 = '4$50,000 - $74,999  '; 75000:99999= '5$75,000 - $99,999 '; 100000:149999= '6$100,000 - $149,999 '; 150000:199999= '7$150,000 - 3$199,999 '; 200000:9999997= '8$200,000 and above ';else=NA")


gis$personalincome<-car::Recode(gis$incwage, recodes = "-1:24999= '1Less_than_$25,000'; 25000:34999 = '2$25,000 - $34,999 '; 35000:49999 = '3$35,000 - $49,999 '; 50000:74999 = '4$50,000 - $74,999  '; 75000:99999= '5$75,000 - $99,999 '; 100000:149999= '6$100,000 - $149,999 '; 150000:199999= '7$150,000 - 3$199,999 '; 200000:999996= '8$200,000 and above ';else=NA")



#data$ind_group<-Recode(data$ind, recodes = "170:490='ag_extract'; 770='construction'; 1070:3990='manufac'; 4070:5790='whole_retail'; 6070:6390='trans'; 6470:6780='information'; 6870:7190= 'fire'; 7270=7790='prof/sci/manage'; 7860:8470='edu/social'; 8560:8690='arts'; 8770:9290='other'; 9370:9590='public_adm'; 9670:9870='military'; else=NA ")

#data$proftech <- Recode(data$ind, recodes = "7270:7490=1; 0=NA; else=0")

gis$agecat<-cut(gis$age, breaks = c(16, 20, 30, 40, 50, 65), include.lowest = T)


#data$edu_scal_inc <- ave(data$incwage, data$male, FUN = scale)

You most likely might not need this section

KINDLY NOTE THAT MY MAIN OUTCOME VARIABLE IS “INSURANY”- THAT IS IF THEY HAVE INSURACE, LOOK HOW I DID CODE IT IN MY LINE 72

gis2 <- gis%>%
filter(age >=16 & age<=64)

gis2 <- gis2%>%
filter(empstat ==1)

gis2 <- gis2%>%
filter(statefip ==48)
des<-svydesign(ids = ~cluster,
               strata = ~strata,
               weights = ~pwt,
               data = gis2)

#Estimate by Texas

# Get estimate, percent and standard error for people that migrate or not by PUMA, taking into account the survey design
puma_est_insurany<-svyby( ~I(insurany==1),
                       by = ~puma,
                       design=des,
                       FUN=svymean,
                       na.rm = TRUE ) %>% 
  clean_names() %>% 
  mutate(Pctsh=round((i_insurany_1_true*100),1),Pctnsh=round((i_insurany_1_false*100),1)) %>% 
  rename(propnsh=i_insurany_1_false,
         propsh=i_insurany_1_true,
         Stderr=se_i_insurany_1_true) %>% 
  select(puma,Pctsh,Pctnsh,Stderr)
pumas<-pumas(state = "TX",
             year = 2019,
             cb = T) %>% 
  mutate(puma=as.numeric(PUMACE10))

#geo1<-left_join(pumas, puma_est_insurany, "puma", "puma")
geo1<-left_join(pumas, puma_est_insurany, by =c("puma"= "puma"))

Use this particular map alone for geo1(all texas)

tmap_mode("plot")
## tmap mode set to plotting
tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo1)+
  tm_polygons("Pctsh",
              style="kmeans",
              title=c("Rate of Insurance"),
              palette="Blues",
              n=5,
              legend.hist = TRUE) +
  tm_layout(legend.outside = TRUE,
            title = "Workers Insurance rate in Texas \n 2015-2019",
            title.size =1.5,
            legend.frame = TRUE,
            ) +  tm_compass(position = c("left","top")) + tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center")) + tm_scale_bar(position = c("left","bottom"))

You dont need this

tmap_mode("view")
## tmap mode set to interactive viewing
geo1 %>%
  tm_shape () +
  tm_polygons("Pctsh",
              style= "kmeans",
              n=8,
              legend.hist= TRUE) +
  tm_layout(legend.outside= TRUE,
            title= "Insurance rate in Texas \ 2015-2019")

You dont need this

tm_shape(geo1)+
  tm_polygons(c("Pctsh"),
              title=c("% in Insurance rate"),
              palette="Blues",
              style="quantile",
              n=5)+
  #tm_format("World", legend.outside=T, title.size =4)+
  tm_scale_bar()+
  tm_layout(title="Texas Insurace rate in 2019- Quantile Breaks",
            title.size =1.5,
            legend.frame = TRUE,
            title.position = c('right', 'top'))+
  tm_compass()+
  tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center"))
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.

#Estimate by Metro Area

# Get estimate, percent and standard error for people that migrate or not by PUMA, taking into account the survey design
met_est_insurany<-svyby( ~I(insurany==1),
                       by = ~met2013,
                       design=des,
                       FUN=svymean,
                       na.rm = TRUE ) %>% 
  clean_names() %>% 
  mutate(Pctsh=round((i_insurany_1_true*100),1),Pctnsh=round((i_insurany_1_false*100),1)) %>% 
  rename(propnsh=i_insurany_1_false,
         propsh=i_insurany_1_true,
         Stderr=se_i_insurany_1_true) %>% 
  select(met2013,Pctsh,Pctnsh,Stderr)

# Get California Boundary File from Tigris and merge the migrate estimates with it .

#geo1<-left_join(pumas, puma_est_insurany, "puma", "puma")
mets<-core_based_statistical_areas(cb = T, year = 2018)
mets<-mets[grep(mets$NAME,pattern =  "TX"),]
mets$met2013<- as.numeric(mets$GEOID)



geo2<-left_join(mets, met_est_insurany, by =c("met2013"= "met2013"))

Use this particular map alone for geo2( metro area)

tmap_mode("plot")
## tmap mode set to plotting
tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo2)+
  tm_polygons("Pctsh",
              style="kmeans",
              title=c("Rate of Insurance"),
              palette="Blues",
              n=5,
              legend.hist = TRUE) +
  tm_layout(legend.outside = TRUE,
            title = "Workers Insurance rate in Texas Metro \n 2015-2019",
            title.size =1.5,
            legend.frame = TRUE,
            ) +  tm_compass(position = c("left","top")) + tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center")) + tm_scale_bar(position = c("left","bottom"))

You dont need this

tmap_mode("view")
## tmap mode set to interactive viewing
geo2 %>%
  tm_shape () +
  tm_polygons("Pctsh",
              style= "kmeans",
              n=8,
              legend.hist= TRUE) +
  tm_layout(legend.outside= TRUE,
            title= "Insurance rate in Texas Metro Area \ 2015-2019")
sts<-states(cb=T,year = 2019)
sts<-sts%>%
  filter(GEOID==48)

Estimates for counties

cos<-counties(cb= T,state = "TX", year =2019)
cos$countyfip <- as.numeric(cos$COUNTYFP)
# Get estimate, percent and standard error for people that migrate or not by PUMA, taking into account the survey design
cos_est_insurany<-svyby( ~I(insurany==1),
                       by = ~countyfip,
                       design=des,
                       FUN=svymean,
                       na.rm = TRUE ) %>% 
  clean_names() %>% 
  mutate(Pctsh=round((i_insurany_1_true*100),1),Pctnsh=round((i_insurany_1_false*100),1)) %>% 
  rename(propnsh=i_insurany_1_false,
         propsh=i_insurany_1_true,
         Stderr=se_i_insurany_1_true) %>% 
  select(countyfip,Pctsh,Pctnsh,Stderr)

# Get California Boundary File from Tigris and merge the migrate estimates with it .

#geo1<-left_join(pumas, puma_est_insurany, "puma", "puma")
geo3<-left_join(cos, cos_est_insurany, by =c("countyfip"= "countyfip"))

Use this particular map alone for geo3( counties)

tmap_mode("plot")
## tmap mode set to plotting
tm_basemap("OpenStreetMap.Mapnik")+
  tm_shape(geo3)+
  tm_polygons("Pctsh",
              style="kmeans",
              title=c("Rate of Insurance"),
              palette="Blues",
              n=5,
              legend.hist = TRUE) +
  tm_layout(legend.outside = TRUE,
            title = "Workers Insurance rate in Texas County \n 2015-2019",
            title.size =1.5,
            legend.frame = TRUE,
            ) +  tm_compass(position = c("left","top")) + tm_format("World",
            legend.position =  c("left", "bottom"),
            main.title.position =c("center")) + tm_scale_bar(position = c("left","bottom"))

You dont neeed this

tmap_mode("view")
## tmap mode set to interactive viewing
geo3 %>%
  tm_shape () +
  tm_polygons("Pctsh",
              style= "kmeans",
              n=8,
              legend.hist= TRUE) +
  tm_layout(legend.outside= TRUE,
            title= "Insurance rate in Texas County Area \ 2015-2019")

DESCRIPTIVE SECTION

without survey

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
label(gis2$sex)<-"Sex"
label(gis2$race_eth)<-"Race/Ethnicity"
label(gis2$agecat)<-"Age"
label(gis2$metrostatus)<-"Metropolitan Status"
label(gis2$personalincome)<-"Personal Income"
tbl1<- table1(~ sex + race_eth + agecat + educ_level + metrostatus + personalincome | insurany2, data=gis2)
table1(~ sex + race_eth + agecat + educ_level + metrostatus + personalincome | insurany2, data=gis2)
Yes
(N=471078)
No
(N=96409)
Overall
(N=567487)
Sex
Female 224661 (47.7%) 39800 (41.3%) 264461 (46.6%)
Male 246417 (52.3%) 56609 (58.7%) 303026 (53.4%)
Race/Ethnicity
NonHispanic_White 261239 (55.5%) 29068 (30.2%) 290307 (51.2%)
Hispanic 130962 (27.8%) 55350 (57.4%) 186312 (32.8%)
NonHispanic_Asian 26992 (5.7%) 2863 (3.0%) 29855 (5.3%)
NonHispanic_Black 42874 (9.1%) 7598 (7.9%) 50472 (8.9%)
NonHispanic_Other 9011 (1.9%) 1530 (1.6%) 10541 (1.9%)
Age
[16,20] 23215 (4.9%) 6648 (6.9%) 29863 (5.3%)
(20,30] 90999 (19.3%) 26245 (27.2%) 117244 (20.7%)
(30,40] 104176 (22.1%) 24236 (25.1%) 128412 (22.6%)
(40,50] 107691 (22.9%) 20542 (21.3%) 128233 (22.6%)
(50,65] 144997 (30.8%) 18738 (19.4%) 163735 (28.9%)
Educational attainment [detailed version]
1_LT/HS 136715 (29.0%) 59401 (61.6%) 196116 (34.6%)
2_somcol 111020 (23.6%) 21765 (22.6%) 132785 (23.4%)
3_AsDeg 39660 (8.4%) 5317 (5.5%) 44977 (7.9%)
4_BA+Grd 183683 (39.0%) 9926 (10.3%) 193609 (34.1%)
Metropolitan Status
indeterminable 61646 (13.1%) 13724 (14.2%) 75370 (13.3%)
Metro 393132 (83.5%) 78002 (80.9%) 471134 (83.0%)
Non-metro 16300 (3.5%) 4683 (4.9%) 20983 (3.7%)
Personal Income
1Less_than_$25,000 137739 (29.2%) 59989 (62.2%) 197728 (34.8%)
2$25,000 - $34,999 57002 (12.1%) 14890 (15.4%) 71892 (12.7%)
3$35,000 - $49,999 71739 (15.2%) 10651 (11.0%) 82390 (14.5%)
4$50,000 - $74,999 92558 (19.6%) 6984 (7.2%) 99542 (17.5%)
5$75,000 - $99,999 43934 (9.3%) 2029 (2.1%) 45963 (8.1%)
6$100,000 - $149,999 39659 (8.4%) 1216 (1.3%) 40875 (7.2%)
7$150,000 - 3$199,999 12812 (2.7%) 321 (0.3%) 13133 (2.3%)
8$200,000 and above 15635 (3.3%) 329 (0.3%) 15964 (2.8%)

##With survey

library(gtsummary)
 des %>% tbl_svysummary(by= insurany, 
                       include=c(sex,race_eth,agecat,educ_level,metrostatus,personalincome),
                       label = list(educ_level~ "Educantion Level",
                                    race_eth~ "Race/Ethnicity" ,
                                   agecat~"Age",
                                   metrostatus~"Metropolitan Status",
                                   personalincome~"Personal Income",
                                  sex~"Sex" ))%>%
  add_p() %>%
add_overall() %>%
  modify_spanning_header(c("stat_1", "stat_2") ~ "**Workers Insurance rate in Texas**") %>% 
as_gt() %>%
  # modify with gt functions
  gt::tab_header("Table 1: Table 1 Characteristics of Workers By Insurance rate, IPUMS, 2015-2019") %>% 
  gt::tab_options(
    table.font.size = "small",
    data_row.padding = gt::px(1))
Table 1: Table 1 Characteristics of Workers By Insurance rate, IPUMS, 2015-2019
Characteristic Overall, N = 12,680,7661 Workers Insurance rate in Texas p-value2
0, N = 2,597,1411 1, N = 10,083,6251
Sex <0.001
Female 5,773,196 (46%) 1,025,138 (39%) 4,748,058 (47%)
Male 6,907,570 (54%) 1,572,003 (61%) 5,335,567 (53%)
Race/Ethnicity <0.001
NonHispanic_White 5,466,905 (43%) 615,436 (24%) 4,851,469 (48%)
Hispanic 4,804,038 (38%) 1,623,234 (63%) 3,180,804 (32%)
NonHispanic_Asian 671,089 (5.3%) 71,183 (2.7%) 599,906 (5.9%)
NonHispanic_Black 1,509,528 (12%) 250,480 (9.6%) 1,259,048 (12%)
NonHispanic_Other 229,206 (1.8%) 36,808 (1.4%) 192,398 (1.9%)
Age <0.001
[16,20] 708,202 (5.6%) 177,452 (6.8%) 530,750 (5.3%)
(20,30] 3,036,986 (24%) 774,274 (30%) 2,262,712 (22%)
(30,40] 3,085,392 (24%) 690,309 (27%) 2,395,083 (24%)
(40,50] 2,832,379 (22%) 542,522 (21%) 2,289,857 (23%)
(50,65] 3,017,807 (24%) 412,584 (16%) 2,605,223 (26%)
Educantion Level <0.001
1_LT/HS 4,764,392 (38%) 1,661,002 (64%) 3,103,390 (31%)
2_somcol 3,027,912 (24%) 569,278 (22%) 2,458,634 (24%)
3_AsDeg 976,007 (7.7%) 129,503 (5.0%) 846,504 (8.4%)
4_BA+Grd 3,912,455 (31%) 237,358 (9.1%) 3,675,097 (36%)
Metropolitan Status <0.001
indeterminable 1,258,503 (9.9%) 270,776 (10%) 987,727 (9.8%)
Metro 10,999,414 (87%) 2,219,791 (85%) 8,779,623 (87%)
Non-metro 422,849 (3.3%) 106,574 (4.1%) 316,275 (3.1%)
Personal Income <0.001
1Less_than_$25,000 4,653,579 (37%) 1,602,402 (62%) 3,051,177 (30%)
2$25,000 - $34,999 1,729,207 (14%) 418,757 (16%) 1,310,450 (13%)
3$35,000 - $49,999 1,860,388 (15%) 289,178 (11%) 1,571,210 (16%)
4$50,000 - $74,999 2,129,361 (17%) 183,618 (7.1%) 1,945,743 (19%)
5$75,000 - $99,999 949,723 (7.5%) 55,346 (2.1%) 894,377 (8.9%)
6$100,000 - $149,999 813,839 (6.4%) 31,782 (1.2%) 782,057 (7.8%)
7$150,000 - 3$199,999 255,135 (2.0%) 8,713 (0.3%) 246,422 (2.4%)
8$200,000 and above 289,534 (2.3%) 7,345 (0.3%) 282,189 (2.8%)

1 n (%)

2 chi-squared test with Rao & Scott's second-order correction

METRO<-svyby(formula = ~insurany,
           by = ~metrostatus,
           design = des,
           FUN = svymean,
           na.rm=T)

svychisq(~insurany+metrostatus,
         design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~insurany + metrostatus, design = des)
## F = 67.615, ndf = 1.9986e+00, ddf = 7.1103e+05, p-value < 2.2e-16
METRO%>%
   mutate(metro = fct_relevel(metrostatus, 
            "Metro", "Non-metro", "Overweight", "indeterminable")) %>%
  ggplot()+
  geom_point(aes(x=metro,y=insurany))+
  geom_errorbar(aes(x=metro, ymin = insurany-1.96*se, 
                    ymax= insurany+1.96*se),
                width=.30)+
   labs(title = "Percent % of Texas Workers Insured by Metropolitan Area", 
        caption = "Source: IPUMS USA Data, 2015-2019 \n Calculations by Joseph Jaiyeola ",
       x = "Metropolitan Categories",
       y = "%  Insured")+
  theme_minimal()+ theme(
  plot.title = element_text(size= 10, face = "bold"))
## Warning: Unknown levels in `f`: Overweight

RACE<-svyby(formula = ~insurany,
           by = ~race_eth,
           design = des,
           FUN = svymean,
           na.rm=T)

svychisq(~insurany+race_eth,
         design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~insurany + race_eth, design = des)
## F = 3782.9, ndf = 3.9653e+00, ddf = 1.4107e+06, p-value < 2.2e-16
RACE%>%
  #mutate(metro = fct_relevel(race_eth, 
          #  "Metro", "Non-metro", "Overweight", "indeterminable")) %>%
  ggplot()+
  geom_point(aes(x=race_eth,y=insurany))+
  geom_errorbar(aes(x=race_eth, ymin = insurany-1.96*se, 
                    ymax= insurany+1.96*se),
                width=.30)+
   labs(title = "Percent % of Texas Workers Insured by Race/Ethnicity", 
        caption = "Source: IPUMS USA Data, 2015-2019 \n Calculations by Joseph Jaiyeola ",
       x = "Race/Ethnicity Categories",
       y = "%  Insured")+
  theme_minimal()+ theme(
  plot.title = element_text(size= 10, face = "bold"))

crossre<-svyby(formula = ~insurany,
              by = ~race_eth+educ_level,
              design = des,
              FUN = svymean,
              na.rm=T)

crossre%>%
   #mutate(educa = fct_relevel(educa, 
           # "<HS", "HS", ">HS")) %>%
  ggplot()+
  geom_point(aes(x=educ_level, y = insurany))+ 
  geom_errorbar(aes(x=educ_level, ymin = insurany-1.96*se, 
                    ymax=insurany+1.96*se),
                width=.25)+
  facet_wrap(~ race_eth, nrow = 3)+
  labs(title = "Percent % of Texas Workers Insured by Race/Ethnicity and Education", 
        caption = "Source: IPUMS USA Data, 2015-2019 \n Calculations by Joseph Jaiyeola",
       x = "Education",
       y = "%  Insured")+
  theme_minimal()+ theme(
  plot.title = element_text(size= 10, face = "bold"))