# load packages
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(ggplot2)
library(haven)
library(tableone)
library(gtsummary)
## #BlackLivesMatter
# ---- Data import ----
# Read the Stata survey file and drop haven's value labels so the coded
# columns can be recoded by their numeric codes below.
# NOTE(review): absolute, machine-specific path -- consider a project-relative
# path so the script runs on other machines.
uganda16 <- read_dta("C:/Users/rlutt/Downloads/UGIR7BFL.DTA")
uganda16 <- zap_labels(uganda16)

# ---- Recodes ----
# Bank account (v170): 0/1 -> "No"/"Yes" (left as character, not a factor).
uganda16$bank <- car::Recode(uganda16$v170, recodes = "0= 'No'; 1= 'Yes'")

# Age group (v013): seven coded groups -> labelled factor.
uganda16$agegroup <- car::Recode(
  uganda16$v013,
  recodes = "1='15-19'; 2='20-24';3='25-29'; 4 = '30-34';5='35-39';6='40-44';7='45+' ",
  as.factor = TRUE
)

# Fertility preference (v602): 1 when v602 == 1, 0 for every other non-missing
# code, NA when v602 is NA. A separate `v602 != 9` guard is unnecessary because
# `v602 == 1` already excludes code 9.
# NOTE(review): every other response code is folded into 0 -- confirm that is
# the intended comparison group.
uganda16$wantanotherchild <- ifelse(uganda16$v602 == 1, 1, 0)
uganda16$fertilitypreference <- car::Recode(
  uganda16$wantanotherchild,
  recodes = "0= 'Do Not Want another child'; 1= 'Do Want another child'"
)

# Education level (v106): codes 2 and 3 are collapsed into one category.
uganda16$educationlevel <- car::Recode(
  uganda16$v106,
  recodes = "0 = 'none'; 1 = 'primary'; 2:3='secondary and above' ",
  as.factor = TRUE
)

# Internet use (v171a), recoded straight from the raw column.
uganda16$internet <- car::Recode(
  uganda16$v171a,
  recodes = "0='never'; 1= 'in the past year'; 2='over a year ago'; 3= 'yes, but unsure when'",
  as.factor = TRUE
)

# Contraceptive method type (v313), recoded straight from the raw column.
# NOTE(review): factor levels sort alphabetically, so 'folkloric method' ends
# up as the reference category in the models -- confirm that is intended.
uganda16$contraception <- car::Recode(
  uganda16$v313,
  recodes = "0='none'; 1='folkloric method'; 2='traditional method' ;3= 'modern method'",
  as.factor = TRUE
)

# ---- Survey design variables ----
uganda16$psu <- uganda16$v021
uganda16$strata <- uganda16$v022
# Rescale the stored weight by 1e6 (presumably the DHS convention of storing
# weights as integers with six implied decimals -- confirm against the recode
# manual).
uganda16$pwt <- uganda16$v005 / 1000000

# Stratified cluster design: PSUs as clusters, weighted by pwt.
design <- svydesign(
  ids = ~psu,
  strata = ~strata,
  weights = ~pwt,
  data = uganda16
)
# Model 1: wanting another child ~ age group + contraceptive method + education.
# quasibinomial() is used instead of binomial because the sampling weights make
# the weighted "successes" non-integer; per the svyglm documentation the quasi
# family gives the same point estimates and standard errors without the
# "non-integer #successes in a binomial glm!" warning.
model1 <- svyglm(
  wantanotherchild ~ agegroup + contraception + educationlevel,
  family = quasibinomial(),
  design = design
)
summary(model1)
## 
## Call:
## svyglm(formula = wantanotherchild ~ agegroup + contraception + 
##     educationlevel, design = design, family = binomial)
## 
## Survey design:
## svydesign(ids = ~psu, strata = ~strata, weights = ~pwt, data = uganda16)
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        1.41768    0.49672   2.854  0.00445 ** 
## agegroup20-24                     -0.19157    0.08856  -2.163  0.03088 *  
## agegroup25-29                     -1.10112    0.08870 -12.415  < 2e-16 ***
## agegroup30-34                     -2.13288    0.08114 -26.287  < 2e-16 ***
## agegroup35-39                     -3.12327    0.08693 -35.929  < 2e-16 ***
## agegroup40-44                     -4.11390    0.11898 -34.578  < 2e-16 ***
## agegroup45+                       -4.79567    0.15235 -31.477  < 2e-16 ***
## contraceptionmodern method         0.31642    0.47640   0.664  0.50681    
## contraceptionnone                  0.43767    0.47947   0.913  0.36167    
## contraceptiontraditional method    0.85921    0.49697   1.729  0.08430 .  
## educationlevelprimary              0.12149    0.08639   1.406  0.16012    
## educationlevelsecondary and above  0.70304    0.09710   7.241 1.26e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.009267)
## 
## Number of Fisher Scoring iterations: 5
# Model 2: wanting another child ~ age group + contraceptive method + internet
# use (education is not included in this model).
# quasibinomial() is used instead of binomial because the sampling weights make
# the weighted "successes" non-integer; per the svyglm documentation the quasi
# family gives the same point estimates and standard errors without the
# "non-integer #successes in a binomial glm!" warning.
model2 <- svyglm(
  wantanotherchild ~ agegroup + contraception + internet,
  family = quasibinomial(),
  design = design
)
summary(model2)
## 
## Call:
## svyglm(formula = wantanotherchild ~ agegroup + contraception + 
##     internet, design = design, family = binomial)
## 
## Survey design:
## svydesign(ids = ~psu, strata = ~strata, weights = ~pwt, data = uganda16)
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      2.48190    0.53712   4.621 4.61e-06 ***
## agegroup20-24                   -0.18349    0.08710  -2.107   0.0355 *  
## agegroup25-29                   -1.11182    0.08862 -12.546  < 2e-16 ***
## agegroup30-34                   -2.16385    0.08127 -26.626  < 2e-16 ***
## agegroup35-39                   -3.19015    0.08602 -37.088  < 2e-16 ***
## agegroup40-44                   -4.18566    0.11815 -35.426  < 2e-16 ***
## agegroup45+                     -4.86831    0.15250 -31.923  < 2e-16 ***
## contraceptionmodern method       0.38298    0.51353   0.746   0.4561    
## contraceptionnone                0.46845    0.51641   0.907   0.3647    
## contraceptiontraditional method  0.92191    0.53217   1.732   0.0837 .  
## internetnever                   -0.85964    0.13128  -6.548 1.18e-10 ***
## internetover a year ago         -0.55186    0.26588  -2.076   0.0383 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.007343)
## 
## Number of Fisher Scoring iterations: 5
# Final model: all predictors together (age group, contraceptive method, bank
# account, internet use, education level).
# quasibinomial() is used instead of binomial because the sampling weights make
# the weighted "successes" non-integer; per the svyglm documentation the quasi
# family gives the same point estimates and standard errors without the
# "non-integer #successes in a binomial glm!" warning.
finalmodel <- svyglm(
  wantanotherchild ~ agegroup + contraception + bank + internet + educationlevel,
  family = quasibinomial(),
  design = design
)
summary(finalmodel)
## 
## Call:
## svyglm(formula = wantanotherchild ~ agegroup + contraception + 
##     bank + internet + educationlevel, design = design, family = binomial)
## 
## Survey design:
## svydesign(ids = ~psu, strata = ~strata, weights = ~pwt, data = uganda16)
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        1.90405    0.52656   3.616 0.000322 ***
## agegroup20-24                     -0.21141    0.08792  -2.404 0.016479 *  
## agegroup25-29                     -1.13566    0.08907 -12.750  < 2e-16 ***
## agegroup30-34                     -2.16686    0.08165 -26.539  < 2e-16 ***
## agegroup35-39                     -3.16834    0.08866 -35.737  < 2e-16 ***
## agegroup40-44                     -4.15785    0.12088 -34.397  < 2e-16 ***
## agegroup45+                       -4.83811    0.15306 -31.609  < 2e-16 ***
## contraceptionmodern method         0.31506    0.48775   0.646 0.518534    
## contraceptionnone                  0.43906    0.49105   0.894 0.371591    
## contraceptiontraditional method    0.82570    0.50911   1.622 0.105324    
## bankYes                            0.19372    0.07471   2.593 0.009726 ** 
## internetnever                     -0.46131    0.13172  -3.502 0.000493 ***
## internetover a year ago           -0.41643    0.25208  -1.652 0.099016 .  
## educationlevelprimary              0.10683    0.08691   1.229 0.219429    
## educationlevelsecondary and above  0.54156    0.09839   5.504 5.35e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.009863)
## 
## Number of Fisher Scoring iterations: 5
# Report model 1 as a regression table; exponentiate = TRUE shows the
# coefficients as odds ratios rather than log-odds.
library(gtsummary)
tbl_regression(model1, exponentiate = TRUE)
Characteristic OR1 95% CI1 p-value
agegroup
15-19
20-24 0.83 0.69, 0.98 0.031
25-29 0.33 0.28, 0.40 <0.001
30-34 0.12 0.10, 0.14 <0.001
35-39 0.04 0.04, 0.05 <0.001
40-44 0.02 0.01, 0.02 <0.001
45+ 0.01 0.01, 0.01 <0.001
contraception
folkloric method
modern method 1.37 0.54, 3.50 0.5
none 1.55 0.60, 3.97 0.4
traditional method 2.36 0.89, 6.27 0.084
educationlevel
none
primary 1.13 0.95, 1.34 0.2
secondary and above 2.02 1.67, 2.44 <0.001

1 OR = Odds Ratio, CI = Confidence Interval

# Odds-ratio table for model 2 (coefficients exponentiated).
tbl_regression(model2, exponentiate = TRUE)
Characteristic OR1 95% CI1 p-value
agegroup
15-19
20-24 0.83 0.70, 0.99 0.036
25-29 0.33 0.28, 0.39 <0.001
30-34 0.11 0.10, 0.13 <0.001
35-39 0.04 0.03, 0.05 <0.001
40-44 0.02 0.01, 0.02 <0.001
45+ 0.01 0.01, 0.01 <0.001
contraception
folkloric method
modern method 1.47 0.54, 4.02 0.5
none 1.60 0.58, 4.40 0.4
traditional method 2.51 0.88, 7.15 0.084
internet
in the past year
never 0.42 0.33, 0.55 <0.001
over a year ago 0.58 0.34, 0.97 0.038

1 OR = Odds Ratio, CI = Confidence Interval

# Odds-ratio table for the final model (coefficients exponentiated).
tbl_regression(finalmodel, exponentiate = TRUE)
Characteristic OR1 95% CI1 p-value
agegroup
15-19
20-24 0.81 0.68, 0.96 0.016
25-29 0.32 0.27, 0.38 <0.001
30-34 0.11 0.10, 0.13 <0.001
35-39 0.04 0.04, 0.05 <0.001
40-44 0.02 0.01, 0.02 <0.001
45+ 0.01 0.01, 0.01 <0.001
contraception
folkloric method
modern method 1.37 0.53, 3.57 0.5
none 1.55 0.59, 4.07 0.4
traditional method 2.28 0.84, 6.21 0.11
has an account in a bank or other financial institution
No
Yes 1.21 1.05, 1.41 0.010
internet
in the past year
never 0.63 0.49, 0.82 <0.001
over a year ago 0.66 0.40, 1.08 0.10
educationlevel
none
primary 1.11 0.94, 1.32 0.2
secondary and above 1.72 1.42, 2.08 <0.001

1 OR = Odds Ratio, CI = Confidence Interval

# Interesting cases: estimates from the final model for every combination of
# bank account, internet use and age group.
library(emmeans)

# Reference grid built from the fitted final model.
grid1 <- ref_grid(finalmodel)

# type = "response" reports the estimates on the response scale instead of
# the link scale.
specialcases <- emmeans(
  grid1,
  specs = c("bank", "internet", "agegroup"),
  type = "response"
)

# Print the estimates as a table rounded to four decimals.
knitr::kable(specialcases, digits = 4)
bank internet agegroup prob SE df asymp.LCL asymp.UCL
No in the past year 15-19 0.9252 0.0134 Inf 0.8943 0.9476
Yes in the past year 15-19 0.9375 0.0116 Inf 0.9105 0.9568
No never 15-19 0.8863 0.0151 Inf 0.8531 0.9128
Yes never 15-19 0.9044 0.0141 Inf 0.8730 0.9287
No over a year ago 15-19 0.8908 0.0265 Inf 0.8270 0.9330
Yes over a year ago 15-19 0.9082 0.0237 Inf 0.8501 0.9453
No in the past year 20-24 0.9092 0.0159 Inf 0.8728 0.9359
Yes in the past year 20-24 0.9240 0.0139 Inf 0.8918 0.9471
No never 20-24 0.8632 0.0168 Inf 0.8269 0.8929
Yes never 20-24 0.8845 0.0160 Inf 0.8493 0.9124
No over a year ago 20-24 0.8684 0.0304 Inf 0.7967 0.9175
Yes over a year ago 20-24 0.8890 0.0275 Inf 0.8228 0.9326
No in the past year 25-29 0.7989 0.0295 Inf 0.7349 0.8506
Yes in the past year 25-29 0.8282 0.0260 Inf 0.7711 0.8734
No never 25-29 0.7147 0.0284 Inf 0.6559 0.7669
Yes never 25-29 0.7525 0.0273 Inf 0.6952 0.8020
No over a year ago 25-29 0.7237 0.0539 Inf 0.6069 0.8163
Yes over a year ago 25-29 0.7607 0.0506 Inf 0.6484 0.8457
No in the past year 30-34 0.5862 0.0434 Inf 0.4994 0.6680
Yes in the past year 30-34 0.6323 0.0419 Inf 0.5470 0.7100
No never 30-34 0.4718 0.0335 Inf 0.4070 0.5375
Yes never 30-34 0.5202 0.0361 Inf 0.4495 0.5900
No over a year ago 30-34 0.4830 0.0660 Inf 0.3576 0.6105
Yes over a year ago 30-34 0.5314 0.0682 Inf 0.3986 0.6598
No in the past year 35-39 0.3423 0.0414 Inf 0.2663 0.4273
Yes in the past year 35-39 0.3871 0.0430 Inf 0.3068 0.4740
No never 35-39 0.2470 0.0262 Inf 0.1994 0.3018
Yes never 35-39 0.2848 0.0297 Inf 0.2303 0.3463
No over a year ago 35-39 0.2555 0.0503 Inf 0.1697 0.3655
Yes over a year ago 35-39 0.2940 0.0564 Inf 0.1965 0.4149
No in the past year 40-44 0.1621 0.0267 Inf 0.1163 0.2214
Yes in the past year 40-44 0.1902 0.0295 Inf 0.1389 0.2548
No never 40-44 0.1087 0.0152 Inf 0.0824 0.1422
Yes never 40-44 0.1289 0.0177 Inf 0.0980 0.1679
No over a year ago 40-44 0.1131 0.0279 Inf 0.0688 0.1804
Yes over a year ago 40-44 0.1341 0.0329 Inf 0.0816 0.2125
No in the past year 45+ 0.0892 0.0185 Inf 0.0590 0.1328
Yes in the past year 45+ 0.1063 0.0215 Inf 0.0710 0.1562
No never 45+ 0.0582 0.0103 Inf 0.0410 0.0819
Yes never 45+ 0.0697 0.0124 Inf 0.0490 0.0983
No over a year ago 45+ 0.0607 0.0172 Inf 0.0345 0.1045
Yes over a year ago 45+ 0.0727 0.0208 Inf 0.0411 0.1254

### Key takeaways from this analysis: 1) I should consider removing contraception from the model, as it is not statistically significant; and 2) access to the internet and to a bank account are interesting cases to examine together. Next, I should look for more variables that capture empowerment to add to the analysis. I also need to add whether or not a woman already has children, and possibly religion, too.