#load libraries
library(haven)
library(gtsummary)
library(MASS)
## Warning: package 'MASS' was built under R version 4.1.3
##
## Attaching package: 'MASS'
## The following object is masked from 'package:gtsummary':
##
## select
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
#read in data
uganda16 <- read_dta("C:/Users/rlutt/Downloads/UGIR7BFL.DTA")
uganda16<-zap_labels(uganda16)
#recodes
#bank account
uganda16$bank<-car::Recode(uganda16$v170, recodes= "0= 'No'; 1= 'Yes'")
#age groups
uganda16$agegroup <- car::Recode(uganda16$v013, recodes = "1='15-19'; 2='20-24';3='25-29'; 4 = '30-34';5='35-39';6='40-44';7='45+' ", as.factor=T)
#fertility preferences
uganda16$wantanotherchild<-ifelse(uganda16$v602!=9&uganda16$v602==1,1,0)
#education level
uganda16$educationlevel <- car::recode(uganda16$v106,
recodes = "0 = 'none'; 1 = 'primary'; 2:3='secondary and above' ",
as.factor=T)
#internet
uganda16$internet<-as.factor(uganda16$v171a)
uganda16$internet<-car::Recode(uganda16$v171a, recodes= "0='never'; 1= 'in the past year'; 2='over a year ago'; 3= 'yes, but unsure when'", as.factor=T)
#contraception
uganda16$contraception<-as.factor(uganda16$v313)
uganda16$contraception<-car::Recode(uganda16$v313, recodes= "0='none'; 1='folkloric method'; 2='traditional method' ;3= 'modern method'", as.factor=T)
#usingmoderncontraception
uganda16$moderncon<-car::Recode(uganda16$v313, recodes= "0='no'; 1='no'; 2='no'; 3='yes'", as.factor=T)
#has another kid
uganda16$currentchildren<-uganda16$v202+uganda16$v203
#regions
uganda16$region<-as.factor(uganda16$v024)
uganda16$regions<-car::Recode(uganda16$v024, recodes= "0='Kampala'; 1= 'South Buganda'; 2='North Buganda';3='Busoga';4='Bukedi'; 5='Bugisu'; 6='Teso';7='Karamoja'; 8='Lango'; 9='Acholi'; 10='West Nile'; 11='Bunyoro';12='Tooro'; 13='Ankole';14='Kigezi'")
# survey design variables
uganda16$psu <- uganda16$v021
uganda16$strata <- uganda16$v022
uganda16$pwt <- uganda16$v005/1000000
desi<-svydesign(ids = ~ psu, strata = ~ strata, weights =~ pwt, data=uganda16)
At what rate are women in Uganda actively using contraception? In other words: how many women per women in child-rearing years are actively using contraception per region?
Yes, the offset term that is necessary in this context is the population of women in Uganda in their child-rearing years (ages 15-49). Offset terms are used in the Poisson model, to convert a count outcome into a rate.
#offset term
#add person weight to get total population
uganda16$pwt <- uganda16$v005/1000000
#count number of respondents and apply weights
nrow(uganda16)
## [1] 18506
uganda16$offset2<-18506*uganda16$pwt
#get weighted # of women
library(survey)
svytable(~uganda16$moderncon+uganda16$regions, design=desi)
## uganda16$regions
## uganda16$moderncon Acholi Ankole Bugisu Bukedi Bunyoro
## no 706.90778 1084.96017 606.68150 836.62567 774.38142
## yes 217.51421 412.69561 313.99111 332.74685 239.49458
## uganda16$regions
## uganda16$moderncon Busoga Kampala Karamoja Kigezi Lango
## no 1273.64691 731.79926 344.00124 510.59635 702.38910
## yes 416.48729 292.95630 20.64405 220.96587 307.28332
## uganda16$regions
## uganda16$moderncon North Buganda South Buganda Teso Tooro West Nile
## no 1322.89924 1715.57354 840.77748 939.49550 1065.27710
## yes 639.85610 778.13304 257.72262 417.34015 182.15685
#filter na
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x dplyr::select() masks MASS::select(), gtsummary::select()
## x tidyr::unpack() masks Matrix::unpack()
uganda16 %>%
drop_na()
## # A tibble: 0 x 4,986
## # ... with 4,986 variables: caseid <chr>, v000 <chr>, v001 <dbl>, v002 <dbl>,
## # v003 <dbl>, v004 <dbl>, v005 <dbl>, v006 <dbl>, v007 <dbl>, v008 <dbl>,
## # v008a <dbl>, v009 <dbl>, v010 <dbl>, v011 <dbl>, v012 <dbl>, v013 <dbl>,
## # v014 <dbl>, v015 <dbl>, v016 <dbl>, v017 <dbl>, v018 <dbl>, v019 <dbl>,
## # v019a <dbl>, v020 <dbl>, v021 <dbl>, v022 <dbl>, v023 <dbl>, v024 <dbl>,
## # v025 <dbl>, v026 <dbl>, v027 <dbl>, v028 <dbl>, v029 <dbl>, v030 <dbl>,
## # v031 <dbl>, v032 <dbl>, v034 <dbl>, v040 <dbl>, v042 <dbl>, v044 <dbl>, ...
options(na.action = "na.omit")
#Here I use the original variables because the model will not run with factor varibles, which I how I recoded them
#poisson model
glmp_s<-glm(v313 ~offset(log(offset2))+factor(regions), data=uganda16, family=poisson)
#check dispersion
1-pchisq(glmp_s$deviance, df=glmp_s$df.residual)
## [1] 0
#The p value is 0 which means this model does not fit the data.
#3) Consider a Negative binomial model
#negative binomial
init.coef = c(1, 14)
glmb<-glm.nb(v313 ~offset(log(offset2))+ factor(regions) + factor(agegroup) +currentchildren, data=uganda16)
glmb%>%
tbl_regression(exp=T)
Characteristic | IRR1 | 95% CI1 | p-value |
---|---|---|---|
factor(regions) | |||
Acholi | — | — | |
Ankole | 0.84 | 0.70, 1.01 | 0.064 |
Bugisu | 1.49 | 1.22, 1.82 | <0.001 |
Bukedi | 1.16 | 0.97, 1.40 | 0.11 |
Bunyoro | 1.23 | 1.02, 1.48 | 0.030 |
Busoga | 1.51 | 1.25, 1.82 | <0.001 |
Kampala | 1.72 | 1.43, 2.07 | <0.001 |
Karamoja | 0.49 | 0.38, 0.64 | <0.001 |
Kigezi | 1.43 | 1.17, 1.74 | <0.001 |
Lango | 1.36 | 1.13, 1.64 | <0.001 |
North Buganda | 1.35 | 1.12, 1.62 | <0.001 |
South Buganda | 1.23 | 1.03, 1.48 | 0.017 |
Teso | 1.10 | 0.91, 1.32 | 0.3 |
Tooro | 1.17 | 0.97, 1.40 | 0.095 |
West Nile | 0.54 | 0.45, 0.66 | <0.001 |
factor(agegroup) | |||
15-19 | — | — | |
20-24 | 3.26 | 2.91, 3.65 | <0.001 |
25-29 | 3.78 | 3.35, 4.28 | <0.001 |
30-34 | 3.73 | 3.27, 4.27 | <0.001 |
35-39 | 3.37 | 2.91, 3.91 | <0.001 |
40-44 | 3.31 | 2.84, 3.86 | <0.001 |
45+ | 1.86 | 1.57, 2.21 | <0.001 |
sons at home | 1.11 | 1.08, 1.13 | <0.001 |
1
IRR = Incidence Rate Ratio, CI = Confidence Interval
|
#4) Compare the model fits of the alternative models using AIC
#calculate AICs
AIC(glmp_s, glmb)
## df AIC
## glmp_s 15 57900.07
## glmb 23 44674.72
#In summary, the negative binomial is a better choice because of a lower AIC.