#load libraries
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(ggplot2)
library(haven)
library(tableone)
#load data
library(haven)
uganda16 <- read_dta("C:/Users/rlutt/Downloads/UGIR7BFL.DTA")
uganda16<-zap_labels(uganda16)

###Research Question: What factors affect whether a woman in Uganda will give zero to many births?

###Define an ordinal or multinomial outcome variable of your choosing and define how you will recode the original variable.

###The outcome variable of interest I am going to analyze is the amount of children a woman already has given birth to. To use a ordinal probability model, I will recode the amount of children into ‘none’, ‘1-3’ , and ‘3-5’ and (6:18) to make it into a scale (0-4: meaning no births–> a few–> a lot –> many)

###The independent variables I will test in the model are education level, marital status, and contraceptive use

#recodes to set up variables of interest

#outcome- how many children a woman has on a scale
uganda16$children<-car::recode(uganda16$v201, recodes= "0=0; 1:3=1; 3:5=2; 6:18=3; else=NA", as.factor=T)

#IVs

#education level
uganda16$educationlevel <- car::recode(uganda16$v106, 
                            recodes = "0 = 'none'; 1 = 'primary'; 2:3='secondary and above' ",
                            as.factor=T)

#marital status
uganda16$married<-car::Recode(uganda16$v501, recodes= "0='no'; 1='married'; 2='co-habiting' ;3= 'widowed'; 4='divorced'; 5= 'separated'", as.factor=T)

#contraception
uganda16$contraception<-as.factor(uganda16$v313)
uganda16$contraception<-car::Recode(uganda16$v313, recodes= "0='none'; 1='folkloric method'; 2='traditional method' ;3= 'modern method'", as.factor=T)


# survey design variables
uganda16$psu <- uganda16$v021
uganda16$strata <- uganda16$v022
uganda16$pwt <- uganda16$v005/1000000



uganda16$children<-relevel(uganda16$children, ref= "0")
uganda16$childrenb<-car::recode(uganda16$v201, recodes= "0=0; 1:3=1; 3:5=2; 6:18=3; else=NA", as.factor=F)

options(survey.lonely.psu = "adjust")

desi<-svydesign(ids = ~ psu, strata = ~ strata, weights =~ pwt, data=uganda16)

###Describe the results of your model, and present output from the model in terms of odds ratios and confidence intervals for all model parameters in a table. ### The regression results

library(gtsummary)

# nested model for the health outcome
fit.solr1<-svyolr(uganda16$children~married+contraception+ educationlevel,
                  design = desi)
fit.solr1%>%
  tbl_regression()
Characteristic log(OR)1 95% CI1
married
co-habiting
divorced 0.94 0.57, 1.3
married 0.71 0.63, 0.79
no -4.0 -4.1, -3.8
separated 0.12 0.01, 0.23
widowed 1.4 1.2, 1.6
contraception
folkloric method
modern method -1.1 -1.8, -0.52
none -1.8 -2.4, -1.1
traditional method -1.2 -1.9, -0.59
educationlevel
none
primary -1.3 -1.4, -1.1
secondary and above -2.2 -2.3, -2.0

1 OR = Odds Ratio, CI = Confidence Interval

###If you use the ordinal model, are the assumptions of the proportional odds model met? How did you evaluate the proportional odds assumption?

###An assumption of the proportional odds model is that each predictor variable affects the probability of the outcome in the same way, meaning all the betas are the same.

#check for assumption of proportional odds

ex1<-svyglm(I(childrenb >0)~married+ contraception+ educationlevel,
            design = desi,
            family="binomial", rescale=TRUE)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex2<-svyglm(I(childrenb >1)~married+contraception+ educationlevel,
            design = desi,
            family="binomial", rescale=TRUE)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex3<-svyglm(I(childrenb >2)~married+contraception+ educationlevel,
            design = desi,
            family="binomial", rescale=TRUE)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ex4<-svyglm(I(childrenb>3)~married+contraception+ educationlevel,
            design = desi,
            family="binomial", rescale=TRUE)
## Warning: glm.fit: algorithm did not converge
coefs<-data.frame(parms =c( coefficients(ex1)[-1],
                            coefficients(ex2)[-1],
                            coefficients (ex3)[-1],
                            coefficients (ex4)[-1]),
                  beta = rep(names(coefficients(ex1))[-1]), 
                  mod = c(rep("I(childrenb>0)"),
                        rep("I(childrenb>1)"),
                        rep("I(childrenb>2"),
                        rep("I(childrenb>3)")))
coefs%>%
  
  ggplot()+
  geom_point(aes(x=beta, y=parms, color=mod))+
  theme(axis.text.x = element_text(angle = 45))+
  labs(title = "Graphical Summary of Proportional Odds Assumption",
       x= "Beta",
       y= "Estimate")

###using this visual I can identify that the betas do not meet the proportional odds assumptions, therefore it would be better to use a different model that allows for different effect sizes for each predictor variable.

#present odds ratios
round(exp(rbind(coef(ex1)[-1],
                coef(ex2)[-1], coef(ex3)[-1], coef(ex4)[-1])),4)
##      marrieddivorced marriedmarried marriedno marriedseparated marriedwidowed
## [1,]         14.8872         2.4388    0.0212           2.4794        28.0073
## [2,]          2.0997         1.9568    0.0255           0.9961         4.4266
## [3,]          2.4643         2.0274    0.0306           1.0050         2.9330
## [4,]          1.0070         1.0323    1.0203           0.9810         1.0041
##      contraceptionmodern method contraceptionnone
## [1,]                     0.0000            0.0000
## [2,]                     0.2820            0.1695
## [3,]                     0.2830            0.2102
## [4,]                     0.9956            1.0053
##      contraceptiontraditional method educationlevelprimary
## [1,]                          0.0000                0.3141
## [2,]                          0.2578                0.2940
## [3,]                          0.3468                0.3228
## [4,]                          0.9705                0.9709
##      educationlevelsecondary and above
## [1,]                            0.2200
## [2,]                            0.0953
## [3,]                            0.0633
## [4,]                            0.9151