Outcome variable: Depression (deprx), measured as the use of prescription medication
Predictor variables: age, total family income last year (famtotincinterval), marital status (mars), and race and ethnicity (race_eth).
Continuous variables: Age and Total family income last year (famtotincinterval)
### Libraries
library(mgcv, quietly = T) #one library for GAMs
## This is mgcv 1.8-38. For overview type 'help("mgcv-package")'.
library(ggplot2, quietly = T)
## Warning in register(): Can't find generic `scale_type` in package ggplot2 to
## register S3 method.
library(dplyr, quietly = T)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
##
## collapse
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car, quietly = T)
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(emmeans, quietly = T)
library(forcats, quietly = T)
library(stargazer, quietly = T)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey, quietly = T)
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr, quietly = T)
library(forcats, quietly = T)
library(tidyverse, quietly = T)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.6 ✓ purrr 0.3.4
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::collapse() masks nlme::collapse()
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x car::recode() masks dplyr::recode()
## x purrr::some() masks car::some()
## x tidyr::unpack() masks Matrix::unpack()
library(srvyr, quietly = T)
##
## Attaching package: 'srvyr'
## The following object is masked from 'package:stats':
##
## filter
library( gtsummary, quietly = T)
library(caret, quietly = T)
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
## The following object is masked from 'package:survival':
##
## cluster
library(tableone, quietly = T)
library(stargazer, quietly = T)
library(pander, quietly = T)
library(knitr, quietly = T)
library(factoextra, quietly = T)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoMineR, quietly = T)
library(mice, quietly = T)
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(splines, quietly = T)
library(ipumsr, quietly = T)
ddi <- read_ipums_ddi("/Volumes/Jyoti/Stat 2 /PROJECT/nhis_00012.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
data<- haven::zap_labels(data)
names(data) <- tolower(gsub(pattern = "_",replacement = "",x = names(data)))
data <- data%>%
filter(age >=18 & age<=81)
# medication for depression
data$deprx <- as.factor(data$deprx)
data$deprx<- car::Recode(data$deprx,
recodes="1=0; 2=1;else=NA",
as.factor=T)
#currently Pregnant
data$pregnantnow<-as.factor(data$pregnantnow)
data$curpreg<-car::Recode(data$pregnantnow,
recodes="0='Yes';else=NA",
as.factor=T)
#Age cut into intervals
data$ageinterval<-cut(data$age, breaks=c(18,29,39,59,79,99))
#income
data$famtotincinterval<-cut(data$famtotinc, breaks=c(0, 50000, 100000,150000,200000,250000))
## marital status
data$mars<- car::Recode(data$marstat,
recodes ="10:13='Married'; 20='Widowed'; 30:40='Divorced/Separated';
; 50='Never Married'; else=NA",
as.factor=T)
#race/ethnicity
data$black<- car::Recode(data$hisprace,
recodes="03=1; 99=NA; else=0")
data$white<- car::Recode(data$hisprace,
recodes="02=1; 99=NA; else=0")
data$other<- car::Recode(data$hisprace,
recodes="4:7=1; 99=NA; else=0")
data$hispanic<- car::Recode(data$hisprace,
recodes="01=1; 99=NA; else=0")
data$hisprace<- as.factor(data$hisprace)
data$race_eth<-car::Recode(data$hisprace,
recodes="01='Hispanic'; 02='NH_White'; 03='NH_Black';04:07='NH_Other'; else=NA",
as.factor = T)
data$race_eth<-relevel(data$race_eth,
ref = "NH_White")
Selecting only needed variables
data2<-data%>%
select(sampweight, strata, age,famtotinc,deprx, mars, race_eth)%>%
filter(complete.cases(.))
# Survey Design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~strata, weights=~sampweight, data = data2 )
des
## Stratified Independent Sampling design (with replacement)
## svydesign(ids = ~1, strata = ~strata, weights = ~sampweight,
## data = data2)
Here is a GAM fit to the data, using linear terms.
datagam1<-gam(deprx~age+ famtotinc+ race_eth+mars,
data=data2,
weights = sampweight/mean(sampweight, na.rm=T),
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in gam.fit3(x = args$X, y = args$y, sp = lsp, Eb = args$Eb, UrS =
## args$UrS, : Algorithm did not converge
summary(datagam1)
##
## Family: binomial
## Link function: logit
##
## Formula:
## deprx ~ age + famtotinc + race_eth + mars
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.381e+00 6.848e-02 -20.162 < 2e-16 ***
## age 3.602e-03 9.993e-04 3.605 0.000313 ***
## famtotinc -5.011e-06 2.845e-07 -17.617 < 2e-16 ***
## race_ethHispanic -9.105e-01 4.678e-02 -19.461 < 2e-16 ***
## race_ethNH_Black -7.769e-01 5.101e-02 -15.229 < 2e-16 ***
## race_ethNH_Other -9.527e-01 6.662e-02 -14.300 < 2e-16 ***
## marsMarried -4.198e-01 3.963e-02 -10.595 < 2e-16 ***
## marsNever Married -4.129e-01 4.763e-02 -8.670 < 2e-16 ***
## marsWidowed 5.928e-03 6.344e-02 0.093 0.925542
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.0249 Deviance explained = 3.73%
## UBRE = -0.36747 Scale est. = 1 n = 57842
Here is a GAM fit to the data, using smooth terms for age and total family income last year (famtotincinterval)
datagam2<-gam(deprx ~ s(age)+ s(famtotinc)+race_eth +mars ,
data=data2,
weights = sampweight/mean(sampweight, na.rm=T),
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(datagam2)
##
## Family: binomial
## Link function: logit
##
## Formula:
## deprx ~ s(age) + s(famtotinc) + race_eth + mars
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.68285 0.03527 -47.716 < 2e-16 ***
## race_ethHispanic -0.95493 0.04710 -20.276 < 2e-16 ***
## race_ethNH_Black -0.84898 0.05150 -16.485 < 2e-16 ***
## race_ethNH_Other -0.98856 0.06685 -14.787 < 2e-16 ***
## marsMarried -0.32442 0.04049 -8.013 1.12e-15 ***
## marsNever Married -0.26373 0.04974 -5.303 1.14e-07 ***
## marsWidowed 0.17623 0.06539 2.695 0.00704 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(age) 8.325 8.873 166.9 <2e-16 ***
## s(famtotinc) 4.973 6.059 502.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.031 Deviance explained = 4.51%
## UBRE = -0.37215 Scale est. = 1 n = 57842
anova( datagam1, datagam2, test="Chisq")
plot(datagam2)