library(splines)
library(mgcv)
## Loading required package: nlme
## This is mgcv 1.8-33. For overview type 'help("mgcv-package")'.
library(foreign)
library(tidyverse)
## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.3
## v tidyr 1.1.1 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x dplyr::collapse() masks nlme::collapse()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
Load in NHANES data
load("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/NHANES data/nhanes_10_18_merged.Rdata")
# The outcome variable used will be a1c levels of diabetes. The continuous predictor will be age and bmi.
library(mgcv) #one library for GAMs
nhanes_all$diab2 <- Recode(nhanes_all$diab, recodes= "'yes'=1; 'no'=0")
nhns_gam <- gam(diab2~ridageyr+bmxbmi+sex+race_eth+educ,
data=nhanes_all,
weights=weight/mean(weight, na.rm=T),
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(nhns_gam)
##
## Family: binomial
## Link function: logit
##
## Formula:
## diab2 ~ ridageyr + bmxbmi + sex + race_eth + educ
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.498970 0.258785 -32.842 < 2e-16 ***
## ridageyr 0.057154 0.002456 23.267 < 2e-16 ***
## bmxbmi 0.093525 0.004511 20.734 < 2e-16 ***
## sexMale 0.439629 0.070168 6.265 3.72e-10 ***
## race_eth2 hispanic 0.684258 0.101269 6.757 1.41e-11 ***
## race_eth3 black 0.667812 0.103956 6.424 1.33e-10 ***
## race_eth4 other 1.019279 0.137630 7.406 1.30e-13 ***
## educ2 hs -0.199424 0.103231 -1.932 0.05338 .
## educ3 some col -0.329817 0.100149 -3.293 0.00099 ***
## educ4 col grad -0.587749 0.108780 -5.403 6.55e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.0913 Deviance explained = 16.2%
## UBRE = -0.474 Scale est. = 1 n = 11335
nhns_gam2 <- gam(diab2~s(ridageyr)+s(bmxbmi)+sex+race_eth+educ,
data=nhanes_all,
weights=weight/mean(weight, na.rm=T),
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(nhns_gam2)
##
## Family: binomial
## Link function: logit
##
## Formula:
## diab2 ~ s(ridageyr) + s(bmxbmi) + sex + race_eth + educ
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.01399 0.10740 -28.063 < 2e-16 ***
## sexMale 0.41899 0.07056 5.938 2.89e-09 ***
## race_eth2 hispanic 0.64861 0.10212 6.351 2.14e-10 ***
## race_eth3 black 0.66136 0.10361 6.383 1.74e-10 ***
## race_eth4 other 1.08309 0.13951 7.764 8.26e-15 ***
## educ2 hs -0.22085 0.10380 -2.128 0.033373 *
## educ3 some col -0.37283 0.10079 -3.699 0.000216 ***
## educ4 col grad -0.64878 0.10981 -5.908 3.46e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(ridageyr) 2.874 3.594 464.7 <2e-16 ***
## s(bmxbmi) 3.746 4.736 427.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.109 Deviance explained = 17.9%
## UBRE = -0.48363 Scale est. = 1 n = 11335
anova( nhns_gam2, nhns_gam, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: diab2 ~ s(ridageyr) + s(bmxbmi) + sex + race_eth + educ
## Model 2: diab2 ~ ridageyr + bmxbmi + sex + race_eth + educ
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 11319 5823.8
## 2 11325 5942.2 -6.3302 -118.45 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The model with smooth terms fit the data better
plot(nhns_gam2)