In this example, we describe how to use splines to model nonlinearities in predictor variables, and introduce the generalized additive model (GAM).
Splines are nothing more than a set of linear or non-linear functions that are tied together to construct a model for data.
In the simplest example, consider a regression function whose form changes at some point in the data. So if we see data like this:
b0 <- 1; b1 <- 0.5; b2 <- 35
x <- 1:100
y1 <- rnorm(70, b0 + b1*x[1:70], 1)   # increasing linear trend for x = 1,...,70
y2 <- rnorm(30, b2, 1)                # flat mean for x = 71,...,100
y <- c(y1, y2)
plot(y~x, main="Nonlinear data")
we might say, yes, I can fit a linear model to that:
plot(y~x, main="linear fit to nonlinear data")
lin<-lm(y~x)
summary(lin)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5338 -2.3689 0.4286 2.2613 5.5451
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.59458 0.63262 7.263 9.14e-11 ***
## x 0.37634 0.01088 34.604 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.139 on 98 degrees of freedom
## Multiple R-squared: 0.9243, Adjusted R-squared: 0.9236
## F-statistic: 1197 on 1 and 98 DF, p-value: < 2.2e-16
lines(fitted(lin)~x,col=3)
but we would be sorely mistaken, as this model clearly doesn’t fit the data.
Instead, we can model the change in the underlying structure by using a series of knots in the function, or points along the data at which the form of the function changes.
Mathematically, such a piecewise function can be written as:
$$Y(x) = \begin{cases} F_1(x) & \text{for } x\in [x_1, x_2]\\ F_2(x) & \text{for } x\in [x_2, x_3]\\ \cdots \\ F_{k}(x) & \text{for } x\in [x_{k-1}, x_k] \end{cases}$$
where each function \(F_k(x)\) takes a different form on the interval \([x_{k-1}, x_k]\), and the breaks between intervals occur at the knots in the data.
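In this simulated example, a concrete instance of this (matching the data-generating code above, with a single knot at x = 70) would be:
$$Y(x) = \begin{cases} \beta_0 + \beta_1 x & \text{for } x \le 70\\ \beta_2 & \text{for } x > 70 \end{cases}$$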
This would generate a model that looks like this:
sp <- lm(y ~ x*I(x < 71) + x*I(x > 71))
summary(sp)
##
## Call:
## lm(formula = y ~ x * I(x < 71) + x * I(x > 71))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.09929 -0.73705 -0.01578 0.74149 2.18256
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.28812 1.84687 17.483 <2e-16 ***
## x 0.02251 0.02196 1.025 0.308
## I(x < 71)TRUE -31.34488 1.86228 -16.831 <2e-16 ***
## I(x > 71)TRUE 0.71688 1.05903 0.677 0.500
## x:I(x < 71)TRUE 0.47920 0.02273 21.082 <2e-16 ***
## x:I(x > 71)TRUE NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9896 on 95 degrees of freedom
## Multiple R-squared: 0.9927, Adjusted R-squared: 0.9924
## F-statistic: 3236 on 4 and 95 DF, p-value: < 2.2e-16
plot(y~x,main="Piece-wise linear fit to nonlinear data, knot at x=70")
lines(fitted(sp)~x,col=2)
Of course, this can be much more complicated, with multiple knots in the data, to model multiple changes in the underlying data-generating function.
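For instance, here is a minimal sketch of a piecewise-linear fit with two knots; the data and knot locations (x = 40 and x = 70) are made up purely for illustration, extending the indicator-variable approach above:
set.seed(2)
x3 <- 1:100
# three regimes: increasing, flat, then decreasing
y3 <- c(rnorm(40, 1 + 0.5*x3[1:40], 1),
        rnorm(30, 21, 1),
        rnorm(30, 60 - 0.5*x3[71:100], 1))
# knots at x = 40 and x = 70; the middle regime serves as the baseline
sp2 <- lm(y3 ~ x3*I(x3 <= 40) + x3*I(x3 > 70))
plot(y3 ~ x3, main = "Piecewise linear fit, knots at x=40 and x=70")
lines(fitted(sp2) ~ x3, col = 2)
Next, we simulate a continuously nonlinear series (a noisy sine curve) and fit splines with increasing flexibility to it: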
set.seed(1)                           # set the seed *before* generating the noise, so the simulation is reproducible
n <- 100                              # number of data points
t <- seq(0, 2*pi, length.out = n)
a <- 3                                # amplitude
b <- 2                                # frequency
amp <- 1                              # noise scale
c.norm <- rnorm(n)                    # standard normal noise
# generate data and calculate "y"
y2 <- a*sin(b*t) + c.norm*amp         # sine curve with Gaussian/normal error
# plot results
plot(t, y2, t="l", ylim=range(y2)*c(1,1.2), main="Sine curve with Gaussian noise")
plot(t, y2, t="l", ylim=range(y2)*c(1,1.2), main="Terrible linear model")
abline(lm(y2~t), col=3)
Splines are often characterized by the number of knots used to build them; more knots allow a more flexible fit.
library(splines)
library(mgcv)
## Loading required package: nlme
## This is mgcv 1.8-28. For overview type 'help("mgcv-package")'.
plot(t, y2, t="l", ylim=range(y2)*c(1,1.2), main="Increasingly complex splines")
fit <- spline(t, y2)   # interpolating spline over all data points (not plotted below)
lines(spline(t,y2, n = 4),col="darkgreen",lwd=2,type="l")
lines(spline(t,y2, n = 10),col="red",lwd=2,type="l")
lines(spline(t,y2, n = 20),col="blue",lwd=2,type="l")
lines(spline(t,y2, n = 30),col="orange",lwd=2,type="l")
The splines above were piecewise linear; we can also fit piecewise cubic (smoothing) splines, in which a cubic polynomial is fit between each pair of knots. In the plot below, we can see that by increasing the degrees of freedom (effectively the number of knots), we better approximate the nonlinearities in the data.
plot(t, y2, t="l", ylim=range(y2)*c(1,1.2), main="Increasingly complex splines")
fit<-smooth.spline(t, y2, df=2)
fit5<-smooth.spline(t, y2, df=5)
fit10<-smooth.spline(t, y2, df=10)
lines(fit, col="red")
lines(fit5, col="green")
lines(fit10, col="blue")
The generalized additive model (GAM) is a modeling framework that forms a linear predictor for a regression function through the combination of both smooth and linear terms.
The GAM forms the linear predictor of a GLM as:
\[g(E(y)) = \beta_0 + f(x_1) + \beta_1 x_2\]
where \(g(\cdot)\) is the link function and the \(f(x_1)\) term is a regression spline of one of the predictors. The models can be a mixture of linear and smooth terms.
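As a quick sketch of such a mixture (using simulated predictors x1 and x2, which are made up here purely for illustration), a smooth term and a linear term can be combined directly in mgcv's gam():
library(mgcv)
set.seed(3)
x1 <- runif(200)
x2 <- rnorm(200)
# y depends nonlinearly on x1 and linearly on x2
ysim <- sin(2*pi*x1) + 0.5*x2 + rnorm(200, sd = 0.3)
mixfit <- gam(ysim ~ s(x1) + x2)   # s() gives a smooth term; x2 enters linearly
summary(mixfit)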
library(mgcv) #one library for GAMs
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
##
## collapse
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dat<-data.frame(y=y2, x=t)
gamfit<-gam(y~s(x), data=dat)
summary(gamfit)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## y ~ s(x)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.10273 0.09228 1.113 0.269
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(x) 7.92 8.694 54.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.829 Deviance explained = 84.3%
## GCV = 0.93506 Scale est. = 0.85165 n = 100
plot(gamfit)
dat %>%
  ggplot(aes(x = x, y = y)) + geom_point() + geom_smooth(method = "gam", formula = y ~ s(x))
load("~/Google Drive/classes/dem7283/class_19_7283/data/brfss16_mmsa.Rdata")
set.seed(12345)
#samps<-sample(1:nrow(brfss16m), size = 40000, replace=F)
#brfss16m<-brfss16m[samps,]
#The names in the data are very ugly, so I make them less ugly
nams<-names(brfss16m)
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-gsub(pattern = "_",replacement = "",x = nams)
names(brfss16m)<-tolower(newnames)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(dplyr)
#nice MSA name
brfss16m$mmsa_name<-substr(brfss16m$mmsaname, 1,nchar(brfss16m$mmsaname)-31)
#sex
brfss16m$male<-ifelse(brfss16m$sex==1, 1, 0)
#BMI
brfss16m$bmi<-ifelse(is.na(brfss16m$bmi5)==T, NA, brfss16m$bmi5/100)
#Healthy days
brfss16m$healthdays<-Recode(brfss16m$physhlth, recodes = "88=0; 77=NA; 99=NA")
#Healthy mental health days
brfss16m$healthmdays<-Recode(brfss16m$menthlth, recodes = "88=0; 77=NA; 99=NA")
brfss16m$badhealth<-Recode(brfss16m$genhlth, recodes="4:5=1; 1:3=0; else=NA")
#race/ethnicity
brfss16m$black<-Recode(brfss16m$racegr3, recodes="2=1; 9=NA; else=0")
brfss16m$white<-Recode(brfss16m$racegr3, recodes="1=1; 9=NA; else=0")
brfss16m$other<-Recode(brfss16m$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss16m$hispanic<-Recode(brfss16m$racegr3, recodes="5=1; 9=NA; else=0")
brfss16m$race_eth<-Recode(brfss16m$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
brfss16m$race_eth<-relevel(brfss16m$race_eth, ref = "nhwhite")
#insurance
brfss16m$ins<-Recode(brfss16m$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
#income grouping
brfss16m$inc<-Recode(brfss16m$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor = T)
brfss16m$inc<-as.ordered(brfss16m$inc)
#education level
brfss16m$educ<-Recode(brfss16m$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor=T)
brfss16m$educ<-relevel(brfss16m$educ, ref='2hsgrad')
#employment
brfss16m$employ<-Recode(brfss16m$employ1,
recodes="1:2='employed'; 3:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor=T)
brfss16m$employ<-relevel(brfss16m$employ, ref='employed')
#marital status
brfss16m$marst<-Recode(brfss16m$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor=T)
brfss16m$marst<-relevel(brfss16m$marst, ref='married')
#Age cut into intervals
brfss16m$agec<-cut(brfss16m$age80, breaks=c(0,24,39,59,79,99))
#BMI - in the brfss16m data the bmi variable has 2 implied decimal places,
#so we must divide by 100 to get the actual BMI values
brfss16m$bmi<-brfss16m$bmi5/100
#smoking currently
brfss16m$smoke<-Recode(brfss16m$smoker3,
recodes="1:2=1; 3:4=0; else=NA")
#brfss16m$smoke<-relevel(brfss16m$smoke, ref = "NeverSmoked")
brfss16m$obese<-ifelse(is.na(brfss16m$bmi)==T, NA,
ifelse(brfss16m$bmi>30,1,0))
library(tidycensus)
library(dplyr)
usacs<-get_acs(geography = "metropolitan statistical area/micropolitan statistical area", year = 2011,
variables=c( "DP05_0001E", "DP03_0009P", "DP03_0062E", "DP03_0119PE",
"DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0038E",
"DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092PE",
"DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099PE","DP03_0101PE",
"DP03_0119PE","DP04_0046PE","DP05_0072PE","DP05_0073PE",
"DP05_0066PE","DP05_0033","DP05_0032", "DP05_0072PE", "DP02_0113PE", "DP04_0003PE") ,
summary_var = "B01001_001",
geometry = F, output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Using the ACS Data Profile
## Using the ACS Data Profile
usacs<-usacs%>%
mutate(totpop= DP05_0001E, fertrate = DP02_0040E,pwhite=DP05_0072PE/100, nwhite=DP05_0032E,nblack=DP05_0033E,
pblack=DP05_0073PE/100 , phisp=DP05_0066PE/100, pfemhh=DP02_0008PE/100,
phsormore=DP02_0066PE/100,punemp=DP03_0009PE/100, medhhinc=DP03_0062E,
ppov=DP03_0119PE/100, pforn=DP02_0092PE/100,plep=DP02_0113PE/100, pvacant=DP04_0003PE/100)%>%
select(GEOID,NAME, totpop, pwhite, pblack, phisp, pfemhh, phsormore, punemp, medhhinc, ppov, pforn,plep, pvacant)
head(usacs)
myscale<-function(x){as.numeric(scale(x))}
usacs<-usacs %>%
mutate_at(c("ppov","punemp", "pblack","phisp", "pfemhh", "phsormore", "medhhinc", "pvacant"),myscale)
usacs<-usacs%>%filter(complete.cases(.))
merged<-merge(x=brfss16m,
y=usacs,
by.x="mmsa",
by.y="GEOID",
all.x=F)
merged<-merged%>%
select( mmsa, ststr, agec,bmi,healthmdays, age80, educ, black, hispanic, other,smoke,obese, badhealth,pblack,phisp, medhhinc,ppov, male, mmsawt, mmsa_name )%>%
filter(complete.cases(.))
Here is a GAM fit to the BRFSS, using smooth terms for age, BMI, and the number of poor mental health days:
brfgam<-gam(badhealth~s(age80)+s(bmi)+s(healthmdays)+educ+male+black+hispanic+other, data=merged, weights = mmsawt/mean(mmsawt, na.rm=T), family=binomial)
summary(brfgam)
##
## Family: binomial
## Link function: logit
##
## Formula:
## badhealth ~ s(age80) + s(bmi) + s(healthmdays) + educ + male +
## black + hispanic + other
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.65882 0.01641 -101.112 < 2e-16 ***
## educ0Prim 1.10644 0.03303 33.494 < 2e-16 ***
## educ1somehs 0.66010 0.02428 27.183 < 2e-16 ***
## educ3somecol -0.33752 0.01836 -18.382 < 2e-16 ***
## educ4colgrad -1.01834 0.02280 -44.671 < 2e-16 ***
## male 0.09580 0.01505 6.363 1.98e-10 ***
## black 0.37279 0.02125 17.541 < 2e-16 ***
## hispanic 0.57222 0.02145 26.677 < 2e-16 ***
## other 0.38646 0.03292 11.738 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(age80) 6.364 7.503 5585 <2e-16 ***
## s(bmi) 7.563 8.360 3067 <2e-16 ***
## s(healthmdays) 8.805 8.984 10047 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.194 Deviance explained = 19.1%
## UBRE = -0.27536 Scale est. = 1 n = 168050
plot(brfgam)
Versus the same model with only linear terms:
brfgam_l<-gam(badhealth~age80+bmi+healthmdays+educ+male+black+hispanic+other, data=merged, weights = mmsawt/mean(mmsawt, na.rm=T), family=binomial)
summary(brfgam_l)
##
## Family: binomial
## Link function: logit
##
## Formula:
## badhealth ~ age80 + bmi + healthmdays + educ + male + black +
## hispanic + other
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.1136949 0.0445867 -114.691 <2e-16 ***
## age80 0.0310745 0.0004443 69.943 <2e-16 ***
## bmi 0.0550070 0.0010734 51.248 <2e-16 ***
## healthmdays 0.0747987 0.0007573 98.777 <2e-16 ***
## educ0Prim 1.0924998 0.0326516 33.459 <2e-16 ***
## educ1somehs 0.6625019 0.0241257 27.460 <2e-16 ***
## educ3somecol -0.3322283 0.0182259 -18.228 <2e-16 ***
## educ4colgrad -1.0036175 0.0225264 -44.553 <2e-16 ***
## male 0.0183525 0.0147303 1.246 0.213
## black 0.3670013 0.0210577 17.428 <2e-16 ***
## hispanic 0.5187349 0.0212329 24.431 <2e-16 ***
## other 0.3922489 0.0328170 11.953 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.184 Deviance explained = 18.2%
## UBRE = -0.26773 Scale est. = 1 n = 168050
anova( brfgam_l, brfgam, test="Chisq")
Regression splines can also be used in any generalized linear model via functions from the splines library. The bs() function generates a B-spline basis, and ns() generates a natural spline basis; natural splines often behave better at the boundaries of the data.
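As a quick illustration of the difference (a sketch reusing the simulated sine data t and y2 from above, with df = 5 chosen arbitrarily), both bases can be dropped directly into an lm() or glm() formula:
library(splines)
fit_bs <- lm(y2 ~ bs(t, df = 5))   # B-spline basis
fit_ns <- lm(y2 ~ ns(t, df = 5))   # natural spline: linear beyond the boundary knots
plot(t, y2, main = "bs() vs ns() regression splines")
lines(t, fitted(fit_bs), col = "red", lwd = 2)
lines(t, fitted(fit_ns), col = "blue", lwd = 2)
legend("topright", legend = c("bs, df = 5", "ns, df = 5"), col = c("red", "blue"), lwd = 2)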
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
#take a random subsample of 10,000 respondents to speed up the mixed-model fits
samp<-sample(1:dim(merged)[1], size = 10000)
fit_glmm_bs<-glmer(badhealth ~ bs(scale(age80))+male+black+hispanic+other+(1|mmsa_name),
data=merged[samp,],
family=binomial,
weights=mmsawt/mean(mmsawt),
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=1e9)))
## Warning in eval(family$initialize, rho): non-integer #successes in a
## binomial glm!
fit_glmm<-glmer(badhealth ~ scale(age80)+male+black+hispanic+other+(1|mmsa_name),
data=merged[samp,],
family=binomial,
weights=mmsawt/mean(mmsawt),
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=1e9)))
## Warning in eval(family$initialize, rho): non-integer #successes in a
## binomial glm!
fit_glmm_ML<-glmer(badhealth ~ bs(scale(age80))+male+black+hispanic+other+bs(medhhinc)+(1|mmsa_name),
data=merged[samp,],
family=binomial,
weights=mmsawt/mean(mmsawt),
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=1e9)))
## Warning in eval(family$initialize, rho): non-integer #successes in a
## binomial glm!
summary(fit_glmm)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: badhealth ~ scale(age80) + male + black + hispanic + other +
## (1 | mmsa_name)
## Data: merged[samp, ]
## Weights: mmsawt/mean(mmsawt)
## Control:
## glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+09))
##
## AIC BIC logLik deviance df.resid
## 7562.9 7613.3 -3774.4 7548.9 9993
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8617 -0.3972 -0.2430 -0.1210 10.3918
##
## Random effects:
## Groups Name Variance Std.Dev.
## mmsa_name (Intercept) 0.18 0.4242
## Number of obs: 10000, groups: mmsa_name, 122
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.787519 0.069669 -25.657 <2e-16 ***
## scale(age80) 0.412074 0.030026 13.724 <2e-16 ***
## male 0.004205 0.059282 0.071 0.9435
## black 0.829131 0.082670 10.029 <2e-16 ***
## hispanic 1.184666 0.082449 14.368 <2e-16 ***
## other 0.263457 0.134076 1.965 0.0494 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) sc(80) male black hispnc
## scale(ag80) 0.052
## male -0.432 0.050
## black -0.250 0.134 -0.009
## hispanic -0.250 0.248 -0.001 0.285
## other -0.169 0.093 -0.005 0.162 0.197
summary(fit_glmm_bs)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: badhealth ~ bs(scale(age80)) + male + black + hispanic + other +
## (1 | mmsa_name)
## Data: merged[samp, ]
## Weights: mmsawt/mean(mmsawt)
## Control:
## glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+09))
##
## AIC BIC logLik deviance df.resid
## 7564.0 7628.9 -3773.0 7546.0 9991
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7877 -0.3975 -0.2429 -0.1218 10.5942
##
## Random effects:
## Groups Name Variance Std.Dev.
## mmsa_name (Intercept) 0.1806 0.425
## Number of obs: 10000, groups: mmsa_name, 122
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.634830 0.132319 -19.913 < 2e-16 ***
## bs(scale(age80))1 0.211551 0.322998 0.655 0.5125
## bs(scale(age80))2 1.304305 0.202530 6.440 1.19e-10 ***
## bs(scale(age80))3 1.292442 0.174433 7.409 1.27e-13 ***
## male -0.001442 0.059390 -0.024 0.9806
## black 0.830534 0.082951 10.012 < 2e-16 ***
## hispanic 1.181008 0.082552 14.306 < 2e-16 ***
## other 0.257212 0.134117 1.918 0.0551 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) b((80))1 b((80))2 b((80))3 male black hispnc
## bs(sc(80))1 -0.732
## bs(sc(80))2 -0.055 -0.361
## bs(sc(80))3 -0.713 0.777 -0.338
## male -0.248 0.018 -0.037 0.058
## black -0.137 -0.063 0.051 0.037 -0.008
## hispanic -0.245 0.025 0.065 0.161 0.000 0.284
## other -0.138 0.022 0.008 0.073 -0.003 0.161 0.197
summary(fit_glmm_ML)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: badhealth ~ bs(scale(age80)) + male + black + hispanic + other +
## bs(medhhinc) + (1 | mmsa_name)
## Data: merged[samp, ]
## Weights: mmsawt/mean(mmsawt)
## Control:
## glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+09))
##
## AIC BIC logLik deviance df.resid
## 7559.3 7645.8 -3767.6 7535.3 9988
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7586 -0.3981 -0.2415 -0.1214 10.5142
##
## Random effects:
## Groups Name Variance Std.Dev.
## mmsa_name (Intercept) 0.1615 0.4019
## Number of obs: 10000, groups: mmsa_name, 122
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.091629 0.292201 -7.158 8.18e-13 ***
## bs(scale(age80))1 0.226036 0.323236 0.699 0.4844
## bs(scale(age80))2 1.308335 0.202635 6.457 1.07e-10 ***
## bs(scale(age80))3 1.293460 0.174711 7.403 1.33e-13 ***
## male -0.001209 0.059418 -0.020 0.9838
## black 0.833798 0.082911 10.057 < 2e-16 ***
## hispanic 1.177991 0.082517 14.276 < 2e-16 ***
## other 0.266734 0.135113 1.974 0.0484 *
## bs(medhhinc)1 -0.809058 0.771020 -1.049 0.2940
## bs(medhhinc)2 -1.117894 0.610475 -1.831 0.0671 .
## bs(medhhinc)3 -0.670659 0.564093 -1.189 0.2345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) b((80))1 b((80))2 b((80))3 male black hispnc other
## bs(sc(80))1 -0.321
## bs(sc(80))2 -0.012 -0.360
## bs(sc(80))3 -0.304 0.777 -0.338
## male -0.117 0.020 -0.037 0.060
## black -0.044 -0.063 0.051 0.038 -0.008
## hispanic -0.116 0.026 0.063 0.161 0.001 0.284
## other -0.041 0.022 0.006 0.077 -0.001 0.161 0.198
## bs(mdhhnc)1 -0.840 -0.010 -0.011 -0.025 0.006 -0.016 0.009 -0.024
## bs(mdhhnc)2 0.218 -0.003 -0.008 0.024 -0.005 -0.005 -0.008 0.039
## bs(mdhhnc)3 -0.481 -0.005 0.004 -0.038 -0.009 -0.013 -0.018 -0.115
## bs(m)1 bs(m)2
## bs(sc(80))1
## bs(sc(80))2
## bs(sc(80))3
## male
## black
## hispanic
## other
## bs(mdhhnc)1
## bs(mdhhnc)2 -0.552
## bs(mdhhnc)3 0.555 -0.434
anova(fit_glmm, fit_glmm_bs, fit_glmm_ML)
You can also fit GLMMs within the GAM framework using the gamm4 package; gamm4() returns a list whose $mer component is the underlying lme4 fit and whose $gam component contains the estimated smooth terms.
library(gamm4)
gam_mm<-gamm4(badhealth ~ s(scale(age80))+male+black+hispanic+other,
data=merged[samp,],
random=~(1|mmsa_name),
family = binomial)
summary(gam_mm$mer)
summary(gam_mm$gam)