Splines and Generalized Additive Models

Author : Arvind Saini

Splines

# Emit a form feed; front ends that honour it (e.g. RStudio) clear the console.
cat("\014")

library(splines)
library(ISLR)

# The Wage columns are reached through `data =` / `with()` / `Wage$...` rather
# than attach(Wage): attach() puts the columns on the search path, where they
# can silently mask (or be masked by) other objects of the same name.

# Interior knots of the regression spline, defined once so the fitted model and
# the vertical guide lines drawn below cannot drift apart.
age_knots <- c(25, 40, 60)

# Regression spline of wage on age using the bs() basis with the knots above.
fit_bs <- lm(wage ~ bs(age, knots = age_knots), data = Wage)

# Grid of ages (step 1) spanning the observed range, used to draw the fit.
agelimits <- range(Wage$age)
age.grid <- seq(from = agelimits[1], to = agelimits[2])

with(Wage, plot(age, wage, col = "darkgrey"))
lines(age.grid, predict(fit_bs, newdata = list(age = age.grid)),
      col = "green", lwd = 2)
abline(v = age_knots, lty = 2, col = "green")

# Smoothing spline with the effective degrees of freedom fixed at 15.
fit_df15 <- with(Wage, smooth.spline(age, wage, df = 15))
lines(fit_df15, col = "red", lwd = 2)

# Smoothing spline whose smoothing parameter is chosen by leave-one-out
# cross-validation (cv = TRUE). `fit` keeps its original name because it is
# printed below.
fit <- with(Wage, smooth.spline(age, wage, cv = TRUE))
## Warning in smooth.spline(age, wage, cv = TRUE): cross-validation with non-
## unique 'x' values seems doubtful
lines(fit, col = "purple", lwd = 2)

fit
## Call:
## smooth.spline(x = age, y = wage, cv = TRUE)
## 
## Smoothing Parameter  spar= 0.6988943  lambda= 0.02792303 (12 iterations)
## Equivalent Degrees of Freedom (Df): 6.794596
## Penalized Criterion: 75215.9
## PRESS: 1593.383

Generalized additive models

library(gam)
## Loading required package: foreach
## Loaded gam 1.12
# Three side-by-side panels, one per model term. The layout is intentionally
# left in place afterwards so that later plots reuse it.
par(mfrow = c(1, 3))

# GAM for wage: smoothing-spline terms (4 df each) for age and year, plus the
# education factor.
gam1 <- gam(
  wage ~ s(age, df = 4) + s(year, df = 4) + education,
  data = Wage
)

# One panel per term, drawn with standard-error bands.
plot(gam1, se = TRUE)

# Logistic GAM for the indicator wage > 250, using the same terms as gam1:
# smoothing splines (4 df each) for age and year, plus education.
gam2 <- gam(
  I(wage > 250) ~ s(age, df = 4) + s(year, df = 4) + education,
  data = Wage,
  family = "binomial"
)

# Per-term fitted effects with standard-error bands.
plot(gam2, se = TRUE)

Testing the model

# Reduced model for the test: identical to gam2 except that year enters
# linearly instead of through a smoothing spline. Keeping s(age, df = 4) in
# both models makes them nested, so the analysis of deviance isolates whether
# the non-linear year term is needed.
#
# The predictor must be age, not wage: the response I(wage > 250) is a
# deterministic function of wage, so a model containing s(wage) separates the
# classes perfectly (residual deviance 0) and the comparison is meaningless.
gam2a <- gam(
  I(wage > 250) ~ s(age, df = 4) + year + education,
  data = Wage,
  family = "binomial"
)

# Chi-squared analysis of deviance: linear year (gam2a) vs. spline year (gam2).
anova(gam2a, gam2, test = "Chisq")
## NOTE: the deviance table previously recorded here was produced with
## s(wage, df = 4) in Model 1 (residual deviance 0.00, no p-value) and no
## longer matches the code above. Re-run this chunk to regenerate the output.

plot.gam can also be used to plot fits from other model types, such as an lm fit with natural-spline terms

# Ordinary linear model with natural-spline bases (4 df each) for age and year,
# plus education; it is fitted with lm(), not gam().
lm1 <- lm(
  wage ~ ns(age, df = 4) + ns(year, df = 4) + education,
  data = Wage
)

# Three side-by-side panels, one per term.
par(mfrow = c(1, 3))

# plot.gam is called directly because lm1 is an "lm" object, so the generic
# plot() would dispatch to the lm method instead.
# NOTE(review): the transcript above shows gam 1.12; newer gam releases
# reportedly name this function plot.Gam - confirm against the installed version.
plot.gam(lm1, se = TRUE)