# COVER PAGE
## Linear, quadratic-polynomial, loess and GAM (spline) fits of the Boston housing price (medv) on lstat
library(MASS)
## Warning: package 'MASS' was built under R version 3.1.2
data(Boston)
library(mgcv)
## Warning: package 'mgcv' was built under R version 3.1.2
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 3.1.2
## This is mgcv 1.8-4. For overview type 'help("mgcv-package")'.
library(ggplot2)
# Four alternative fits of medv on lstat, all on the Boston data.
fit1 <- lm(medv ~ lstat, data = Boston)               # 1: straight line
fit2 <- lm(medv ~ lstat + I(lstat^2), data = Boston)  # 2: adds a squared lstat term
fit3 <- loess(medv ~ lstat, data = Boston)            # local regression, default settings
fit4 <- gam(medv ~ s(lstat), data = Boston)           # GAM with a single smooth of lstat
# Report convergence and basis-dimension (k) diagnostics for the GAM.
gam.check(fit4)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
## The RMS GCV score gradiant at convergence was 0.0001964646 .
## The Hessian was positive definite.
## The estimated model rank was 10 (maximum possible: 10)
## Model rank = 10 / 10
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(lstat) 9.00 7.40 1.06 0.9
#plot type1
#predict.plot(medv~lstat, data=Boston, partial=fit1)
#predict.plot(medv~lstat, data=Boston, partial=fit2)
#lines(fit3, col='green')
## COMPREHENSIVE VIEW OF ALL MODELS
# Scatter of medv against lstat with one curve per model.
# - The lm and loess curves are re-fitted by stat_smooth() from the plotted
#   x/y pair (formula y ~ x, i.e. the same formulas as fit1 and fit3).
# - The fit2 and fit4 curves are drawn from the fitted values of those objects.
# The fitted values are mapped through aes() instead of being passed as a fixed
# `y =` parameter: a mapped column travels with its row when geom_line() orders
# the layer data by x, whereas a raw vector is only correct if it happens to
# stay aligned with the layer data row-for-row.
# as.numeric() strips names/dim attributes from the predictions so they enter
# the layer data as plain numeric columns.
ggplot(Boston, aes(x = lstat, y = medv)) +
  geom_point() +
  stat_smooth(method = "lm", formula = y ~ x, color = "blue") +        # fit1
  geom_line(aes(y = as.numeric(predict(fit2))), color = "green") +     # fit2
  stat_smooth(method = "loess", formula = y ~ x, color = "red") +      # fit3
  geom_line(aes(y = as.numeric(predict(fit4))), color = "black")       # fit4
# Analyze the relationship between medv and other variables using GAM
library(mgcv)  # already attached near the top of the script; harmless to repeat
# Additive model with one smooth term each for lstat, rm and crim.
gammodel <- gam(medv ~ s(lstat) + s(rm) + s(crim), data = Boston)
# Lay the device out as one row of three panels, one per smooth term.
# NOTE(review): the layout is left in effect afterwards, as in the original;
# reset with par(mfrow = c(1, 1)) if later plots should be single-panel.
par(mfrow = c(1, 3))
# `residuals` is spelled out (the original `resid` relied on partial argument
# matching) and TRUE replaces T, which is an ordinary, reassignable variable.
plot(gammodel, se = TRUE, residuals = TRUE, pch = 16)
#gam.check(gammodel)