## COVER PAGE

## high order poly, spline and gam for the Boston housing price w/ lstat
library(MASS)
## Warning: package 'MASS' was built under R version 3.1.2
data(Boston)
library(mgcv)
## Warning: package 'mgcv' was built under R version 3.1.2
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 3.1.2
## This is mgcv 1.8-4. For overview type 'help("mgcv-package")'.
library(ggplot2)
    
# Candidate fits of medv (housing price) on lstat, from rigid to flexible.

# 1: straight-line least-squares baseline.
fit1 <- lm(medv ~ lstat, data = Boston)

# 2: quadratic polynomial; I() keeps `^` arithmetic inside the formula.
fit2 <- lm(medv ~ lstat + I(lstat^2), data = Boston)

# 3: local regression using loess() defaults.
fit3 <- loess(medv ~ lstat, data = Boston)

# 4: GAM with a single smooth term in lstat, using mgcv defaults.
fit4 <- gam(medv ~ s(lstat), data = Boston)

# Report convergence and basis-dimension (k) diagnostics for the smooth.
gam.check(fit4)

## 
## Method: GCV   Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
## The RMS GCV score gradiant at convergence was 0.0001964646 .
## The Hessian was positive definite.
## The estimated model rank was 10 (maximum possible: 10)
## Model rank =  10 / 10 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##            k'  edf k-index p-value
## s(lstat) 9.00 7.40    1.06     0.9
#plot type1
    #predict.plot(medv~lstat, data=Boston, partial=fit1)
    #predict.plot(medv~lstat, data=Boston, partial=fit2)
    #lines(fit3, col='green')
## COMPREHENSIVE VIEW OF ALL MODELS
# Overlay all four fits on the scatter of medv against lstat.
#
# The fit2 and fit4 predictions are mapped through aes() rather than passed
# as a constant `y =` parameter. predict() returns values in the row order of
# Boston, while geom_line() draws rows ordered by x; only a mapped aesthetic
# is reordered together with its row, so a bare parameter vector can end up
# paired with the wrong lstat values and produce a scrambled line.
# as.numeric() drops the 1-d array structure that predict() may return for a
# gam, leaving a plain numeric vector for the aesthetic.
ggplot(Boston, aes(x = lstat, y = medv)) +
  geom_point() +
  # fit1: linear model, refit by ggplot2 with the same formula
  stat_smooth(method = "lm", formula = y ~ x, color = "blue") +
  # fit2: quadratic polynomial predictions
  geom_line(aes(y = as.numeric(predict(fit2))), color = "green") +
  # fit3: loess, refit by ggplot2 (presumably matching fit3's defaults)
  stat_smooth(method = "loess", formula = y ~ x, color = "red") +
  # fit4: GAM smooth predictions
  geom_line(aes(y = as.numeric(predict(fit4))), color = "black")

# Analyze the relationship between medv and several predictors with a GAM:
# medv is modelled as a sum of separate smooth functions of lstat, rm and crim.
library(mgcv)
gammodel <- gam(medv ~ s(lstat) + s(rm) + s(crim), data = Boston)

# One row, three columns: one panel per smooth term.
par(mfrow = c(1, 3))

# TRUE is written out because `T` is an ordinary variable that can be
# reassigned, and `residuals` is named in full instead of relying on partial
# argument matching of `resid`. pch = 16 draws the residuals as solid dots.
plot(gammodel, se = TRUE, residuals = TRUE, pch = 16)

#gam.check(gammodel)