#install.packages("faraway"); install.packages("mda"); install.packages("earth")
require(earth, quietly = T); require(caret, quietly = T); require(faraway, quietly = T); require(mda, quietly = T)
## Warning: package 'earth' was built under R version 3.2.2
## Warning: package 'plotmo' was built under R version 3.2.2
## Warning: package 'plotrix' was built under R version 3.2.2
## Warning: package 'TeachingDemos' was built under R version 3.2.2
## Warning: package 'caret' was built under R version 3.2.2
## Warning: package 'ggplot2' was built under R version 3.2.2
## Warning: package 'faraway' was built under R version 3.2.2
##
## Attaching package: 'faraway'
##
## The following object is masked from 'package:lattice':
##
## melanoma
## Warning: package 'mda' was built under R version 3.2.2
## Loaded mda 0.4-7
setwd("C:/Users/sumeetaneja/Documents/ACS/RFL2014Model/DataSet2014Model")
fat <- read.csv("fat.csv", header=T)
#data(fat)
head(fat);
## brozek siri density age weight height adipos free neck chest abdom
## 1 12.6 12.3 1.0708 23 154.25 67.75 23.7 134.9 36.2 93.1 85.2
## 2 6.9 6.1 1.0853 22 173.25 72.25 23.4 161.3 38.5 93.6 83.0
## 3 24.6 25.3 1.0414 22 154.00 66.25 24.7 116.0 34.0 95.8 87.9
## 4 10.9 10.4 1.0751 26 184.75 72.25 24.9 164.7 37.4 101.8 86.4
## 5 27.8 28.7 1.0340 24 184.25 71.25 25.6 133.1 34.4 97.3 100.0
## 6 20.6 20.9 1.0502 24 210.25 74.75 26.5 167.0 39.0 104.5 94.4
## hip thigh knee ankle biceps forearm wrist
## 1 94.5 59.0 37.3 21.9 32.0 27.4 17.1
## 2 98.7 58.7 37.3 23.4 30.5 28.9 18.2
## 3 99.2 59.6 38.9 24.0 28.8 25.2 16.6
## 4 101.2 60.1 37.3 22.8 32.4 29.4 18.2
## 5 101.9 63.2 42.2 24.0 32.2 27.7 17.7
## 6 107.8 66.0 42.0 25.6 35.7 30.6 18.8
data0 <- fat[c(-1,-3, -8)]
mfit <- mars(data0[,-1], data0[,1], degree=1, prune=T)
mfit$coef
## [,1]
## [1,] 14.80795672
## [2,] -0.10715821
## [3,] 2.84377719
## [4,] -1.26743240
## [5,] -0.43375363
## [6,] 1.81930622
## [7,] -0.08755307
## [8,] -1.50178567
## [9,] 1.15735067
## [10,] -0.57251777
## [11,] 0.65232601
## [12,] -1.94923169
head(mfit$x)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] 1 0.0 1.4 0 3.7 0.0 34 0 1.9 0.0 0.0 0.6
## [2,] 1 6.5 0.3 0 5.2 1.5 35 0 0.0 0.0 0.0 2.9
## [3,] 1 0.0 1.9 0 6.9 0.0 35 0 4.6 0.0 0.0 0.0
## [4,] 1 18.0 0.3 0 3.3 0.4 31 0 3.1 0.0 0.0 1.8
## [5,] 1 17.5 0.8 0 3.5 0.0 33 0 16.7 5.9 0.6 0.0
## [6,] 1 43.5 0.0 0 0.0 2.0 33 0 11.1 0.3 1.5 3.4
mfit$gcv
## [1] 17.74072
sum(mfit$res^2)
## [1] 3691.83
mfit$selected.terms
## [1] 1 4 7 8 9 10 13 15 16 18 20 24
mfit$cuts
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 0.0 0.0
## [2,] 0 0.00 0 0 0.0 0 111.2 0 0.0 0 0.0 0.0
## [3,] 0 0.00 0 0 0.0 0 111.2 0 0.0 0 0.0 0.0
## [4,] 0 166.75 0 0 0.0 0 0.0 0 0.0 0 0.0 0.0
## [5,] 0 166.75 0 0 0.0 0 0.0 0 0.0 0 0.0 0.0
## [6,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 0.0 0.0
## [7,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 0.0 0.0
## [8,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 0.0 35.7
## [9,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 0.0 35.7
## [10,] 0 0.00 0 0 37.0 0 0.0 0 0.0 0 0.0 0.0
## [11,] 0 0.00 0 0 37.0 0 0.0 0 0.0 0 0.0 0.0
## [12,] 57 0.00 0 0 0.0 0 0.0 0 0.0 0 0.0 0.0
## [13,] 57 0.00 0 0 0.0 0 0.0 0 0.0 0 0.0 0.0
## [14,] 0 0.00 0 0 0.0 0 0.0 0 54.7 0 0.0 0.0
## [15,] 0 0.00 0 0 0.0 0 0.0 0 54.7 0 0.0 0.0
## [16,] 0 0.00 0 0 0.0 0 83.3 0 0.0 0 0.0 0.0
## [17,] 0 0.00 0 0 0.0 0 83.3 0 0.0 0 0.0 0.0
## [18,] 0 0.00 0 0 0.0 0 94.1 0 0.0 0 0.0 0.0
## [19,] 0 0.00 0 0 0.0 0 94.1 0 0.0 0 0.0 0.0
## [20,] 0 0.00 0 25 0.0 0 0.0 0 0.0 0 0.0 0.0
## [21,] 0 0.00 0 25 0.0 0 0.0 0 0.0 0 0.0 0.0
## [22,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 21.9 0.0
## [23,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 21.9 0.0
## [24,] 0 0.00 0 0 35.6 0 0.0 0 0.0 0 0.0 0.0
## [25,] 0 0.00 0 0 35.6 0 0.0 0 0.0 0 0.0 0.0
## [26,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 0.0 0.0
## [27,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 0.0 0.0
## [28,] 48 0.00 0 0 0.0 0 0.0 0 0.0 0 0.0 0.0
## [29,] 48 0.00 0 0 0.0 0 0.0 0 0.0 0 0.0 0.0
## [,13] [,14]
## [1,] 0.0 0.0
## [2,] 0.0 0.0
## [3,] 0.0 0.0
## [4,] 0.0 0.0
## [5,] 0.0 0.0
## [6,] 0.0 18.5
## [7,] 0.0 18.5
## [8,] 0.0 0.0
## [9,] 0.0 0.0
## [10,] 0.0 0.0
## [11,] 0.0 0.0
## [12,] 0.0 0.0
## [13,] 0.0 0.0
## [14,] 0.0 0.0
## [15,] 0.0 0.0
## [16,] 0.0 0.0
## [17,] 0.0 0.0
## [18,] 0.0 0.0
## [19,] 0.0 0.0
## [20,] 0.0 0.0
## [21,] 0.0 0.0
## [22,] 0.0 0.0
## [23,] 0.0 0.0
## [24,] 0.0 0.0
## [25,] 0.0 0.0
## [26,] 26.2 0.0
## [27,] 26.2 0.0
## [28,] 0.0 0.0
## [29,] 0.0 0.0
cuts <- mfit$cuts[mfit$selected.terms, ]
dimnames(cuts) <- list(NULL, names(data0[-1]))
cuts
## age weight height adipos neck chest abdom hip thigh knee ankle
## [1,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 0
## [2,] 0 166.75 0 0 0.0 0 0.0 0 0.0 0 0
## [3,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 0
## [4,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 0
## [5,] 0 0.00 0 0 0.0 0 0.0 0 0.0 0 0
## [6,] 0 0.00 0 0 37.0 0 0.0 0 0.0 0 0
## [7,] 57 0.00 0 0 0.0 0 0.0 0 0.0 0 0
## [8,] 0 0.00 0 0 0.0 0 0.0 0 54.7 0 0
## [9,] 0 0.00 0 0 0.0 0 83.3 0 0.0 0 0
## [10,] 0 0.00 0 0 0.0 0 94.1 0 0.0 0 0
## [11,] 0 0.00 0 25 0.0 0 0.0 0 0.0 0 0
## [12,] 0 0.00 0 0 35.6 0 0.0 0 0.0 0 0
## biceps forearm wrist
## [1,] 0.0 0 0.0
## [2,] 0.0 0 0.0
## [3,] 0.0 0 18.5
## [4,] 35.7 0 0.0
## [5,] 35.7 0 0.0
## [6,] 0.0 0 0.0
## [7,] 0.0 0 0.0
## [8,] 0.0 0 0.0
## [9,] 0.0 0 0.0
## [10,] 0.0 0 0.0
## [11,] 0.0 0 0.0
## [12,] 0.0 0 0.0
#---------------------------
#Mars with Earth packageName
marsFit <- earth(data0[,-1], data0[,1])
marsFit
## Selected 12 of 26 terms, and 8 of 14 predictors
## Termination condition: Reached nk 29
## Importance: abdom, hip, thigh, wrist, neck, adipos, age, biceps, ...
## Number of terms at each degree of interaction: 1 11 (additive model)
## GCV 17.50468 RSS 3642.709 GRSq 0.7510529 RSq 0.7927805
summary(marsFit)
## Call: earth(x=data0[,-1], y=data0[,1])
##
## coefficients
## (Intercept) 14.2874127
## h(57-age) -0.0870037
## h(adipos-25.2) 0.8384953
## h(neck-36) -2.3246809
## h(neck-37.2) 1.8840740
## h(abdom-82.8) 1.1864717
## h(abdom-94.1) -0.6677580
## h(hip-94.3) -0.3150710
## h(54.8-thigh) -1.5762031
## h(35.7-biceps) -0.3940005
## h(biceps-35.7) -1.4329575
## h(18.6-wrist) 2.8940520
##
## Selected 12 of 26 terms, and 8 of 14 predictors
## Termination condition: Reached nk 29
## Importance: abdom, hip, thigh, wrist, neck, adipos, age, biceps, ...
## Number of terms at each degree of interaction: 1 11 (additive model)
## GCV 17.50468 RSS 3642.709 GRSq 0.7510529 RSq 0.7927805
plotmo(marsFit) # lines for each variable
## grid: age weight height adipos neck chest abdom hip thigh knee ankle
## 43 176.5 70 25.05 38 99.65 90.95 99.3 59 38.5 22.8
## biceps forearm wrist
## 32.05 28.7 18.3

evimp(marsFit) # variable importance
## nsubsets gcv rss
## abdom 11 100.0 100.0
## hip 8 21.5 28.2
## thigh 8 21.5 28.2
## wrist 8 21.5 28.2
## neck 6 15.3 22.1
## adipos 5 12.3 19.1
## age 4 9.9 16.4
## biceps 3 8.6 14.2
## weight-unused 2 24.5> 25.9>
#-----------------------------
#Mars in Caret
set.seed(100)
marsGrid <- expand.grid(.degree=1:2, .nprune=2:12)
marsTuned <- train(data0[,-1], data0[,1], method="earth", tuneGrid = marsGrid, trControl=trainControl(method="cv"))
marsTuned
## Multivariate Adaptive Regression Spline
##
## 252 samples
## 14 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 227, 227, 225, 226, 226, 228, ...
## Resampling results across tuning parameters:
##
## degree nprune RMSE Rsquared RMSE SD Rsquared SD
## 1 2 5.044316 0.6727468 0.7809712 0.06100162
## 1 3 4.739452 0.6913423 0.4963091 0.06238442
## 1 4 4.642213 0.6997177 0.5786284 0.06984770
## 1 5 4.492786 0.7231939 0.6247598 0.06356960
## 1 6 4.611754 0.7101973 0.4411232 0.04914437
## 1 7 4.700857 0.6976376 0.4835834 0.05916469
## 1 8 4.842530 0.6843949 0.5581004 0.06305395
## 1 9 4.854457 0.6837980 0.5803173 0.06065912
## 1 10 4.772724 0.6902455 0.5790666 0.06540097
## 1 11 4.752607 0.6920398 0.6326078 0.06894019
## 1 12 4.768455 0.6898247 0.6587483 0.07838014
## 2 2 4.874222 0.6724236 0.9228848 0.09590532
## 2 3 4.793400 0.6942643 0.8705950 0.08542952
## 2 4 4.514271 0.7220157 0.6064155 0.07186804
## 2 5 4.603012 0.7144882 0.8138341 0.08028655
## 2 6 4.538188 0.7219043 0.6415806 0.07048993
## 2 7 4.640001 0.7027321 0.6793117 0.09698405
## 2 8 4.672119 0.7008755 0.6675928 0.09665834
## 2 9 4.692060 0.6985932 0.7567097 0.10397864
## 2 10 4.661412 0.7002975 0.8252502 0.11361999
## 2 11 4.623135 0.7023091 0.8642529 0.12150792
## 2 12 4.693517 0.6970898 0.8320978 0.11842722
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 5 and degree = 1.
plot(marsTuned) # lowers RMSE plot

varImp(marsTuned) # variable importance
## earth variable importance
##
## Overall
## abdom 100.000
## thigh 7.835
## hip 7.835
## wrist 7.835
## forearm 0.000
## ankle 0.000
## height 0.000
## age 0.000
## knee 0.000
## neck 0.000
## weight 0.000
## adipos 0.000
## chest 0.000
## biceps 0.000
head(predict(marsTuned, data0[,-1]))
## [1] 17.78167 10.19753 19.30012 12.21028 26.76206 15.42753