#install.packages("faraway"); install.packages("mda"); install.packages("earth")
require(earth, quietly = T); require(caret, quietly = T); require(faraway, quietly = T); require(mda, quietly = T)
## Warning: package 'earth' was built under R version 3.2.2
## Warning: package 'plotmo' was built under R version 3.2.2
## Warning: package 'plotrix' was built under R version 3.2.2
## Warning: package 'TeachingDemos' was built under R version 3.2.2
## Warning: package 'caret' was built under R version 3.2.2
## Warning: package 'ggplot2' was built under R version 3.2.2
## Warning: package 'faraway' was built under R version 3.2.2
## 
## Attaching package: 'faraway'
## 
## The following object is masked from 'package:lattice':
## 
##     melanoma
## Warning: package 'mda' was built under R version 3.2.2
## Loaded mda 0.4-7
setwd("C:/Users/sumeetaneja/Documents/ACS/RFL2014Model/DataSet2014Model")
fat <- read.csv("fat.csv", header=T)
#data(fat)
head(fat);
##   brozek siri density age weight height adipos  free neck chest abdom
## 1   12.6 12.3  1.0708  23 154.25  67.75   23.7 134.9 36.2  93.1  85.2
## 2    6.9  6.1  1.0853  22 173.25  72.25   23.4 161.3 38.5  93.6  83.0
## 3   24.6 25.3  1.0414  22 154.00  66.25   24.7 116.0 34.0  95.8  87.9
## 4   10.9 10.4  1.0751  26 184.75  72.25   24.9 164.7 37.4 101.8  86.4
## 5   27.8 28.7  1.0340  24 184.25  71.25   25.6 133.1 34.4  97.3 100.0
## 6   20.6 20.9  1.0502  24 210.25  74.75   26.5 167.0 39.0 104.5  94.4
##     hip thigh knee ankle biceps forearm wrist
## 1  94.5  59.0 37.3  21.9   32.0    27.4  17.1
## 2  98.7  58.7 37.3  23.4   30.5    28.9  18.2
## 3  99.2  59.6 38.9  24.0   28.8    25.2  16.6
## 4 101.2  60.1 37.3  22.8   32.4    29.4  18.2
## 5 101.9  63.2 42.2  24.0   32.2    27.7  17.7
## 6 107.8  66.0 42.0  25.6   35.7    30.6  18.8
data0 <- fat[c(-1,-3, -8)]
mfit <- mars(data0[,-1], data0[,1], degree=1, prune=T)
mfit$coef
##              [,1]
##  [1,] 14.80795672
##  [2,] -0.10715821
##  [3,]  2.84377719
##  [4,] -1.26743240
##  [5,] -0.43375363
##  [6,]  1.81930622
##  [7,] -0.08755307
##  [8,] -1.50178567
##  [9,]  1.15735067
## [10,] -0.57251777
## [11,]  0.65232601
## [12,] -1.94923169
head(mfit$x)
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## [1,]    1  0.0  1.4    0  3.7  0.0   34    0  1.9   0.0   0.0   0.6
## [2,]    1  6.5  0.3    0  5.2  1.5   35    0  0.0   0.0   0.0   2.9
## [3,]    1  0.0  1.9    0  6.9  0.0   35    0  4.6   0.0   0.0   0.0
## [4,]    1 18.0  0.3    0  3.3  0.4   31    0  3.1   0.0   0.0   1.8
## [5,]    1 17.5  0.8    0  3.5  0.0   33    0 16.7   5.9   0.6   0.0
## [6,]    1 43.5  0.0    0  0.0  2.0   33    0 11.1   0.3   1.5   3.4
mfit$gcv
## [1] 17.74072
sum(mfit$res^2)
## [1] 3691.83
mfit$selected.terms
##  [1]  1  4  7  8  9 10 13 15 16 18 20 24
mfit$cuts
##       [,1]   [,2] [,3] [,4] [,5] [,6]  [,7] [,8] [,9] [,10] [,11] [,12]
##  [1,]    0   0.00    0    0  0.0    0   0.0    0  0.0     0   0.0   0.0
##  [2,]    0   0.00    0    0  0.0    0 111.2    0  0.0     0   0.0   0.0
##  [3,]    0   0.00    0    0  0.0    0 111.2    0  0.0     0   0.0   0.0
##  [4,]    0 166.75    0    0  0.0    0   0.0    0  0.0     0   0.0   0.0
##  [5,]    0 166.75    0    0  0.0    0   0.0    0  0.0     0   0.0   0.0
##  [6,]    0   0.00    0    0  0.0    0   0.0    0  0.0     0   0.0   0.0
##  [7,]    0   0.00    0    0  0.0    0   0.0    0  0.0     0   0.0   0.0
##  [8,]    0   0.00    0    0  0.0    0   0.0    0  0.0     0   0.0  35.7
##  [9,]    0   0.00    0    0  0.0    0   0.0    0  0.0     0   0.0  35.7
## [10,]    0   0.00    0    0 37.0    0   0.0    0  0.0     0   0.0   0.0
## [11,]    0   0.00    0    0 37.0    0   0.0    0  0.0     0   0.0   0.0
## [12,]   57   0.00    0    0  0.0    0   0.0    0  0.0     0   0.0   0.0
## [13,]   57   0.00    0    0  0.0    0   0.0    0  0.0     0   0.0   0.0
## [14,]    0   0.00    0    0  0.0    0   0.0    0 54.7     0   0.0   0.0
## [15,]    0   0.00    0    0  0.0    0   0.0    0 54.7     0   0.0   0.0
## [16,]    0   0.00    0    0  0.0    0  83.3    0  0.0     0   0.0   0.0
## [17,]    0   0.00    0    0  0.0    0  83.3    0  0.0     0   0.0   0.0
## [18,]    0   0.00    0    0  0.0    0  94.1    0  0.0     0   0.0   0.0
## [19,]    0   0.00    0    0  0.0    0  94.1    0  0.0     0   0.0   0.0
## [20,]    0   0.00    0   25  0.0    0   0.0    0  0.0     0   0.0   0.0
## [21,]    0   0.00    0   25  0.0    0   0.0    0  0.0     0   0.0   0.0
## [22,]    0   0.00    0    0  0.0    0   0.0    0  0.0     0  21.9   0.0
## [23,]    0   0.00    0    0  0.0    0   0.0    0  0.0     0  21.9   0.0
## [24,]    0   0.00    0    0 35.6    0   0.0    0  0.0     0   0.0   0.0
## [25,]    0   0.00    0    0 35.6    0   0.0    0  0.0     0   0.0   0.0
## [26,]    0   0.00    0    0  0.0    0   0.0    0  0.0     0   0.0   0.0
## [27,]    0   0.00    0    0  0.0    0   0.0    0  0.0     0   0.0   0.0
## [28,]   48   0.00    0    0  0.0    0   0.0    0  0.0     0   0.0   0.0
## [29,]   48   0.00    0    0  0.0    0   0.0    0  0.0     0   0.0   0.0
##       [,13] [,14]
##  [1,]   0.0   0.0
##  [2,]   0.0   0.0
##  [3,]   0.0   0.0
##  [4,]   0.0   0.0
##  [5,]   0.0   0.0
##  [6,]   0.0  18.5
##  [7,]   0.0  18.5
##  [8,]   0.0   0.0
##  [9,]   0.0   0.0
## [10,]   0.0   0.0
## [11,]   0.0   0.0
## [12,]   0.0   0.0
## [13,]   0.0   0.0
## [14,]   0.0   0.0
## [15,]   0.0   0.0
## [16,]   0.0   0.0
## [17,]   0.0   0.0
## [18,]   0.0   0.0
## [19,]   0.0   0.0
## [20,]   0.0   0.0
## [21,]   0.0   0.0
## [22,]   0.0   0.0
## [23,]   0.0   0.0
## [24,]   0.0   0.0
## [25,]   0.0   0.0
## [26,]  26.2   0.0
## [27,]  26.2   0.0
## [28,]   0.0   0.0
## [29,]   0.0   0.0
cuts <- mfit$cuts[mfit$selected.terms, ]
dimnames(cuts) <- list(NULL, names(data0[-1]))
cuts
##       age weight height adipos neck chest abdom hip thigh knee ankle
##  [1,]   0   0.00      0      0  0.0     0   0.0   0   0.0    0     0
##  [2,]   0 166.75      0      0  0.0     0   0.0   0   0.0    0     0
##  [3,]   0   0.00      0      0  0.0     0   0.0   0   0.0    0     0
##  [4,]   0   0.00      0      0  0.0     0   0.0   0   0.0    0     0
##  [5,]   0   0.00      0      0  0.0     0   0.0   0   0.0    0     0
##  [6,]   0   0.00      0      0 37.0     0   0.0   0   0.0    0     0
##  [7,]  57   0.00      0      0  0.0     0   0.0   0   0.0    0     0
##  [8,]   0   0.00      0      0  0.0     0   0.0   0  54.7    0     0
##  [9,]   0   0.00      0      0  0.0     0  83.3   0   0.0    0     0
## [10,]   0   0.00      0      0  0.0     0  94.1   0   0.0    0     0
## [11,]   0   0.00      0     25  0.0     0   0.0   0   0.0    0     0
## [12,]   0   0.00      0      0 35.6     0   0.0   0   0.0    0     0
##       biceps forearm wrist
##  [1,]    0.0       0   0.0
##  [2,]    0.0       0   0.0
##  [3,]    0.0       0  18.5
##  [4,]   35.7       0   0.0
##  [5,]   35.7       0   0.0
##  [6,]    0.0       0   0.0
##  [7,]    0.0       0   0.0
##  [8,]    0.0       0   0.0
##  [9,]    0.0       0   0.0
## [10,]    0.0       0   0.0
## [11,]    0.0       0   0.0
## [12,]    0.0       0   0.0
#---------------------------
#Mars with Earth packageName
  marsFit <- earth(data0[,-1], data0[,1])
  marsFit
## Selected 12 of 26 terms, and 8 of 14 predictors
## Termination condition: Reached nk 29
## Importance: abdom, hip, thigh, wrist, neck, adipos, age, biceps, ...
## Number of terms at each degree of interaction: 1 11 (additive model)
## GCV 17.50468    RSS 3642.709    GRSq 0.7510529    RSq 0.7927805
  summary(marsFit)
## Call: earth(x=data0[,-1], y=data0[,1])
## 
##                coefficients
## (Intercept)      14.2874127
## h(57-age)        -0.0870037
## h(adipos-25.2)    0.8384953
## h(neck-36)       -2.3246809
## h(neck-37.2)      1.8840740
## h(abdom-82.8)     1.1864717
## h(abdom-94.1)    -0.6677580
## h(hip-94.3)      -0.3150710
## h(54.8-thigh)    -1.5762031
## h(35.7-biceps)   -0.3940005
## h(biceps-35.7)   -1.4329575
## h(18.6-wrist)     2.8940520
## 
## Selected 12 of 26 terms, and 8 of 14 predictors
## Termination condition: Reached nk 29
## Importance: abdom, hip, thigh, wrist, neck, adipos, age, biceps, ...
## Number of terms at each degree of interaction: 1 11 (additive model)
## GCV 17.50468    RSS 3642.709    GRSq 0.7510529    RSq 0.7927805
  plotmo(marsFit) # lines for each variable
##  grid:    age weight height adipos neck chest abdom  hip thigh knee ankle
##            43  176.5     70  25.05   38 99.65 90.95 99.3    59 38.5  22.8
##  biceps forearm wrist
##   32.05    28.7  18.3

  evimp(marsFit) # variable importance
##               nsubsets   gcv    rss
## abdom               11 100.0  100.0
## hip                  8  21.5   28.2
## thigh                8  21.5   28.2
## wrist                8  21.5   28.2
## neck                 6  15.3   22.1
## adipos               5  12.3   19.1
## age                  4   9.9   16.4
## biceps               3   8.6   14.2
## weight-unused        2  24.5>  25.9>
#-----------------------------
#Mars in Caret    
  set.seed(100)
  marsGrid <- expand.grid(.degree=1:2, .nprune=2:12)
  marsTuned <- train(data0[,-1], data0[,1], method="earth", tuneGrid = marsGrid, trControl=trainControl(method="cv"))
  marsTuned 
## Multivariate Adaptive Regression Spline 
## 
## 252 samples
##  14 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 227, 227, 225, 226, 226, 228, ... 
## Resampling results across tuning parameters:
## 
##   degree  nprune  RMSE      Rsquared   RMSE SD    Rsquared SD
##   1        2      5.044316  0.6727468  0.7809712  0.06100162 
##   1        3      4.739452  0.6913423  0.4963091  0.06238442 
##   1        4      4.642213  0.6997177  0.5786284  0.06984770 
##   1        5      4.492786  0.7231939  0.6247598  0.06356960 
##   1        6      4.611754  0.7101973  0.4411232  0.04914437 
##   1        7      4.700857  0.6976376  0.4835834  0.05916469 
##   1        8      4.842530  0.6843949  0.5581004  0.06305395 
##   1        9      4.854457  0.6837980  0.5803173  0.06065912 
##   1       10      4.772724  0.6902455  0.5790666  0.06540097 
##   1       11      4.752607  0.6920398  0.6326078  0.06894019 
##   1       12      4.768455  0.6898247  0.6587483  0.07838014 
##   2        2      4.874222  0.6724236  0.9228848  0.09590532 
##   2        3      4.793400  0.6942643  0.8705950  0.08542952 
##   2        4      4.514271  0.7220157  0.6064155  0.07186804 
##   2        5      4.603012  0.7144882  0.8138341  0.08028655 
##   2        6      4.538188  0.7219043  0.6415806  0.07048993 
##   2        7      4.640001  0.7027321  0.6793117  0.09698405 
##   2        8      4.672119  0.7008755  0.6675928  0.09665834 
##   2        9      4.692060  0.6985932  0.7567097  0.10397864 
##   2       10      4.661412  0.7002975  0.8252502  0.11361999 
##   2       11      4.623135  0.7023091  0.8642529  0.12150792 
##   2       12      4.693517  0.6970898  0.8320978  0.11842722 
## 
## RMSE was used to select the optimal model using  the smallest value.
## The final values used for the model were nprune = 5 and degree = 1.
  plot(marsTuned) # lowers RMSE plot

  varImp(marsTuned)  # variable importance
## earth variable importance
## 
##         Overall
## abdom   100.000
## thigh     7.835
## hip       7.835
## wrist     7.835
## forearm   0.000
## ankle     0.000
## height    0.000
## age       0.000
## knee      0.000
## neck      0.000
## weight    0.000
## adipos    0.000
## chest     0.000
## biceps    0.000
  head(predict(marsTuned, data0[,-1]))
## [1] 17.78167 10.19753 19.30012 12.21028 26.76206 15.42753