Amazon link for Applied Predictive Modeling
Book webpage for data and code
### CHAPTER 3
library(AppliedPredictiveModeling)
# Load the two-class example data, then inspect the predictor data frame and
# the class factor used throughout this section.
data(twoClassData)
str(object = predictors)
## 'data.frame': 208 obs. of 2 variables:
## $ PredictorA: num 0.158 0.655 0.706 0.199 0.395 ...
## $ PredictorB: num 0.1609 0.4918 0.6333 0.0881 0.4152 ...
str(object = classes)
## Factor w/ 2 levels "Class1","Class2": 2 2 2 2 2 2 2 2 2 2 ...
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(MASS)
library(e1071)
# Fix the RNG seed so the partition below is reproducible.
set.seed(1)
# Row indices for an 80% training partition of `classes`; list = FALSE
# returns the indices as a matrix instead of a list.
trainingRows <- createDataPartition(y = classes, p = 0.80, list = FALSE)
# Training set: keep the sampled rows (positive integer indexing).
trainClasses <- classes[trainingRows]
trainPredictors <- predictors[trainingRows, ]
# Test set: everything that was NOT sampled (negative integer indexing).
testClasses <- classes[-trainingRows]
testPredictors <- predictors[-trainingRows, ]
## Resampling
# Illustration only: build the row indices for three resampled versions of
# the training set, each holding 80% of the training rows.
set.seed(1)
repeatedSplits <- createDataPartition(y = trainClasses, times = 3, p = 0.80)
# Ten cross-validation folds; returnTrain = TRUE makes each list element hold
# the rows used for fitting rather than the held-out rows.
set.seed(1)
cvSplits <- createFolds(y = trainClasses, k = 10, returnTrain = TRUE)
# Pull the row numbers of the first fold out of the list and subset with them.
fold1 <- cvSplits[[1]]
cvClasses1 <- trainClasses[fold1]
cvPredictors1 <- trainPredictors[fold1, ]
# Compare the full training-set size with the size of the first fold.
nrow(trainPredictors)
## [1] 167
nrow(cvPredictors1)
## [1] 151
# German credit data: hold out 25% of the rows for testing and tune a
# radial-basis SVM on the remaining 75%.
data(GermanCredit)
dim(GermanCredit)
## [1] 1000 62
# Number of training rows (75% of the data, rounded down).
smp <- floor(0.75 * nrow(GermanCredit))
set.seed(1056)
# NOTE: `train` (row indices) shares its name with caret's train() function,
# so the model fit further down calls caret::train() explicitly.
train <- sample(seq_len(nrow(GermanCredit)), size = smp)
GermanCreditTrain <- GermanCredit[train, ]
GermanCreditTest <- GermanCredit[-train, ]
# Columns 45 and 27 are dropped from the training set only; the test set
# keeps all 62 columns.
# NOTE(review): presumably degenerate dummy columns -- confirm.
GermanCreditTrain <- GermanCreditTrain[, -c(45, 27)]
nrow(GermanCreditTrain)
## [1] 750
nrow(GermanCreditTest)
## [1] 250
# Training data without column 10 (the Class outcome used by the models).
GermanCrTrain <- GermanCreditTrain[, -10]
# Fit the SVM. This call was missing, so `svmFit` was undefined when printed.
# Settings are reconstructed from the printed model summary that follows:
# radial basis kernel, centered and scaled predictors, 10-fold CV repeated
# 5 times, and 10 candidate cost values.
# NOTE(review): the seed behind the recorded output is unknown; 1056 is reused
# here for reproducibility, so resampled metrics may differ slightly.
set.seed(1056)
svmFit <- caret::train(
  Class ~ .,
  data = GermanCreditTrain,
  method = "svmRadial",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = trainControl(method = "repeatedcv", number = 10, repeats = 5)
)
svmFit
## Support Vector Machines with Radial Basis Function Kernel
##
## 750 samples
## 59 predictor
## 2 classes: 'Bad', 'Good'
##
## Pre-processing: centered, scaled
## Resampling: Cross-Validated (10 fold, repeated 5 times)
##
## Summary of sample sizes: 675, 676, 675, 676, 676, 674, ...
##
## Resampling results across tuning parameters:
##
## C Accuracy Kappa Accuracy SD Kappa SD
## 0.25 0.7018888 0.002515678 0.005863237 0.01244942
## 0.50 0.7405416 0.220277514 0.025376534 0.08637099
## 1.00 0.7634283 0.354748781 0.037828874 0.11130363
## 2.00 0.7586169 0.369256639 0.043017447 0.11137303
## 4.00 0.7575570 0.381431626 0.044844448 0.11041982
## 8.00 0.7591252 0.395678716 0.043805380 0.10415694
## 16.00 0.7447413 0.371788663 0.050686639 0.11296241
## 32.00 0.7367409 0.362353981 0.049554760 0.11205542
## 64.00 0.7265926 0.340618910 0.052166999 0.11731634
## 128.00 0.7252484 0.338827732 0.052349832 0.11661624
##
## Tuning parameter 'sigma' was held constant at a value of 0.009438083
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.009438083 and C = 1.
# Line plot of the average resampled performance across the tuning grid,
# with the x axis drawn on a log (base 2) scale.
plot(
  svmFit,
  scales = list(x = list(log = 2))
)
# Class predictions from the tuned model for the held-out credit rows.
predictedClasses <- predict(svmFit, newdata = GermanCreditTest)
## Fit an SVM directly with e1071 (package defaults) for comparison
svm.model <- svm(Class ~ ., data = GermanCreditTrain)
summary(svm.model)
##
## Call:
## svm(formula = Class ~ ., data = GermanCreditTrain)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
## gamma: 0.01694915
##
## Number of Support Vectors: 515
##
## ( 299 216 )
##
##
## Number of Classes: 2
##
## Levels:
## Bad Good
# Predict on the test rows with the outcome (column 10) removed, then
# cross-tabulate the predictions against the true classes.
svm.pred <- predict(svm.model, newdata = GermanCreditTest[, -10])
table(
  pred = svm.pred,
  true = GermanCreditTest[, 10]
)
## true
## pred Bad Good
## Bad 24 16
## Good 52 158
### CHAPTER 5
# Load the solubility data and list the objects whose names start with "solT".
data(solubility)
ls(pattern = "^solT")
## [1] "solTestX" "solTestXtrans" "solTestY" "solTrainX"
## [5] "solTrainXtrans" "solTrainY"
# Show eight randomly chosen column names (seeded so the draw is repeatable).
set.seed(2)
sample(x = names(solTrainX), size = 8)
## [1] "FP043" "FP160" "FP130" "FP038"
## [5] "NumBonds" "NumNonHAtoms" "FP029" "FP185"
# Build one data frame for the formula interface: the transformed training
# predictors plus the solubility outcome as an extra column.
trainingData <- solTrainXtrans
trainingData[["Solubility"]] <- solTrainY
# Ordinary least squares using every predictor column.
lmFitAllPredictors <- lm(Solubility ~ ., data = trainingData)
summary(lmFitAllPredictors)
##
## Call:
## lm(formula = Solubility ~ ., data = trainingData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.75620 -0.28304 0.01165 0.30030 1.54887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.431e+00 2.162e+00 1.124 0.261303
## FP001 3.594e-01 3.185e-01 1.128 0.259635
## FP002 1.456e-01 2.637e-01 0.552 0.580960
## FP003 -3.969e-02 1.314e-01 -0.302 0.762617
## FP004 -3.049e-01 1.371e-01 -2.223 0.026520 *
## FP005 2.837e+00 9.598e-01 2.956 0.003223 **
## FP006 -6.886e-02 2.041e-01 -0.337 0.735917
## FP007 4.044e-02 1.152e-01 0.351 0.725643
## FP008 1.121e-01 1.636e-01 0.685 0.493331
## FP009 -8.242e-01 8.395e-01 -0.982 0.326536
## FP010 4.193e-01 3.136e-01 1.337 0.181579
## FP011 5.158e-02 2.198e-01 0.235 0.814503
## FP012 -1.346e-02 1.611e-01 -0.084 0.933452
## FP013 -4.519e-01 5.473e-01 -0.826 0.409311
## FP014 3.281e-01 4.550e-01 0.721 0.471044
## FP015 -1.839e-01 1.521e-01 -1.209 0.226971
## FP016 -1.367e-01 1.548e-01 -0.883 0.377340
## FP017 -1.704e-01 1.386e-01 -1.230 0.219187
## FP018 -3.824e-01 2.388e-01 -1.602 0.109655
## FP019 -3.131e-01 3.863e-01 -0.811 0.417862
## FP020 2.072e-01 2.135e-01 0.971 0.332078
## FP021 -5.956e-02 2.632e-01 -0.226 0.821060
## FP022 2.336e-01 3.456e-01 0.676 0.499180
## FP023 -3.193e-01 1.909e-01 -1.672 0.094866 .
## FP024 -4.272e-01 2.827e-01 -1.511 0.131162
## FP025 4.376e-01 4.538e-01 0.964 0.335184
## FP026 2.068e-01 2.564e-01 0.806 0.420273
## FP027 2.424e-01 2.429e-01 0.998 0.318594
## FP028 1.070e-01 1.200e-01 0.892 0.372547
## FP029 -9.857e-02 2.199e-01 -0.448 0.654163
## FP030 -2.361e-01 2.468e-01 -0.957 0.339048
## FP031 8.690e-02 1.346e-01 0.646 0.518754
## FP032 -1.204e+00 7.772e-01 -1.550 0.121628
## FP033 5.766e-01 4.236e-01 1.361 0.173882
## FP034 -1.794e-01 2.618e-01 -0.685 0.493486
## FP035 -2.140e-01 1.704e-01 -1.256 0.209605
## FP036 7.701e-02 1.657e-01 0.465 0.642133
## FP037 1.098e-01 1.725e-01 0.636 0.524693
## FP038 2.721e-01 1.888e-01 1.441 0.150030
## FP039 2.011e-02 2.888e-01 0.070 0.944491
## FP040 5.477e-01 1.890e-01 2.898 0.003873 **
## FP041 -4.265e-01 3.004e-01 -1.420 0.156143
## FP042 -9.901e-01 7.078e-01 -1.399 0.162294
## FP043 -3.725e-02 2.096e-01 -0.178 0.859011
## FP044 -3.860e-01 2.184e-01 -1.768 0.077562 .
## FP045 2.120e-01 1.299e-01 1.631 0.103238
## FP046 -3.504e-02 2.733e-01 -0.128 0.898010
## FP047 -1.675e-02 1.414e-01 -0.118 0.905775
## FP048 2.610e-01 2.434e-01 1.073 0.283810
## FP049 1.241e-01 1.971e-01 0.630 0.529036
## FP050 9.087e-03 1.410e-01 0.064 0.948648
## FP051 1.050e-01 2.014e-01 0.521 0.602210
## FP052 -4.569e-01 2.482e-01 -1.841 0.066029 .
## FP053 2.994e-01 2.466e-01 1.214 0.225129
## FP054 2.734e-02 1.829e-01 0.149 0.881229
## FP055 -3.662e-01 1.970e-01 -1.858 0.063530 .
## FP056 -2.961e-01 2.979e-01 -0.994 0.320541
## FP057 -1.002e-01 1.379e-01 -0.727 0.467703
## FP058 3.100e-01 8.074e-01 0.384 0.701129
## FP059 -1.615e-01 1.690e-01 -0.956 0.339514
## FP060 2.350e-01 1.474e-01 1.595 0.111209
## FP061 -6.365e-01 1.440e-01 -4.421 1.13e-05 ***
## FP062 -5.224e-01 2.961e-01 -1.764 0.078078 .
## FP063 -2.001e+00 1.287e+00 -1.554 0.120553
## FP064 2.549e-01 1.221e-01 2.087 0.037207 *
## FP065 -2.844e-01 1.197e-01 -2.377 0.017714 *
## FP066 2.093e-01 1.264e-01 1.655 0.098301 .
## FP067 -1.406e-01 1.540e-01 -0.913 0.361631
## FP068 4.964e-01 2.028e-01 2.447 0.014630 *
## FP069 1.324e-01 8.824e-02 1.501 0.133885
## FP070 3.453e-03 8.088e-02 0.043 0.965963
## FP071 1.474e-01 1.237e-01 1.192 0.233775
## FP072 -9.773e-01 2.763e-01 -3.537 0.000431 ***
## FP073 -4.671e-01 2.072e-01 -2.254 0.024474 *
## FP074 1.793e-01 1.206e-01 1.487 0.137566
## FP075 1.231e-01 1.035e-01 1.188 0.235034
## FP076 5.166e-01 1.704e-01 3.031 0.002525 **
## FP077 1.644e-01 1.236e-01 1.331 0.183739
## FP078 -3.715e-01 1.588e-01 -2.339 0.019608 *
## FP079 4.254e-01 1.881e-01 2.262 0.023992 *
## FP080 3.101e-01 1.554e-01 1.996 0.046340 *
## FP081 -3.208e-01 1.117e-01 -2.873 0.004192 **
## FP082 1.243e-01 9.524e-02 1.305 0.192379
## FP083 -6.916e-01 2.134e-01 -3.241 0.001248 **
## FP084 3.626e-01 2.381e-01 1.523 0.128171
## FP085 -3.310e-01 1.428e-01 -2.317 0.020785 *
## FP086 1.169e-02 9.774e-02 0.120 0.904834
## FP087 4.559e-02 2.797e-01 0.163 0.870568
## FP088 2.416e-01 9.959e-02 2.425 0.015534 *
## FP089 5.999e-01 2.320e-01 2.586 0.009915 **
## FP090 -2.450e-02 1.154e-01 -0.212 0.831930
## FP091 -2.858e-01 3.185e-01 -0.897 0.369847
## FP092 2.665e-01 2.069e-01 1.288 0.198156
## FP093 1.974e-01 1.087e-01 1.816 0.069803 .
## FP094 -1.991e-01 1.441e-01 -1.381 0.167707
## FP095 -1.403e-01 1.124e-01 -1.248 0.212449
## FP096 -5.024e-01 1.459e-01 -3.445 0.000605 ***
## FP097 -2.635e-01 1.666e-01 -1.582 0.114020
## FP098 -2.865e-01 1.633e-01 -1.754 0.079863 .
## FP099 2.592e-01 2.568e-01 1.009 0.313136
## FP100 -4.008e-01 3.034e-01 -1.321 0.186949
## FP101 -1.760e-01 3.019e-01 -0.583 0.560147
## FP102 2.445e-01 3.449e-01 0.709 0.478579
## FP103 -1.493e-01 9.148e-02 -1.632 0.103176
## FP104 -1.428e-01 1.176e-01 -1.214 0.225238
## FP105 -6.912e-02 1.395e-01 -0.495 0.620482
## FP106 1.128e-01 1.288e-01 0.876 0.381495
## FP107 2.778e+00 8.247e-01 3.369 0.000796 ***
## FP108 8.836e-03 1.852e-01 0.048 0.961970
## FP109 8.200e-01 2.267e-01 3.617 0.000319 ***
## FP110 3.680e-01 3.311e-01 1.111 0.266811
## FP111 -5.565e-01 1.420e-01 -3.918 9.80e-05 ***
## FP112 -1.079e-01 2.705e-01 -0.399 0.690108
## FP113 1.511e-01 9.481e-02 1.594 0.111478
## FP114 -1.201e-01 1.891e-01 -0.635 0.525628
## FP115 -1.896e-01 1.405e-01 -1.349 0.177736
## FP116 7.778e-03 1.897e-01 0.041 0.967300
## FP117 2.583e-01 1.779e-01 1.452 0.147070
## FP118 -1.964e-01 1.230e-01 -1.596 0.110940
## FP119 7.515e-01 2.630e-01 2.857 0.004402 **
## FP120 -1.814e-01 1.794e-01 -1.011 0.312362
## FP121 -4.731e-02 3.957e-01 -0.120 0.904866
## FP122 1.048e-01 1.041e-01 1.007 0.314268
## FP123 3.926e-02 1.765e-01 0.222 0.824066
## FP124 1.235e-01 1.705e-01 0.724 0.469243
## FP125 -2.633e-04 1.151e-01 -0.002 0.998175
## FP126 -2.782e-01 1.177e-01 -2.363 0.018373 *
## FP127 -6.123e-01 1.739e-01 -3.521 0.000457 ***
## FP128 -5.424e-01 1.932e-01 -2.807 0.005136 **
## FP129 -6.731e-02 2.243e-01 -0.300 0.764167
## FP130 -1.034e+00 4.106e-01 -2.518 0.012009 *
## FP131 2.158e-01 1.617e-01 1.335 0.182405
## FP132 -1.976e-01 2.382e-01 -0.830 0.406998
## FP133 -1.573e-01 1.217e-01 -1.293 0.196319
## FP134 2.496e+00 1.196e+00 2.086 0.037310 *
## FP135 1.818e-01 1.319e-01 1.379 0.168460
## FP136 -7.763e-02 3.131e-01 -0.248 0.804237
## FP137 -4.613e-02 2.978e-01 -0.155 0.876947
## FP138 -9.392e-02 1.906e-01 -0.493 0.622251
## FP139 7.659e-02 4.063e-01 0.189 0.850517
## FP140 3.145e-01 2.149e-01 1.463 0.143784
## FP141 2.219e-01 2.765e-01 0.802 0.422532
## FP142 6.272e-01 1.488e-01 4.214 2.83e-05 ***
## FP143 9.981e-01 2.929e-01 3.407 0.000692 ***
## FP144 2.207e-01 2.839e-01 0.777 0.437195
## FP145 -1.146e-01 1.188e-01 -0.964 0.335169
## FP146 -2.324e-01 2.086e-01 -1.114 0.265716
## FP147 1.502e-01 1.228e-01 1.223 0.221703
## FP148 -1.600e-01 1.319e-01 -1.213 0.225560
## FP149 1.172e-01 1.650e-01 0.710 0.477770
## FP150 9.046e-02 1.577e-01 0.574 0.566368
## FP151 2.899e-01 3.120e-01 0.929 0.353202
## FP152 -2.544e-01 2.990e-01 -0.851 0.395087
## FP153 -3.765e-01 2.773e-01 -1.358 0.175029
## FP154 -1.027e+00 2.033e-01 -5.054 5.50e-07 ***
## FP155 4.888e-01 2.916e-01 1.676 0.094163 .
## FP156 -3.602e-02 3.636e-01 -0.099 0.921109
## FP157 -4.715e-01 2.468e-01 -1.910 0.056505 .
## FP158 1.669e-02 1.925e-01 0.087 0.930943
## FP159 1.800e-01 2.432e-01 0.740 0.459378
## FP160 1.525e-02 2.177e-01 0.070 0.944155
## FP161 -2.440e-01 1.433e-01 -1.703 0.089063 .
## FP162 4.910e-02 1.859e-01 0.264 0.791710
## FP163 4.785e-01 3.121e-01 1.533 0.125659
## FP164 5.096e-01 1.899e-01 2.684 0.007446 **
## FP165 5.793e-01 2.146e-01 2.700 0.007103 **
## FP166 -6.582e-02 2.185e-01 -0.301 0.763293
## FP167 -6.044e-01 2.515e-01 -2.403 0.016502 *
## FP168 -1.187e-01 1.872e-01 -0.634 0.526173
## FP169 -1.705e-01 8.312e-02 -2.051 0.040650 *
## FP170 -7.902e-02 1.560e-01 -0.506 0.612745
## FP171 4.651e-01 1.186e-01 3.922 9.64e-05 ***
## FP172 -4.426e-01 2.440e-01 -1.814 0.070120 .
## FP173 4.243e-01 1.657e-01 2.561 0.010634 *
## FP174 -1.010e-01 2.098e-01 -0.481 0.630311
## FP175 -4.657e-02 2.481e-01 -0.188 0.851136
## FP176 9.736e-01 2.644e-01 3.682 0.000249 ***
## FP177 1.386e-01 2.393e-01 0.579 0.562538
## FP178 6.497e-02 2.079e-01 0.313 0.754691
## FP179 -3.415e-02 2.232e-01 -0.153 0.878437
## FP180 -7.905e-01 5.523e-01 -1.431 0.152839
## FP181 4.925e-01 3.218e-01 1.531 0.126309
## FP182 -1.124e-01 1.310e-01 -0.858 0.391384
## FP183 2.998e-01 7.143e-01 0.420 0.674836
## FP184 4.876e-01 1.580e-01 3.087 0.002103 **
## FP185 -3.778e-01 2.037e-01 -1.854 0.064108 .
## FP186 -3.654e-01 1.953e-01 -1.871 0.061710 .
## FP187 4.457e-01 2.682e-01 1.662 0.097015 .
## FP188 1.475e-01 1.258e-01 1.172 0.241519
## FP189 -1.984e-02 3.468e-01 -0.057 0.954384
## FP190 2.629e-01 3.018e-01 0.871 0.383981
## FP191 2.799e-01 1.465e-01 1.911 0.056388 .
## FP192 -2.404e-01 2.751e-01 -0.874 0.382534
## FP193 1.502e-01 1.494e-01 1.005 0.315159
## FP194 8.029e-01 6.379e-01 1.259 0.208566
## FP195 5.967e-02 3.435e-01 0.174 0.862158
## FP196 1.091e-02 2.544e-01 0.043 0.965812
## FP197 -3.736e-02 1.569e-01 -0.238 0.811793
## FP198 1.896e-01 2.665e-01 0.712 0.476893
## FP199 -9.932e-02 1.797e-01 -0.553 0.580702
## FP200 -6.421e-02 2.161e-01 -0.297 0.766462
## FP201 -4.838e-01 1.980e-01 -2.444 0.014771 *
## FP202 5.664e-01 1.869e-01 3.031 0.002527 **
## FP203 2.586e-01 6.447e-01 0.401 0.688462
## FP204 -1.371e-01 2.543e-01 -0.539 0.590008
## FP205 7.177e-02 1.561e-01 0.460 0.645857
## FP206 -6.769e-02 1.860e-01 -0.364 0.716094
## FP207 -5.538e-03 2.060e-01 -0.027 0.978560
## FP208 -5.338e-01 6.324e-01 -0.844 0.398925
## MolWeight -1.232e+00 2.296e-01 -5.365 1.09e-07 ***
## NumAtoms -1.478e+01 3.473e+00 -4.257 2.35e-05 ***
## NumNonHAtoms 1.795e+01 3.166e+00 5.670 2.07e-08 ***
## NumBonds 9.843e+00 2.681e+00 3.671 0.000260 ***
## NumNonHBonds -1.030e+01 1.793e+00 -5.746 1.35e-08 ***
## NumMultBonds 2.107e-01 1.754e-01 1.201 0.229990
## NumRotBonds -5.213e-01 1.334e-01 -3.908 0.000102 ***
## NumDblBonds -7.492e-01 3.163e-01 -2.369 0.018111 *
## NumAromaticBonds -2.364e+00 6.232e-01 -3.794 0.000161 ***
## NumHydrogen 8.347e-01 1.880e-01 4.439 1.04e-05 ***
## NumCarbon 1.730e-02 3.763e-01 0.046 0.963335
## NumNitrogen 6.125e+00 3.045e+00 2.011 0.044645 *
## NumOxygen 2.389e+00 4.523e-01 5.283 1.69e-07 ***
## NumSulfer -8.508e+00 3.619e+00 -2.351 0.018994 *
## NumChlorine -7.449e+00 1.989e+00 -3.744 0.000195 ***
## NumHalogen 1.408e+00 2.109e+00 0.668 0.504615
## NumRings 1.276e+00 6.716e-01 1.901 0.057731 .
## HydrophilicFactor 1.099e-02 1.137e-01 0.097 0.922998
## SurfaceArea1 8.825e-02 6.058e-02 1.457 0.145643
## SurfaceArea2 9.555e-02 5.615e-02 1.702 0.089208 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5524 on 722 degrees of freedom
## Multiple R-squared: 0.9446, Adjusted R-squared: 0.9271
## F-statistic: 54.03 on 228 and 722 DF, p-value: < 2.2e-16
# Predict solubility for the transformed test-set predictors.
lmPred1 <- predict(lmFitAllPredictors, newdata = solTestXtrans)
head(lmPred1)
## 20 21 23 25 28 31
## 0.99370933 0.06834627 -0.69877632 0.84796356 -0.16578324 1.40815083
# Pair observed and predicted values in the obs/pred layout passed to
# caret's defaultSummary(), then compute the test-set performance.
lmValues1 <- data.frame(obs = solTestY, pred = lmPred1)
defaultSummary(data = lmValues1)
## RMSE Rsquared
## 0.7455802 0.8722236
# Robust linear regression on the same training frame.
rlmFitAllPredictors <- rlm(Solubility ~ ., data = trainingData)
# Resampling scheme for caret: 10-fold cross-validation.
ctrl <- trainControl(method = "cv", number = 10)
# Seed immediately before train() so the fold assignment is reproducible.
set.seed(100)
lmFit1 <- train(
  x = solTrainXtrans,
  y = solTrainY,
  method = "lm",
  trControl = ctrl
)
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient
## fit may be misleading
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient
## fit may be misleading
lmFit1
## Linear Regression
##
## 951 samples
## 228 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
##
## Summary of sample sizes: 856, 857, 855, 856, 856, 855, ...
##
## Resampling results
##
## RMSE Rsquared RMSE SD Rsquared SD
## 0.7210355 0.8768359 0.06998223 0.02467069
##
##
Compiled by: Subasish Das