# kNN Model
# Get a clean copy of cdc
# NOTE(review): this is a hard-coded, user-specific path; the .Rdata file is
# presumably what defines the `cdc` object used below -- confirm.
load("~/Dropbox/RProjects/Module 8/cdc.Rdata")

# Dummify and factorize
# Expand every column of cdc into numeric indicator columns. fullRank = TRUE
# requests the full-rank (reference-level-dropped) encoding. TRUE/FALSE are
# spelled out because T and F are ordinary variables that can be reassigned.
ds <- dummyVars(~ ., data = cdc, fullRank = TRUE)
cdc2 <- as.data.frame(predict(ds, cdc))
# The outcome has to be a factor so that train() fits a classifier.
cdc2$smoke100 <- as.factor(cdc2$smoke100)

# Create traind and testd
# Seed first so the 80/20 split (and the resampling done later by train())
# is reproducible.
set.seed(2345)
# list = FALSE returns row indices in a form usable directly for subsetting.
cdcsample <- createDataPartition(cdc2$smoke100, p = .8, list = FALSE)
traind <- cdc2[cdcsample, ]
testd <- cdc2[-cdcsample, ]

# Set Structure of Train Process
# 5-fold cross-validation
tc <- trainControl(method = "cv", number = 5)

# Create model
# k-nearest-neighbours on all predictors; predictors are centered and scaled
# as part of the fit, and tuneLength = 40 asks for 40 candidate values of k.
modk1 <- train(
  smoke100 ~ .,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 40,
  trControl = tc,
  data = traind
)

# Examine tuning results
# Plot resampled performance against the candidate values of k.
plot(modk1)

# Dump the full internal structure of the fitted `train` object (verbose).
str(modk1)
## List of 23
## $ method : chr "knn"
## $ modelInfo :List of 13
## ..$ label : chr "k-Nearest Neighbors"
## ..$ library : NULL
## ..$ loop : NULL
## ..$ type : chr [1:2] "Classification" "Regression"
## ..$ parameters:'data.frame': 1 obs. of 3 variables:
## .. ..$ parameter: Factor w/ 1 level "k": 1
## .. ..$ class : Factor w/ 1 level "numeric": 1
## .. ..$ label : Factor w/ 1 level "#Neighbors": 1
## ..$ grid :function (x, y, len = NULL, search = "grid")
## .. ..- attr(*, "srcref")= 'srcref' int [1:8] 8 26 16 19 26 19 8 16
## .. .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x7f7f465ceeb0>
## ..$ fit :function (x, y, wts, param, lev, last, classProbs, ...)
## .. ..- attr(*, "srcref")= 'srcref' int [1:8] 17 25 24 19 25 19 17 24
## .. .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x7f7f465ceeb0>
## ..$ predict :function (modelFit, newdata, submodels = NULL)
## .. ..- attr(*, "srcref")= 'srcref' int [1:8] 25 29 33 19 29 19 25 33
## .. .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x7f7f465ceeb0>
## ..$ predictors:function (x, ...)
## .. ..- attr(*, "srcref")= 'srcref' int [1:8] 34 32 34 67 32 67 34 34
## .. .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x7f7f465ceeb0>
## ..$ tags : chr "Prototype Models"
## ..$ prob :function (modelFit, newdata, submodels = NULL)
## .. ..- attr(*, "srcref")= 'srcref' int [1:8] 36 26 37 61 26 61 36 37
## .. .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x7f7f465ceeb0>
## ..$ levels :function (x)
## .. ..- attr(*, "srcref")= 'srcref' int [1:8] 38 28 38 56 28 56 38 38
## .. .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x7f7f465ceeb0>
## ..$ sort :function (x)
## .. ..- attr(*, "srcref")= 'srcref' int [1:8] 39 26 39 54 26 54 39 39
## .. .. ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x7f7f465ceeb0>
## $ modelType : chr "Classification"
## $ results :'data.frame': 40 obs. of 5 variables:
## ..$ k : int [1:40] 5 7 9 11 13 15 17 19 21 23 ...
## ..$ Accuracy : num [1:40] 0.557 0.56 0.562 0.569 0.572 ...
## ..$ Kappa : num [1:40] 0.11 0.115 0.119 0.133 0.138 ...
## ..$ AccuracySD: num [1:40] 0.00625 0.00691 0.00656 0.00543 0.00903 ...
## ..$ KappaSD : num [1:40] 0.0131 0.0142 0.0137 0.011 0.0181 ...
## $ pred : NULL
## $ bestTune :'data.frame': 1 obs. of 1 variable:
## ..$ k: int 53
## $ call : language train.formula(form = smoke100 ~ ., data = traind, method = "knn", preProcess = c("center", "scale"), tuneLen| __truncated__
## $ dots : list()
## $ metric : chr "Accuracy"
## $ control :List of 27
## ..$ method : chr "cv"
## ..$ number : num 5
## ..$ repeats : logi NA
## ..$ search : chr "grid"
## ..$ p : num 0.75
## ..$ initialWindow : NULL
## ..$ horizon : num 1
## ..$ fixedWindow : logi TRUE
## ..$ skip : num 0
## ..$ verboseIter : logi FALSE
## ..$ returnData : logi TRUE
## ..$ returnResamp : chr "final"
## ..$ savePredictions : chr "none"
## ..$ classProbs : logi FALSE
## ..$ summaryFunction :function (data, lev = NULL, model = NULL)
## ..$ selectionFunction: chr "best"
## ..$ preProcOptions :List of 6
## .. ..$ thresh : num 0.95
## .. ..$ ICAcomp : num 3
## .. ..$ k : num 5
## .. ..$ freqCut : num 19
## .. ..$ uniqueCut: num 10
## .. ..$ cutoff : num 0.9
## ..$ sampling : NULL
## ..$ index :List of 5
## .. ..$ Fold1: int [1:12801] 1 5 6 7 8 9 10 11 12 14 ...
## .. ..$ Fold2: int [1:12800] 1 2 3 4 5 6 7 8 9 11 ...
## .. ..$ Fold3: int [1:12800] 1 2 3 4 5 6 10 11 12 13 ...
## .. ..$ Fold4: int [1:12802] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..$ Fold5: int [1:12801] 2 3 4 7 8 9 10 11 12 13 ...
## ..$ indexOut :List of 5
## .. ..$ Resample1: int [1:3200] 2 3 4 13 15 17 19 21 25 26 ...
## .. ..$ Resample2: int [1:3201] 10 12 16 20 35 36 48 53 60 66 ...
## .. ..$ Resample3: int [1:3201] 7 8 9 27 28 37 38 42 43 47 ...
## .. ..$ Resample4: int [1:3199] 11 18 22 23 32 33 40 41 46 49 ...
## .. ..$ Resample5: int [1:3200] 1 5 6 14 24 29 30 34 45 50 ...
## ..$ indexFinal : NULL
## ..$ timingSamps : num 0
## ..$ predictionBounds : logi [1:2] FALSE FALSE
## ..$ seeds :List of 6
## .. ..$ : int [1:40] 584201 708596 921924 500185 909341 691136 904158 668331 792488 311745 ...
## .. ..$ : int [1:40] 710116 902092 402103 938440 806844 997466 191750 583127 250591 871748 ...
## .. ..$ : int [1:40] 799263 166522 12490 759589 866546 372306 10524 456192 386774 521737 ...
## .. ..$ : int [1:40] 475705 552751 46672 832001 783601 692225 978331 168496 141220 21487 ...
## .. ..$ : int [1:40] 277383 420028 523184 794690 74468 768453 615879 478539 631760 217087 ...
## .. ..$ : int 305207
## ..$ adaptive :List of 4
## .. ..$ min : num 5
## .. ..$ alpha : num 0.05
## .. ..$ method : chr "gls"
## .. ..$ complete: logi TRUE
## ..$ trim : logi FALSE
## ..$ allowParallel : logi TRUE
## $ finalModel :List of 8
## ..$ learn :List of 2
## .. ..$ y: Factor w/ 2 levels "0","1": 1 2 2 1 1 1 2 1 2 2 ...
## .. .. ..- attr(*, "names")= chr [1:16001] "1" "2" "3" "5" ...
## .. ..$ X: num [1:16001, 1:11] -0.732 -0.732 -0.732 1.366 1.366 ...
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:16001] "X1" "X2" "X3" "X5" ...
## .. .. .. ..$ : chr [1:11] "`genhlth.very good`" "genhlth.good" "genhlth.fair" "genhlth.poor" ...
## ..$ k : int 53
## ..$ theDots : list()
## ..$ xNames : chr [1:11] "`genhlth.very good`" "genhlth.good" "genhlth.fair" "genhlth.poor" ...
## ..$ problemType: chr "Classification"
## ..$ tuneValue :'data.frame': 1 obs. of 1 variable:
## .. ..$ k: int 53
## ..$ obsLevels : chr [1:2] "0" "1"
## .. ..- attr(*, "ordered")= logi FALSE
## ..$ param : list()
## ..- attr(*, "class")= chr "knn3"
## $ preProcess :List of 22
## ..$ dim : int [1:2] 16001 11
## ..$ bc : NULL
## ..$ yj : NULL
## ..$ et : NULL
## ..$ invHyperbolicSine: NULL
## ..$ mean : Named num [1:11] 0.349 0.2817 0.1019 0.0345 0.7433 ...
## .. ..- attr(*, "names")= chr [1:11] "`genhlth.very good`" "genhlth.good" "genhlth.fair" "genhlth.poor" ...
## ..$ std : Named num [1:11] 0.477 0.45 0.302 0.183 0.437 ...
## .. ..- attr(*, "names")= chr [1:11] "`genhlth.very good`" "genhlth.good" "genhlth.fair" "genhlth.poor" ...
## ..$ ranges : NULL
## ..$ rotation : NULL
## ..$ method :List of 3
## .. ..$ center: chr [1:11] "`genhlth.very good`" "genhlth.good" "genhlth.fair" "genhlth.poor" ...
## .. ..$ scale : chr [1:11] "`genhlth.very good`" "genhlth.good" "genhlth.fair" "genhlth.poor" ...
## .. ..$ ignore: chr(0)
## ..$ thresh : num 0.95
## ..$ pcaComp : NULL
## ..$ numComp : NULL
## ..$ ica : NULL
## ..$ wildcards :List of 2
## .. ..$ PCA: chr(0)
## .. ..$ ICA: chr(0)
## ..$ k : num 5
## ..$ knnSummary :function (x, ...)
## ..$ bagImp : NULL
## ..$ median : NULL
## ..$ data : NULL
## ..$ rangeBounds : num [1:2] 0 1
## ..$ call : chr "scrubed"
## ..- attr(*, "class")= chr "preProcess"
## $ trainingData:'data.frame': 16001 obs. of 12 variables:
## ..$ .outcome : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 2 1 2 2 ...
## ..$ genhlth.very good: num [1:16001] 0 0 0 1 1 1 0 0 0 0 ...
## ..$ genhlth.good : num [1:16001] 1 1 1 0 0 0 1 1 0 0 ...
## ..$ genhlth.fair : num [1:16001] 0 0 0 0 0 0 0 0 1 0 ...
## ..$ genhlth.poor : num [1:16001] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ exerany : num [1:16001] 0 0 1 0 1 0 0 1 1 1 ...
## ..$ hlthplan : num [1:16001] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ height : num [1:16001] 70 64 60 61 64 67 65 70 69 70 ...
## ..$ weight : num [1:16001] 175 125 105 150 114 170 150 180 168 170 ...
## ..$ wtdesire : num [1:16001] 175 115 105 130 114 160 130 170 148 170 ...
## ..$ age : num [1:16001] 77 33 49 55 55 45 27 44 62 69 ...
## ..$ gender.f : num [1:16001] 0 1 1 1 1 0 1 0 0 0 ...
## $ resample :'data.frame': 5 obs. of 3 variables:
## ..$ Accuracy: num [1:5] 0.596 0.589 0.592 0.586 0.589
## ..$ Kappa : num [1:5] 0.184 0.169 0.175 0.162 0.169
## ..$ Resample: chr [1:5] "Fold1" "Fold4" "Fold5" "Fold2" ...
## $ resampledCM :'data.frame': 200 obs. of 6 variables:
## ..$ cell1 : num [1:200] 1016 1016 1035 1053 1058 ...
## ..$ cell2 : num [1:200] 673 673 654 636 631 620 591 607 601 596 ...
## ..$ cell3 : num [1:200] 775 769 770 762 771 763 748 747 763 763 ...
## ..$ cell4 : num [1:200] 736 742 741 749 740 748 763 764 748 748 ...
## ..$ k : int [1:200] 5 7 9 11 13 15 17 19 21 23 ...
## ..$ Resample: chr [1:200] "Fold1" "Fold1" "Fold1" "Fold1" ...
## $ perfNames : chr [1:2] "Accuracy" "Kappa"
## $ maximize : logi TRUE
## $ yLimits : NULL
## $ times :List of 3
## ..$ everything: 'proc_time' Named num [1:5] 96.589 0.845 97.479 0 0
## .. ..- attr(*, "names")= chr [1:5] "user.self" "sys.self" "elapsed" "user.child" ...
## ..$ final : 'proc_time' Named num [1:5] 0.053 0 0.052 0 0
## .. ..- attr(*, "names")= chr [1:5] "user.self" "sys.self" "elapsed" "user.child" ...
## ..$ prediction: logi [1:3] NA NA NA
## $ levels : chr [1:2] "0" "1"
## ..- attr(*, "ordered")= logi FALSE
## $ terms :Classes 'terms', 'formula' language smoke100 ~ `genhlth.very good` + genhlth.good + genhlth.fair + genhlth.poor + exerany + hlthplan + height + | __truncated__
## .. ..- attr(*, "variables")= language list(smoke100, `genhlth.very good`, genhlth.good, genhlth.fair, genhlth.poor, exerany, hlthplan, height, wei| __truncated__
## .. ..- attr(*, "factors")= int [1:12, 1:11] 0 1 0 0 0 0 0 0 0 0 ...
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:12] "smoke100" "`genhlth.very good`" "genhlth.good" "genhlth.fair" ...
## .. .. .. ..$ : chr [1:11] "`genhlth.very good`" "genhlth.good" "genhlth.fair" "genhlth.poor" ...
## .. ..- attr(*, "term.labels")= chr [1:11] "`genhlth.very good`" "genhlth.good" "genhlth.fair" "genhlth.poor" ...
## .. ..- attr(*, "order")= int [1:11] 1 1 1 1 1 1 1 1 1 1 ...
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(smoke100, `genhlth.very good`, genhlth.good, genhlth.fair, genhlth.poor, exerany, hlthplan, height, wei| __truncated__
## .. ..- attr(*, "dataClasses")= Named chr [1:12] "factor" "numeric" "numeric" "numeric" ...
## .. .. ..- attr(*, "names")= chr [1:12] "smoke100" "genhlth.very good" "genhlth.good" "genhlth.fair" ...
## $ coefnames : chr [1:11] "`genhlth.very good`" "genhlth.good" "genhlth.fair" "genhlth.poor" ...
## $ xlevels : Named list()
## - attr(*, "class")= chr [1:2] "train" "train.formula"
# Show the value of k that was selected during tuning.
modk1$bestTune
## k
## 25 53