data <- read.csv('F:/Machine Learning/Data Science/Machine Learning/RF/Cardiotocographic.csv')
str(data)
## 'data.frame': 2126 obs. of 22 variables:
## $ LB : num 120 132 133 134 132 134 134 122 122 122 ...
## $ AC : num 0 0.01 0 0 0.01 0 0 0 0 0 ...
## $ FM : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UC : num 0 0.01 0.01 0.01 0.01 0.01 0.01 0 0 0 ...
## $ DL : num 0 0 0 0 0 0.01 0.01 0 0 0 ...
## $ DS : num 0 0 0 0 0 0 0 0 0 0 ...
## $ DP : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ASTV : num 73 17 16 16 16 26 29 83 84 86 ...
## $ MSTV : num 0.5 2.1 2.1 2.4 2.4 5.9 6.3 0.5 0.5 0.3 ...
## $ ALTV : num 43 0 0 0 0 0 0 6 5 6 ...
## $ MLTV : num 2.4 10.4 13.4 23 19.9 0 0 15.6 13.6 10.6 ...
## $ Width : num 64 130 130 117 117 150 150 68 68 68 ...
## $ Min : num 62 68 68 53 53 50 50 62 62 62 ...
## $ Max : num 126 198 198 170 170 200 200 130 130 130 ...
## $ Nmax : num 2 6 5 11 9 5 6 0 0 1 ...
## $ Nzeros : num 0 1 1 0 0 3 3 0 0 0 ...
## $ Mode : num 120 141 141 137 137 76 71 122 122 122 ...
## $ Mean : num 137 136 135 134 136 107 107 122 122 122 ...
## $ Median : num 121 140 138 137 138 107 106 123 123 123 ...
## $ Variance: num 73 12 13 13 11 170 215 3 3 1 ...
## $ Tendency: num 1 0 0 1 1 0 0 1 1 1 ...
## $ NSP : num 2 1 1 1 1 3 3 3 3 3 ...
#convert numeric to factor
data$NSP <- as.factor(data$NSP)
str(data)
## 'data.frame': 2126 obs. of 22 variables:
## $ LB : num 120 132 133 134 132 134 134 122 122 122 ...
## $ AC : num 0 0.01 0 0 0.01 0 0 0 0 0 ...
## $ FM : num 0 0 0 0 0 0 0 0 0 0 ...
## $ UC : num 0 0.01 0.01 0.01 0.01 0.01 0.01 0 0 0 ...
## $ DL : num 0 0 0 0 0 0.01 0.01 0 0 0 ...
## $ DS : num 0 0 0 0 0 0 0 0 0 0 ...
## $ DP : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ASTV : num 73 17 16 16 16 26 29 83 84 86 ...
## $ MSTV : num 0.5 2.1 2.1 2.4 2.4 5.9 6.3 0.5 0.5 0.3 ...
## $ ALTV : num 43 0 0 0 0 0 0 6 5 6 ...
## $ MLTV : num 2.4 10.4 13.4 23 19.9 0 0 15.6 13.6 10.6 ...
## $ Width : num 64 130 130 117 117 150 150 68 68 68 ...
## $ Min : num 62 68 68 53 53 50 50 62 62 62 ...
## $ Max : num 126 198 198 170 170 200 200 130 130 130 ...
## $ Nmax : num 2 6 5 11 9 5 6 0 0 1 ...
## $ Nzeros : num 0 1 1 0 0 3 3 0 0 0 ...
## $ Mode : num 120 141 141 137 137 76 71 122 122 122 ...
## $ Mean : num 137 136 135 134 136 107 107 122 122 122 ...
## $ Median : num 121 140 138 137 138 107 106 123 123 123 ...
## $ Variance: num 73 12 13 13 11 170 215 3 3 1 ...
## $ Tendency: num 1 0 0 1 1 0 0 1 1 1 ...
## $ NSP : Factor w/ 3 levels "1","2","3": 2 1 1 1 1 3 3 3 3 3 ...
#frequency of each class
table(data$NSP)
##
## 1 2 3
## 1655 295 176
The classes are imbalanced: the large majority of observations fall in class 1. For the moment we'll go ahead and build the random forest model anyway.
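To put the imbalance in perspective, the counts can be expressed as proportions. A minimal sketch using base R on the same table:
#class proportions (roughly 78% of observations are class 1)
round(prop.table(table(data$NSP)), 3)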
#Data Partition
set.seed(2311)
pd <- sample(2, nrow(data), replace = TRUE, prob = c(0.75, 0.25))
train <- data[pd==1,]
test <- data[pd==2,]
dim(train)
## [1] 1599 22
dim(test)
## [1] 527 22
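Note that sample() assigns rows at random without regard to class, so the rare classes 2 and 3 may end up slightly under- or over-represented in either partition. If the caret package is available, a stratified split preserves the class proportions; this is a sketch of an alternative (idx, train.strat and test.strat are illustrative names, not the partition used below):
#stratified 75/25 split on the response NSP
idx <- caret::createDataPartition(data$NSP, p = 0.75, list = FALSE)
train.strat <- data[idx, ]
test.strat <- data[-idx, ]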
#Random Forest
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.5.3
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
#set seed so that the results are reproducible
set.seed(639)
model.rf <- randomForest(NSP ~ ., data=train)
print(model.rf)
##
## Call:
## randomForest(formula = NSP ~ ., data = train)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 4
##
## OOB estimate of error rate: 6.63%
## Confusion matrix:
## 1 2 3 class.error
## 1 1225 18 6 0.01921537
## 2 56 154 6 0.28703704
## 3 10 10 114 0.14925373
#Prediction on train data
p1 <- predict(model.rf, train)
#actual class
head(train$NSP)
## [1] 2 1 1 3 3 3
## Levels: 1 2 3
#predicted class
head(p1)
## 1 3 5 6 7 9
## 2 1 1 3 3 3
## Levels: 1 2 3
#confusion matrix
library(caret)
## Warning: package 'caret' was built under R version 3.5.2
## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
confusionMatrix(train$NSP, p1)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 1249 0 0
## 2 1 215 0
## 3 0 0 134
##
## Overall Statistics
##
## Accuracy : 0.9994
## 95% CI : (0.9965, 1)
## No Information Rate : 0.7817
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9983
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.9992 1.0000 1.0000
## Specificity 1.0000 0.9993 1.0000
## Pos Pred Value 1.0000 0.9954 1.0000
## Neg Pred Value 0.9971 1.0000 1.0000
## Prevalence 0.7817 0.1345 0.0838
## Detection Rate 0.7811 0.1345 0.0838
## Detection Prevalence 0.7811 0.1351 0.0838
## Balanced Accuracy 0.9996 0.9996 1.0000
#misclassification error on train data
(tab <- table(train$NSP, p1))
## p1
## 1 2 3
## 1 1249 0 0
## 2 1 215 0
## 3 0 0 134
1-sum(diag(tab))/sum(tab)
## [1] 0.0006253909
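Equivalently, the misclassification error is just the proportion of mismatched labels, which can be wrapped in a one-line helper (misclass is an illustrative name):
#equivalent one-liner: proportion of labels where actual != predicted
misclass <- function(actual, predicted) mean(actual != predicted)
misclass(train$NSP, p1)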
#predict test data
p2 <- predict(model.rf, test)
confusionMatrix(test$NSP, p2)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 397 5 4
## 2 24 54 1
## 3 4 1 37
##
## Overall Statistics
##
## Accuracy : 0.926
## 95% CI : (0.9002, 0.9468)
## No Information Rate : 0.8065
## P-Value [Acc > NIR] : 9.606e-15
##
## Kappa : 0.7917
## Mcnemar's Test P-Value : 0.005995
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.9341 0.9000 0.88095
## Specificity 0.9118 0.9465 0.98969
## Pos Pred Value 0.9778 0.6835 0.88095
## Neg Pred Value 0.7686 0.9866 0.98969
## Prevalence 0.8065 0.1139 0.07970
## Detection Rate 0.7533 0.1025 0.07021
## Detection Prevalence 0.7704 0.1499 0.07970
## Balanced Accuracy 0.9229 0.9232 0.93532
#misclassification error on test data
(tab1 <- table(test$NSP, p2))
## p2
## 1 2 3
## 1 397 5 4
## 2 24 54 1
## 3 4 1 37
1-sum(diag(tab1))/sum(tab1)
## [1] 0.0740038
There is about 7.4% misclassification error on the test data.
#Assess error rate of the model
#plot
plot(model.rf)
As the number of trees grows, the OOB error initially drops and then levels off, remaining more or less constant.
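The default plot draws one line per class plus the overall OOB error, but without a legend. A small sketch that redraws it with labeled lines, reading the labels from the model's err.rate matrix (the line colors and styles match the matplot defaults used by plot.randomForest):
#replot the error curves with a legend; column 1 of err.rate is the overall OOB error
plot(model.rf, main = "Error rate vs. number of trees")
legend("topright", colnames(model.rf$err.rate), col = 1:4, lty = 1:4, cex = 0.8)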
#model tune
t <- tuneRF(train[, -22], train[, 22],
            stepFactor = 0.5,
            plot = TRUE,
            ntreeTry = 300,
            trace = TRUE,
            improve = 0.05)
## mtry = 4 OOB error = 6.32%
## Searching left ...
## mtry = 8 OOB error = 6.19%
## 0.01980198 0.05
## Searching right ...
## mtry = 2 OOB error = 7.57%
## -0.1980198 0.05
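tuneRF() returns a matrix with columns mtry and OOBError, so the best value can be extracted programmatically rather than read off the plot (best.mtry is an illustrative name; here it picks 8, the value used below). Note also that t shadows the base transpose function t(), so a more distinctive name for the result would be safer:
#mtry with the lowest OOB error from the tuneRF search
(best.mtry <- t[which.min(t[, "OOBError"]), "mtry"])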
#tune random forest model
model.rf.tune <- randomForest(NSP ~ ., data = train, ntree = 300,
                              mtry = 8,
                              importance = TRUE,
                              proximity = TRUE)
print(model.rf.tune)
##
## Call:
## randomForest(formula = NSP ~ ., data = train, ntree = 300, mtry = 8, importance = TRUE, proximity = TRUE)
## Type of random forest: classification
## Number of trees: 300
## No. of variables tried at each split: 8
##
## OOB estimate of error rate: 5.94%
## Confusion matrix:
## 1 2 3 class.error
## 1 1226 18 5 0.01841473
## 2 53 158 5 0.26851852
## 3 7 7 120 0.10447761
Now we can see the out-of-bag (OOB) error has come down to 5.94%, compared with 6.63% for the previous model.
#predict train data based on the tuned model
p3 <- predict(model.rf.tune, train)
confusionMatrix(train$NSP, p3)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 1249 0 0
## 2 1 215 0
## 3 0 0 134
##
## Overall Statistics
##
## Accuracy : 0.9994
## 95% CI : (0.9965, 1)
## No Information Rate : 0.7817
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9983
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.9992 1.0000 1.0000
## Specificity 1.0000 0.9993 1.0000
## Pos Pred Value 1.0000 0.9954 1.0000
## Neg Pred Value 0.9971 1.0000 1.0000
## Prevalence 0.7817 0.1345 0.0838
## Detection Rate 0.7811 0.1345 0.0838
## Detection Prevalence 0.7811 0.1351 0.0838
## Balanced Accuracy 0.9996 0.9996 1.0000
#misclassification error on train data
(tab3 <- table(train$NSP, p3))
## p3
## 1 2 3
## 1 1249 0 0
## 2 1 215 0
## 3 0 0 134
1-sum(diag(tab3))/sum(tab3)
## [1] 0.0006253909
#predict test data based on the tuned model
p4 <- predict(model.rf.tune, test)
confusionMatrix(test$NSP, p4)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 397 5 4
## 2 24 55 0
## 3 4 1 37
##
## Overall Statistics
##
## Accuracy : 0.9279
## 95% CI : (0.9024, 0.9485)
## No Information Rate : 0.8065
## P-Value [Acc > NIR] : 3.15e-15
##
## Kappa : 0.797
## Mcnemar's Test P-Value : 0.003761
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.9341 0.9016 0.90244
## Specificity 0.9118 0.9485 0.98971
## Pos Pred Value 0.9778 0.6962 0.88095
## Neg Pred Value 0.7686 0.9866 0.99175
## Prevalence 0.8065 0.1157 0.07780
## Detection Rate 0.7533 0.1044 0.07021
## Detection Prevalence 0.7704 0.1499 0.07970
## Balanced Accuracy 0.9229 0.9251 0.94608
#misclassification error on test data
(tab4 <- table(test$NSP, p4))
## p4
## 1 2 3
## 1 397 5 4
## 2 24 55 0
## 3 4 1 37
1-sum(diag(tab4))/sum(tab4)
## [1] 0.07210626
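Since both confusion tables are still in memory, the test errors of the base and tuned models can be compared side by side:
#test-set misclassification error: base model vs. tuned model
c(base = 1 - sum(diag(tab1)) / sum(tab1),
  tuned = 1 - sum(diag(tab4)) / sum(tab4))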
#size of trees
hist(treesize(model.rf.tune),
     main = "No. of Nodes for the Trees",
     col = "skyblue")
#Variable Importance
#We can find out which variables play an important role in the model
varImpPlot(model.rf.tune)
#quantitative importance scores for each variable
importance(model.rf.tune)
## 1 2 3 MeanDecreaseAccuracy
## LB 18.988106 10.0662036 7.3900467 22.496239
## AC 10.684776 5.3569650 3.5495452 11.291522
## FM 7.193225 5.9542216 1.2718967 8.421946
## UC 5.919436 11.6539520 10.9359839 13.991154
## DL 5.633990 -2.0119608 2.4445839 5.714253
## DS 0.000000 0.0000000 0.0000000 0.000000
## DP 0.000000 0.0000000 0.0000000 0.000000
## ASTV 21.438450 35.4955411 30.8953364 37.942666
## MSTV 16.040224 26.3031829 27.9475573 29.968936
## ALTV 31.951966 30.5542305 31.7209229 44.277321
## MLTV 15.864795 10.7044122 9.2014777 20.046501
## Width 14.600477 6.0376623 5.0364253 16.352951
## Min 14.377770 5.8003500 9.6455755 17.209855
## Max 14.106621 4.7542247 3.7610882 16.421261
## Nmax 14.792497 2.8330954 4.9612644 14.463043
## Nzeros 3.793574 5.2360077 0.0846484 6.540763
## Mode 19.300786 10.2548959 9.7619355 22.180935
## Mean 27.327601 10.9449574 25.6151327 35.230992
## Median 16.972071 8.8690557 10.5780053 21.724362
## Variance 15.249323 -0.4641505 8.5916684 14.911443
## Tendency 8.107981 0.9589586 3.0796020 7.886058
## MeanDecreaseGini
## LB 20.3395074
## AC 3.8491874
## FM 6.3986712
## UC 6.9616373
## DL 0.8506429
## DS 0.0000000
## DP 0.0000000
## ASTV 97.7488543
## MSTV 95.9042792
## ALTV 90.5510618
## MLTV 28.6725794
## Width 21.1700252
## Min 20.2322714
## Max 16.3618017
## Nmax 12.9068543
## Nzeros 2.8528819
## Mode 34.3321276
## Mean 71.5040418
## Median 30.9508859
## Variance 15.4577737
## Tendency 3.9763672
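The table is easier to scan when sorted. A short sketch ranking the predictors by MeanDecreaseGini (imp is an illustrative name):
#top five variables by mean decrease in Gini impurity
imp <- importance(model.rf.tune)
head(imp[order(imp[, "MeanDecreaseGini"], decreasing = TRUE), ], 5)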
varImp(model.rf.tune)
## 1 2 3
## LB 18.988106 10.0662036 7.3900467
## AC 10.684776 5.3569650 3.5495452
## FM 7.193225 5.9542216 1.2718967
## UC 5.919436 11.6539520 10.9359839
## DL 5.633990 -2.0119608 2.4445839
## DS 0.000000 0.0000000 0.0000000
## DP 0.000000 0.0000000 0.0000000
## ASTV 21.438450 35.4955411 30.8953364
## MSTV 16.040224 26.3031829 27.9475573
## ALTV 31.951966 30.5542305 31.7209229
## MLTV 15.864795 10.7044122 9.2014777
## Width 14.600477 6.0376623 5.0364253
## Min 14.377770 5.8003500 9.6455755
## Max 14.106621 4.7542247 3.7610882
## Nmax 14.792497 2.8330954 4.9612644
## Nzeros 3.793574 5.2360077 0.0846484
## Mode 19.300786 10.2548959 9.7619355
## Mean 27.327601 10.9449574 25.6151327
## Median 16.972071 8.8690557 10.5780053
## Variance 15.249323 -0.4641505 8.5916684
## Tendency 8.107981 0.9589586 3.0796020
#number of times each variable is used across the whole forest
varUsed(model.rf.tune)
## [1] 1759 242 747 509 118 0 0 2893 1617 2755 2127 1947 1841 1717
## [15] 1357 458 1653 2097 1828 1352 507
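varUsed() returns the counts in the column order of the predictors, so attaching the variable names makes the vector much easier to read (NSP is column 22 and is excluded):
#label and sort the usage counts by predictor name
sort(setNames(varUsed(model.rf.tune), names(train)[-22]), decreasing = TRUE)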
#partial dependence plot of ASTV for class 1
partialPlot(model.rf.tune, train, ASTV, '1')
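The same kind of plot can be drawn for the remaining classes to see how ASTV shifts the vote toward classes 2 and 3:
#partial dependence of ASTV for the other two classes
partialPlot(model.rf.tune, train, ASTV, '2')
partialPlot(model.rf.tune, train, ASTV, '3')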
#extract a single tree from the forest
getTree(model.rf.tune, 1, labelVar = TRUE)
## left daughter right daughter split var split point status prediction
## 1 2 3 ASTV 58.500 1 <NA>
## 2 4 5 Mean 107.500 1 <NA>
## 3 6 7 ALTV 13.500 1 <NA>
## 4 8 9 Variance 30.500 1 <NA>
## 5 10 11 ALTV 60.500 1 <NA>
## 6 12 13 MLTV 0.050 1 <NA>
## 7 14 15 ALTV 68.500 1 <NA>
## 8 0 0 <NA> 0.000 -1 1
## 9 16 17 MSTV 5.300 1 <NA>
## 10 18 19 Min 137.500 1 <NA>
## 11 0 0 <NA> 0.000 -1 2
## 12 20 21 AC 0.005 1 <NA>
## 13 22 23 Mode 106.000 1 <NA>
## 14 24 25 UC 0.005 1 <NA>
## 15 26 27 ALTV 71.500 1 <NA>
## 16 0 0 <NA> 0.000 -1 3
## 17 0 0 <NA> 0.000 -1 2
## 18 28 29 Mean 148.500 1 <NA>
## 19 30 31 Min 141.500 1 <NA>
## 20 32 33 Min 68.500 1 <NA>
## 21 0 0 <NA> 0.000 -1 1
## 22 34 35 MSTV 2.450 1 <NA>
## 23 36 37 ALTV 3.500 1 <NA>
## 24 38 39 Tendency -0.500 1 <NA>
## 25 40 41 MLTV 5.050 1 <NA>
## 26 42 43 Width 13.000 1 <NA>
## 27 0 0 <NA> 0.000 -1 3
## 28 44 45 ALTV 45.500 1 <NA>
## 29 46 47 Width 70.500 1 <NA>
## 30 48 49 LB 149.500 1 <NA>
## 31 50 51 Min 145.500 1 <NA>
## 32 0 0 <NA> 0.000 -1 3
## 33 52 53 UC 0.005 1 <NA>
## 34 0 0 <NA> 0.000 -1 3
## 35 54 55 Nmax 8.000 1 <NA>
## 36 56 57 LB 151.000 1 <NA>
## 37 58 59 Min 63.500 1 <NA>
## 38 60 61 Nmax 1.500 1 <NA>
## 39 62 63 MSTV 0.750 1 <NA>
## 40 0 0 <NA> 0.000 -1 1
## 41 64 65 Tendency 0.500 1 <NA>
## 42 0 0 <NA> 0.000 -1 1
## 43 0 0 <NA> 0.000 -1 3
## 44 66 67 LB 148.500 1 <NA>
## 45 68 69 Mode 143.000 1 <NA>
## 46 70 71 MLTV 10.850 1 <NA>
## 47 72 73 ASTV 44.500 1 <NA>
## 48 74 75 Mode 151.500 1 <NA>
## 49 0 0 <NA> 0.000 -1 1
## 50 0 0 <NA> 0.000 -1 1
## 51 76 77 Min 150.500 1 <NA>
## 52 0 0 <NA> 0.000 -1 3
## 53 0 0 <NA> 0.000 -1 1
## 54 0 0 <NA> 0.000 -1 3
## 55 0 0 <NA> 0.000 -1 1
## 56 78 79 Min 61.000 1 <NA>
## 57 0 0 <NA> 0.000 -1 2
## 58 80 81 ASTV 74.000 1 <NA>
## 59 82 83 MLTV 5.200 1 <NA>
## 60 84 85 LB 125.500 1 <NA>
## 61 86 87 Median 144.500 1 <NA>
## 62 88 89 Max 174.000 1 <NA>
## 63 90 91 LB 138.500 1 <NA>
## 64 92 93 Width 18.000 1 <NA>
## 65 94 95 Median 150.000 1 <NA>
## 66 96 97 FM 0.300 1 <NA>
## 67 98 99 Mean 136.500 1 <NA>
## 68 0 0 <NA> 0.000 -1 1
## 69 0 0 <NA> 0.000 -1 2
## 70 0 0 <NA> 0.000 -1 1
## 71 100 101 MLTV 12.250 1 <NA>
## 72 102 103 Min 100.000 1 <NA>
## 73 104 105 ALTV 9.000 1 <NA>
## 74 0 0 <NA> 0.000 -1 1
## 75 106 107 Variance 4.000 1 <NA>
## 76 108 109 Width 17.000 1 <NA>
## 77 110 111 Median 164.000 1 <NA>
## 78 112 113 LB 133.500 1 <NA>
## 79 114 115 Mean 99.000 1 <NA>
## 80 0 0 <NA> 0.000 -1 2
## 81 0 0 <NA> 0.000 -1 3
## 82 0 0 <NA> 0.000 -1 1
## 83 116 117 MSTV 0.450 1 <NA>
## 84 0 0 <NA> 0.000 -1 3
## 85 0 0 <NA> 0.000 -1 1
## 86 0 0 <NA> 0.000 -1 2
## 87 0 0 <NA> 0.000 -1 3
## 88 118 119 ASTV 78.500 1 <NA>
## 89 120 121 FM 0.010 1 <NA>
## 90 0 0 <NA> 0.000 -1 1
## 91 0 0 <NA> 0.000 -1 2
## 92 0 0 <NA> 0.000 -1 1
## 93 122 123 Mode 138.500 1 <NA>
## 94 0 0 <NA> 0.000 -1 1
## 95 0 0 <NA> 0.000 -1 2
## 96 124 125 Max 191.500 1 <NA>
## 97 126 127 MSTV 2.250 1 <NA>
## 98 0 0 <NA> 0.000 -1 2
## 99 128 129 Width 149.500 1 <NA>
## 100 130 131 ASTV 41.000 1 <NA>
## 101 0 0 <NA> 0.000 -1 1
## 102 0 0 <NA> 0.000 -1 1
## 103 0 0 <NA> 0.000 -1 2
## 104 132 133 LB 151.500 1 <NA>
## 105 134 135 MSTV 0.700 1 <NA>
## 106 0 0 <NA> 0.000 -1 2
## 107 136 137 Median 160.500 1 <NA>
## 108 0 0 <NA> 0.000 -1 1
## 109 0 0 <NA> 0.000 -1 2
## 110 138 139 Mode 160.500 1 <NA>
## 111 0 0 <NA> 0.000 -1 1
## 112 0 0 <NA> 0.000 -1 3
## 113 0 0 <NA> 0.000 -1 1
## 114 0 0 <NA> 0.000 -1 3
## 115 140 141 LB 142.500 1 <NA>
## 116 142 143 Median 147.500 1 <NA>
## 117 144 145 MLTV 10.100 1 <NA>
## 118 146 147 Width 30.500 1 <NA>
## 119 148 149 Min 128.500 1 <NA>
## 120 0 0 <NA> 0.000 -1 3
## 121 0 0 <NA> 0.000 -1 2
## 122 150 151 Min 125.000 1 <NA>
## 123 0 0 <NA> 0.000 -1 2
## 124 152 153 Mean 109.500 1 <NA>
## 125 154 155 ASTV 52.500 1 <NA>
## 126 0 0 <NA> 0.000 -1 1
## 127 0 0 <NA> 0.000 -1 2
## 128 0 0 <NA> 0.000 -1 2
## 129 0 0 <NA> 0.000 -1 1
## 130 0 0 <NA> 0.000 -1 1
## 131 0 0 <NA> 0.000 -1 2
## 132 0 0 <NA> 0.000 -1 1
## 133 0 0 <NA> 0.000 -1 2
## 134 0 0 <NA> 0.000 -1 1
## 135 0 0 <NA> 0.000 -1 2
## 136 0 0 <NA> 0.000 -1 1
## 137 0 0 <NA> 0.000 -1 2
## 138 0 0 <NA> 0.000 -1 1
## 139 0 0 <NA> 0.000 -1 2
## 140 156 157 Variance 1.500 1 <NA>
## 141 158 159 MSTV 0.550 1 <NA>
## 142 0 0 <NA> 0.000 -1 1
## 143 160 161 Width 32.000 1 <NA>
## 144 162 163 ASTV 59.500 1 <NA>
## 145 0 0 <NA> 0.000 -1 3
## 146 0 0 <NA> 0.000 -1 2
## 147 164 165 Min 119.000 1 <NA>
## 148 166 167 ALTV 41.500 1 <NA>
## 149 0 0 <NA> 0.000 -1 3
## 150 0 0 <NA> 0.000 -1 2
## 151 0 0 <NA> 0.000 -1 1
## 152 168 169 Median 117.000 1 <NA>
## 153 0 0 <NA> 0.000 -1 1
## 154 0 0 <NA> 0.000 -1 1
## 155 0 0 <NA> 0.000 -1 2
## 156 170 171 MLTV 7.200 1 <NA>
## 157 0 0 <NA> 0.000 -1 1
## 158 0 0 <NA> 0.000 -1 2
## 159 0 0 <NA> 0.000 -1 1
## 160 0 0 <NA> 0.000 -1 2
## 161 0 0 <NA> 0.000 -1 1
## 162 0 0 <NA> 0.000 -1 1
## 163 172 173 ALTV 10.500 1 <NA>
## 164 174 175 Width 33.500 1 <NA>
## 165 176 177 Width 35.500 1 <NA>
## 166 0 0 <NA> 0.000 -1 3
## 167 0 0 <NA> 0.000 -1 2
## 168 0 0 <NA> 0.000 -1 1
## 169 0 0 <NA> 0.000 -1 3
## 170 0 0 <NA> 0.000 -1 3
## 171 0 0 <NA> 0.000 -1 1
## 172 0 0 <NA> 0.000 -1 2
## 173 0 0 <NA> 0.000 -1 1
## 174 0 0 <NA> 0.000 -1 3
## 175 0 0 <NA> 0.000 -1 2
## 176 0 0 <NA> 0.000 -1 2
## 177 178 179 Mean 145.500 1 <NA>
## 178 0 0 <NA> 0.000 -1 2
## 179 180 181 Max 158.000 1 <NA>
## 180 0 0 <NA> 0.000 -1 1
## 181 182 183 Width 37.000 1 <NA>
## 182 0 0 <NA> 0.000 -1 1
## 183 0 0 <NA> 0.000 -1 2
#Multi-dimensional scaling plot of proximity matrix
MDSplot(model.rf.tune, train$NSP)
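MDSplot() works from the stored proximity matrix, which is why proximity = TRUE was set when fitting the tuned model. The default plot leaves the classes unlabeled; a sketch that fixes the palette and adds a legend (the colors are arbitrary choices):
#same plot with an explicit palette and a legend for the three NSP classes
cols <- c("darkgreen", "orange", "red")
MDSplot(model.rf.tune, train$NSP, palette = cols)
legend("topright", legend = levels(train$NSP), fill = cols)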