This document contains examples of each machine learning algorithm leveraging the packages and data sets available in R.
Two data sets are used in these examples: longley and iris.
The longley dataset describes 7 economic variables observed from 1947 to 1962 used to predict the number of people employed yearly.
The iris dataset describes the measurements of iris flowers and requires classification of each observation to one of three flower species.
Regression: “Predict values”
Forecast the future by estimating the relationship between variables.
-Estimate product demand
-Predict sales figures
-Analyze marketing returns
Anomaly Detection: “Find unusual occurrences”
Identify and predict rare or unusual data points.
-Predict credit risk
-Detect fraud
-Catch abnormal equipment readings
Clustering: “Discover structure”
Separate similar data points into intuitive groups.
-Perform customer segmentation
-Predict customer tastes
-Determine market price
Classification: “Predict Categories”
Identify what category new information belongs in
Two-Class Classification (binary, simple): Is this tweet positive?
Will this customer renew?
Which of two coupons draws more customers?
Multi-Class Classification (complex): What is the mood of this tweet?
Which service will this customer choose?
Which of several promotions draws more customers?
###########################LINEAR REGRESSION MODELS##############################
#################################################################################
##############################Ordinary Least Squares Regression##################
# Make the built-in longley data set available in the workspace.
data("longley")
# Ordinary least squares: regress Employed on every other column.
fit <- lm(formula = Employed ~ ., data = longley)
# Report coefficients, residual quantiles and goodness-of-fit statistics.
summary(fit)
##
## Call:
## lm(formula = Employed ~ ., data = longley)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41011 -0.15767 -0.02816 0.10155 0.45539
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.482e+03 8.904e+02 -3.911 0.003560 **
## GNP.deflator 1.506e-02 8.492e-02 0.177 0.863141
## GNP -3.582e-02 3.349e-02 -1.070 0.312681
## Unemployed -2.020e-02 4.884e-03 -4.136 0.002535 **
## Armed.Forces -1.033e-02 2.143e-03 -4.822 0.000944 ***
## Population -5.110e-02 2.261e-01 -0.226 0.826212
## Year 1.829e+00 4.555e-01 4.016 0.003037 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3049 on 9 degrees of freedom
## Multiple R-squared: 0.9955, Adjusted R-squared: 0.9925
## F-statistic: 330.3 on 6 and 9 DF, p-value: 4.984e-10
# Score the same rows the model was fitted on (in-sample predictions).
predictions <- predict(fit, newdata = longley)
# Training-set mean squared error between observed and predicted Employed.
mse <- mean((longley[["Employed"]] - predictions)^2)
print(mse)
## [1] 0.0522765
############################Stepwise Linear Regression##########################
# Make the built-in longley data set available in the workspace.
data("longley")
# Full linear model with every predictor; it is the starting point that
# step() prunes further down.
base <- lm(formula = Employed ~ ., data = longley)
# Report the full model before any variable selection.
summary(base)
##
## Call:
## lm(formula = Employed ~ ., data = longley)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41011 -0.15767 -0.02816 0.10155 0.45539
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.482e+03 8.904e+02 -3.911 0.003560 **
## GNP.deflator 1.506e-02 8.492e-02 0.177 0.863141
## GNP -3.582e-02 3.349e-02 -1.070 0.312681
## Unemployed -2.020e-02 4.884e-03 -4.136 0.002535 **
## Armed.Forces -1.033e-02 2.143e-03 -4.822 0.000944 ***
## Population -5.110e-02 2.261e-01 -0.226 0.826212
## Year 1.829e+00 4.555e-01 4.016 0.003037 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3049 on 9 degrees of freedom
## Multiple R-squared: 0.9955, Adjusted R-squared: 0.9925
## F-statistic: 330.3 on 6 and 9 DF, p-value: 4.984e-10
# Perform step-wise feature selection starting from the full model `base`.
# step() prints the AIC trace shown below (one table per elimination step)
# and returns the selected model, which is stored in `fit`.
fit <- step(base)
## Start: AIC=-33.22
## Employed ~ GNP.deflator + GNP + Unemployed + Armed.Forces + Population +
## Year
##
## Df Sum of Sq RSS AIC
## - GNP.deflator 1 0.00292 0.83935 -35.163
## - Population 1 0.00475 0.84117 -35.129
## - GNP 1 0.10631 0.94273 -33.305
## <none> 0.83642 -33.219
## - Year 1 1.49881 2.33524 -18.792
## - Unemployed 1 1.59014 2.42656 -18.178
## - Armed.Forces 1 2.16091 2.99733 -14.798
##
## Step: AIC=-35.16
## Employed ~ GNP + Unemployed + Armed.Forces + Population + Year
##
## Df Sum of Sq RSS AIC
## - Population 1 0.01933 0.8587 -36.799
## <none> 0.8393 -35.163
## - GNP 1 0.14637 0.9857 -34.592
## - Year 1 1.52725 2.3666 -20.578
## - Unemployed 1 2.18989 3.0292 -16.628
## - Armed.Forces 1 2.39752 3.2369 -15.568
##
## Step: AIC=-36.8
## Employed ~ GNP + Unemployed + Armed.Forces + Year
##
## Df Sum of Sq RSS AIC
## <none> 0.8587 -36.799
## - GNP 1 0.4647 1.3234 -31.879
## - Year 1 1.8980 2.7567 -20.137
## - Armed.Forces 1 2.3806 3.2393 -17.556
## - Unemployed 1 4.0491 4.9077 -10.908
# Summarize the model that step() selected (held in `fit`): coefficients,
# residual quantiles and goodness-of-fit statistics.
summary(fit)
##
## Call:
## lm(formula = Employed ~ GNP + Unemployed + Armed.Forces + Year,
## data = longley)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42165 -0.12457 -0.02416 0.08369 0.45268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.599e+03 7.406e+02 -4.859 0.000503 ***
## GNP -4.019e-02 1.647e-02 -2.440 0.032833 *
## Unemployed -2.088e-02 2.900e-03 -7.202 1.75e-05 ***
## Armed.Forces -1.015e-02 1.837e-03 -5.522 0.000180 ***
## Year 1.887e+00 3.828e-01 4.931 0.000449 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2794 on 11 degrees of freedom
## Multiple R-squared: 0.9954, Adjusted R-squared: 0.9937
## F-statistic: 589.8 on 4 and 11 DF, p-value: 9.5e-13
# In-sample predictions from the selected model.
predictions <- predict(fit, newdata = longley)
# Training-set mean squared error of the selected model.
mse <- mean((predictions - longley$Employed)^2)
print(mse)
## [1] 0.05366753
######################Principal Component Regression############################
library(pls)
## Warning: package 'pls' was built under R version 3.3.3
##
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
##
## loadings
# Make the built-in longley data set available in the workspace.
data("longley")
# Principal component regression of Employed on all other columns;
# validation = "CV" additionally requests cross-validated error estimates.
fit <- pcr(Employed ~ ., data = longley, validation = "CV")
# Report cross-validated RMSEP and variance explained per component count.
summary(fit)
## Data: X dimension: 16 6
## Y dimension: 16 1
## Fit method: svdpc
## Number of components considered: 6
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 3.627 1.841 1.323 0.5357 0.6275 0.6092 0.4271
## adjCV 3.627 1.817 1.307 0.5284 0.6133 0.5940 0.4152
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## X 64.96 94.90 99.99 100.00 100.00 100.00
## Employed 78.42 89.73 98.51 98.56 98.83 99.55
# In-sample predictions using all six components.
predictions <- predict(fit, newdata = longley, ncomp = 6)
# Training-set mean squared error.
mse <- mean((longley[["Employed"]] - predictions)^2)
print(mse)
## [1] 0.0522765
########################Partial Least Squares Regression########################
# Attach pls, which provides plsr().
library("pls")
# Make the built-in longley data set available in the workspace.
data("longley")
# Partial least squares regression of Employed on all other columns;
# validation = "CV" additionally requests cross-validated error estimates.
fit <- plsr(Employed ~ ., data = longley, validation = "CV")
# Report cross-validated RMSEP and variance explained per component count.
summary(fit)
## Data: X dimension: 16 6
## Y dimension: 16 1
## Fit method: kernelpls
## Number of components considered: 6
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 3.627 1.401 1.115 0.5717 0.6403 0.4925 0.4311
## adjCV 3.627 1.379 1.102 0.5608 0.6234 0.4831 0.4180
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## X 63.88 93.35 99.99 100.00 100.00 100.00
## Employed 87.91 93.70 98.51 98.65 99.16 99.55
# In-sample predictions using all six components.
predictions <- predict(fit, newdata = longley, ncomp = 6)
# Training-set mean squared error.
mse <- mean((predictions - longley$Employed)^2)
print(mse)
## [1] 0.0522765
#########################NON-LINEAR REGRESSION MODELS############################
#################################################################################
################# Multivariate Adaptive Regression Splines ######################
# load the package
library(earth)
## Warning: package 'earth' was built under R version 3.3.3
## Loading required package: plotmo
## Warning: package 'plotmo' was built under R version 3.3.3
## Loading required package: plotrix
## Warning: package 'plotrix' was built under R version 3.3.3
## Loading required package: TeachingDemos
## Warning: package 'TeachingDemos' was built under R version 3.3.3
# Make the built-in longley data set available in the workspace.
data("longley")
# Fit a MARS model of Employed on all other columns.
fit <- earth(Employed ~ ., data = longley)
# Report the selected hinge terms, their coefficients and the fit statistics.
summary(fit)
## Call: earth(formula=Employed~., data=longley)
##
## coefficients
## (Intercept) -1682.60259
## Year 0.89475
## h(293.6-Unemployed) 0.01226
## h(Unemployed-293.6) -0.01596
## h(Armed.Forces-263.7) -0.01470
##
## Selected 5 of 8 terms, and 3 of 6 predictors
## Termination condition: GRSq -Inf at 8 terms
## Importance: Year, Unemployed, Armed.Forces, GNP.deflator-unused, ...
## Number of terms at each degree of interaction: 1 4 (additive model)
## GCV 0.2389853 RSS 0.7318924 GRSq 0.9818348 RSq 0.996044
# Summarize the importance of the input variables. In the output below
# evimp() lists the predictors the model uses, with the columns nsubsets,
# gcv and rss.
evimp(fit)
## nsubsets gcv rss
## Year 4 100.0 100.0
## Unemployed 3 24.1 23.0
## Armed.Forces 2 10.4 10.8
# In-sample predictions from the MARS model.
predictions <- predict(fit, newdata = longley)
# Training-set mean squared error.
mse <- mean((longley[["Employed"]] - predictions)^2)
print(mse)
## [1] 0.04574327
################## SVM #######################
# Attach kernlab, which provides ksvm().
library("kernlab")
# Make the built-in longley data set available in the workspace.
data("longley")
# Support vector machine for Employed on all other columns (ksvm defaults).
fit <- ksvm(Employed ~ ., data = longley)
# Summarize the fit. As the output below shows, summary() prints only a
# generic Length/Class/Mode line for this S4 object.
summary(fit)
## Length Class Mode
## 1 ksvm S4
# In-sample predictions from the support vector machine.
predictions <- predict(fit, longley)
# Training-set mean squared error.
mse <- mean((predictions - longley$Employed)^2)
print(mse)
## [1] 0.1351403
###########################k-Nearest Neighbor###############################
# load the package
library(caret)
## Warning: package 'caret' was built under R version 3.3.3
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.3
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:kernlab':
##
## alpha
##
## Attaching package: 'caret'
## The following object is masked from 'package:pls':
##
## R2
# Make the built-in longley data set available in the workspace.
data("longley")
# k-nearest-neighbour regression with k = 3: columns 1-6 are the predictors,
# column 7 is the response.
fit <- knnreg(longley[, 1:6], longley[, 7], k = 3)
# Summarize the fit. As the output below shows, this lists only the
# components of the fitted object (learn, k, theDots).
summary(fit)
## Length Class Mode
## learn 2 -none- list
## k 1 -none- numeric
## theDots 0 -none- list
# In-sample predictions on the six predictor columns.
predictions <- predict(fit, newdata = longley[, 1:6])
# Training-set mean squared error.
mse <- mean((longley[["Employed"]] - predictions)^2)
print(mse)
## [1] 0.9259962
########################## Neural Network ################################
library(nnet)
# load data
data(longley)
# Predictor columns (1-6) and the response column (7), kept separately for
# the prediction and error steps further down.
x <- longley[,1:6]
y <- longley[,7]
# fit model: one hidden layer of 12 units, linear output units (regression),
# weight decay 0.01 and at most 500 optimiser iterations.
# linout is written as TRUE rather than T, because T is an ordinary variable
# that can be reassigned.
# NOTE: nnet picks its starting weights at random, so the iteration trace and
# final value differ between runs unless set.seed() is called beforehand.
fit <- nnet(Employed~., longley, size=12, maxit=500, linout=TRUE, decay=0.01)
## # weights: 97
## initial value 72836.075278
## iter 10 value 318.446884
## iter 20 value 223.138644
## iter 30 value 55.731195
## iter 40 value 42.912137
## iter 50 value 32.278811
## iter 60 value 22.312866
## iter 70 value 12.983179
## iter 80 value 9.509902
## iter 90 value 9.280529
## iter 100 value 9.195147
## iter 110 value 8.409815
## iter 120 value 8.332759
## iter 130 value 8.145912
## iter 140 value 7.042335
## iter 150 value 6.532398
## iter 160 value 6.492506
## iter 170 value 6.278927
## iter 180 value 6.179457
## iter 190 value 6.146584
## iter 200 value 6.017004
## iter 210 value 5.264058
## iter 220 value 5.012772
## iter 230 value 4.931220
## iter 240 value 4.823919
## iter 250 value 4.806420
## iter 260 value 4.805233
## iter 270 value 4.565142
## iter 280 value 4.517023
## iter 290 value 4.513466
## iter 300 value 4.508987
## iter 310 value 4.365616
## iter 320 value 4.120186
## iter 330 value 4.076938
## iter 340 value 4.066887
## iter 350 value 4.059309
## iter 360 value 4.055786
## iter 370 value 4.053395
## iter 380 value 4.034264
## iter 390 value 4.004369
## iter 400 value 3.992093
## iter 410 value 3.979057
## iter 420 value 3.968426
## iter 430 value 3.956760
## iter 440 value 3.942373
## iter 450 value 3.928397
## iter 460 value 3.915889
## iter 470 value 3.902728
## iter 480 value 3.899292
## iter 490 value 3.897474
## iter 500 value 3.895324
## final value 3.895324
## stopped after 500 iterations
# Summarize the fit. As the output below shows, this reports the layer sizes
# (6-12-1), the options used and every connection weight, labelled
# source->target (b = bias, i = input, h = hidden unit, o = output).
summary(fit)
## a 6-12-1 network with 97 weights
## options were - linear output units decay=0.01
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1
## 0.00 0.25 0.02 -0.03 -0.06 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2
## 0.00 0.61 -0.14 -0.04 0.29 0.05 -0.03
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3
## 0.00 0.02 0.18 -0.04 0.01 0.01 -0.01
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4
## 0.00 1.15 -0.10 0.00 -0.02 0.20 -0.05
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5
## 0.00 0.00 -0.01 -0.03 -0.01 0.00 0.02
## b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6
## 0.00 0.00 -0.02 0.05 0.01 0.00 0.00
## b->h7 i1->h7 i2->h7 i3->h7 i4->h7 i5->h7 i6->h7
## 0.00 -0.53 0.09 -0.01 0.00 0.14 0.00
## b->h8 i1->h8 i2->h8 i3->h8 i4->h8 i5->h8 i6->h8
## 0.00 -0.85 0.08 -0.01 -0.01 0.83 -0.02
## b->h9 i1->h9 i2->h9 i3->h9 i4->h9 i5->h9 i6->h9
## 0.00 -0.01 0.02 -0.02 0.01 0.06 0.00
## b->h10 i1->h10 i2->h10 i3->h10 i4->h10 i5->h10 i6->h10
## 0.00 0.05 -0.09 0.11 0.18 -0.02 -0.01
## b->h11 i1->h11 i2->h11 i3->h11 i4->h11 i5->h11 i6->h11
## 0.00 0.00 0.00 0.00 0.03 0.00 0.01
## b->h12 i1->h12 i2->h12 i3->h12 i4->h12 i5->h12 i6->h12
## 0.00 0.02 0.07 -0.01 -0.04 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o h8->o h9->o
## 5.58 5.53 5.56 5.58 5.27 5.58 5.58 3.95 5.55 5.57
## h10->o h11->o h12->o
## 5.58 5.61 5.59
# In-sample predictions; type = "raw" returns the network's numeric outputs.
predictions <- predict(fit, newdata = x, type = "raw")
# Training-set mean squared error against the observed response y.
mse <- mean((predictions - y)^2)
print(mse)
## [1] 0.0002531017
###################### Non-Linear Classification with Decision Trees ###############
####################################################################################
################### Classification and Regression Trees (CART) #####################
#Classification and Regression Trees (CART) split attributes based on values that minimize a loss function, such as sum of squared errors.
# Attach rpart, which provides the CART implementation.
library("rpart")
# Make the built-in iris data set available in the workspace.
data("iris")
# Classification tree predicting Species from the four measurements.
fit <- rpart(formula = Species ~ ., data = iris)
# Report the complexity table, variable importance and per-node details.
summary(fit)
## Call:
## rpart(formula = Species ~ ., data = iris)
## n= 150
##
## CP nsplit rel error xerror xstd
## 1 0.50 0 1.00 1.13 0.0527952
## 2 0.44 1 0.50 0.60 0.0600000
## 3 0.01 2 0.06 0.11 0.0319270
##
## Variable importance
## Petal.Width Petal.Length Sepal.Length Sepal.Width
## 34 31 21 14
##
## Node number 1: 150 observations, complexity param=0.5
## predicted class=setosa expected loss=0.6666667 P(node) =1
## class counts: 50 50 50
## probabilities: 0.333 0.333 0.333
## left son=2 (50 obs) right son=3 (100 obs)
## Primary splits:
## Petal.Length < 2.45 to the left, improve=50.00000, (0 missing)
## Petal.Width < 0.8 to the left, improve=50.00000, (0 missing)
## Sepal.Length < 5.45 to the left, improve=34.16405, (0 missing)
## Sepal.Width < 3.35 to the right, improve=19.03851, (0 missing)
## Surrogate splits:
## Petal.Width < 0.8 to the left, agree=1.000, adj=1.00, (0 split)
## Sepal.Length < 5.45 to the left, agree=0.920, adj=0.76, (0 split)
## Sepal.Width < 3.35 to the right, agree=0.833, adj=0.50, (0 split)
##
## Node number 2: 50 observations
## predicted class=setosa expected loss=0 P(node) =0.3333333
## class counts: 50 0 0
## probabilities: 1.000 0.000 0.000
##
## Node number 3: 100 observations, complexity param=0.44
## predicted class=versicolor expected loss=0.5 P(node) =0.6666667
## class counts: 0 50 50
## probabilities: 0.000 0.500 0.500
## left son=6 (54 obs) right son=7 (46 obs)
## Primary splits:
## Petal.Width < 1.75 to the left, improve=38.969400, (0 missing)
## Petal.Length < 4.75 to the left, improve=37.353540, (0 missing)
## Sepal.Length < 6.15 to the left, improve=10.686870, (0 missing)
## Sepal.Width < 2.45 to the left, improve= 3.555556, (0 missing)
## Surrogate splits:
## Petal.Length < 4.75 to the left, agree=0.91, adj=0.804, (0 split)
## Sepal.Length < 6.15 to the left, agree=0.73, adj=0.413, (0 split)
## Sepal.Width < 2.95 to the left, agree=0.67, adj=0.283, (0 split)
##
## Node number 6: 54 observations
## predicted class=versicolor expected loss=0.09259259 P(node) =0.36
## class counts: 0 49 5
## probabilities: 0.000 0.907 0.093
##
## Node number 7: 46 observations
## predicted class=virginica expected loss=0.02173913 P(node) =0.3066667
## class counts: 0 1 45
## probabilities: 0.000 0.022 0.978
# Predicted class labels for the training rows (measurement columns 1-4).
predictions <- predict(fit, newdata = iris[, 1:4], type = "class")
# Confusion table: predicted species (rows) against true species (columns).
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 49 5
## virginica 0 1 45
####################################### C4.5 ######################################
#The C4.5 algorithm is an extension of the ID3 algorithm and constructs a decision tree to maximize information gain (difference in entropy).
# load the package
#library(RWeka)
# load data
#data(iris)
# fit model
#fit <- J48(Species~., data=iris)
# summarize the fit
#summary(fit)
# make predictions
#predictions <- predict(fit, iris[,1:4])
# summarize accuracy
#table(predictions, iris$Species)
###################################### PART #####################################
#PART is a rule system that creates pruned C4.5 decision trees for the data set and extracts rules from them. Instances covered by the extracted rules are removed from the training data, and the process is repeated until all instances are covered by extracted rules.
# load the package
#library(RWeka)
# load data
#data(iris)
# fit model
#fit <- PART(Species~., data=iris)
# summarize the fit
#summary(fit)
# make predictions
#predictions <- predict(fit, iris[,1:4])
# summarize accuracy
#table(predictions, iris$Species)
################################# Bagging CART ###################################
#Bootstrapped Aggregation (Bagging) is an ensemble method that creates multiple models of the same type from different sub-samples of the same dataset. The predictions from each separate model are combined to provide a superior result. This approach has been shown to be particularly effective for high-variance methods such as decision trees.
# load the package
library(ipred)
## Warning: package 'ipred' was built under R version 3.3.3
# load data
data(iris)
# fit model
fit <- bagging(Species~., data=iris)
# summarize the fit
summary(fit)
##
## Bagging classification trees with 25 bootstrap replications
##
## Call: bagging.data.frame(formula = Species ~ ., data = iris)
## $y
## [1] setosa setosa setosa setosa setosa setosa
## [7] setosa setosa setosa setosa setosa setosa
## [13] setosa setosa setosa setosa setosa setosa
## [19] setosa setosa setosa setosa setosa setosa
## [25] setosa setosa setosa setosa setosa setosa
## [31] setosa setosa setosa setosa setosa setosa
## [37] setosa setosa setosa setosa setosa setosa
## [43] setosa setosa setosa setosa setosa setosa
## [49] setosa setosa versicolor versicolor versicolor versicolor
## [55] versicolor versicolor versicolor versicolor versicolor versicolor
## [61] versicolor versicolor versicolor versicolor versicolor versicolor
## [67] versicolor versicolor versicolor versicolor versicolor versicolor
## [73] versicolor versicolor versicolor versicolor versicolor versicolor
## [79] versicolor versicolor versicolor versicolor versicolor versicolor
## [85] versicolor versicolor versicolor versicolor versicolor versicolor
## [91] versicolor versicolor versicolor versicolor versicolor versicolor
## [97] versicolor versicolor versicolor versicolor virginica virginica
## [103] virginica virginica virginica virginica virginica virginica
## [109] virginica virginica virginica virginica virginica virginica
## [115] virginica virginica virginica virginica virginica virginica
## [121] virginica virginica virginica virginica virginica virginica
## [127] virginica virginica virginica virginica virginica virginica
## [133] virginica virginica virginica virginica virginica virginica
## [139] virginica virginica virginica virginica virginica virginica
## [145] virginica virginica virginica virginica virginica virginica
## Levels: setosa versicolor virginica
##
## $X
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## 4 4.6 3.1 1.5 0.2
## 5 5.0 3.6 1.4 0.2
## 6 5.4 3.9 1.7 0.4
## 7 4.6 3.4 1.4 0.3
## 8 5.0 3.4 1.5 0.2
## 9 4.4 2.9 1.4 0.2
## 10 4.9 3.1 1.5 0.1
## 11 5.4 3.7 1.5 0.2
## 12 4.8 3.4 1.6 0.2
## 13 4.8 3.0 1.4 0.1
## 14 4.3 3.0 1.1 0.1
## 15 5.8 4.0 1.2 0.2
## 16 5.7 4.4 1.5 0.4
## 17 5.4 3.9 1.3 0.4
## 18 5.1 3.5 1.4 0.3
## 19 5.7 3.8 1.7 0.3
## 20 5.1 3.8 1.5 0.3
## 21 5.4 3.4 1.7 0.2
## 22 5.1 3.7 1.5 0.4
## 23 4.6 3.6 1.0 0.2
## 24 5.1 3.3 1.7 0.5
## 25 4.8 3.4 1.9 0.2
## 26 5.0 3.0 1.6 0.2
## 27 5.0 3.4 1.6 0.4
## 28 5.2 3.5 1.5 0.2
## 29 5.2 3.4 1.4 0.2
## 30 4.7 3.2 1.6 0.2
## 31 4.8 3.1 1.6 0.2
## 32 5.4 3.4 1.5 0.4
## 33 5.2 4.1 1.5 0.1
## 34 5.5 4.2 1.4 0.2
## 35 4.9 3.1 1.5 0.2
## 36 5.0 3.2 1.2 0.2
## 37 5.5 3.5 1.3 0.2
## 38 4.9 3.6 1.4 0.1
## 39 4.4 3.0 1.3 0.2
## 40 5.1 3.4 1.5 0.2
## 41 5.0 3.5 1.3 0.3
## 42 4.5 2.3 1.3 0.3
## 43 4.4 3.2 1.3 0.2
## 44 5.0 3.5 1.6 0.6
## 45 5.1 3.8 1.9 0.4
## 46 4.8 3.0 1.4 0.3
## 47 5.1 3.8 1.6 0.2
## 48 4.6 3.2 1.4 0.2
## 49 5.3 3.7 1.5 0.2
## 50 5.0 3.3 1.4 0.2
## 51 7.0 3.2 4.7 1.4
## 52 6.4 3.2 4.5 1.5
## 53 6.9 3.1 4.9 1.5
## 54 5.5 2.3 4.0 1.3
## 55 6.5 2.8 4.6 1.5
## 56 5.7 2.8 4.5 1.3
## 57 6.3 3.3 4.7 1.6
## 58 4.9 2.4 3.3 1.0
## 59 6.6 2.9 4.6 1.3
## 60 5.2 2.7 3.9 1.4
## 61 5.0 2.0 3.5 1.0
## 62 5.9 3.0 4.2 1.5
## 63 6.0 2.2 4.0 1.0
## 64 6.1 2.9 4.7 1.4
## 65 5.6 2.9 3.6 1.3
## 66 6.7 3.1 4.4 1.4
## 67 5.6 3.0 4.5 1.5
## 68 5.8 2.7 4.1 1.0
## 69 6.2 2.2 4.5 1.5
## 70 5.6 2.5 3.9 1.1
## 71 5.9 3.2 4.8 1.8
## 72 6.1 2.8 4.0 1.3
## 73 6.3 2.5 4.9 1.5
## 74 6.1 2.8 4.7 1.2
## 75 6.4 2.9 4.3 1.3
## 76 6.6 3.0 4.4 1.4
## 77 6.8 2.8 4.8 1.4
## 78 6.7 3.0 5.0 1.7
## 79 6.0 2.9 4.5 1.5
## 80 5.7 2.6 3.5 1.0
## 81 5.5 2.4 3.8 1.1
## 82 5.5 2.4 3.7 1.0
## 83 5.8 2.7 3.9 1.2
## 84 6.0 2.7 5.1 1.6
## 85 5.4 3.0 4.5 1.5
## 86 6.0 3.4 4.5 1.6
## 87 6.7 3.1 4.7 1.5
## 88 6.3 2.3 4.4 1.3
## 89 5.6 3.0 4.1 1.3
## 90 5.5 2.5 4.0 1.3
## 91 5.5 2.6 4.4 1.2
## 92 6.1 3.0 4.6 1.4
## 93 5.8 2.6 4.0 1.2
## 94 5.0 2.3 3.3 1.0
## 95 5.6 2.7 4.2 1.3
## 96 5.7 3.0 4.2 1.2
## 97 5.7 2.9 4.2 1.3
## 98 6.2 2.9 4.3 1.3
## 99 5.1 2.5 3.0 1.1
## 100 5.7 2.8 4.1 1.3
## 101 6.3 3.3 6.0 2.5
## 102 5.8 2.7 5.1 1.9
## 103 7.1 3.0 5.9 2.1
## 104 6.3 2.9 5.6 1.8
## 105 6.5 3.0 5.8 2.2
## 106 7.6 3.0 6.6 2.1
## 107 4.9 2.5 4.5 1.7
## 108 7.3 2.9 6.3 1.8
## 109 6.7 2.5 5.8 1.8
## 110 7.2 3.6 6.1 2.5
## 111 6.5 3.2 5.1 2.0
## 112 6.4 2.7 5.3 1.9
## 113 6.8 3.0 5.5 2.1
## 114 5.7 2.5 5.0 2.0
## 115 5.8 2.8 5.1 2.4
## 116 6.4 3.2 5.3 2.3
## 117 6.5 3.0 5.5 1.8
## 118 7.7 3.8 6.7 2.2
## 119 7.7 2.6 6.9 2.3
## 120 6.0 2.2 5.0 1.5
## 121 6.9 3.2 5.7 2.3
## 122 5.6 2.8 4.9 2.0
## 123 7.7 2.8 6.7 2.0
## 124 6.3 2.7 4.9 1.8
## 125 6.7 3.3 5.7 2.1
## 126 7.2 3.2 6.0 1.8
## 127 6.2 2.8 4.8 1.8
## 128 6.1 3.0 4.9 1.8
## 129 6.4 2.8 5.6 2.1
## 130 7.2 3.0 5.8 1.6
## 131 7.4 2.8 6.1 1.9
## 132 7.9 3.8 6.4 2.0
## 133 6.4 2.8 5.6 2.2
## 134 6.3 2.8 5.1 1.5
## 135 6.1 2.6 5.6 1.4
## 136 7.7 3.0 6.1 2.3
## 137 6.3 3.4 5.6 2.4
## 138 6.4 3.1 5.5 1.8
## 139 6.0 3.0 4.8 1.8
## 140 6.9 3.1 5.4 2.1
## 141 6.7 3.1 5.6 2.4
## 142 6.9 3.1 5.1 2.3
## 143 5.8 2.7 5.1 1.9
## 144 6.8 3.2 5.9 2.3
## 145 6.7 3.3 5.7 2.5
## 146 6.7 3.0 5.2 2.3
## 147 6.3 2.5 5.0 1.9
## 148 6.5 3.0 5.2 2.0
## 149 6.2 3.4 5.4 2.3
## 150 5.9 3.0 5.1 1.8
##
## $mtrees
## $mtrees[[1]]
## $bindx
## [1] 148 57 63 3 100 111 131 49 89 17 149 103 125 84 64 64 98
## [18] 4 77 54 2 57 126 80 111 40 101 45 35 44 140 43 36 106
## [35] 2 108 35 128 102 4 59 25 14 31 56 43 39 1 2 34 134
## [52] 75 144 13 2 75 150 124 114 27 124 82 108 80 65 88 10 65
## [69] 98 11 148 129 125 76 56 120 72 49 43 73 42 6 84 15 33
## [86] 101 29 30 53 14 89 129 50 46 103 26 75 139 26 116 10 62
## [103] 70 15 91 68 49 124 75 118 97 47 67 35 7 104 117 17 57
## [120] 134 134 125 118 27 140 56 100 19 91 69 147 137 140 40 149 71
## [137] 35 101 141 108 98 119 95 7 122 118 8 126 133 132
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 97 setosa (0.35333333 0.30000000 0.34666667)
## 2) Petal.Length< 2.7 53 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.7 97 45 virginica (0.00000000 0.46391753 0.53608247)
## 6) Petal.Width< 1.7 48 4 versicolor (0.00000000 0.91666667 0.08333333)
## 12) Petal.Length< 4.95 42 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Petal.Length>=4.95 6 2 virginica (0.00000000 0.33333333 0.66666667)
## 26) Petal.Width>=1.55 2 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 27) Petal.Width< 1.55 4 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.7 49 1 virginica (0.00000000 0.02040816 0.97959184)
## 14) Petal.Length< 4.85 2 1 versicolor (0.00000000 0.50000000 0.50000000)
## 28) Sepal.Length< 5.95 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Sepal.Length>=5.95 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Length>=4.85 47 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[2]]
## $bindx
## [1] 19 72 12 148 36 8 141 74 54 116 87 92 130 140 143 44 9
## [18] 99 96 76 85 39 39 86 91 9 107 131 62 124 86 105 69 83
## [35] 110 107 75 51 14 17 12 122 141 65 36 50 44 24 103 104 33
## [52] 120 35 3 108 97 6 115 66 132 128 132 33 117 144 37 52 26
## [69] 127 95 146 116 110 83 43 119 9 54 32 22 13 144 102 121 38
## [86] 67 92 5 69 82 41 40 68 109 103 24 15 20 83 73 77 131
## [103] 103 41 77 90 32 28 14 46 47 142 58 145 144 119 43 10 45
## [120] 113 47 145 75 85 89 25 19 122 87 41 104 131 88 54 148 106
## [137] 148 121 8 49 115 80 8 28 40 67 119 48 74 94
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 96 setosa (0.36000000 0.30000000 0.34000000)
## 2) Petal.Length< 2.45 54 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 96 45 virginica (0.00000000 0.46875000 0.53125000)
## 6) Petal.Width< 1.65 47 2 versicolor (0.00000000 0.95744681 0.04255319)
## 12) Petal.Length< 4.95 45 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Petal.Length>=4.95 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.65 49 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[3]]
## $bindx
## [1] 24 67 122 138 125 71 88 96 91 77 133 28 117 99 109 134 52
## [18] 80 41 3 50 123 10 31 108 115 32 100 109 17 69 27 147 106
## [35] 150 15 79 60 55 26 30 9 100 45 53 7 78 82 150 6 115
## [52] 94 54 23 10 105 77 20 124 27 41 90 37 109 131 95 133 145
## [69] 9 28 39 64 96 56 39 131 119 20 25 85 126 87 113 81 141
## [86] 108 92 9 123 105 98 110 71 98 5 77 13 124 100 25 134 7
## [103] 133 17 80 22 67 119 121 80 64 90 53 31 139 140 62 1 35
## [120] 106 61 12 99 65 80 58 119 61 45 9 143 137 5 50 3 10
## [137] 119 71 76 79 78 67 127 46 11 91 2 82 83 88
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 93 versicolor (0.32666667 0.38000000 0.29333333)
## 2) Petal.Length< 2.45 49 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 101 44 versicolor (0.00000000 0.56435644 0.43564356)
## 6) Petal.Width< 1.75 56 2 versicolor (0.00000000 0.96428571 0.03571429)
## 12) Petal.Length< 5.05 54 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Petal.Length>=5.05 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.75 45 3 virginica (0.00000000 0.06666667 0.93333333)
## 14) Petal.Length< 4.85 5 2 versicolor (0.00000000 0.60000000 0.40000000)
## 28) Sepal.Length< 5.95 3 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Sepal.Length>=5.95 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Length>=4.85 40 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[4]]
## $bindx
## [1] 33 38 2 146 103 82 128 29 14 70 112 35 122 7 102 46 102
## [18] 21 31 50 71 35 90 31 24 18 45 92 48 56 139 105 41 92
## [35] 51 73 118 48 130 10 27 10 110 57 67 5 107 11 84 29 110
## [52] 15 73 106 17 59 37 53 93 67 8 28 85 78 72 17 73 29
## [69] 73 80 75 33 112 38 126 62 140 110 96 47 35 36 56 139 33
## [86] 47 83 110 48 124 79 74 69 125 1 52 4 17 94 140 20 83
## [103] 89 125 23 7 51 143 39 63 92 121 141 97 52 21 24 113 149
## [120] 73 10 9 50 16 79 18 7 128 118 41 8 100 76 11 138 109
## [137] 58 86 35 53 88 12 143 91 69 133 91 81 135 6
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 89 setosa (0.40666667 0.34666667 0.24666667)
## 2) Petal.Length< 2.6 61 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.6 89 37 versicolor (0.00000000 0.58426966 0.41573034)
## 6) Petal.Width< 1.75 54 3 versicolor (0.00000000 0.94444444 0.05555556)
## 12) Petal.Length< 5.35 52 1 versicolor (0.00000000 0.98076923 0.01923077)
## 24) Sepal.Length>=4.95 50 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 25) Sepal.Length< 4.95 2 1 versicolor (0.00000000 0.50000000 0.50000000)
## 50) Sepal.Width< 2.45 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 51) Sepal.Width>=2.45 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 13) Petal.Length>=5.35 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.75 35 1 virginica (0.00000000 0.02857143 0.97142857)
## 14) Petal.Length< 4.85 3 1 virginica (0.00000000 0.33333333 0.66666667)
## 28) Sepal.Length< 5.95 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Sepal.Length>=5.95 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Length>=4.85 32 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[5]]
## $bindx
## [1] 84 59 10 16 22 84 49 106 63 46 61 62 79 150 4 22 68
## [18] 17 119 121 10 109 35 88 137 20 13 16 95 35 146 39 45 142
## [35] 38 18 31 72 86 119 25 90 134 100 118 74 143 147 60 65 125
## [52] 90 137 60 149 135 5 88 70 112 97 135 112 18 77 84 130 126
## [69] 53 134 100 118 43 91 55 101 93 143 58 39 75 68 124 54 1
## [86] 92 99 109 100 100 58 108 30 34 110 65 90 98 76 92 102 12
## [103] 101 116 49 131 24 149 26 117 52 75 138 63 16 99 56 116 1
## [120] 33 130 84 37 17 7 77 145 4 78 3 91 30 71 59 109 63
## [137] 141 62 119 25 32 123 8 58 19 34 127 137 3 74
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 92 versicolor (0.30666667 0.38666667 0.30666667)
## 2) Petal.Length< 2.45 46 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 104 46 versicolor (0.00000000 0.55769231 0.44230769)
## 6) Petal.Length< 4.85 53 1 versicolor (0.00000000 0.98113208 0.01886792)
## 12) Petal.Width< 1.7 51 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Petal.Width>=1.7 2 1 versicolor (0.00000000 0.50000000 0.50000000)
## 26) Sepal.Length< 6.05 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 27) Sepal.Length>=6.05 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Length>=4.85 51 6 virginica (0.00000000 0.11764706 0.88235294)
## 14) Petal.Width< 1.75 12 6 versicolor (0.00000000 0.50000000 0.50000000)
## 28) Sepal.Length< 6.05 4 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Sepal.Length>=6.05 8 2 virginica (0.00000000 0.25000000 0.75000000)
## 58) Petal.Length< 5.05 2 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 59) Petal.Length>=5.05 6 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Width>=1.75 39 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[6]]
## $bindx
## [1] 44 103 144 120 51 110 2 92 61 119 43 20 80 147 74 20 138
## [18] 2 114 140 146 67 2 22 83 124 95 141 126 49 126 149 139 36
## [35] 7 118 96 5 46 12 57 74 6 5 136 25 96 20 9 59 15
## [52] 123 64 74 61 107 95 1 51 69 38 25 41 7 110 147 140 146
## [69] 105 8 62 142 30 115 99 84 77 142 63 137 39 3 103 17 69
## [86] 119 119 71 4 102 14 105 75 99 127 119 78 121 55 6 81 67
## [103] 68 142 33 75 81 131 12 21 79 20 137 80 86 100 88 100 116
## [120] 13 103 105 140 142 90 10 23 67 95 146 121 110 5 62 78 77
## [137] 3 45 52 41 31 124 57 52 124 122 74 38 64 2
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 98 versicolor (0.31333333 0.34666667 0.34000000)
## 2) Petal.Length< 2.45 47 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 103 51 versicolor (0.00000000 0.50485437 0.49514563)
## 6) Petal.Width< 1.75 53 2 versicolor (0.00000000 0.96226415 0.03773585)
## 12) Sepal.Length>=4.95 52 1 versicolor (0.00000000 0.98076923 0.01923077)
## 24) Petal.Length< 4.9 48 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 25) Petal.Length>=4.9 4 1 versicolor (0.00000000 0.75000000 0.25000000)
## 50) Sepal.Width>=2.45 3 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 51) Sepal.Width< 2.45 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 13) Sepal.Length< 4.95 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.75 50 1 virginica (0.00000000 0.02000000 0.98000000)
## 14) Petal.Length< 4.85 3 1 virginica (0.00000000 0.33333333 0.66666667)
## 28) Sepal.Length< 5.95 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Sepal.Length>=5.95 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Length>=4.85 47 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[7]]
## $bindx
## [1] 65 15 55 105 65 57 34 45 95 35 72 107 86 103 112 111 147
## [18] 122 117 17 33 118 91 139 79 56 126 150 58 116 98 34 53 103
## [35] 9 36 120 87 148 114 102 11 119 103 121 46 35 77 40 137 65
## [52] 12 119 24 21 52 19 56 110 137 149 149 126 63 26 142 82 91
## [69] 123 3 115 45 43 53 46 130 48 30 44 47 62 123 133 64 112
## [86] 74 116 87 20 91 83 101 84 94 143 25 20 84 53 14 117 35
## [103] 69 54 116 89 10 12 11 123 58 118 96 61 133 146 78 6 76
## [120] 92 70 142 87 104 138 102 131 129 127 44 134 44 72 105 28 94
## [137] 35 87 128 108 113 80 139 23 14 146 19 117 9 69
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 91 virginica (0.28666667 0.32000000 0.39333333)
## 2) Petal.Width< 1.75 95 47 versicolor (0.45263158 0.50526316 0.04210526)
## 4) Petal.Length< 2.6 43 0 setosa (1.00000000 0.00000000 0.00000000) *
## 5) Petal.Length>=2.6 52 4 versicolor (0.00000000 0.92307692 0.07692308)
## 10) Petal.Length< 4.95 46 1 versicolor (0.00000000 0.97826087 0.02173913)
## 20) Petal.Width< 1.65 45 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 21) Petal.Width>=1.65 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 11) Petal.Length>=4.95 6 3 versicolor (0.00000000 0.50000000 0.50000000)
## 22) Petal.Width>=1.55 4 1 versicolor (0.00000000 0.75000000 0.25000000)
## 44) Sepal.Length< 6.95 3 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 45) Sepal.Length>=6.95 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 23) Petal.Width< 1.55 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 3) Petal.Width>=1.75 55 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[8]]
## $bindx
## [1] 5 60 38 150 89 128 74 23 137 147 118 94 74 17 115 2 150
## [18] 122 7 73 128 74 134 23 85 100 60 148 61 118 19 113 41 90
## [35] 76 100 121 44 130 99 82 13 137 80 66 107 34 65 118 130 10
## [52] 94 93 69 70 53 108 21 101 44 123 45 44 12 24 131 101 143
## [69] 94 137 17 73 92 76 40 72 4 47 123 16 89 108 117 5 140
## [86] 138 28 89 30 148 131 116 31 78 11 109 74 58 90 53 20 58
## [103] 10 65 11 9 131 131 145 134 33 94 106 123 60 26 127 123 50
## [120] 125 79 5 47 86 110 101 59 138 123 114 65 91 84 53 17 124
## [137] 12 115 122 107 58 55 137 65 85 61 139 81 40 129
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 94 virginica (0.27333333 0.35333333 0.37333333)
## 2) Petal.Width< 1.65 97 45 versicolor (0.42268041 0.53608247 0.04123711)
## 4) Petal.Length< 2.45 41 0 setosa (1.00000000 0.00000000 0.00000000) *
## 5) Petal.Length>=2.45 56 4 versicolor (0.00000000 0.92857143 0.07142857)
## 10) Petal.Length< 5 51 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 11) Petal.Length>=5 5 1 virginica (0.00000000 0.20000000 0.80000000)
## 22) Sepal.Length< 6.15 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 23) Sepal.Length>=6.15 4 0 virginica (0.00000000 0.00000000 1.00000000) *
## 3) Petal.Width>=1.65 53 1 virginica (0.00000000 0.01886792 0.98113208)
## 6) Petal.Width< 1.75 3 1 virginica (0.00000000 0.33333333 0.66666667)
## 12) Sepal.Length>=5.8 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Sepal.Length< 5.8 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.75 50 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[9]]
## $bindx
## [1] 139 69 135 100 12 86 10 120 23 130 51 106 98 42 114 68 100
## [18] 99 75 16 148 9 74 121 124 89 41 53 52 57 7 96 28 115
## [35] 136 55 26 72 74 99 112 130 103 109 35 134 115 21 118 61 17
## [52] 99 85 109 129 61 96 81 138 56 96 91 51 50 112 142 72 116
## [69] 135 133 22 84 121 16 23 60 55 33 138 88 79 124 84 64 8
## [86] 18 55 31 60 127 141 129 130 144 79 13 13 88 32 105 64 31
## [103] 26 143 46 45 32 147 105 134 42 58 64 130 128 127 30 147 108
## [120] 112 112 29 45 101 86 92 1 145 111 19 149 100 21 134 102 14
## [137] 124 136 81 29 54 148 46 95 102 142 89 64 97 113
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 92 virginica (0.26000000 0.35333333 0.38666667)
## 2) Petal.Length< 4.75 89 39 versicolor (0.43820225 0.56179775 0.00000000)
## 4) Petal.Length< 2.45 39 0 setosa (1.00000000 0.00000000 0.00000000) *
## 5) Petal.Length>=2.45 50 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 3) Petal.Length>=4.75 61 3 virginica (0.00000000 0.04918033 0.95081967)
## 6) Petal.Width< 1.7 13 3 virginica (0.00000000 0.23076923 0.76923077)
## 12) Sepal.Length< 6.05 3 1 versicolor (0.00000000 0.66666667 0.33333333)
## 24) Sepal.Width>=2.45 2 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 25) Sepal.Width< 2.45 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 13) Sepal.Length>=6.05 10 1 virginica (0.00000000 0.10000000 0.90000000)
## 26) Sepal.Width>=3.05 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 27) Sepal.Width< 3.05 9 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.7 48 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[10]]
## $bindx
## [1] 140 55 81 83 63 42 83 77 57 132 113 113 25 65 23 62 103
## [18] 76 53 13 5 126 110 133 45 69 19 79 31 100 2 53 83 68
## [35] 126 25 38 116 19 43 121 11 127 23 69 103 44 74 27 83 43
## [52] 92 30 57 84 39 77 110 90 19 6 48 18 94 37 47 19 4
## [69] 88 144 149 125 16 105 42 148 128 8 23 71 63 146 81 62 72
## [86] 65 31 38 118 37 10 43 91 7 129 4 126 16 34 5 122 79
## [103] 19 78 59 85 18 37 94 112 39 4 53 8 56 116 76 21 24
## [120] 134 103 85 49 149 69 131 81 48 6 15 67 96 110 17 148 83
## [137] 55 22 106 124 15 44 70 38 45 45 1 23 11 34
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 85 setosa (0.43333333 0.33333333 0.23333333)
## 2) Petal.Length< 2.6 65 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.6 85 35 versicolor (0.00000000 0.58823529 0.41176471)
## 6) Petal.Width< 1.75 50 1 versicolor (0.00000000 0.98000000 0.02000000)
## 12) Petal.Length< 5.05 48 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Petal.Length>=5.05 2 1 versicolor (0.00000000 0.50000000 0.50000000)
## 26) Sepal.Length< 6.15 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 27) Sepal.Length>=6.15 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.75 35 1 virginica (0.00000000 0.02857143 0.97142857)
## 14) Sepal.Length< 6 2 1 versicolor (0.00000000 0.50000000 0.50000000)
## 28) Sepal.Length>=5.75 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Sepal.Length< 5.75 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Sepal.Length>=6 33 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[11]]
## $bindx
## [1] 99 79 88 52 107 114 102 62 73 53 37 137 59 114 39 72 100
## [18] 32 43 116 75 16 21 111 147 45 61 30 3 67 98 102 77 102
## [35] 59 10 37 109 87 74 49 9 14 69 93 27 17 143 5 99 10
## [52] 54 124 15 141 93 62 115 150 140 16 139 68 18 35 74 41 16
## [69] 118 113 15 37 48 95 54 146 24 1 76 149 11 94 24 29 98
## [86] 51 26 63 50 70 137 45 14 121 143 32 35 55 143 126 82 111
## [103] 148 93 79 46 146 112 74 44 125 12 20 125 81 36 24 54 140
## [120] 95 4 67 78 91 6 54 125 18 89 149 122 145 98 85 33 63
## [137] 55 70 44 97 118 82 79 50 35 41 14 22 96 121
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 94 versicolor (0.36000000 0.37333333 0.26666667)
## 2) Petal.Length< 2.45 54 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 96 40 versicolor (0.00000000 0.58333333 0.41666667)
## 6) Petal.Width< 1.6 55 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 7) Petal.Width>=1.6 41 1 virginica (0.00000000 0.02439024 0.97560976)
## 14) Petal.Width< 1.75 2 1 versicolor (0.00000000 0.50000000 0.50000000)
## 28) Sepal.Length>=5.8 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Sepal.Length< 5.8 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Width>=1.75 39 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[12]]
## $bindx
## [1] 56 13 83 15 109 44 72 117 14 31 25 137 97 40 82 1 25
## [18] 138 87 136 100 9 67 103 55 148 140 112 134 74 124 80 42 102
## [35] 25 29 24 88 114 99 112 141 114 128 94 26 106 110 24 122 42
## [52] 16 115 116 13 124 26 139 19 18 138 96 26 70 117 140 14 44
## [69] 144 57 146 52 109 144 14 20 112 139 60 107 69 79 11 110 78
## [86] 107 100 23 81 23 56 96 36 38 26 9 33 123 41 150 66 1
## [103] 100 43 62 84 69 116 130 53 53 138 135 43 54 57 119 77 96
## [120] 150 109 35 115 49 96 71 88 119 106 85 67 26 119 87 86 104
## [137] 123 102 57 122 13 16 139 29 52 46 116 28 26 78
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 95 virginica (0.32000000 0.31333333 0.36666667)
## 2) Petal.Length< 2.45 48 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 102 47 virginica (0.00000000 0.46078431 0.53921569)
## 6) Petal.Width< 1.75 51 5 versicolor (0.00000000 0.90196078 0.09803922)
## 12) Petal.Length< 5.05 47 2 versicolor (0.00000000 0.95744681 0.04255319)
## 24) Sepal.Length>=4.95 45 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 25) Sepal.Length< 4.95 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 13) Petal.Length>=5.05 4 1 virginica (0.00000000 0.25000000 0.75000000)
## 26) Sepal.Length< 6.05 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 27) Sepal.Length>=6.05 3 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.75 51 1 virginica (0.00000000 0.01960784 0.98039216)
## 14) Petal.Length< 4.85 4 1 virginica (0.00000000 0.25000000 0.75000000)
## 28) Sepal.Length< 5.95 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Sepal.Length>=5.95 3 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Length>=4.85 47 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[13]]
## $bindx
## [1] 124 20 40 98 97 103 98 68 115 114 17 28 67 43 116 19 51
## [18] 69 42 53 129 73 111 146 9 102 32 95 90 121 22 125 63 14
## [35] 1 58 112 143 22 4 140 37 19 32 64 25 58 7 131 131 60
## [52] 106 33 144 118 123 38 56 100 143 21 90 123 42 66 111 114 139
## [69] 48 50 10 147 144 37 95 89 80 140 135 100 18 77 114 66 73
## [86] 31 145 65 27 95 134 97 88 148 42 22 19 23 106 89 64 35
## [103] 76 38 125 52 99 108 76 104 122 75 45 102 29 132 110 43 102
## [120] 116 16 129 91 94 83 143 23 139 131 145 70 52 89 39 114 121
## [137] 66 14 143 110 101 22 28 32 140 44 121 36 133 66
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 94 virginica (0.32000000 0.30666667 0.37333333)
## 2) Petal.Width< 1.65 96 48 setosa (0.50000000 0.47916667 0.02083333)
## 4) Petal.Length< 2.45 48 0 setosa (1.00000000 0.00000000 0.00000000) *
## 5) Petal.Length>=2.45 48 2 versicolor (0.00000000 0.95833333 0.04166667)
## 10) Petal.Length< 5 46 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 11) Petal.Length>=5 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 3) Petal.Width>=1.65 54 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[14]]
## $bindx
## [1] 103 87 64 32 25 48 97 122 48 88 2 113 25 98 26 56 54
## [18] 18 102 51 93 93 6 94 76 83 79 119 93 77 101 147 86 86
## [35] 96 86 47 134 146 72 104 104 107 102 75 15 147 122 146 133 132
## [52] 68 117 41 150 2 69 101 67 26 39 105 45 142 34 60 133 134
## [69] 93 56 110 52 62 135 29 39 97 66 7 14 34 103 133 89 67
## [86] 144 113 134 118 72 1 134 131 51 78 40 139 133 57 114 44 125
## [103] 6 64 41 61 123 83 57 43 147 26 39 111 9 93 114 100 94
## [120] 126 44 90 110 7 84 147 141 47 125 2 18 6 20 1 45 119
## [137] 144 107 23 31 73 23 114 34 49 4 22 49 69 134
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 97 virginica (0.32000000 0.32666667 0.35333333)
## 2) Petal.Length< 2.6 48 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.6 102 49 virginica (0.00000000 0.48039216 0.51960784)
## 6) Petal.Length< 4.75 47 2 versicolor (0.00000000 0.95744681 0.04255319)
## 12) Sepal.Length>=4.95 45 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Sepal.Length< 4.95 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Length>=4.75 55 4 virginica (0.00000000 0.07272727 0.92727273)
## 14) Petal.Width< 1.75 10 4 virginica (0.00000000 0.40000000 0.60000000)
## 28) Petal.Length< 5.05 3 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Petal.Length>=5.05 7 1 virginica (0.00000000 0.14285714 0.85714286)
## 58) Sepal.Length< 6.05 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 59) Sepal.Length>=6.05 6 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Width>=1.75 45 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[15]]
## $bindx
## [1] 138 94 8 67 54 103 49 82 39 94 117 37 18 81 49 69 25
## [18] 9 38 24 110 7 2 49 92 15 107 149 18 57 128 78 51 16
## [35] 10 9 119 145 126 120 21 16 2 115 5 91 139 112 132 133 27
## [52] 68 60 113 36 122 122 38 29 83 63 84 71 109 127 117 145 18
## [69] 24 146 47 129 144 24 18 130 89 108 22 64 95 130 33 49 9
## [86] 45 78 139 137 103 40 115 89 62 42 129 40 149 142 123 105 134
## [103] 57 42 22 66 134 104 33 135 41 148 80 8 38 20 51 75 85
## [120] 150 5 26 142 30 140 42 34 39 32 62 56 51 93 112 121 139
## [137] 20 53 1 13 121 136 48 57 93 142 108 38 36 71
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 91 setosa (0.39333333 0.25333333 0.35333333)
## 2) Petal.Length< 2.6 59 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.6 91 38 virginica (0.00000000 0.41758242 0.58241758)
## 6) Petal.Length< 4.75 33 1 versicolor (0.00000000 0.96969697 0.03030303)
## 12) Sepal.Length>=4.95 32 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Sepal.Length< 4.95 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Length>=4.75 58 6 virginica (0.00000000 0.10344828 0.89655172)
## 14) Petal.Length< 5.05 13 5 virginica (0.00000000 0.38461538 0.61538462)
## 28) Sepal.Length>=6.45 3 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Sepal.Length< 6.45 10 2 virginica (0.00000000 0.20000000 0.80000000)
## 58) Sepal.Width>=3.1 2 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 59) Sepal.Width< 3.1 8 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Length>=5.05 45 1 virginica (0.00000000 0.02222222 0.97777778)
## 30) Sepal.Length< 6.05 4 1 virginica (0.00000000 0.25000000 0.75000000)
## 60) Sepal.Length>=5.95 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 61) Sepal.Length< 5.95 3 0 virginica (0.00000000 0.00000000 1.00000000) *
## 31) Sepal.Length>=6.05 41 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[16]]
## $bindx
## [1] 51 76 80 131 96 8 39 5 9 16 88 75 38 86 61 42 36
## [18] 76 111 99 129 74 148 133 71 145 131 124 56 38 17 53 11 128
## [35] 138 102 7 39 95 77 52 31 133 127 99 118 118 143 140 124 38
## [52] 45 30 34 123 148 126 71 140 134 103 26 69 58 73 83 148 124
## [69] 57 115 119 96 138 75 97 67 5 28 97 22 136 111 95 51 54
## [86] 83 43 14 73 77 19 89 63 26 28 115 85 138 27 16 12 126
## [103] 140 110 75 22 63 96 132 143 31 70 20 38 139 90 12 68 108
## [120] 98 55 150 83 132 87 149 68 47 3 124 64 147 25 64 128 27
## [137] 90 126 8 38 30 40 64 33 49 79 35 82 130 83
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 94 versicolor (0.30666667 0.37333333 0.32000000)
## 2) Petal.Length< 2.45 46 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 104 48 versicolor (0.00000000 0.53846154 0.46153846)
## 6) Petal.Width< 1.7 56 2 versicolor (0.00000000 0.96428571 0.03571429)
## 12) Petal.Length< 5 54 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Petal.Length>=5 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.7 48 2 virginica (0.00000000 0.04166667 0.95833333)
## 14) Petal.Length< 4.85 4 2 versicolor (0.00000000 0.50000000 0.50000000)
## 28) Sepal.Length< 5.95 2 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Sepal.Length>=5.95 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Length>=4.85 44 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[17]]
## $bindx
## [1] 97 84 145 82 43 41 30 72 120 121 18 21 43 106 131 64 12
## [18] 41 120 119 17 110 88 58 120 135 11 55 106 24 103 1 100 128
## [35] 27 59 48 23 141 62 21 27 28 123 42 79 108 96 24 98 43
## [52] 72 148 104 141 146 40 147 149 115 89 121 138 117 130 136 48 147
## [69] 64 145 66 136 142 2 137 143 19 123 131 53 84 144 142 103 135
## [86] 94 115 146 85 26 102 52 75 149 125 5 61 86 98 63 75 113
## [103] 80 138 47 72 11 140 117 35 105 87 74 107 88 31 70 69 22
## [120] 14 49 24 57 9 74 104 137 119 102 16 17 118 47 4 65 82
## [137] 107 15 124 135 8 135 55 3 115 144 30 50 147 84
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 87 virginica (0.30000000 0.28000000 0.42000000)
## 2) Petal.Length< 2.5 45 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.5 105 42 virginica (0.00000000 0.40000000 0.60000000)
## 6) Petal.Length< 4.8 40 2 versicolor (0.00000000 0.95000000 0.05000000)
## 12) Petal.Width< 1.65 38 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Petal.Width>=1.65 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Length>=4.8 65 4 virginica (0.00000000 0.06153846 0.93846154)
## 14) Petal.Width< 1.7 12 4 virginica (0.00000000 0.33333333 0.66666667)
## 28) Sepal.Width>=2.65 5 1 versicolor (0.00000000 0.80000000 0.20000000)
## 56) Sepal.Length< 7.05 4 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 57) Sepal.Length>=7.05 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 29) Sepal.Width< 2.65 7 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Width>=1.7 53 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[18]]
## $bindx
## [1] 106 101 51 62 121 114 45 10 77 56 129 67 46 27 36 94 50
## [18] 128 141 17 3 27 85 52 67 36 77 124 95 142 58 148 26 77
## [35] 117 150 59 26 71 43 13 112 78 5 41 58 141 6 73 94 2
## [52] 5 89 7 108 38 1 97 127 63 56 10 50 106 40 56 100 146
## [69] 113 22 102 102 6 109 10 40 113 39 112 98 146 17 116 1 146
## [86] 133 136 60 135 103 120 76 55 90 108 19 149 106 17 62 9 146
## [103] 72 37 112 46 103 2 56 116 93 33 147 30 25 28 57 69 125
## [120] 37 40 23 18 26 28 133 126 108 21 114 77 21 77 49 110 25
## [137] 37 146 29 145 20 99 122 114 59 31 123 145 130 65
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 94 setosa (0.37333333 0.27333333 0.35333333)
## 2) Petal.Length< 2.45 56 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 94 41 virginica (0.00000000 0.43617021 0.56382979)
## 6) Petal.Length< 4.85 40 1 versicolor (0.00000000 0.97500000 0.02500000)
## 12) Petal.Width< 1.7 38 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Petal.Width>=1.7 2 1 versicolor (0.00000000 0.50000000 0.50000000)
## 26) Sepal.Length< 6.05 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 27) Sepal.Length>=6.05 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Length>=4.85 54 2 virginica (0.00000000 0.03703704 0.96296296)
## 14) Petal.Width< 1.75 5 2 virginica (0.00000000 0.40000000 0.60000000)
## 28) Sepal.Length>=6.2 3 1 versicolor (0.00000000 0.66666667 0.33333333)
## 56) Sepal.Length< 6.95 2 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 57) Sepal.Length>=6.95 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 29) Sepal.Length< 6.2 2 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Width>=1.75 49 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[19]]
## $bindx
## [1] 68 103 23 79 94 4 96 38 123 40 66 14 117 65 23 33 33
## [18] 127 144 76 53 5 107 66 127 111 96 78 5 48 129 23 21 119
## [35] 142 75 95 44 98 7 139 148 38 71 137 136 116 89 62 26 4
## [52] 14 79 2 9 17 10 103 30 75 126 64 4 79 102 59 105 148
## [69] 1 5 65 84 124 21 123 59 70 86 65 114 31 104 38 25 106
## [86] 69 146 107 95 66 65 40 133 31 18 19 149 49 102 129 122 150
## [103] 31 117 1 23 12 122 8 15 44 10 132 90 8 31 150 30 58
## [120] 24 28 26 85 24 126 62 145 103 33 85 15 55 112 29 21 102
## [137] 59 37 115 147 63 60 149 77 138 36 127 13 100 66
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 92 setosa (0.3866667 0.2933333 0.3200000)
## 2) Petal.Length< 2.6 58 0 setosa (1.0000000 0.0000000 0.0000000) *
## 3) Petal.Length>=2.6 92 44 virginica (0.0000000 0.4782609 0.5217391)
## 6) Petal.Width< 1.65 42 0 versicolor (0.0000000 1.0000000 0.0000000) *
## 7) Petal.Width>=1.65 50 2 virginica (0.0000000 0.0400000 0.9600000)
## 14) Petal.Width< 1.75 3 1 virginica (0.0000000 0.3333333 0.6666667)
## 28) Sepal.Length>=5.8 1 0 versicolor (0.0000000 1.0000000 0.0000000) *
## 29) Sepal.Length< 5.8 2 0 virginica (0.0000000 0.0000000 1.0000000) *
## 15) Petal.Width>=1.75 47 1 virginica (0.0000000 0.0212766 0.9787234)
## 30) Petal.Length< 4.85 5 1 virginica (0.0000000 0.2000000 0.8000000)
## 60) Sepal.Length< 5.95 1 0 versicolor (0.0000000 1.0000000 0.0000000) *
## 61) Sepal.Length>=5.95 4 0 virginica (0.0000000 0.0000000 1.0000000) *
## 31) Petal.Length>=4.85 42 0 virginica (0.0000000 0.0000000 1.0000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[20]]
## $bindx
## [1] 117 5 86 85 45 16 4 19 2 79 26 131 127 52 17 124 107
## [18] 93 130 122 51 20 75 15 60 48 62 109 91 119 87 28 119 65
## [35] 22 90 8 102 126 105 148 116 45 92 145 93 140 3 55 148 130
## [52] 135 53 21 129 56 25 82 131 27 108 141 1 45 51 98 55 93
## [69] 91 72 144 148 142 41 30 130 100 102 116 129 32 93 9 119 145
## [86] 106 11 101 137 16 20 143 129 32 109 118 133 77 131 22 45 98
## [103] 48 122 13 114 82 74 91 35 39 45 19 83 69 34 129 58 88
## [120] 81 96 16 55 95 120 120 23 62 98 136 112 96 143 107 147 107
## [137] 117 86 110 115 67 63 71 58 66 132 30 118 122 28
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 91 virginica (0.28000000 0.32666667 0.39333333)
## 2) Petal.Length< 4.85 94 46 versicolor (0.44680851 0.51063830 0.04255319)
## 4) Petal.Length< 2.6 42 0 setosa (1.00000000 0.00000000 0.00000000) *
## 5) Petal.Length>=2.6 52 4 versicolor (0.00000000 0.92307692 0.07692308)
## 10) Petal.Width< 1.65 47 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 11) Petal.Width>=1.65 5 1 virginica (0.00000000 0.20000000 0.80000000)
## 22) Sepal.Width>=3 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 23) Sepal.Width< 3 4 0 virginica (0.00000000 0.00000000 1.00000000) *
## 3) Petal.Length>=4.85 56 1 virginica (0.00000000 0.01785714 0.98214286)
## 6) Petal.Width< 1.55 4 1 virginica (0.00000000 0.25000000 0.75000000)
## 12) Sepal.Length>=6.5 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Sepal.Length< 6.5 3 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.55 52 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[21]]
## $bindx
## [1] 34 4 91 145 56 135 26 105 140 38 99 32 86 70 102 100 85
## [18] 129 142 127 68 56 52 129 133 93 129 49 29 117 99 1 130 27
## [35] 55 56 44 124 96 112 61 9 145 62 62 30 23 91 17 138 134
## [52] 106 84 148 54 52 16 103 134 36 146 1 95 124 43 111 17 37
## [69] 128 110 138 2 129 105 70 95 71 6 66 123 40 22 53 111 105
## [86] 106 149 102 55 130 143 23 96 112 146 110 133 44 40 23 37 120
## [103] 46 56 55 83 109 63 133 41 132 34 41 66 11 50 118 112 51
## [120] 93 77 69 10 23 78 84 98 16 57 49 80 50 135 34 4 96
## [137] 80 133 75 15 64 73 71 133 141 113 143 87 53 120
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 95 virginica (0.2866667 0.3466667 0.3666667)
## 2) Petal.Length< 2.35 43 0 setosa (1.0000000 0.0000000 0.0000000) *
## 3) Petal.Length>=2.35 107 52 virginica (0.0000000 0.4859813 0.5140187)
## 6) Petal.Length< 4.85 47 1 versicolor (0.0000000 0.9787234 0.0212766)
## 12) Petal.Width< 1.7 44 0 versicolor (0.0000000 1.0000000 0.0000000) *
## 13) Petal.Width>=1.7 3 1 versicolor (0.0000000 0.6666667 0.3333333)
## 26) Sepal.Length< 6.05 2 0 versicolor (0.0000000 1.0000000 0.0000000) *
## 27) Sepal.Length>=6.05 1 0 virginica (0.0000000 0.0000000 1.0000000) *
## 7) Petal.Length>=4.85 60 6 virginica (0.0000000 0.1000000 0.9000000)
## 14) Petal.Width< 1.75 14 6 virginica (0.0000000 0.4285714 0.5714286)
## 28) Petal.Length< 4.95 3 0 versicolor (0.0000000 1.0000000 0.0000000) *
## 29) Petal.Length>=4.95 11 3 virginica (0.0000000 0.2727273 0.7272727)
## 58) Petal.Width>=1.55 5 2 versicolor (0.0000000 0.6000000 0.4000000)
## 116) Sepal.Length< 6.95 3 0 versicolor (0.0000000 1.0000000 0.0000000) *
## 117) Sepal.Length>=6.95 2 0 virginica (0.0000000 0.0000000 1.0000000) *
## 59) Petal.Width< 1.55 6 0 virginica (0.0000000 0.0000000 1.0000000) *
## 15) Petal.Width>=1.75 46 0 virginica (0.0000000 0.0000000 1.0000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[22]]
## $bindx
## [1] 105 40 149 47 2 73 22 51 67 38 99 92 83 98 124 82 111
## [18] 16 3 80 1 149 83 108 33 12 99 84 3 104 33 1 149 59
## [35] 115 101 43 80 15 31 71 55 56 104 103 130 64 92 13 99 129
## [52] 131 88 102 30 137 70 74 54 99 32 146 145 69 61 119 76 96
## [69] 42 94 129 150 135 15 101 88 117 112 26 79 35 127 45 92 117
## [86] 130 40 129 78 15 100 71 72 115 142 82 24 135 41 81 34 105
## [103] 75 88 45 116 55 80 46 149 70 35 22 96 31 133 134 143 29
## [120] 27 130 109 106 30 33 79 7 31 107 60 144 136 84 39 135 63
## [137] 94 7 64 8 82 83 25 51 124 4 66 143 145 113
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 95 versicolor (0.30000000 0.36666667 0.33333333)
## 2) Petal.Length< 2.45 45 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 105 50 versicolor (0.00000000 0.52380952 0.47619048)
## 6) Petal.Length< 4.85 53 2 versicolor (0.00000000 0.96226415 0.03773585)
## 12) Sepal.Length>=4.95 52 1 versicolor (0.00000000 0.98076923 0.01923077)
## 24) Petal.Length< 4.75 49 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 25) Petal.Length>=4.75 3 1 versicolor (0.00000000 0.66666667 0.33333333)
## 50) Sepal.Length< 6.05 2 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 51) Sepal.Length>=6.05 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 13) Sepal.Length< 4.95 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Length>=4.85 52 4 virginica (0.00000000 0.07692308 0.92307692)
## 14) Petal.Width< 1.75 11 4 virginica (0.00000000 0.36363636 0.63636364)
## 28) Petal.Length< 5.35 5 1 versicolor (0.00000000 0.80000000 0.20000000)
## 56) Petal.Width>=1.55 3 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 57) Petal.Width< 1.55 2 1 versicolor (0.00000000 0.50000000 0.50000000)
## 114) Sepal.Width< 2.65 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 115) Sepal.Width>=2.65 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 29) Petal.Length>=5.35 6 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Width>=1.75 41 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[23]]
## $bindx
## [1] 64 92 23 143 60 84 51 42 35 27 51 34 26 88 50 59 72
## [18] 14 49 82 44 79 119 49 51 87 23 62 133 128 137 98 9 19
## [35] 68 146 132 21 149 150 147 149 85 112 68 123 99 118 1 148 58
## [52] 9 19 120 52 56 95 79 82 75 69 94 30 56 120 98 5 127
## [69] 44 93 42 42 83 69 145 140 1 47 45 22 113 41 70 4 66
## [86] 1 11 117 42 120 32 70 72 138 109 17 79 53 135 90 50 105
## [103] 67 77 37 98 14 49 33 15 9 18 26 45 92 32 100 127 110
## [120] 113 84 118 46 138 107 86 43 36 44 24 114 98 100 112 53 89
## [137] 12 70 67 15 9 121 2 31 80 65 91 22 97 52
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 92 versicolor (0.37333333 0.38666667 0.24000000)
## 2) Petal.Length< 2.45 56 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 94 36 versicolor (0.00000000 0.61702128 0.38297872)
## 6) Petal.Width< 1.65 62 4 versicolor (0.00000000 0.93548387 0.06451613)
## 12) Petal.Length< 4.95 56 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Petal.Length>=4.95 6 2 virginica (0.00000000 0.33333333 0.66666667)
## 26) Sepal.Width>=2.65 2 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 27) Sepal.Width< 2.65 4 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Width>=1.65 32 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[24]]
## $bindx
## [1] 131 115 52 21 65 126 85 21 5 79 57 15 70 122 37 109 67
## [18] 83 124 64 139 105 109 123 84 49 75 3 100 74 137 128 60 26
## [35] 95 57 41 16 98 99 48 35 102 85 111 100 25 108 144 15 84
## [52] 33 74 86 72 148 89 23 111 86 86 40 122 42 56 90 95 17
## [69] 37 56 84 113 109 87 139 105 143 11 25 110 111 56 43 32 47
## [86] 85 61 31 95 60 1 89 77 96 77 116 27 136 59 56 58 83
## [103] 103 54 7 69 82 82 26 96 43 12 25 77 56 105 134 143 100
## [120] 10 15 81 123 113 100 133 75 4 7 41 15 29 82 129 137 72
## [137] 113 137 23 73 104 103 123 64 16 92 14 114 44 88
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 87 versicolor (0.2933333 0.4200000 0.2866667)
## 2) Petal.Length< 2.45 44 0 setosa (1.0000000 0.0000000 0.0000000) *
## 3) Petal.Length>=2.45 106 43 versicolor (0.0000000 0.5943396 0.4056604)
## 6) Petal.Width< 1.7 64 1 versicolor (0.0000000 0.9843750 0.0156250)
## 12) Petal.Length< 5 60 0 versicolor (0.0000000 1.0000000 0.0000000) *
## 13) Petal.Length>=5 4 1 versicolor (0.0000000 0.7500000 0.2500000)
## 26) Petal.Width>=1.55 3 0 versicolor (0.0000000 1.0000000 0.0000000) *
## 27) Petal.Width< 1.55 1 0 virginica (0.0000000 0.0000000 1.0000000) *
## 7) Petal.Width>=1.7 42 0 virginica (0.0000000 0.0000000 1.0000000) *
##
## attr(,"class")
## class
## "sclass"
##
## $mtrees[[25]]
## $bindx
## [1] 134 140 104 21 55 2 40 35 145 121 63 58 28 80 123 99 134
## [18] 124 136 8 94 115 41 19 59 65 21 115 109 2 118 122 116 135
## [35] 49 134 11 59 117 34 108 36 139 63 21 96 126 92 78 43 18
## [52] 124 61 59 65 69 132 55 54 31 81 148 60 118 103 115 49 123
## [69] 83 35 103 42 148 45 99 5 11 137 121 147 130 17 139 28 100
## [86] 20 62 121 13 57 21 140 93 60 131 77 89 95 27 132 121 107
## [103] 100 70 27 111 31 75 8 81 138 31 93 126 36 144 75 70 94
## [120] 61 137 85 29 9 14 51 31 82 91 108 129 20 48 36 59 5
## [137] 79 120 93 5 33 139 65 116 38 36 104 88 51 128
##
## $btree
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 97 virginica (0.31333333 0.33333333 0.35333333)
## 2) Petal.Length< 2.45 47 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 103 50 virginica (0.00000000 0.48543689 0.51456311)
## 6) Petal.Length< 4.75 49 1 versicolor (0.00000000 0.97959184 0.02040816)
## 12) Petal.Width< 1.65 48 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 13) Petal.Width>=1.65 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 7) Petal.Length>=4.75 54 2 virginica (0.00000000 0.03703704 0.96296296)
## 14) Petal.Width< 1.45 2 1 versicolor (0.00000000 0.50000000 0.50000000)
## 28) Sepal.Length>=6.45 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 29) Sepal.Length< 6.45 1 0 virginica (0.00000000 0.00000000 1.00000000) *
## 15) Petal.Width>=1.45 52 1 virginica (0.00000000 0.01923077 0.98076923)
## 30) Petal.Width< 1.75 6 1 virginica (0.00000000 0.16666667 0.83333333)
## 60) Petal.Width>=1.65 1 0 versicolor (0.00000000 1.00000000 0.00000000) *
## 61) Petal.Width< 1.65 5 0 virginica (0.00000000 0.00000000 1.00000000) *
## 31) Petal.Width>=1.75 46 0 virginica (0.00000000 0.00000000 1.00000000) *
##
## attr(,"class")
## class
## "sclass"
##
##
## $OOB
## [1] FALSE
##
## $comb
## [1] FALSE
##
## $call
## bagging.data.frame(formula = Species ~ ., data = iris)
##
## attr(,"class")
## [1] "summary.bagging"
# predict a class label for every iris row from its four measurement columns
predictions <- predict(fit, newdata = iris[, 1:4], type = "class")
# cross-tabulate predicted (rows) against actual (columns) species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 50 0
## virginica 0 0 50
############################# Random Forest ########################################
# Random Forest is a variation on bagging of decision trees: the attributes
# available at each decision point are reduced to a random sub-sample. This
# further increases the variance of the trees, so more trees are required.
# attach the randomForest package
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.3.3
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
# make the iris data set available
data(iris)
# train a forest that predicts Species from all remaining columns
fit <- randomForest(Species ~ ., data = iris)
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## call 3 -none- call
## type 1 -none- character
## predicted 150 factor numeric
## err.rate 2000 -none- numeric
## confusion 12 -none- numeric
## votes 450 matrix numeric
## oob.times 150 -none- numeric
## classes 3 -none- character
## importance 4 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 150 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## terms 3 terms call
# predict a species for every row, using only the four measurement columns
predictions <- predict(fit, iris[, -5])
# cross-tabulate predicted species against the true species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 50 0
## virginica 0 0 50
######################### Gradient Boosted Machine ##################################
#Boosting is an ensemble method developed for classification for reducing bias where models are added to learn the misclassification errors in existing models. It has been generalized and adapted in the form of Gradient Boosted Machines (GBM) for use with CART decision trees for classification and regression.
# load the package
#library(gbm)
# load data
#data(iris)
# fit model
#fit <- gbm(Species~., data=iris, distribution="multinomial")
# summarize the fit
#print(fit)
# make predictions
#predictions <- predict(fit, iris)
# summarize accuracy
#table(predictions, iris$Species)
################################ Boosted C5.0 ######################################
# The C5.0 method is a further extension of C4.5 and the pinnacle of that line
# of methods. It was proprietary for a long time, although the code was
# released recently and is available in the C50 package.
# attach the C50 package
library(C50)
## Warning: package 'C50' was built under R version 3.3.3
# make the iris data set available
data(iris)
# train a boosted C5.0 classifier for Species with 10 boosting trials
fit <- C5.0(Species ~ ., data = iris, trials = 10)
# print the model overview
print(fit)
##
## Call:
## C5.0.formula(formula = Species ~ ., data = iris, trials = 10)
##
## Classification Tree
## Number of samples: 150
## Number of predictors: 4
##
## Number of boosting iterations: 10
## Average tree size: 4.9
##
## Non-standard options: attempt to group attributes
# predict a species for every training row (the full data frame is passed)
predictions <- predict(fit, iris)
# cross-tabulate predicted species against the true species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 50 0
## virginica 0 0 50
############################ Penalized Regression #######################
#########################################################################
########################### Ridge Regression ############################
# attach the glmnet package
library(glmnet)
## Warning: package 'glmnet' was built under R version 3.3.3
## Loading required package: Matrix
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 3.3.3
## Loaded glmnet 2.0-5
# make the longley data set available and split it into a predictor matrix
# (every column except Employed) and a one-column response matrix
data(longley)
x <- as.matrix(longley[, -7])
y <- as.matrix(longley[["Employed"]])
# fit a gaussian model with alpha = 0 (pure ridge penalty) at a single lambda
fit <- glmnet(x, y, family = "gaussian", alpha = 0, lambda = 0.001)
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## a0 1 -none- numeric
## beta 6 dgCMatrix S4
## df 1 -none- numeric
## dim 2 -none- numeric
## lambda 1 -none- numeric
## dev.ratio 1 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## call 6 -none- call
## nobs 1 -none- numeric
# predict the response for the training predictors
predictions <- predict(fit, x, type = "link")
# mean squared error on the training data
mse <- mean((y - predictions)^2)
print(mse)
## [1] 0.05919831
########## Least Absolute Shrinkage and Selection Operator (LASSO) ############
# load the package
library(lars)
## Loaded lars 1.2
# load data: predictor matrix (columns 1-6) and one-column response (column 7)
data(longley)
x <- as.matrix(longley[, 1:6])
y <- as.matrix(longley[, 7])
# fit model: compute the full LASSO path
fit <- lars(x, y, type = "lasso")
# summarize the fit: Df, Rss and Cp for every row of the path
summary(fit)
## LARS/LASSO
## Call: lars(x = x, y = y, type = "lasso")
## Df Rss Cp
## 0 1 185.009 1976.7120
## 1 2 6.642 59.4712
## 2 3 3.883 31.7832
## 3 4 3.468 29.3165
## 4 5 1.563 10.8183
## 5 4 1.339 6.4068
## 6 5 1.024 5.0186
## 7 6 0.998 6.7388
## 8 7 0.907 7.7615
## 9 6 0.847 5.1128
## 10 7 0.836 7.0000
# select the path position with the minimum training error.
# With mode = "step", predict() reads `s` as a position along the path, so the
# position of the smallest RSS is passed. The earlier code passed the model's
# degrees of freedom (fit$df) instead, which selected a different row.
best_step <- unname(which.min(fit$RSS))
# make predictions at the selected position
predictions <- predict(fit, x, s = best_step, type = "fit", mode = "step")$fit
# summarize accuracy: mean squared error on the training data
mse <- mean((y - predictions)^2)
print(mse)
# NOTE: the output previously recorded here (## [1] 0.06400169) was produced
# with s = 7, i.e. the row labelled 6 above (Rss 1.024 over 16 observations).
# Re-run to refresh it; the minimum-Rss row above (Rss 0.836) corresponds to
# an MSE of about 0.052.
################################# Elastic Net #################################
# attach the glmnet package
library(glmnet)
# make the longley data set available and split it into a predictor matrix
# (every column except Employed) and a one-column response matrix
data(longley)
x <- as.matrix(longley[, names(longley) != "Employed"])
y <- as.matrix(longley[["Employed"]])
# fit a gaussian model with alpha = 0.5 (mixed penalty) at a single lambda
fit <- glmnet(x, y, family = "gaussian", alpha = 0.5, lambda = 0.001)
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## a0 1 -none- numeric
## beta 6 dgCMatrix S4
## df 1 -none- numeric
## dim 2 -none- numeric
## lambda 1 -none- numeric
## dev.ratio 1 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## call 6 -none- call
## nobs 1 -none- numeric
# predict the response for the training predictors
predictions <- predict(fit, x, type = "link")
# mean squared error on the training data
mse <- mean((y - predictions)^2)
print(mse)
## [1] 0.0590839
################## Non-Linear Regression with Decision Trees ####################
#################################################################################
############## Classification and Regression Trees (CART) ################
####################Recursive partitioning decision tree##################
# Classification and Regression Trees (CART) split attributes based on values
# that minimize a loss function, such as the sum of squared errors.
# attach the rpart package
library(rpart)
# make the longley data set available
data(longley)
# grow a regression tree for Employed; nodes with fewer than 5 observations
# are not considered for splitting (minsplit = 5)
fit <- rpart(Employed ~ ., data = longley,
             control = rpart.control(minsplit = 5))
# print the detailed per-node summary of the tree
summary(fit)
## Call:
## rpart(formula = Employed ~ ., data = longley, control = rpart.control(minsplit = 5))
## n= 16
##
## CP nsplit rel error xerror xstd
## 1 0.78633969 0 1.00000000 1.1483356 0.22573165
## 2 0.11081853 1 0.21366031 0.3036077 0.08238183
## 3 0.06153007 2 0.10284178 0.1680315 0.06039812
## 4 0.01000000 3 0.04131171 0.1465624 0.05141589
##
## Variable importance
## GNP GNP.deflator Population Year Unemployed
## 19 19 19 19 12
## Armed.Forces
## 11
##
## Node number 1: 16 observations, complexity param=0.7863397
## mean=65.317, MSE=11.56305
## left son=2 (8 obs) right son=3 (8 obs)
## Primary splits:
## GNP.deflator < 100.6 to the left, improve=0.7863397, (0 missing)
## GNP < 381.427 to the left, improve=0.7863397, (0 missing)
## Population < 116.8035 to the left, improve=0.7863397, (0 missing)
## Year < 1954.5 to the left, improve=0.7863397, (0 missing)
## Armed.Forces < 208.2 to the left, improve=0.6143062, (0 missing)
## Surrogate splits:
## GNP < 381.427 to the left, agree=1.000, adj=1.000, (0 split)
## Population < 116.8035 to the left, agree=1.000, adj=1.000, (0 split)
## Year < 1954.5 to the left, agree=1.000, adj=1.000, (0 split)
## Unemployed < 258.9 to the left, agree=0.812, adj=0.625, (0 split)
## Armed.Forces < 208.2 to the left, agree=0.750, adj=0.500, (0 split)
##
## Node number 2: 8 observations, complexity param=0.1108185
## mean=62.30163, MSE=2.884251
## left son=4 (4 obs) right son=5 (4 obs)
## Primary splits:
## GNP.deflator < 92.85 to the left, improve=0.8885499, (0 missing)
## Population < 111.502 to the left, improve=0.8885499, (0 missing)
## Year < 1950.5 to the left, improve=0.8885499, (0 missing)
## GNP < 306.787 to the left, improve=0.8885499, (0 missing)
## Armed.Forces < 237.45 to the left, improve=0.8885499, (0 missing)
## Surrogate splits:
## GNP < 306.787 to the left, agree=1.000, adj=1.00, (0 split)
## Armed.Forces < 237.45 to the left, agree=1.000, adj=1.00, (0 split)
## Population < 111.502 to the left, agree=1.000, adj=1.00, (0 split)
## Year < 1950.5 to the left, agree=1.000, adj=1.00, (0 split)
## Unemployed < 221.2 to the right, agree=0.875, adj=0.75, (0 split)
##
## Node number 3: 8 observations, complexity param=0.06153007
## mean=68.33237, MSE=2.05688
## left son=6 (4 obs) right son=7 (4 obs)
## Primary splits:
## GNP.deflator < 111.7 to the left, improve=0.6918007, (0 missing)
## GNP < 463.625 to the left, improve=0.6918007, (0 missing)
## Population < 122.658 to the left, improve=0.6918007, (0 missing)
## Year < 1958.5 to the left, improve=0.6918007, (0 missing)
## Armed.Forces < 284.2 to the right, improve=0.3150859, (0 missing)
## Surrogate splits:
## GNP < 463.625 to the left, agree=1.000, adj=1.00, (0 split)
## Population < 122.658 to the left, agree=1.000, adj=1.00, (0 split)
## Year < 1958.5 to the left, agree=1.000, adj=1.00, (0 split)
## Unemployed < 337.45 to the left, agree=0.875, adj=0.75, (0 split)
## Armed.Forces < 260.45 to the right, agree=0.875, adj=0.75, (0 split)
##
## Node number 4: 4 observations
## mean=60.70075, MSE=0.2093052
##
## Node number 5: 4 observations
## mean=63.9025, MSE=0.4335948
##
## Node number 6: 4 observations
## mean=67.1395, MSE=0.8056747
##
## Node number 7: 4 observations
## mean=69.52525, MSE=0.4621832
# predict Employed for the training rows from the six predictor columns
predictions <- predict(fit, longley[, -7])
# mean squared error on the training data
mse <- with(longley, mean((Employed - predictions)^2))
print(mse)
## [1] 0.4776895
#########################Conditional Decision Trees########################
# Conditional Decision Trees are created using statistical tests to select
# split points on attributes rather than a loss function.
# attach the party package
library(party)
## Warning: package 'party' was built under R version 3.3.3
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
##
## Attaching package: 'modeltools'
## The following object is masked from 'package:kernlab':
##
## prior
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 3.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.3.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 3.3.3
# make the longley data set available
data(longley)
# grow a conditional inference tree for Employed with small node-size limits
fit <- ctree(
  Employed ~ .,
  data = longley,
  controls = ctree_control(minsplit = 2, minbucket = 2, testtype = "Univariate")
)
# show the (terse) summary of the fitted S4 object
summary(fit)
## Length Class Mode
## 1 BinaryTree S4
# predict Employed for the training rows from the six predictor columns
predictions <- predict(fit, longley[, -7])
# mean squared error on the training data
mse <- mean((longley[["Employed"]] - predictions)^2)
print(mse)
## [1] 0.4776895
################################ Model Trees ###################################
################################ M5P ###################################
#Model Trees create a decision tree and use a linear model at each node to make a prediction rather than using an average value.
# load the package
#library(RWeka)
# load data
#data(longley)
# fit model
#fit <- M5P(Employed~., data=longley)
# summarize the fit
#summary(fit)
# make predictions
#predictions <- predict(fit, longley[,1:6])
# summarize accuracy
#mse <- mean((longley$Employed - predictions)^2)
#print(mse)
################################ M5Rules #############################################
#Rule Systems can be created by extracting and simplifying the rules from a decision tree.
# load the package
#library(RWeka)
# load data
#data(longley)
# fit model
#fit <- M5Rules(Employed~., data=longley)
# summarize the fit
#summary(fit)
# make predictions
#predictions <- predict(fit, longley[,1:6])
# summarize accuracy
#mse <- mean((longley$Employed - predictions)^2)
#print(mse)
################################# Bagging CART #######################################
# Bootstrapped Aggregation (Bagging) is an ensemble method that creates
# multiple models of the same type from different sub-samples of the same
# dataset. The predictions from each separate model are combined together to
# provide a superior result. This approach has been shown to be particularly
# effective for high-variance methods such as decision trees.
# The following code demonstrates bagging applied to the recursive
# partitioning decision tree.
# attach the ipred package
library(ipred)
# make the longley data set available
data(longley)
# bag regression trees for Employed, passing the rpart control (minsplit = 5)
fit <- bagging(Employed ~ ., data = longley,
               control = rpart.control(minsplit = 5))
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## y 16 -none- numeric
## X 6 data.frame list
## mtrees 25 -none- list
## OOB 1 -none- logical
## comb 1 -none- logical
## call 4 -none- call
# predict Employed for the training rows from the six predictor columns
predictions <- predict(fit, longley[, -7])
# mean squared error on the training data
mse <- with(longley, mean((Employed - predictions)^2))
print(mse)
## [1] 0.4854173
############################ Random Forest ######################################
# Random Forest is a variation on bagging of decision trees: the attributes
# available at each decision point are reduced to a random sub-sample. This
# further increases the variance of the trees, so more trees are required.
# attach the randomForest package
library(randomForest)
# make the longley data set available
data(longley)
# train a regression forest that predicts Employed from all other columns
fit <- randomForest(Employed ~ ., data = longley)
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## call 3 -none- call
## type 1 -none- character
## predicted 16 -none- numeric
## mse 500 -none- numeric
## rsq 500 -none- numeric
## oob.times 16 -none- numeric
## importance 6 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 11 -none- list
## coefs 0 -none- NULL
## y 16 -none- numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## terms 3 terms call
# make predictions for the training rows from the six predictor columns
predictions <- predict(fit, longley[, 1:6])
# summarize accuracy: mean squared error on the training data
mse <- mean((longley$Employed - predictions)^2)
# report the error, as every other regression example in this file does
# (the value was previously computed but never shown)
print(mse)
########################## Gradient Boosted Machine ##############################
#Boosting is an ensemble method developed for classification for reducing bias where models are added to learn the misclassification errors in existing models. It has been generalized and adapted in the form of Gradient Boosted Machines (GBM) for use with CART decision trees for classification and regression.
#library(gbm)
# load data
#data(longley)
# fit model
#fit <- gbm(Employed~., data=longley, distribution="gaussian")
# summarize the fit
#summary(fit)
# make predictions
#predictions <- predict(fit, longley)
# summarize accuracy
#mse <- mean((longley$Employed - predictions)^2)
#print(mse)
##################################### Cubist ###################################
# attach the Cubist package
library(Cubist)
## Warning: package 'Cubist' was built under R version 3.3.3
# make the longley data set available
data(longley)
# fit a Cubist model: predictors are columns 1-6, the outcome is column 7
fit <- cubist(longley[, 1:6], longley[, 7])
# print the rule-based model and its evaluation on the training data
summary(fit)
##
## Call:
## cubist.default(x = longley[, 1:6], y = longley[, 7])
##
##
## Cubist [Release 2.07 GPL Edition] Tue May 30 15:56:22 2017
## ---------------------------------
##
## Target attribute `outcome'
##
## Read 16 cases (7 attributes) from undefined.data
##
## Model:
##
## Rule 1: [16 cases, mean 65.3170, range 60.171 to 70.551, est err 0.4828]
##
## outcome = 53.3637 + 0.0408 GNP - 0.008 Unemployed - 0.0048 Armed.Forces
##
##
## Evaluation on training data (16 cases):
##
## Average |error| 0.3881
## Relative |error| 0.13
## Correlation coefficient 0.99
##
##
## Attribute usage:
## Conds Model
##
## 100% GNP
## 100% Unemployed
## 100% Armed.Forces
##
##
## Time: 0.0 secs
# predict Employed for the training rows from the six predictor columns
predictions <- predict(fit, longley[, -7])
# mean squared error on the training data
mse <- mean((longley[["Employed"]] - predictions)^2)
print(mse)
## [1] 0.1757807
################################ Linear Classification ##########################
#################################################################################
############################### Logistic Regression #############################
# Logistic Regression is a classification method that models the probability
# of an observation belonging to one of two classes. As such, logistic
# regression is normally demonstrated with a binary classification problem
# (2 classes). It can also be used on problems with more than two classes
# (multinomial), as in this case.
# attach the VGAM package
library(VGAM)
## Warning: package 'VGAM' was built under R version 3.3.3
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:caret':
##
## predictors
## The following object is masked from 'package:kernlab':
##
## nvar
# make the iris data set available
data(iris)
# fit a multinomial model for Species using all remaining columns
fit <- vglm(Species ~ ., family = multinomial, data = iris)
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 2 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 13 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 22 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 34 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 39 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 41 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 47 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 50 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 54 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 59 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 63 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 78 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 91 diagonal elements of the working weights variable 'wz' have
## been replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 101 diagonal elements of the working weights variable 'wz'
## have been replaced by 1.819e-12
## Warning in slot(family, "linkinv")(eta, extra = extra): fitted
## probabilities numerically 0 or 1 occurred
## Warning in tfun(mu = mu, y = y, w = w, res = FALSE, eta = eta, extra =
## extra): fitted values close to 0 or 1
## Warning in slot(family, "linkinv")(eta, extra = extra): fitted
## probabilities numerically 0 or 1 occurred
## Warning in tfun(mu = mu, y = y, w = w, res = FALSE, eta = eta, extra =
## extra): fitted values close to 0 or 1
## Warning in slot(family, "linkinv")(eta, extra = extra): fitted
## probabilities numerically 0 or 1 occurred
## Warning in tfun(mu = mu, y = y, w = w, res = FALSE, eta = eta, extra =
## extra): fitted values close to 0 or 1
## Warning in slot(family, "linkinv")(eta, extra = extra): fitted
## probabilities numerically 0 or 1 occurred
## Warning in tfun(mu = mu, y = y, w = w, res = FALSE, eta = eta, extra =
## extra): fitted values close to 0 or 1
## Warning in slot(family, "linkinv")(eta, extra = extra): fitted
## probabilities numerically 0 or 1 occurred
## Warning in tfun(mu = mu, y = y, w = w, res = FALSE, eta = eta, extra =
## extra): fitted values close to 0 or 1
## Warning in slot(family, "linkinv")(eta, extra = extra): fitted
## probabilities numerically 0 or 1 occurred
## Warning in tfun(mu = mu, y = y, w = w, res = FALSE, eta = eta, extra =
## extra): fitted values close to 0 or 1
## Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 =
## Xm2, : some quantities such as z, residuals, SEs may be inaccurate due to
## convergence at a half-step
# summarize the fit: prints the call, Pearson residuals, the coefficient table
# and the residual deviance / log-likelihood shown in the output below
summary(fit)
##
## Call:
## vglm(formula = Species ~ ., family = multinomial, data = iris)
##
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## log(mu[,1]/mu[,3]) -0.0003362 7.294e-10 2.102e-09 9.960e-07 0.0003164
## log(mu[,2]/mu[,3]) -1.9700374 -3.420e-04 -4.358e-06 4.635e-04 2.5601905
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 35.361 25704.949 0.001 0.9989
## (Intercept):2 42.638 25.708 1.659 0.0972 .
## Sepal.Length:1 9.637 7631.535 0.001 0.9990
## Sepal.Length:2 2.465 2.394 1.030 0.3032
## Sepal.Width:1 12.359 3557.648 0.003 0.9972
## Sepal.Width:2 6.681 4.480 1.491 0.1359
## Petal.Length:1 -23.214 5435.364 -0.004 0.9966
## Petal.Length:2 -9.429 4.737 -1.990 0.0465 *
## Petal.Width:1 -34.102 8576.875 -0.004 0.9968
## Petal.Width:2 -18.286 9.743 -1.877 0.0605 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 2
##
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
##
## Residual deviance: 11.8985 on 290 degrees of freedom
##
## Log-likelihood: -5.9493 on 290 degrees of freedom
##
## Number of iterations: 20
##
## Reference group is level 3 of the response
# make predictions
# matrix of class-membership probabilities, one row per observation
probabilities <- predict(fit, iris[, 1:4], type = "response")
# For each row take the position of the largest probability and look up the
# species label at that position. Building a factor with the full level set
# replaces the earlier one-level-at-a-time string replacement (which relied on
# implicit integer-to-character coercion) and keeps every species in the table
# even if one is never predicted.
predictions <- factor(
  levels(iris$Species)[apply(probabilities, 1, which.max)],
  levels = levels(iris$Species)
)
# summarize accuracy: predicted species against the true species
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 49 1
## virginica 0 1 49
######################## Linear Discriminant Analysis ###########################
# LDA is a classification method that finds a linear combination of data
# attributes that best separates the data into classes.
# attach the MASS package and make the iris data set available
library(MASS)
data(iris)
# fit a linear discriminant model for Species using all remaining columns
fit <- lda(Species ~ ., data = iris)
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## prior 3 -none- numeric
## counts 3 -none- numeric
## means 12 -none- numeric
## scaling 8 -none- numeric
## lev 3 -none- character
## svd 2 -none- numeric
## N 1 -none- numeric
## call 3 -none- call
## terms 3 terms call
## xlevels 0 -none- list
# keep only the predicted class labels for the training rows
predictions <- predict(fit, iris[, -5])$class
# cross-tabulate predicted species against the true species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 1
## virginica 0 2 49
###################### Partial Least Squares Discriminant Analysis #############
#Partial Least Squares Discriminant Analysis is the application of LDA on a dimension-reducing projection of the input data (partial least squares).
# load the package
#library(caret)
#data(iris)
#x <- iris[,1:4]
#y <- iris[,5]
# fit model
#fit <- plsda(x, y, probMethod="Bayes")
# summarize the fit
#summary(fit)
# make predictions
#predictions <- predict(fit, iris[,1:4])
# summarize accuracy
#table(predictions, iris$Species)
########################### Non-Linear Classification ###########################
#################################################################################
############################# Mixture Discriminant Analysis #####################
# attach the mda package
library(mda)
## Warning: package 'mda' was built under R version 3.3.3
## Loading required package: class
## Loaded mda 0.4-9
# make the iris data set available
data(iris)
# fit a mixture discriminant model for Species using all remaining columns
fit <- mda(Species ~ ., data = iris)
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## percent.explained 4 -none- numeric
## values 8 -none- numeric
## means 36 -none- numeric
## theta.mod 32 -none- numeric
## dimension 1 -none- numeric
## sub.prior 3 -none- list
## fit 5 polyreg list
## call 3 -none- call
## weights 3 -none- list
## prior 3 table numeric
## assign.theta 3 -none- list
## deviance 1 -none- numeric
## confusion 9 table numeric
## terms 3 terms call
# predict a species for every training row from the four measurement columns
predictions <- predict(fit, iris[, names(iris) != "Species"])
# cross-tabulate predicted species against the true species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 1
## virginica 0 2 49
####################### Quadratic Discriminant Analysis #########################
# QDA seeks a quadratic relationship between attributes that maximizes the
# distance between the classes.
# attach the MASS package and make the iris data set available
library(MASS)
data(iris)
# fit a quadratic discriminant model for Species using all remaining columns
fit <- qda(Species ~ ., data = iris)
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## prior 3 -none- numeric
## counts 3 -none- numeric
## means 12 -none- numeric
## scaling 48 -none- numeric
## ldet 3 -none- numeric
## lev 3 -none- character
## N 1 -none- numeric
## call 3 -none- call
## terms 3 terms call
## xlevels 0 -none- list
# keep only the predicted class labels for the training rows
predictions <- predict(fit, iris[, -5])$class
# cross-tabulate predicted species against the true species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 1
## virginica 0 2 49
######################### Regularized Discriminant Analysis #####################
# attach the klaR package
library(klaR)
## Warning: package 'klaR' was built under R version 3.3.3
##
## Attaching package: 'klaR'
## The following object is masked from 'package:TeachingDemos':
##
## triplot
# make the iris data set available
data(iris)
# fit a regularized discriminant model with fixed gamma and lambda values
fit <- rda(Species ~ ., data = iris, gamma = 0.05, lambda = 0.01)
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## call 5 -none- call
## regularization 2 -none- numeric
## classes 3 -none- character
## prior 3 -none- numeric
## error.rate 1 -none- numeric
## varnames 4 -none- character
## means 12 -none- numeric
## covariances 48 -none- numeric
## covpooled 16 -none- numeric
## converged 1 -none- logical
## iter 1 -none- numeric
## terms 3 terms call
## xlevels 0 -none- list
# keep only the predicted class labels for the training rows
predictions <- predict(fit, iris[, -5])$class
# cross-tabulate predicted species against the true species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 1
## virginica 0 2 49
############################## Neural Network ####################################
# A Neural Network (NN) is a graph of computational units that receive inputs
# and transfer the result into an output that is passed on. The units are
# ordered into layers to connect the features of an input vector to the
# features of an output vector. With training, such as the Back-Propagation
# algorithm, neural networks can be designed and trained to model the
# underlying relationship in data.
# attach the nnet package and make the iris data set available
library(nnet)
data(iris)
# train a network with 4 hidden units, a small weight decay and at most
# 500 iterations
fit <- nnet(Species ~ ., data = iris, size = 4, decay = 0.0001, maxit = 500)
## # weights: 35
## initial value 182.809256
## iter 10 value 51.238627
## iter 20 value 5.925552
## iter 30 value 4.911488
## iter 40 value 4.739684
## iter 50 value 4.719001
## iter 60 value 4.637989
## iter 70 value 3.876958
## iter 80 value 3.518131
## iter 90 value 3.391245
## iter 100 value 3.267144
## iter 110 value 3.241626
## iter 120 value 3.150556
## iter 130 value 3.134623
## iter 140 value 3.107427
## iter 150 value 3.102760
## iter 160 value 3.094376
## iter 170 value 3.088984
## iter 180 value 3.084011
## iter 190 value 3.043167
## iter 200 value 2.539715
## iter 210 value 0.863158
## iter 220 value 0.613268
## iter 230 value 0.557568
## iter 240 value 0.529453
## iter 250 value 0.489126
## iter 260 value 0.446164
## iter 270 value 0.401867
## iter 280 value 0.393307
## iter 290 value 0.378308
## iter 300 value 0.373936
## iter 310 value 0.368782
## iter 320 value 0.366786
## iter 330 value 0.361877
## iter 340 value 0.360467
## iter 350 value 0.358882
## iter 360 value 0.357742
## iter 370 value 0.355657
## iter 380 value 0.351570
## iter 390 value 0.349251
## iter 400 value 0.348013
## iter 410 value 0.347497
## iter 420 value 0.347316
## iter 430 value 0.347097
## iter 440 value 0.346964
## iter 450 value 0.346816
## iter 460 value 0.346750
## iter 470 value 0.346662
## iter 480 value 0.346594
## iter 490 value 0.346567
## iter 500 value 0.346544
## final value 0.346544
## stopped after 500 iterations
# summarize the fit: prints the network layout, the options used and the
# weight of every connection (see the output below)
summary(fit)
## a 4-4-3 network with 35 weights
## options were - softmax modelling decay=1e-04
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1
## 3.98 8.29 1.33 -10.43 -6.04
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2
## -0.57 -0.63 -1.93 3.14 1.24
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3
## -17.35 3.21 3.36 0.99 -10.75
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4
## 8.82 0.74 8.89 0.63 -24.34
## b->o1 h1->o1 h2->o1 h3->o1 h4->o1
## 2.06 2.65 -12.05 2.63 2.07
## b->o2 h1->o2 h2->o2 h3->o2 h4->o2
## -7.87 9.70 6.18 -16.47 14.13
## b->o3 h1->o3 h2->o3 h3->o3 h4->o3
## 5.81 -12.35 5.87 13.84 -16.20
# predict a class label for every training row from the four measurements
predictions <- predict(fit, iris[, -5], type = "class")
# cross-tabulate predicted species against the true species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 50 0
## virginica 0 0 50
######################## Flexible Discriminant Analysis ##########################
# attach the mda package and make the iris data set available
library(mda)
data(iris)
# fit a flexible discriminant model for Species using all remaining columns
fit <- fda(Species ~ ., data = iris)
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## percent.explained 2 -none- numeric
## values 2 -none- numeric
## means 6 -none- numeric
## theta.mod 4 -none- numeric
## dimension 1 -none- numeric
## prior 3 table numeric
## fit 5 polyreg list
## call 3 -none- call
## terms 3 terms call
## confusion 9 table numeric
# predict a species for every training row from the four measurement columns
predictions <- predict(fit, iris[, names(iris) != "Species"])
# cross-tabulate predicted species against the true species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 1
## virginica 0 2 49
########################### Support Vector Machine ###############################
# Support Vector Machines (SVM) are a method that uses points in a transformed
# problem space that best separate classes into two groups. SVM also supports
# regression by modeling the function with a minimum amount of allowable error.
# NOTE(review): the earlier text said multi-class problems are handled by a
# one-vs-all method; kernlab's ksvm documentation describes a one-against-one
# scheme - confirm before relying on either description.
# attach the kernlab package and make the iris data set available
library(kernlab)
data(iris)
# train a support vector classifier for Species with the default settings
fit <- ksvm(Species ~ ., data = iris)
# show the (terse) summary of the fitted S4 object
summary(fit)
## Length Class Mode
## 1 ksvm S4
# predict a class label for every training row from the four measurements
predictions <- predict(fit, iris[, -5], type = "response")
# cross-tabulate predicted species against the true species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 2
## virginica 0 2 48
################################# k-Nearest Neighbors ##############################
# The k-Nearest Neighbor (kNN) method makes predictions by locating similar
# cases to a given data instance (using a similarity function) and returning
# the average or majority of the most similar data instances.
# attach the caret package and make the iris data set available
library(caret)
data(iris)
# build a 5-nearest-neighbour classifier for Species
fit <- knn3(Species ~ ., data = iris, k = 5)
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## learn 2 -none- list
## k 1 -none- numeric
## terms 3 terms call
## xlevels 0 -none- list
## theDots 0 -none- list
# predict a class label for every training row from the four measurements
predictions <- predict(fit, iris[, -5], type = "class")
# cross-tabulate predicted species against the true species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 47 2
## virginica 0 3 48
#################################### Naive Bayes ####################################
# Naive Bayes uses Bayes' Theorem to model the conditional relationship of
# each attribute to the class variable.
# attach the e1071 package
library(e1071)
## Warning: package 'e1071' was built under R version 3.3.3
# make the iris data set available
data(iris)
# fit a naive Bayes classifier for Species using all remaining columns
fit <- naiveBayes(Species ~ ., data = iris)
# list the components stored in the fitted object
summary(fit)
## Length Class Mode
## apriori 3 table numeric
## tables 4 -none- list
## levels 3 -none- character
## call 4 -none- call
# predict a species for every training row from the four measurement columns
predictions <- predict(fit, iris[, names(iris) != "Species"])
# cross-tabulate predicted species against the true species
table(predictions, iris[["Species"]])
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 47 3
## virginica 0 3 47