# load data
data(longley)
head(longley)
## GNP.deflator GNP Unemployed Armed.Forces Population Year Employed
## 1947 83.0 234.289 235.6 159.0 107.608 1947 60.323
## 1948 88.5 259.426 232.5 145.6 108.632 1948 61.122
## 1949 88.2 258.054 368.2 161.6 109.773 1949 60.171
## 1950 89.5 284.599 335.1 165.0 110.929 1950 61.187
## 1951 96.2 328.975 209.9 309.9 112.075 1951 63.221
## 1952 98.1 346.999 193.2 359.4 113.270 1952 63.639
dim(longley)
## [1] 16 7
str(longley)
## 'data.frame': 16 obs. of 7 variables:
## $ GNP.deflator: num 83 88.5 88.2 89.5 96.2 ...
## $ GNP : num 234 259 258 285 329 ...
## $ Unemployed : num 236 232 368 335 210 ...
## $ Armed.Forces: num 159 146 162 165 310 ...
## $ Population : num 108 109 110 111 112 ...
## $ Year : int 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 ...
## $ Employed : num 60.3 61.1 60.2 61.2 63.2 ...
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
# fit model
fit <- lm(Employed~., longley)
# summarize the fit
summary(fit)
##
## Call:
## lm(formula = Employed ~ ., data = longley)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41011 -0.15767 -0.02816 0.10155 0.45539
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.482e+03 8.904e+02 -3.911 0.003560 **
## GNP.deflator 1.506e-02 8.492e-02 0.177 0.863141
## GNP -3.582e-02 3.349e-02 -1.070 0.312681
## Unemployed -2.020e-02 4.884e-03 -4.136 0.002535 **
## Armed.Forces -1.033e-02 2.143e-03 -4.822 0.000944 ***
## Population -5.110e-02 2.261e-01 -0.226 0.826212
## Year 1.829e+00 4.555e-01 4.016 0.003037 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3049 on 9 degrees of freedom
## Multiple R-squared: 0.9955, Adjusted R-squared: 0.9925
## F-statistic: 330.3 on 6 and 9 DF, p-value: 4.984e-10
# make predictions
predictions <- predict(fit, longley)
# summarize accuracy (the value computed here is the training-set mean squared error)
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.0522765
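A note on the metric: the quantity printed above is the mean squared error, not the RMSE, despite how it is often labelled. Taking the square root gives the error in the same units as Employed:

# the square root of the MSE is the actual root mean squared error
sqrt(mean((longley$Employed - predictions)^2))  # sqrt(0.0522765) is about 0.2286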
data(longley)
# fit model
base <- lm(Employed~., longley)
# summarize the fit
summary(base)
##
## Call:
## lm(formula = Employed ~ ., data = longley)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41011 -0.15767 -0.02816 0.10155 0.45539
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.482e+03 8.904e+02 -3.911 0.003560 **
## GNP.deflator 1.506e-02 8.492e-02 0.177 0.863141
## GNP -3.582e-02 3.349e-02 -1.070 0.312681
## Unemployed -2.020e-02 4.884e-03 -4.136 0.002535 **
## Armed.Forces -1.033e-02 2.143e-03 -4.822 0.000944 ***
## Population -5.110e-02 2.261e-01 -0.226 0.826212
## Year 1.829e+00 4.555e-01 4.016 0.003037 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3049 on 9 degrees of freedom
## Multiple R-squared: 0.9955, Adjusted R-squared: 0.9925
## F-statistic: 330.3 on 6 and 9 DF, p-value: 4.984e-10
# perform step-wise feature selection
fit <- step(base)
## Start: AIC=-33.22
## Employed ~ GNP.deflator + GNP + Unemployed + Armed.Forces + Population +
## Year
##
## Df Sum of Sq RSS AIC
## - GNP.deflator 1 0.00292 0.83935 -35.163
## - Population 1 0.00475 0.84117 -35.129
## - GNP 1 0.10631 0.94273 -33.305
## <none> 0.83642 -33.219
## - Year 1 1.49881 2.33524 -18.792
## - Unemployed 1 1.59014 2.42656 -18.178
## - Armed.Forces 1 2.16091 2.99733 -14.798
##
## Step: AIC=-35.16
## Employed ~ GNP + Unemployed + Armed.Forces + Population + Year
##
## Df Sum of Sq RSS AIC
## - Population 1 0.01933 0.8587 -36.799
## <none> 0.8393 -35.163
## - GNP 1 0.14637 0.9857 -34.592
## - Year 1 1.52725 2.3666 -20.578
## - Unemployed 1 2.18989 3.0292 -16.628
## - Armed.Forces 1 2.39752 3.2369 -15.568
##
## Step: AIC=-36.8
## Employed ~ GNP + Unemployed + Armed.Forces + Year
##
## Df Sum of Sq RSS AIC
## <none> 0.8587 -36.799
## - GNP 1 0.4647 1.3234 -31.879
## - Year 1 1.8980 2.7567 -20.137
## - Armed.Forces 1 2.3806 3.2393 -17.556
## - Unemployed 1 4.0491 4.9077 -10.908
# summarize the selected model
summary(fit)
##
## Call:
## lm(formula = Employed ~ GNP + Unemployed + Armed.Forces + Year,
## data = longley)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42165 -0.12457 -0.02416 0.08369 0.45268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.599e+03 7.406e+02 -4.859 0.000503 ***
## GNP -4.019e-02 1.647e-02 -2.440 0.032833 *
## Unemployed -2.088e-02 2.900e-03 -7.202 1.75e-05 ***
## Armed.Forces -1.015e-02 1.837e-03 -5.522 0.000180 ***
## Year 1.887e+00 3.828e-01 4.931 0.000449 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2794 on 11 degrees of freedom
## Multiple R-squared: 0.9954, Adjusted R-squared: 0.9937
## F-statistic: 589.8 on 4 and 11 DF, p-value: 9.5e-13
# make predictions
predictions <- predict(fit, longley)
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.05366753
# load the package
library(pls)
##
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
##
## loadings
# load data
data(longley)
# fit model
fit <- pcr(Employed~., data=longley, validation="CV")
# summarize the fit
summary(fit)
## Data: X dimension: 16 6
## Y dimension: 16 1
## Fit method: svdpc
## Number of components considered: 6
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 3.627 2.000 1.279 0.5270 0.5821 0.6097 0.4383
## adjCV 3.627 1.954 1.269 0.5204 0.5708 0.5944 0.4251
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## X 64.96 94.90 99.99 100.00 100.00 100.00
## Employed 78.42 89.73 98.51 98.56 98.83 99.55
# make predictions
predictions <- predict(fit, longley, ncomp=6)
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.0522765
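Using all 6 components simply reproduces least squares; the CV table above suggests fewer may suffice. A sketch of choosing ncomp from the cross-validation results (best_ncomp and cv_err are illustrative names; selectNcomp assumes pls >= 2.5):

# pick the component count with the lowest cross-validated RMSEP
cv_err <- RMSEP(fit, estimate="CV")
best_ncomp <- which.min(cv_err$val["CV", 1, -1])  # drop the intercept-only column
# alternatively, apply the one-sigma heuristic built into pls
# best_ncomp <- selectNcomp(fit, method="onesigma")
predictions <- predict(fit, longley, ncomp=best_ncomp)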
library(pls)
# load data
data(longley)
# fit model
fit <- plsr(Employed~., data=longley, validation="CV")
# summarize the fit
summary(fit)
## Data: X dimension: 16 6
## Y dimension: 16 1
## Fit method: kernelpls
## Number of components considered: 6
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 3.627 1.359 1.005 0.5241 0.6422 0.4946 0.4170
## adjCV 3.627 1.346 1.006 0.5184 0.6258 0.4877 0.4058
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## X 63.88 93.35 99.99 100.00 100.00 100.00
## Employed 87.91 93.70 98.51 98.65 99.16 99.55
# make predictions
predictions <- predict(fit, longley, ncomp=6)
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.0522765
# load the package
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-2
# load data
data(longley)
x <- as.matrix(longley[,1:6])
y <- as.matrix(longley[,7])
# fit model
fit <- glmnet(x, y, family="gaussian", alpha=0, lambda=0.001)
# summarize the fit
summary(fit)
## Length Class Mode
## a0 1 -none- numeric
## beta 6 dgCMatrix S4
## df 1 -none- numeric
## dim 2 -none- numeric
## lambda 1 -none- numeric
## dev.ratio 1 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## call 6 -none- call
## nobs 1 -none- numeric
# make predictions
predictions <- predict(fit, x, type="link")
# summarize accuracy
mse <- mean((y - predictions)^2)
print(mse)
## [1] 0.05919831
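The lambda=0.001 above is an arbitrary choice. glmnet ships with cv.glmnet, which cross-validates over a whole lambda path; a minimal sketch for the same ridge (alpha=0) model:

# cross-validate a lambda path and refit at the best value
set.seed(7)
cv <- cv.glmnet(x, y, family="gaussian", alpha=0)
fit <- glmnet(x, y, family="gaussian", alpha=0, lambda=cv$lambda.min)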
# load the package
library(lars)
## Loaded lars 1.2
# load data
data(longley)
x <- as.matrix(longley[,1:6])
y <- as.matrix(longley[,7])
# fit model
fit <- lars(x, y, type="lasso")
# summarize the fit
summary(fit)
## LARS/LASSO
## Call: lars(x = x, y = y, type = "lasso")
## Df Rss Cp
## 0 1 185.009 1976.7120
## 1 2 6.642 59.4712
## 2 3 3.883 31.7832
## 3 4 3.468 29.3165
## 4 5 1.563 10.8183
## 5 4 1.339 6.4068
## 6 5 1.024 5.0186
## 7 6 0.998 6.7388
## 8 7 0.907 7.7615
## 9 6 0.847 5.1128
## 10 7 0.836 7.0000
# select a step with a minimum error (this takes the df at the min-RSS step
# and uses it as the step number; see the Cp-based sketch below)
best_step <- fit$df[which.min(fit$RSS)]
# make predictions
predictions <- predict(fit, x, s=best_step, type="fit")$fit
# summarize accuracy
mse <- mean((y - predictions)^2)
print(mse)
## [1] 0.06400169
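RSS always shrinks as steps are added, so minimising it favours the final step; Mallows' Cp, printed in the summary above, penalises complexity and is the usual LARS selection criterion. A hedged alternative:

# choose the step that minimises Mallows' Cp (Cp is indexed from step 0)
best_step <- which.min(fit$Cp) - 1
predictions <- predict(fit, x, s=best_step, mode="step", type="fit")$fit
mean((y - predictions)^2)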
library(glmnet)
# load data
data(longley)
x <- as.matrix(longley[,1:6])
y <- as.matrix(longley[,7])
# fit model
fit <- glmnet(x, y, family="gaussian", alpha=0.5, lambda=0.001)
# summarize the fit
summary(fit)
## Length Class Mode
## a0 1 -none- numeric
## beta 6 dgCMatrix S4
## df 1 -none- numeric
## dim 2 -none- numeric
## lambda 1 -none- numeric
## dev.ratio 1 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## call 6 -none- call
## nobs 1 -none- numeric
# make predictions
predictions <- predict(fit, x, type="link")
# summarize accuracy
mse <- mean((y - predictions)^2)
print(mse)
## [1] 0.0590839
# load the package
library(earth)
## Loading required package: plotmo
## Loading required package: plotrix
# load data
data(longley)
# fit model
fit <- earth(Employed~., longley)
# summarize the fit
summary(fit)
## Call: earth(formula=Employed~., data=longley)
##
## coefficients
## (Intercept) -1682.60259
## Year 0.89475
## h(293.6-Unemployed) 0.01226
## h(Unemployed-293.6) -0.01596
## h(Armed.Forces-263.7) -0.01470
##
## Selected 5 of 8 terms, and 3 of 6 predictors
## Termination condition: GRSq -Inf at 8 terms
## Importance: Year, Unemployed, Armed.Forces, GNP.deflator-unused, ...
## Number of terms at each degree of interaction: 1 4 (additive model)
## GCV 0.2389853 RSS 0.7318924 GRSq 0.9818348 RSq 0.996044
# summarize the importance of input variables
evimp(fit)
## nsubsets gcv rss
## Year 4 100.0 100.0
## Unemployed 3 24.1 23.0
## Armed.Forces 2 10.4 10.8
# make predictions
predictions <- predict(fit, longley)
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.04574327
# load the package
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:modeltools':
##
## prior
# load data
data(longley)
# fit model
fit <- ksvm(Employed~., longley)
## Using automatic sigma estimation (sigest) for RBF or laplace kernel
# summarize the fit
fit
## Support Vector Machine object of class "ksvm"
##
## SV type: eps-svr (regression)
## parameter : epsilon = 0.1 cost C = 1
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.160833268897214
##
## Number of Support Vectors : 11
##
## Objective Function Value : -2.5798
## Training error : 0.014709
# make predictions
predictions <- predict(fit, longley)
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.1814229
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:kernlab':
##
## alpha
##
## Attaching package: 'caret'
## The following object is masked from 'package:pls':
##
## R2
# load data
data(longley)
# fit model
fit <- knnreg(longley[,1:6], longley[,7], k=3)
# summarize the fit
summary(fit)
## Length Class Mode
## learn 2 -none- list
## k 1 -none- numeric
## call 4 -none- call
## theDots 0 -none- list
# make predictions
predictions <- predict(fit, longley[,1:6])
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.9259962
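k-nearest neighbours is distance-based, and the longley predictors sit on very different scales (GNP in the hundreds, Population around 110), so the larger-valued columns dominate the distance. A sketch that standardises the predictors first (x_scaled is an illustrative name):

# standardise predictors so no single column dominates the distance metric
x_scaled <- scale(longley[,1:6])
fit <- knnreg(x_scaled, longley[,7], k=3)
predictions <- predict(fit, x_scaled)
mean((longley$Employed - predictions)^2)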
library(nnet)
# load data
data(longley)
x <- longley[,1:6]
y <- longley[,7]
# fit model
fit <- nnet(Employed~., longley, size=12, maxit=500, linout=TRUE, decay=0.01)
## # weights: 97
## initial value 70506.309019
## iter 10 value 189.662539
## iter 20 value 96.455806
## iter 30 value 93.079369
## iter 40 value 53.913617
## iter 50 value 9.206336
## iter 60 value 8.522869
## iter 70 value 7.479730
## iter 80 value 6.297581
## iter 90 value 5.563048
## iter 100 value 5.553295
## iter 110 value 5.537381
## iter 120 value 5.415332
## iter 130 value 5.320461
## iter 140 value 5.256244
## iter 150 value 5.141417
## iter 160 value 5.065006
## iter 170 value 5.002899
## iter 180 value 4.981752
## iter 190 value 4.956449
## iter 200 value 4.935073
## iter 210 value 4.889367
## iter 220 value 4.847443
## iter 230 value 4.833111
## iter 240 value 4.822865
## iter 250 value 4.646989
## iter 260 value 4.566455
## iter 270 value 4.562012
## iter 280 value 4.559689
## iter 290 value 4.557778
## iter 300 value 4.554763
## iter 310 value 4.552587
## iter 320 value 4.550060
## iter 330 value 4.548885
## iter 340 value 4.547771
## iter 350 value 4.546701
## iter 360 value 4.546013
## iter 370 value 4.545408
## iter 380 value 4.545100
## iter 390 value 4.544830
## iter 400 value 4.544597
## iter 410 value 4.544399
## iter 420 value 4.544301
## iter 430 value 4.544163
## iter 440 value 4.543483
## iter 450 value 4.542653
## iter 460 value 4.542058
## iter 470 value 4.541588
## iter 480 value 4.540893
## iter 490 value 4.540489
## iter 500 value 4.540332
## final value 4.540332
## stopped after 500 iterations
# summarize the fit
summary(fit)
## a 6-12-1 network with 97 weights
## options were - linear output units decay=0.01
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1
## 0.00 0.01 0.06 0.02 0.01 0.00 -0.01
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.01
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3
## 0.00 -0.01 -0.13 -0.12 0.00 -0.01 0.06
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4
## 0.00 0.09 -0.08 -0.04 0.25 0.02 -0.01
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5
## 0.00 -0.01 -0.14 0.01 0.00 -0.01 0.04
## b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.01
## b->h7 i1->h7 i2->h7 i3->h7 i4->h7 i5->h7 i6->h7
## 0.00 0.00 0.03 -0.02 -0.03 0.00 0.01
## b->h8 i1->h8 i2->h8 i3->h8 i4->h8 i5->h8 i6->h8
## 0.00 0.07 0.11 -0.01 -0.05 0.00 -0.01
## b->h9 i1->h9 i2->h9 i3->h9 i4->h9 i5->h9 i6->h9
## 0.00 0.05 0.04 -0.01 -0.01 0.14 -0.02
## b->h10 i1->h10 i2->h10 i3->h10 i4->h10 i5->h10 i6->h10
## 0.00 -0.84 0.04 0.01 0.01 0.54 0.00
## b->h11 i1->h11 i2->h11 i3->h11 i4->h11 i5->h11 i6->h11
## 0.00 0.00 0.01 0.01 0.00 0.00 0.02
## b->h12 i1->h12 i2->h12 i3->h12 i4->h12 i5->h12 i6->h12
## 0.00 0.00 0.00 0.00 0.00 0.00 0.01
## b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o h8->o h9->o
## 6.42 6.42 6.42 -0.32 6.42 -0.24 6.42 6.42 6.42 6.30
## h10->o h11->o h12->o
## 6.43 6.42 6.42
# make predictions
predictions <- predict(fit, x, type="raw")
# summarize accuracy
mse <- mean((y - predictions)^2)
print(mse)
## [1] 0.0002762231
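A training MSE this close to zero on 16 rows almost certainly signals overfitting. nnet is also sensitive to its random starting weights and to input scale, so a reproducible, standardised variant is worth sketching (longley_scaled is an illustrative name):

# fix the RNG for reproducible starting weights and standardise the inputs
set.seed(7)
longley_scaled <- as.data.frame(scale(longley[,1:6]))
longley_scaled$Employed <- longley$Employed
fit <- nnet(Employed~., longley_scaled, size=12, maxit=500, linout=TRUE, decay=0.01)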
# load the package
library(rpart)
# load data
data(longley)
# fit model
fit <- rpart(Employed~., data=longley, control=rpart.control(minsplit=5))
# summarize the fit
summary(fit)
## Call:
## rpart(formula = Employed ~ ., data = longley, control = rpart.control(minsplit = 5))
## n= 16
##
## CP nsplit rel error xerror xstd
## 1 0.78633969 0 1.00000000 1.1494573 0.24754421
## 2 0.11081853 1 0.21366031 0.4727667 0.12216905
## 3 0.06153007 2 0.10284178 0.3832298 0.11930441
## 4 0.01000000 3 0.04131171 0.2084462 0.07178521
##
## Variable importance
## GNP GNP.deflator Population Year Unemployed
## 19 19 19 19 12
## Armed.Forces
## 11
##
## Node number 1: 16 observations, complexity param=0.7863397
## mean=65.317, MSE=11.56305
## left son=2 (8 obs) right son=3 (8 obs)
## Primary splits:
## GNP.deflator < 100.6 to the left, improve=0.7863397, (0 missing)
## GNP < 381.427 to the left, improve=0.7863397, (0 missing)
## Population < 116.8035 to the left, improve=0.7863397, (0 missing)
## Year < 1954.5 to the left, improve=0.7863397, (0 missing)
## Armed.Forces < 208.2 to the left, improve=0.6143062, (0 missing)
## Surrogate splits:
## GNP < 381.427 to the left, agree=1.000, adj=1.000, (0 split)
## Population < 116.8035 to the left, agree=1.000, adj=1.000, (0 split)
## Year < 1954.5 to the left, agree=1.000, adj=1.000, (0 split)
## Unemployed < 258.9 to the left, agree=0.812, adj=0.625, (0 split)
## Armed.Forces < 208.2 to the left, agree=0.750, adj=0.500, (0 split)
##
## Node number 2: 8 observations, complexity param=0.1108185
## mean=62.30163, MSE=2.884251
## left son=4 (4 obs) right son=5 (4 obs)
## Primary splits:
## GNP.deflator < 92.85 to the left, improve=0.8885499, (0 missing)
## Population < 111.502 to the left, improve=0.8885499, (0 missing)
## Year < 1950.5 to the left, improve=0.8885499, (0 missing)
## GNP < 306.787 to the left, improve=0.8885499, (0 missing)
## Armed.Forces < 237.45 to the left, improve=0.8885499, (0 missing)
## Surrogate splits:
## GNP < 306.787 to the left, agree=1.000, adj=1.00, (0 split)
## Armed.Forces < 237.45 to the left, agree=1.000, adj=1.00, (0 split)
## Population < 111.502 to the left, agree=1.000, adj=1.00, (0 split)
## Year < 1950.5 to the left, agree=1.000, adj=1.00, (0 split)
## Unemployed < 221.2 to the right, agree=0.875, adj=0.75, (0 split)
##
## Node number 3: 8 observations, complexity param=0.06153007
## mean=68.33237, MSE=2.05688
## left son=6 (4 obs) right son=7 (4 obs)
## Primary splits:
## GNP.deflator < 111.7 to the left, improve=0.6918007, (0 missing)
## GNP < 463.625 to the left, improve=0.6918007, (0 missing)
## Population < 122.658 to the left, improve=0.6918007, (0 missing)
## Year < 1958.5 to the left, improve=0.6918007, (0 missing)
## Armed.Forces < 284.2 to the right, improve=0.3150859, (0 missing)
## Surrogate splits:
## GNP < 463.625 to the left, agree=1.000, adj=1.00, (0 split)
## Population < 122.658 to the left, agree=1.000, adj=1.00, (0 split)
## Year < 1958.5 to the left, agree=1.000, adj=1.00, (0 split)
## Unemployed < 337.45 to the left, agree=0.875, adj=0.75, (0 split)
## Armed.Forces < 260.45 to the right, agree=0.875, adj=0.75, (0 split)
##
## Node number 4: 4 observations
## mean=60.70075, MSE=0.2093052
##
## Node number 5: 4 observations
## mean=63.9025, MSE=0.4335948
##
## Node number 6: 4 observations
## mean=67.1395, MSE=0.8056747
##
## Node number 7: 4 observations
## mean=69.52525, MSE=0.4621832
library(rpart.plot)
rpart.plot(fit, main="Classification and Regression Trees")
library(rattle)
## Rattle: A free graphical interface for data mining with R.
## Version 3.4.1 Copyright (c) 2006-2014 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
fancyRpartPlot(fit)
# make predictions
predictions <- predict(fit, longley[,1:6])
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.4776895
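The CP table in the summary above also drives cost-complexity pruning, which guards against overgrowing the tree on so few rows. A minimal sketch (best_cp and pruned are illustrative names):

# prune back to the complexity value with the lowest cross-validated error
best_cp <- fit$cptable[which.min(fit$cptable[,"xerror"]), "CP"]
pruned <- prune(fit, cp=best_cp)
predictions <- predict(pruned, longley[,1:6])
mean((longley$Employed - predictions)^2)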
# load the package
library(party)
# load data
data(longley)
# fit model
fit <- ctree(Employed~., data=longley, controls=ctree_control(minsplit=2,minbucket=2,testtype="Univariate"))
# summarize the fit
summary(fit)
## Length Class Mode
## 1 BinaryTree S4
plot(fit)
# make predictions
predictions <- predict(fit, longley[,1:6])
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.4776895
# load the package
library(RWeka)
# load data
data(longley)
# fit model
fit <- M5P(Employed~., data=longley)
# summarize the fit
summary(fit)
##
## === Summary ===
##
## Correlation coefficient 0.9709
## Mean absolute error 0.6682
## Root mean squared error 0.8144
## Relative absolute error 22.16 %
## Root relative squared error 23.9491 %
## Total Number of Instances 16
# make predictions
predictions <- predict(fit, longley[,1:6])
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.663211
library(RWeka)
# load data
data(longley)
# fit model
fit <- M5Rules(Employed~., data=longley)
# summarize the fit
summary(fit)
##
## === Summary ===
##
## Correlation coefficient 0.9709
## Mean absolute error 0.6682
## Root mean squared error 0.8144
## Relative absolute error 22.16 %
## Root relative squared error 23.9491 %
## Total Number of Instances 16
# make predictions
predictions <- predict(fit, longley[,1:6])
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.663211
library(ipred)
# load data
data(longley)
# fit model
fit <- bagging(Employed~., data=longley, control=rpart.control(minsplit=5))
# summarize the fit
summary(fit)
## Length Class Mode
## y 16 -none- numeric
## X 6 data.frame list
## mtrees 25 -none- list
## OOB 1 -none- logical
## comb 1 -none- logical
## call 4 -none- call
# make predictions
predictions <- predict(fit, longley[,1:6])
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.3621898
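ipred's bagging can also report an out-of-bag error estimate, which is far less optimistic than re-predicting the training data. A sketch assuming the coob argument:

# request an out-of-bag error estimate alongside the fit
fit <- bagging(Employed~., data=longley, coob=TRUE, control=rpart.control(minsplit=5))
print(fit$err)  # out-of-bag root mean squared error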
# load the package
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
# load data
data(longley)
# fit model
fit <- randomForest(Employed~., data=longley)
# summarize the fit
summary(fit)
## Length Class Mode
## call 3 -none- call
## type 1 -none- character
## predicted 16 -none- numeric
## mse 500 -none- numeric
## rsq 500 -none- numeric
## oob.times 16 -none- numeric
## importance 6 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 11 -none- list
## coefs 0 -none- NULL
## y 16 -none- numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## terms 3 terms call
plot(fit)
# make predictions
predictions <- predict(fit, longley[,1:6])
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.3158921
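For a random forest, printing the fit and inspecting variable importance is usually more informative than summary(); the print also includes the out-of-bag MSE, a fairer estimate than the training error above:

# out-of-bag error estimate and per-variable importance
print(fit)       # reports the OOB mean of squared residuals
importance(fit)  # increase in node purity attributable to each predictor
varImpPlot(fit)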
# load the package
library(gbm)
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
##
## cluster
## Loading required package: splines
## Loading required package: parallel
## Loaded gbm 2.1.1
# load data
data(longley)
# fit model
# fit <- gbm(Employed~., data=longley, distribution="gaussian")
## Error in gbm.fit(x, y, offset = offset, distribution = distribution, w = w, : The dataset size is too small or subsampling rate is too large: nTrain*bag.fraction <= n.minobsinnode
# summarize the fit
# summary(fit)
# make predictions
# predictions <- predict(fit, longley)
# summarize accuracy
# mse <- mean((longley$Employed - predictions)^2)
# print(mse)
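The error comes from gbm's defaults (n.minobsinnode=10, bag.fraction=0.5) being too large for 16 rows. A hedged re-parameterisation that should fit on a dataset this small:

# shrink the minimum node size and disable subsampling for a tiny dataset
fit <- gbm(Employed~., data=longley, distribution="gaussian",
           n.trees=500, n.minobsinnode=2, bag.fraction=1.0)
predictions <- predict(fit, longley, n.trees=500)
mse <- mean((longley$Employed - predictions)^2)
print(mse)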
library(Cubist)
# load data
data(longley)
# fit model
fit <- cubist(longley[,1:6], longley[,7])
# summarize the fit
summary(fit)
##
## Call:
## cubist.default(x = longley[, 1:6], y = longley[, 7])
##
##
## Cubist [Release 2.07 GPL Edition] Wed Apr 13 09:32:24 2016
## ---------------------------------
##
## Target attribute `outcome'
##
## Read 16 cases (7 attributes) from undefined.data
##
## Model:
##
## Rule 1: [16 cases, mean 65.3170, range 60.171 to 70.551, est err 0.4828]
##
## outcome = 53.3637 + 0.0408 GNP - 0.008 Unemployed - 0.0048 Armed.Forces
##
##
## Evaluation on training data (16 cases):
##
## Average |error| 0.3881
## Relative |error| 0.13
## Correlation coefficient 0.99
##
##
## Attribute usage:
## Conds Model
##
## 100% GNP
## 100% Unemployed
## 100% Armed.Forces
##
##
## Time: 0.0 secs
# make predictions
predictions <- predict(fit, longley[,1:6])
# summarize accuracy
mse <- mean((longley$Employed - predictions)^2)
print(mse)
## [1] 0.1757807
# load the package
library(VGAM)
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:rattle':
##
## wine
## The following object is masked from 'package:caret':
##
## predictors
## The following object is masked from 'package:kernlab':
##
## nvar
# load data
data(iris)
# fit model
fit <- vglm(Species~., family=multinomial, data=iris)
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 2 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 13 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 22 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 34 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 39 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 41 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 47 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 50 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 54 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 59 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 63 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 78 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 91 elements replaced by 1.819e-12
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 101 elements replaced by 1.819e-12
## Warning in slot(family, "linkinv")(eta, extra): fitted probabilities
## numerically 0 or 1 occurred
## Warning in tfun(mu = mu, y = y, w = w, res = FALSE, eta = eta, extra):
## fitted values close to 0 or 1
## Warning in checkwz(wz, M = M, trace = trace, wzepsilon = control
## $wzepsilon): 121 elements replaced by 1.819e-12
## Warning in slot(family, "linkinv")(eta, extra): fitted probabilities
## numerically 0 or 1 occurred
## Warning in tfun(mu = mu, y = y, w = w, res = FALSE, eta = eta, extra):
## fitted values close to 0 or 1
## Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 = Xm2, :
## convergence not obtained in 21 iterations
# summarize the fit
summary(fit)
##
## Call:
## vglm(formula = Species ~ ., family = multinomial, data = iris)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## log(mu[,1]/mu[,3]) -2.09e-06 -1.787e-07 4.586e-08 6.329e-08 1.327e-05
## log(mu[,2]/mu[,3]) -1.97e+00 -3.382e-04 3.159e-07 4.569e-04 2.560e+00
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 34.243 42494.920 0.001 0.9994
## (Intercept):2 42.638 25.708 1.659 0.0972 .
## Sepal.Length:1 10.747 12615.952 0.001 0.9993
## Sepal.Length:2 2.465 2.394 1.030 0.3032
## Sepal.Width:1 12.815 5841.307 0.002 0.9982
## Sepal.Width:2 6.681 4.480 1.491 0.1359
## Petal.Length:1 -25.043 8946.662 -0.003 0.9978
## Petal.Length:2 -9.429 4.737 -1.990 0.0465 *
## Petal.Width:1 -36.060 14050.767 -0.003 0.9980
## Petal.Width:2 -18.286 9.743 -1.877 0.0605 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 2
##
## Names of linear predictors: log(mu[,1]/mu[,3]), log(mu[,2]/mu[,3])
##
## Dispersion Parameter for multinomial family: 1
##
## Residual deviance: 11.8985 on 290 degrees of freedom
##
## Log-likelihood: -5.9493 on 290 degrees of freedom
##
## Number of iterations: 21
# make predictions
probabilities <- predict(fit, iris[,1:4], type="response")
## Warning in object@family@linkinv(predictor, extra): fitted probabilities
## numerically 0 or 1 occurred
# map the most probable column back to the class labels
predictions <- levels(iris$Species)[apply(probabilities, 1, which.max)]
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 49 1
## virginica 0 1 49
# load the package
library(MASS)
data(iris)
# fit model
fit <- lda(Species~., data=iris)
# summarize the fit
summary(fit)
## Length Class Mode
## prior 3 -none- numeric
## counts 3 -none- numeric
## means 12 -none- numeric
## scaling 8 -none- numeric
## lev 3 -none- character
## svd 2 -none- numeric
## N 1 -none- numeric
## call 3 -none- call
## terms 3 terms call
## xlevels 0 -none- list
# make predictions
predictions <- predict(fit, iris[,1:4])$class
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 1
## virginica 0 2 49
# load the package
library(caret)
data(iris)
x <- iris[,1:4]
y <- iris[,5]
# fit model
fit <- plsda(x, y, probMethod="Bayes")
# summarize the fit
fit
## Partial least squares classification, fitted with the kernel algorithm.
## Bayes rule was used to compute class probabilities.
##
## Call:
## plsr(formula = y ~ x, ncomp = ncomp, data = tmpData)
# make predictions
predictions <- predict(fit, iris[,1:4])
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 45 3
## virginica 0 5 47
# load the package
library(mda)
## Loading required package: class
## Loaded mda 0.4-4
data(iris)
# fit model
fit <- mda(Species~., data=iris)
# summarize the fit
summary(fit)
## Length Class Mode
## percent.explained 4 -none- numeric
## values 8 -none- numeric
## means 36 -none- numeric
## theta.mod 32 -none- numeric
## dimension 1 -none- numeric
## sub.prior 3 -none- list
## fit 5 polyreg list
## call 3 -none- call
## weights 3 -none- list
## prior 3 table numeric
## assign.theta 3 -none- list
## deviance 1 -none- numeric
## confusion 9 -none- numeric
## terms 3 terms call
# make predictions
predictions <- predict(fit, iris[,1:4])
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 0
## virginica 0 2 50
# load the package
library(MASS)
data(iris)
# fit model
fit <- qda(Species~., data=iris)
# summarize the fit
summary(fit)
## Length Class Mode
## prior 3 -none- numeric
## counts 3 -none- numeric
## means 12 -none- numeric
## scaling 48 -none- numeric
## ldet 3 -none- numeric
## lev 3 -none- character
## N 1 -none- numeric
## call 3 -none- call
## terms 3 terms call
## xlevels 0 -none- list
# make predictions
predictions <- predict(fit, iris[,1:4])$class
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 1
## virginica 0 2 49
library(nnet)
data(iris)
# fit model
fit <- nnet(Species~., data=iris, size=4, decay=0.0001, maxit=500)
## # weights: 35
## initial value 217.481057
## iter 10 value 28.384743
## iter 20 value 8.137154
## iter 30 value 6.584185
## iter 40 value 6.487354
## iter 50 value 6.419851
## iter 60 value 6.351273
## iter 70 value 6.308211
## iter 80 value 6.272718
## iter 90 value 6.256841
## iter 100 value 6.151996
## iter 110 value 6.007086
## iter 120 value 5.959630
## iter 130 value 5.922555
## iter 140 value 5.890119
## iter 150 value 5.884876
## iter 160 value 5.873360
## iter 170 value 5.854814
## iter 180 value 5.850713
## iter 190 value 5.846126
## iter 200 value 5.844268
## iter 210 value 5.811941
## iter 220 value 5.445049
## iter 230 value 4.193766
## iter 240 value 3.545877
## iter 250 value 3.456726
## iter 260 value 3.390721
## iter 270 value 3.358434
## iter 280 value 3.240888
## iter 290 value 3.183066
## iter 300 value 3.141638
## iter 310 value 3.128221
## iter 320 value 3.090710
## iter 330 value 3.073255
## iter 340 value 3.067029
## iter 350 value 3.060900
## iter 360 value 3.056494
## iter 370 value 3.051524
## iter 380 value 3.040791
## iter 390 value 3.007168
## iter 400 value 2.197288
## iter 410 value 0.841178
## iter 420 value 0.643786
## iter 430 value 0.609216
## iter 440 value 0.582012
## iter 450 value 0.521656
## iter 460 value 0.502593
## iter 470 value 0.494683
## iter 480 value 0.490777
## iter 490 value 0.484214
## iter 500 value 0.477706
## final value 0.477706
## stopped after 500 iterations
# summarize the fit
summary(fit)
## Neural Network build options: softmax modelling;decay=1e-04.
##
## In the following table:
## b represents the bias associated with a node
## h1 represents hidden layer node 1
## i1 represents input node 1 (i.e., input variable 1)
## o represents the output node
##
## Weights for node h1:
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1
## 32.05 -1.21 1.62 -8.33 8.75
##
## Weights for node h2:
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2
## 0.09 -0.12 0.02 -0.21 -0.37
##
## Weights for node h3:
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3
## -13.12 -0.89 -7.53 -1.47 27.19
##
## Weights for node h4:
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4
## 0.23 0.82 1.82 -3.08 -1.69
##
## Weights for node o1:
## b->o1 h1->o1 h2->o1 h3->o1 h4->o1
## -4.44 1.64 0.33 -2.40 11.93
##
## Weights for node o2:
## b->o2 h1->o2 h2->o2 h3->o2 h4->o2
## -6.80 17.66 -0.16 -17.22 -11.83
##
## Weights for node o3:
## b->o3 h1->o3 h2->o3 h3->o3 h4->o3
## 11.24 -19.31 -0.17 19.62 -0.09
# make predictions
predictions <- predict(fit, iris[,1:4], type="class")
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 50 0
## virginica 0 0 50
library(mda)
data(iris)
# fit model
fit <- fda(Species~., data=iris)
# summarize the fit
summary(fit)
## Length Class Mode
## percent.explained 2 -none- numeric
## values 2 -none- numeric
## means 6 -none- numeric
## theta.mod 4 -none- numeric
## dimension 1 -none- numeric
## prior 3 table numeric
## fit 5 polyreg list
## call 3 -none- call
## terms 3 terms call
## confusion 9 -none- numeric
# make predictions
predictions <- predict(fit, iris[,1:4])
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 1
## virginica 0 2 49
library(kernlab)
data(iris)
# fit model
fit <- ksvm(Species~., data=iris)
## Using automatic sigma estimation (sigest) for RBF or laplace kernel
# summarize the fit
summary(fit)
## Length Class Mode
## 1 ksvm S4
# make predictions
predictions <- predict(fit, iris[,1:4], type="response")
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 2
## virginica 0 2 48
library(caret)
data(iris)
# fit model
fit <- knn3(Species~., data=iris, k=5)
# summarize the fit
summary(fit)
## Length Class Mode
## learn 2 -none- list
## k 1 -none- numeric
## terms 3 terms call
## call 4 -none- call
## xlevels 0 -none- list
## theDots 0 -none- list
# make predictions
predictions <- predict(fit, iris[,1:4], type="class")
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 47 2
## virginica 0 3 48
# load the package
library(e1071)
data(iris)
# fit model
fit <- naiveBayes(Species~., data=iris)
# summarize the fit
summary(fit)
## Length Class Mode
## apriori 3 table numeric
## tables 4 -none- list
## levels 3 -none- character
## call 4 -none- call
# make predictions
predictions <- predict(fit, iris[,1:4])
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 47 3
## virginica 0 3 47
# load the package
library(rpart)
# load data
data(iris)
dim(iris)
## [1] 150 5
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# fit model
fit <- rpart(Species~., data=iris)
# summarize the fit
summary(fit)
## Call:
## rpart(formula = Species ~ ., data = iris)
## n= 150
##
## CP nsplit rel error xerror xstd
## 1 0.50 0 1.00 1.14 0.05230679
## 2 0.44 1 0.50 0.61 0.06016090
## 3 0.01 2 0.06 0.09 0.02908608
##
## Variable importance
## Petal.Width Petal.Length Sepal.Length Sepal.Width
## 34 31 21 14
##
## Node number 1: 150 observations, complexity param=0.5
## predicted class=setosa expected loss=0.6666667 P(node) =1
## class counts: 50 50 50
## probabilities: 0.333 0.333 0.333
## left son=2 (50 obs) right son=3 (100 obs)
## Primary splits:
## Petal.Length < 2.45 to the left, improve=50.00000, (0 missing)
## Petal.Width < 0.8 to the left, improve=50.00000, (0 missing)
## Sepal.Length < 5.45 to the left, improve=34.16405, (0 missing)
## Sepal.Width < 3.35 to the right, improve=19.03851, (0 missing)
## Surrogate splits:
## Petal.Width < 0.8 to the left, agree=1.000, adj=1.00, (0 split)
## Sepal.Length < 5.45 to the left, agree=0.920, adj=0.76, (0 split)
## Sepal.Width < 3.35 to the right, agree=0.833, adj=0.50, (0 split)
##
## Node number 2: 50 observations
## predicted class=setosa expected loss=0 P(node) =0.3333333
## class counts: 50 0 0
## probabilities: 1.000 0.000 0.000
##
## Node number 3: 100 observations, complexity param=0.44
## predicted class=versicolor expected loss=0.5 P(node) =0.6666667
## class counts: 0 50 50
## probabilities: 0.000 0.500 0.500
## left son=6 (54 obs) right son=7 (46 obs)
## Primary splits:
## Petal.Width < 1.75 to the left, improve=38.969400, (0 missing)
## Petal.Length < 4.75 to the left, improve=37.353540, (0 missing)
## Sepal.Length < 6.15 to the left, improve=10.686870, (0 missing)
## Sepal.Width < 2.45 to the left, improve= 3.555556, (0 missing)
## Surrogate splits:
## Petal.Length < 4.75 to the left, agree=0.91, adj=0.804, (0 split)
## Sepal.Length < 6.15 to the left, agree=0.73, adj=0.413, (0 split)
## Sepal.Width < 2.95 to the left, agree=0.67, adj=0.283, (0 split)
##
## Node number 6: 54 observations
## predicted class=versicolor expected loss=0.09259259 P(node) =0.36
## class counts: 0 49 5
## probabilities: 0.000 0.907 0.093
##
## Node number 7: 46 observations
## predicted class=virginica expected loss=0.02173913 P(node) =0.3066667
## class counts: 0 1 45
## probabilities: 0.000 0.022 0.978
library(rpart.plot)
rpart.plot(fit, main="Classification and Regression Trees")
library(rattle)
fancyRpartPlot(fit)
# make predictions
predictions <- predict(fit, iris[,1:4], type="class")
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 49 5
## virginica 0 1 45
library(RWeka)
# load data
data(iris)
# fit model
fit <- J48(Species~., data=iris)
# summarize the fit
summary(fit)
##
## === Summary ===
##
## Correctly Classified Instances 147 98 %
## Incorrectly Classified Instances 3 2 %
## Kappa statistic 0.97
## Mean absolute error 0.0233
## Root mean squared error 0.108
## Relative absolute error 5.2482 %
## Root relative squared error 22.9089 %
## Coverage of cases (0.95 level) 98.6667 %
## Mean rel. region size (0.95 level) 34 %
## Total Number of Instances 150
##
## === Confusion Matrix ===
##
## a b c <-- classified as
## 50 0 0 | a = setosa
## 0 49 1 | b = versicolor
## 0 2 48 | c = virginica
# make predictions
predictions <- predict(fit, iris[,1:4])
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 49 2
## virginica 0 1 48
library(RWeka)
# load data
data(iris)
# fit model
fit <- PART(Species~., data=iris)
# summarize the fit
summary(fit)
##
## === Summary ===
##
## Correctly Classified Instances 146 97.3333 %
## Incorrectly Classified Instances 4 2.6667 %
## Kappa statistic 0.96
## Mean absolute error 0.0338
## Root mean squared error 0.1301
## Relative absolute error 7.6122 %
## Root relative squared error 27.5902 %
## Coverage of cases (0.95 level) 99.3333 %
## Mean rel. region size (0.95 level) 44.8889 %
## Total Number of Instances 150
##
## === Confusion Matrix ===
##
## a b c <-- classified as
## 50 0 0 | a = setosa
## 0 47 3 | b = versicolor
## 0 1 49 | c = virginica
# make predictions
predictions <- predict(fit, iris[,1:4])
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 47 1
## virginica 0 3 49
library(ipred)
# load data
data(iris)
# fit model
fit <- bagging(Species~., data=iris)
# summarize the fit
# summary(fit)  # omitted: the bagged-tree summary output is too large to display
# make predictions
predictions <- predict(fit, iris[,1:4], type="class")
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 50 0
## virginica 0 0 50
library(randomForest)
# load data
data(iris)
# fit model
fit <- randomForest(Species~., data=iris)
# summarize the fit
summary(fit)
## Length Class Mode
## call 3 -none- call
## type 1 -none- character
## predicted 150 factor numeric
## err.rate 2000 -none- numeric
## confusion 12 -none- numeric
## votes 450 matrix numeric
## oob.times 150 -none- numeric
## classes 3 -none- character
## importance 4 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 150 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## terms 3 terms call
# make predictions
predictions <- predict(fit, iris[,1:4])
# summarize accuracy
table(predictions, iris$Species)
##
## predictions setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 50 0
## virginica 0 0 50
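Re-predicting the training data yields the perfect confusion matrix above, which overstates accuracy; the forest's own out-of-bag estimate is the honest number:

# the out-of-bag confusion matrix is a fairer accuracy estimate
print(fit)     # includes the OOB error rate
fit$confusion  # OOB confusion matrix with per-class error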