AppliedPredictiveModeling: v1.1.6
caret: v6.0.47 used: 6.0.76
ElemStatLearn: v2012.04-0 used: 2015.6.26
pgmm: v1.1 used: 1.2
rpart: v4.1.8 used: 4.1.10
library(AppliedPredictiveModeling)
data(segmentationOriginal)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
#names(segmentationOriginal)
# Split the data by the Case column into a list named `subset`; the training and test sets each hold about 50% of the occurrences.
# Partition the cell data into a list keyed by the values of the Case column.
subset <- split(x = segmentationOriginal, f = segmentationOriginal$Case)
dim(subset[["Train"]])
## [1] 1009 119
dim(subset[["Test"]])
## [1] 1010 119
# Fix the RNG seed before fitting.
set.seed(125)
# Fit an rpart tree for Class on every other column of the training part.
modFit <- train(Class ~ ., data = subset[["Train"]], method = "rpart")
## Loading required package: rpart
# Show the tree that was finally selected.
modFit[["finalModel"]]
## n= 1009
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 1009 373 PS (0.63032706 0.36967294)
## 2) TotalIntenCh2< 45323.5 454 34 PS (0.92511013 0.07488987) *
## 3) TotalIntenCh2>=45323.5 555 216 WS (0.38918919 0.61081081)
## 6) FiberWidthCh1< 9.673245 154 47 PS (0.69480519 0.30519481) *
## 7) FiberWidthCh1>=9.673245 401 109 WS (0.27182045 0.72817955) *
library(rattle)
## Rattle: A free graphical interface for data science with R.
## Version 5.1.0 Copyright (c) 2006-2017 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
# Draw the selected tree.
fancyRpartPlot(modFit[["finalModel"]])
a. TotalIntenCh2 = 23,000; FiberWidthCh1 = 10; PerimStatusCh1 = 2
b. TotalIntenCh2 = 50,000; FiberWidthCh1 = 10; VarIntenCh4 = 100
c. TotalIntenCh2 = 57,000; FiberWidthCh1 = 8; VarIntenCh4 = 100
d. FiberWidthCh1 = 8; VarIntenCh4 = 100; PerimStatusCh1 = 2
a. PS
b. WS
c. PS
d. Not possible to predict
- PS
- Not possible to predict
- PS
- WS
- PS
- WS
- PS
- WS
- Not possible to predict
- WS
- PS
- PS
library(pgmm)
data(olive)
# Drop the first column before modelling.
olive <- olive[, -1]
# One-row data frame holding the mean of every remaining column; it is the
# observation passed to predict() further down.
newdata <- as.data.frame(t(colMeans(olive)))
# Fit a tree for Area on all other columns. The printed summary reports
# deviance and a numeric yval per node, i.e. Area is modelled as a number.
modOlive <- rpart(Area ~ ., data = olive)
modOlive
## n= 572
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 572 3171.32000 4.599650
## 2) Eicosenoic>=6.5 323 176.82970 2.783282
## 4) Oleic>=7770.5 19 16.10526 1.315789 *
## 5) Oleic< 7770.5 304 117.25000 2.875000 *
## 3) Eicosenoic< 6.5 249 546.51410 6.955823
## 6) Linoleic>=1053.5 98 21.88776 5.336735 *
## 7) Linoleic< 1053.5 151 100.99340 8.006623
## 14) Oleic< 7895 95 23.72632 7.515789 *
## 15) Oleic>=7895 56 15.55357 8.839286 *
# Predict Area for the single row of column means.
predict(modOlive, newdata = newdata)
## 1
## 2.875
library(ElemStatLearn)
data(SAheart)
# Fix the RNG seed so the random split below is reproducible.
set.seed(8484)
# Draw half of the row indices, without replacement, for the training set.
train <- sample(
  seq_len(nrow(SAheart)),
  size = nrow(SAheart) / 2,
  replace = FALSE
)
trainSA <- SAheart[train, ]
# The rows that were not drawn form the test set.
testSA <- SAheart[-train, ]
# Misclassification rate for 0/1 outcomes.
#
# values:     observed outcomes.
# prediction: numeric predictions; anything above 0.5 counts as 1, else 0.
# Returns the number of mismatches divided by length(values).
missClass <- function(values, prediction) {
  predictedClass <- as.numeric(prediction > 0.5)
  nWrong <- sum(predictedClass != values)
  nWrong / length(values)
}
# Fix the RNG seed, then fit a binomial glm for chd through caret.
set.seed(13234)
fit <- train(
  chd ~ age + alcohol + obesity + tobacco + typea + ldl,
  method = "glm",
  family = "binomial",
  data = trainSA
)
## Warning in train.default(x, y, weights = w, ...): You are trying to do
## regression and your outcome only has two possible values Are you trying to
## do classification? If so, use a 2 level factor as your outcome column.
# Misclassification rate on the training half.
predictTrainSA <- predict(fit)
missClass(values = trainSA$chd, prediction = predictTrainSA)
## [1] 0.2727273
# Misclassification rate on the test half.
predictTestSA <- predict(fit, newdata = testSA)
missClass(values = testSA$chd, prediction = predictTestSA)
## [1] 0.3116883
## [1] 0.3116883
Training Set: 0.27
Test Set: 0.31
library(ElemStatLearn)
data(vowel.train)
data(vowel.test)
# Convert the response y to a factor in both vowel data sets.
vowel.train[["y"]] <- as.factor(vowel.train[["y"]])
vowel.test[["y"]] <- as.factor(vowel.test[["y"]])
# Fix the RNG seed ahead of the model fit.
set.seed(33833)
library(caret)
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:rattle':
##
## importance
## The following object is masked from 'package:ggplot2':
##
## margin
# Fit a random forest for y on all other columns of the training data.
vowelModel <- randomForest(y ~ ., data = vowel.train)
# Rank the predictors from most to least important. The ordering is taken on
# the importance column itself rather than on the data frame returned by
# varImp(), and TRUE is spelled out because T can be reassigned.
order(varImp(vowelModel)[["Overall"]], decreasing = TRUE)
## [1] 2 1 5 6 8 4 9 3 7 10