# Q1 ----
library(caret); library(pgmm); library(rpart); library(rpart.plot); library(ElemStatLearn); library(AppliedPredictiveModeling);library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
# Q1: fit a CART model (rpart via caret) on the cell-segmentation data and
# plot the fitted tree.
data("segmentationOriginal")

# Seed before the random split as well, so the train/test partition is
# reproducible from run to run (previously only the model fit was seeded).
set.seed(125)
inTrain <- createDataPartition(
  y = segmentationOriginal$Class, p = 0.6, list = FALSE
)
training <- segmentationOriginal[inTrain, ]
testing <- segmentationOriginal[-inTrain, ]

# Re-seed immediately before fitting so caret's resampling starts from the
# same RNG state as before.
set.seed(125)
modelFit <- train(Class ~ ., data = training, method = "rpart")

# Partial observations; none of them is passed to predict() in this script --
# presumably they are meant to be traced through the plotted tree by hand.
# NOTE(review): "TotalIntench2" uses a lowercase "ch", unlike the other
# column names here -- confirm against the names in segmentationOriginal.
a <- data.frame(TotalIntench2 = 23000, FiberWidthCh1 = 10, PerimStatusCh1 = 2)
b <- data.frame(TotalIntench2 = 50000, FiberWidthCh1 = 10, VarIntenCh4 = 100)
c <- data.frame(TotalIntench2 = 57000, FiberWidthCh1 = 8, VarIntenCh4 = 100)
d <- data.frame(FiberWidthCh1 = 8, VarIntenCh4 = 100, PerimStatusCh1 = 2)

# rpart.plot is already attached by the library() calls at the top of the file.
rpart.plot(modelFit$finalModel)

# Q3 ----
# Q3: fit an rpart tree for Area on the olive data, then predict for a single
# row made of the column means of the data set.
data("olive")
head(olive)
## Region Area Palmitic Palmitoleic Stearic Oleic Linoleic Linolenic
## 1 1 1 1075 75 226 7823 672 36
## 2 1 1 1088 73 224 7709 781 31
## 3 1 1 911 54 246 8113 549 31
## 4 1 1 966 57 240 7952 619 50
## 5 1 1 1051 67 259 7771 672 50
## 6 1 1 911 49 268 7924 678 51
## Arachidic Eicosenoic
## 1 60 29
## 2 61 29
## 3 63 29
## 4 78 35
## 5 80 46
## 6 70 44

# All remaining columns are used as predictors.
modelFit <- train(
  Area ~ .,
  data = olive,
  method = "rpart"
)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.

# colMeans() returns a named vector; transposing it gives a one-row matrix
# that converts to a one-row data frame carrying the original column names.
newdata <- as.data.frame(t(colMeans(olive)))

# Area is treated as a numeric outcome here: the recorded prediction below is
# not a whole number.
predict(modelFit$finalModel, newdata = newdata)
## 1
## 2.783282
# Q4 ----
# Q4: split SAheart in half and fit a binomial glm for chd through caret.
library(ElemStatLearn)
data(SAheart)

# Fixed seed so the 50/50 train/test split is reproducible.
set.seed(8484)
# NOTE: the index vector `train` shares its name with caret's train(); the
# call further down still works because R skips non-function objects when
# resolving a function call.
train <- sample(
  seq_len(nrow(SAheart)),
  size = nrow(SAheart) / 2,
  replace = FALSE
)
trainSA <- SAheart[train, ]
testSA <- SAheart[-train, ]

# Separate seed for the model fit (caret resamples while training).
set.seed(13234)
modelFit <- train(
  chd ~ age + alcohol + obesity + tobacco + typea + ldl,
  data = trainSA,
  method = "glm",
  family = "binomial"
)
## Warning in train.default(x, y, weights = w, ...): You are trying to do
## regression and your outcome only has two possible values Are you trying to
## do classification? If so, use a 2 level factor as your outcome column.
# Misclassification rate: the share of observations whose thresholded
# prediction (1 when prediction > 0.5, otherwise 0) differs from `values`.
#
# values:     observed outcomes, compared element-wise with the 0/1 classes.
# prediction: numeric predictions to be cut at 0.5.
# Returns the number of mismatches divided by length(values).
missClass <- function(values, prediction) {
  predicted_class <- as.numeric(prediction > 0.5)
  n_wrong <- sum(predicted_class != values)
  n_wrong / length(values)
}
# Predict on the probability scale. predict() on a glm defaults to
# type = "link" (log-odds), so without type = "response" the 0.5 cut-off
# inside missClass() would be applied to log-odds instead of probabilities.
trainSA.P <- predict(
  modelFit$finalModel,
  newdata = trainSA,
  type = "response"
)
testSA.p <- predict(
  modelFit$finalModel,
  newdata = testSA,
  type = "response"
)

# Misclassification rate on the training half, then on the test half.
# The rates recorded earlier (0.2597403 train, 0.2943723 test) came from
# link-scale predictions and no longer apply; re-run to refresh them.
missClass(trainSA$chd, trainSA.P)
missClass(testSA$chd, testSA.p)
# Q5 ----
# Q5: random forest on the vowel data and a ranking of the predictors by
# variable importance.
library(ElemStatLearn)
data(vowel.train)
data(vowel.test)

# Convert the outcome to a factor in both sets before fitting.
vowel.train$y <- as.factor(vowel.train$y)
vowel.test$y <- as.factor(vowel.test$y)

# Fixed seed so the forest is reproducible.
set.seed(33833)
modelFit <- randomForest(y ~ ., data = vowel.train)

re <- varImp(modelFit)
# Order on the importance column itself. Calling order() on the whole data
# frame depends on xtfrm() of a data frame, which recent R versions warn
# about. The result lists predictor positions in ascending order of
# importance, so the last entry is the most important predictor.
order(re$Overall)
## [1] 10 7 3 9 4 8 6 5 1 2