data(iris)
library(ggplot2)
library(caret)
## Loading required package: lattice
# Fix the RNG seed: both the train/test partition and the k-means starting
# centres are drawn at random, so without a seed every run gives a different
# split, different clusters and different cluster numbering.
set.seed(1234)

# 70/30 split sampled within each Species level; list = FALSE returns the
# selected row indices as a matrix that can be used directly for subsetting.
inTrain <- createDataPartition(y = iris$Species, p = 0.7, list = FALSE)
training <- iris[inTrain, ]
testing <- iris[-inTrain, ]

# Cluster on the four measurements only; Species is dropped so the true labels
# cannot influence the clustering. `centers` is spelled out in full rather
# than relying on partial argument matching (`center`).
kMeans1 <- kmeans(subset(training, select = -c(Species)), centers = 3)
training$clusters <- as.factor(kMeans1$cluster)

# Petal width vs. petal length, coloured by assigned cluster.
# ggplot() + geom_point() replaces qplot(), which is deprecated in ggplot2.
# NOTE: cluster ids are arbitrary labels, so tables pasted from an earlier
# unseeded run may number the clusters differently.
ggplot(training, aes(x = Petal.Width, y = Petal.Length, colour = clusters)) +
  geom_point()
#Compare to real labels
# Cross-tabulate k-means cluster ids (rows) against the true species (columns).
table(kMeans1[["cluster"]], training[["Species"]])
##
## setosa versicolor virginica
## 1 0 34 8
## 2 35 0 0
## 3 0 1 27
# Fit an rpart tree that predicts the k-means cluster id from every remaining
# column; Species is excluded so the model only sees the measurements.
modFit <- train(
  clusters ~ .,
  data = training[, setdiff(names(training), "Species")],
  method = "rpart"
)
## Loading required package: rpart
# Predicted cluster (rows) vs. true species (columns) on the training set.
table(predict(modFit, newdata = training), training[["Species"]])
##
## setosa versicolor virginica
## 1 0 35 9
## 2 35 0 0
## 3 0 0 26
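# On the training set, virginica is the class recovered least cleanly: in the table above it is split between predicted clusters 1 and 3, while setosa and versicolor each map to a single cluster.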
# Predict a cluster id for every held-out row, then cross-tabulate the
# predicted cluster (rows) against the true species (columns).
testClusterPred <- predict(modFit, newdata = testing)
table(testClusterPred, testing[["Species"]])
##
## testClusterPred setosa versicolor virginica
## 1 0 15 7
## 2 15 0 0
## 3 0 0 8
# Similarly, on the test set virginica is predicted poorly (in the table above, 7 of 15 fall into the versicolor-dominated cluster 1), while the other two classes are recovered well.