# ---- Load data and exploratory analysis ----
# Reads the iris CSV and produces basic structure/summary output and plots.
# The directory is kept in one variable and the file is read through an
# absolute path, so the script no longer changes the session's working
# directory with setwd().
data_dir <- "C:/Users/pullannagari.manasa/Desktop"
irisData <- read.csv(file.path(data_dir, "irisData.csv"))

# Measurement columns as they are referenced by this script.
# NOTE(review): only the first name carries a trailing dot; this is copied
# from the original code -- confirm against names(irisData).
measure_cols <- c(
  "sepal.length.in.cm.",
  "sepal.width.in.cm",
  "petal.length.in.cm",
  "petal.width.in.cm"
)

str(irisData)
table(irisData$class)

# One histogram per measurement column.
for (col in measure_cols) {
  hist(irisData[[col]], main = paste("Histogram of", col), xlab = col)
}
plot(irisData$petal.width.in.cm)

# Install pastecs only when it is missing instead of on every run.
if (!requireNamespace("pastecs", quietly = TRUE)) {
  install.packages("pastecs")
}
library(pastecs)

# Descriptive statistics: first for every column, then measurements only.
stat.desc(irisData)
stat.desc(irisData[, measure_cols])

# Box plots are restricted to the measurement columns; the class label is
# not a numeric measurement.
boxplot(irisData[, measure_cols])
# ---- Outlier treatment for sepal width ----

# Cap values lying outside the 1.5 * IQR fences at the given percentiles.
#
# values     Numeric vector; NA entries are left untouched.
# fence_mult Multiplier applied to the IQR to build the fences.
# cap_probs  Lower/upper quantile probabilities used as replacement values.
# Returns a numeric vector of the same length as `values`.
cap_outliers <- function(values, fence_mult = 1.5, cap_probs = c(0.05, 0.95)) {
  quartiles <- quantile(values, probs = c(0.25, 0.75), na.rm = TRUE)
  caps <- quantile(values, probs = cap_probs, na.rm = TRUE)
  fence <- fence_mult * IQR(values, na.rm = TRUE)
  # Both masks are computed from the original values; a replaced value is a
  # percentile of the data, so it cannot move onto the opposite side.
  too_low <- which(values < quartiles[[1]] - fence)
  too_high <- which(values > quartiles[[2]] + fence)
  values[too_low] <- caps[[1]]
  values[too_high] <- caps[[2]]
  values
}

x <- cap_outliers(irisData$sepal.width.in.cm)
hist(x)
irisData$sepal.width.in.cm <- x
# ---- Missing value imputation ----
# Introduces artificial missing values into the four measurement columns,
# inspects the missingness pattern, and imputes with mice (predictive mean
# matching). stat.desc() comes from pastecs, which must already be attached.
library(missForest)
library(mice)
library(VIM)

# prodNA() removes values at random; seed it so the run is reproducible.
# The value matches the seed already passed to mice() below.
set.seed(500)
iris.mis <- prodNA(irisData[, -5], noNA = 0.1)
summary(iris.mis)
stat.desc(iris.mis)

# Tabular and graphical views of the missing-data pattern.
md.pattern(iris.mis)
mice_plot <- aggr(
  iris.mis,
  col = c("navyblue", "yellow"),
  numbers = TRUE,
  sortVars = TRUE,
  labels = names(iris.mis),
  cex.axis = 0.7,
  gap = 3,
  ylab = c("Missing data", "Pattern")
)

# Five imputed data sets, 50 iterations each.
imputed_Data <- mice(iris.mis, m = 5, maxit = 50, method = "pmm", seed = 500)
summary(imputed_Data)

# Use the second imputed data set and re-attach the class column (column 5
# of irisData), then switch to short snake_case column names.
completeData <- mice::complete(imputed_Data, 2)
completeData <- cbind(completeData, irisData[5])
colnames(completeData) <- c(
  "sepal_length", "sepal_width", "petal_length", "petal_width", "class"
)
# ---- Dividing data into training and testing sets ----
# Random 70/30 split of completeData into train and test.
# The seed makes the split (and every result that depends on it) repeatable.
set.seed(500)
n_rows <- nrow(completeData)
index <- sample(seq_len(n_rows), size = floor(0.7 * n_rows))
train <- completeData[index, ]
test <- completeData[-index, ]

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(train)
  View(test)
}

# Scatter-plot matrix of the four measurements, coloured by class.
# pairs() rejects character columns, so the label is used for colour only.
pairs(train[, 1:4], col = as.integer(factor(train$class)))
# ---- Decision-tree-based gradient boosting model ----
# Fits a multinomial gradient boosting model on train, predicts class
# probabilities for test, and evaluates the predictions.
library(gbm)
library(caret)

# Fit on a copy whose response is a factor so the class order is explicit.
gbm_train <- train
gbm_train$class <- factor(gbm_train$class)
class_levels <- levels(gbm_train$class)

# gbm subsamples rows at random; seed it for repeatable results.
set.seed(500)
model <- gbm(
  class ~ petal_length + sepal_length + sepal_width + petal_width,
  data = gbm_train,
  n.trees = 1000,
  interaction.depth = 2,
  distribution = "multinomial"
)
# Original author's note: important variables were petal_length, petal_width.
summary(model)

# For a multinomial fit the response-scale prediction is an
# observations x classes x 1 array; flatten it to a matrix with one
# probability column per class.
prediction <- predict(model, newdata = test[-5], n.trees = 1000,
                      type = "response")
class_prob <- round(
  matrix(prediction, nrow = nrow(test), dimnames = list(NULL, class_levels)),
  3
)

# Same layout as before: three probability columns, the winning
# probability (sc) and the winning class (category). The winner is picked
# from the probability columns only, never from the derived sc column.
prediction_round <- data.frame(class_prob, check.names = FALSE)
prediction_round$sc <- apply(class_prob, 1, max)
prediction_round$category <-
  class_levels[max.col(class_prob, ties.method = "first")]

# Shared factor levels keep the table square even if a class is never
# predicted; predictions go in rows because caret::confusionMatrix() reads
# a table as predictions (rows) by reference (columns).
predicted <- factor(prediction_round$category, levels = class_levels)
actual <- factor(test[, 5], levels = class_levels)
cm1 <- table(Predicted = predicted, Actual = actual)
print(cm1)

# Misclassification rate.
1 - sum(diag(cm1)) / sum(cm1)
# Original author reported about 93% accuracy here.
confusionMatrix(cm1)
# Multinomial logistic regression on all predictors, evaluated on test.
# confusionMatrix() comes from caret, which must already be attached.
library(nnet)

logistic <- multinom(class ~ ., data = train)
logistic

prediction_new <- data.frame(
  predict(logistic, newdata = test[, -5], type = "class")
)

# Align the actual labels with the predicted factor's levels so the table
# is square, and put predictions in rows as caret::confusionMatrix() expects.
predicted_class <- factor(prediction_new[, 1])
actual_class <- factor(test[, 5], levels = levels(predicted_class))
cm2 <- table(Predicted = predicted_class, Actual = actual_class)
print(cm2)

# Misclassification rate.
1 - sum(diag(cm2)) / sum(cm2)
# Original author reported about 93% accuracy here.
confusionMatrix(cm2)