library(rpart) # R package for decision Tree
library(caret) # R package for decision Tree
## Loading required package: lattice
## Loading required package: ggplot2
# Work from the directory that holds the input CSV files.
setwd("D:/Users/gkokate/Desktop/Markdown")

# read.csv() already defaults to sep = "," and header = TRUE.
build <- read.csv("Build .csv")
test1 <- read.csv("test.csv")

# Peek at the first few rows of the modelling data.
head(build)
## gponOntAniOpInfoOpticalSignalLevel gponOntAniOpInfoTxOpticalSignalLevel
## 1 -7711 1142
## 2 -7703 1288
## 3 -7703 1081
## 4 -7703 1207
## 5 -7688 1276
## 6 -7688 1282
## gponOntOltsideOpInfoRxOpticalSignalLevel X15MinDnFwdByteCounter
## 1 -171 8.888281
## 2 -170 9.544178
## 3 -170 8.915710
## 4 -171 7.555582
## 5 -170 7.475159
## 6 -169 7.236687
## X15MinUpFwdByteCounter bponOntOpInfoDistance ifOperStatus
## 1 7.245204 38 up
## 2 7.764763 38 up
## 3 7.648214 38 up
## 4 6.976296 38 up
## 5 6.646812 38 up
## 6 6.449259 38 up
# The response must be a factor for a classification tree.
build$ifOperStatus <- as.factor(build$ifOperStatus)

# Hold out 30% of the rows for validation; the remaining 70% train the tree.
set.seed(100)
split <- sample(nrow(build), floor(nrow(build) * 0.7))
train <- build[split, ]
val <- build[-split, ]

# Fit the classification tree with Gini splitting and class priors 0.3 / 0.7.
# Alternative priors that were tried: parms = list(prior = c(0.5, 0.5))
set.seed(100)
mtree <- rpart(
  formula = ifOperStatus ~ .,
  data = train,
  method = "class",
  parms = list(prior = c(0.3, 0.7), split = "gini")
)

# Predict classes for the validation rows and compare with the observed status.
rpartpred <- predict(mtree, newdata = val, type = "class")
confusionMatrix(data = rpartpred, reference = val$ifOperStatus)
## Confusion Matrix and Statistics
##
## Reference
## Prediction down up
## down 235 358
## up 35 3936
##
## Accuracy : 0.9139
## 95% CI : (0.9054, 0.9219)
## No Information Rate : 0.9408
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.5043
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.87037
## Specificity : 0.91663
## Pos Pred Value : 0.39629
## Neg Pred Value : 0.99119
## Prevalence : 0.05916
## Detection Rate : 0.05149
## Detection Prevalence : 0.12993
## Balanced Accuracy : 0.89350
##
## 'Positive' Class : down
##
# Overall accuracy of the CART model on the validation dataset is about 91%.
# For the “Down” class, sensitivity is about 87%, but precision (positive
# predictive value) is only about 40%.
library(rattle)
## Rattle: A free graphical interface for data mining with R.
## Version 4.1.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
library(RColorBrewer)
# Draw the fitted tree with prp().
prp(mtree, extra = 1, faclen = 0, cex = 0.8)
# Node-label callback: appends each node's observation count (x$frame$n)
# beneath the supplied label text.
#
# x:      object whose `frame$n` holds one count per node.
# labs:   character vector of label text, one entry per node.
# digits, varlen: accepted so the signature matches how the function is passed
#         as `node.fun`, but not used here.
# Returns a character vector of the form "<label> \n\nn = <count>".
tot_count <- function(x, labs, digits, varlen) {
  node_sizes <- x$frame$n
  paste(labs, "\n\nn =", node_sizes)
}
# Redraw the tree with tot_count() supplying the node labels, then render it
# once more with fancyRpartPlot().
prp(mtree, node.fun = tot_count, faclen = 0, cex = 0.8)
fancyRpartPlot(mtree)
# Decision Tree Rules:
# 1. If RxOpticalSignalLevel is less than -210, the network is going “Down”.
# 2. If RxOpticalSignalLevel is greater than -172 (and therefore also greater than -210), the network is “Up”.
# 3. If Distance is less than 38 and RxOpticalSignalLevel is more than -210 and less than -172, the network is “Up”.
# Display the complexity-parameter (CP) table for the fitted tree.
printcp(mtree)
##
## Classification tree:
## rpart(formula = ifOperStatus ~ ., data = train, method = "class",
## parms = list(prior = c(0.3, 0.7), split = "gini"))
##
## Variables actually used in tree construction:
## [1] bponOntOpInfoDistance
## [2] gponOntOltsideOpInfoRxOpticalSignalLevel
## [3] X15MinUpFwdByteCounter
##
## Root node error: 3194.4/10648 = 0.3
##
## n= 10648
##
## CP nsplit rel error xerror xstd
## 1 0.60477 0 1.00000 1.00000 0.037669
## 2 0.01131 1 0.39523 0.39523 0.022264
## 3 0.01000 4 0.36130 0.40131 0.020475
# Pick the complexity parameter from the CP-table row with the lowest
# cross-validated error (xerror).
min_xerror_row <- which.min(mtree$cptable[, "xerror"])
bestcp <- mtree$cptable[min_xerror_row, "CP"]

# Prune the tree at that complexity parameter.
pruned <- prune(mtree, cp = bestcp)
# prp(pruned, faclen = 0, cex = 0.8, extra = 1)

# Class predictions of the pruned tree on the validation rows.
predictions <- predict(pruned, newdata = val, type = "class")
# confusionMatrix(predictions, val$ifOperStatus)
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Class-probability predictions from the pruned tree on the validation rows.
val1 <- predict(pruned, newdata = val, type = "prob")

# Score with the second probability column against the observed status labels.
pred_val <- prediction(predictions = val1[, 2], labels = val$ifOperStatus)

# Area under the ROC curve.
perf_val <- performance(pred_val, measure = "auc")
perf_val
## An object of class "performance"
## Slot "x.name":
## [1] "None"
##
## Slot "y.name":
## [1] "Area under the ROC curve"
##
## Slot "alpha.name":
## [1] "none"
##
## Slot "x.values":
## list()
##
## Slot "y.values":
## [[1]]
## [1] 0.8626516
##
##
## Slot "alpha.values":
## list()
# Lift chart against the rate of positive predictions.
plot(
  performance(pred_val, measure = "lift", x.measure = "rpp"),
  colorize = TRUE
)

# True positive rate versus false positive rate, i.e. the ROC curve.
perf_val <- performance(pred_val, measure = "tpr", x.measure = "fpr")
plot(perf_val, col = "green", lwd = 1.5)

# KS statistic: the largest gap between TPR (y) and FPR (x) along the curve.
ks1.tree <- max(perf_val@y.values[[1]] - perf_val@x.values[[1]])
ks1.tree
## [1] 0.7253032
#Method1
library(ROSE)
## Loaded ROSE 0.0-3
# Bootstrap assessment ("BOOT") of an rpart learner on the training data,
# using the second column of the learner's predictions.
ROSE.BOOT <- ROSE.eval(
  ifOperStatus ~ .,
  data = train,
  learner = rpart,
  method.assess = "BOOT",
  extr.pred = function(obj) obj[, 2],
  seed = 1
)
# Method2
library(caret)
# 10-fold cross-validation of an rpart model with cp held at 0.2.
tc <- trainControl(method = "cv", number = 10)
rpart.grid <- expand.grid(.cp = 0.2)
train.rpart <- train(
  ifOperStatus ~ .,
  data = train,
  method = "rpart",
  trControl = tc,
  tuneGrid = rpart.grid
)
train.rpart
## CART
##
## 10648 samples
## 6 predictor
## 2 classes: 'down', 'up'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 9583, 9583, 9583, 9583, 9583, 9584, ...
## Resampling results
##
## Accuracy Kappa Accuracy SD Kappa SD
## 0.9521978 0.6105373 0.005355055 0.04073637
##
## Tuning parameter 'cp' was held constant at a value of 0.2
##
# Class-membership probabilities for the new data: with no `type` argument,
# predict() on this classification tree returns one column per class.
ptest <- predict(mtree, test1)

# pml_write_files() writes one file per element of `answers`, so `answers`
# needs exactly one value per test row. Flattening the probability matrix with
# as.vector() would instead give two numbers per row (every "down" probability
# followed by every "up" probability), so the predicted class label is
# requested explicitly.
answers <- as.character(predict(mtree, test1, type = "class"))
# Write each element of `x` to its own text file in the working directory.
#
# Element i is written to "problem_id_<i>.txt" without quotes, row names or
# column names.
#
# x: vector of values to write, one file per element.
# Returns NULL invisibly; called for its side effect of creating files.
pml_write_files <- function(x) {
  # seq_along() gives an empty sequence for zero-length input, whereas
  # 1:length(x) would iterate over c(1, 0) and write two bogus files.
  for (i in seq_along(x)) {
    filename <- paste0("problem_id_", i, ".txt")
    write.table(x[i], file = filename, quote = FALSE, row.names = FALSE,
                col.names = FALSE)
  }
}
# Write every element of `answers` to its own problem_id_<i>.txt file.
pml_write_files(answers)
# Predicted class probabilities for the new data (columns: down, up)
ptest
## down up
## 1 0.07318718 0.9268128
## 2 0.55563334 0.4443667
## 3 0.07318718 0.9268128
## 4 0.06539755 0.9346025
## 5 0.07318718 0.9268128
## 6 0.55563334 0.4443667
## 7 0.07318718 0.9268128
## 8 0.06539755 0.9346025
## 9 0.07318718 0.9268128
## 10 0.06539755 0.9346025
## 11 0.07318718 0.9268128
## 12 0.55563334 0.4443667
## 13 0.07318718 0.9268128
## 14 0.55563334 0.4443667
## 15 0.07318718 0.9268128
## 16 0.55563334 0.4443667
## 17 0.07318718 0.9268128
## 18 0.55563334 0.4443667
## 19 0.07318718 0.9268128
## 20 0.55563334 0.4443667
## 21 0.07318718 0.9268128
## 22 0.55563334 0.4443667
## 23 0.07318718 0.9268128
## 24 0.55563334 0.4443667
## 25 0.07318718 0.9268128
## 26 0.55563334 0.4443667
## 27 0.07318718 0.9268128
## 28 0.55563334 0.4443667
## 29 0.07318718 0.9268128
## 30 0.55563334 0.4443667
## 31 0.07318718 0.9268128
## 32 0.55563334 0.4443667
## 33 0.07318718 0.9268128
## 34 0.55563334 0.4443667
## 35 0.07318718 0.9268128
## 36 0.55563334 0.4443667
## 37 0.07318718 0.9268128
## 38 0.55563334 0.4443667
## 39 0.07318718 0.9268128
## 40 0.55563334 0.4443667
## 41 0.07318718 0.9268128
## 42 0.55563334 0.4443667
## 43 0.07318718 0.9268128
## 44 0.55563334 0.4443667
## 45 0.07318718 0.9268128
## 46 0.55563334 0.4443667
## 47 0.07318718 0.9268128
## 48 0.07318718 0.9268128
## 49 0.07318718 0.9268128
## 50 0.55563334 0.4443667
## 51 0.07318718 0.9268128
## 52 0.55563334 0.4443667
## 53 0.07318718 0.9268128
## 54 0.55563334 0.4443667
## 55 0.07318718 0.9268128
## 56 0.55563334 0.4443667
## 57 0.07318718 0.9268128
## 58 0.55563334 0.4443667
## 59 0.07318718 0.9268128
## 60 0.55563334 0.4443667
## 61 0.07318718 0.9268128
## 62 0.55563334 0.4443667
## 63 0.07318718 0.9268128
## 64 0.55563334 0.4443667
## 65 0.07318718 0.9268128
## 66 0.06539755 0.9346025
## 67 0.07318718 0.9268128
## 68 0.55563334 0.4443667
## 69 0.07318718 0.9268128
## 70 0.55563334 0.4443667
## 71 0.07318718 0.9268128
## 72 0.55563334 0.4443667
## 73 0.07318718 0.9268128
## 74 0.55563334 0.4443667
## 75 0.07318718 0.9268128
## 76 0.06539755 0.9346025
## 77 0.07318718 0.9268128
## 78 0.55563334 0.4443667
## 79 0.07318718 0.9268128
## 80 0.55563334 0.4443667
## 81 0.07318718 0.9268128
## 82 0.06539755 0.9346025
## 83 0.07318718 0.9268128
## 84 0.55563334 0.4443667
## 85 0.07318718 0.9268128
## 86 0.55563334 0.4443667
## 87 0.07318718 0.9268128
## 88 0.55563334 0.4443667
## 89 0.07318718 0.9268128
## 90 0.55563334 0.4443667
## 91 0.07318718 0.9268128
## 92 0.55563334 0.4443667
## 93 0.07318718 0.9268128
## 94 0.55563334 0.4443667
## 95 0.07318718 0.9268128
## 96 0.55563334 0.4443667
## 97 0.07318718 0.9268128
## 98 0.55563334 0.4443667
## 99 0.07318718 0.9268128
## 100 0.55563334 0.4443667
## 101 0.07318718 0.9268128
## 102 0.55563334 0.4443667
## 103 0.07318718 0.9268128
## 104 0.55563334 0.4443667
## 105 0.07318718 0.9268128
## 106 0.55563334 0.4443667
## 107 0.55563334 0.4443667
## 108 0.07318718 0.9268128
## 109 0.55563334 0.4443667
## 110 0.07318718 0.9268128
## 111 0.55563334 0.4443667
## 112 0.07318718 0.9268128
## 113 0.55563334 0.4443667
## 114 0.07318718 0.9268128
## 115 0.55563334 0.4443667
## 116 0.07318718 0.9268128
## 117 0.55563334 0.4443667
## 118 0.07318718 0.9268128
## 119 0.06539755 0.9346025
## 120 0.07318718 0.9268128