##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## tenure age income educ members churn
## 0 0 0 0 0 0
## 'data.frame': 1000 obs. of 6 variables:
## $ tenure : int 13 11 68 33 23 41 45 38 45 68 ...
## $ age : int 44 33 52 33 30 39 22 35 59 41 ...
## $ income : int 64 136 116 33 30 78 19 76 166 72 ...
## $ educ : int 4 5 1 2 1 2 2 2 4 1 ...
## $ members: int 2 6 2 1 4 1 5 3 5 3 ...
## $ churn : int 1 1 0 1 0 0 1 0 0 0 ...
## tenure age income educ
## Min. : 1.00 Min. :18.00 Min. : 9.00 Min. :1.000
## 1st Qu.:17.00 1st Qu.:32.00 1st Qu.: 29.00 1st Qu.:2.000
## Median :34.00 Median :40.00 Median : 47.00 Median :3.000
## Mean :35.53 Mean :41.68 Mean : 77.53 Mean :2.671
## 3rd Qu.:54.00 3rd Qu.:51.00 3rd Qu.: 83.00 3rd Qu.:4.000
## Max. :72.00 Max. :77.00 Max. :1668.00 Max. :5.000
## members churn
## Min. :1.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:0.000
## Median :2.000 Median :0.000
## Mean :2.331 Mean :0.274
## 3rd Qu.:3.000 3rd Qu.:1.000
## Max. :8.000 Max. :1.000
# Convert the churn column from an integer code to a factor, then print the
# structure of the data frame to confirm the new column type.
phone[["churn"]] <- as.factor(phone[["churn"]])
str(object = phone)
## 'data.frame': 1000 obs. of 6 variables:
## $ tenure : int 13 11 68 33 23 41 45 38 45 68 ...
## $ age : int 44 33 52 33 30 39 22 35 59 41 ...
## $ income : int 64 136 116 33 30 78 19 76 166 72 ...
## $ educ : int 4 5 1 2 1 2 2 2 4 1 ...
## $ members: int 2 6 2 1 4 1 5 3 5 3 ...
## $ churn : Factor w/ 2 levels "0","1": 2 2 1 2 1 1 2 1 1 1 ...
# Per-column summary of the full data set; churn is now reported as level
# counts because it is a factor.
summary(object = phone)
## tenure age income educ
## Min. : 1.00 Min. :18.00 Min. : 9.00 Min. :1.000
## 1st Qu.:17.00 1st Qu.:32.00 1st Qu.: 29.00 1st Qu.:2.000
## Median :34.00 Median :40.00 Median : 47.00 Median :3.000
## Mean :35.53 Mean :41.68 Mean : 77.53 Mean :2.671
## 3rd Qu.:54.00 3rd Qu.:51.00 3rd Qu.: 83.00 3rd Qu.:4.000
## Max. :72.00 Max. :77.00 Max. :1668.00 Max. :5.000
## members churn
## Min. :1.000 0:726
## 1st Qu.:1.000 1:274
## Median :2.000
## Mean :2.331
## 3rd Qu.:3.000
## Max. :8.000
# Share of each churn level across the whole data set.
prop.table(x = table(phone[["churn"]]))
##
## 0 1
## 0.726 0.274
# Randomly label every row of `phone` as 1 or 2 with probabilities 0.7 / 0.3,
# then use the labels to split the data: label 1 -> training, label 2 -> testing.
# Because each row is drawn independently, the split sizes are only
# approximately 70/30.
# NOTE(review): no set.seed() call is visible in this chunk, so the split (and
# every count reported below) will differ between runs unless a seed is set
# earlier in the file -- confirm.
ind <- sample(2, nrow(phone), replace = TRUE, prob = c(0.7, 0.3))
training <- phone[ind == 1, ]
testing <- phone[ind == 2, ]

# Class counts in the training partition.
table(training$churn)
##
## 0 1
## 520 185
# Share of each churn level within the training partition.
prop.table(x = table(training[["churn"]]))
##
## 0 1
## 0.7375887 0.2624113
# Per-column summary of the training partition only.
summary(object = training)
## tenure age income educ
## Min. : 1.00 Min. :18.00 Min. : 9.00 Min. :1.000
## 1st Qu.:17.00 1st Qu.:32.00 1st Qu.: 28.00 1st Qu.:2.000
## Median :35.00 Median :41.00 Median : 48.00 Median :3.000
## Mean :35.75 Mean :41.92 Mean : 79.49 Mean :2.661
## 3rd Qu.:54.00 3rd Qu.:52.00 3rd Qu.: 86.00 3rd Qu.:4.000
## Max. :72.00 Max. :76.00 Max. :1668.00 Max. :5.000
## members churn
## Min. :1.000 0:520
## 1st Qu.:1.000 1:185
## Median :2.000
## Mean :2.305
## 3rd Qu.:3.000
## Max. :8.000
## Loading required package: lattice
## Loaded ROSE 0.0-3
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 186 53
## 1 20 36
##
## Accuracy : 0.7525
## 95% CI : (0.6992, 0.8007)
## No Information Rate : 0.6983
## P-Value [Acc > NIR] : 0.0231183
##
## Kappa : 0.3436
## Mcnemar's Test P-Value : 0.0001802
##
## Sensitivity : 0.4045
## Specificity : 0.9029
## Pos Pred Value : 0.6429
## Neg Pred Value : 0.7782
## Prevalence : 0.3017
## Detection Rate : 0.1220
## Detection Prevalence : 0.1898
## Balanced Accuracy : 0.6537
##
## 'Positive' Class : 1
##
# Evaluate the `rfunder` model on the held-out `testing` rows, comparing its
# predictions with the observed churn labels and reporting metrics with churn
# level "1" as the positive class.
# NOTE(review): `rfunder` is defined outside this chunk; presumably it was fit
# on under-sampled training data -- confirm against its definition.
# The positive level must be a plain-quoted string: typographic quotes
# (U+2018/U+2019) are not string delimiters in R and make the line unparsable.
confusionMatrix(predict(rfunder, testing), testing$churn, positive = "1")
### Oversampling
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 173 46
## 1 33 43
##
## Accuracy : 0.7322
## 95% CI : (0.6778, 0.7819)
## No Information Rate : 0.6983
## P-Value [Acc > NIR] : 0.1133
##
## Kappa : 0.3369
## Mcnemar's Test P-Value : 0.1770
##
## Sensitivity : 0.4831
## Specificity : 0.8398
## Pos Pred Value : 0.5658
## Neg Pred Value : 0.7900
## Prevalence : 0.3017
## Detection Rate : 0.1458
## Detection Prevalence : 0.2576
## Balanced Accuracy : 0.6615
##
## 'Positive' Class : 1
##
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 135 24
## 1 71 65
##
## Accuracy : 0.678
## 95% CI : (0.6214, 0.731)
## No Information Rate : 0.6983
## P-Value [Acc > NIR] : 0.7959
##
## Kappa : 0.3354
## Mcnemar's Test P-Value : 2.364e-06
##
## Sensitivity : 0.7303
## Specificity : 0.6553
## Pos Pred Value : 0.4779
## Neg Pred Value : 0.8491
## Prevalence : 0.3017
## Detection Rate : 0.2203
## Detection Prevalence : 0.4610
## Balanced Accuracy : 0.6928
##
## 'Positive' Class : 1
##
##
## 0 1
## 333 345
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 152 34
## 1 54 55
##
## Accuracy : 0.7017
## 95% CI : (0.6459, 0.7533)
## No Information Rate : 0.6983
## P-Value [Acc > NIR] : 0.47806
##
## Kappa : 0.3345
## Mcnemar's Test P-Value : 0.04283
##
## Sensitivity : 0.6180
## Specificity : 0.7379
## Pos Pred Value : 0.5046
## Neg Pred Value : 0.8172
## Prevalence : 0.3017
## Detection Rate : 0.1864
## Detection Prevalence : 0.3695
## Balanced Accuracy : 0.6779
##
## 'Positive' Class : 1
##