``` r
# Read the customer churn dataset from the current working directory.
# NOTE(review): the doubled dots in names such as "Call..Failure" presumably
# come from read.csv() sanitising the CSV headers - confirm against the file.
data <- read.csv(file = "Customer Churn.csv")
# Inspect the structure: dimensions, column types and leading values.
str(data)
## 'data.frame': 3150 obs. of 14 variables:
## $ Call..Failure : int 8 0 10 10 3 11 4 13 7 7 ...
## $ Complains : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Subscription..Length : int 38 39 37 38 38 38 38 37 38 38 ...
## $ Charge..Amount : int 0 0 0 0 0 1 0 2 0 1 ...
## $ Seconds.of.Use : int 4370 318 2453 4198 2393 3775 2360 9115 13773 4515 ...
## $ Frequency.of.use : int 71 5 60 66 58 82 39 121 169 83 ...
## $ Frequency.of.SMS : int 5 7 359 1 2 32 285 144 0 2 ...
## $ Distinct.Called.Numbers: int 17 4 24 35 33 28 18 43 44 25 ...
## $ Age.Group : int 3 2 3 1 1 3 3 3 3 3 ...
## $ Tariff.Plan : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Status : int 1 2 1 1 1 1 1 1 1 1 ...
## $ Age : int 30 25 30 15 15 30 30 30 30 30 ...
## $ Customer.Value : num 198 46 1537 240 146 ...
## $ Churn : int 0 0 0 0 0 0 0 0 0 0 ...
# Print per-column summary statistics (min, quartiles, mean, max).
summary(data)
## Call..Failure Complains Subscription..Length Charge..Amount
## Min. : 0.000 Min. :0.00000 Min. : 3.00 Min. : 0.0000
## 1st Qu.: 1.000 1st Qu.:0.00000 1st Qu.:30.00 1st Qu.: 0.0000
## Median : 6.000 Median :0.00000 Median :35.00 Median : 0.0000
## Mean : 7.628 Mean :0.07651 Mean :32.54 Mean : 0.9429
## 3rd Qu.:12.000 3rd Qu.:0.00000 3rd Qu.:38.00 3rd Qu.: 1.0000
## Max. :36.000 Max. :1.00000 Max. :47.00 Max. :10.0000
## Seconds.of.Use Frequency.of.use Frequency.of.SMS Distinct.Called.Numbers
## Min. : 0 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1391 1st Qu.: 27.00 1st Qu.: 6.00 1st Qu.:10.00
## Median : 2990 Median : 54.00 Median : 21.00 Median :21.00
## Mean : 4472 Mean : 69.46 Mean : 73.17 Mean :23.51
## 3rd Qu.: 6478 3rd Qu.: 95.00 3rd Qu.: 87.00 3rd Qu.:34.00
## Max. :17090 Max. :255.00 Max. :522.00 Max. :97.00
## Age.Group Tariff.Plan Status Age Customer.Value
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :15 Min. : 0.0
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:25 1st Qu.: 113.8
## Median :3.000 Median :1.000 Median :1.000 Median :30 Median : 228.5
## Mean :2.826 Mean :1.078 Mean :1.248 Mean :31 Mean : 471.0
## 3rd Qu.:3.000 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:30 3rd Qu.: 788.4
## Max. :5.000 Max. :2.000 Max. :2.000 Max. :55 Max. :2165.3
## Churn
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.1571
## 3rd Qu.:0.0000
## Max. :1.0000
# Check for missing values: count the NA entries in each column.
colSums(is.na(data))
## Call..Failure Complains Subscription..Length
## 0 0 0
## Charge..Amount Seconds.of.Use Frequency.of.use
## 0 0 0
## Frequency.of.SMS Distinct.Called.Numbers Age.Group
## 0 0 0
## Tariff.Plan Status Age
## 0 0 0
## Customer.Value Churn
## 0 0
# Remove any row that contains a missing value.
data <- na.omit(data)
# Convert the categorical codes to factors.
data[c("Age.Group", "Churn")] <- lapply(
  data[c("Age.Group", "Churn")],
  as.factor
)
# Build the modelling frame from the cleaned data, then remove the columns
# that are not allowed in the model.
# NOTE(review): the reason each column is excluded is not recorded here
# (Age presumably because it mirrors Age.Group) - confirm with the author.
data_model <- data
data_model[c("Age", "Customer.Value", "Churn")] <- list(NULL)
str(data_model)
## 'data.frame': 3150 obs. of 11 variables:
## $ Call..Failure : int 8 0 10 10 3 11 4 13 7 7 ...
## $ Complains : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Subscription..Length : int 38 39 37 38 38 38 38 37 38 38 ...
## $ Charge..Amount : int 0 0 0 0 0 1 0 2 0 1 ...
## $ Seconds.of.Use : int 4370 318 2453 4198 2393 3775 2360 9115 13773 4515 ...
## $ Frequency.of.use : int 71 5 60 66 58 82 39 121 169 83 ...
## $ Frequency.of.SMS : int 5 7 359 1 2 32 285 144 0 2 ...
## $ Distinct.Called.Numbers: int 17 4 24 35 33 28 18 43 44 25 ...
## $ Age.Group : Factor w/ 5 levels "1","2","3","4",..: 3 2 3 1 1 3 3 3 3 3 ...
## $ Tariff.Plan : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Status : int 1 2 1 1 1 1 1 1 1 1 ...
# Standardise every numeric column of data_model (centre to mean 0, scale to
# sd 1). Age.Group is a factor and is therefore left untouched.
# vapply() guarantees a logical mask regardless of input, unlike sapply().
num_vars <- vapply(data_model, is.numeric, logical(1))
# List-style indexing keeps a data frame even if only one column is numeric.
# NOTE(review): integer-coded categorical columns (Complains, Tariff.Plan,
# Status) are numeric and so are standardised as well - confirm this is
# intended rather than treating them as factors.
data_model[num_vars] <- scale(data_model[num_vars])
# nnet provides multinom() for multinomial logistic regression.
library(nnet)
# Fit Age.Group against all other columns of data_model. The coefficient
# tables that follow have rows for levels 2-5, i.e. level "1" is the baseline.
model_multi <- multinom(Age.Group ~ ., data = data_model)
## # weights: 60 (44 variable)
## initial value 5069.729424
## iter 10 value 3607.638072
## iter 20 value 3528.999914
## iter 30 value 3387.385780
## iter 40 value 3328.669095
## iter 50 value 3259.459471
## iter 60 value 3255.455669
## iter 70 value 3255.126173
## iter 80 value 3255.098697
## final value 3255.098484
## converged
# Print the fitted coefficients, their standard errors, deviance and AIC.
summary(model_multi)
## Call:
## multinom(formula = Age.Group ~ ., data = data_model)
##
## Coefficients:
## (Intercept) Call..Failure Complains Subscription..Length Charge..Amount
## 2 21.11019 -1.574715 0.5857639 -1.888496 3.777163
## 3 21.49741 -1.705091 0.7523498 -1.810097 4.786159
## 4 19.94301 -2.379109 0.6295184 -2.070446 5.294405
## 5 15.22615 -2.570622 0.5737230 -1.775352 6.127930
## Seconds.of.Use Frequency.of.use Frequency.of.SMS Distinct.Called.Numbers
## 2 2.9916490 -0.25968724 6.981129 -1.3374498
## 3 2.7728960 -0.01277623 7.177277 -1.6818177
## 4 2.8204878 -0.38921692 6.741789 -0.7327209
## 5 0.8787258 1.50964839 5.927897 -1.2892302
## Tariff.Plan Status
## 2 -2.438523 24.20021
## 3 -3.103421 24.24388
## 4 -3.467272 24.75829
## 5 -3.338364 18.00278
##
## Std. Errors:
## (Intercept) Call..Failure Complains Subscription..Length Charge..Amount
## 2 15.43357 0.2661690 0.3542356 0.2753179 0.4437952
## 3 15.43356 0.2669118 0.3540755 0.2757455 0.4469985
## 4 15.43369 0.2831568 0.3580439 0.2793101 0.4551701
## 5 46.27145 0.3176629 0.3742456 0.2861657 0.4680975
## Seconds.of.Use Frequency.of.use Frequency.of.SMS Distinct.Called.Numbers
## 2 0.6930049 0.6422911 1.122955 0.1576682
## 3 0.6952127 0.6450923 1.123354 0.1617052
## 4 0.7193688 0.6749532 1.125521 0.1611625
## 5 0.7630820 0.7308220 1.139993 0.1935210
## Tariff.Plan Status
## 2 0.3140319 26.84397
## 3 0.3176411 26.84396
## 4 0.3391701 26.84401
## 5 0.3357117 80.52627
##
## Residual Deviance: 6510.197
## AIC: 6598.197
# Wald z statistics: coefficient divided by its standard error.
# summary() is computed once and reused instead of being evaluated twice.
model_summary <- summary(model_multi)
z <- model_summary$coefficients / model_summary$standard.errors
# Two-sided p-values taken from the upper tail directly. The earlier form
# `(1 - pnorm(abs(z))) * 2` loses all precision for large |z| and returns
# exactly 0; lower.tail = FALSE keeps the small tail probabilities.
# NOTE(review): the printed table that follows was recorded with the earlier
# form, so its exact-zero entries will show as very small positive values
# when the script is re-run.
p_value <- 2 * pnorm(abs(z), lower.tail = FALSE)
p_value
## (Intercept) Call..Failure Complains Subscription..Length Charge..Amount
## 2 0.1713717 3.294206e-09 0.09820893 6.918466e-12 0
## 3 0.1636501 1.678293e-10 0.03360089 5.225109e-11 0
## 4 0.1962972 0.000000e+00 0.07871080 1.236788e-13 0
## 5 0.7421092 6.661338e-16 0.12527291 5.506993e-10 0
## Seconds.of.Use Frequency.of.use Frequency.of.SMS Distinct.Called.Numbers
## 2 1.582192e-05 0.68598187 5.075571e-10 0.000000e+00
## 3 6.647619e-05 0.98419871 1.668079e-10 0.000000e+00
## 4 8.826228e-05 0.56417077 2.099334e-09 5.455208e-06
## 5 2.495067e-01 0.03885818 1.993534e-07 2.701883e-11
## Tariff.Plan Status
## 2 8.21565e-15 0.3673150
## 3 0.00000e+00 0.3664510
## 4 0.00000e+00 0.3563708
## 5 0.00000e+00 0.8230966
# Exponentiate the coefficients to put them on the odds-ratio scale
# (each row is one Age.Group level relative to the baseline level).
exp(coef(model_multi))
## (Intercept) Call..Failure Complains Subscription..Length Charge..Amount
## 2 1472447761 0.20706656 1.796363 0.1512992 43.69189
## 3 2168733609 0.18175584 2.121980 0.1636382 119.84023
## 4 458286934 0.09263309 1.876706 0.1261295 199.21906
## 5 4098579 0.07648796 1.774863 0.1694238 458.48634
## Seconds.of.Use Frequency.of.use Frequency.of.SMS Distinct.Called.Numbers
## 2 19.91850 0.7712928 1076.1331 0.2625143
## 3 16.00492 0.9873050 1309.3384 0.1860355
## 4 16.78504 0.6775873 847.0744 0.4805995
## 5 2.40783 4.5251394 375.3641 0.2754828
## Tariff.Plan Status
## 2 0.08728970 32360777334
## 3 0.04489535 33805231141
## 4 0.03120205 56544551771
## 5 0.03549497 65842648
# Predict the most likely Age.Group class for the same rows the model was
# fitted on, so the table below reflects in-sample performance.
pred_multi <- predict(model_multi, newdata = data_model, type = "class")
# Cross-tabulate predicted (rows) against actual (columns) classes.
table(Prediksi = pred_multi, Aktual = data_model[["Age.Group"]])
## Aktual
## Prediksi 1 2 3 4 5
## 1 75 17 8 16 1
## 2 30 362 253 71 5
## 3 17 648 1134 249 112
## 4 1 8 14 41 17
## 5 0 2 16 18 35