``` r
# Read the customer churn dataset from the current working directory.
data <- utils::read.csv(file = "Customer Churn.csv")

# Inspect the structure: column names, types and leading values.
utils::str(data)
## 'data.frame':    3150 obs. of  14 variables:
##  $ Call..Failure          : int  8 0 10 10 3 11 4 13 7 7 ...
##  $ Complains              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Subscription..Length   : int  38 39 37 38 38 38 38 37 38 38 ...
##  $ Charge..Amount         : int  0 0 0 0 0 1 0 2 0 1 ...
##  $ Seconds.of.Use         : int  4370 318 2453 4198 2393 3775 2360 9115 13773 4515 ...
##  $ Frequency.of.use       : int  71 5 60 66 58 82 39 121 169 83 ...
##  $ Frequency.of.SMS       : int  5 7 359 1 2 32 285 144 0 2 ...
##  $ Distinct.Called.Numbers: int  17 4 24 35 33 28 18 43 44 25 ...
##  $ Age.Group              : int  3 2 3 1 1 3 3 3 3 3 ...
##  $ Tariff.Plan            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Status                 : int  1 2 1 1 1 1 1 1 1 1 ...
##  $ Age                    : int  30 25 30 15 15 30 30 30 30 30 ...
##  $ Customer.Value         : num  198 46 1537 240 146 ...
##  $ Churn                  : int  0 0 0 0 0 0 0 0 0 0 ...
# Print per-column summary statistics (min, quartiles, mean, max).
summary(data)
##  Call..Failure      Complains       Subscription..Length Charge..Amount   
##  Min.   : 0.000   Min.   :0.00000   Min.   : 3.00        Min.   : 0.0000  
##  1st Qu.: 1.000   1st Qu.:0.00000   1st Qu.:30.00        1st Qu.: 0.0000  
##  Median : 6.000   Median :0.00000   Median :35.00        Median : 0.0000  
##  Mean   : 7.628   Mean   :0.07651   Mean   :32.54        Mean   : 0.9429  
##  3rd Qu.:12.000   3rd Qu.:0.00000   3rd Qu.:38.00        3rd Qu.: 1.0000  
##  Max.   :36.000   Max.   :1.00000   Max.   :47.00        Max.   :10.0000  
##  Seconds.of.Use  Frequency.of.use Frequency.of.SMS Distinct.Called.Numbers
##  Min.   :    0   Min.   :  0.00   Min.   :  0.00   Min.   : 0.00          
##  1st Qu.: 1391   1st Qu.: 27.00   1st Qu.:  6.00   1st Qu.:10.00          
##  Median : 2990   Median : 54.00   Median : 21.00   Median :21.00          
##  Mean   : 4472   Mean   : 69.46   Mean   : 73.17   Mean   :23.51          
##  3rd Qu.: 6478   3rd Qu.: 95.00   3rd Qu.: 87.00   3rd Qu.:34.00          
##  Max.   :17090   Max.   :255.00   Max.   :522.00   Max.   :97.00          
##    Age.Group      Tariff.Plan        Status           Age     Customer.Value  
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :15   Min.   :   0.0  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:25   1st Qu.: 113.8  
##  Median :3.000   Median :1.000   Median :1.000   Median :30   Median : 228.5  
##  Mean   :2.826   Mean   :1.078   Mean   :1.248   Mean   :31   Mean   : 471.0  
##  3rd Qu.:3.000   3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:30   3rd Qu.: 788.4  
##  Max.   :5.000   Max.   :2.000   Max.   :2.000   Max.   :55   Max.   :2165.3  
##      Churn       
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.1571  
##  3rd Qu.:0.0000  
##  Max.   :1.0000
# Count the missing values in each column.
vapply(data, function(column) sum(is.na(column)), numeric(1))
##           Call..Failure               Complains    Subscription..Length 
##                       0                       0                       0 
##          Charge..Amount          Seconds.of.Use        Frequency.of.use 
##                       0                       0                       0 
##        Frequency.of.SMS Distinct.Called.Numbers               Age.Group 
##                       0                       0                       0 
##             Tariff.Plan                  Status                     Age 
##                       0                       0                       0 
##          Customer.Value                   Churn 
##                       0                       0
# Drop rows that contain missing values.
data <- na.omit(data)

# Treat the age band and the churn flag as categorical variables.
data[c("Age.Group", "Churn")] <- lapply(data[c("Age.Group", "Churn")], as.factor)

# Build the modelling frame: keep every column except the ones that are not
# allowed in the model.
data_model <- data[
  ,
  setdiff(names(data), c("Age", "Customer.Value", "Churn")),
  drop = FALSE
]
str(data_model)
## 'data.frame':    3150 obs. of  11 variables:
##  $ Call..Failure          : int  8 0 10 10 3 11 4 13 7 7 ...
##  $ Complains              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Subscription..Length   : int  38 39 37 38 38 38 38 37 38 38 ...
##  $ Charge..Amount         : int  0 0 0 0 0 1 0 2 0 1 ...
##  $ Seconds.of.Use         : int  4370 318 2453 4198 2393 3775 2360 9115 13773 4515 ...
##  $ Frequency.of.use       : int  71 5 60 66 58 82 39 121 169 83 ...
##  $ Frequency.of.SMS       : int  5 7 359 1 2 32 285 144 0 2 ...
##  $ Distinct.Called.Numbers: int  17 4 24 35 33 28 18 43 44 25 ...
##  $ Age.Group              : Factor w/ 5 levels "1","2","3","4",..: 3 2 3 1 1 3 3 3 3 3 ...
##  $ Tariff.Plan            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Status                 : int  1 2 1 1 1 1 1 1 1 1 ...
# Standardise every numeric column of data_model (centre to mean 0, scale to
# standard deviation 1); factor columns such as Age.Group are left untouched.
# vapply() is used instead of sapply() so the mask is always a logical vector,
# even for an empty data frame.
num_vars <- vapply(data_model, is.numeric, logical(1))
data_model[, num_vars] <- scale(data_model[, num_vars])
# nnet provides multinom() for fitting multinomial log-linear models.
library(nnet)

# Fit a multinomial logit with Age.Group as the response and every other
# column of data_model as a predictor; the optimiser prints its progress.
model_multi <- multinom(Age.Group ~ ., data = data_model)
## # weights:  60 (44 variable)
## initial  value 5069.729424 
## iter  10 value 3607.638072
## iter  20 value 3528.999914
## iter  30 value 3387.385780
## iter  40 value 3328.669095
## iter  50 value 3259.459471
## iter  60 value 3255.455669
## iter  70 value 3255.126173
## iter  80 value 3255.098697
## final  value 3255.098484 
## converged
# Print the fitted coefficients, their standard errors, the residual deviance
# and the AIC.
summary(model_multi)
## Call:
## multinom(formula = Age.Group ~ ., data = data_model)
## 
## Coefficients:
##   (Intercept) Call..Failure Complains Subscription..Length Charge..Amount
## 2    21.11019     -1.574715 0.5857639            -1.888496       3.777163
## 3    21.49741     -1.705091 0.7523498            -1.810097       4.786159
## 4    19.94301     -2.379109 0.6295184            -2.070446       5.294405
## 5    15.22615     -2.570622 0.5737230            -1.775352       6.127930
##   Seconds.of.Use Frequency.of.use Frequency.of.SMS Distinct.Called.Numbers
## 2      2.9916490      -0.25968724         6.981129              -1.3374498
## 3      2.7728960      -0.01277623         7.177277              -1.6818177
## 4      2.8204878      -0.38921692         6.741789              -0.7327209
## 5      0.8787258       1.50964839         5.927897              -1.2892302
##   Tariff.Plan   Status
## 2   -2.438523 24.20021
## 3   -3.103421 24.24388
## 4   -3.467272 24.75829
## 5   -3.338364 18.00278
## 
## Std. Errors:
##   (Intercept) Call..Failure Complains Subscription..Length Charge..Amount
## 2    15.43357     0.2661690 0.3542356            0.2753179      0.4437952
## 3    15.43356     0.2669118 0.3540755            0.2757455      0.4469985
## 4    15.43369     0.2831568 0.3580439            0.2793101      0.4551701
## 5    46.27145     0.3176629 0.3742456            0.2861657      0.4680975
##   Seconds.of.Use Frequency.of.use Frequency.of.SMS Distinct.Called.Numbers
## 2      0.6930049        0.6422911         1.122955               0.1576682
## 3      0.6952127        0.6450923         1.123354               0.1617052
## 4      0.7193688        0.6749532         1.125521               0.1611625
## 5      0.7630820        0.7308220         1.139993               0.1935210
##   Tariff.Plan   Status
## 2   0.3140319 26.84397
## 3   0.3176411 26.84396
## 4   0.3391701 26.84401
## 5   0.3357117 80.52627
## 
## Residual Deviance: 6510.197 
## AIC: 6598.197
# Wald test for each coefficient: z = estimate / standard error, with a
# two-sided p-value from the standard normal distribution.
# summary() is evaluated once and reused rather than being called twice.
model_summary <- summary(model_multi)
z <- model_summary$coefficients / model_summary$standard.errors

# Take the upper tail directly: 1 - pnorm(.) cancels to exactly 0 for large
# |z|, whereas lower.tail = FALSE keeps very small p-values representable.
p_value <- 2 * pnorm(abs(z), lower.tail = FALSE)

p_value
##   (Intercept) Call..Failure  Complains Subscription..Length Charge..Amount
## 2   0.1713717  3.294206e-09 0.09820893         6.918466e-12              0
## 3   0.1636501  1.678293e-10 0.03360089         5.225109e-11              0
## 4   0.1962972  0.000000e+00 0.07871080         1.236788e-13              0
## 5   0.7421092  6.661338e-16 0.12527291         5.506993e-10              0
##   Seconds.of.Use Frequency.of.use Frequency.of.SMS Distinct.Called.Numbers
## 2   1.582192e-05       0.68598187     5.075571e-10            0.000000e+00
## 3   6.647619e-05       0.98419871     1.668079e-10            0.000000e+00
## 4   8.826228e-05       0.56417077     2.099334e-09            5.455208e-06
## 5   2.495067e-01       0.03885818     1.993534e-07            2.701883e-11
##   Tariff.Plan    Status
## 2 8.21565e-15 0.3673150
## 3 0.00000e+00 0.3664510
## 4 0.00000e+00 0.3563708
## 5 0.00000e+00 0.8230966
# Exponentiate the coefficients to read them as multiplicative effects on the
# odds of each Age.Group level relative to the baseline level (group 1).
exp(coef(model_multi))
##   (Intercept) Call..Failure Complains Subscription..Length Charge..Amount
## 2  1472447761    0.20706656  1.796363            0.1512992       43.69189
## 3  2168733609    0.18175584  2.121980            0.1636382      119.84023
## 4   458286934    0.09263309  1.876706            0.1261295      199.21906
## 5     4098579    0.07648796  1.774863            0.1694238      458.48634
##   Seconds.of.Use Frequency.of.use Frequency.of.SMS Distinct.Called.Numbers
## 2       19.91850        0.7712928        1076.1331               0.2625143
## 3       16.00492        0.9873050        1309.3384               0.1860355
## 4       16.78504        0.6775873         847.0744               0.4805995
## 5        2.40783        4.5251394         375.3641               0.2754828
##   Tariff.Plan      Status
## 2  0.08728970 32360777334
## 3  0.04489535 33805231141
## 4  0.03120205 56544551771
## 5  0.03549497    65842648
# Predict the most likely age group for every row of data_model. This is an
# in-sample evaluation: data_model is the frame the model was fitted on.
pred_multi <- predict(model_multi, newdata = data_model, type = "class")

# Confusion matrix: predicted class in rows, actual class in columns.
table(pred_multi, data_model$Age.Group, dnn = c("Prediksi", "Aktual"))
##         Aktual
## Prediksi    1    2    3    4    5
##        1   75   17    8   16    1
##        2   30  362  253   71    5
##        3   17  648 1134  249  112
##        4    1    8   14   41   17
##        5    0    2   16   18   35