Loading and exploring the dataset

# Read the cardiotocographic CSV (comma-separated, first row holds the column
# names) into a data frame, then print its structure.
# NOTE(review): the path is absolute and machine-specific; it must be adjusted
# before this can run anywhere else.
data <- read.table(
  file = "C:/Users/Mehedi Hassan Galib/Desktop/R/Cardiotocographic.csv",
  sep = ",",
  header = TRUE
)
str(data)
## 'data.frame':    2126 obs. of  22 variables:
##  $ LB      : int  120 132 133 134 132 134 134 122 122 122 ...
##  $ AC      : num  0 0.00638 0.00332 0.00256 0.00651 ...
##  $ FM      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ UC      : num  0 0.00638 0.00831 0.00768 0.00814 ...
##  $ DL      : num  0 0.00319 0.00332 0.00256 0 ...
##  $ DS      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ DP      : num  0 0 0 0 0 ...
##  $ ASTV    : int  73 17 16 16 16 26 29 83 84 86 ...
##  $ MSTV    : num  0.5 2.1 2.1 2.4 2.4 5.9 6.3 0.5 0.5 0.3 ...
##  $ ALTV    : int  43 0 0 0 0 0 0 6 5 6 ...
##  $ MLTV    : num  2.4 10.4 13.4 23 19.9 0 0 15.6 13.6 10.6 ...
##  $ Width   : int  64 130 130 117 117 150 150 68 68 68 ...
##  $ Min     : int  62 68 68 53 53 50 50 62 62 62 ...
##  $ Max     : int  126 198 198 170 170 200 200 130 130 130 ...
##  $ Nmax    : int  2 6 5 11 9 5 6 0 0 1 ...
##  $ Nzeros  : int  0 1 1 0 0 3 3 0 0 0 ...
##  $ Mode    : int  120 141 141 137 137 76 71 122 122 122 ...
##  $ Mean    : int  137 136 135 134 136 107 107 122 122 122 ...
##  $ Median  : int  121 140 138 137 138 107 106 123 123 123 ...
##  $ Variance: int  73 12 13 13 11 170 215 3 3 1 ...
##  $ Tendency: int  1 0 0 1 1 0 0 1 1 1 ...
##  $ NSP     : int  2 1 1 1 1 3 3 3 3 3 ...
# Preview the first six rows (the default row count of head()).
head(data, n = 6L)
##    LB          AC FM          UC          DL DS          DP ASTV MSTV ALTV MLTV
## 1 120 0.000000000  0 0.000000000 0.000000000  0 0.000000000   73  0.5   43  2.4
## 2 132 0.006379585  0 0.006379585 0.003189793  0 0.000000000   17  2.1    0 10.4
## 3 133 0.003322259  0 0.008305648 0.003322259  0 0.000000000   16  2.1    0 13.4
## 4 134 0.002560819  0 0.007682458 0.002560819  0 0.000000000   16  2.4    0 23.0
## 5 132 0.006514658  0 0.008143322 0.000000000  0 0.000000000   16  2.4    0 19.9
## 6 134 0.001049318  0 0.010493179 0.009443861  0 0.002098636   26  5.9    0  0.0
##   Width Min Max Nmax Nzeros Mode Mean Median Variance Tendency NSP
## 1    64  62 126    2      0  120  137    121       73        1   2
## 2   130  68 198    6      1  141  136    140       12        0   1
## 3   130  68 198    5      1  141  135    138       13        0   1
## 4   117  53 170   11      0  137  134    137       13        1   1
## 5   117  53 170    9      0  137  136    138       11        1   1
## 6   150  50 200    5      3   76  107    107      170        0   3
# Per-column minimum, quartiles, median, mean and maximum.
summary(object = data)
##        LB              AC                 FM                 UC          
##  Min.   :106.0   Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:126.0   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.001876  
##  Median :133.0   Median :0.001630   Median :0.000000   Median :0.004482  
##  Mean   :133.3   Mean   :0.003170   Mean   :0.009474   Mean   :0.004357  
##  3rd Qu.:140.0   3rd Qu.:0.005631   3rd Qu.:0.002512   3rd Qu.:0.006525  
##  Max.   :160.0   Max.   :0.019284   Max.   :0.480634   Max.   :0.014925  
##        DL                 DS                  DP                 ASTV      
##  Min.   :0.000000   Min.   :0.000e+00   Min.   :0.0000000   Min.   :12.00  
##  1st Qu.:0.000000   1st Qu.:0.000e+00   1st Qu.:0.0000000   1st Qu.:32.00  
##  Median :0.000000   Median :0.000e+00   Median :0.0000000   Median :49.00  
##  Mean   :0.001885   Mean   :3.585e-06   Mean   :0.0001566   Mean   :46.99  
##  3rd Qu.:0.003264   3rd Qu.:0.000e+00   3rd Qu.:0.0000000   3rd Qu.:61.00  
##  Max.   :0.015385   Max.   :1.353e-03   Max.   :0.0053476   Max.   :87.00  
##       MSTV            ALTV             MLTV            Width       
##  Min.   :0.200   Min.   : 0.000   Min.   : 0.000   Min.   :  3.00  
##  1st Qu.:0.700   1st Qu.: 0.000   1st Qu.: 4.600   1st Qu.: 37.00  
##  Median :1.200   Median : 0.000   Median : 7.400   Median : 67.50  
##  Mean   :1.333   Mean   : 9.847   Mean   : 8.188   Mean   : 70.45  
##  3rd Qu.:1.700   3rd Qu.:11.000   3rd Qu.:10.800   3rd Qu.:100.00  
##  Max.   :7.000   Max.   :91.000   Max.   :50.700   Max.   :180.00  
##       Min              Max           Nmax            Nzeros       
##  Min.   : 50.00   Min.   :122   Min.   : 0.000   Min.   : 0.0000  
##  1st Qu.: 67.00   1st Qu.:152   1st Qu.: 2.000   1st Qu.: 0.0000  
##  Median : 93.00   Median :162   Median : 3.000   Median : 0.0000  
##  Mean   : 93.58   Mean   :164   Mean   : 4.068   Mean   : 0.3236  
##  3rd Qu.:120.00   3rd Qu.:174   3rd Qu.: 6.000   3rd Qu.: 0.0000  
##  Max.   :159.00   Max.   :238   Max.   :18.000   Max.   :10.0000  
##       Mode            Mean           Median         Variance     
##  Min.   : 60.0   Min.   : 73.0   Min.   : 77.0   Min.   :  0.00  
##  1st Qu.:129.0   1st Qu.:125.0   1st Qu.:129.0   1st Qu.:  2.00  
##  Median :139.0   Median :136.0   Median :139.0   Median :  7.00  
##  Mean   :137.5   Mean   :134.6   Mean   :138.1   Mean   : 18.81  
##  3rd Qu.:148.0   3rd Qu.:145.0   3rd Qu.:148.0   3rd Qu.: 24.00  
##  Max.   :187.0   Max.   :182.0   Max.   :186.0   Max.   :269.00  
##     Tendency            NSP       
##  Min.   :-1.0000   Min.   :1.000  
##  1st Qu.: 0.0000   1st Qu.:1.000  
##  Median : 0.0000   Median :1.000  
##  Mean   : 0.3203   Mean   :1.304  
##  3rd Qu.: 1.0000   3rd Qu.:1.000  
##  Max.   : 1.0000   Max.   :3.000





Converting the numeric response variable (NSP) to a factor

# NSP is read in as integer codes; store it as a factor so it is modelled as a
# categorical outcome rather than a number.
data[["NSP"]] <- as.factor(data[["NSP"]])





Data Partitioning

# Tag every row as 1 or 2 with probabilities 0.6 / 0.4 (seed fixed so the draw
# is reproducible). Rows tagged 1 become the training set and rows tagged 2 the
# test set, so the split is approximately, not exactly, 60/40.
set.seed(1234)
pdata <- sample(x = 2, size = nrow(data), replace = TRUE, prob = c(0.6, 0.4))
train <- data[pdata == 1, ]
test <- data[pdata == 2, ]





Load necessary packages

library(nnet)
## Warning: package 'nnet' was built under R version 4.0.2





Define a reference level for the response variable

# multinom() uses the first factor level of the response as the baseline
# category, so pin "1" as the reference explicitly.
# train and test were copied out of `data` before this point, so releveling
# `data` alone never reaches the frames that are actually modelled and scored;
# relevel those copies as well.
data$NSP <- relevel(data$NSP, ref = "1")
train$NSP <- relevel(train$NSP, ref = "1")
test$NSP <- relevel(test$NSP, ref = "1")





Multinomial Logistic Regression Model

The dot (.) after `NSP ~` means that every column except NSP is used as a predictor

# Fit a multinomial logistic regression of NSP on every other column of the
# training set.
# NOTE(review): the fit log ends with "stopped after 100 iterations", i.e. the
# optimizer hit multinom()'s iteration cap rather than converging -- consider
# raising `maxit` and re-running the whole analysis.
model <- multinom(formula = NSP ~ ., data = train)
## # weights:  69 (44 variable)
## initial  value 1427.097363 
## iter  10 value 568.407169
## iter  20 value 514.734909
## iter  30 value 388.418227
## iter  40 value 350.517945
## iter  50 value 347.727029
## iter  60 value 309.219270
## iter  70 value 294.844843
## iter  80 value 286.996355
## iter  90 value 276.981332
## iter 100 value 276.981086
## final  value 276.981086 
## stopped after 100 iterations
# Coefficients and standard errors of the full model, one row per
# non-reference class of NSP.
summary(object = model)
## Call:
## multinom(formula = NSP ~ ., data = train)
## 
## Coefficients:
##   (Intercept)         LB        AC       FM        UC         DL         DS
## 2   -14.30807 -0.1282834 -1114.251 14.53243 -215.0808  -74.80964 -1.0011218
## 3   -17.15760  0.4805812  -109.936 19.18169 -486.4362 -115.42037 -0.2823651
##          DP       ASTV       MSTV       ALTV        MLTV       Width
## 2  92.46061 0.06858873 -0.9029939 0.02239884  0.03044093 0.004216847
## 3 102.07151 0.17585538 -1.0785037 0.06651402 -0.01509687 0.015256080
##           Min         Max       Nmax     Nzeros        Mode        Mean
## 2 0.003540818 0.007757686  0.0640517 -0.1011019 -0.03309807  0.15224195
## 3 0.013278061 0.028534144 -0.1389415 -0.3030035 -0.01666364 -0.05729411
##        Median   Variance   Tendency
## 2  0.06569322 0.05449380 -0.1975868
## 3 -0.42476856 0.05774668  0.5248027
## 
## Std. Errors:
##   (Intercept)         LB         AC        FM          UC          DL
## 2   2.1929631 0.03807685 0.01380034 2.2530747 0.009560359 0.001242820
## 3   0.4981916 0.05311664 0.00247061 0.6989769 0.005990731 0.001652476
##             DS           DP       ASTV      MSTV        ALTV       MLTV
## 2 3.031778e-05 0.0029199841 0.01293802 0.4384848 0.007551955 0.04390396
## 3 5.338860e-06 0.0007851513 0.02363638 0.5461079 0.012630513 0.09166803
##         Width         Min        Max      Nmax    Nzeros       Mode       Mean
## 2 0.005150434 0.009446086 0.01239238 0.0690752 0.1961564 0.03891016 0.06564666
## 3 0.007016006 0.012375689 0.01323054 0.1417435 0.5768937 0.05634422 0.04532211
##       Median    Variance  Tendency
## 2 0.08655992 0.009532058 0.3374092
## 3 0.07484688 0.013275330 0.5073097
## 
## Residual Deviance: 553.9622 
## AIC: 637.9622





2-tailed Z-test

When computing p, the one-tailed probability is multiplied by 2 because the test is two-tailed

# Wald z-statistics: each coefficient divided by its standard error.
# summary() is evaluated once and reused instead of being recomputed per term.
model_summary <- summary(model)
z <- model_summary$coefficients / model_summary$standard.errors
# Two-sided p-value read directly from the upper tail. The form
# 1 - pnorm(|z|) cancels to exactly 0 once |z| is large, hiding how small the
# p-value really is.
p <- 2 * pnorm(abs(z), mean = 0, sd = 1, lower.tail = FALSE)
p
##   (Intercept)           LB AC           FM UC DL DS DP         ASTV       MSTV
## 2 6.82121e-11 0.0007542362  0 1.118194e-10  0  0  0  0 1.149609e-07 0.03946068
## 3 0.00000e+00 0.0000000000  0 0.000000e+00  0  0  0  0 1.005862e-13 0.04828050
##           ALTV      MLTV      Width       Min        Max      Nmax    Nzeros
## 2 3.017336e-03 0.4880883 0.41293691 0.7077758 0.53131192 0.3537838 0.6062633
## 3 1.393240e-07 0.8691875 0.02967001 0.2833093 0.03102998 0.3269716 0.5994213
##        Mode       Mean       Median     Variance  Tendency
## 2 0.3949761 0.02038897 4.478924e-01 1.084866e-08 0.5581443
## 3 0.7674232 0.20617495 1.385534e-08 1.361847e-05 0.3009109





Removing variables from model

Inspect the p-values from the Z-test: a predictor is removed from the model when its p-values for both non-reference classes (NSP = 2 and NSP = 3) are greater than 0.05

# Refit on the training set without the predictors dropped after the Z-test of
# the full model.
# NOTE(review): in the printed p-values of the full model, Width and Max are
# below 0.05 for class 3 yet are dropped here, while Mode is above 0.05 for
# both classes yet is kept -- confirm the dropped set against the stated rule.
# NOTE(review): this fit also stops at the 100-iteration cap without
# converging; consider raising `maxit`.
model1 <- multinom(
  formula = NSP ~ . - MLTV - Width - Min - Max - Nmax - Nzeros - Tendency,
  data = train
)
## # weights:  48 (30 variable)
## initial  value 1427.097363 
## iter  10 value 537.418424
## iter  20 value 395.613312
## iter  30 value 366.291338
## iter  40 value 358.043641
## iter  50 value 303.354079
## iter  60 value 300.431316
## iter  70 value 300.238291
## iter  80 value 300.152457
## iter  90 value 297.111196
## iter 100 value 288.137511
## final  value 288.137511 
## stopped after 100 iterations
# Coefficients and standard errors of the reduced model, one row per
# non-reference class of NSP.
summary(object = model1)
## Call:
## multinom(formula = NSP ~ . - MLTV - Width - Min - Max - Nmax - 
##     Nzeros - Tendency, data = train)
## 
## Coefficients:
##   (Intercept)          LB         AC       FM        UC         DL          DS
## 2   -14.03925 -0.08183635 -731.37236 11.79441 -176.8528 -117.89342 -0.53210990
## 3   -16.30235  0.43389020  -55.93566 17.82456 -305.4062  -91.65896 -0.07107389
##         DP       ASTV       MSTV       ALTV        Mode        Mean      Median
## 2 55.79001 0.06678034 -0.5113691 0.02082366 -0.03753239  0.11436842  0.07348312
## 3 56.04020 0.18160851 -0.7743174 0.06350030 -0.01927882 -0.04915365 -0.35150630
##     Variance
## 2 0.04951895
## 3 0.05685436
## 
## Std. Errors:
##   (Intercept)         LB           AC        FM          UC          DL
## 2   1.9883215 0.03246681 0.0150681615 2.0816439 0.009917417 0.004195422
## 3   0.4195385 0.04635980 0.0009402023 0.4259067 0.006358635 0.001489038
##             DS           DP       ASTV      MSTV        ALTV       Mode
## 2 5.345475e-05 0.0025679898 0.01154037 0.3417879 0.006444585 0.03194949
## 3 4.185526e-06 0.0005941238 0.02017630 0.4952795 0.010525415 0.05369059
##         Mean     Median    Variance
## 2 0.05026916 0.06666187 0.007531889
## 3 0.04087680 0.07395231 0.011639537
## 
## Residual Deviance: 576.275 
## AIC: 636.275





2-tailed Z-test for model1

# Wald z-statistics for the reduced model: coefficient / standard error.
# summary() is evaluated once and reused instead of being recomputed per term.
model1_summary <- summary(model1)
z1 <- model1_summary$coefficients / model1_summary$standard.errors
# Two-sided p-value read directly from the upper tail. The form
# 1 - pnorm(|z|) cancels to exactly 0 once |z| is large, hiding how small the
# p-value really is.
p1 <- 2 * pnorm(abs(z1), mean = 0, sd = 1, lower.tail = FALSE)
p1
##    (Intercept)         LB AC           FM UC DL DS DP         ASTV      MSTV
## 2 1.654898e-12 0.01171498  0 1.462465e-08  0  0  0  0 7.179419e-09 0.1346122
## 3 0.000000e+00 0.00000000  0 0.000000e+00  0  0  0  0 0.000000e+00 0.1179597
##           ALTV      Mode       Mean       Median     Variance
## 2 1.232774e-03 0.2400981 0.02289867 2.703200e-01 4.879319e-11
## 3 1.608992e-09 0.7195407 0.22917648 2.002736e-06 1.036447e-06





In the equation, the response variable will look like

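# ln[P(NSP=2)/P(NSP=1)] = -14.03925 - 0.08183635*LB - 731.37236*AC + ... + 0.04951895*Variance   (row "2" of the model1 coefficients)
# ln[P(NSP=3)/P(NSP=1)] = -16.30235 + 0.43389020*LB -  55.93566*AC + ... + 0.05685436*Variance   (row "3" of the model1 coefficients)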





Prediction (Train)

# Predicted class labels for the training rows from the reduced model
# ("class" is the default prediction type for multinom fits), then the first
# six of them.
pred <- predict(model1, newdata = train, type = "class")
head(pred, n = 6L)
## [1] 2 3 3 1 1 1
## Levels: 1 2 3
# Observed classes of the same first six training rows, for comparison.
head(train[["NSP"]], n = 6L)
## [1] 2 3 3 3 2 1
## Levels: 1 2 3





Confusion Matrix (Train)

# Cross-tabulate predicted classes (rows) against observed classes (columns)
# on the training set.
tab <- table(pred, train[["NSP"]])
print(tab)
##     
## pred   1   2   3
##    1 980  55   5
##    2  36 106  14
##    3   6   9  88





Misclassification Error (Train)

# Misclassification rate: one minus the share of training rows on the diagonal
# of the confusion matrix (correct predictions).
1 - (sum(diag(tab)) / sum(tab))
## [1] 0.09622787





Prediction (Test)

# Predicted class labels for the held-out test rows from the reduced model
# ("class" is the default prediction type for multinom fits), then the first
# six of them.
pred1 <- predict(model1, newdata = test, type = "class")
head(pred1, n = 6L)
## [1] 1 1 1 1 3 1
## Levels: 1 2 3
# Observed classes of the same first six test rows, for comparison.
head(test[["NSP"]], n = 6L)
## [1] 1 1 1 1 3 3
## Levels: 1 2 3





Confusion Matrix (Test)

# Cross-tabulate predicted classes (rows) against observed classes (columns)
# on the test set.
tab1 <- table(pred1, test[["NSP"]])
print(tab1)
##      
## pred1   1   2   3
##     1 609  43   7
##     2  21  69   8
##     3   3  13  54





Misclassification Error (Test)

# Misclassification rate: one minus the share of test rows on the diagonal of
# the confusion matrix (correct predictions).
1 - (sum(diag(tab1)) / sum(tab1))
## [1] 0.114873





Model Assessment/Sensitivity (Train)

The model correctly identifies 96% of the normal patients (sensitivity for NSP = 1)

The model correctly identifies 62% of the suspect patients (sensitivity for NSP = 2)

The model correctly identifies 82% of the pathologic patients (sensitivity for NSP = 3)

# Column-wise proportions: every cell is divided by the total of its own
# observed-class column, so each column sums to 1 and the diagonal holds the
# per-class sensitivity.
# tab / colSums(tab) is not equivalent: the vector of column totals is recycled
# down the rows, so row i gets divided by column total i and only the diagonal
# comes out right.
prop.table(tab, margin = 2)
##     
## pred           1           2           3
##    1 0.958904110 0.323529412 0.046728972
##    2 0.035225049 0.623529412 0.130841121
##    3 0.005870841 0.052941176 0.822429907

Model Assessment/Sensitivity (Test)

The model correctly identifies 96% of the normal patients (sensitivity for NSP = 1)

The model correctly identifies 55% of the suspect patients (sensitivity for NSP = 2)

The model correctly identifies 78% of the pathologic patients (sensitivity for NSP = 3)

# Column-wise proportions: every cell is divided by the total of its own
# observed-class column, so each column sums to 1 and the diagonal holds the
# per-class sensitivity.
# tab1 / colSums(tab1) is not equivalent: the vector of column totals is
# recycled down the rows, so row i gets divided by column total i and only the
# diagonal comes out right.
prop.table(tab1, margin = 2)
##      
## pred1           1           2           3
##     1 0.962085308 0.344000000 0.101449275
##     2 0.033175355 0.552000000 0.115942029
##     3 0.004739336 0.104000000 0.782608696