# Sumber pendapatan terbesar rumah tangga di Jawa Barat

package

library(haven)
library(nnet)
library(readxl)
library(caret)

## Loading required package: ggplot2

## Loading required package: lattice

input data

# Read the SUSENAS workbook. Ten column types are declared: the fourth
# column is parsed as numeric and the other nine as text.
data_sp <- read_excel(
  path = "D:/ADK/Multinomial - Sumber Pendapatan/DATA SUSENAS.xlsx",
  col_types = c(rep("text", 3), "numeric", rep("text", 6))
)
# Print the tibble for a quick visual check.
data_sp

## # A tibble: 20,337 × 10
##    Y     X1    X2       X3 X4    X5    X6    X7    X8    X9   
##    <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
##  1 0     0     1         2 0     1     1     1     1     1    
##  2 0     4     0         4 0     1     0     1     1     1    
##  3 0     2     1         3 1     1     1     1     1     1    
##  4 0     1     0         3 0     1     1     1     1     1    
##  5 0     2     3         3 0     1     1     1     1     1    
##  6 0     0     0         4 1     1     0     1     1     1    
##  7 0     1     3         2 1     1     0     1     1     1    
##  8 0     1     3         3 1     1     0     1     1     1    
##  9 0     0     3         4 1     1     0     1     1     1    
## 10 0     0     0         3 1     1     0     1     1     0    
## # ℹ 20,327 more rows

str(data_sp)

## tibble [20,337 × 10] (S3: tbl_df/tbl/data.frame)
##  $ Y : chr [1:20337] "0" "0" "0" "0" ...
##  $ X1: chr [1:20337] "0" "4" "2" "1" ...
##  $ X2: chr [1:20337] "1" "0" "1" "0" ...
##  $ X3: num [1:20337] 2 4 3 3 3 4 2 3 4 3 ...
##  $ X4: chr [1:20337] "0" "0" "1" "0" ...
##  $ X5: chr [1:20337] "1" "1" "1" "1" ...
##  $ X6: chr [1:20337] "1" "0" "1" "1" ...
##  $ X7: chr [1:20337] "1" "1" "1" "1" ...
##  $ X8: chr [1:20337] "1" "1" "1" "1" ...
##  $ X9: chr [1:20337] "1" "1" "1" "1" ...

#sebelum melakukan analisis, seluruh variabel kategorik (Y, X1, X2, X4-X9) harus diubah menjadi bentuk faktor; X3 tetap numerik ## mengubah variabel ke factor

# Convert every categorical column to a factor in a single pass.
# X3 is deliberately absent from the list, so it stays numeric.
factor_cols <- c("Y", "X1", "X2", "X4", "X5", "X6", "X7", "X8", "X9")
data_sp[factor_cols] <- lapply(data_sp[factor_cols], as.factor)

str(data_sp)

## tibble [20,337 × 10] (S3: tbl_df/tbl/data.frame)
##  $ Y : Factor w/ 3 levels "0","1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ X1: Factor w/ 5 levels "0","1","2","3",..: 1 5 3 2 3 1 2 2 1 1 ...
##  $ X2: Factor w/ 5 levels "0","1","2","3",..: 2 1 2 1 4 1 4 4 4 1 ...
##  $ X3: num [1:20337] 2 4 3 3 3 4 2 3 4 3 ...
##  $ X4: Factor w/ 2 levels "0","1": 1 1 2 1 1 2 2 2 2 2 ...
##  $ X5: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
##  $ X6: Factor w/ 2 levels "0","1": 2 1 2 2 2 1 1 1 1 1 ...
##  $ X7: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
##  $ X8: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
##  $ X9: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 1 ...

#visualisasi data menggunakan statistika deskriptif ## Box Plot pada variabel Jumlah anggota Keluarga (X3)

# Box plot of household size (X3) within each main-income-source group (Y).
# The y-axis label used to read "Umur" (age), but X3 is the number of
# household members, so the label now names the variable actually plotted.
# Y has three levels, so exactly three fill colours are supplied.
boxplot(X3 ~ Y, data = data_sp,
        main = ".",
        xlab = "Pendapatan terbesar di rumah tangga",
        ylab = "Jumlah anggota keluarga",
        col = c("lightblue", "mistyrose", "lightgrey"))

#diagram batang sumber pendapatan terbesar Rumah Tangga (Y) dengan Status Pekerjaan(X2)

# Grouped bar chart: counts of each income-source category (Y, table rows)
# shown side by side within each employment-status category (X2, columns).
barplot (table(data_sp$Y, data_sp$X2),
         main = ".",
         xlab = "Status Pekerjaan",
         ylab = "jumlah",
         col = c("lightblue", "mistyrose", "lightgrey"),
         beside = TRUE)

# NOTE(review): the legend lists four labels but only three fill colours,
# while Y has three levels (three bars per group). The fourth entry
# presumably reuses the first colour. Confirm which three labels correspond
# to Y's levels and drop the extra one.
legend("topright",
c("ART yang bekerja", "Kiriman Uang/Barang", "Investasi", "Pensiunan"),
fill = c("lightblue", "mistyrose", "lightgrey"))

#Barplot Sumber Pendapatan Terbesar di Rumah Tangga

# One bar chart of category counts per variable. Each entry names the column
# to tabulate, the plot title, the x-axis label and the bar colours; the
# y-axis label is "JUMLAH" for every chart.
two_tone <- c("mistyrose", "lightgrey")
bar_specs <- list(
  # Main income source of the household
  list(column = "Y",
       title = " .",
       x_label = "Sumber Pendapatan Terbesar di Rumah Tangga",
       colours = c("lightblue", "mistyrose", "lightgrey", "lavender")),
  # Employment status
  list(column = "X2",
       title = " .",
       x_label = "Status pekerjaan",
       colours = c("lightblue", "mistyrose", "lightgrey", "lavender",
                   "lightyellow")),
  # Ownership of jewellery/gold (at least 10 grams)
  list(column = "X4",
       title = " Barplot Kepemilikan perhiasan/emas (min 10 gram)",
       x_label = "Status Kepemilikan perhiasan/emas (min 10 gram)",
       colours = two_tone),
  # Land ownership
  list(column = "X6",
       title = " Barplot Status kepemilikan lahan/tanah",
       x_label = "Status kepemilikan lahan/tanah",
       colours = two_tone),
  # Old-age security
  list(column = "X8",
       title = " Barplot Status jaminan hari tua",
       x_label = "Status jaminan hari tua",
       colours = two_tone)
)

for (spec in bar_specs) {
  barplot(table(data_sp[[spec$column]]),
          main = spec$title,
          xlab = spec$x_label,
          ylab = "JUMLAH",
          col = spec$colours)
}

#pie chart pendidikan

library(plotrix)

# Pie charts of category shares. Each entry names the column to tabulate,
# the slice labels (in level order), the title and the slice colours.
pie_specs <- list(
  # Education level (X1)
  list(column = "X1",
       labels = c("SD", "SMP", "SMA", "SMK", "S1"),
       title = "Status pendidikan",
       colours = c("mistyrose", "lightblue", "lavender", "salmon",
                   "lightyellow")),
  # Car ownership (X5)
  list(column = "X5",
       labels = c("memiliki", "tidak memiliki"),
       title = "Status kepemilikan mobil",
       colours = c("mistyrose", "lavender")),
  # Pension security (X7)
  list(column = "X7",
       labels = c("dapat", "tidak dapat"),
       title = "jaminan pensiun",
       colours = c("mistyrose", "lavender")),
  # Family Hope Programme assistance (X9)
  list(column = "X9",
       labels = c("dapat", "tidak dapat"),
       title = "bantuan program keluarga harapan",
       colours = c("mistyrose", "lavender"))
)

for (spec in pie_specs) {
  mytable <- table(data_sp[[spec$column]])
  # Share of each category, rounded to a whole percent.
  pct <- round(mytable / sum(mytable) * 100)
  # Slice label in the form "<name> <percent>%".
  lbls <- paste0(spec$labels, " ", pct, "%")
  pie(mytable, labels = lbls,
      main = spec$title,
      col = spec$colours)
}

##numerik

# attach(data_sp) was removed: every visible statement addresses columns as
# data_sp$... or through a data = argument, and an attached copy would not
# follow later modifications of data_sp.
# NOTE(review): if code outside this section refers to bare column names
# (e.g. X3), switch it to data_sp$X3 rather than restoring attach().

# Median of X3 (number of household members).
median(data_sp$X3)

## [1] 3

mean(data_sp$X3)

## [1] 3.522348

min(data_sp$X3)

## [1] 1

max(data_sp$X3)

## [1] 14

summary

library(arsenal)
tab <- tableby(Y ~., data = data_sp)
summary(tab, text = TRUE)

## 
## 
## |             |  0 (N=19754)   |   1 (N=507)   |   2 (N=76)    | Total (N=20337) | p value|
## |:------------|:--------------:|:-------------:|:-------------:|:---------------:|-------:|
## |X1           |                |               |               |                 | < 0.001|
## |-  0         |  9313 (47.1%)  |  349 (68.8%)  |  21 (27.6%)   |  9683 (47.6%)   |        |
## |-  1         |  3447 (17.4%)  |  59 (11.6%)   |  13 (17.1%)   |  3519 (17.3%)   |        |
## |-  2         |  4551 (23.0%)  |  72 (14.2%)   |  27 (35.5%)   |  4650 (22.9%)   |        |
## |-  3         |  1068 (5.4%)   |   9 (1.8%)    |   3 (3.9%)    |   1080 (5.3%)   |        |
## |-  4         |  1375 (7.0%)   |   18 (3.6%)   |  12 (15.8%)   |   1405 (6.9%)   |        |
## |X2           |                |               |               |                 | < 0.001|
## |-  0         |  5428 (27.5%)  |  210 (41.4%)  |  36 (47.4%)   |  5674 (27.9%)   |        |
## |-  1         |  2344 (11.9%)  |  85 (16.8%)   |  17 (22.4%)   |  2446 (12.0%)   |        |
## |-  2         |   836 (4.2%)   |   21 (4.1%)   |   9 (11.8%)   |   866 (4.3%)    |        |
## |-  3         |  8184 (41.4%)  |  94 (18.5%)   |   8 (10.5%)   |  8286 (40.7%)   |        |
## |-  4         |  2962 (15.0%)  |  97 (19.1%)   |   6 (7.9%)    |  3065 (15.1%)   |        |
## |X3           |                |               |               |                 | < 0.001|
## |-  Mean (SD) | 3.555 (1.370)  | 2.367 (1.363) | 2.645 (1.547) |  3.522 (1.384)  |        |
## |-  Range     | 1.000 - 14.000 | 1.000 - 8.000 | 1.000 - 8.000 | 1.000 - 14.000  |        |
## |X4           |                |               |               |                 | < 0.001|
## |-  0         |  3265 (16.5%)  |   48 (9.5%)   |  24 (31.6%)   |  3337 (16.4%)   |        |
## |-  1         | 16489 (83.5%)  |  459 (90.5%)  |  52 (68.4%)   |  17000 (83.6%)  |        |
## |X5           |                |               |               |                 | < 0.001|
## |-  0         |  1908 (9.7%)   |   14 (2.8%)   |  13 (17.1%)   |   1935 (9.5%)   |        |
## |-  1         | 17846 (90.3%)  |  493 (97.2%)  |  63 (82.9%)   |  18402 (90.5%)  |        |
## |X6           |                |               |               |                 |   0.005|
## |-  0         | 13822 (70.0%)  |  369 (72.8%)  |  65 (85.5%)   |  14256 (70.1%)  |        |
## |-  1         |  5932 (30.0%)  |  138 (27.2%)  |  11 (14.5%)   |  6081 (29.9%)   |        |
## |X7           |                |               |               |                 | < 0.001|
## |-  0         |   703 (3.6%)   |   4 (0.8%)    |  33 (43.4%)   |   740 (3.6%)    |        |
## |-  1         | 19051 (96.4%)  |  503 (99.2%)  |  43 (56.6%)   |  19597 (96.4%)  |        |
## |X8           |                |               |               |                 | < 0.001|
## |-  0         |   913 (4.6%)   |   4 (0.8%)    |  13 (17.1%)   |   930 (4.6%)    |        |
## |-  1         | 18841 (95.4%)  |  503 (99.2%)  |  63 (82.9%)   |  19407 (95.4%)  |        |
## |X9           |                |               |               |                 |   0.002|
## |-  0         |  2591 (13.1%)  |  53 (10.5%)   |   1 (1.3%)    |  2645 (13.0%)   |        |
## |-  1         | 17163 (86.9%)  |  454 (89.5%)  |  75 (98.7%)   |  17692 (87.0%)  |        |

Partisi data menjadi data training (80%) dan data testing (20%). Jumlah data pada penelitian ini sebanyak 20.337 rumah tangga.

# Fix the random seed so the partition can be reproduced.
set.seed(1234)

# Row indices of the training set: 80% of the rows, drawn with Y as the
# outcome passed to createDataPartition(); returned as a matrix, not a list.
acak <- createDataPartition(y = data_sp$Y, p = 0.8, list = FALSE)

# Selected rows form the training set; all remaining rows form the test set.
data_train <- data_sp[acak, ]
data_test <- data_sp[-acak, ]

# Per-column summary of the training set.
summary(data_train)

##  Y         X1       X2             X3         X4        X5        X6       
##  0:15804   0:7745   0:4548   Min.   : 1.000   0: 2633   0: 1533   0:11482  
##  1:  406   1:2818   1:1948   1st Qu.: 3.000   1:13638   1:14738   1: 4789  
##  2:   61   2:3714   2: 684   Median : 3.000                                
##            3: 883   3:6639   Mean   : 3.523                                
##            4:1111   4:2452   3rd Qu.: 4.000                                
##                              Max.   :14.000                                
##  X7        X8        X9       
##  0:  583   0:  729   0: 2114  
##  1:15688   1:15542   1:14157  
##                               
##                               
##                               
##

summary(data_test)

##  Y        X1       X2             X3         X4       X5       X6      
##  0:3950   0:1938   0:1126   Min.   : 1.000   0: 704   0: 402   0:2774  
##  1: 101   1: 701   1: 498   1st Qu.: 3.000   1:3362   1:3664   1:1292  
##  2:  15   2: 936   2: 182   Median : 3.000                             
##           3: 197   3:1647   Mean   : 3.521                             
##           4: 294   4: 613   3rd Qu.: 4.000                             
##                             Max.   :12.000                             
##  X7       X8       X9      
##  0: 157   0: 201   0: 531  
##  1:3909   1:3865   1:3535  
##                            
##                            
##                            
##

Tabulasi silang variabel independen dengan variabel dependen

##tabel kontingensi

data1 <- table(data_sp$Y,data_sp$X1)
data1

##    
##        0    1    2    3    4
##   0 9313 3447 4551 1068 1375
##   1  349   59   72    9   18
##   2   21   13   27    3   12

data2 <- table(data_sp$Y,data_sp$X2)
data2

##    
##        0    1    2    3    4
##   0 5428 2344  836 8184 2962
##   1  210   85   21   94   97
##   2   36   17    9    8    6

data4 <- table(data_sp$Y,data_sp$X4)
data4

##    
##         0     1
##   0  3265 16489
##   1    48   459
##   2    24    52

data5 <- table(data_sp$Y,data_sp$X5)
data5

##    
##         0     1
##   0  1908 17846
##   1    14   493
##   2    13    63

data6 <- table(data_sp$Y,data_sp$X6)
data6

##    
##         0     1
##   0 13822  5932
##   1   369   138
##   2    65    11

data7 <- table(data_sp$Y,data_sp$X7)
data7

##    
##         0     1
##   0   703 19051
##   1     4   503
##   2    33    43

data8 <- table(data_sp$Y,data_sp$X8)
data8

##    
##         0     1
##   0   913 18841
##   1     4   503
##   2    13    63

data9 <- table(data_sp$Y,data_sp$X9)
data9

##    
##         0     1
##   0  2591 17163
##   1    53   454
##   2     1    75

##Uji Independensi variabel dependen dengan variabel independen menggunakan chi-square

c1=chisq.test(data_sp$Y, data_sp$X1)

## Warning in chisq.test(data_sp$Y, data_sp$X1): Chi-squared approximation may be
## incorrect

c1

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$Y and data_sp$X1
## X-squared = 116.7, df = 8, p-value < 2.2e-16

c2=chisq.test(data_sp$Y, data_sp$X2)

## Warning in chisq.test(data_sp$Y, data_sp$X2): Chi-squared approximation may be
## incorrect

c2

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$Y and data_sp$X2
## X-squared = 160.64, df = 8, p-value < 2.2e-16

c4=chisq.test(data_sp$Y, data_sp$X4)
c4

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$Y and data_sp$X4
## X-squared = 30.767, df = 2, p-value = 2.084e-07

c5=chisq.test(data_sp$Y, data_sp$X5)
c5

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$Y and data_sp$X5
## X-squared = 32.421, df = 2, p-value = 9.119e-08

c6=chisq.test(data_sp$Y, data_sp$X6)
c6

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$Y and data_sp$X6
## X-squared = 10.525, df = 2, p-value = 0.005182

c7=chisq.test(data_sp$Y, data_sp$X7)

## Warning in chisq.test(data_sp$Y, data_sp$X7): Chi-squared approximation may be
## incorrect

c7

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$Y and data_sp$X7
## X-squared = 355.14, df = 2, p-value < 2.2e-16

c8=chisq.test(data_sp$Y, data_sp$X8)

## Warning in chisq.test(data_sp$Y, data_sp$X8): Chi-squared approximation may be
## incorrect

c8

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$Y and data_sp$X8
## X-squared = 44.097, df = 2, p-value = 2.657e-10

c9=chisq.test(data_sp$Y, data_sp$X9)
c9

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$Y and data_sp$X9
## X-squared = 12.311, df = 2, p-value = 0.002121

#hasil dari uji independensi menunjukkan seluruh variabel memiliki hubungan dengan variabel dependen

##uji multikolinearitas antar variabel independen dengan menggunakan uji chi-square berpasangan (chisq.test()); korelasi divisualisasikan kemudian dengan pairs()

Variabel X1

m1=chisq.test(data_sp$X1, data_sp$X2)
m1

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X1 and data_sp$X2
## X-squared = 2507.9, df = 16, p-value < 2.2e-16

m2=chisq.test(data_sp$X1, data_sp$X4)
m2

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X1 and data_sp$X4
## X-squared = 1398.1, df = 4, p-value < 2.2e-16

m3=chisq.test(data_sp$X1, data_sp$X5)
m3

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X1 and data_sp$X5
## X-squared = 2714.2, df = 4, p-value < 2.2e-16

m4=chisq.test(data_sp$X1, data_sp$X6)
m4

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X1 and data_sp$X6
## X-squared = 404.89, df = 4, p-value < 2.2e-16

m5=chisq.test(data_sp$X1, data_sp$X7)
m5

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X1 and data_sp$X7
## X-squared = 1254.6, df = 4, p-value < 2.2e-16

m6=chisq.test(data_sp$X1, data_sp$X8)
m6

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X1 and data_sp$X8
## X-squared = 1317.4, df = 4, p-value < 2.2e-16

m7=chisq.test(data_sp$X1, data_sp$X9)
m7

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X1 and data_sp$X9
## X-squared = 813.22, df = 4, p-value < 2.2e-16

Variabel X2

d2=chisq.test(data_sp$X2, data_sp$X4)
d2

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X2 and data_sp$X4
## X-squared = 453.64, df = 4, p-value < 2.2e-16

d3=chisq.test(data_sp$X2, data_sp$X5)
d3

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X2 and data_sp$X5
## X-squared = 710.48, df = 4, p-value < 2.2e-16

d4=chisq.test(data_sp$X2, data_sp$X6)
d4

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X2 and data_sp$X6
## X-squared = 597.93, df = 4, p-value < 2.2e-16

d5=chisq.test(data_sp$X2, data_sp$X7)
d5

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X2 and data_sp$X7
## X-squared = 421.93, df = 4, p-value < 2.2e-16

d6=chisq.test(data_sp$X2, data_sp$X8)
d6

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X2 and data_sp$X8
## X-squared = 759.81, df = 4, p-value < 2.2e-16

d7=chisq.test(data_sp$X2, data_sp$X9)
d7

## 
##  Pearson's Chi-squared test
## 
## data:  data_sp$X2 and data_sp$X9
## X-squared = 257.76, df = 4, p-value < 2.2e-16

variabel X4

n4=chisq.test(data_sp$X4, data_sp$X5)
n4

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X4 and data_sp$X5
## X-squared = 2462.4, df = 1, p-value < 2.2e-16

n5=chisq.test(data_sp$X4, data_sp$X6)
n5

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X4 and data_sp$X6
## X-squared = 357.68, df = 1, p-value < 2.2e-16

n6=chisq.test(data_sp$X4, data_sp$X7)
n6

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X4 and data_sp$X7
## X-squared = 517.96, df = 1, p-value < 2.2e-16

n7=chisq.test(data_sp$X4, data_sp$X8)
n7

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X4 and data_sp$X8
## X-squared = 419.23, df = 1, p-value < 2.2e-16

n8=chisq.test(data_sp$X4, data_sp$X9)
n8

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X4 and data_sp$X9
## X-squared = 284.22, df = 1, p-value < 2.2e-16

variabel X5

k4=chisq.test(data_sp$X5, data_sp$X6)
k4

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X5 and data_sp$X6
## X-squared = 313.3, df = 1, p-value < 2.2e-16

k5=chisq.test(data_sp$X5, data_sp$X7)
k5

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X5 and data_sp$X7
## X-squared = 900.26, df = 1, p-value < 2.2e-16

k6=chisq.test(data_sp$X5, data_sp$X8)
k6

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X5 and data_sp$X8
## X-squared = 818.09, df = 1, p-value < 2.2e-16

k7=chisq.test(data_sp$X5, data_sp$X9)
k7

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X5 and data_sp$X9
## X-squared = 272.08, df = 1, p-value < 2.2e-16

Variabel X6

l5=chisq.test(data_sp$X6, data_sp$X7)
l5

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X6 and data_sp$X7
## X-squared = 77.705, df = 1, p-value < 2.2e-16

l6=chisq.test(data_sp$X6, data_sp$X8)
l6

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X6 and data_sp$X8
## X-squared = 45.087, df = 1, p-value = 1.884e-11

l7=chisq.test(data_sp$X6, data_sp$X9)
l7

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X6 and data_sp$X9
## X-squared = 0.71841, df = 1, p-value = 0.3967

Variabel X7

p6=chisq.test(data_sp$X7, data_sp$X8)
p6

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X7 and data_sp$X8
## X-squared = 9780.1, df = 1, p-value < 2.2e-16

p7=chisq.test(data_sp$X7, data_sp$X9)
p7

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X7 and data_sp$X9
## X-squared = 82.821, df = 1, p-value < 2.2e-16

Variabel X8

q7=chisq.test(data_sp$X8, data_sp$X9)
q7

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  data_sp$X8 and data_sp$X9
## X-squared = 92.653, df = 1, p-value < 2.2e-16

Uji Koefisien Korelasi

# Upper-panel function for pairs(): writes the absolute correlation of x and
# y in the centre of the panel, with the text size growing with the strength
# of the correlation.
#
# Args:
#   x, y     Numeric vectors of equal length (the two panel variables).
#   digits   Significant digits used when formatting the coefficient.
#   prefix   Text prepended to the formatted coefficient.
#   cex.cor  Base text-scaling factor; when missing it is derived from the
#            width of the label.
#   ...      Accepted so pairs() can pass extra arguments; not used here.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
    # Save the panel's user coordinates and restore them on exit. par() must
    # receive the value by name: the earlier unnamed par(usr) restored
    # nothing and raised "argument 1 does not name a graphical parameter"
    # once per panel.
    usr <- par("usr")
    on.exit(par(usr = usr), add = TRUE)
    # Switch to a unit square so (0.5, 0.5) is the panel centre.
    par(usr = c(0, 1, 0, 1))
    Cor <- abs(cor(x, y)) # Remove abs function if desired
    # Formatting together with a reference value keeps the number of printed
    # digits stable; only the first element (the coefficient) is kept.
    txt <- paste0(prefix, format(c(Cor, 0.123456789), digits = digits)[1])
    if (missing(cex.cor)) {
        cex.cor <- 0.4 / strwidth(txt)
    }
    text(0.5, 0.5, txt,
         cex = 1 + cex.cor * Cor) # Resize the text by level of correlation
}

# Plotting the correlation matrix
# Scatterplot matrix of columns 2-10 of data_sp (the predictors X1-X9):
# upper panels print the absolute correlation via panel.cor, lower panels
# draw points with a smoothed line via panel.smooth.
# NOTE(review): all of these columns except X3 are factors; presumably
# pairs() converts them to integer level codes, so the coefficients are
# correlations of codes rather than of measured quantities - confirm this is
# the intended association measure.
pairs(data_sp[,2:10],
      upper.panel = panel.cor,    # Correlation panel
      lower.panel = panel.smooth) # Smoothed regression lines

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

memodelkan data sumber pendapatan RT secara Multivariat

##create model

# Fit the full multinomial logit model of Y on all nine predictors (X1-X9)
# using the training set.
# NOTE(review): the fitted summary reports an intercept near -165 and an X91
# coefficient near 163 for outcome 2. The Y = 2, X9 = 0 cell of the
# full-data cross-tab holds a single household, so this looks like
# (quasi-)separation; treat those estimates and their tests with caution.
model.SP <- multinom(Y ~X1+X2+X3+X4+X5+X6+X7+X8+X9, data = data_train)

## # weights:  51 (32 variable)
## initial  value 17875.520549 
## iter  10 value 3510.769492
## iter  20 value 3015.383624
## iter  30 value 2636.191384
## iter  40 value 2295.947976
## iter  50 value 2002.743754
## iter  60 value 1977.680212
## final  value 1977.678863 
## converged

summary(model.SP)

## Call:
## multinom(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + 
##     X9, data = data_train)
## 
## Coefficients:
##   (Intercept)        X11        X12        X13        X14         X21
## 1   -3.171511 -0.4571073 -0.2465316 -0.8915369 -0.2456432 -0.21226670
## 2 -165.003359  1.0212390  1.4909076  1.0312733  1.6052593 -0.07724534
##          X22        X23        X24         X3        X41       X51         X61
## 1 -0.4687188 -0.9326786 -0.2552974 -0.6838456  0.4197614 0.7109466 -0.08523507
## 2  0.2981131 -2.5684605 -0.8015994 -0.4261133 -0.2173688 0.1799242 -0.26091961
##          X71       X81          X91
## 1  0.4439536 0.6304837  -0.04229984
## 2 -3.6689168 1.0996232 162.88162982
## 
## Std. Errors:
##   (Intercept)       X11       X12       X13       X14       X21       X22
## 1   0.8013919 0.1657521 0.1556770 0.3671079 0.3036084 0.1515911 0.2778880
## 2   0.3652984 0.4281066 0.3789103 0.6725777 0.4900058 0.3755168 0.4416945
##         X23       X24         X3       X41       X51       X61       X71
## 1 0.1464937 0.1424262 0.04754721 0.1882558 0.3379092 0.1206288 0.6647321
## 2 0.4634753 0.5471759 0.11636441 0.3218223 0.3916292 0.3710724 0.3600281
##         X81       X91
## 1 0.6628256 0.1747936
## 2 0.4985916 0.3652984
## 
## Residual Deviance: 3955.358 
## AIC: 4019.358

uji simultan

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

lrtest(model.SP)

## # weights:  6 (2 variable)
## initial  value 17875.520549 
## iter  10 value 2311.677327
## final  value 2299.454710 
## converged

## Likelihood ratio test
## 
## Model 1: Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9
## Model 2: Y ~ 1
##   #Df  LogLik  Df  Chisq Pr(>Chisq)    
## 1  32 -1977.7                          
## 2   2 -2299.4 -30 643.55  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Uji Parsial menggunakan likelihood

model.P1 <- multinom(Y~X1, data = data_train)

## # weights:  18 (10 variable)
## initial  value 17875.520549 
## iter  10 value 2575.464007
## iter  20 value 2419.674054
## iter  30 value 2266.025945
## final  value 2246.302468 
## converged

library(car)

## Loading required package: carData

lrtest(model.P1) #likelihood

## # weights:  6 (2 variable)
## initial  value 17875.520549 
## iter  10 value 2311.677327
## final  value 2299.454710 
## converged

## Likelihood ratio test
## 
## Model 1: Y ~ X1
## Model 2: Y ~ 1
##   #Df  LogLik Df Chisq Pr(>Chisq)    
## 1  10 -2246.3                        
## 2   2 -2299.4 -8 106.3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model.P2 <- multinom(Y~X2, data = data_train)

## # weights:  18 (10 variable)
## initial  value 17875.520549 
## iter  10 value 2438.283462
## iter  20 value 2317.710168
## iter  30 value 2229.432399
## final  value 2227.460004 
## converged

library(car)
lrtest(model.P2) #likelihood

## # weights:  6 (2 variable)
## initial  value 17875.520549 
## iter  10 value 2311.677327
## final  value 2299.454710 
## converged

## Likelihood ratio test
## 
## Model 1: Y ~ X2
## Model 2: Y ~ 1
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1  10 -2227.5                         
## 2   2 -2299.4 -8 143.99  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model.P3 <- multinom(Y~X3, data = data_train)

## # weights:  9 (4 variable)
## initial  value 17875.520549 
## iter  10 value 3512.122660
## iter  20 value 2127.314105
## final  value 2126.845732 
## converged

library(car)
lrtest(model.P3) #likelihood

## # weights:  6 (2 variable)
## initial  value 17875.520549 
## iter  10 value 2311.677327
## final  value 2299.454710 
## converged

## Likelihood ratio test
## 
## Model 1: Y ~ X3
## Model 2: Y ~ 1
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1   4 -2126.8                         
## 2   2 -2299.4 -2 345.22  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model.P4 <- multinom(Y~X4, data = data_train)

## # weights:  9 (4 variable)
## initial  value 17875.520549 
## iter  10 value 3030.931867
## iter  20 value 2281.597110
## final  value 2281.559529 
## converged

library(car)
lrtest(model.P4) #likelihood

## # weights:  6 (2 variable)
## initial  value 17875.520549 
## iter  10 value 2311.677327
## final  value 2299.454710 
## converged

## Likelihood ratio test
## 
## Model 1: Y ~ X4
## Model 2: Y ~ 1
##   #Df  LogLik Df Chisq Pr(>Chisq)    
## 1   4 -2281.6                        
## 2   2 -2299.4 -2 35.79  1.691e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model.P5 <- multinom(Y~X5, data = data_train)

## # weights:  9 (4 variable)
## initial  value 17875.520549 
## iter  10 value 2428.644408
## iter  20 value 2282.711330
## final  value 2280.432284 
## converged

library(car)
lrtest(model.P5) #likelihood

## # weights:  6 (2 variable)
## initial  value 17875.520549 
## iter  10 value 2311.677327
## final  value 2299.454710 
## converged

## Likelihood ratio test
## 
## Model 1: Y ~ X5
## Model 2: Y ~ 1
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1   4 -2280.4                         
## 2   2 -2299.4 -2 38.045  5.479e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model.P6 <- multinom(Y~X6, data = data_train)

## # weights:  9 (4 variable)
## initial  value 17875.520549 
## iter  10 value 2550.368546
## iter  20 value 2324.900093
## final  value 2295.649804 
## converged

library(car)
lrtest(model.P6) #likelihood

## # weights:  6 (2 variable)
## initial  value 17875.520549 
## iter  10 value 2311.677327
## final  value 2299.454710 
## converged

## Likelihood ratio test
## 
## Model 1: Y ~ X6
## Model 2: Y ~ 1
##   #Df  LogLik Df  Chisq Pr(>Chisq)  
## 1   4 -2295.7                       
## 2   2 -2299.4 -2 7.6098    0.02226 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model.P7 <- multinom(Y~X7, data = data_train)

## # weights:  9 (4 variable)
## initial  value 17875.520549 
## iter  10 value 2838.897123
## iter  20 value 2249.150145
## final  value 2249.105513 
## converged

library(car)
lrtest(model.P7) #likelihood

## # weights:  6 (2 variable)
## initial  value 17875.520549 
## iter  10 value 2311.677327
## final  value 2299.454710 
## converged

## Likelihood ratio test
## 
## Model 1: Y ~ X7
## Model 2: Y ~ 1
##   #Df  LogLik Df Chisq Pr(>Chisq)    
## 1   4 -2249.1                        
## 2   2 -2299.4 -2 100.7  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model.P8 <- multinom(Y~X8, data = data_train)

## # weights:  9 (4 variable)
## initial  value 17875.520549 
## iter  10 value 3044.697758
## iter  20 value 2285.738173
## final  value 2285.734239 
## converged

library(car)
lrtest(model.P8) #likelihood

## # weights:  6 (2 variable)
## initial  value 17875.520549 
## iter  10 value 2311.677327
## final  value 2299.454710 
## converged

## Likelihood ratio test
## 
## Model 1: Y ~ X8
## Model 2: Y ~ 1
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1   4 -2285.7                         
## 2   2 -2299.4 -2 27.441    1.1e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model.P9 <- multinom(Y~X9, data = data_train)

## # weights:  9 (4 variable)
## initial  value 17875.520549 
## iter  10 value 2360.988120
## final  value 2289.245532 
## converged

library(car)
lrtest(model.P9) #likelihood

## # weights:  6 (2 variable)
## initial  value 17875.520549 
## iter  10 value 2311.677327
## final  value 2299.454710 
## converged

## Likelihood ratio test
## 
## Model 1: Y ~ X9
## Model 2: Y ~ 1
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1   4 -2289.2                         
## 2   2 -2299.4 -2 20.418  3.683e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Uji Partial (hitung p-value menggunakan z test)

# Wald z-test for every coefficient of model.SP.
# The model summary is computed once and reused for both components.
model_summary <- summary(model.SP)
z <- model_summary$coefficients / model_summary$standard.errors

# Two-sided p-value under the standard normal N(0, 1). The earlier call,
# pnorm(abs(z), 0.1), passed 0.1 as the mean (a typo for "0, 1"), which
# shifted every p-value. Taking the upper tail directly also avoids the
# precision loss of 1 - pnorm(...) when |z| is large.
p <- 2 * pnorm(abs(z), mean = 0, sd = 1, lower.tail = FALSE)
data.frame(p)

##   X.Intercept.         X11         X12        X13         X14       X21
## 1  0.000114551 0.007865807 0.137912619 0.01988334 0.478275509 0.1935123
## 2  0.000000000 0.022284806 0.000125705 0.15176790 0.001493208 0.9158171
##         X22          X23        X24           X3        X41        X51
## 1 0.1125765 3.688299e-10 0.09055278 0.0000000000 0.03319309 0.04507462
## 2 0.5653383 5.276210e-08 0.17226074 0.0003681987 0.56499981 0.71927718
##         X61       X71        X81       X91
## 1 0.5441234 0.5701243 0.39465496 0.8870809
## 2 0.5464089 0.0000000 0.03525139 0.0000000

#prediksi data pada test

# Per-class predicted probabilities for the test rows; show the first ten.
predict_prob <- predict(
  model.SP,
  data_test,
  type = "prob"
)
head(predict_prob, n = 10)

##            0           1            2
## 1  0.9783676 0.011898743 9.733687e-03
## 2  0.9661790 0.026288020 7.532933e-03
## 3  0.9761276 0.017484016 6.388338e-03
## 4  0.9905869 0.008618305 7.948407e-04
## 5  0.9879444 0.011537908 5.176777e-04
## 6  0.9533764 0.046623611 4.284973e-74
## 7  0.9753567 0.023075125 1.568128e-03
## 8  0.9876977 0.012302259 1.893173e-74
## 9  0.9832023 0.016654803 1.429339e-04
## 10 0.9639294 0.035005952 1.064613e-03

# Predicted probabilities side by side with the observed test response.
head(
  data.frame(predict_prob, data_test$Y),
  n = 10
)

##           X0          X1           X2 data_test.Y
## 1  0.9783676 0.011898743 9.733687e-03           0
## 2  0.9661790 0.026288020 7.532933e-03           0
## 3  0.9761276 0.017484016 6.388338e-03           0
## 4  0.9905869 0.008618305 7.948407e-04           0
## 5  0.9879444 0.011537908 5.176777e-04           0
## 6  0.9533764 0.046623611 4.284973e-74           0
## 7  0.9753567 0.023075125 1.568128e-03           0
## 8  0.9876977 0.012302259 1.893173e-74           0
## 9  0.9832023 0.016654803 1.429339e-04           0
## 10 0.9639294 0.035005952 1.064613e-03           0

Confusion matrix

# Predicted class labels on the test set, compared with the observed Y.
prediksi.test <- predict(model.SP, data_test, type = "class")
data_test$Y <- as.factor(data_test$Y)
confusionMatrix(
  data = as.factor(prediksi.test),
  reference = data_test$Y,
  positive = "1"
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1    2
##          0 3950  101   15
##          1    0    0    0
##          2    0    0    0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9715          
##                  95% CI : (0.9659, 0.9764)
##     No Information Rate : 0.9715          
##     P-Value [Acc > NIR] : 0.5247          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 0 Class: 1 Class: 2
## Sensitivity            1.0000  0.00000 0.000000
## Specificity            0.0000  1.00000 1.000000
## Pos Pred Value         0.9715      NaN      NaN
## Neg Pred Value            NaN  0.97516 0.996311
## Prevalence             0.9715  0.02484 0.003689
## Detection Rate         0.9715  0.00000 0.000000
## Detection Prevalence   1.0000  0.00000 0.000000
## Balanced Accuracy      0.5000  0.50000 0.500000

interpretasi (odds ratio)

# Estimated coefficients (log-odds scale), one row per non-reference class.
data.frame(summary(model.SP)[["coefficients"]])

##   X.Intercept.        X11        X12        X13        X14         X21
## 1    -3.171511 -0.4571073 -0.2465316 -0.8915369 -0.2456432 -0.21226670
## 2  -165.003359  1.0212390  1.4909076  1.0312733  1.6052593 -0.07724534
##          X22        X23        X24         X3        X41       X51         X61
## 1 -0.4687188 -0.9326786 -0.2552974 -0.6838456  0.4197614 0.7109466 -0.08523507
## 2  0.2981131 -2.5684605 -0.8015994 -0.4261133 -0.2173688 0.1799242 -0.26091961
##          X71       X81          X91
## 1  0.4439536 0.6304837  -0.04229984
## 2 -3.6689168 1.0996232 162.88162982

# Exponentiated coefficients, i.e. odds ratios relative to the reference class.
data.frame(
  exp(summary(model.SP)[["coefficients"]])
)

##   X.Intercept.       X11       X12       X13       X14       X21       X22
## 1 4.194017e-02 0.6331124 0.7815067 0.4100251 0.7822013 0.8087490 0.6258036
## 2 2.187518e-72 2.7766328 4.4411246 2.8046347 4.9791507 0.9256627 1.3473142
##          X23       X24        X3       X41      X51       X61        X71
## 1 0.39349827 0.7746860 0.5046725 1.5215984 2.035918 0.9182964 1.55885814
## 2 0.07665346 0.4486109 0.6530423 0.8046332 1.197127 0.7703428 0.02550408
##        X81          X91
## 1 1.878519 9.585823e-01
## 2 3.003034 5.477634e+70

Regresi Logistik Multinomial

Alfisyahrina H, Shofie, Purhadi, Dwi Cahya J.K, Maria T.Y., Ananda R

2023-05-24