# Largest source of household income in West Java (Jawa Barat)
library(haven)
library(nnet)
library(readxl)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
data_sp <- read_excel("D:/ADK/Multinomial - Sumber Pendapatan/DATA SUSENAS.xlsx",
col_types = c("text", "text", "text",
"numeric", "text", "text", "text",
"text", "text", "text"))
data_sp
## # A tibble: 20,337 × 10
## Y X1 X2 X3 X4 X5 X6 X7 X8 X9
## <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 0 0 1 2 0 1 1 1 1 1
## 2 0 4 0 4 0 1 0 1 1 1
## 3 0 2 1 3 1 1 1 1 1 1
## 4 0 1 0 3 0 1 1 1 1 1
## 5 0 2 3 3 0 1 1 1 1 1
## 6 0 0 0 4 1 1 0 1 1 1
## 7 0 1 3 2 1 1 0 1 1 1
## 8 0 1 3 3 1 1 0 1 1 1
## 9 0 0 3 4 1 1 0 1 1 1
## 10 0 0 0 3 1 1 0 1 1 0
## # ℹ 20,327 more rows
str(data_sp)
## tibble [20,337 × 10] (S3: tbl_df/tbl/data.frame)
## $ Y : chr [1:20337] "0" "0" "0" "0" ...
## $ X1: chr [1:20337] "0" "4" "2" "1" ...
## $ X2: chr [1:20337] "1" "0" "1" "0" ...
## $ X3: num [1:20337] 2 4 3 3 3 4 2 3 4 3 ...
## $ X4: chr [1:20337] "0" "0" "1" "0" ...
## $ X5: chr [1:20337] "1" "1" "1" "1" ...
## $ X6: chr [1:20337] "1" "0" "1" "1" ...
## $ X7: chr [1:20337] "1" "1" "1" "1" ...
## $ X8: chr [1:20337] "1" "1" "1" "1" ...
## $ X9: chr [1:20337] "1" "1" "1" "1" ...
# Before the analysis, every categorical variable must be converted to a factor
# (X3, the number of household members, stays numeric)
data_sp$Y <- as.factor(data_sp$Y)
data_sp$X1 <- as.factor(data_sp$X1)
data_sp$X2 <- as.factor(data_sp$X2)
data_sp$X4 <- as.factor(data_sp$X4)
data_sp$X5 <- as.factor(data_sp$X5)
data_sp$X6 <- as.factor(data_sp$X6)
data_sp$X7 <- as.factor(data_sp$X7)
data_sp$X8 <- as.factor(data_sp$X8)
data_sp$X9 <- as.factor(data_sp$X9)
str(data_sp)
## tibble [20,337 × 10] (S3: tbl_df/tbl/data.frame)
## $ Y : Factor w/ 3 levels "0","1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ X1: Factor w/ 5 levels "0","1","2","3",..: 1 5 3 2 3 1 2 2 1 1 ...
## $ X2: Factor w/ 5 levels "0","1","2","3",..: 2 1 2 1 4 1 4 4 4 1 ...
## $ X3: num [1:20337] 2 4 3 3 3 4 2 3 4 3 ...
## $ X4: Factor w/ 2 levels "0","1": 1 1 2 1 1 2 2 2 2 2 ...
## $ X5: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ X6: Factor w/ 2 levels "0","1": 2 1 2 2 2 1 1 1 1 1 ...
## $ X7: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ X8: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ X9: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 1 ...
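# As an equivalent one-step alternative (a sketch; run instead of the per-column
# calls above; cat_cols is a helper introduced here), every character column can
# be converted at once:
cat_cols <- sapply(data_sp, is.character)
data_sp[cat_cols] <- lapply(data_sp[cat_cols], as.factor)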
# Data visualization using descriptive statistics
# Box plot of the number of household members (X3) by largest income source (Y)
boxplot(X3 ~ Y, data = data_sp,
        main = ".",
        xlab = "Pendapatan terbesar di rumah tangga",
        ylab = "Jumlah anggota keluarga",
        col = c("lightblue", "mistyrose", "lightgrey", "lavender"))
# Bar chart of largest household income source (Y) by employment status (X2)
barplot (table(data_sp$Y, data_sp$X2),
main = ".",
xlab = "Status Pekerjaan",
ylab = "jumlah",
col = c("lightblue", "mistyrose", "lightgrey"),
beside = TRUE)
legend("topright",
c("ART yang bekerja", "Kiriman Uang/Barang", "Investasi", "Pensiunan"),
fill = c("lightblue", "mistyrose", "lightgrey"))
# Barplot of largest source of household income (Y)
barplot (table(data_sp$Y),
main = " .",
xlab = "Sumber Pendapatan Terbesar di Rumah Tangga",
ylab = "JUMLAH",
col = c("lightblue", "mistyrose", "lightgrey", "lavender"))
# Barplot of employment status (X2)
barplot (table(data_sp$X2),
main = " .",
xlab = "Status pekerjaan",
ylab = "JUMLAH",
col = c("lightblue", "mistyrose", "lightgrey", "lavender", "lightyellow"))
barplot (table(data_sp$X4),
main = " Barplot Kepemilikan perhiasan/emas (min 10 gram)",
xlab = "Status Kepemilikan perhiasan/emas (min 10 gram)",
ylab = "JUMLAH",
col = c("mistyrose", "lightgrey"))
barplot (table(data_sp$X6),
main = " Barplot Status kepemilikan lahan/tanah",
xlab = "Status kepemilikan lahan/tanah",
ylab = "JUMLAH",
col = c("mistyrose", "lightgrey"))
barplot (table(data_sp$X8),
main = " Barplot Status jaminan hari tua",
xlab = "Status jaminan hari tua",
ylab = "JUMLAH",
col = c("mistyrose", "lightgrey"))
# Pie chart of education level (X1)
library(plotrix)
mytable <- table(data_sp$X1)
lbls <- c("SD","SMP","SMA","SMK","S1")
pct<-round(mytable/sum(mytable)*100)
lbls<-paste(lbls,pct)
lbls<-paste(lbls,"%",sep = "")
pie(mytable, labels = lbls,
main="Status pendidikan",
col=c("mistyrose","lightblue","lavender", "salmon", "lightyellow"))
# Pie chart of car ownership status (X5)
library(plotrix)
mytable <- table(data_sp$X5)
lbls <- c("memiliki","tidak memiliki")
pct<-round(mytable/sum(mytable)*100)
lbls<-paste(lbls,pct)
lbls<-paste(lbls,"%",sep = "")
pie(mytable, labels = lbls,
main="Status kepemilikan mobil",
col=c("mistyrose","lavender"))
library(plotrix)
mytable <- table(data_sp$X7)
lbls <- c("dapat","tidak dapat")
pct<-round(mytable/sum(mytable)*100)
lbls<-paste(lbls,pct)
lbls<-paste(lbls,"%",sep = "")
pie(mytable, labels = lbls,
main="jaminan pensiun",
col=c("mistyrose","lavender"))
library(plotrix)
mytable <- table(data_sp$X9)
lbls <- c("dapat","tidak dapat")
pct<-round(mytable/sum(mytable)*100)
lbls<-paste(lbls,pct)
lbls<-paste(lbls,"%",sep = "")
pie(mytable, labels = lbls,
main="bantuan program keluarga harapan",
col=c("mistyrose","lavender"))
## Numeric summaries of the number of household members (X3)
attach(data_sp)
median(data_sp$X3)
## [1] 3
mean(data_sp$X3)
## [1] 3.522348
min(data_sp$X3)
## [1] 1
max(data_sp$X3)
## [1] 14
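# Sketch: the same location statistics for X3, split by income source Y
aggregate(X3 ~ Y, data = data_sp,
          FUN = function(x) c(mean = mean(x), median = median(x),
                              min = min(x), max = max(x)))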
library(arsenal)
tab <- tableby(Y ~., data = data_sp)
summary(tab, text = TRUE)
##
##
## | | 0 (N=19754) | 1 (N=507) | 2 (N=76) | Total (N=20337) | p value|
## |:------------|:--------------:|:-------------:|:-------------:|:---------------:|-------:|
## |X1 | | | | | < 0.001|
## |- 0 | 9313 (47.1%) | 349 (68.8%) | 21 (27.6%) | 9683 (47.6%) | |
## |- 1 | 3447 (17.4%) | 59 (11.6%) | 13 (17.1%) | 3519 (17.3%) | |
## |- 2 | 4551 (23.0%) | 72 (14.2%) | 27 (35.5%) | 4650 (22.9%) | |
## |- 3 | 1068 (5.4%) | 9 (1.8%) | 3 (3.9%) | 1080 (5.3%) | |
## |- 4 | 1375 (7.0%) | 18 (3.6%) | 12 (15.8%) | 1405 (6.9%) | |
## |X2 | | | | | < 0.001|
## |- 0 | 5428 (27.5%) | 210 (41.4%) | 36 (47.4%) | 5674 (27.9%) | |
## |- 1 | 2344 (11.9%) | 85 (16.8%) | 17 (22.4%) | 2446 (12.0%) | |
## |- 2 | 836 (4.2%) | 21 (4.1%) | 9 (11.8%) | 866 (4.3%) | |
## |- 3 | 8184 (41.4%) | 94 (18.5%) | 8 (10.5%) | 8286 (40.7%) | |
## |- 4 | 2962 (15.0%) | 97 (19.1%) | 6 (7.9%) | 3065 (15.1%) | |
## |X3 | | | | | < 0.001|
## |- Mean (SD) | 3.555 (1.370) | 2.367 (1.363) | 2.645 (1.547) | 3.522 (1.384) | |
## |- Range | 1.000 - 14.000 | 1.000 - 8.000 | 1.000 - 8.000 | 1.000 - 14.000 | |
## |X4 | | | | | < 0.001|
## |- 0 | 3265 (16.5%) | 48 (9.5%) | 24 (31.6%) | 3337 (16.4%) | |
## |- 1 | 16489 (83.5%) | 459 (90.5%) | 52 (68.4%) | 17000 (83.6%) | |
## |X5 | | | | | < 0.001|
## |- 0 | 1908 (9.7%) | 14 (2.8%) | 13 (17.1%) | 1935 (9.5%) | |
## |- 1 | 17846 (90.3%) | 493 (97.2%) | 63 (82.9%) | 18402 (90.5%) | |
## |X6 | | | | | 0.005|
## |- 0 | 13822 (70.0%) | 369 (72.8%) | 65 (85.5%) | 14256 (70.1%) | |
## |- 1 | 5932 (30.0%) | 138 (27.2%) | 11 (14.5%) | 6081 (29.9%) | |
## |X7 | | | | | < 0.001|
## |- 0 | 703 (3.6%) | 4 (0.8%) | 33 (43.4%) | 740 (3.6%) | |
## |- 1 | 19051 (96.4%) | 503 (99.2%) | 43 (56.6%) | 19597 (96.4%) | |
## |X8 | | | | | < 0.001|
## |- 0 | 913 (4.6%) | 4 (0.8%) | 13 (17.1%) | 930 (4.6%) | |
## |- 1 | 18841 (95.4%) | 503 (99.2%) | 63 (82.9%) | 19407 (95.4%) | |
## |X9 | | | | | 0.002|
## |- 0 | 2591 (13.1%) | 53 (10.5%) | 1 (1.3%) | 2645 (13.0%) | |
## |- 1 | 17163 (86.9%) | 454 (89.5%) | 75 (98.7%) | 17692 (87.0%) | |
set.seed(1234)
acak <- createDataPartition(data_sp$Y, p=0.8, list=FALSE)
data_train <- data_sp[acak,]
data_test <- data_sp[-acak,]
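# Quick check (sketch): createDataPartition() samples within each level of Y,
# so the 80/20 split should preserve the class proportions
round(rbind(full  = prop.table(table(data_sp$Y)),
            train = prop.table(table(data_train$Y)),
            test  = prop.table(table(data_test$Y))), 4)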
summary(data_train)
## Y X1 X2 X3 X4 X5 X6
## 0:15804 0:7745 0:4548 Min. : 1.000 0: 2633 0: 1533 0:11482
## 1: 406 1:2818 1:1948 1st Qu.: 3.000 1:13638 1:14738 1: 4789
## 2: 61 2:3714 2: 684 Median : 3.000
## 3: 883 3:6639 Mean : 3.523
## 4:1111 4:2452 3rd Qu.: 4.000
## Max. :14.000
## X7 X8 X9
## 0: 583 0: 729 0: 2114
## 1:15688 1:15542 1:14157
##
##
##
##
summary(data_test)
## Y X1 X2 X3 X4 X5 X6
## 0:3950 0:1938 0:1126 Min. : 1.000 0: 704 0: 402 0:2774
## 1: 101 1: 701 1: 498 1st Qu.: 3.000 1:3362 1:3664 1:1292
## 2: 15 2: 936 2: 182 Median : 3.000
## 3: 197 3:1647 Mean : 3.521
## 4: 294 4: 613 3rd Qu.: 4.000
## Max. :12.000
## X7 X8 X9
## 0: 157 0: 201 0: 531
## 1:3909 1:3865 1:3535
##
##
##
##
## Contingency tables of Y against each categorical predictor
data1 <- table(data_sp$Y,data_sp$X1)
data1
##
## 0 1 2 3 4
## 0 9313 3447 4551 1068 1375
## 1 349 59 72 9 18
## 2 21 13 27 3 12
data2 <- table(data_sp$Y,data_sp$X2)
data2
##
## 0 1 2 3 4
## 0 5428 2344 836 8184 2962
## 1 210 85 21 94 97
## 2 36 17 9 8 6
data4 <- table(data_sp$Y,data_sp$X4)
data4
##
## 0 1
## 0 3265 16489
## 1 48 459
## 2 24 52
data5 <- table(data_sp$Y,data_sp$X5)
data5
##
## 0 1
## 0 1908 17846
## 1 14 493
## 2 13 63
data6 <- table(data_sp$Y,data_sp$X6)
data6
##
## 0 1
## 0 13822 5932
## 1 369 138
## 2 65 11
data7 <- table(data_sp$Y,data_sp$X7)
data7
##
## 0 1
## 0 703 19051
## 1 4 503
## 2 33 43
data8 <- table(data_sp$Y,data_sp$X8)
data8
##
## 0 1
## 0 913 18841
## 1 4 503
## 2 13 63
data9 <- table(data_sp$Y,data_sp$X9)
data9
##
## 0 1
## 0 2591 17163
## 1 53 454
## 2 1 75
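# Sketch: row-wise proportions make the very unbalanced classes easier to compare
round(prop.table(data1, margin = 1), 3)
round(prop.table(data2, margin = 1), 3)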
## Chi-square tests of independence between the dependent variable and each independent variable
c1=chisq.test(data_sp$Y, data_sp$X1)
## Warning in chisq.test(data_sp$Y, data_sp$X1): Chi-squared approximation may be
## incorrect
c1
##
## Pearson's Chi-squared test
##
## data: data_sp$Y and data_sp$X1
## X-squared = 116.7, df = 8, p-value < 2.2e-16
c2=chisq.test(data_sp$Y, data_sp$X2)
## Warning in chisq.test(data_sp$Y, data_sp$X2): Chi-squared approximation may be
## incorrect
c2
##
## Pearson's Chi-squared test
##
## data: data_sp$Y and data_sp$X2
## X-squared = 160.64, df = 8, p-value < 2.2e-16
c4=chisq.test(data_sp$Y, data_sp$X4)
c4
##
## Pearson's Chi-squared test
##
## data: data_sp$Y and data_sp$X4
## X-squared = 30.767, df = 2, p-value = 2.084e-07
c5=chisq.test(data_sp$Y, data_sp$X5)
c5
##
## Pearson's Chi-squared test
##
## data: data_sp$Y and data_sp$X5
## X-squared = 32.421, df = 2, p-value = 9.119e-08
c6=chisq.test(data_sp$Y, data_sp$X6)
c6
##
## Pearson's Chi-squared test
##
## data: data_sp$Y and data_sp$X6
## X-squared = 10.525, df = 2, p-value = 0.005182
c7=chisq.test(data_sp$Y, data_sp$X7)
## Warning in chisq.test(data_sp$Y, data_sp$X7): Chi-squared approximation may be
## incorrect
c7
##
## Pearson's Chi-squared test
##
## data: data_sp$Y and data_sp$X7
## X-squared = 355.14, df = 2, p-value < 2.2e-16
c8=chisq.test(data_sp$Y, data_sp$X8)
## Warning in chisq.test(data_sp$Y, data_sp$X8): Chi-squared approximation may be
## incorrect
c8
##
## Pearson's Chi-squared test
##
## data: data_sp$Y and data_sp$X8
## X-squared = 44.097, df = 2, p-value = 2.657e-10
c9=chisq.test(data_sp$Y, data_sp$X9)
c9
##
## Pearson's Chi-squared test
##
## data: data_sp$Y and data_sp$X9
## X-squared = 12.311, df = 2, p-value = 0.002121
# The independence tests indicate that every predictor is associated with the dependent variable
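# Sketch: the same conclusion at a glance, collecting the test objects created
# above (chi_list is a helper introduced here)
chi_list <- list(X1 = c1, X2 = c2, X4 = c4, X5 = c5,
                 X6 = c6, X7 = c7, X8 = c8, X9 = c9)
data.frame(statistic = sapply(chi_list, function(x) unname(x$statistic)),
           df        = sapply(chi_list, function(x) unname(x$parameter)),
           p.value   = sapply(chi_list, function(x) x$p.value))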
## Multicollinearity check among the independent variables using pairwise chi-square tests,
## followed by a pairs() plot with a custom correlation panel
# X1 vs. the remaining predictors
m1=chisq.test(data_sp$X1, data_sp$X2)
m1
##
## Pearson's Chi-squared test
##
## data: data_sp$X1 and data_sp$X2
## X-squared = 2507.9, df = 16, p-value < 2.2e-16
m2=chisq.test(data_sp$X1, data_sp$X4)
m2
##
## Pearson's Chi-squared test
##
## data: data_sp$X1 and data_sp$X4
## X-squared = 1398.1, df = 4, p-value < 2.2e-16
m3=chisq.test(data_sp$X1, data_sp$X5)
m3
##
## Pearson's Chi-squared test
##
## data: data_sp$X1 and data_sp$X5
## X-squared = 2714.2, df = 4, p-value < 2.2e-16
m4=chisq.test(data_sp$X1, data_sp$X6)
m4
##
## Pearson's Chi-squared test
##
## data: data_sp$X1 and data_sp$X6
## X-squared = 404.89, df = 4, p-value < 2.2e-16
m5=chisq.test(data_sp$X1, data_sp$X7)
m5
##
## Pearson's Chi-squared test
##
## data: data_sp$X1 and data_sp$X7
## X-squared = 1254.6, df = 4, p-value < 2.2e-16
m6=chisq.test(data_sp$X1, data_sp$X8)
m6
##
## Pearson's Chi-squared test
##
## data: data_sp$X1 and data_sp$X8
## X-squared = 1317.4, df = 4, p-value < 2.2e-16
m7=chisq.test(data_sp$X1, data_sp$X9)
m7
##
## Pearson's Chi-squared test
##
## data: data_sp$X1 and data_sp$X9
## X-squared = 813.22, df = 4, p-value < 2.2e-16
# X2 vs. the remaining predictors
d2=chisq.test(data_sp$X2, data_sp$X4)
d2
##
## Pearson's Chi-squared test
##
## data: data_sp$X2 and data_sp$X4
## X-squared = 453.64, df = 4, p-value < 2.2e-16
d3=chisq.test(data_sp$X2, data_sp$X5)
d3
##
## Pearson's Chi-squared test
##
## data: data_sp$X2 and data_sp$X5
## X-squared = 710.48, df = 4, p-value < 2.2e-16
d4=chisq.test(data_sp$X2, data_sp$X6)
d4
##
## Pearson's Chi-squared test
##
## data: data_sp$X2 and data_sp$X6
## X-squared = 597.93, df = 4, p-value < 2.2e-16
d5=chisq.test(data_sp$X2, data_sp$X7)
d5
##
## Pearson's Chi-squared test
##
## data: data_sp$X2 and data_sp$X7
## X-squared = 421.93, df = 4, p-value < 2.2e-16
d6=chisq.test(data_sp$X2, data_sp$X8)
d6
##
## Pearson's Chi-squared test
##
## data: data_sp$X2 and data_sp$X8
## X-squared = 759.81, df = 4, p-value < 2.2e-16
d7=chisq.test(data_sp$X2, data_sp$X9)
d7
##
## Pearson's Chi-squared test
##
## data: data_sp$X2 and data_sp$X9
## X-squared = 257.76, df = 4, p-value < 2.2e-16
# X4 vs. the remaining predictors
n4=chisq.test(data_sp$X4, data_sp$X5)
n4
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X4 and data_sp$X5
## X-squared = 2462.4, df = 1, p-value < 2.2e-16
n5=chisq.test(data_sp$X4, data_sp$X6)
n5
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X4 and data_sp$X6
## X-squared = 357.68, df = 1, p-value < 2.2e-16
n6=chisq.test(data_sp$X4, data_sp$X7)
n6
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X4 and data_sp$X7
## X-squared = 517.96, df = 1, p-value < 2.2e-16
n7=chisq.test(data_sp$X4, data_sp$X8)
n7
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X4 and data_sp$X8
## X-squared = 419.23, df = 1, p-value < 2.2e-16
n8=chisq.test(data_sp$X4, data_sp$X9)
n8
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X4 and data_sp$X9
## X-squared = 284.22, df = 1, p-value < 2.2e-16
# X5 vs. the remaining predictors
k4=chisq.test(data_sp$X5, data_sp$X6)
k4
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X5 and data_sp$X6
## X-squared = 313.3, df = 1, p-value < 2.2e-16
k5=chisq.test(data_sp$X5, data_sp$X7)
k5
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X5 and data_sp$X7
## X-squared = 900.26, df = 1, p-value < 2.2e-16
k6=chisq.test(data_sp$X5, data_sp$X8)
k6
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X5 and data_sp$X8
## X-squared = 818.09, df = 1, p-value < 2.2e-16
k7=chisq.test(data_sp$X5, data_sp$X9)
k7
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X5 and data_sp$X9
## X-squared = 272.08, df = 1, p-value < 2.2e-16
# X6 vs. the remaining predictors
l5=chisq.test(data_sp$X6, data_sp$X7)
l5
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X6 and data_sp$X7
## X-squared = 77.705, df = 1, p-value < 2.2e-16
l6=chisq.test(data_sp$X6, data_sp$X8)
l6
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X6 and data_sp$X8
## X-squared = 45.087, df = 1, p-value = 1.884e-11
l7=chisq.test(data_sp$X6, data_sp$X9)
l7
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X6 and data_sp$X9
## X-squared = 0.71841, df = 1, p-value = 0.3967
# X7 vs. the remaining predictors
p6=chisq.test(data_sp$X7, data_sp$X8)
p6
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X7 and data_sp$X8
## X-squared = 9780.1, df = 1, p-value < 2.2e-16
p7=chisq.test(data_sp$X7, data_sp$X9)
p7
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X7 and data_sp$X9
## X-squared = 82.821, df = 1, p-value < 2.2e-16
# X8 vs. the remaining predictor
q7=chisq.test(data_sp$X8, data_sp$X9)
q7
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data_sp$X8 and data_sp$X9
## X-squared = 92.653, df = 1, p-value < 2.2e-16
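# Sketch: with n = 20,337 almost any association comes out "significant", so
# Cramér's V is a more informative summary of how strongly the categorical
# predictors overlap (cramers_v() is a helper defined here, not part of the
# original script; X3 is numeric and excluded, as above)
cramers_v <- function(x, y) {
  tab <- table(x, y)
  chi <- suppressWarnings(chisq.test(tab, correct = FALSE))$statistic
  unname(sqrt(chi / (sum(tab) * (min(dim(tab)) - 1))))
}
vars <- c("X1", "X2", "X4", "X5", "X6", "X7", "X8", "X9")
round(sapply(vars, function(a) sapply(vars, function(b)
  cramers_v(data_sp[[a]], data_sp[[b]]))), 2)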
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  usr <- par("usr")
  on.exit(par(usr = usr))  # restore the original plotting coordinates on exit
  par(usr = c(0, 1, 0, 1))
  Cor <- abs(cor(x, y))  # remove abs() if signed correlations are preferred
  txt <- paste0(prefix, format(c(Cor, 0.123456789), digits = digits)[1])
  if (missing(cex.cor)) {
    cex.cor <- 0.4 / strwidth(txt)
  }
  text(0.5, 0.5, txt,
       cex = 1 + cex.cor * Cor)  # resize the text by the level of correlation
}
# Plotting the correlation matrix
pairs(data_sp[,2:10],
upper.panel = panel.cor, # Correlation panel
lower.panel = panel.smooth) # Smoothed regression lines
## Fit the full multinomial logistic regression model
model.SP <- multinom(Y ~X1+X2+X3+X4+X5+X6+X7+X8+X9, data = data_train)
## # weights: 51 (32 variable)
## initial value 17875.520549
## iter 10 value 3510.769492
## iter 20 value 3015.383624
## iter 30 value 2636.191384
## iter 40 value 2295.947976
## iter 50 value 2002.743754
## iter 60 value 1977.680212
## final value 1977.678863
## converged
summary(model.SP)
## Call:
## multinom(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 +
## X9, data = data_train)
##
## Coefficients:
## (Intercept) X11 X12 X13 X14 X21
## 1 -3.171511 -0.4571073 -0.2465316 -0.8915369 -0.2456432 -0.21226670
## 2 -165.003359 1.0212390 1.4909076 1.0312733 1.6052593 -0.07724534
## X22 X23 X24 X3 X41 X51 X61
## 1 -0.4687188 -0.9326786 -0.2552974 -0.6838456 0.4197614 0.7109466 -0.08523507
## 2 0.2981131 -2.5684605 -0.8015994 -0.4261133 -0.2173688 0.1799242 -0.26091961
## X71 X81 X91
## 1 0.4439536 0.6304837 -0.04229984
## 2 -3.6689168 1.0996232 162.88162982
##
## Std. Errors:
## (Intercept) X11 X12 X13 X14 X21 X22
## 1 0.8013919 0.1657521 0.1556770 0.3671079 0.3036084 0.1515911 0.2778880
## 2 0.3652984 0.4281066 0.3789103 0.6725777 0.4900058 0.3755168 0.4416945
## X23 X24 X3 X41 X51 X61 X71
## 1 0.1464937 0.1424262 0.04754721 0.1882558 0.3379092 0.1206288 0.6647321
## 2 0.4634753 0.5471759 0.11636441 0.3218223 0.3916292 0.3710724 0.3600281
## X81 X91
## 1 0.6628256 0.1747936
## 2 0.4985916 0.3652984
##
## Residual Deviance: 3955.358
## AIC: 4019.358
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
lrtest(model.SP)
## # weights: 6 (2 variable)
## initial value 17875.520549
## iter 10 value 2311.677327
## final value 2299.454710
## converged
## Likelihood ratio test
##
## Model 1: Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9
## Model 2: Y ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 32 -1977.7
## 2 2 -2299.4 -30 643.55 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
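# Sketch: McFadden's pseudo R-squared from the two log-likelihoods above
# (full model vs. an intercept-only model refit on the training data;
# model.null is a helper object introduced here)
model.null <- multinom(Y ~ 1, data = data_train, trace = FALSE)
1 - as.numeric(logLik(model.SP)) / as.numeric(logLik(model.null))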
model.P1 <- multinom(Y~X1, data = data_train)
## # weights: 18 (10 variable)
## initial value 17875.520549
## iter 10 value 2575.464007
## iter 20 value 2419.674054
## iter 30 value 2266.025945
## final value 2246.302468
## converged
library(car)
## Loading required package: carData
lrtest(model.P1) #likelihood
## # weights: 6 (2 variable)
## initial value 17875.520549
## iter 10 value 2311.677327
## final value 2299.454710
## converged
## Likelihood ratio test
##
## Model 1: Y ~ X1
## Model 2: Y ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 10 -2246.3
## 2 2 -2299.4 -8 106.3 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.P2 <- multinom(Y~X2, data = data_train)
## # weights: 18 (10 variable)
## initial value 17875.520549
## iter 10 value 2438.283462
## iter 20 value 2317.710168
## iter 30 value 2229.432399
## final value 2227.460004
## converged
library(car)
lrtest(model.P2) #likelihood
## # weights: 6 (2 variable)
## initial value 17875.520549
## iter 10 value 2311.677327
## final value 2299.454710
## converged
## Likelihood ratio test
##
## Model 1: Y ~ X2
## Model 2: Y ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 10 -2227.5
## 2 2 -2299.4 -8 143.99 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.P3 <- multinom(Y~X3, data = data_train)
## # weights: 9 (4 variable)
## initial value 17875.520549
## iter 10 value 3512.122660
## iter 20 value 2127.314105
## final value 2126.845732
## converged
library(car)
lrtest(model.P3) #likelihood
## # weights: 6 (2 variable)
## initial value 17875.520549
## iter 10 value 2311.677327
## final value 2299.454710
## converged
## Likelihood ratio test
##
## Model 1: Y ~ X3
## Model 2: Y ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 4 -2126.8
## 2 2 -2299.4 -2 345.22 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.P4 <- multinom(Y~X4, data = data_train)
## # weights: 9 (4 variable)
## initial value 17875.520549
## iter 10 value 3030.931867
## iter 20 value 2281.597110
## final value 2281.559529
## converged
library(car)
lrtest(model.P4) #likelihood
## # weights: 6 (2 variable)
## initial value 17875.520549
## iter 10 value 2311.677327
## final value 2299.454710
## converged
## Likelihood ratio test
##
## Model 1: Y ~ X4
## Model 2: Y ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 4 -2281.6
## 2 2 -2299.4 -2 35.79 1.691e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.P5 <- multinom(Y~X5, data = data_train)
## # weights: 9 (4 variable)
## initial value 17875.520549
## iter 10 value 2428.644408
## iter 20 value 2282.711330
## final value 2280.432284
## converged
library(car)
lrtest(model.P5) #likelihood
## # weights: 6 (2 variable)
## initial value 17875.520549
## iter 10 value 2311.677327
## final value 2299.454710
## converged
## Likelihood ratio test
##
## Model 1: Y ~ X5
## Model 2: Y ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 4 -2280.4
## 2 2 -2299.4 -2 38.045 5.479e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.P6 <- multinom(Y~X6, data = data_train)
## # weights: 9 (4 variable)
## initial value 17875.520549
## iter 10 value 2550.368546
## iter 20 value 2324.900093
## final value 2295.649804
## converged
library(car)
lrtest(model.P6) #likelihood
## # weights: 6 (2 variable)
## initial value 17875.520549
## iter 10 value 2311.677327
## final value 2299.454710
## converged
## Likelihood ratio test
##
## Model 1: Y ~ X6
## Model 2: Y ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 4 -2295.7
## 2 2 -2299.4 -2 7.6098 0.02226 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.P7 <- multinom(Y~X7, data = data_train)
## # weights: 9 (4 variable)
## initial value 17875.520549
## iter 10 value 2838.897123
## iter 20 value 2249.150145
## final value 2249.105513
## converged
library(car)
lrtest(model.P7) #likelihood
## # weights: 6 (2 variable)
## initial value 17875.520549
## iter 10 value 2311.677327
## final value 2299.454710
## converged
## Likelihood ratio test
##
## Model 1: Y ~ X7
## Model 2: Y ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 4 -2249.1
## 2 2 -2299.4 -2 100.7 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.P8 <- multinom(Y~X8, data = data_train)
## # weights: 9 (4 variable)
## initial value 17875.520549
## iter 10 value 3044.697758
## iter 20 value 2285.738173
## final value 2285.734239
## converged
library(car)
lrtest(model.P8) #likelihood
## # weights: 6 (2 variable)
## initial value 17875.520549
## iter 10 value 2311.677327
## final value 2299.454710
## converged
## Likelihood ratio test
##
## Model 1: Y ~ X8
## Model 2: Y ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 4 -2285.7
## 2 2 -2299.4 -2 27.441 1.1e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.P9 <- multinom(Y~X9, data = data_train)
## # weights: 9 (4 variable)
## initial value 17875.520549
## iter 10 value 2360.988120
## final value 2289.245532
## converged
library(car)
lrtest(model.P9) #likelihood
## # weights: 6 (2 variable)
## initial value 17875.520549
## iter 10 value 2311.677327
## final value 2299.454710
## converged
## Likelihood ratio test
##
## Model 1: Y ~ X9
## Model 2: Y ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 4 -2289.2
## 2 2 -2299.4 -2 20.418 3.683e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
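# Sketch: the nine univariable screens above can equally be run in one loop
# (univariable_p is a helper introduced here)
univariable_p <- sapply(paste0("X", 1:9), function(v) {
  fit <- multinom(as.formula(paste("Y ~", v)), data = data_train, trace = FALSE)
  lrtest(fit)[["Pr(>Chisq)"]][2]
})
univariable_p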
# Wald tests: z statistics and two-sided p-values against the standard normal
z <- summary(model.SP)$coefficients / summary(model.SP)$standard.errors
p <- (1 - pnorm(abs(z), 0, 1)) * 2
data.frame(p)
## X.Intercept. X11 X12 X13 X14 X21
## 1 0.000114551 0.007865807 0.137912619 0.01988334 0.478275509 0.1935123
## 2 0.000000000 0.022284806 0.000125705 0.15176790 0.001493208 0.9158171
## X22 X23 X24 X3 X41 X51
## 1 0.1125765 3.688299e-10 0.09055278 0.0000000000 0.03319309 0.04507462
## 2 0.5653383 5.276210e-08 0.17226074 0.0003681987 0.56499981 0.71927718
## X61 X71 X81 X91
## 1 0.5441234 0.5701243 0.39465496 0.8870809
## 2 0.5464089 0.0000000 0.03525139 0.0000000
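# Sketch: flag which coefficients have a Wald p-value below 0.05
data.frame(p < 0.05)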
# Predictions on the test set
predict_prob = predict(model.SP, data_test, type = "prob")
head(predict_prob,10)
## 0 1 2
## 1 0.9783676 0.011898743 9.733687e-03
## 2 0.9661790 0.026288020 7.532933e-03
## 3 0.9761276 0.017484016 6.388338e-03
## 4 0.9905869 0.008618305 7.948407e-04
## 5 0.9879444 0.011537908 5.176777e-04
## 6 0.9533764 0.046623611 4.284973e-74
## 7 0.9753567 0.023075125 1.568128e-03
## 8 0.9876977 0.012302259 1.893173e-74
## 9 0.9832023 0.016654803 1.429339e-04
## 10 0.9639294 0.035005952 1.064613e-03
head(data.frame(predict_prob, data_test$Y),10)
## X0 X1 X2 data_test.Y
## 1 0.9783676 0.011898743 9.733687e-03 0
## 2 0.9661790 0.026288020 7.532933e-03 0
## 3 0.9761276 0.017484016 6.388338e-03 0
## 4 0.9905869 0.008618305 7.948407e-04 0
## 5 0.9879444 0.011537908 5.176777e-04 0
## 6 0.9533764 0.046623611 4.284973e-74 0
## 7 0.9753567 0.023075125 1.568128e-03 0
## 8 0.9876977 0.012302259 1.893173e-74 0
## 9 0.9832023 0.016654803 1.429339e-04 0
## 10 0.9639294 0.035005952 1.064613e-03 0
prediksi.test <- predict(model.SP, data_test,type = "class")
data_test$Y<-as.factor(data_test$Y)
confusionMatrix(as.factor(prediksi.test),
data_test$Y, positive="1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1 2
## 0 3950 101 15
## 1 0 0 0
## 2 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.9715
## 95% CI : (0.9659, 0.9764)
## No Information Rate : 0.9715
## P-Value [Acc > NIR] : 0.5247
##
## Kappa : 0
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 0 Class: 1 Class: 2
## Sensitivity 1.0000 0.00000 0.000000
## Specificity 0.0000 1.00000 1.000000
## Pos Pred Value 0.9715 NaN NaN
## Neg Pred Value NaN 0.97516 0.996311
## Prevalence 0.9715 0.02484 0.003689
## Detection Rate 0.9715 0.00000 0.000000
## Detection Prevalence 1.0000 0.00000 0.000000
## Balanced Accuracy 0.5000 0.50000 0.500000
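# Sketch: the model predicts class 0 for every test household, so its accuracy
# simply equals the no-information rate; the strong class imbalance drives this
prop.table(table(Observed = data_test$Y))
mean(prediksi.test == data_test$Y)  # overall accuracy, identical to the NIR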
data.frame(summary(model.SP)$coefficients)
## X.Intercept. X11 X12 X13 X14 X21
## 1 -3.171511 -0.4571073 -0.2465316 -0.8915369 -0.2456432 -0.21226670
## 2 -165.003359 1.0212390 1.4909076 1.0312733 1.6052593 -0.07724534
## X22 X23 X24 X3 X41 X51 X61
## 1 -0.4687188 -0.9326786 -0.2552974 -0.6838456 0.4197614 0.7109466 -0.08523507
## 2 0.2981131 -2.5684605 -0.8015994 -0.4261133 -0.2173688 0.1799242 -0.26091961
## X71 X81 X91
## 1 0.4439536 0.6304837 -0.04229984
## 2 -3.6689168 1.0996232 162.88162982
data.frame(exp(summary(model.SP)$coefficients))
## X.Intercept. X11 X12 X13 X14 X21 X22
## 1 4.194017e-02 0.6331124 0.7815067 0.4100251 0.7822013 0.8087490 0.6258036
## 2 2.187518e-72 2.7766328 4.4411246 2.8046347 4.9791507 0.9256627 1.3473142
## X23 X24 X3 X41 X51 X61 X71
## 1 0.39349827 0.7746860 0.5046725 1.5215984 2.035918 0.9182964 1.55885814
## 2 0.07665346 0.4486109 0.6530423 0.8046332 1.197127 0.7703428 0.02550408
## X81 X91
## 1 1.878519 9.585823e-01
## 2 3.003034 5.477634e+70
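# Sketch: 95% Wald confidence intervals for the relative-risk ratios above,
# built from the coefficient and standard-error matrices already extracted
# (est and se are helper objects introduced here)
est <- summary(model.SP)$coefficients
se  <- summary(model.SP)$standard.errors
list(lower = round(exp(est - 1.96 * se), 3),
     upper = round(exp(est + 1.96 * se), 3))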