Y (Keputusan): Diterima (1) atau Ditolak (0). Semakin besar total “poin” pelamar, semakin besar peluangnya untuk diterima; keputusan akhirnya diundi secara acak menurut peluang tersebut.
X1 (Pengalaman): Lama pengalaman pelamar menangani robot, dalam bulan (0–36). Semakin lama pengalamannya, semakin tinggi poinnya.
X2 (Status): Menganggur (1) atau Bekerja (0).
X3 (Pendidikan terakhir): Sekolah menengah (0) atau Berkuliah (1).
X4 (Nilai IPK): Indeks prestasi kumulatif (IPK) pelamar saat berkuliah; diisi 0 untuk lulusan sekolah menengah.
# Simulate work experience (x1) for every applicant.
# Seed the RNG so the draws below are reproducible.
set.seed(seed = 789)
# Number of simulated applicants; the later covariate draws reuse it.
n <- 150
# Experience in months: uniform draws on [0, 36] (0-3 years), rounded to
# whole months.
x1 <- round(runif(n = n, min = 0, max = 36), digits = 0)
# Auto-print the simulated values for inspection.
x1
## [1] 25 3 0 21 18 1 21 6 13 12 11 18 9 17 16 1 6 23 28 4 12 16 6 11 15
## [26] 19 11 18 30 18 8 10 24 18 10 16 21 10 8 27 9 28 27 36 7 6 23 33 3 27
## [51] 11 1 14 33 15 33 29 20 16 11 13 25 5 24 26 27 26 27 8 10 5 28 3 6 15
## [76] 14 10 18 27 32 16 21 35 29 34 12 28 30 32 3 25 4 0 1 25 9 26 20 30 10
## [101] 17 11 2 32 28 6 16 15 9 17 26 23 31 16 9 4 32 14 32 23 13 36 28 26 14
## [126] 30 36 27 23 6 13 23 7 9 12 28 6 26 28 2 25 21 31 20 32 13 13 1 5 3
# Simulate employment status (x2), seeded separately for reproducibility.
set.seed(seed = 678)
# One draw per applicant, with replacement, from the two codes:
# 0 = employed, 1 = unemployed.
x2 <- sample(x = c(0, 1), size = n, replace = TRUE)
# Auto-print the simulated values for inspection.
x2
## [1] 0 1 1 1 0 0 1 1 0 1 1 1 1 0 0 1 1 0 0 0 1 0 0 0 0 0 1 1 0 0 1 1 0 1 1 0 0
## [38] 1 1 0 0 0 1 0 1 0 1 1 0 0 1 0 1 0 0 0 1 0 0 0 1 0 1 1 0 0 1 0 1 0 0 0 1 0
## [75] 0 0 1 1 0 0 0 0 0 0 0 1 1 1 0 0 1 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 1 1 0
## [112] 0 1 1 0 0 1 1 1 0 0 0 1 0 1 1 0 0 0 1 1 0 0 1 0 1 1 1 0 1 0 1 1 1 0 1 0 0
## [149] 1 1
# Simulate highest education level (x3), seeded separately for
# reproducibility.
set.seed(seed = 456)
# One Bernoulli(0.5) draw per applicant:
# 0 = secondary school (SMA), 1 = attended university.
x3 <- rbinom(n = n, size = 1, prob = 0.5)
# Auto-print the simulated values for inspection.
x3
## [1] 0 0 1 1 1 0 0 0 0 0 0 0 1 1 1 1 1 0 1 0 0 1 1 0 1 1 1 1 0 0 1 0 1 1 1 1 1
## [38] 1 1 1 0 0 0 0 0 0 1 1 0 1 1 1 0 0 0 1 1 1 0 0 0 0 0 1 0 0 1 0 1 1 1 1 0 0
## [75] 0 0 0 0 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 0 0 0 0 1 1 1 1 1 0 1
## [112] 0 0 1 1 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 1 1 0 1 0 1 0 1 0 0 0 0 0 1 1 1
## [149] 0 1
# Simulate GPA (x4), seeded separately for reproducibility.
set.seed(seed = 234)
# Draw a GPA for every applicant from Normal(mean = 3.0, sd = 0.5), rounded
# to two decimals.
x4 <- round(rnorm(n = n, mean = 3, sd = 0.5), digits = 2)
# Applicants with only secondary education (x3 == 0) have no GPA, so their
# value is overwritten with 0.
x4 <- replace(x4, x3 == 0, 0)
# Auto-print the simulated values for inspection.
x4
## [1] 0.00 0.00 2.25 3.74 3.73 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.99 3.26 3.50
## [16] 3.15 2.53 0.00 3.26 0.00 0.00 3.51 3.20 0.00 3.12 3.17 2.49 2.29 0.00 0.00
## [31] 2.83 0.00 2.95 3.35 3.22 3.75 3.11 2.25 3.06 2.48 0.00 0.00 0.00 0.00 0.00
## [46] 0.00 2.49 3.02 0.00 3.25 3.39 2.68 0.00 0.00 0.00 3.30 3.15 2.64 0.00 0.00
## [61] 0.00 0.00 0.00 3.24 0.00 0.00 3.29 0.00 3.21 3.18 3.52 3.10 0.00 0.00 0.00
## [76] 0.00 0.00 0.00 0.00 3.12 0.00 2.17 3.36 3.35 0.00 3.01 3.77 0.00 2.90 2.80
## [91] 2.78 2.53 3.69 3.18 3.51 3.07 0.00 0.00 2.86 3.43 0.00 0.00 0.00 0.00 1.78
## [106] 2.88 3.19 2.31 2.62 0.00 2.70 0.00 0.00 1.72 2.87 2.85 3.08 3.33 3.72 2.15
## [121] 0.00 0.00 0.00 3.23 0.00 2.37 3.15 3.19 0.00 0.00 3.02 0.00 2.69 2.84 0.00
## [136] 3.21 0.00 2.61 0.00 3.01 0.00 0.00 0.00 0.00 0.00 2.33 2.87 3.54 0.00 3.33
# Quartiles, mean and range of GPA; the zeros assigned to applicants without
# university education are included in these statistics.
summary(object = x4)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 2.270 1.618 3.120 3.770
Menentukan koefisien
# True coefficients of the data-generating logistic model.
b0 <- -5   # intercept
b1 <- 1.0  # per month of experience (x1)
b2 <- 0.1  # unemployed indicator (x2)
b3 <- 0.8  # university indicator (x3)
b4 <- 0.5  # per GPA point (x4)
# Linear predictor ("total points") for each applicant. This is a purely
# deterministic function of the covariates, so no RNG seed is set here.
datapendukung <- b0 + (b1 * x1) + (b2 * x2) + (b3 * x3) + (b4 * x4)
# Auto-print the linear predictor for inspection.
datapendukung
## [1] 20.000 -1.900 -2.975 18.770 15.665 -4.000 16.100 1.100 8.000 7.100
## [11] 6.100 13.100 6.395 14.430 13.550 -1.525 3.165 18.000 25.430 -1.000
## [21] 7.100 13.555 3.400 6.000 12.360 16.385 8.145 15.045 25.000 13.000
## [31] 5.315 5.100 21.275 15.575 7.510 13.675 18.355 7.025 5.430 24.040
## [41] 4.000 23.000 22.100 31.000 2.100 1.000 20.145 30.410 -2.000 24.425
## [51] 8.595 -1.860 9.100 28.000 10.000 30.450 26.475 17.120 11.000 6.000
## [61] 8.100 20.000 0.100 21.520 21.000 22.000 23.545 22.000 5.505 7.390
## [71] 2.560 25.350 -1.900 1.000 10.000 9.000 5.100 13.100 22.000 29.360
## [81] 11.000 17.885 32.480 26.475 29.000 9.405 25.785 25.100 29.250 0.200
## [91] 22.290 1.065 -2.255 -1.610 22.555 6.335 21.000 15.000 27.230 7.615
## [101] 12.000 6.100 -3.000 27.000 24.690 3.240 13.495 11.955 6.210 12.100
## [111] 23.150 18.000 26.100 12.760 6.235 1.225 29.440 11.565 29.760 19.875
## [121] 8.000 31.000 23.100 23.415 9.100 27.085 33.375 24.395 18.000 1.100
## [131] 10.410 18.000 4.145 6.320 7.000 25.505 1.100 23.205 23.000 -0.595
## [141] 20.000 16.100 26.100 15.100 27.000 10.065 10.235 -1.430 0.100 0.565
# Map the linear predictor to an acceptance probability in (0, 1) with the
# inverse-logit (logistic) function. plogis() is the built-in, numerically
# stable form: the hand-written exp(x) / (1 + exp(x)) evaluates to NaN once
# exp(x) overflows to Inf for very large x, whereas plogis() returns 1.
p <- plogis(datapendukung)
# Auto-print the probabilities for inspection.
p
## [1] 1.00000000 0.13010847 0.04856815 0.99999999 0.99999984 0.01798621
## [7] 0.99999990 0.75026011 0.99966465 0.99917558 0.99776215 0.99999795
## [13] 0.99833290 0.99999946 0.99999870 0.17872642 0.95949571 0.99999998
## [19] 1.00000000 0.26894142 0.99917558 0.99999870 0.96770454 0.99752738
## [25] 0.99999571 0.99999992 0.99970990 0.99999971 1.00000000 0.99999774
## [31] 0.99510678 0.99394020 1.00000000 0.99999983 0.99945272 0.99999885
## [37] 0.99999999 0.99911142 0.99563603 1.00000000 0.98201379 1.00000000
## [43] 1.00000000 1.00000000 0.89090318 0.73105858 1.00000000 1.00000000
## [49] 0.11920292 1.00000000 0.99981501 0.13470305 0.99988835 1.00000000
## [55] 0.99995460 1.00000000 1.00000000 0.99999996 0.99998330 0.99752738
## [61] 0.99969655 1.00000000 0.52497919 1.00000000 1.00000000 1.00000000
## [67] 1.00000000 1.00000000 0.99595008 0.99938298 0.92824246 1.00000000
## [73] 0.13010847 0.73105858 0.99995460 0.99987661 0.99394020 0.99999795
## [79] 1.00000000 1.00000000 0.99998330 0.99999998 1.00000000 1.00000000
## [85] 1.00000000 0.99991770 1.00000000 1.00000000 1.00000000 0.54983400
## [91] 1.00000000 0.74364489 0.09491905 0.16658861 1.00000000 0.99822999
## [97] 1.00000000 0.99999969 1.00000000 0.99950724 0.99999386 0.99776215
## [103] 0.04742587 1.00000000 1.00000000 0.96231211 0.99999862 0.99999357
## [109] 0.99799479 0.99999444 1.00000000 0.99999998 1.00000000 0.99999713
## [115] 0.99804420 0.77294226 1.00000000 0.99999051 1.00000000 1.00000000
## [121] 0.99966465 1.00000000 1.00000000 1.00000000 0.99988835 1.00000000
## [127] 1.00000000 1.00000000 0.99999998 0.75026011 0.99996987 0.99999998
## [133] 0.98440366 0.99820329 0.99908895 1.00000000 0.75026011 1.00000000
## [139] 1.00000000 0.35548845 1.00000000 0.99999990 1.00000000 0.99999972
## [145] 1.00000000 0.99995746 0.99996411 0.19309868 0.52497919 0.63760865
# Draw the observed decision (y), seeded for reproducibility.
set.seed(seed = 2)
# One Bernoulli trial per applicant with success probability p:
# 1 = accepted, 0 = rejected.
y <- rbinom(n = n, size = 1, prob = p)
# Auto-print the simulated decisions for inspection.
y
## [1] 1 0 0 1 1 0 1 0 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 0 0 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1
## [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0
## [149] 1 1
# Combine the response and the four covariates into one data frame, one row
# per applicant.
datagab <- data.frame(
  y = y,
  x1 = x1,
  x2 = x2,
  x3 = x3,
  x4 = x4
)
# Show the first six rows as a quick sanity check.
head(datagab, n = 6L)
## y x1 x2 x3 x4
## 1 1 25 0 0 0.00
## 2 0 3 1 0 0.00
## 3 0 0 1 1 2.25
## 4 1 21 1 1 3.74
## 5 1 18 0 1 3.73
## 6 0 1 0 0 0.00
# Fit a logistic regression (binomial family, logit link) of the decision y
# on the four covariates.
modelreglog <- glm(
  formula = y ~ x1 + x2 + x3 + x4,
  family = binomial(link = "logit"),
  data = datagab
)
# Print the coefficient table, deviances and AIC of the fitted model.
summary(object = modelreglog)
##
## Call:
## glm(formula = y ~ x1 + x2 + x3 + x4, family = binomial(link = "logit"),
## data = datagab)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.6616 1.2723 -2.878 0.004002 **
## x1 0.7146 0.1939 3.686 0.000228 ***
## x2 -0.4400 0.8775 -0.501 0.616111
## x3 -3.2188 5.2237 -0.616 0.537760
## x4 1.6471 1.7548 0.939 0.347940
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 106.028 on 149 degrees of freedom
## Residual deviance: 35.963 on 145 degrees of freedom
## AIC: 45.963
##
## Number of Fisher Scoring iterations: 9