membangkitkan data

skenario

Y : keputusan menolak/menerima pelamar kerja pada PT A posisi B; X1 : lama pengalaman kerja sebelumnya (bulan); X2 : status pekerjaan saat ini (0: bekerja, 1: tidak bekerja); X3 : tingkat pendidikan (0: lulusan sekolah menengah, 1: lulusan perguruan tinggi); X4 : IPK (skala 4)

membangkitkan data X1

X1 : lama pengalaman kerja sebelumnya (bulan). Membangkitkan variabel X1 dengan lama pengalaman kerja 0-60 bulan, nilai tengah 12, dan banyak pelamar sebanyak 100.

# Simulate X1 (months of prior work experience) for n applicants.
# -log(1 - u) / 10 is the inverse-CDF transform of an exponential variable
# with rate 10; the result is multiplied by 60 and rounded to whole months.
# NOTE(review): the scaled draw has mean 6 and no upper bound, while the
# description above mentions a 0-60 range with a central value of 12 --
# confirm which of the two is intended.
set.seed(seed = 100)
n <- 100
u <- runif(n = n, min = 0, max = 1)

x1 <- round(60 * (-(log(1 - u) / 10)), digits = 0)
x1
##   [1]  2  2  5  0  4  4 10  3  5  1  6 13  2  3  9  7  1  3  3  7  5  7  5  8  3
##  [26]  1  9 13  5  2  4 16  3 18  7 13  1  6 27  1  2 12  9 11  6  4  9 13  1  2
##  [51]  2  1  2  2  5  2  1  2  5  1  4  6 19  7  4  3  4  4  2  7  3  2  5 20  7
##  [76]  6 12  9 11  1  4  5 15 24  0  5  8  2  2  8 14  1  3  4 14  3  4  1  0  9

membangkitkan data X2

X2 : status pekerjaan; keterangan yang digunakan (0: bekerja, 1: tidak bekerja)

# Simulate X2 (current employment status, coded 0/1) by rounding a
# Uniform(0, 1) draw to the nearest integer, one value per applicant.
# The seed is reset here, so x2 does not depend on earlier random draws.
set.seed(seed = 1234)
x2 <- round(runif(n = n, min = 0, max = 1), digits = 0)
x2
##   [1] 0 1 1 1 1 1 0 0 1 1 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 0
##  [38] 0 1 1 1 1 0 1 0 1 1 0 0 1 0 0 1 1 0 1 0 1 0 1 1 0 0 0 0 1 0 1 0 1 0 1 0 1
##  [75] 0 1 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 0 0 1

membangkitkan data X3

X3 : tingkat pendidikan; keterangan yang digunakan (0: lulus SMA/tidak kuliah, 1: lulus kuliah)

# Simulate X3 (education level, coded 0/1) the same way as a rounded
# Uniform(0, 1) draw, using its own seed.
set.seed(seed = 123)
x3 <- round(runif(n = n, min = 0, max = 1), digits = 0)
x3
##   [1] 0 1 0 1 1 0 1 1 1 0 1 0 1 1 0 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 0 0 1
##  [38] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 1 1 0 1 0 0 0 1 0 1 1 1 0 1 1 1 0
##  [75] 0 0 0 1 0 0 0 1 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 1

membangkitkan data X4

X4 : IPK pelamar dengan skala 4

# Simulate X4 (GPA) as Normal(mean = 3, sd = 0.5), rounded to 3 decimals.
# This block does not call set.seed(), so the values depend on the RNG state
# left by whatever code ran immediately before it.
# NOTE(review): the normal draw is not truncated, so values above 4 can occur
# even though the GPA is described as being on a 4-point scale -- confirm
# whether the draw should be capped.
x4 <- round(rnorm(n = n, mean = 3, sd = 0.5), digits = 3)
x4
##   [1] 3.127 2.986 2.979 3.684 2.887 3.758 2.226 3.292 3.062 3.108 3.190 2.749
##  [13] 2.833 2.491 2.464 3.152 3.224 3.027 3.461 4.025 2.754 1.845 3.503 2.645
##  [25] 2.656 3.513 2.858 2.390 3.091 2.931 3.003 3.193 2.815 3.322 2.890 3.166
##  [37] 3.548 3.218 2.837 3.574 3.497 3.274 3.119 2.686 3.680 2.700 4.094 3.766
##  [49] 2.882 2.487 2.645 3.128 2.877 2.826 2.524 2.977 2.608 2.166 2.810 3.459
##  [61] 2.712 3.304 2.191 2.972 3.260 3.151 3.053 2.680 2.575 2.488 3.059 2.526
##  [73] 2.755 2.872 3.922 2.674 3.118 3.039 2.519 2.964 3.722 3.226 3.021 2.789
##  [85] 1.973 3.566 2.270 3.370 3.955 2.278 3.351 2.869 2.214 2.243 2.199 2.735
##  [97] 2.269 3.344 4.050 2.356

membangkitkan data Y

menentukan koefisien

# Coefficients used to generate the response: intercept (b0) and the slopes
# for x1..x4 (b1..b4).
b0 <- -11
b1 <- 1.5
b2 <- 2.5
b3 <- 1.7
b4 <- 2.2
# The seed set here does not influence anything in this block: the linear
# predictor below is plain arithmetic with no random draw.
set.seed(seed = 1)
# Linear predictor (log-odds scale), one value per applicant.
datapendukung <- b0 + b1 * x1 + b2 * x2 + b3 * x3 + b4 * x4
datapendukung
##   [1] -1.1206  2.7692  5.5538  1.3048  5.5514  5.7676 10.5972  2.4424  7.4364
##  [10] -0.1624  9.2180 17.0478 -0.0674  3.1802  7.9208 10.6344 -2.4072  0.1594
##  [19]  1.1142 10.0550  4.2588  5.2590  5.9066  8.5190  1.0432  2.4286 12.9876
##  [28] 17.9580  5.8002 -1.5518  3.3066 21.7246  1.3930 27.5084  5.8580 17.9652
##  [37]  0.0056  5.0796 38.2414  0.8628  2.1934 16.7028  9.3618 13.9092  6.0960
##  [46]  3.4400 14.0068 16.7852 -3.1596  1.6714 -2.1810 -2.6184  2.5294  0.7172
##  [55]  3.7528  1.0494 -3.7624  0.9652  4.3820  0.6098  5.1664  5.2688 22.3202
##  [64]  6.0384  3.8720  2.9322  3.4166  5.0960 -0.6350  7.4736  1.9298  1.7572
##  [73]  4.2610 27.8184  8.1284  6.3828 13.8596 10.8858 11.0418 -0.4792  5.6884
##  [82]  5.2972 18.1462 35.3358 -6.6594  6.8452  7.6940  1.1140  2.4010  8.5116
##  [91] 17.3722  1.0118 -1.6292  1.6346 14.8378  2.0170  1.6918 -2.1432 -2.0900
## [100] 11.8832
# Convert the linear predictor to a probability with the inverse logit.
# plogis() is the logistic CDF, 1 / (1 + exp(-x)). Unlike the hand-written
# exp(x) / (1 + exp(x)), it does not return NaN when exp(x) overflows for a
# very large linear predictor.
p <- plogis(datapendukung)
p
##   [1] 0.245900007 0.940988579 0.996142227 0.786641707 0.996132993 0.996882495
##   [7] 0.999975015 0.920003898 0.999410944 0.459488997 0.999900773 0.999999961
##  [13] 0.483156376 0.960082332 0.999637020 0.999975927 0.082625306 0.539765837
##  [19] 0.752911292 0.999957031 0.986057872 0.994826403 0.997285962 0.999800401
##  [25] 0.739466976 0.918982359 0.999997711 0.999999984 0.996982185 0.174826444
##  [31] 0.964654537 1.000000000 0.801070744 1.000000000 0.997151187 0.999999984
##  [37] 0.501399996 0.993816081 1.000000000 0.703245322 0.899655261 0.999999944
##  [43] 0.999914062 0.999999089 0.997753202 0.968931516 0.999999174 0.999999949
##  [49] 0.040714673 0.841762388 0.101469718 0.067963575 0.926177340 0.671990140
##  [55] 0.977085405 0.740659666 0.022700638 0.724161723 0.987653996 0.647895178
##  [61] 0.994327293 0.994876598 1.000000000 0.997620304 0.979607804 0.949415436
##  [67] 0.968219318 0.993916059 0.346377669 0.999432442 0.873227281 0.852858633
##  [73] 0.986088085 1.000000000 0.999705047 0.998312469 0.999999043 0.999981278
##  [79] 0.999983982 0.382441051 0.996626415 0.995019341 0.999999987 1.000000000
##  [85] 0.001280274 0.998936579 0.999544655 0.752874083 0.916903527 0.999798919
##  [91] 0.999999971 0.733372264 0.163939983 0.836798819 0.999999640 0.882570446
##  [97] 0.844460731 0.104968369 0.110072574 0.999993095
# Draw the binary response: one Bernoulli trial per applicant, with that
# applicant's success probability taken from p. The seed is reset first so
# the draw is reproducible on its own.
set.seed(seed = 2)
Y <- rbinom(n = n, size = 1, prob = p)
Y
##   [1] 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 0
##  [38] 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
##  [75] 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 1 1 0 0 1 1 1 1 0 0 1
# Assemble the analysis data set: the response Y together with the four
# predictors. Previously the scalar sample size n was stored instead of Y,
# which produced a constant column of 100 and left the response outside the
# data frame, so a model fitted with data = datagab had to pick Y up from the
# global environment. The listing below reflects the corrected columns.
datagab <- data.frame(Y, x1, x2, x3, x4)
datagab
##     Y x1 x2 x3    x4
## 1   0  2  0  0 3.127
## 2   1  2  1  1 2.986
## 3   1  5  1  0 2.979
## 4   1  0  1  1 3.684
## 5   1  4  1  1 2.887
## 6   1  4  1  0 3.758
## 7   1 10  0  1 2.226
## 8   1  3  0  1 3.292
## 9   1  5  1  1 3.062
## 10  1  1  1  0 3.108
## 11  1  6  1  1 3.190
## 12  1 13  1  0 2.749
## 13  1  2  0  1 2.833
## 14  1  3  1  1 2.491
## 15  1  9  0  0 2.464
## 16  1  7  1  1 3.152
## 17  1  1  0  0 3.224
## 18  1  3  0  0 3.027
## 19  1  3  0  0 3.461
## 20  1  7  0  1 4.025
## 21  1  5  0  1 2.754
## 22  1  7  0  1 1.845
## 23  1  5  0  1 3.503
## 24  1  8  0  1 2.645
## 25  1  3  0  1 2.656
## 26  1  1  1  1 3.513
## 27  1  9  1  1 2.858
## 28  1 13  1  1 2.390
## 29  1  5  1  0 3.091
## 30  0  2  0  0 2.931
## 31  1  4  0  1 3.003
## 32  1 16  0  1 3.193
## 33  0  3  0  1 2.815
## 34  1 18  1  1 3.322
## 35  1  7  0  0 2.890
## 36  1 13  1  0 3.166
## 37  0  1  0  1 3.548
## 38  1  6  0  0 3.218
## 39  1 27  1  0 2.837
## 40  1  1  1  0 3.574
## 41  1  2  1  0 3.497
## 42  1 12  1  0 3.274
## 43  1  9  0  0 3.119
## 44  1 11  1  0 2.686
## 45  1  6  0  0 3.680
## 46  1  4  1  0 2.700
## 47  1  9  1  0 4.094
## 48  1 13  0  0 3.766
## 49  0  1  0  0 2.882
## 50  1  2  1  1 2.487
## 51  0  2  0  0 2.645
## 52  0  1  0  0 3.128
## 53  1  2  1  1 2.877
## 54  0  2  1  0 2.826
## 55  1  5  0  1 2.524
## 56  1  2  1  0 2.977
## 57  0  1  0  0 2.608
## 58  0  2  1  1 2.166
## 59  0  5  0  1 2.810
## 60  1  1  1  0 3.459
## 61  1  4  1  1 2.712
## 62  1  6  0  0 3.304
## 63  1 19  0  0 2.191
## 64  1  7  0  0 2.972
## 65  1  4  0  1 3.260
## 66  1  3  1  0 3.151
## 67  1  4  0  1 3.053
## 68  1  4  1  1 2.680
## 69  0  2  0  1 2.575
## 70  1  7  1  0 2.488
## 71  1  3  0  1 3.059
## 72  1  2  1  1 2.526
## 73  1  5  0  1 2.755
## 74  1 20  1  0 2.872
## 75  1  7  0  0 3.922
## 76  1  6  1  0 2.674
## 77  1 12  0  0 3.118
## 78  1  9  0  1 3.039
## 79  1 11  0  0 2.519
## 80  0  1  1  0 2.964
## 81  1  4  1  0 3.722
## 82  1  5  0  1 3.226
## 83  1 15  0  0 3.021
## 84  1 24  1  1 2.789
## 85  0  0  0  0 1.973
## 86  1  5  1  0 3.566
## 87  1  8  0  1 2.270
## 88  0  2  0  1 3.370
## 89  1  2  0  1 3.955
## 90  1  8  1  0 2.278
## 91  1 14  0  0 3.351
## 92  0  1  1  1 2.869
## 93  0  3  0  0 2.214
## 94  1  4  0  1 2.243
## 95  1 14  0  0 2.199
## 96  1  3  1  0 2.735
## 97  1  4  0  1 2.269
## 98  0  1  0  0 3.344
## 99  0  0  0  0 4.050
## 100 1  9  1  1 2.356

analisis regresi logistik

# Fit a binomial GLM with a logit link of Y on the four predictors, then
# print the coefficient table and fit statistics. The warning recorded below
# was emitted by this fit; it reports that some fitted probabilities are
# numerically 0 or 1.
modelreglog <- glm(
  formula = Y ~ x1 + x2 + x3 + x4,
  family = binomial(link = "logit"),
  data = datagab
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(modelreglog)
## 
## Call:
## glm(formula = Y ~ x1 + x2 + x3 + x4, family = binomial(link = "logit"), 
##     data = datagab)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -13.3382     4.4432  -3.002  0.00268 **
## x1            1.6269     0.5012   3.246  0.00117 **
## x2            2.8565     0.9520   3.000  0.00270 **
## x3            0.6788     0.8460   0.802  0.42230   
## x4            2.9697     1.1546   2.572  0.01011 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 97.245  on 99  degrees of freedom
## Residual deviance: 41.267  on 95  degrees of freedom
## AIC: 51.267
## 
## Number of Fisher Scoring iterations: 9