Ingin dilakukan prosedur pembangkitan bilangan acak untuk regresi logistik biner sebagai berikut:
Dengan ukuran data sebesar 100.
Dilakukan dengan membangkitkan bilangan eksponensial dengan nilai tengah 12:
set.seed(29)
# membangkitkan bilangan uniform
n <- 100
u <- runif(n)
# membangkitkan bilangan acak x
x1 <- round(60*(-(log(1-u)/12)))
x1
## [1] 1 1 1 2 4 0 9 10 1 1 20 2 2 5 1 9 5 2 10 2 5 5 18 20 5
## [26] 2 3 11 6 1 2 1 0 6 3 1 13 2 2 0 5 0 21 6 0 10 16 6 5 5
## [51] 3 6 1 5 3 3 4 9 4 1 3 1 4 2 4 7 4 5 3 2 1 11 1 0 0
## [76] 2 11 0 0 1 5 0 9 2 2 3 16 7 0 3 6 3 4 3 2 1 3 5 7 2
set.seed(13)
x2 <- round(runif(n))
x2
## [1] 1 0 0 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 1 1 0 1 1 1 0 1 0 0 0 1 0 0 1 1 1 0 0
## [38] 0 1 0 1 0 1 1 1 1 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0
## [75] 0 1 0 0 0 1 1 0 1 0 1 1 1 0 0 1 0 1 0 1 0 0 1 1 1 1
set.seed(102)
x3 <- round(runif(n))
x3
## [1] 1 0 1 1 0 0 1 0 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0 1 0 1 1 0 0
## [38] 1 1 0 1 1 0 0 0 0 0 0 0 1 0 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 0 0 1 1 1 1 1 0
## [75] 0 0 1 0 1 0 1 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 1 0 0 0
set.seed(1)
# membangkitkan bilangan uniform
n <- 100
x4 <- round(rnorm(n,3, 0.3),2)
x4
## [1] 2.81 3.06 2.75 3.48 3.10 2.75 3.15 3.22 3.17 2.91 3.45 3.12 2.81 2.34 3.34
## [16] 2.99 3.00 3.28 3.25 3.18 3.28 3.23 3.02 2.40 3.19 2.98 2.95 2.56 2.86 3.13
## [31] 3.41 2.97 3.12 2.98 2.59 2.88 2.88 2.98 3.33 3.23 2.95 2.92 3.21 3.17 2.79
## [46] 2.79 3.11 3.23 2.97 3.26 3.12 2.82 3.10 2.66 3.43 3.59 2.89 2.69 3.17 2.96
## [61] 3.72 2.99 3.21 3.01 2.78 3.06 2.46 3.44 3.05 3.65 3.14 2.79 3.18 2.72 2.62
## [76] 3.09 2.87 3.00 3.02 2.82 2.83 2.96 3.35 2.54 3.18 3.10 3.32 2.91 3.11 3.08
## [91] 2.84 3.36 3.35 3.21 3.48 3.17 2.62 2.83 2.63 2.86
b0 <- -11
b1 <- 2.35
b2 <- 0.87
b3 <- 2.31
b4 <- 1.78
set.seed(1)
xb <- b0+(b1*x1)+(b2*x2)+(b3*x3)+(b4*x4)
xb
## [1] -0.4682 -3.2032 -1.4450 2.2044 4.7880 -6.1050 18.9370 19.1016 0.1726
## [10] -3.4702 45.3210 2.4336 1.8818 5.7852 0.4752 17.7822 8.4000 0.4084
## [19] 21.4650 2.5404 8.8984 9.6794 39.8556 43.4520 6.4282 -0.1256 1.3010
## [28] 19.4068 8.1908 -2.2086 -0.2302 -1.0534 -4.5764 11.5844 3.8402 -3.5236
## [37] 24.6764 1.3144 2.8074 -5.2506 9.1810 -3.4924 44.9338 9.6126 -5.1638
## [46] 18.3362 32.1358 8.8494 6.9066 9.7328 2.4736 10.4296 -0.8220 6.3548
## [55] 4.4654 2.4402 5.8542 14.9382 6.3526 -1.0712 4.9816 -2.4578 4.9838
## [64] 2.2378 3.3484 13.2068 2.7788 6.8732 3.7890 2.5070 -0.7508 22.1262
## [73] -0.6796 -6.1584 -6.3364 0.0702 22.2686 -5.6600 -3.3144 -2.7604 8.9674
## [82] -5.7312 19.2930 0.5312 0.2304 2.4380 33.3796 12.9398 -3.1542 2.4024
## [91] 8.1552 2.9008 6.6730 4.9438 -0.1056 -3.0074 3.8936 6.6574 11.0014
## [100] -0.3392
p <- exp(xb)/(1+exp(xb))
p
## [1] 0.385042368 0.039045479 0.190772264 0.900643937 0.991739702 0.002226712
## [7] 0.999999994 0.999999995 0.543043196 0.030172129 1.000000000 0.919353849
## [13] 0.867817741 0.996936717 0.616613786 0.999999981 0.999775183 0.600704167
## [19] 1.000000000 0.926925925 0.999863411 0.999937445 1.000000000 1.000000000
## [25] 0.998387249 0.468641214 0.786003233 0.999999996 0.999722885 0.098980860
## [31] 0.442702801 0.258572742 0.010187037 0.999990690 0.978962772 0.028648147
## [37] 1.000000000 0.788248504 0.943074399 0.005217011 0.999897033 0.029529249
## [43] 1.000000000 0.999933124 0.005687391 0.999999989 1.000000000 0.999856553
## [49] 0.998999845 0.999940698 0.922270234 0.999970456 0.305339280 0.998264634
## [55] 0.988630654 0.919841836 0.997140372 0.999999675 0.998260819 0.255174944
## [61] 0.993183708 0.078870018 0.993198585 0.903592980 0.966052403 0.999998162
## [67] 0.941519407 0.998965911 0.977882059 0.924631091 0.320647010 1.000000000
## [73] 0.336350584 0.002111169 0.001767535 0.517542796 1.000000000 0.003470431
## [79] 0.035080475 0.059501977 0.999872517 0.003232699 0.999999996 0.629762949
## [85] 0.557346541 0.919679473 1.000000000 0.999997599 0.040926105 0.917010133
## [91] 0.999712845 0.947885970 0.998736998 0.992922979 0.473624506 0.047092683
## [97] 0.980034852 0.998717166 0.999983322 0.416003820
y <- rbinom(100, 1, p)
y
## [1] 0 0 0 0 1 0 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 1 1 0 1
## [38] 0 1 0 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 1 0 0
## [75] 0 0 1 0 0 0 1 0 1 0 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1
randData <- data.frame(X1 = x1, X2 = x2, X3 = x3, X4 = x4, Y = y)
head(randData, 10)
## X1 X2 X3 X4 Y
## 1 1 1 1 2.81 0
## 2 1 0 0 3.06 0
## 3 1 0 1 2.75 0
## 4 2 0 1 3.48 0
## 5 4 1 0 3.10 1
## 6 0 0 0 2.75 0
## 7 9 1 1 3.15 1
## 8 10 1 0 3.22 1
## 9 1 1 1 3.17 0
## 10 1 0 0 2.91 0
fit_reglog <- glm(y~x1+x2+x3+x4, family = binomial(link="logit"), data = randData)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit_reglog)
##
## Call:
## glm(formula = y ~ x1 + x2 + x3 + x4, family = binomial(link = "logit"),
## data = randData)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.57649 -0.05114 0.00112 0.09596 2.22738
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.1686 5.7554 -1.767 0.07726 .
## x1 2.8254 0.7833 3.607 0.00031 ***
## x2 2.0501 1.0184 2.013 0.04411 *
## x3 2.2132 1.0931 2.025 0.04290 *
## x4 0.9215 1.6721 0.551 0.58156
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 123.820 on 99 degrees of freedom
## Residual deviance: 31.404 on 95 degrees of freedom
## AIC: 41.404
##
## Number of Fisher Scoring iterations: 9
myBeta <- rbind(b0, b1, b2, b3, b4)
betaFromModel <- fit_reglog$coefficients
beta <- data.frame(myBeta, betaFromModel)
beta
## myBeta betaFromModel
## b0 -11.00 -10.1686140
## b1 2.35 2.8253759
## b2 0.87 2.0500929
## b3 2.31 2.2131839
## b4 1.78 0.9214932