shuttle <- read.table("https://users.stat.ufl.edu/~aa/cat/data/Shuttle.dat", header=TRUE)
shuttle
## Ft Temp TD
## 1 1 66 0
## 2 2 70 1
## 3 3 69 0
## 4 4 68 0
## 5 5 67 0
## 6 6 72 0
## 7 7 73 0
## 8 8 70 0
## 9 9 57 1
## 10 10 63 1
## 11 11 70 1
## 12 12 78 0
## 13 13 67 0
## 14 14 53 1
## 15 15 67 0
## 16 16 75 0
## 17 17 70 0
## 18 18 81 0
## 19 19 76 0
## 20 20 79 0
## 21 21 75 1
## 22 22 76 0
## 23 23 58 1
fit <- glm(TD ~ Temp, family=binomial, data=shuttle)
fit$coeff
## (Intercept) Temp
## 15.0429016 -0.2321627
predict(fit,data.frame(Temp=31),type="link")
## 1
## 7.845857
predict(fit,data.frame(Temp=31),type="response")
## 1
## 0.9996088
linear_fit <- lm(TD ~ Temp, data=shuttle)
linear_fit$coeff
## (Intercept) Temp
## 2.90476190 -0.03738095
head(cbind(predict(linear_fit, data.frame(Temp=shuttle$Temp)), predict(linear_fit, data.frame(Temp=shuttle$Temp), type="response")))
## [,1] [,2]
## 1 0.4376190 0.4376190
## 2 0.2880952 0.2880952
## 3 0.3254762 0.3254762
## 4 0.3628571 0.3628571
## 5 0.4002381 0.4002381
## 6 0.2133333 0.2133333
library(rootSolve)
linear_fit <- lm(TD ~ Temp, data=shuttle)
intercept <- linear_fit$coeff[1]
temp <- linear_fit$coeff[2]
f1 = function(x) {
intercept + temp * x
}
uniroot.all(f1, c(0, 100))
## [1] 77.70701
solve(temp, -intercept)
## [1] 77.70701
middle_efficient <- -1 * intercept / temp
middle_efficient
## (Intercept)
## 77.70701
middle_efficient <- -1 * intercept / temp
sample_mean <- mean(shuttle$Temp)
sample_mean_prob <- predict(fit,data.frame(Temp=sample_mean),type="response")
prob_change_ratio <- temp * sample_mean_prob * (1-sample_mean_prob)
sample_mean
## [1] 69.56522
sample_mean_prob
## 1
## 0.2483279
prob_change_ratio
## Temp
## -0.006977572
middle_efficient <- -1 * intercept / temp
sample_min <- min(shuttle$Temp)
sample_min_prob <- predict(fit,data.frame(Temp=sample_min),type="response")
sample_min
## [1] 53
sample_min_prob
## 1
## 0.9392478
summary(fit)
##
## Call:
## glm(formula = TD ~ Temp, family = binomial, data = shuttle)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0611 -0.7613 -0.3783 0.4524 2.2175
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 15.0429 7.3786 2.039 0.0415 *
## Temp -0.2322 0.1082 -2.145 0.0320 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 28.267 on 22 degrees of freedom
## Residual deviance: 20.315 on 21 degrees of freedom
## AIC: 24.315
##
## Number of Fisher Scoring iterations: 5
null_deviance <- 28.267
residual_deviance <- 0.1082
likelihood_z <- null_deviance - residual_deviance
likelihood_z
## [1] 28.1588
beta <- -0.2322
std_error <- 0.1082
wald_z <- beta / std_error
wald_z
## [1] -2.146026
confint(fit, level=0.95)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 3.3305848 34.34215133
## Temp -0.5154718 -0.06082076
confint.default(fit)
## 2.5 % 97.5 %
## (Intercept) 0.5810523 29.50475096
## Temp -0.4443022 -0.02002324
mbti <- read.table("http://users.stat.ufl.edu/~aa/intro-cda/data/MBTI.dat", header=TRUE)
mbti$yes <- mbti$drink
mbti$no <- mbti$n - mbti$drink
mbti
## EI SN TF JP smoke drink n yes no
## 1 e s t j 13 10 77 10 67
## 2 e s t p 11 8 42 8 34
## 3 e s f j 16 5 106 5 101
## 4 e s f p 19 7 79 7 72
## 5 e n t j 6 3 23 3 20
## 6 e n t p 4 2 18 2 16
## 7 e n f j 6 4 31 4 27
## 8 e n f p 23 15 80 15 65
## 9 i s t j 32 17 140 17 123
## 10 i s t p 9 3 52 3 49
## 11 i s f j 34 6 138 6 132
## 12 i s f p 29 4 106 4 102
## 13 i n t j 4 1 13 1 12
## 14 i n t p 9 5 35 5 30
## 15 i n f j 4 1 31 1 30
## 16 i n f p 22 6 79 6 73
fit <- glm(yes/(yes+no) ~ EI + SN + TF + JP,
weights = yes + no,
family = binomial(link = "logit"),
data = mbti)
fit$coefficients
## (Intercept) EIi SNs TFt JPp
## -2.1140470 -0.5550115 -0.4291508 0.6873349 0.2022295
summary(fit)
##
## Call:
## glm(formula = yes/(yes + no) ~ EI + SN + TF + JP, family = binomial(link = "logit"),
## data = mbti, weights = yes + no)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2712 -0.8062 -0.1063 0.1124 1.5807
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1140 0.2715 -7.788 6.82e-15 ***
## EIi -0.5550 0.2170 -2.558 0.01053 *
## SNs -0.4292 0.2340 -1.834 0.06664 .
## TFt 0.6873 0.2206 3.116 0.00184 **
## JPp 0.2022 0.2266 0.893 0.37209
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 30.488 on 15 degrees of freedom
## Residual deviance: 11.149 on 11 degrees of freedom
## AIC: 73.99
##
## Number of Fisher Scoring iterations: 4
pred.prob <- fitted(fit)
lp <- predict(fit, se.fit = TRUE)
LB <- lp$fit - 1.96*lp$se.fit
UB <- lp$fit + 1.96*lp$se.fit
LB.p <- exp(LB)/(1+exp(LB))
UB.p <- exp(UB)/(1+exp(UB))
out <- data.frame(mbti$EI, mbti$SN, mbti$TF, mbti$JP,
pred.prob,
LB.p,
UB.p)
out
## mbti.EI mbti.SN mbti.TF mbti.JP pred.prob LB.p UB.p
## 1 e s t j 0.13518600 0.09311660 0.19223318
## 2 e s t p 0.16061850 0.10468282 0.23848133
## 3 e s f j 0.07288479 0.04757773 0.11009687
## 4 e s f p 0.08778634 0.05694528 0.13297541
## 5 e n t j 0.19361150 0.12222927 0.29277650
## 6 e n t p 0.22714857 0.15042946 0.32789340
## 7 e n f j 0.10773900 0.06622939 0.17051419
## 8 e n f p 0.12877681 0.08736742 0.18581683
## 9 i s t j 0.08234722 0.05576477 0.11999096
## 10 i s t p 0.09897686 0.06339960 0.15129366
## 11 i s f j 0.04318118 0.02722891 0.06782757
## 12 i s f p 0.05235266 0.03296411 0.08217593
## 13 i n t j 0.12113523 0.07276292 0.19490558
## 14 i n t p 0.14436562 0.09114324 0.22110598
## 15 i n f j 0.06482402 0.03763115 0.10943247
## 16 i n f p 0.07821656 0.05006819 0.12018772
[1] Alan Agresti, 『범주형 자료분석 개론』, 박태성, 이승연 옮긴이,
자유아카데미, 2020
[2] Alan Agresti, 『Categorical Data Analysis
(3rd Edition)』, Wiley, 2018
[3] https://users.stat.ufl.edu/~aa/cat/data/Shuttle.dat
[4] https://users.stat.ufl.edu/~aa/cat/data/MBTI.dat