dose=c(1,1,2,2,3,3)
response=c(0,1,0,1,0,1)
count=c(7,3,5,5,2,8)
toxic=data.frame(dose,response,count)
toxic
## dose response count
## 1 1 0 7
## 2 1 1 3
## 3 2 0 5
## 4 2 1 5
## 5 3 0 2
## 6 3 1 8
##
## Call:
## glm(formula = response ~ dose, family = binomial, data = toxic,
## weights = count)
##
## Deviance Residuals:
## 1 2 3 4 5 6
## -2.144 2.764 -2.787 2.482 -2.461 1.994
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.0496 1.0888 -1.882 0.0598 .
## dose 1.1051 0.5186 2.131 0.0331 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 41.455 on 5 degrees of freedom
## Residual deviance: 36.196 on 4 degrees of freedom
## AIC: 40.196
##
## Number of Fisher Scoring iterations: 4
summary 결과를 보면 dose의 beta = 1.1051 이므로 dose가 1증가할때 쥐가 죽을 가능성이 e^(1.1051)=3.0193배 증가한다고 해석할 수 있습니다.
glm 결과에 대해 coefplot 확인
## (Intercept) dose
## 0.1287847 3.0193924
## 2.5 % 97.5 %
## (Intercept) 0.01217438 0.9667881
## dose 1.16595620 9.3389411
plot(response~dose,data=toxic,type='n',main="Predicted Probability of Response")
curve(predict(out,data.frame(dose=x),type="resp"),add=TRUE)
plot(response~dose,data=toxic,type='n',main="Predicted Probability of Response")
curve(predict(out,data.frame(dose=x),type="resp"),add=TRUE)
curve(
predict(out,data.frame(dose=x),type="resp", se.fit=TRUE)$fit+
predict(out,data.frame(dose=x),type="resp", se.fit=TRUE)$se.fit*1.96,
add=TRUE, col="blue", lty=2)
curve(
predict(out,data.frame(dose=x),type="resp", se.fit=TRUE)$fit-
predict(out,data.frame(dose=x),type="resp", se.fit=TRUE)$se.fit*1.96,
add=TRUE, col="blue", lty=2)
## 'data.frame': 1858 obs. of 16 variables:
## $ id : num 1 1 2 2 3 3 4 4 5 5 ...
## $ study : num 1 1 1 1 1 1 1 1 1 1 ...
## $ rx : Factor w/ 3 levels "Obs","Lev","Lev+5FU": 3 3 3 3 1 1 3 3 1 1 ...
## $ sex : num 1 1 1 1 0 0 0 0 1 1 ...
## $ age : num 43 43 63 63 71 71 66 66 69 69 ...
## $ obstruct: num 0 0 0 0 0 0 1 1 0 0 ...
## $ perfor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adhere : num 0 0 0 0 1 1 0 0 0 0 ...
## $ nodes : num 5 5 1 1 7 7 6 6 22 22 ...
## $ status : num 1 1 0 0 1 1 1 1 1 1 ...
## $ differ : num 2 2 2 2 2 2 2 2 2 2 ...
## $ extent : num 3 3 3 3 2 2 3 3 3 3 ...
## $ surg : num 0 0 0 0 0 0 1 1 1 1 ...
## $ node4 : num 1 1 0 0 1 1 1 1 1 1 ...
## $ time : num 1521 968 3087 3087 963 ...
## $ etype : num 2 1 2 1 2 1 2 1 2 1 ...
colon1 <- na.omit(colon)
result <- glm(status~rx+sex+age+obstruct+perfor+adhere+nodes+differ+extent+surg, family=binomial, data=colon1)
summary(result)
##
## Call:
## glm(formula = status ~ rx + sex + age + obstruct + perfor + adhere +
## nodes + differ + extent + surg, family = binomial, data = colon1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.575 -1.046 -0.584 1.119 2.070
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.430926 0.478301 -5.082 3.73e-07 ***
## rxLev -0.069553 0.122490 -0.568 0.570156
## rxLev+5FU -0.585606 0.124579 -4.701 2.59e-06 ***
## sex -0.086161 0.101614 -0.848 0.396481
## age 0.001896 0.004322 0.439 0.660933
## obstruct 0.219995 0.128234 1.716 0.086240 .
## perfor 0.085831 0.298339 0.288 0.773578
## adhere 0.373527 0.147164 2.538 0.011144 *
## nodes 0.185245 0.018873 9.815 < 2e-16 ***
## differ 0.031839 0.100757 0.316 0.752003
## extent 0.563617 0.116837 4.824 1.41e-06 ***
## surg 0.388068 0.113840 3.409 0.000652 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2461.7 on 1775 degrees of freedom
## Residual deviance: 2240.4 on 1764 degrees of freedom
## AIC: 2264.4
##
## Number of Fisher Scoring iterations: 4
## Start: AIC=2264.43
## status ~ rx + sex + age + obstruct + perfor + adhere + nodes +
## differ + extent + surg
##
## Df Deviance AIC
## - perfor 1 2240.5 2262.5
## - differ 1 2240.5 2262.5
## - age 1 2240.6 2262.6
## - sex 1 2241.2 2263.2
## <none> 2240.4 2264.4
## - obstruct 1 2243.4 2265.4
## - adhere 1 2246.9 2268.9
## - surg 1 2252.1 2274.1
## - rx 2 2266.7 2286.7
## - extent 1 2265.5 2287.5
## - nodes 1 2363.5 2385.5
##
## Step: AIC=2262.52
## status ~ rx + sex + age + obstruct + adhere + nodes + differ +
## extent + surg
##
## Df Deviance AIC
## - differ 1 2240.6 2260.6
## - age 1 2240.7 2260.7
## - sex 1 2241.2 2261.2
## <none> 2240.5 2262.5
## - obstruct 1 2243.6 2263.6
## - adhere 1 2247.4 2267.4
## - surg 1 2252.2 2272.2
## - rx 2 2266.8 2284.8
## - extent 1 2265.8 2285.8
## - nodes 1 2363.7 2383.7
##
## Step: AIC=2260.61
## status ~ rx + sex + age + obstruct + adhere + nodes + extent +
## surg
##
## Df Deviance AIC
## - age 1 2240.8 2258.8
## - sex 1 2241.3 2259.3
## <none> 2240.6 2260.6
## - obstruct 1 2243.7 2261.7
## - adhere 1 2247.6 2265.6
## - surg 1 2252.4 2270.4
## - rx 2 2266.8 2282.8
## - extent 1 2266.2 2284.2
## - nodes 1 2367.7 2385.7
##
## Step: AIC=2258.8
## status ~ rx + sex + obstruct + adhere + nodes + extent + surg
##
## Df Deviance AIC
## - sex 1 2241.5 2257.5
## <none> 2240.8 2258.8
## - obstruct 1 2243.7 2259.7
## - adhere 1 2247.9 2263.9
## - surg 1 2252.7 2268.7
## - rx 2 2266.9 2280.9
## - extent 1 2266.4 2282.4
## - nodes 1 2368.5 2384.5
##
## Step: AIC=2257.49
## status ~ rx + obstruct + adhere + nodes + extent + surg
##
## Df Deviance AIC
## <none> 2241.5 2257.5
## - obstruct 1 2244.5 2258.5
## - adhere 1 2248.8 2262.8
## - surg 1 2253.3 2267.3
## - rx 2 2267.1 2279.1
## - extent 1 2266.9 2280.9
## - nodes 1 2369.7 2383.7
##
## Call:
## glm(formula = status ~ rx + obstruct + adhere + nodes + extent +
## surg, family = binomial, data = colon1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5583 -1.0490 -0.5884 1.1213 2.0393
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.30406 0.35138 -6.557 5.49e-11 ***
## rxLev -0.07214 0.12221 -0.590 0.554978
## rxLev+5FU -0.57807 0.12428 -4.651 3.30e-06 ***
## obstruct 0.22148 0.12700 1.744 0.081179 .
## adhere 0.38929 0.14498 2.685 0.007251 **
## nodes 0.18556 0.01850 10.030 < 2e-16 ***
## extent 0.56510 0.11643 4.854 1.21e-06 ***
## surg 0.38989 0.11371 3.429 0.000606 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2461.7 on 1775 degrees of freedom
## Residual deviance: 2241.5 on 1768 degrees of freedom
## AIC: 2257.5
##
## Number of Fisher Scoring iterations: 4
exact_OR = function(x, digits=2){
suppressMessages(a<-confint(x))
result=data.frame(exp(coef(x)),exp(a))
result=round(result,digits)
result=cbind(result,round(summary(x)$coefficient[,4],3))
colnames(result)=c('OR','2.5%','97.5%','p')
result
}
exact_OR(reduced.model)
## OR 2.5% 97.5% p
## (Intercept) 0.10 0.05 0.20 0.000
## rxLev 0.93 0.73 1.18 0.555
## rxLev+5FU 0.56 0.44 0.72 0.000
## obstruct 1.25 0.97 1.60 0.081
## adhere 1.48 1.11 1.96 0.007
## nodes 1.20 1.16 1.25 0.000
## extent 1.76 1.41 2.22 0.000
## surg 1.48 1.18 1.85 0.001
## OR lcl ucl p
## (Intercept) 0.10 0.05 0.20 0.0000
## rxLev 0.93 0.73 1.18 0.5550
## rxLev+5FU 0.56 0.44 0.72 0.0000
## obstruct 1.25 0.97 1.60 0.0812
## adhere 1.48 1.11 1.96 0.0073
## nodes 1.20 1.16 1.25 0.0000
## extent 1.76 1.40 2.21 0.0000
## surg 1.48 1.18 1.85 0.0006
ORplot(reduced.model, type=2, show.OR=FALSE, show.CI=TRUE, pch=15, lwd=3,
col=c("blue","red"), main="Plot for Odds ratios of Reduced Model")