# Load the low-birth-weight data set analysed by the logistic regressions below.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running this script on another machine.
dta <- read.csv("C:/Users/User/Desktop/LearnR/CA/CAdata/lowbwt.csv")

# Open the data viewer only in interactive sessions; View() needs a viewer
# and has no use in a batch run.
if (interactive()) {
  View(dta)
}

# attach(dta) is intentionally not used: the model calls pass `data = dta`
# explicitly, and attaching a data frame copies its columns onto the search
# path, where they can silently mask (or be masked by) other objects.
# Outcome: LOW
# Predictors: AGE, LWT, RACE, SMOKE, PTL, HT, UI, and FTV
# AGE, LWT, PTL, and FTV are treated as continuous variables.
# RACE, SMOKE, HT, and UI are treated as categorical variables.
# Fit the full logistic regression with glm() (generalized linear model),
# using the binomial family with LOW as the response.
# RACE, SMOKE, HT and UI are wrapped in factor() inside the formula so they
# enter the model as categorical terms; AGE, LWT, PTL and FTV are used
# unwrapped.
mylogit <- glm(
  LOW ~ AGE + LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) +
    factor(UI) + FTV,
  family = "binomial",
  data = dta
)

# Print the coefficient table, deviances and AIC of the full model.
summary(mylogit)
##
## Call:
## glm(formula = LOW ~ AGE + LWT + factor(RACE) + factor(SMOKE) +
## PTL + factor(HT) + factor(UI) + FTV, family = "binomial",
## data = dta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8946 -0.8212 -0.5316 0.9818 2.2125
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.480623 1.196888 0.402 0.68801
## AGE -0.029549 0.037031 -0.798 0.42489
## LWT -0.015424 0.006919 -2.229 0.02580 *
## factor(RACE)2 1.272260 0.527357 2.413 0.01584 *
## factor(RACE)3 0.880496 0.440778 1.998 0.04576 *
## factor(SMOKE)1 0.938846 0.402147 2.335 0.01957 *
## PTL 0.543337 0.345403 1.573 0.11571
## factor(HT)1 1.863303 0.697533 2.671 0.00756 **
## factor(UI)1 0.767648 0.459318 1.671 0.09467 .
## FTV 0.065302 0.172394 0.379 0.70484
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 234.67 on 188 degrees of freedom
## Residual deviance: 201.28 on 179 degrees of freedom
## AIC: 221.28
##
## Number of Fisher Scoring iterations: 4
# Multicollinearity check on the full model: print the (generalized) variance
# inflation factor of every term, together with its degrees of freedom and
# GVIF^(1/(2*Df)).
library(car)
## Loading required package: carData
car::vif(mylogit)
## GVIF Df GVIF^(1/(2*Df))
## AGE 1.100003 1 1.048810
## LWT 1.303489 1 1.141704
## factor(RACE) 1.510253 2 1.108568
## factor(SMOKE) 1.348243 1 1.161139
## PTL 1.087847 1 1.042999
## factor(HT) 1.168419 1 1.080934
## factor(UI) 1.063061 1 1.031048
## FTV 1.087144 1 1.042662
# Odds ratios (OR = exp(beta)) and their 95% confidence limits for the full
# model. The two pieces could also be printed separately:
#   exp(mylogit$coefficients)  # odds ratios
#   exp(confint(mylogit))      # 95% CI
# Here they are combined into a single table instead.
cbind(
  OR = exp(coef(mylogit)),
  exp(confint(mylogit))
)
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 1.6170819 0.1586248 17.7689406
## AGE 0.9708833 0.9014649 1.0429731
## LWT 0.9846941 0.9706547 0.9975382
## factor(RACE)2 3.5689085 1.2733620 10.2378101
## factor(RACE)3 2.4120956 1.0269690 5.8422688
## factor(SMOKE)1 2.5570281 1.1753715 5.7425658
## PTL 1.7217428 0.8838560 3.4765158
## factor(HT)1 6.4449886 1.7030020 27.6935195
## factor(UI)1 2.1546928 0.8662663 5.3169672
## FTV 1.0674812 0.7534567 1.4900589
# Stepwise model selection by AIC, starting from the full model.
# `direction` can be "both", "backward" or "forward". In the printed trace,
# rows starting with "-" are candidate removals (backward steps) and rows
# starting with "+" are candidate additions (forward steps).
library(MASS)
step <- MASS::stepAIC(
  mylogit,
  direction = "both"
)
## Start: AIC=221.28
## LOW ~ AGE + LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) +
## factor(UI) + FTV
##
## Df Deviance AIC
## - FTV 1 201.43 219.43
## - AGE 1 201.93 219.93
## <none> 201.28 221.28
## - PTL 1 203.83 221.83
## - factor(UI) 1 204.03 222.03
## - factor(RACE) 2 208.75 224.75
## - LWT 1 206.80 224.80
## - factor(SMOKE) 1 206.91 224.91
## - factor(HT) 1 208.81 226.81
##
## Step: AIC=219.43
## LOW ~ AGE + LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) +
## factor(UI)
##
## Df Deviance AIC
## - AGE 1 201.99 217.99
## <none> 201.43 219.43
## - PTL 1 203.95 219.95
## - factor(UI) 1 204.11 220.11
## + FTV 1 201.28 221.28
## - factor(RACE) 2 208.77 222.77
## - LWT 1 206.81 222.81
## - factor(SMOKE) 1 206.92 222.92
## - factor(HT) 1 208.81 224.81
##
## Step: AIC=217.99
## LOW ~ LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) +
## factor(UI)
##
## Df Deviance AIC
## <none> 201.99 217.99
## - PTL 1 204.22 218.22
## - factor(UI) 1 204.90 218.90
## + AGE 1 201.43 219.43
## + FTV 1 201.93 219.93
## - factor(SMOKE) 1 207.73 221.73
## - LWT 1 208.11 222.11
## - factor(RACE) 2 210.31 222.31
## - factor(HT) 1 209.46 223.46
# Display the stepwise path: initial model, final model, and the deviance/AIC
# recorded at each step.
step[["anova"]]
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## LOW ~ AGE + LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) +
## factor(UI) + FTV
##
## Final Model:
## LOW ~ LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) +
## factor(UI)
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 179 201.2848 221.2848
## 2 - FTV 1 0.1421561 180 201.4270 219.4270
## 3 - AGE 1 0.5586360 181 201.9856 217.9856
# Refit the reduced logistic regression chosen by the stepwise search
# (the full model without AGE and FTV).
newfit <- glm(
  LOW ~ LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) + factor(UI),
  family = "binomial",
  data = dta
)

# Print the coefficient table, deviances and AIC of the reduced model.
summary(newfit)
##
## Call:
## glm(formula = LOW ~ LWT + factor(RACE) + factor(SMOKE) + PTL +
## factor(HT) + factor(UI), family = "binomial", data = dta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9049 -0.8124 -0.5241 0.9483 2.1812
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.086550 0.951760 -0.091 0.92754
## LWT -0.015905 0.006855 -2.320 0.02033 *
## factor(RACE)2 1.325719 0.522243 2.539 0.01113 *
## factor(RACE)3 0.897078 0.433881 2.068 0.03868 *
## factor(SMOKE)1 0.938727 0.398717 2.354 0.01855 *
## PTL 0.503215 0.341231 1.475 0.14029
## factor(HT)1 1.855042 0.695118 2.669 0.00762 **
## factor(UI)1 0.785698 0.456441 1.721 0.08519 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 234.67 on 188 degrees of freedom
## Residual deviance: 201.99 on 181 degrees of freedom
## AIC: 217.99
##
## Number of Fisher Scoring iterations: 4