資料匯入

# Read lowbwt.csv into `dta`, the data frame passed as `data = dta` to the
# model fits in this file.
dta <- read.csv("C:/Users/User/Desktop/LearnR/CA/CAdata/lowbwt.csv")

# View() opens a spreadsheet-style viewer and needs a live session, so only
# call it interactively; this keeps Rscript / knitting runs from failing here.
if (interactive()) {
  View(dta)
}

# NOTE(review): every model call visible in this file passes `data = dta`, so
# attach() looks unnecessary; it is kept in case later code refers to the
# columns by bare name. Prefer dropping it once that is confirmed.
attach(dta)

(a) Please use multiple logistic regression to analyze the risk factors of LOW by all independent variables.

(Outcome: LOW;

Predictors: AGE, LWT, RACE, SMOKE, PTL, HT, UI, and FTV)

(AGE, LWT, PTL, and FTV are treated as continuous variables)

(RACE, SMOKE, HT, and UI are treated as categorical variables)

model

# Full logistic regression fitted with glm() (generalized linear model).
# AGE, LWT, PTL and FTV enter the formula unwrapped; RACE, SMOKE, HT and UI
# are wrapped in factor() so they are treated as categorical predictors.
mylogit <- glm(
  LOW ~ AGE + LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) +
    factor(UI) + FTV,
  data = dta,
  family = "binomial"
)
summary(mylogit)
## 
## Call:
## glm(formula = LOW ~ AGE + LWT + factor(RACE) + factor(SMOKE) + 
##     PTL + factor(HT) + factor(UI) + FTV, family = "binomial", 
##     data = dta)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8946  -0.8212  -0.5316   0.9818   2.2125  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)   
## (Intercept)     0.480623   1.196888   0.402  0.68801   
## AGE            -0.029549   0.037031  -0.798  0.42489   
## LWT            -0.015424   0.006919  -2.229  0.02580 * 
## factor(RACE)2   1.272260   0.527357   2.413  0.01584 * 
## factor(RACE)3   0.880496   0.440778   1.998  0.04576 * 
## factor(SMOKE)1  0.938846   0.402147   2.335  0.01957 * 
## PTL             0.543337   0.345403   1.573  0.11571   
## factor(HT)1     1.863303   0.697533   2.671  0.00756 **
## factor(UI)1     0.767648   0.459318   1.671  0.09467 . 
## FTV             0.065302   0.172394   0.379  0.70484   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 234.67  on 188  degrees of freedom
## Residual deviance: 201.28  on 179  degrees of freedom
## AIC: 221.28
## 
## Number of Fisher Scoring iterations: 4

VIF

# Collinearity check on the full model: vif() (made available by library(car))
# prints generalized variance inflation factors (GVIF), one row per model term.
library(car)
## Loading required package: carData
vif(mylogit)
##                   GVIF Df GVIF^(1/(2*Df))
## AGE           1.100003  1        1.048810
## LWT           1.303489  1        1.141704
## factor(RACE)  1.510253  2        1.108568
## factor(SMOKE) 1.348243  1        1.161139
## PTL           1.087847  1        1.042999
## factor(HT)    1.168419  1        1.080934
## factor(UI)    1.063061  1        1.031048
## FTV           1.087144  1        1.042662

OR

# Odds ratios (OR = exp(beta)) with their 95% confidence limits.
# The two pieces could also be computed separately:
#   exp(mylogit$coefficients)   # odds ratios
#   exp(confint(mylogit))       # 95% CI
# Binding the columns first and exponentiating once gives a single table.
exp(
  cbind(
    OR = coef(mylogit),
    confint(mylogit)
  )
)
## Waiting for profiling to be done...
##                       OR     2.5 %     97.5 %
## (Intercept)    1.6170819 0.1586248 17.7689406
## AGE            0.9708833 0.9014649  1.0429731
## LWT            0.9846941 0.9706547  0.9975382
## factor(RACE)2  3.5689085 1.2733620 10.2378101
## factor(RACE)3  2.4120956 1.0269690  5.8422688
## factor(SMOKE)1 2.5570281 1.1753715  5.7425658
## PTL            1.7217428 0.8838560  3.4765158
## factor(HT)1    6.4449886 1.7030020 27.6935195
## factor(UI)1    2.1546928 0.8662663  5.3169672
## FTV            1.0674812 0.7534567  1.4900589

(b) Please use multiple logistic regression to analyze the risk factors of LOW by stepwise method.

stepwise regression

library(MASS)

# Stepwise selection by AIC, starting from the full model `mylogit`.
# `direction` may be "both", "backward" or "forward". In the printed trace a
# leading "-" marks a candidate removal (backward step) and a leading "+" a
# candidate addition (forward step).
step <- stepAIC(
  mylogit,
  direction = "both"
)
## Start:  AIC=221.28
## LOW ~ AGE + LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) + 
##     factor(UI) + FTV
## 
##                 Df Deviance    AIC
## - FTV            1   201.43 219.43
## - AGE            1   201.93 219.93
## <none>               201.28 221.28
## - PTL            1   203.83 221.83
## - factor(UI)     1   204.03 222.03
## - factor(RACE)   2   208.75 224.75
## - LWT            1   206.80 224.80
## - factor(SMOKE)  1   206.91 224.91
## - factor(HT)     1   208.81 226.81
## 
## Step:  AIC=219.43
## LOW ~ AGE + LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) + 
##     factor(UI)
## 
##                 Df Deviance    AIC
## - AGE            1   201.99 217.99
## <none>               201.43 219.43
## - PTL            1   203.95 219.95
## - factor(UI)     1   204.11 220.11
## + FTV            1   201.28 221.28
## - factor(RACE)   2   208.77 222.77
## - LWT            1   206.81 222.81
## - factor(SMOKE)  1   206.92 222.92
## - factor(HT)     1   208.81 224.81
## 
## Step:  AIC=217.99
## LOW ~ LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) + 
##     factor(UI)
## 
##                 Df Deviance    AIC
## <none>               201.99 217.99
## - PTL            1   204.22 218.22
## - factor(UI)     1   204.90 218.90
## + AGE            1   201.43 219.43
## + FTV            1   201.93 219.93
## - factor(SMOKE)  1   207.73 221.73
## - LWT            1   208.11 222.11
## - factor(RACE)   2   210.31 222.31
## - factor(HT)     1   209.46 223.46
step[["anova"]]  # print the stepwise model path recorded by stepAIC()
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## LOW ~ AGE + LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) + 
##     factor(UI) + FTV
## 
## Final Model:
## LOW ~ LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) + 
##     factor(UI)
## 
## 
##    Step Df  Deviance Resid. Df Resid. Dev      AIC
## 1                          179   201.2848 221.2848
## 2 - FTV  1 0.1421561       180   201.4270 219.4270
## 3 - AGE  1 0.5586360       181   201.9856 217.9856

new model

# Refit the reduced model (the full model without AGE and FTV) and print its
# coefficient table.
newfit <- glm(
  LOW ~ LWT + factor(RACE) + factor(SMOKE) + PTL + factor(HT) + factor(UI),
  data = dta,
  family = "binomial"
)
summary(newfit)
## 
## Call:
## glm(formula = LOW ~ LWT + factor(RACE) + factor(SMOKE) + PTL + 
##     factor(HT) + factor(UI), family = "binomial", data = dta)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9049  -0.8124  -0.5241   0.9483   2.1812  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)   
## (Intercept)    -0.086550   0.951760  -0.091  0.92754   
## LWT            -0.015905   0.006855  -2.320  0.02033 * 
## factor(RACE)2   1.325719   0.522243   2.539  0.01113 * 
## factor(RACE)3   0.897078   0.433881   2.068  0.03868 * 
## factor(SMOKE)1  0.938727   0.398717   2.354  0.01855 * 
## PTL             0.503215   0.341231   1.475  0.14029   
## factor(HT)1     1.855042   0.695118   2.669  0.00762 **
## factor(UI)1     0.785698   0.456441   1.721  0.08519 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 234.67  on 188  degrees of freedom
## Residual deviance: 201.99  on 181  degrees of freedom
## AIC: 217.99
## 
## Number of Fisher Scoring iterations: 4