library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(corrplot)
## corrplot 0.84 loaded
library(ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
# Load the liver data set from the user's Downloads folder.
liver <- read.csv("~/Downloads/liver3.csv")

# Treat Gender as a categorical predictor.
liver$Gender <- as.factor(liver$Gender)

# Reproducible split: draw 90% of the row indices for training and keep the
# remaining rows as the hold-out set.
set.seed(123)
number <- sample(nrow(liver), nrow(liver) * 0.9)
train <- liver[number, ]
test <- liver[-number, ]

# Descriptive statistics
# Stack the measurement columns TB..AGR into long format (columns `measure`
# and `num`), then draw one box plot per measure, with one panel per value of
# `liver`.
df <- liver %>% gather(measure, num, TB:AGR)
ggplot(df, aes(x = measure, y = num, color = measure)) +
  geom_boxplot() +
  facet_grid(. ~ liver) +
  theme(axis.text = element_text(size = rel(1.3)))

# Correlation plot of the measurement columns plus Age ("AOE" ordering).
corrplot(cor(select(liver, TB:AGR, Age)), order = "AOE")

# 9, 10
# Full model: logistic regression of `liver` on every other column of the
# training set, followed by its coefficient table and variance inflation
# factors.
model0 <- glm(formula = liver ~ ., family = binomial(link = "logit"), data = train)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(model0)
vif(model0)
## 
## Call:
## glm(formula = liver ~ ., family = binomial(link = "logit"), data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3971  -0.9312  -0.4235   1.1086   3.1158  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  3.3144613  1.3297663   2.493   0.0127 *
## Age         -0.0169703  0.0066896  -2.537   0.0112 *
## Gender1     -0.0017461  0.2430674  -0.007   0.9943  
## TB          -0.0156004  0.0896548  -0.174   0.8619  
## DB          -0.4042453  0.2383097  -1.696   0.0898 .
## AP          -0.0011557  0.0008032  -1.439   0.1502  
## ALA         -0.0117970  0.0052905  -2.230   0.0258 *
## ASA         -0.0026567  0.0032620  -0.814   0.4154  
## TP          -0.8160842  0.3772941  -2.163   0.0305 *
## ALB          1.4693605  0.7404459   1.984   0.0472 *
## AGR         -1.6102326  1.1334127  -1.421   0.1554  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 627.28  on 520  degrees of freedom
## Residual deviance: 524.65  on 510  degrees of freedom
## AIC: 546.65
## 
## Number of Fisher Scoring iterations: 7
##       Age    Gender        TB        DB        AP       ALA       ASA 
##  1.085994  1.056017  2.209775  2.355281  1.136324  1.832855  1.805860 
##        TP       ALB       AGR 
## 15.891179 30.728596  9.253623
# 11
# Hand-picked variables: the VIFs of the full model blow up (see the values
# printed above), so refit without ALB and compare the two fits by
# likelihood-ratio test, AIC and VIF.
model1 <- update(model0, . ~ . - ALB)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
anova(model1, model0, test = "Chisq")
AIC(model0, model1)
vif(model1)
## Analysis of Deviance Table
## 
## Model 1: liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR
## Model 2: liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + ALB + 
##     AGR
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1       511     529.05                       
## 2       510     524.65  1   4.3976  0.03599 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##        df      AIC
## model0 11 546.6478
## model1 10 549.0454
##      Age   Gender       TB       DB       AP      ALA      ASA       TP 
## 1.090763 1.052777 2.163451 2.296402 1.138304 1.763603 1.789126 1.156140 
##      AGR 
## 1.176941
# Coefficient table and fit statistics for model1 (the full model without ALB).
summary(model1)
## 
## Call:
## glm(formula = liver ~ Age + Gender + TB + DB + AP + ALA + ASA + 
##     TP + AGR, family = binomial(link = "logit"), data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5327  -0.9187  -0.4401   1.1461   3.0900  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  1.3533782  0.8719983   1.552  0.12065   
## Age         -0.0174958  0.0066926  -2.614  0.00894 **
## Gender1     -0.0203723  0.2418759  -0.084  0.93288   
## TB          -0.0128626  0.0874728  -0.147  0.88310   
## DB          -0.4637723  0.2376742  -1.951  0.05102 . 
## AP          -0.0010950  0.0007898  -1.387  0.16557   
## ALA         -0.0097463  0.0050352  -1.936  0.05291 . 
## ASA         -0.0036749  0.0033333  -1.103  0.27024   
## TP          -0.0912103  0.1026053  -0.889  0.37403   
## AGR          0.4435473  0.3876648   1.144  0.25256   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 627.28  on 520  degrees of freedom
## Residual deviance: 529.05  on 511  degrees of freedom
## AIC: 549.05
## 
## Number of Fisher Scoring iterations: 7
# Save the VIFs of model1 to the Desktop.
write.csv(vif(model1), "~/Desktop/vifmodel1.csv")

# model2: model1 plus the interaction of Age with each of TB, DB, AP, ALA,
# ASA, TP and AGR; then compare it with the earlier fits.
model2 <- update(
  model1,
  . ~ . + Age:TB + Age:DB + Age:AP + Age:ALA + Age:ASA + Age:TP + Age:AGR
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
anova(model2, model1, test = "Chisq")
AIC(model0, model1, model2)
vif(model2)
## Analysis of Deviance Table
## 
## Model 1: liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR + 
##     Age:TB + Age:DB + Age:AP + Age:ALA + Age:ASA + Age:TP + Age:AGR
## Model 2: liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       504     520.59                     
## 2       511     529.05 -7  -8.4592   0.2938
##        df      AIC
## model0 11 546.6478
## model1 10 549.0454
## model2 17 554.5862
##        Age     Gender         TB         DB         AP        ALA 
##  48.282426   1.061565 447.142552 464.556702  10.145192  16.645381 
##        ASA         TP        AGR     Age:TB     Age:DB     Age:AP 
##  14.852927   8.516994  10.351750 319.897457 321.990964  13.252783 
##    Age:ALA    Age:ASA     Age:TP    Age:AGR 
##  15.903845  13.549485  38.300503  23.101190
# 12
# Stepwise selection by AIC starting from model1; at every step a term may be
# dropped or a previously dropped term added back.
step(model1, direction = "both")
## Start:  AIC=549.05
## liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## - Gender  1   529.05 547.05
## - TB      1   529.07 547.07
## - TP      1   529.84 547.84
## - AGR     1   530.34 548.34
## - ASA     1   530.40 548.40
## - DB      1   530.73 548.73
## <none>        529.05 549.05
## - AP      1   531.43 549.43
## - ALA     1   533.55 551.55
## - Age     1   536.02 554.02
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=547.05
## liver ~ Age + TB + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## - TB      1   529.08 545.08
## - TP      1   529.84 545.84
## - AGR     1   530.34 546.34
## - ASA     1   530.41 546.41
## - DB      1   530.74 546.74
## <none>        529.05 547.05
## - AP      1   531.44 547.44
## + Gender  1   529.05 549.05
## - ALA     1   533.59 549.59
## - Age     1   536.04 552.04
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=545.08
## liver ~ Age + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## - TP      1   529.86 543.86
## - AGR     1   530.37 544.37
## - ASA     1   530.44 544.44
## <none>        529.08 545.08
## - AP      1   531.46 545.46
## + TB      1   529.05 547.05
## + Gender  1   529.07 547.07
## - ALA     1   533.62 547.62
## - Age     1   536.07 550.07
## - DB      1   548.31 562.31
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=543.86
## liver ~ Age + DB + AP + ALA + ASA + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## - AGR     1   530.76 542.76
## - ASA     1   531.04 543.04
## <none>        529.86 543.86
## - AP      1   532.49 544.49
## + TP      1   529.08 545.08
## + TB      1   529.84 545.84
## + Gender  1   529.86 545.86
## - ALA     1   534.56 546.56
## - Age     1   536.23 548.23
## - DB      1   548.79 560.79
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=542.76
## liver ~ Age + DB + AP + ALA + ASA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## - ASA     1   531.96 541.96
## <none>        530.76 542.76
## + AGR     1   529.86 543.86
## - AP      1   534.13 544.13
## + TP      1   530.37 544.37
## + TB      1   530.74 544.74
## + Gender  1   530.76 544.76
## - ALA     1   535.29 545.29
## - Age     1   538.10 548.10
## - DB      1   551.11 561.11
## 
## Step:  AIC=541.96
## liver ~ Age + DB + AP + ALA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## <none>        531.96 541.96
## + ASA     1   530.76 542.76
## + AGR     1   531.04 543.04
## - AP      1   535.46 543.46
## + TP      1   531.68 543.68
## + TB      1   531.94 543.94
## + Gender  1   531.96 543.96
## - Age     1   539.32 547.32
## - ALA     1   552.57 560.57
## - DB      1   559.18 567.18
## 
## Call:  glm(formula = liver ~ Age + DB + AP + ALA, family = binomial(link = "logit"), 
##     data = train)
## 
## Coefficients:
## (Intercept)          Age           DB           AP          ALA  
##    1.216512    -0.017365    -0.559391    -0.001308    -0.013183  
## 
## Degrees of Freedom: 520 Total (i.e. Null);  516 Residual
## Null Deviance:       627.3 
## Residual Deviance: 532   AIC: 542
# Terms kept by the bidirectional search above: Age + DB + AP + ALA
# Backward elimination by AIC starting from model1.
step(model1, direction = "backward")
## Start:  AIC=549.05
## liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##          Df Deviance    AIC
## - Gender  1   529.05 547.05
## - TB      1   529.07 547.07
## - TP      1   529.84 547.84
## - AGR     1   530.34 548.34
## - ASA     1   530.40 548.40
## - DB      1   530.73 548.73
## <none>        529.05 549.05
## - AP      1   531.43 549.43
## - ALA     1   533.55 551.55
## - Age     1   536.02 554.02
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=547.05
## liver ~ Age + TB + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##        Df Deviance    AIC
## - TB    1   529.08 545.08
## - TP    1   529.84 545.84
## - AGR   1   530.34 546.34
## - ASA   1   530.41 546.41
## - DB    1   530.74 546.74
## <none>      529.05 547.05
## - AP    1   531.44 547.44
## - ALA   1   533.59 549.59
## - Age   1   536.04 552.04
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=545.08
## liver ~ Age + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##        Df Deviance    AIC
## - TP    1   529.86 543.86
## - AGR   1   530.37 544.37
## - ASA   1   530.44 544.44
## <none>      529.08 545.08
## - AP    1   531.46 545.46
## - ALA   1   533.62 547.62
## - Age   1   536.07 550.07
## - DB    1   548.31 562.31
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=543.86
## liver ~ Age + DB + AP + ALA + ASA + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##        Df Deviance    AIC
## - AGR   1   530.76 542.76
## - ASA   1   531.04 543.04
## <none>      529.86 543.86
## - AP    1   532.49 544.49
## - ALA   1   534.56 546.56
## - Age   1   536.23 548.23
## - DB    1   548.79 560.79
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=542.76
## liver ~ Age + DB + AP + ALA + ASA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##        Df Deviance    AIC
## - ASA   1   531.96 541.96
## <none>      530.76 542.76
## - AP    1   534.13 544.13
## - ALA   1   535.29 545.29
## - Age   1   538.10 548.10
## - DB    1   551.11 561.11
## 
## Step:  AIC=541.96
## liver ~ Age + DB + AP + ALA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##        Df Deviance    AIC
## <none>      531.96 541.96
## - AP    1   535.46 543.46
## - Age   1   539.32 547.32
## - ALA   1   552.57 560.57
## - DB    1   559.18 567.18
## 
## Call:  glm(formula = liver ~ Age + DB + AP + ALA, family = binomial(link = "logit"), 
##     data = train)
## 
## Coefficients:
## (Intercept)          Age           DB           AP          ALA  
##    1.216512    -0.017365    -0.559391    -0.001308    -0.013183  
## 
## Degrees of Freedom: 520 Total (i.e. Null);  516 Residual
## Null Deviance:       627.3 
## Residual Deviance: 532   AIC: 542
# Terms kept by the backward search above: Age + DB + AP + ALA
# Forward selection by AIC. step() can only add terms that lie inside `scope`;
# when `scope` is missing the starting model is also the upper limit. The
# original call, step(model1, direction = "forward"), therefore performed no
# search and just returned model1 unchanged
# (Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR, AIC = 549.05).
# Start from the intercept-only model instead and let step() add terms up to
# the model1 formula.
# NOTE: the printed transcript of the old no-op call was removed; re-run this
# chunk to record the trace of the corrected search.
step(update(model1, . ~ 1),
     scope = list(lower = ~1, upper = formula(model1)),
     direction = "forward")


# 13: all-possible-subsets selection
# Read the Big5-encoded table from Downloads (presumably the exported
# all-subsets comparison -- TODO confirm). TRUE is spelled out because `T` is
# an ordinary variable that can be reassigned.
all <- read.csv("~/Downloads/活頁簿2.csv", fileEncoding = "big5", header = TRUE)
# Drop the first row of the table.
all <- all[-1, ]
# Model with the minimum AIC: Age, DB, AP, ALA
model3 <- glm(formula = liver ~ Age + DB + AP + ALA, family = binomial(link = "logit"), data = train)
AIC(model1, model2, model3)
##        df      AIC
## model1 10 549.0454
## model2 17 554.5862
## model3  5 541.9600
# Stepwise search by AIC from model3, allowing every two-way interaction of
# its terms to enter.
step(model3, scope = . ~ .^2)
## Start:  AIC=541.96
## liver ~ Age + DB + AP + ALA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##           Df Deviance    AIC
## + DB:AP    1   524.63 536.63
## + AP:ALA   1   528.61 540.61
## <none>         531.96 541.96
## + Age:DB   1   530.06 542.06
## + DB:ALA   1   530.77 542.77
## + Age:ALA  1   531.19 543.19
## - AP       1   535.46 543.46
## + Age:AP   1   531.47 543.47
## - Age      1   539.32 547.32
## - ALA      1   552.57 560.57
## - DB       1   559.18 567.18
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=536.63
## liver ~ Age + DB + AP + ALA + DB:AP
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##           Df Deviance    AIC
## + Age:DB   1   521.83 535.83
## <none>         524.63 536.63
## + Age:ALA  1   523.66 537.66
## + Age:AP   1   523.76 537.76
## + DB:ALA   1   524.26 538.26
## + AP:ALA   1   524.57 538.57
## - DB:AP    1   531.96 541.96
## - Age      1   532.14 542.14
## - ALA      1   545.36 555.36
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=535.83
## liver ~ Age + DB + AP + ALA + DB:AP + Age:DB
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##           Df Deviance    AIC
## <none>         521.83 535.83
## + DB:ALA   1   520.11 536.11
## - Age:DB   1   524.63 536.63
## + Age:ALA  1   521.17 537.17
## + Age:AP   1   521.45 537.45
## + AP:ALA   1   521.78 537.78
## - DB:AP    1   530.06 542.06
## - ALA      1   543.12 555.12
## 
## Call:  glm(formula = liver ~ Age + DB + AP + ALA + DB:AP + Age:DB, family = binomial(link = "logit"), 
##     data = train)
## 
## Coefficients:
## (Intercept)          Age           DB           AP          ALA  
##    1.183800    -0.009957    -0.102036    -0.002158    -0.013848  
##       DB:AP       Age:DB  
##    0.001015    -0.020031  
## 
## Degrees of Freedom: 520 Total (i.e. Null);  514 Residual
## Null Deviance:       627.3 
## Residual Deviance: 521.8     AIC: 535.8
# model4: model3 plus the DB:AP interaction. The stepwise search above also
# added Age:DB; that term is not included here.
model4 <- glm(
  formula = liver ~ Age + DB + AP + ALA + DB:AP,
  family = binomial(link = "logit"),
  data = train
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Collect the AIC of every fitted model and save the table to the Desktop.
a <- AIC(model4, model3, model2, model1, model0)
write.csv(a, file = "~/Desktop/AIC.csv")

# 2 x 2 plotting grid; only one panel is drawn below (presumably the braced
# block is re-run once per model -- confirm).
par(mfrow = c(2, 2))
# Prediction
# Rebuild the hold-out set with the same seed and sampling call that produced
# the original train/test split.
set.seed(123)
number <- sample(dim(liver)[1], dim(liver)[1] * 0.9)
test <- liver[-number, ]


{
  # Predicted probabilities for the hold-out rows.
  pred <- predict(model4, newdata = test, type = "response")

  # Keep only the predictions that are not NA.
  pred.1 <- pred[!is.na(pred)]
  # Convert probabilities to 0/1 labels at a 0.6 cut-off (not used by the
  # ROC/AUC code below).
  pred.2 <- ifelse(pred.1 > 0.6, 1, 0)
  # Pair the predicted probabilities with the observed `liver` values.
  pr <- prediction(pred.1, test[!is.na(pred), "liver"])
  # ROC curve: true positive rate against false positive rate.
  prf <- performance(pr, measure = "tpr", x.measure = "fpr")

  plot(prf, main = "model4")
  # Dashed red diagonal as a reference line.
  abline(a = 0, b = 1, col = "red", lty = 2)

  # Area under the ROC curve, written into the middle of the plot.
  auc <- performance(pr, measure = "auc")
  text(0.5, 0.5, paste0("AUC= ", as.character(auc@y.values[[1]])), col = 4)
}
# Export the AIC comparison and the per-model VIFs to ~/Desktop/HOPE.
# write.csv() cannot open a file inside a folder that does not exist, so
# create the folder first (does nothing if it is already there).
dir.create("~/Desktop/HOPE", recursive = TRUE, showWarnings = FALSE)
AIC(model1, model2, model3, model4) %>% write.csv("~/Desktop/HOPE/AIC.csv")
vif(model1) %>% write.csv("~/Desktop/HOPE/VIF1.csv")
vif(model2) %>% write.csv("~/Desktop/HOPE/VIF2.csv")
vif(model3) %>% write.csv("~/Desktop/HOPE/VIF3.csv")
vif(model4) %>% write.csv("~/Desktop/HOPE/VIF4.csv")
# Summary of model0 (the full model with every predictor).
summary(model0)
## 
## Call:
## glm(formula = liver ~ ., family = binomial(link = "logit"), data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3971  -0.9312  -0.4235   1.1086   3.1158  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  3.3144613  1.3297663   2.493   0.0127 *
## Age         -0.0169703  0.0066896  -2.537   0.0112 *
## Gender1     -0.0017461  0.2430674  -0.007   0.9943  
## TB          -0.0156004  0.0896548  -0.174   0.8619  
## DB          -0.4042453  0.2383097  -1.696   0.0898 .
## AP          -0.0011557  0.0008032  -1.439   0.1502  
## ALA         -0.0117970  0.0052905  -2.230   0.0258 *
## ASA         -0.0026567  0.0032620  -0.814   0.4154  
## TP          -0.8160842  0.3772941  -2.163   0.0305 *
## ALB          1.4693605  0.7404459   1.984   0.0472 *
## AGR         -1.6102326  1.1334127  -1.421   0.1554  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 627.28  on 520  degrees of freedom
## Residual deviance: 524.65  on 510  degrees of freedom
## AIC: 546.65
## 
## Number of Fisher Scoring iterations: 7
# Summary of model1 (the full model without ALB).
summary(model1)
## 
## Call:
## glm(formula = liver ~ Age + Gender + TB + DB + AP + ALA + ASA + 
##     TP + AGR, family = binomial(link = "logit"), data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5327  -0.9187  -0.4401   1.1461   3.0900  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  1.3533782  0.8719983   1.552  0.12065   
## Age         -0.0174958  0.0066926  -2.614  0.00894 **
## Gender1     -0.0203723  0.2418759  -0.084  0.93288   
## TB          -0.0128626  0.0874728  -0.147  0.88310   
## DB          -0.4637723  0.2376742  -1.951  0.05102 . 
## AP          -0.0010950  0.0007898  -1.387  0.16557   
## ALA         -0.0097463  0.0050352  -1.936  0.05291 . 
## ASA         -0.0036749  0.0033333  -1.103  0.27024   
## TP          -0.0912103  0.1026053  -0.889  0.37403   
## AGR          0.4435473  0.3876648   1.144  0.25256   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 627.28  on 520  degrees of freedom
## Residual deviance: 529.05  on 511  degrees of freedom
## AIC: 549.05
## 
## Number of Fisher Scoring iterations: 7
# Summary of model2 (model1 plus the Age interaction terms).
summary(model2)
## 
## Call:
## glm(formula = liver ~ Age + Gender + TB + DB + AP + ALA + ASA + 
##     TP + AGR + Age:TB + Age:DB + Age:AP + Age:ALA + Age:ASA + 
##     Age:TP + Age:AGR, family = binomial(link = "logit"), data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4663  -0.9461  -0.3757   1.1525   3.3528  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  1.742e+00  2.067e+00   0.843   0.3994  
## Age         -2.742e-02  4.519e-02  -0.607   0.5440  
## Gender1     -8.818e-03  2.440e-01  -0.036   0.9712  
## TB           1.403e+00  1.509e+00   0.930   0.3525  
## DB          -2.735e+00  3.011e+00  -0.908   0.3638  
## AP          -8.753e-04  2.430e-03  -0.360   0.7187  
## ALA         -1.652e-02  1.519e-02  -1.088   0.2768  
## ASA          6.281e-03  9.338e-03   0.673   0.5012  
## TP           4.806e-02  2.811e-01   0.171   0.8643  
## AGR         -1.826e+00  1.184e+00  -1.542   0.1230  
## Age:TB      -2.714e-02  3.205e-02  -0.847   0.3971  
## Age:DB       4.166e-02  6.255e-02   0.666   0.5053  
## Age:AP      -1.439e-06  5.210e-05  -0.028   0.9780  
## Age:ALA      1.593e-04  3.729e-04   0.427   0.6693  
## Age:ASA     -2.667e-04  2.331e-04  -1.144   0.2525  
## Age:TP      -3.005e-03  6.154e-03  -0.488   0.6253  
## Age:AGR      5.067e-02  2.498e-02   2.029   0.0425 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 627.28  on 520  degrees of freedom
## Residual deviance: 520.59  on 504  degrees of freedom
## AIC: 554.59
## 
## Number of Fisher Scoring iterations: 9
# Summary of model3 (Age + DB + AP + ALA).
summary(model3)
## 
## Call:
## glm(formula = liver ~ Age + DB + AP + ALA, family = binomial(link = "logit"), 
##     data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3426  -0.9235  -0.4722   1.1497   3.1303  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.2165115  0.3746680   3.247 0.001167 ** 
## Age         -0.0173652  0.0064726  -2.683 0.007299 ** 
## DB          -0.5593911  0.1678835  -3.332 0.000862 ***
## AP          -0.0013077  0.0007936  -1.648 0.099402 .  
## ALA         -0.0131829  0.0039618  -3.327 0.000876 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 627.28  on 520  degrees of freedom
## Residual deviance: 531.96  on 516  degrees of freedom
## AIC: 541.96
## 
## Number of Fisher Scoring iterations: 7
# Summary of model4 (model3 plus the DB:AP interaction).
summary(model4)
## 
## Call:
## glm(formula = liver ~ Age + DB + AP + ALA + DB:AP, family = binomial(link = "logit"), 
##     data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4065  -0.9325  -0.4358   1.1243   2.9207  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.5200734  0.3972913   3.826 0.000130 ***
## Age         -0.0177281  0.0065430  -2.709 0.006739 ** 
## DB          -0.9578932  0.2299968  -4.165 3.12e-05 ***
## AP          -0.0022573  0.0008496  -2.657 0.007884 ** 
## ALA         -0.0134353  0.0039264  -3.422 0.000622 ***
## DB:AP        0.0009647  0.0002361   4.086 4.40e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 627.28  on 520  degrees of freedom
## Residual deviance: 524.63  on 515  degrees of freedom
## AIC: 536.63
## 
## Number of Fisher Scoring iterations: 7
# Influence check: for each model, count the observations whose Cook's
# distance is at least 1.
out <- cooks.distance(model0)
sum(out >= 1, na.rm = TRUE)
## [1] 0
out <- cooks.distance(model1)
sum(out >= 1, na.rm = TRUE)
## [1] 1
out <- cooks.distance(model2)
sum(out >= 1, na.rm = TRUE)
## [1] 0
out <- cooks.distance(model3)
sum(out >= 1, na.rm = TRUE)
## [1] 0
out <- cooks.distance(model4)
sum(out >= 1, na.rm = TRUE)
## [1] 0