library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(corrplot)
## corrplot 0.84 loaded
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Read the liver data set and hold out 10% of the rows for testing.
liver <- read.csv("~/Downloads/liver3.csv")
# Fixed seed so the train/test split is reproducible.
set.seed(123)
# Row indices of the training sample: 90% of the rows, drawn without replacement.
number <- sample(nrow(liver), nrow(liver) * 0.9)
# Treat Gender as a categorical predictor.
liver$Gender <- as.factor(liver$Gender)
# Sampled rows form the training set; the remaining rows form the test set.
train <- liver[number, ]
test <- liver[-number, ]
# Descriptive statistics
# Long format: one row per observation and measurement for columns TB..AGR.
df <- gather(liver, key = measure, value = num, TB:AGR)
# Box plot of every measurement, one panel per value of the `liver` column.
ggplot(df, aes(x = measure, y = num, color = measure)) +
  geom_boxplot() +
  facet_grid(. ~ liver) +
  theme(axis.text = element_text(size = rel(1.3)))

# Correlation plot of TB..AGR and Age, using corrplot's "AOE" ordering.
corrplot(cor(select(liver, TB:AGR, Age)), order = "AOE")

#9,10
# Full model: logistic regression of `liver` on every other column of `train`.
model0 <- glm(
  formula = liver ~ .,
  family = binomial(link = "logit"),
  data = train
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Coefficient table, then variance inflation factors to check collinearity.
summary(model0)
vif(model0)
##
## Call:
## glm(formula = liver ~ ., family = binomial(link = "logit"), data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3971 -0.9312 -0.4235 1.1086 3.1158
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.3144613 1.3297663 2.493 0.0127 *
## Age -0.0169703 0.0066896 -2.537 0.0112 *
## Gender1 -0.0017461 0.2430674 -0.007 0.9943
## TB -0.0156004 0.0896548 -0.174 0.8619
## DB -0.4042453 0.2383097 -1.696 0.0898 .
## AP -0.0011557 0.0008032 -1.439 0.1502
## ALA -0.0117970 0.0052905 -2.230 0.0258 *
## ASA -0.0026567 0.0032620 -0.814 0.4154
## TP -0.8160842 0.3772941 -2.163 0.0305 *
## ALB 1.4693605 0.7404459 1.984 0.0472 *
## AGR -1.6102326 1.1334127 -1.421 0.1554
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 627.28 on 520 degrees of freedom
## Residual deviance: 524.65 on 510 degrees of freedom
## AIC: 546.65
##
## Number of Fisher Scoring iterations: 7
## Age Gender TB DB AP ALA ASA
## 1.085994 1.056017 2.209775 2.355281 1.136324 1.832855 1.805860
## TP ALB AGR
## 15.891179 30.728596 9.253623
#11
# Hand-picked variables: the VIFs of the full model blow up, so refit it
# without ALB.
model1 <- update(model0, . ~ . - ALB)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Likelihood-ratio test against the full model, AIC comparison, and the VIFs
# of the reduced model.
anova(model1, model0, test = "Chisq")
AIC(model0, model1)
vif(model1)
## Analysis of Deviance Table
##
## Model 1: liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR
## Model 2: liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + ALB +
## AGR
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 511 529.05
## 2 510 524.65 1 4.3976 0.03599 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## df AIC
## model0 11 546.6478
## model1 10 549.0454
## Age Gender TB DB AP ALA ASA TP
## 1.090763 1.052777 2.163451 2.296402 1.138304 1.763603 1.789126 1.156140
## AGR
## 1.176941
# Coefficient table and deviance summary for model1 (model0 without ALB).
summary(model1)
##
## Call:
## glm(formula = liver ~ Age + Gender + TB + DB + AP + ALA + ASA +
## TP + AGR, family = binomial(link = "logit"), data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5327 -0.9187 -0.4401 1.1461 3.0900
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.3533782 0.8719983 1.552 0.12065
## Age -0.0174958 0.0066926 -2.614 0.00894 **
## Gender1 -0.0203723 0.2418759 -0.084 0.93288
## TB -0.0128626 0.0874728 -0.147 0.88310
## DB -0.4637723 0.2376742 -1.951 0.05102 .
## AP -0.0010950 0.0007898 -1.387 0.16557
## ALA -0.0097463 0.0050352 -1.936 0.05291 .
## ASA -0.0036749 0.0033333 -1.103 0.27024
## TP -0.0912103 0.1026053 -0.889 0.37403
## AGR 0.4435473 0.3876648 1.144 0.25256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 627.28 on 520 degrees of freedom
## Residual deviance: 529.05 on 511 degrees of freedom
## AIC: 549.05
##
## Number of Fisher Scoring iterations: 7
# Save the VIF table of model1 to the desktop.
write.csv(vif(model1), "~/Desktop/vifmodel1.csv")
# model2: model1 plus the interaction of Age with each continuous predictor.
model2 <- update(
  model1,
  . ~ . + Age:TB + Age:DB + Age:AP + Age:ALA + Age:ASA + Age:TP + Age:AGR
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Likelihood-ratio test of the interaction terms, AIC of all models so far,
# and the VIFs of the interaction model.
anova(model2, model1, test = "Chisq")
AIC(model0, model1, model2)
vif(model2)
## Analysis of Deviance Table
##
## Model 1: liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR +
## Age:TB + Age:DB + Age:AP + Age:ALA + Age:ASA + Age:TP + Age:AGR
## Model 2: liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 504 520.59
## 2 511 529.05 -7 -8.4592 0.2938
## df AIC
## model0 11 546.6478
## model1 10 549.0454
## model2 17 554.5862
## Age Gender TB DB AP ALA
## 48.282426 1.061565 447.142552 464.556702 10.145192 16.645381
## ASA TP AGR Age:TB Age:DB Age:AP
## 14.852927 8.516994 10.351750 319.897457 321.990964 13.252783
## Age:ALA Age:ASA Age:TP Age:AGR
## 15.903845 13.549485 38.300503 23.101190
#12
# Stepwise selection by AIC starting from model1; with direction "both" a term
# may be dropped and later re-added at each step.
step(model1,direction = "both")
## Start: AIC=549.05
## liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - Gender 1 529.05 547.05
## - TB 1 529.07 547.07
## - TP 1 529.84 547.84
## - AGR 1 530.34 548.34
## - ASA 1 530.40 548.40
## - DB 1 530.73 548.73
## <none> 529.05 549.05
## - AP 1 531.43 549.43
## - ALA 1 533.55 551.55
## - Age 1 536.02 554.02
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=547.05
## liver ~ Age + TB + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - TB 1 529.08 545.08
## - TP 1 529.84 545.84
## - AGR 1 530.34 546.34
## - ASA 1 530.41 546.41
## - DB 1 530.74 546.74
## <none> 529.05 547.05
## - AP 1 531.44 547.44
## + Gender 1 529.05 549.05
## - ALA 1 533.59 549.59
## - Age 1 536.04 552.04
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=545.08
## liver ~ Age + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - TP 1 529.86 543.86
## - AGR 1 530.37 544.37
## - ASA 1 530.44 544.44
## <none> 529.08 545.08
## - AP 1 531.46 545.46
## + TB 1 529.05 547.05
## + Gender 1 529.07 547.07
## - ALA 1 533.62 547.62
## - Age 1 536.07 550.07
## - DB 1 548.31 562.31
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=543.86
## liver ~ Age + DB + AP + ALA + ASA + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - AGR 1 530.76 542.76
## - ASA 1 531.04 543.04
## <none> 529.86 543.86
## - AP 1 532.49 544.49
## + TP 1 529.08 545.08
## + TB 1 529.84 545.84
## + Gender 1 529.86 545.86
## - ALA 1 534.56 546.56
## - Age 1 536.23 548.23
## - DB 1 548.79 560.79
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=542.76
## liver ~ Age + DB + AP + ALA + ASA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - ASA 1 531.96 541.96
## <none> 530.76 542.76
## + AGR 1 529.86 543.86
## - AP 1 534.13 544.13
## + TP 1 530.37 544.37
## + TB 1 530.74 544.74
## + Gender 1 530.76 544.76
## - ALA 1 535.29 545.29
## - Age 1 538.10 548.10
## - DB 1 551.11 561.11
##
## Step: AIC=541.96
## liver ~ Age + DB + AP + ALA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## <none> 531.96 541.96
## + ASA 1 530.76 542.76
## + AGR 1 531.04 543.04
## - AP 1 535.46 543.46
## + TP 1 531.68 543.68
## + TB 1 531.94 543.94
## + Gender 1 531.96 543.96
## - Age 1 539.32 547.32
## - ALA 1 552.57 560.57
## - DB 1 559.18 567.18
##
## Call: glm(formula = liver ~ Age + DB + AP + ALA, family = binomial(link = "logit"),
## data = train)
##
## Coefficients:
## (Intercept) Age DB AP ALA
## 1.216512 -0.017365 -0.559391 -0.001308 -0.013183
##
## Degrees of Freedom: 520 Total (i.e. Null); 516 Residual
## Null Deviance: 627.3
## Residual Deviance: 532 AIC: 542
# Result of the "both" search above: Age + DB + AP + ALA
# Backward elimination by AIC starting from model1: terms can only be dropped.
step(model1,direction = "backward")
## Start: AIC=549.05
## liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - Gender 1 529.05 547.05
## - TB 1 529.07 547.07
## - TP 1 529.84 547.84
## - AGR 1 530.34 548.34
## - ASA 1 530.40 548.40
## - DB 1 530.73 548.73
## <none> 529.05 549.05
## - AP 1 531.43 549.43
## - ALA 1 533.55 551.55
## - Age 1 536.02 554.02
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=547.05
## liver ~ Age + TB + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - TB 1 529.08 545.08
## - TP 1 529.84 545.84
## - AGR 1 530.34 546.34
## - ASA 1 530.41 546.41
## - DB 1 530.74 546.74
## <none> 529.05 547.05
## - AP 1 531.44 547.44
## - ALA 1 533.59 549.59
## - Age 1 536.04 552.04
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=545.08
## liver ~ Age + DB + AP + ALA + ASA + TP + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - TP 1 529.86 543.86
## - AGR 1 530.37 544.37
## - ASA 1 530.44 544.44
## <none> 529.08 545.08
## - AP 1 531.46 545.46
## - ALA 1 533.62 547.62
## - Age 1 536.07 550.07
## - DB 1 548.31 562.31
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=543.86
## liver ~ Age + DB + AP + ALA + ASA + AGR
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - AGR 1 530.76 542.76
## - ASA 1 531.04 543.04
## <none> 529.86 543.86
## - AP 1 532.49 544.49
## - ALA 1 534.56 546.56
## - Age 1 536.23 548.23
## - DB 1 548.79 560.79
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=542.76
## liver ~ Age + DB + AP + ALA + ASA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - ASA 1 531.96 541.96
## <none> 530.76 542.76
## - AP 1 534.13 544.13
## - ALA 1 535.29 545.29
## - Age 1 538.10 548.10
## - DB 1 551.11 561.11
##
## Step: AIC=541.96
## liver ~ Age + DB + AP + ALA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## <none> 531.96 541.96
## - AP 1 535.46 543.46
## - Age 1 539.32 547.32
## - ALA 1 552.57 560.57
## - DB 1 559.18 567.18
##
## Call: glm(formula = liver ~ Age + DB + AP + ALA, family = binomial(link = "logit"),
## data = train)
##
## Coefficients:
## (Intercept) Age DB AP ALA
## 1.216512 -0.017365 -0.559391 -0.001308 -0.013183
##
## Degrees of Freedom: 520 Total (i.e. Null); 516 Residual
## Null Deviance: 627.3
## Residual Deviance: 532 AIC: 542
# Result of the backward search above: Age + DB + AP + ALA
# Forward selection by AIC. step() with no `scope` uses the starting model as
# the upper limit, so a forward search started at model1 has nothing to add and
# returns model1 unchanged. Start from the intercept-only model instead and let
# the search add terms up to model1's formula.
# NOTE: the transcript lines that follow were produced by the earlier call
# step(model1, direction = "forward"); re-run the script to refresh them.
step(
  update(model1, . ~ 1),
  scope = list(lower = ~1, upper = formula(model1)),
  direction = "forward"
)
## Start: AIC=549.05
## liver ~ Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR
##
## Call: glm(formula = liver ~ Age + Gender + TB + DB + AP + ALA + ASA +
## TP + AGR, family = binomial(link = "logit"), data = train)
##
## Coefficients:
## (Intercept) Age Gender1 TB DB
## 1.353378 -0.017496 -0.020372 -0.012863 -0.463772
## AP ALA ASA TP AGR
## -0.001095 -0.009746 -0.003675 -0.091210 0.443547
##
## Degrees of Freedom: 520 Total (i.e. Null); 511 Residual
## Null Deviance: 627.3
## Residual Deviance: 529 AIC: 549
# Forward-step result recorded in the transcript above:
# Age + Gender + TB + DB + AP + ALA + ASA + TP + AGR
#13 All-possible-subsets selection
# Read the Big5-encoded CSV (presumably the table of all candidate subsets;
# confirm against the file). `TRUE` is spelled out because `T` can be
# reassigned.
all <- read.csv("~/Downloads/活頁簿2.csv", fileEncoding = "big5", header = TRUE)
# Drop the first data row.
all <- all[-1, ]
# Minimum-AIC candidate: Age, DB, AP, ALA (also the model the "both" and
# backward searches above ended on).
model3 <- glm(
  formula = liver ~ Age + DB + AP + ALA,
  family = binomial(link = "logit"),
  data = train
)
# Compare it with the earlier models.
AIC(model1, model2, model3)
## df AIC
## model1 10 549.0454
## model2 17 554.5862
## model3 5 541.9600
# Stepwise search by AIC from model3, allowing every two-way interaction of
# its terms as the upper scope.
step(model3, scope = . ~ .^2)
## Start: AIC=541.96
## liver ~ Age + DB + AP + ALA
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## + DB:AP 1 524.63 536.63
## + AP:ALA 1 528.61 540.61
## <none> 531.96 541.96
## + Age:DB 1 530.06 542.06
## + DB:ALA 1 530.77 542.77
## + Age:ALA 1 531.19 543.19
## - AP 1 535.46 543.46
## + Age:AP 1 531.47 543.47
## - Age 1 539.32 547.32
## - ALA 1 552.57 560.57
## - DB 1 559.18 567.18
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=536.63
## liver ~ Age + DB + AP + ALA + DB:AP
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## + Age:DB 1 521.83 535.83
## <none> 524.63 536.63
## + Age:ALA 1 523.66 537.66
## + Age:AP 1 523.76 537.76
## + DB:ALA 1 524.26 538.26
## + AP:ALA 1 524.57 538.57
## - DB:AP 1 531.96 541.96
## - Age 1 532.14 542.14
## - ALA 1 545.36 555.36
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=535.83
## liver ~ Age + DB + AP + ALA + DB:AP + Age:DB
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## <none> 521.83 535.83
## + DB:ALA 1 520.11 536.11
## - Age:DB 1 524.63 536.63
## + Age:ALA 1 521.17 537.17
## + Age:AP 1 521.45 537.45
## + AP:ALA 1 521.78 537.78
## - DB:AP 1 530.06 542.06
## - ALA 1 543.12 555.12
##
## Call: glm(formula = liver ~ Age + DB + AP + ALA + DB:AP + Age:DB, family = binomial(link = "logit"),
## data = train)
##
## Coefficients:
## (Intercept) Age DB AP ALA
## 1.183800 -0.009957 -0.102036 -0.002158 -0.013848
## DB:AP Age:DB
## 0.001015 -0.020031
##
## Degrees of Freedom: 520 Total (i.e. Null); 514 Residual
## Null Deviance: 627.3
## Residual Deviance: 521.8 AIC: 535.8
# model4: model3 plus the DB:AP interaction (the first term the search above
# added).
model4 <- glm(
  formula = liver ~ Age + DB + AP + ALA + DB:AP,
  family = binomial(link = "logit"),
  data = train
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# AIC of every fitted model, saved to the desktop.
a <- AIC(model4, model3, model2, model1, model0)
write.csv(a, file = "~/Desktop/AIC.csv")
# Lay out subsequent base-graphics plots on a 2 x 2 grid.
par(mfrow = c(2, 2))
#predict
# Rebuild the hold-out set with the same seed and 90% draw as the original
# split.
set.seed(123)
number <- sample(nrow(liver), nrow(liver) * 0.9)
test <- liver[-number, ]
{
  # Predicted probabilities from model4 for the hold-out rows.
  pred <- predict(model4, newdata = test, type = "response")
  # Keep only the rows for which a prediction could be computed.
  pred.1 <- pred[!is.na(pred)]
  # Hard 0/1 labels at a 0.6 cut-off (not used again in this section).
  pred.2 <- ifelse(pred.1 > 0.6, 1, 0)
  # Pair the predictions with the observed outcome for ROCR.
  pr <- prediction(pred.1, test$liver[!is.na(pred)])
  # True-positive rate against false-positive rate, i.e. the ROC curve.
  prf <- performance(pr, measure = "tpr", x.measure = "fpr")
  plot(prf, main = "model4")
  # Dashed red diagonal for reference.
  abline(a = 0, b = 1, col = "red", lty = 2)
  # Area under the ROC curve, printed in the middle of the plot.
  auc <- performance(pr, measure = "auc")
  text(0.5, 0.5, paste0("AUC= ", auc@y.values[[1]]), col = 4)
}
# Export the AIC comparison and the VIF table of each model to ~/Desktop/HOPE.
# Create the folder first: write.csv() cannot open a file in a directory that
# does not exist. An existing folder is left as is.
dir.create("~/Desktop/HOPE", showWarnings = FALSE, recursive = TRUE)
write.csv(AIC(model1, model2, model3, model4), "~/Desktop/HOPE/AIC.csv")
write.csv(vif(model1), "~/Desktop/HOPE/VIF1.csv")
write.csv(vif(model2), "~/Desktop/HOPE/VIF2.csv")
write.csv(vif(model3), "~/Desktop/HOPE/VIF3.csv")
write.csv(vif(model4), "~/Desktop/HOPE/VIF4.csv")
# Coefficient table and deviance summary of the full model.
summary(model0)
##
## Call:
## glm(formula = liver ~ ., family = binomial(link = "logit"), data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3971 -0.9312 -0.4235 1.1086 3.1158
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.3144613 1.3297663 2.493 0.0127 *
## Age -0.0169703 0.0066896 -2.537 0.0112 *
## Gender1 -0.0017461 0.2430674 -0.007 0.9943
## TB -0.0156004 0.0896548 -0.174 0.8619
## DB -0.4042453 0.2383097 -1.696 0.0898 .
## AP -0.0011557 0.0008032 -1.439 0.1502
## ALA -0.0117970 0.0052905 -2.230 0.0258 *
## ASA -0.0026567 0.0032620 -0.814 0.4154
## TP -0.8160842 0.3772941 -2.163 0.0305 *
## ALB 1.4693605 0.7404459 1.984 0.0472 *
## AGR -1.6102326 1.1334127 -1.421 0.1554
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 627.28 on 520 degrees of freedom
## Residual deviance: 524.65 on 510 degrees of freedom
## AIC: 546.65
##
## Number of Fisher Scoring iterations: 7
# Coefficient table and deviance summary of model1 (model0 without ALB).
summary(model1)
##
## Call:
## glm(formula = liver ~ Age + Gender + TB + DB + AP + ALA + ASA +
## TP + AGR, family = binomial(link = "logit"), data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5327 -0.9187 -0.4401 1.1461 3.0900
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.3533782 0.8719983 1.552 0.12065
## Age -0.0174958 0.0066926 -2.614 0.00894 **
## Gender1 -0.0203723 0.2418759 -0.084 0.93288
## TB -0.0128626 0.0874728 -0.147 0.88310
## DB -0.4637723 0.2376742 -1.951 0.05102 .
## AP -0.0010950 0.0007898 -1.387 0.16557
## ALA -0.0097463 0.0050352 -1.936 0.05291 .
## ASA -0.0036749 0.0033333 -1.103 0.27024
## TP -0.0912103 0.1026053 -0.889 0.37403
## AGR 0.4435473 0.3876648 1.144 0.25256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 627.28 on 520 degrees of freedom
## Residual deviance: 529.05 on 511 degrees of freedom
## AIC: 549.05
##
## Number of Fisher Scoring iterations: 7
# Coefficient table and deviance summary of model2 (model1 plus the Age
# interaction terms).
summary(model2)
##
## Call:
## glm(formula = liver ~ Age + Gender + TB + DB + AP + ALA + ASA +
## TP + AGR + Age:TB + Age:DB + Age:AP + Age:ALA + Age:ASA +
## Age:TP + Age:AGR, family = binomial(link = "logit"), data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4663 -0.9461 -0.3757 1.1525 3.3528
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.742e+00 2.067e+00 0.843 0.3994
## Age -2.742e-02 4.519e-02 -0.607 0.5440
## Gender1 -8.818e-03 2.440e-01 -0.036 0.9712
## TB 1.403e+00 1.509e+00 0.930 0.3525
## DB -2.735e+00 3.011e+00 -0.908 0.3638
## AP -8.753e-04 2.430e-03 -0.360 0.7187
## ALA -1.652e-02 1.519e-02 -1.088 0.2768
## ASA 6.281e-03 9.338e-03 0.673 0.5012
## TP 4.806e-02 2.811e-01 0.171 0.8643
## AGR -1.826e+00 1.184e+00 -1.542 0.1230
## Age:TB -2.714e-02 3.205e-02 -0.847 0.3971
## Age:DB 4.166e-02 6.255e-02 0.666 0.5053
## Age:AP -1.439e-06 5.210e-05 -0.028 0.9780
## Age:ALA 1.593e-04 3.729e-04 0.427 0.6693
## Age:ASA -2.667e-04 2.331e-04 -1.144 0.2525
## Age:TP -3.005e-03 6.154e-03 -0.488 0.6253
## Age:AGR 5.067e-02 2.498e-02 2.029 0.0425 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 627.28 on 520 degrees of freedom
## Residual deviance: 520.59 on 504 degrees of freedom
## AIC: 554.59
##
## Number of Fisher Scoring iterations: 9
# Coefficient table and deviance summary of model3 (Age + DB + AP + ALA).
summary(model3)
##
## Call:
## glm(formula = liver ~ Age + DB + AP + ALA, family = binomial(link = "logit"),
## data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3426 -0.9235 -0.4722 1.1497 3.1303
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2165115 0.3746680 3.247 0.001167 **
## Age -0.0173652 0.0064726 -2.683 0.007299 **
## DB -0.5593911 0.1678835 -3.332 0.000862 ***
## AP -0.0013077 0.0007936 -1.648 0.099402 .
## ALA -0.0131829 0.0039618 -3.327 0.000876 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 627.28 on 520 degrees of freedom
## Residual deviance: 531.96 on 516 degrees of freedom
## AIC: 541.96
##
## Number of Fisher Scoring iterations: 7
# Coefficient table and deviance summary of model4 (model3 plus DB:AP).
summary(model4)
##
## Call:
## glm(formula = liver ~ Age + DB + AP + ALA + DB:AP, family = binomial(link = "logit"),
## data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4065 -0.9325 -0.4358 1.1243 2.9207
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.5200734 0.3972913 3.826 0.000130 ***
## Age -0.0177281 0.0065430 -2.709 0.006739 **
## DB -0.9578932 0.2299968 -4.165 3.12e-05 ***
## AP -0.0022573 0.0008496 -2.657 0.007884 **
## ALA -0.0134353 0.0039264 -3.422 0.000622 ***
## DB:AP 0.0009647 0.0002361 4.086 4.40e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 627.28 on 520 degrees of freedom
## Residual deviance: 524.63 on 515 degrees of freedom
## AIC: 536.63
##
## Number of Fisher Scoring iterations: 7
# Influence check: for each model, count the observations whose Cook's
# distance is at least 1.
out <- cooks.distance(model0)
sum(out >= 1, na.rm = TRUE)
## [1] 0
out <- cooks.distance(model1)
sum(out >= 1, na.rm = TRUE)
## [1] 1
out <- cooks.distance(model2)
sum(out >= 1, na.rm = TRUE)
## [1] 0
out <- cooks.distance(model3)
sum(out >= 1, na.rm = TRUE)
## [1] 0
out <- cooks.distance(model4)
sum(out >= 1, na.rm = TRUE)
## [1] 0
