logisitic regression

##临床预测模型包括临床诊断模型及临床预测模型。临床诊断模型可通过R语言实现，从大量的自变量中筛选出有意义的自变量，找到最高效的 ##临床诊断公式。

##本人将继续在此公众号发表R语言相关的统计、TCGA、GEO等相关数据处理，欢迎大家关注并转发给需要的人，谢谢。

knitr::include_graphics(path = './1.jpg')

###logisitic regression ##step1筛选变量 ##step2模型评价 ##step3模型验证

##使用数据来源于MASS包中的biopsy数据集，了解更多请??biopsy

rm(list = ls())
library(MASS)
data<-biopsy

##??biopsy

##查看数据

class(data)

## [1] "data.frame"

dim(data)

## [1] 699  11

str(data)

## 'data.frame':    699 obs. of  11 variables:
##  $ ID   : chr  "1000025" "1002945" "1015425" "1016277" ...
##  $ V1   : int  5 5 3 6 4 8 1 2 2 4 ...
##  $ V2   : int  1 4 1 8 1 10 1 1 1 2 ...
##  $ V3   : int  1 4 1 8 1 10 1 2 1 1 ...
##  $ V4   : int  1 5 1 1 3 8 1 1 1 1 ...
##  $ V5   : int  2 7 2 3 2 7 2 2 2 2 ...
##  $ V6   : int  1 10 2 4 1 10 10 1 1 1 ...
##  $ V7   : int  3 3 3 3 3 9 3 3 1 2 ...
##  $ V8   : int  1 2 1 7 1 7 1 1 1 1 ...
##  $ V9   : int  1 1 1 1 1 1 1 1 5 1 ...
##  $ class: Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...

##数据整理去NA

data<-data[,-1]
data<-na.omit(data)

##数据整理更换列名

colnames(data)

##  [1] "V1"    "V2"    "V3"    "V4"    "V5"    "V6"    "V7"    "V8"    "V9"   
## [10] "class"

names(data)<-c("thickness","size","shape","adhesion","epithelial_cell_size","nuclei","chromatin"
               ,"nucleoli","mitoses","status")

##数据整理

table(data$status)

## 
##    benign malignant 
##       444       239

data$status<-ifelse(data$status=="benign",0,1)
data$status<-factor(data$status)
class(data$status)

## [1] "factor"

library(dplyr)

## 
## 载入程辑包：'dplyr'

## The following object is masked from 'package:MASS':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

data<-select(data,status,size,epithelial_cell_size,everything())

data$thickness<-ifelse(data$thickness>=mean(data$thickness),"High","Low")

data$shape<-ifelse(data$shape>=mean(data$shape),"High","Low")

data$adhesion<-ifelse(data$adhesion>=mean(data$adhesion),"High","Low")

data$nuclei<-ifelse(data$nuclei>=mean(data$nuclei),"High","Low")

data$chromatin<-ifelse(data$chromatin>=mean(data$chromatin),"High","Low")

data$nucleoli<-ifelse(data$nucleoli>=mean(data$nucleoli),"High","Low")

data$mitoses<-ifelse(data$mitoses>=mean(data$mitoses),"High","Low")

str(data)

## 'data.frame':    683 obs. of  10 variables:
##  $ status              : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ size                : int  1 4 1 8 1 10 1 1 1 2 ...
##  $ epithelial_cell_size: int  2 7 2 3 2 7 2 2 2 2 ...
##  $ thickness           : chr  "High" "High" "Low" "High" ...
##  $ shape               : chr  "Low" "High" "Low" "High" ...
##  $ adhesion            : chr  "Low" "High" "Low" "Low" ...
##  $ nuclei              : chr  "Low" "High" "Low" "High" ...
##  $ chromatin           : chr  "Low" "Low" "Low" "Low" ...
##  $ nucleoli            : chr  "Low" "Low" "Low" "High" ...
##  $ mitoses             : chr  "Low" "Low" "Low" "Low" ...
##  - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
##   ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...

for (i in names(data)[c(2:3)]){data[,i] <- as.numeric(data[,i])}
for (i in names(data)[c(4:10)]) {data[,i] <- as.factor(data[,i])}
str(data)

## 'data.frame':    683 obs. of  10 variables:
##  $ status              : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ size                : num  1 4 1 8 1 10 1 1 1 2 ...
##  $ epithelial_cell_size: num  2 7 2 3 2 7 2 2 2 2 ...
##  $ thickness           : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
##  $ shape               : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
##  $ adhesion            : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
##  $ nuclei              : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
##  $ chromatin           : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
##  $ nucleoli            : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
##  $ mitoses             : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
##  - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
##   ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...

save(data,file = "data.Rdata")

##step1筛选变量

#方法1 使用常见的检验方法（T、卡方）

rm(list = ls())
load('./data.Rdata')
names(data)

##  [1] "status"               "size"                 "epithelial_cell_size"
##  [4] "thickness"            "shape"                "adhesion"            
##  [7] "nuclei"               "chromatin"            "nucleoli"            
## [10] "mitoses"

##连续自变量（数值变量）
x1<-c("size","epithelial_cell_size")

##分类变量
x2<-c("thickness","shape","adhesion","nuclei","chromatin","nucleoli","mitoses")

library(tableone)
table1<-CreateTableOne(vars = c(x1,x2),strata = "status",data = data,
                       factorVars = x2,test = T,addOverall = T )
result1<-print(table1,showAllLevels = F)

##                                   Stratified by status
##                                    Overall      0            1           
##   n                                 683          444          239        
##   size (mean (SD))                 3.15 (3.07)  1.31 (0.86)  6.58 (2.72) 
##   epithelial_cell_size (mean (SD)) 3.23 (2.22)  2.11 (0.88)  5.33 (2.44) 
##   thickness = Low (%)               372 (54.5)   341 (76.8)    31 (13.0) 
##   shape = Low (%)                   457 (66.9)   425 (95.7)    32 (13.4) 
##   adhesion = Low (%)                451 (66.0)   400 (90.1)    51 (21.3) 
##   nuclei = Low (%)                  460 (67.3)   422 (95.0)    38 (15.9) 
##   chromatin = Low (%)               471 (69.0)   426 (95.9)    45 (18.8) 
##   nucleoli = Low (%)                468 (68.5)   421 (94.8)    47 (19.7) 
##   mitoses = Low (%)                 563 (82.4)   431 (97.1)   132 (55.2) 
##                                   Stratified by status
##                                    p      test
##   n                                           
##   size (mean (SD))                 <0.001     
##   epithelial_cell_size (mean (SD)) <0.001     
##   thickness = Low (%)              <0.001     
##   shape = Low (%)                  <0.001     
##   adhesion = Low (%)               <0.001     
##   nuclei = Low (%)                 <0.001     
##   chromatin = Low (%)              <0.001     
##   nucleoli = Low (%)               <0.001     
##   mitoses = Low (%)                <0.001

result1<-print(table1,showAllLevels = T)

##                                   Stratified by status
##                                    level Overall      0            1           
##   n                                       683          444          239        
##   size (mean (SD))                       3.15 (3.07)  1.31 (0.86)  6.58 (2.72) 
##   epithelial_cell_size (mean (SD))       3.23 (2.22)  2.11 (0.88)  5.33 (2.44) 
##   thickness (%)                    High   311 (45.5)   103 (23.2)   208 (87.0) 
##                                    Low    372 (54.5)   341 (76.8)    31 (13.0) 
##   shape (%)                        High   226 (33.1)    19 ( 4.3)   207 (86.6) 
##                                    Low    457 (66.9)   425 (95.7)    32 (13.4) 
##   adhesion (%)                     High   232 (34.0)    44 ( 9.9)   188 (78.7) 
##                                    Low    451 (66.0)   400 (90.1)    51 (21.3) 
##   nuclei (%)                       High   223 (32.7)    22 ( 5.0)   201 (84.1) 
##                                    Low    460 (67.3)   422 (95.0)    38 (15.9) 
##   chromatin (%)                    High   212 (31.0)    18 ( 4.1)   194 (81.2) 
##                                    Low    471 (69.0)   426 (95.9)    45 (18.8) 
##   nucleoli (%)                     High   215 (31.5)    23 ( 5.2)   192 (80.3) 
##                                    Low    468 (68.5)   421 (94.8)    47 (19.7) 
##   mitoses (%)                      High   120 (17.6)    13 ( 2.9)   107 (44.8) 
##                                    Low    563 (82.4)   431 (97.1)   132 (55.2) 
##                                   Stratified by status
##                                    p      test
##   n                                           
##   size (mean (SD))                 <0.001     
##   epithelial_cell_size (mean (SD)) <0.001     
##   thickness (%)                    <0.001     
##                                               
##   shape (%)                        <0.001     
##                                               
##   adhesion (%)                     <0.001     
##                                               
##   nuclei (%)                       <0.001     
##                                               
##   chromatin (%)                    <0.001     
##                                               
##   nucleoli (%)                     <0.001     
##                                               
##   mitoses (%)                      <0.001     
##

###同时也可以设定某个变量的计算方式
nonnormalvars = c("size")##非参数检验变量
exactvars <- c("epithelial_cell_size")##确切检验变量

result1<-print(table1,nonnormal = nonnormalvars, #指定非正态连续变量
                       exact = exactvars,  #指定需要Fisher确切法统计的变量
                       showAllLevels = T,  #TRUE则显示所有分类变量水平的频数和百分比
                       noSpaces = TRUE, #删除用于对齐的空格
                       printToggle = T) #不展示输出结果

##                                   Stratified by status
##                                    level Overall           0                
##   n                                      683               444              
##   size (median [IQR])                    1.00 [1.00, 5.00] 1.00 [1.00, 1.00]
##   epithelial_cell_size (mean (SD))       3.23 (2.22)       2.11 (0.88)      
##   thickness (%)                    High  311 (45.5)        103 (23.2)       
##                                    Low   372 (54.5)        341 (76.8)       
##   shape (%)                        High  226 (33.1)        19 (4.3)         
##                                    Low   457 (66.9)        425 (95.7)       
##   adhesion (%)                     High  232 (34.0)        44 (9.9)         
##                                    Low   451 (66.0)        400 (90.1)       
##   nuclei (%)                       High  223 (32.7)        22 (5.0)         
##                                    Low   460 (67.3)        422 (95.0)       
##   chromatin (%)                    High  212 (31.0)        18 (4.1)         
##                                    Low   471 (69.0)        426 (95.9)       
##   nucleoli (%)                     High  215 (31.5)        23 (5.2)         
##                                    Low   468 (68.5)        421 (94.8)       
##   mitoses (%)                      High  120 (17.6)        13 (2.9)         
##                                    Low   563 (82.4)        431 (97.1)       
##                                   Stratified by status
##                                    1                  p      test   
##   n                                239                              
##   size (median [IQR])              6.00 [4.00, 10.00] <0.001 nonnorm
##   epithelial_cell_size (mean (SD)) 5.33 (2.44)        <0.001        
##   thickness (%)                    208 (87.0)         <0.001        
##                                    31 (13.0)                        
##   shape (%)                        207 (86.6)         <0.001        
##                                    32 (13.4)                        
##   adhesion (%)                     188 (78.7)         <0.001        
##                                    51 (21.3)                        
##   nuclei (%)                       201 (84.1)         <0.001        
##                                    38 (15.9)                        
##   chromatin (%)                    194 (81.2)         <0.001        
##                                    45 (18.8)                        
##   nucleoli (%)                     192 (80.3)         <0.001        
##                                    47 (19.7)                        
##   mitoses (%)                      107 (44.8)         <0.001        
##                                    132 (55.2)

###由以上结果可以看出9个变量均有统计学差异，因此都可纳入模型验证。

###方法2 单因素logistic回归分析

rm(list = ls())
load("./data.Rdata")
library(rms)

## 载入需要的程辑包：Hmisc

## 载入需要的程辑包：lattice

## 载入需要的程辑包：survival

## 载入需要的程辑包：Formula

## 载入需要的程辑包：ggplot2

## 
## 载入程辑包：'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## 载入需要的程辑包：SparseM

## 
## 载入程辑包：'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

modle<-glm(status~.,data = data,family = binomial())

summary(modle)

## 
## Call:
## glm(formula = status ~ ., family = binomial(), data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.4566  -0.1030  -0.0817   0.0589   2.4288  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           4.12476    1.36180   3.029  0.00245 ** 
## size                  0.36416    0.16403   2.220  0.02642 *  
## epithelial_cell_size  0.09753    0.14239   0.685  0.49340    
## thicknessLow         -1.53354    0.53706  -2.855  0.00430 ** 
## shapeLow             -1.27029    0.64410  -1.972  0.04859 *  
## adhesionLow          -0.67813    0.54362  -1.247  0.21224    
## nucleiLow            -2.22493    0.50767  -4.383 1.17e-05 ***
## chromatinLow         -1.91294    0.53155  -3.599  0.00032 ***
## nucleoliLow          -1.12753    0.52714  -2.139  0.03244 *  
## mitosesLow           -1.63792    0.68457  -2.393  0.01673 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 884.35  on 682  degrees of freedom
## Residual deviance: 126.18  on 673  degrees of freedom
## AIC: 146.18
## 
## Number of Fisher Scoring iterations: 7

modle$coefficients

##          (Intercept)                 size epithelial_cell_size 
##           4.12476237           0.36415903           0.09752513 
##         thicknessLow             shapeLow          adhesionLow 
##          -1.53353629          -1.27029329          -0.67812671 
##            nucleiLow         chromatinLow          nucleoliLow 
##          -2.22492599          -1.91294051          -1.12752674 
##           mitosesLow 
##          -1.63792441

exp(cbind("OR"=coef(modle),confint(modle)))

## Waiting for profiling to be done...

##                              OR      2.5 %      97.5 %
## (Intercept)          61.8531093 4.41857754 947.9079276
## size                  1.4393031 1.07466010   2.0589840
## epithelial_cell_size  1.1024391 0.83632218   1.4672771
## thicknessLow          0.2157713 0.07256867   0.6110435
## shapeLow              0.2807493 0.08020564   1.0109500
## adhesionLow           0.5075669 0.17741815   1.5259506
## nucleiLow             0.1080754 0.03906304   0.2907083
## chromatinLow          0.1476456 0.05091027   0.4167445
## nucleoliLow           0.3238332 0.11535887   0.9263018
## mitosesLow            0.1943831 0.04823036   0.7187466

##AIC: 146.18 ##解释，size每增加1cm,恶性风险增加1.4393031倍。 ##thickness Low是high组恶性风险的0.2157713倍。

colnames(data)

##  [1] "status"               "size"                 "epithelial_cell_size"
##  [4] "thickness"            "shape"                "adhesion"            
##  [7] "nuclei"               "chromatin"            "nucleoli"            
## [10] "mitoses"

modlenew<-glm(status~size+thickness+shape+nuclei+chromatin+nucleoli+mitoses,data = data,family = binomial())

summary(modlenew)

## 
## Call:
## glm(formula = status ~ size + thickness + shape + nuclei + chromatin + 
##     nucleoli + mitoses, family = binomial(), data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.3655  -0.1072  -0.0858   0.0667   2.5741  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    3.8738     1.2259   3.160  0.00158 ** 
## size           0.4466     0.1633   2.735  0.00623 ** 
## thicknessLow  -1.6101     0.5286  -3.046  0.00232 ** 
## shapeLow      -1.3560     0.6372  -2.128  0.03333 *  
## nucleiLow     -2.3269     0.5031  -4.625 3.75e-06 ***
## chromatinLow  -1.9142     0.5365  -3.568  0.00036 ***
## nucleoliLow   -1.2903     0.5165  -2.498  0.01249 *  
## mitosesLow    -1.4256     0.6585  -2.165  0.03038 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 884.35  on 682  degrees of freedom
## Residual deviance: 128.42  on 675  degrees of freedom
## AIC: 144.42
## 
## Number of Fisher Scoring iterations: 7

modlenew$coefficients

##  (Intercept)         size thicknessLow     shapeLow    nucleiLow chromatinLow 
##    3.8737617    0.4465546   -1.6101132   -1.3560239   -2.3268944   -1.9142429 
##  nucleoliLow   mitosesLow 
##   -1.2902597   -1.4256451

exp(cbind("OR"=coef(modlenew),confint(modlenew)))

## Waiting for profiling to be done...

##                       OR      2.5 %      97.5 %
## (Intercept)  48.12307098 4.70500122 580.5895696
## size          1.56291798 1.16995577   2.2243591
## thicknessLow  0.19986500 0.06823304   0.5555434
## shapeLow      0.25768332 0.07410375   0.9089832
## nucleiLow     0.09759838 0.03555500   0.2598215
## chromatinLow  0.14745343 0.05040969   0.4199752
## nucleoliLow   0.27519932 0.09991303   0.7694625
## mitosesLow    0.24035335 0.06326596   0.8505815

##AIC: 144.42,较前下降

##如果已知某自变量x与应变量Y的关系，那其他的自变量如何纳入研究呢？

rm(list = ls())
load("./data.Rdata")
#colnames(data)
##连续自变量（数值变量）
#x1<-c("size","epithelial_cell_size")

##分类变量
#x2<-c("thickness","shape","adhesion","nuclei","chromatin","nucleoli","mitoses")
#假设已知size与status关系，需要研究其他的变量

trans<-function(x){
  modle1<-glm(status~size,data = data,family = binomial())
  coef1<-coef(modle1)[2]
  form<-as.formula(paste0("status~size+",x))
  modle2<-glm(form,data = data,family = binomial())
   coef2<-coef(modle2)[2]
   ratio<-abs(coef2-coef1)/coef1
   if (ratio>0.1) {return(x)}
}

x<-colnames(data)
x<-x[-which(x==c("status","size"))]
#x<-as.character(x)

lapply(x,trans)

## [[1]]
## [1] "epithelial_cell_size"
## 
## [[2]]
## [1] "thickness"
## 
## [[3]]
## [1] "shape"
## 
## [[4]]
## [1] "adhesion"
## 
## [[5]]
## [1] "nuclei"
## 
## [[6]]
## [1] "chromatin"
## 
## [[7]]
## [1] "nucleoli"
## 
## [[8]]
## NULL

##依据得到的结果可继续使用多因素逻辑回归
#"epithelial_cell_size" "thickness" "shape" "adhesion" "nuclei" "chromatin" "nucleoli"

###方法3 LASSO回归分析，一般临床少量自变量可不采用LASSO回归，而如果是组学数据，由于存在共线性的问题可选用LASSO回归，具有筛选变量的功能。

rm(list = ls())
library(MASS)
library(dplyr)
data<-biopsy%>%
  na.omit()%>%
  select(-1)
colnames(data)<-c("thickness","size","shape","adhesion","epithelial_cell_size","nuclei","chromatin"
               ,"nucleoli","mitoses","status")
data<-select(data,"status","size","epithelial_cell_size",everything())

data$thickness<-ifelse(data$thickness>=mean(data$thickness),1,0)

data$shape<-ifelse(data$shape>=mean(data$shape),1,0)

data$adhesion<-ifelse(data$adhesion>=mean(data$adhesion),1,0)

data$nuclei<-ifelse(data$nuclei>=mean(data$nuclei),1,0)

data$chromatin<-ifelse(data$chromatin>=mean(data$chromatin),1,0)

data$nucleoli<-ifelse(data$nucleoli>=mean(data$nucleoli),1,0)

data$mitoses<-ifelse(data$mitoses>=mean(data$mitoses),1,0)



library(glmnet)##LASS0/弹性网络

## 载入需要的程辑包：Matrix

## Loaded glmnet 4.1-2

library(caret)

## 
## 载入程辑包：'caret'

## The following object is masked from 'package:survival':
## 
##     cluster

x<-as.matrix(data[,2:10])
y<-as.matrix(data[,1])

lasso<-glmnet(x,y,family = "binomial",
                alpha = 1,#1为LASSO回归，0为岭回归
                nlambda = 1000)
View(print(lasso)) #停止在580

## 
## Call:  glmnet(x = x, y = y, family = "binomial", alpha = 1, nlambda = 1000) 
## 
##     Df  %Dev  Lambda
## 1    0  0.00 0.39800
## 2    1  0.98 0.39440
## 3    1  1.94 0.39080
## 4    1  2.88 0.38720
## 5    1  3.80 0.38360
## 6    1  4.70 0.38010
## 7    1  5.58 0.37660
## 8    1  6.45 0.37320
## 9    1  7.30 0.36970
## 10   1  8.13 0.36630
## 11   1  8.94 0.36300
## 12   2  9.81 0.35960
## 13   2 10.67 0.35630
## 14   2 11.51 0.35310
## 15   2 12.34 0.34980
## 16   2 13.15 0.34660
## 17   3 13.98 0.34340
## 18   3 14.84 0.34030
## 19   3 15.69 0.33720
## 20   3 16.53 0.33410
## 21   3 17.35 0.33100
## 22   3 18.15 0.32800
## 23   3 18.94 0.32500
## 24   3 19.72 0.32200
## 25   3 20.48 0.31900
## 26   3 21.23 0.31610
## 27   3 21.97 0.31320
## 28   4 22.72 0.31030
## 29   4 23.45 0.30750
## 30   4 24.19 0.30460
## 31   4 24.91 0.30190
## 32   4 25.61 0.29910
## 33   4 26.31 0.29630
## 34   4 27.00 0.29360
## 35   4 27.67 0.29090
## 36   4 28.33 0.28830
## 37   4 28.99 0.28560
## 38   4 29.63 0.28300
## 39   4 30.27 0.28040
## 40   4 30.89 0.27780
## 41   4 31.51 0.27530
## 42   4 32.11 0.27270
## 43   4 32.71 0.27020
## 44   4 33.30 0.26780
## 45   4 33.88 0.26530
## 46   4 34.45 0.26290
## 47   4 35.01 0.26050
## 48   4 35.56 0.25810
## 49   4 36.11 0.25570
## 50   4 36.65 0.25330
## 51   4 37.18 0.25100
## 52   4 37.71 0.24870
## 53   4 38.22 0.24640
## 54   4 38.73 0.24420
## 55   4 39.23 0.24190
## 56   4 39.73 0.23970
## 57   5 40.23 0.23750
## 58   5 40.72 0.23530
## 59   5 41.21 0.23320
## 60   5 41.70 0.23100
## 61   5 42.18 0.22890
## 62   5 42.65 0.22680
## 63   5 43.12 0.22470
## 64   5 43.58 0.22270
## 65   5 44.04 0.22060
## 66   5 44.48 0.21860
## 67   5 44.93 0.21660
## 68   5 45.36 0.21460
## 69   5 45.80 0.21260
## 70   5 46.22 0.21070
## 71   5 46.64 0.20880
## 72   5 47.06 0.20680
## 73   5 47.47 0.20490
## 74   5 47.87 0.20310
## 75   5 48.27 0.20120
## 76   5 48.67 0.19930
## 77   5 49.06 0.19750
## 78   5 49.44 0.19570
## 79   5 49.82 0.19390
## 80   5 50.20 0.19210
## 81   5 50.57 0.19040
## 82   5 50.93 0.18860
## 83   5 51.29 0.18690
## 84   5 51.65 0.18520
## 85   5 52.01 0.18350
## 86   5 52.35 0.18180
## 87   5 52.70 0.18010
## 88   5 53.04 0.17850
## 89   5 53.38 0.17680
## 90   5 53.71 0.17520
## 91   5 54.04 0.17360
## 92   5 54.36 0.17200
## 93   5 54.68 0.17040
## 94   5 55.00 0.16890
## 95   5 55.31 0.16730
## 96   5 55.62 0.16580
## 97   5 55.93 0.16430
## 98   5 56.23 0.16280
## 99   5 56.53 0.16130
## 100  5 56.83 0.15980
## 101  5 57.12 0.15830
## 102  5 57.41 0.15690
## 103  5 57.69 0.15540
## 104  5 57.97 0.15400
## 105  5 58.25 0.15260
## 106  5 58.53 0.15120
## 107  5 58.80 0.14980
## 108  5 59.07 0.14840
## 109  5 59.34 0.14710
## 110  5 59.60 0.14570
## 111  5 59.86 0.14440
## 112  5 60.12 0.14300
## 113  5 60.37 0.14170
## 114  5 60.63 0.14040
## 115  5 60.88 0.13910
## 116  5 61.12 0.13790
## 117  5 61.37 0.13660
## 118  5 61.61 0.13530
## 119  5 61.85 0.13410
## 120  5 62.08 0.13290
## 121  5 62.31 0.13170
## 122  5 62.54 0.13040
## 123  5 62.77 0.12920
## 124  5 63.00 0.12810
## 125  5 63.22 0.12690
## 126  5 63.44 0.12570
## 127  5 63.66 0.12460
## 128  5 63.88 0.12340
## 129  5 64.09 0.12230
## 130  5 64.30 0.12120
## 131  5 64.51 0.12010
## 132  5 64.72 0.11900
## 133  5 64.92 0.11790
## 134  5 65.12 0.11680
## 135  5 65.32 0.11570
## 136  5 65.52 0.11460
## 137  5 65.72 0.11360
## 138  5 65.91 0.11260
## 139  5 66.10 0.11150
## 140  5 66.29 0.11050
## 141  5 66.48 0.10950
## 142  5 66.66 0.10850
## 143  5 66.85 0.10750
## 144  5 67.03 0.10650
## 145  5 67.21 0.10550
## 146  5 67.39 0.10460
## 147  5 67.56 0.10360
## 148  5 67.74 0.10260
## 149  5 67.91 0.10170
## 150  5 68.08 0.10080
## 151  5 68.25 0.09984
## 152  5 68.41 0.09893
## 153  5 68.58 0.09802
## 154  5 68.74 0.09712
## 155  5 68.90 0.09623
## 156  5 69.06 0.09534
## 157  5 69.22 0.09447
## 158  5 69.38 0.09360
## 159  5 69.53 0.09274
## 160  5 69.69 0.09189
## 161  5 69.84 0.09105
## 162  5 69.99 0.09021
## 163  5 70.14 0.08939
## 164  5 70.28 0.08856
## 165  6 70.43 0.08775
## 166  6 70.59 0.08695
## 167  6 70.74 0.08615
## 168  6 70.89 0.08536
## 169  6 71.05 0.08457
## 170  6 71.20 0.08380
## 171  6 71.34 0.08303
## 172  6 71.49 0.08227
## 173  6 71.64 0.08151
## 174  6 71.78 0.08076
## 175  6 71.92 0.08002
## 176  6 72.06 0.07929
## 177  6 72.20 0.07856
## 178  6 72.34 0.07784
## 179  6 72.48 0.07713
## 180  6 72.61 0.07642
## 181  6 72.75 0.07572
## 182  6 72.88 0.07502
## 183  6 73.01 0.07433
## 184  6 73.14 0.07365
## 185  6 73.27 0.07298
## 186  6 73.39 0.07231
## 187  6 73.52 0.07164
## 188  6 73.64 0.07098
## 189  6 73.77 0.07033
## 190  6 73.89 0.06969
## 191  6 74.01 0.06905
## 192  6 74.13 0.06841
## 193  6 74.25 0.06779
## 194  6 74.36 0.06716
## 195  6 74.48 0.06655
## 196  6 74.59 0.06594
## 197  6 74.71 0.06533
## 198  6 74.82 0.06473
## 199  6 74.93 0.06414
## 200  6 75.04 0.06355
## 201  6 75.15 0.06297
## 202  6 75.26 0.06239
## 203  6 75.36 0.06182
## 204  6 75.47 0.06125
## 205  6 75.57 0.06069
## 206  6 75.67 0.06013
## 207  6 75.78 0.05958
## 208  6 75.88 0.05903
## 209  6 75.98 0.05849
## 210  6 76.08 0.05795
## 211  7 76.18 0.05742
## 212  7 76.27 0.05689
## 213  7 76.37 0.05637
## 214  7 76.47 0.05585
## 215  7 76.56 0.05534
## 216  7 76.66 0.05483
## 217  7 76.75 0.05433
## 218  7 76.84 0.05383
## 219  7 76.94 0.05334
## 220  7 77.03 0.05285
## 221  7 77.12 0.05236
## 222  7 77.21 0.05188
## 223  7 77.29 0.05141
## 224  7 77.38 0.05094
## 225  7 77.47 0.05047
## 226  7 77.55 0.05001
## 227  7 77.64 0.04955
## 228  7 77.72 0.04909
## 229  8 77.81 0.04864
## 230  8 77.89 0.04819
## 231  8 77.98 0.04775
## 232  8 78.06 0.04731
## 233  8 78.15 0.04688
## 234  8 78.23 0.04645
## 235  8 78.31 0.04602
## 236  8 78.39 0.04560
## 237  8 78.48 0.04518
## 238  8 78.56 0.04477
## 239  8 78.63 0.04436
## 240  8 78.71 0.04395
## 241  8 78.79 0.04355
## 242  8 78.87 0.04315
## 243  8 78.94 0.04275
## 244  8 79.02 0.04236
## 245  8 79.09 0.04197
## 246  8 79.17 0.04158
## 247  8 79.24 0.04120
## 248  8 79.31 0.04082
## 249  8 79.38 0.04045
## 250  8 79.45 0.04008
## 251  8 79.52 0.03971
## 252  8 79.59 0.03935
## 253  8 79.66 0.03899
## 254  8 79.73 0.03863
## 255  8 79.80 0.03827
## 256  8 79.86 0.03792
## 257  8 79.93 0.03757
## 258  8 80.00 0.03723
## 259  8 80.06 0.03689
## 260  8 80.12 0.03655
## 261  8 80.19 0.03621
## 262  8 80.25 0.03588
## 263  8 80.31 0.03555
## 264  8 80.37 0.03523
## 265  8 80.43 0.03490
## 266  8 80.49 0.03458
## 267  8 80.55 0.03426
## 268  8 80.61 0.03395
## 269  8 80.67 0.03364
## 270  8 80.73 0.03333
## 271  8 80.79 0.03302
## 272  8 80.84 0.03272
## 273  8 80.90 0.03242
## 274  8 80.95 0.03212
## 275  8 81.01 0.03183
## 276  8 81.06 0.03154
## 277  8 81.12 0.03125
## 278  8 81.17 0.03096
## 279  8 81.22 0.03068
## 280  8 81.27 0.03039
## 281  8 81.33 0.03012
## 282  8 81.38 0.02984
## 283  8 81.43 0.02957
## 284  8 81.48 0.02929
## 285  8 81.53 0.02903
## 286  8 81.57 0.02876
## 287  8 81.62 0.02850
## 288  8 81.67 0.02823
## 289  8 81.72 0.02797
## 290  8 81.76 0.02772
## 291  8 81.81 0.02746
## 292  8 81.86 0.02721
## 293  8 81.90 0.02696
## 294  8 81.95 0.02671
## 295  8 81.99 0.02647
## 296  8 82.04 0.02623
## 297  8 82.08 0.02599
## 298  8 82.12 0.02575
## 299  8 82.16 0.02551
## 300  8 82.21 0.02528
## 301  8 82.25 0.02504
## 302  8 82.29 0.02481
## 303  8 82.33 0.02459
## 304  8 82.37 0.02436
## 305  8 82.41 0.02414
## 306  8 82.45 0.02392
## 307  8 82.49 0.02370
## 308  8 82.53 0.02348
## 309  9 82.57 0.02326
## 310  9 82.60 0.02305
## 311  9 82.64 0.02284
## 312  9 82.68 0.02263
## 313  9 82.72 0.02242
## 314  9 82.75 0.02222
## 315  9 82.79 0.02201
## 316  9 82.83 0.02181
## 317  9 82.86 0.02161
## 318  9 82.90 0.02141
## 319  9 82.93 0.02121
## 320  9 82.97 0.02102
## 321  9 83.00 0.02083
## 322  9 83.03 0.02064
## 323  9 83.07 0.02045
## 324  9 83.10 0.02026
## 325  9 83.13 0.02007
## 326  9 83.16 0.01989
## 327  9 83.20 0.01971
## 328  9 83.23 0.01953
## 329  9 83.26 0.01935
## 330  9 83.29 0.01917
## 331  9 83.32 0.01899
## 332  9 83.35 0.01882
## 333  9 83.38 0.01865
## 334  9 83.41 0.01847
## 335  9 83.44 0.01831
## 336  9 83.47 0.01814
## 337  9 83.49 0.01797
## 338  9 83.52 0.01781
## 339  9 83.55 0.01764
## 340  9 83.58 0.01748
## 341  9 83.61 0.01732
## 342  9 83.63 0.01716
## 343  9 83.66 0.01700
## 344  9 83.69 0.01685
## 345  9 83.71 0.01669
## 346  9 83.74 0.01654
## 347  9 83.76 0.01639
## 348  9 83.79 0.01624
## 349  9 83.81 0.01609
## 350  9 83.84 0.01594
## 351  9 83.86 0.01579
## 352  9 83.89 0.01565
## 353  9 83.91 0.01551
## 354  9 83.93 0.01536
## 355  9 83.96 0.01522
## 356  9 83.98 0.01508
## 357  9 84.00 0.01494
## 358  9 84.02 0.01481
## 359  9 84.05 0.01467
## 360  9 84.07 0.01454
## 361  9 84.09 0.01440
## 362  9 84.11 0.01427
## 363  9 84.13 0.01414
## 364  9 84.15 0.01401
## 365  9 84.17 0.01388
## 366  9 84.19 0.01375
## 367  9 84.22 0.01363
## 368  9 84.24 0.01350
## 369  9 84.25 0.01338
## 370  9 84.27 0.01326
## 371  9 84.29 0.01314
## 372  9 84.31 0.01301
## 373  9 84.33 0.01290
## 374  9 84.35 0.01278
## 375  9 84.37 0.01266
## 376  9 84.39 0.01254
## 377  9 84.40 0.01243
## 378  9 84.42 0.01231
## 379  9 84.44 0.01220
## 380  9 84.46 0.01209
## 381  9 84.47 0.01198
## 382  9 84.49 0.01187
## 383  9 84.51 0.01176
## 384  9 84.52 0.01165
## 385  9 84.54 0.01154
## 386  9 84.56 0.01144
## 387  9 84.57 0.01133
## 388  9 84.59 0.01123
## 389  9 84.60 0.01113
## 390  9 84.62 0.01102
## 391  9 84.64 0.01092
## 392  9 84.65 0.01082
## 393  9 84.67 0.01072
## 394  9 84.68 0.01063
## 395  9 84.69 0.01053
## 396  9 84.71 0.01043
## 397  9 84.72 0.01034
## 398  9 84.74 0.01024
## 399  9 84.75 0.01015
## 400  9 84.76 0.01005
## 401  9 84.78 0.00996
## 402  9 84.79 0.00987
## 403  9 84.80 0.00978
## 404  9 84.82 0.00969
## 405  9 84.83 0.00960
## 406  9 84.84 0.00951
## 407  9 84.85 0.00942
## 408  9 84.87 0.00934
## 409  9 84.88 0.00925
## 410  9 84.89 0.00917
## 411  9 84.90 0.00908
## 412  9 84.91 0.00900
## 413  9 84.93 0.00892
## 414  9 84.94 0.00884
## 415  9 84.95 0.00876
## 416  9 84.96 0.00868
## 417  9 84.97 0.00860
## 418  9 84.98 0.00852
## 419  9 84.99 0.00844
## 420  9 85.00 0.00836
## 421  9 85.01 0.00828
## 422  9 85.02 0.00821
## 423  9 85.03 0.00813
## 424  9 85.04 0.00806
## 425  9 85.05 0.00798
## 426  9 85.06 0.00791
## 427  9 85.07 0.00784
## 428  9 85.08 0.00777
## 429  9 85.09 0.00769
## 430  9 85.10 0.00762
## 431  9 85.11 0.00755
## 432  9 85.12 0.00749
## 433  9 85.13 0.00742
## 434  9 85.14 0.00735
## 435  9 85.14 0.00728
## 436  9 85.15 0.00721
## 437  9 85.16 0.00715
## 438  9 85.17 0.00708
## 439  9 85.18 0.00702
## 440  9 85.19 0.00695
## 441  9 85.19 0.00689
## 442  9 85.20 0.00683
## 443  9 85.21 0.00676
## 444  9 85.22 0.00670
## 445  9 85.23 0.00664
## 446  9 85.23 0.00658
## 447  9 85.24 0.00652
## 448  9 85.25 0.00646
## 449  9 85.25 0.00640
## 450  9 85.26 0.00634
## 451  9 85.27 0.00628
## 452  9 85.28 0.00622
## 453  9 85.28 0.00617
## 454  9 85.29 0.00611
## 455  9 85.30 0.00605
## 456  9 85.30 0.00600
## 457  9 85.31 0.00594
## 458  9 85.32 0.00589
## 459  9 85.32 0.00584
## 460  9 85.33 0.00578
## 461  9 85.33 0.00573
## 462  9 85.34 0.00568
## 463  9 85.35 0.00562
## 464  9 85.35 0.00557
## 465  9 85.36 0.00552
## 466  9 85.36 0.00547
## 467  9 85.37 0.00542
## 468  9 85.37 0.00537
## 469  9 85.38 0.00532
## 470  9 85.38 0.00527
## 471  9 85.39 0.00522
## 472  9 85.40 0.00518
## 473  9 85.40 0.00513
## 474  9 85.41 0.00508
## 475  9 85.41 0.00504
## 476  9 85.42 0.00499
## 477  9 85.42 0.00494
## 478  9 85.43 0.00490
## 479  9 85.43 0.00485
## 480  9 85.43 0.00481
## 481  9 85.44 0.00476
## 482  9 85.44 0.00472
## 483  9 85.45 0.00468
## 484  9 85.45 0.00463
## 485  9 85.46 0.00459
## 486  9 85.46 0.00455
## 487  9 85.47 0.00451
## 488  9 85.47 0.00447
## 489  9 85.47 0.00442
## 490  9 85.48 0.00438
## 491  9 85.48 0.00434
## 492  9 85.49 0.00430
## 493  9 85.49 0.00426
## 494  9 85.49 0.00423
## 495  9 85.50 0.00419
## 496  9 85.50 0.00415
## 497  9 85.50 0.00411
## 498  9 85.51 0.00407
## 499  9 85.51 0.00404
## 500  9 85.51 0.00400
## 501  9 85.52 0.00396
## 502  9 85.52 0.00393
## 503  9 85.52 0.00389
## 504  9 85.53 0.00385
## 505  9 85.53 0.00382
## 506  9 85.53 0.00378
## 507  9 85.54 0.00375
## 508  9 85.54 0.00371
## 509  9 85.54 0.00368
## 510  9 85.55 0.00365
## 511  9 85.55 0.00361
## 512  9 85.55 0.00358
## 513  9 85.56 0.00355
## 514  9 85.56 0.00351
## 515  9 85.56 0.00348
## 516  9 85.56 0.00345
## 517  9 85.57 0.00342
## 518  9 85.57 0.00339
## 519  9 85.57 0.00336
## 520  9 85.57 0.00332
## 521  9 85.58 0.00330
## 522  9 85.58 0.00327
## 523  9 85.58 0.00324
## 524  9 85.58 0.00320
## 525  9 85.59 0.00318
## 526  9 85.59 0.00315
## 527  9 85.59 0.00312
## 528  9 85.59 0.00309
## 529  9 85.60 0.00306
## 530  9 85.60 0.00303
## 531  9 85.60 0.00300
## 532  9 85.60 0.00298
## 533  9 85.60 0.00295
## 534  9 85.61 0.00292
## 535  9 85.61 0.00290
## 536  9 85.61 0.00287
## 537  9 85.61 0.00284
## 538  9 85.61 0.00282
## 539  9 85.62 0.00279
## 540  9 85.62 0.00277
## 541  9 85.62 0.00274
## 542  9 85.62 0.00271
## 543  9 85.62 0.00269
## 544  9 85.63 0.00266
## 545  9 85.63 0.00264
## 546  9 85.63 0.00262
## 547  9 85.63 0.00259
## 548  9 85.63 0.00257
## 549  9 85.63 0.00254
## 550  9 85.64 0.00252
## 551  9 85.64 0.00250
## 552  9 85.64 0.00248
## 553  9 85.64 0.00245
## 554  9 85.64 0.00243
## 555  9 85.64 0.00241
## 556  9 85.64 0.00239
## 557  9 85.65 0.00236
## 558  9 85.65 0.00234
## 559  9 85.65 0.00232
## 560  9 85.65 0.00230
## 561  9 85.65 0.00228
## 562  9 85.65 0.00226
## 563  9 85.65 0.00224
## 564  9 85.66 0.00222
## 565  9 85.66 0.00220
## 566  9 85.66 0.00218
## 567  9 85.66 0.00216
## 568  9 85.66 0.00214
## 569  9 85.66 0.00212
## 570  9 85.66 0.00210
## 571  9 85.66 0.00208
## 572  9 85.67 0.00206
## 573  9 85.67 0.00204
## 574  9 85.67 0.00202
## 575  9 85.67 0.00200
## 576  9 85.67 0.00198
## 577  9 85.67 0.00197
## 578  9 85.67 0.00195
## 579  9 85.67 0.00193
## 580  9 85.67 0.00191

NROW(lasso$lambda)

## [1] 580

lasso$lambda[580]

## [1] 0.001912477

##最优lambda=0.001912477

plot(lasso)

plot(lasso,xvar = "lambda",label = T)

####最优lambda=0.001912477

lasso.coef<-coef(lasso,s=0.001912477)
lasso.coef

## 10 x 1 sparse Matrix of class "dgCMatrix"
##                               s1
## (Intercept)          -5.85678193
## size                  0.34354513
## epithelial_cell_size  0.08104042
## thickness             1.39801994
## shape                 1.25179405
## adhesion              0.60230200
## nuclei                2.12058803
## chromatin             1.80059787
## nucleoli              1.08774083
## mitoses               1.42551555

##可以看出自变量都没有被筛选出来

##方法4逐步回归

rm(list = ls())
load('./data.Rdata')
library(MASS)

#首先建立一个模型

modle<-glm(status~.,data = data,family = binomial())
summary(modle)

## 
## Call:
## glm(formula = status ~ ., family = binomial(), data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.4566  -0.1030  -0.0817   0.0589   2.4288  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           4.12476    1.36180   3.029  0.00245 ** 
## size                  0.36416    0.16403   2.220  0.02642 *  
## epithelial_cell_size  0.09753    0.14239   0.685  0.49340    
## thicknessLow         -1.53354    0.53706  -2.855  0.00430 ** 
## shapeLow             -1.27029    0.64410  -1.972  0.04859 *  
## adhesionLow          -0.67813    0.54362  -1.247  0.21224    
## nucleiLow            -2.22493    0.50767  -4.383 1.17e-05 ***
## chromatinLow         -1.91294    0.53155  -3.599  0.00032 ***
## nucleoliLow          -1.12753    0.52714  -2.139  0.03244 *  
## mitosesLow           -1.63792    0.68457  -2.393  0.01673 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 884.35  on 682  degrees of freedom
## Residual deviance: 126.18  on 673  degrees of freedom
## AIC: 146.18
## 
## Number of Fisher Scoring iterations: 7

exp(cbind("OR"=coef(modle),confint(modle)))

## Waiting for profiling to be done...

##                              OR      2.5 %      97.5 %
## (Intercept)          61.8531093 4.41857754 947.9079276
## size                  1.4393031 1.07466010   2.0589840
## epithelial_cell_size  1.1024391 0.83632218   1.4672771
## thicknessLow          0.2157713 0.07256867   0.6110435
## shapeLow              0.2807493 0.08020564   1.0109500
## adhesionLow           0.5075669 0.17741815   1.5259506
## nucleiLow             0.1080754 0.03906304   0.2907083
## chromatinLow          0.1476456 0.05091027   0.4167445
## nucleoliLow           0.3238332 0.11535887   0.9263018
## mitosesLow            0.1943831 0.04823036   0.7187466

##再使用逐步回归

###向前法逐步回归
modle.forward<-stepAIC(modle,direction = "forward")

## Start:  AIC=146.18
## status ~ size + epithelial_cell_size + thickness + shape + adhesion + 
##     nuclei + chromatin + nucleoli + mitoses

##得到的结果是全部变量，无改变。

###向后法逐步回归
modle.back<-stepAIC(modle,direction = "backward")

## Start:  AIC=146.18
## status ~ size + epithelial_cell_size + thickness + shape + adhesion + 
##     nuclei + chromatin + nucleoli + mitoses
## 
##                        Df Deviance    AIC
## - epithelial_cell_size  1   126.65 144.65
## - adhesion              1   127.68 145.68
## <none>                      126.18 146.18
## - shape                 1   129.96 147.96
## - nucleoli              1   130.58 148.58
## - mitoses               1   132.26 150.26
## - size                  1   132.39 150.39
## - thickness             1   134.48 152.48
## - chromatin             1   139.03 157.03
## - nuclei                1   145.15 163.15
## 
## Step:  AIC=144.65
## status ~ size + thickness + shape + adhesion + nuclei + chromatin + 
##     nucleoli + mitoses
## 
##             Df Deviance    AIC
## - adhesion   1   128.42 144.42
## <none>           126.65 144.65
## - shape      1   131.04 147.04
## - nucleoli   1   131.27 147.27
## - mitoses    1   132.50 148.50
## - size       1   134.79 150.79
## - thickness  1   135.47 151.47
## - chromatin  1   139.76 155.76
## - nuclei     1   145.39 161.39
## 
## Step:  AIC=144.42
## status ~ size + thickness + shape + nuclei + chromatin + nucleoli + 
##     mitoses
## 
##             Df Deviance    AIC
## <none>           128.42 144.42
## - shape      1   132.86 146.86
## - mitoses    1   133.33 147.33
## - nucleoli   1   134.40 148.40
## - thickness  1   137.91 151.91
## - size       1   138.59 152.59
## - chromatin  1   141.11 155.11
## - nuclei     1   149.62 163.62

#Step:  AIC=144.42
#status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses

###向前向后法逐步回归
modle.both<-stepAIC(modle,direction = "both")

## Start:  AIC=146.18
## status ~ size + epithelial_cell_size + thickness + shape + adhesion + 
##     nuclei + chromatin + nucleoli + mitoses
## 
##                        Df Deviance    AIC
## - epithelial_cell_size  1   126.65 144.65
## - adhesion              1   127.68 145.68
## <none>                      126.18 146.18
## - shape                 1   129.96 147.96
## - nucleoli              1   130.58 148.58
## - mitoses               1   132.26 150.26
## - size                  1   132.39 150.39
## - thickness             1   134.48 152.48
## - chromatin             1   139.03 157.03
## - nuclei                1   145.15 163.15
## 
## Step:  AIC=144.65
## status ~ size + thickness + shape + adhesion + nuclei + chromatin + 
##     nucleoli + mitoses
## 
##                        Df Deviance    AIC
## - adhesion              1   128.42 144.42
## <none>                      126.65 144.65
## + epithelial_cell_size  1   126.18 146.18
## - shape                 1   131.04 147.04
## - nucleoli              1   131.27 147.27
## - mitoses               1   132.50 148.50
## - size                  1   134.79 150.79
## - thickness             1   135.47 151.47
## - chromatin             1   139.76 155.76
## - nuclei                1   145.39 161.39
## 
## Step:  AIC=144.42
## status ~ size + thickness + shape + nuclei + chromatin + nucleoli + 
##     mitoses
## 
##                        Df Deviance    AIC
## <none>                      128.42 144.42
## + adhesion              1   126.65 144.65
## + epithelial_cell_size  1   127.68 145.68
## - shape                 1   132.86 146.86
## - mitoses               1   133.33 147.33
## - nucleoli              1   134.40 148.40
## - thickness             1   137.91 151.91
## - size                  1   138.59 152.59
## - chromatin             1   141.11 155.11
## - nuclei                1   149.62 163.62

#Step:  AIC=144.42
#status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses
#结果与向后法一致。

###方法5 最优子集

rm(list = ls())
load('./data.Rdata')
library(bestglm)

## 载入需要的程辑包：leaps

#需要把应变量放在最后一行
data$staus1<-data$status
data<-data[,-1]

bestAIC<-bestglm(data,family = binomial(),IC = "BIC")

## Morgan-Tatar search since family is non-gaussian.

attr(bestAIC$BestModel$terms,"term.labels")

## [1] "size"      "thickness" "nuclei"    "chromatin" "nucleoli"

##筛选出来的自变量包括"size"      "thickness" "nuclei"    "chromatin" "nucleoli"

glm(staus1~ size+thickness+nuclei+chromatin+nucleoli,data = data,family = binomial())

## 
## Call:  glm(formula = staus1 ~ size + thickness + nuclei + chromatin + 
##     nucleoli, family = binomial(), data = data)
## 
## Coefficients:
##  (Intercept)          size  thicknessLow     nucleiLow  chromatinLow  
##       1.3892        0.7065       -1.8117       -2.4841       -1.7778  
##  nucleoliLow  
##      -1.5696  
## 
## Degrees of Freedom: 682 Total (i.e. Null);  677 Residual
## Null Deviance:       884.4 
## Residual Deviance: 136.6     AIC: 148.6

##AIC: 148.6

###方法6 随机森林

rm(list = ls())
library(MASS)
data<-biopsy
data<-data[,-1]
data<-na.omit(data)
library(randomForest)

## randomForest 4.6-14

## Type rfNews() to see new features/changes/bug fixes.

## 
## 载入程辑包：'randomForest'

## The following object is masked from 'package:ggplot2':
## 
##     margin

## The following object is masked from 'package:dplyr':
## 
##     combine

set.seed(1234)
random<-randomForest(class~.,data = data)

random

## 
## Call:
##  randomForest(formula = class ~ ., data = data) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 2.49%
## Confusion matrix:
##           benign malignant class.error
## benign       433        11  0.02477477
## malignant      6       233  0.02510460

##OOB estimate of  error rate: 2.49%
##benign class.error 0.02477477
##malignant class.error 0.02510460

plot(random)

##红绿黑线分别表示：良性、恶性、整体

which.min(random$err.rate[,1])

## [1] 24

set.seed(1234)
random1<-randomForest(class~.,data = data,ntree=24)
random1

## 
## Call:
##  randomForest(formula = class ~ ., data = data, ntree = 24) 
##                Type of random forest: classification
##                      Number of trees: 24
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 2.49%
## Confusion matrix:
##           benign malignant class.error
## benign       433        11  0.02477477
## malignant      6       233  0.02510460

##OOB estimate of  error rate: 2.49%
##benign class.error 0.02477477
##malignant class.error 0.02510460

##变量重要性评分

varImpPlot(random1)

##自变量V2/3/6/7/5

#通过以上方法就可以得到筛选出来的变量进行模型构建了

rm(list = ls())
load("./data.Rdata")
library(MASS)

modle<-glm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
           data = data,family = binomial())

summary(modle)

## 
## Call:
## glm(formula = status ~ size + thickness + shape + nuclei + chromatin + 
##     nucleoli + mitoses, family = binomial(), data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.3655  -0.1072  -0.0858   0.0667   2.5741  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    3.8738     1.2259   3.160  0.00158 ** 
## size           0.4466     0.1633   2.735  0.00623 ** 
## thicknessLow  -1.6101     0.5286  -3.046  0.00232 ** 
## shapeLow      -1.3560     0.6372  -2.128  0.03333 *  
## nucleiLow     -2.3269     0.5031  -4.625 3.75e-06 ***
## chromatinLow  -1.9142     0.5365  -3.568  0.00036 ***
## nucleoliLow   -1.2903     0.5165  -2.498  0.01249 *  
## mitosesLow    -1.4256     0.6585  -2.165  0.03038 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 884.35  on 682  degrees of freedom
## Residual deviance: 128.42  on 675  degrees of freedom
## AIC: 144.42
## 
## Number of Fisher Scoring iterations: 7

modle$coefficients

##  (Intercept)         size thicknessLow     shapeLow    nucleiLow chromatinLow 
##    3.8737617    0.4465546   -1.6101132   -1.3560239   -2.3268944   -1.9142429 
##  nucleoliLow   mitosesLow 
##   -1.2902597   -1.4256451

exp(cbind("OR"=coef(modle),confint(modle)))

## Waiting for profiling to be done...

##                       OR      2.5 %      97.5 %
## (Intercept)  48.12307098 4.70500122 580.5895696
## size          1.56291798 1.16995577   2.2243591
## thicknessLow  0.19986500 0.06823304   0.5555434
## shapeLow      0.25768332 0.07410375   0.9089832
## nucleiLow     0.09759838 0.03555500   0.2598215
## chromatinLow  0.14745343 0.05040969   0.4199752
## nucleoliLow   0.27519932 0.09991303   0.7694625
## mitosesLow    0.24035335 0.06326596   0.8505815

##二元logistic回归通过列线图展示结果

library(rms)
dd<-datadist(data)
options(datadist="dd")

fit<-lrm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
           data = data,x = T,y = T)
nom<-nomogram(fit, fun=plogis,
               fun.at=c(0.0001,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.9999),
               lp=T,
               funlabel="Risk of hypertension")

plot(nom)

##模型评价 ###方法1 BRIER score评分 BRIER score越小校准效果越好

rm(list = ls())
load("./data.Rdata")


library(rms)
dd<-datadist(data)
options(datadist="dd")
str(data)

## 'data.frame':    683 obs. of  10 variables:
##  $ status              : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ size                : num  1 4 1 8 1 10 1 1 1 2 ...
##  $ epithelial_cell_size: num  2 7 2 3 2 7 2 2 2 2 ...
##  $ thickness           : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
##  $ shape               : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
##  $ adhesion            : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
##  $ nuclei              : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
##  $ chromatin           : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
##  $ nucleoli            : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
##  $ mitoses             : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
##  - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
##   ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...

data$status<-as.numeric(as.character(data$status))
str(data)

## 'data.frame':    683 obs. of  10 variables:
##  $ status              : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ size                : num  1 4 1 8 1 10 1 1 1 2 ...
##  $ epithelial_cell_size: num  2 7 2 3 2 7 2 2 2 2 ...
##  $ thickness           : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
##  $ shape               : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
##  $ adhesion            : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
##  $ nuclei              : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
##  $ chromatin           : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
##  $ nucleoli            : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
##  $ mitoses             : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
##  - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
##   ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...

#使用向后法结果展示
f <- lrm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
           data = data,x = T,y = T)

pred.logit <- predict(f)
phat <- 1/(1+exp(-pred.logit))

val.prob(phat, data$status,  cex=0.8)

##           Dxy       C (ROC)            R2             D      D:Chi-sq 
##  9.861095e-01  9.930548e-01  9.219452e-01  1.105311e+00  7.559274e+02 
##           D:p             U      U:Chi-sq           U:p             Q 
##            NA -2.928258e-03 -5.684342e-13  1.000000e+00  1.108239e+00 
##         Brier     Intercept         Slope          Emax           E90 
##  2.373327e-02 -2.880692e-09  1.000000e+00  1.866464e-01  2.045948e-02 
##          Eavg           S:z           S:p 
##  1.867196e-02 -1.822993e-01  8.553479e-01

###BRIER score=1.024

#####方法2 AIC AIC越小校准效果越好

rm(list = ls())
load("./data.Rdata")
library(rms)
dd<-datadist(data)
options(datadist="dd")
str(data)

## 'data.frame':    683 obs. of  10 variables:
##  $ status              : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ size                : num  1 4 1 8 1 10 1 1 1 2 ...
##  $ epithelial_cell_size: num  2 7 2 3 2 7 2 2 2 2 ...
##  $ thickness           : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
##  $ shape               : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
##  $ adhesion            : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
##  $ nuclei              : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
##  $ chromatin           : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
##  $ nucleoli            : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
##  $ mitoses             : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
##  - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
##   ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...

modle<-glm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
           data = data,family = binomial())

AIC(modle)

## [1] 144.4228

#####方法3 BIC BIC越小校准效果越好,同AIC

rm(list = ls())
load("./data.Rdata")
library(rms)
dd<-datadist(data)
options(datadist="dd")
str(data)

## 'data.frame':    683 obs. of  10 variables:
##  $ status              : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ size                : num  1 4 1 8 1 10 1 1 1 2 ...
##  $ epithelial_cell_size: num  2 7 2 3 2 7 2 2 2 2 ...
##  $ thickness           : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
##  $ shape               : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
##  $ adhesion            : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
##  $ nuclei              : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
##  $ chromatin           : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
##  $ nucleoli            : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
##  $ mitoses             : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
##  - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
##   ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...

modle<-glm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
           data = data,family = binomial())

BIC(modle)

## [1] 180.6348

方法4 ROC曲线，AUC曲线面积越大越好， ##c-index（一致性指数）0.5-1之间，c-index=0.5–0.7为低度准确，0.71–0.9中等准确，

##>0.91非常准确

rm(list = ls())
load("./data.Rdata")
library(rms)
dd<-datadist(data)
options(datadist="dd")
str(data)

## 'data.frame':    683 obs. of  10 variables:
##  $ status              : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ size                : num  1 4 1 8 1 10 1 1 1 2 ...
##  $ epithelial_cell_size: num  2 7 2 3 2 7 2 2 2 2 ...
##  $ thickness           : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
##  $ shape               : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
##  $ adhesion            : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
##  $ nuclei              : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
##  $ chromatin           : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
##  $ nucleoli            : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
##  $ mitoses             : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
##  - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
##   ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...

##使用向后法结果展示
modle<-glm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
           data = data,family = binomial())

##AUC计算

library(pROC)

## Type 'citation("pROC")' for a citation.

## 
## 载入程辑包：'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

data$pred<-predict(modle,type = "response")
ROC<-roc(data$status,data$pred)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

auc(ROC)

## Area under the curve: 0.993

ci.auc(ROC)

## 95% CI: 0.9882-0.9979 (DeLong)

##绘制ROC

plot(ROC)#最简单版

plot(ROC, print.auc=TRUE, auc.polygon=TRUE, 
     grid=c(0.1, 0.2), 
     #grid.col=c("green", "red"), 
     max.auc.polygon=TRUE,
     auc.polygon.col="skyblue",
     print.thres=TRUE)

plot(ROC, print.auc=TRUE, auc.polygon=TRUE, 
     partial.auc=c(1, 0.8),partial.auc.focus="sp", 
     grid=c(0.1, 0.2),grid.col=c("green", "red"),
     max.auc.polygon=TRUE, 
     auc.polygon.col="skyblue",
     print.thres=TRUE, reuse.auc=FALSE)

##同一AUC曲线下同时展现多条曲线

modle2<-glm(status ~ size + thickness,
           data = data,family = binomial())

data$pred2<-predict(modle2,type = "response")
ROC2<-roc(data$status,data$pred2)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

auc(ROC2)

## Area under the curve: 0.9828

ci.auc(ROC2)

## 95% CI: 0.974-0.9917 (DeLong)

plot(ROC, col="red")
plot(ROC2, col="blue", add=TRUE)
legend("bottomright",legend = c(paste("AUC of modle1 =",round(auc(ROC),3)),
                          paste("AUC of modle2 =",round(auc(ROC2),3))),
       col=c("red","blue"),
       lty= 1 ,lwd= 2)

##cindex 计算，二元logistics回归分析中，cindex与AUC值一样

fit<-lrm(status ~ size + thickness,
           data = data,x = T,y = T)

####计算方法1
data$predict<-predict(fit)
library(pROC)
modleroc<-roc(data$status,data$predict)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

auc(modleroc)

## Area under the curve: 0.9828

ci.auc(modleroc)

## 95% CI: 0.974-0.9917 (DeLong)

####计算方法2
library(Hmisc)
str(data)

## 'data.frame':    683 obs. of  13 variables:
##  $ status              : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ size                : num  1 4 1 8 1 10 1 1 1 2 ...
##  $ epithelial_cell_size: num  2 7 2 3 2 7 2 2 2 2 ...
##  $ thickness           : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
##  $ shape               : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
##  $ adhesion            : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
##  $ nuclei              : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
##  $ chromatin           : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
##  $ nucleoli            : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
##  $ mitoses             : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
##  $ pred                : num  0.01811 0.73688 0.00367 0.9838 0.00367 ...
##  $ pred2               : num  0.0755 0.8536 0.0112 0.9994 0.0112 ...
##  $ predict             : num  -2.51 1.76 -4.48 7.45 -4.48 ...
##  - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
##   ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...

data$status<-as.numeric(as.character(data$status))
str(data)

## 'data.frame':    683 obs. of  13 variables:
##  $ status              : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ size                : num  1 4 1 8 1 10 1 1 1 2 ...
##  $ epithelial_cell_size: num  2 7 2 3 2 7 2 2 2 2 ...
##  $ thickness           : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
##  $ shape               : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
##  $ adhesion            : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
##  $ nuclei              : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
##  $ chromatin           : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
##  $ nucleoli            : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
##  $ mitoses             : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
##  $ pred                : num  0.01811 0.73688 0.00367 0.9838 0.00367 ...
##  $ pred2               : num  0.0755 0.8536 0.0112 0.9994 0.0112 ...
##  $ predict             : num  -2.51 1.76 -4.48 7.45 -4.48 ...
##  - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
##   ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...

somers2(data$predict,data$status)

##           C         Dxy           n     Missing 
##   0.9828348   0.9656696 683.0000000   0.0000000

####计算方法3
fit

## Logistic Regression Model
##  
##  lrm(formula = status ~ size + thickness, data = data, x = T, 
##      y = T)
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs           683    LR chi2     659.13    R2       0.853    C       0.983    
##   0            444    d.f.             2    g        5.153    Dxy     0.965    
##   1            239    Pr(> chi2) <0.0001    gr     173.033    gamma   0.976    
##  max |deriv| 7e-09                          gp       0.434    tau-a   0.440    
##                                             Brier    0.043                     
##  
##                Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept     -3.9280 0.4160 -9.44  <0.0001 
##  size           1.4228 0.1343 10.60  <0.0001 
##  thickness=Low -1.9774 0.3804 -5.20  <0.0001 
##

###方法5 拟合优度检验

##依据之前方法得到的自变量

rm(list = ls())
load("./data.Rdata")
##T检验/LASSO回归
formula1<-as.formula(status ~ size + epithelial_cell_size+thickness + shape +adhesion+ nuclei + chromatin + 
    nucleoli + mitoses)
##单因素logistic回归、逐步回归
formula2<-as.formula(status ~ size + thickness + shape + nuclei + chromatin + 
    nucleoli + mitoses)
##最优子集
formula3<-as.formula(status~ size+thickness+nuclei+chromatin+nucleoli)

library(rms)
dd<-datadist(data)
options(datadist = "dd")

library(ResourceSelection)

## ResourceSelection 0.3-5   2019-07-22

fit1<-glm(formula1,data = data,family = binomial())
h1<-hoslem.test(fit1$y,fitted(fit1),g=10)
h1

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  fit1$y, fitted(fit1)
## X-squared = 17.03, df = 8, p-value = 0.0298

fit2<-glm(formula2,data = data,family = binomial())
h2<-hoslem.test(fit2$y,fitted(fit2),g=10)
h2

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  fit2$y, fitted(fit2)
## X-squared = 12.897, df = 8, p-value = 0.1154

##X-squared = 12.897, df = 8, p-value = 0.1154 说明模型拟合优度好

fit3<-glm(formula3,data = data,family = binomial())
h3<-hoslem.test(fit3$y,fitted(fit3),g=10)
h3

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  fit3$y, fitted(fit3)
## X-squared = 27.02, df = 8, p-value = 0.0007015

######方法6 DCA曲线（临床决策曲线）

##依据之前方法得到的自变量

rm(list = ls())
load("./data.Rdata")
##T检验/LASSO回归
formula1<-as.formula(status ~ size + epithelial_cell_size+thickness + shape +adhesion+ nuclei + chromatin + 
    nucleoli + mitoses)
##单因素logistic回归、逐步回归
formula2<-as.formula(status ~ size + thickness + shape + nuclei + chromatin + 
    nucleoli + mitoses)
##最优子集
formula3<-as.formula(status~ size+thickness+nuclei+chromatin+nucleoli)

library(rms)
dd<-datadist(data)
options(datadist = "dd")
str(data)

## 'data.frame':    683 obs. of  10 variables:
##  $ status              : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ size                : num  1 4 1 8 1 10 1 1 1 2 ...
##  $ epithelial_cell_size: num  2 7 2 3 2 7 2 2 2 2 ...
##  $ thickness           : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
##  $ shape               : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
##  $ adhesion            : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
##  $ nuclei              : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
##  $ chromatin           : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
##  $ nucleoli            : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
##  $ mitoses             : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
##  - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
##   ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...

data$status<-as.numeric(as.character(data$status))
str(data)

## 'data.frame':    683 obs. of  10 variables:
##  $ status              : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ size                : num  1 4 1 8 1 10 1 1 1 2 ...
##  $ epithelial_cell_size: num  2 7 2 3 2 7 2 2 2 2 ...
##  $ thickness           : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
##  $ shape               : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
##  $ adhesion            : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
##  $ nuclei              : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
##  $ chromatin           : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
##  $ nucleoli            : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
##  $ mitoses             : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
##  - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
##   ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...

library(rmda)

curve1<-decision_curve(formula1,data = data,family = binomial(link = "logit"),
                       thresholds = seq(0, 1, by = 0.01), 
                       confidence.intervals = 0.95,study.design = "case-control",
                      population.prevalence = 0.3)

## Calculating net benefit curves for case-control data. All calculations are done conditional on the outcome prevalence provided.

## Note:  The data provided is used to both fit a prediction model and to estimate the respective decision curve. This may cause bias in decision curve estimates leading to over-confidence in model performance.

curve2<-decision_curve(formula2,data = data,family = binomial(link = "logit"),
                       thresholds = seq(0, 1, by = 0.01), 
                       confidence.intervals = 0.95,study.design = "case-control",
                      population.prevalence = 0.3)

## Calculating net benefit curves for case-control data. All calculations are done conditional on the outcome prevalence provided.
## Note:  The data provided is used to both fit a prediction model and to estimate the respective decision curve. This may cause bias in decision curve estimates leading to over-confidence in model performance.

curve3<-decision_curve(formula3,data = data,family = binomial(link = "logit"),
                       thresholds = seq(0, 1, by = 0.01), 
                       confidence.intervals = 0.95,study.design = "case-control",
                      population.prevalence = 0.3)

## Calculating net benefit curves for case-control data. All calculations are done conditional on the outcome prevalence provided.
## Note:  The data provided is used to both fit a prediction model and to estimate the respective decision curve. This may cause bias in decision curve estimates leading to over-confidence in model performance.

summary(curve1,measure = "NB")

## 
## Net Benefit (95% Confidence Intervals):
## ----------------------------------------------------------------------------------------------------------
##    risk      cost:benefit       percent               All                  status ~ size +           None 
##  threshold      ratio          high risk                                epithelial_cell_size +            
##                                                                     thickness + shape + adhesion +        
##                                                                     nuclei + chromatin + nucleoli         
##                                                                               + mitoses                   
## ----------- -------------- ------------------ -------------------- -------------------------------- ------
##      0           0:1              100                 0.3                        0.3                  0   
##                                (100, 100)          (0.3, 0.3)                 (0.3, 0.3)                  
## 
##    0.01          1:99            51.599              0.293                      0.298                 0   
##                             (36.149, 55.383)     (0.293, 0.293)             (0.297, 0.299)                
## 
##    0.02          1:49            40.405              0.286                      0.298                 0   
##                             (34.572, 43.401)     (0.286, 0.286)             (0.297, 0.299)                
## 
##    0.03          3:97            35.518              0.278                      0.298                 0   
##                             (33.626, 39.775)     (0.278, 0.278)             (0.296, 0.299)                
## 
##    0.04          1:24            34.887              0.271                      0.298                 0   
##                             (32.995, 38.041)     (0.271, 0.271)             (0.295, 0.299)                
## 
##    0.05          1:19            34.447              0.263                      0.296                 0   
##                             (32.68, 36.622)      (0.263, 0.263)             (0.294, 0.299)                
## 
##    0.06          3:47            33.974              0.255                      0.296                 0   
##                             (32.365, 36.023)     (0.255, 0.255)             (0.294, 0.298)                
## 
##    0.07          7:93            33.816              0.247                      0.296                 0   
##                             (32.082, 35.392)     (0.247, 0.247)             (0.293, 0.298)                
## 
##    0.08          2:23            33.816              0.239                      0.295                 0   
##                             (31.892, 35.077)     (0.239, 0.239)             (0.293, 0.298)                
## 
##    0.09          9:91            33.343              0.231                      0.295                 0   
##                             (31.766, 34.762)     (0.231, 0.231)             (0.292, 0.298)                
## 
##     0.1          1:9             33.028              0.222                      0.295                 0   
##                             (31.609, 34.572)     (0.222, 0.222)             (0.292, 0.298)                
## 
##    0.11         11:89            32.87               0.213                      0.295                 0   
##                             (31.451, 34.353)     (0.213, 0.213)             (0.291, 0.298)                
## 
##    0.12          3:22            32.87               0.205                      0.295                 0   
##                             (31.451, 34.131)     (0.205, 0.205)             (0.29, 0.298)                 
## 
##    0.13         13:87            32.87               0.195                      0.294                 0   
##                             (31.419, 33.912)     (0.195, 0.195)             (0.29, 0.297)                 
## 
##    0.14          7:43            32.87               0.186                      0.294                 0   
##                             (31.326, 33.755)     (0.186, 0.186)             (0.289, 0.297)                
## 
##    0.15          3:17            32.555              0.176                      0.294                 0   
##                             (31.293, 33.69)      (0.176, 0.176)             (0.289, 0.297)                
## 
##    0.16          4:21            32.429              0.167                      0.292                 0   
##                              (31.2, 33.565)      (0.167, 0.167)             (0.288, 0.297)                
## 
##    0.17         17:83            32.271              0.157                      0.292                 0   
##                             (31.168, 33.375)     (0.157, 0.157)             (0.287, 0.296)                
## 
##    0.18          9:41            32.271              0.146                      0.292                 0   
##                             (31.136, 33.217)     (0.146, 0.146)             (0.287, 0.296)                
## 
##    0.19         19:81            32.146              0.136                       0.29                 0   
##                             (31.042, 33.217)     (0.136, 0.136)             (0.286, 0.296)                
## 
##     0.2          1:4             32.146              0.125                       0.29                 0   
##                             (30.978, 33.124)     (0.125, 0.125)             (0.285, 0.296)                
## 
##    0.21         21:79            32.146              0.114                       0.29                 0   
##                             (30.885, 33.092)     (0.114, 0.114)             (0.284, 0.296)                
## 
##    0.22         11:39            31.988              0.103                       0.29                 0   
##                             (30.856, 33.06)      (0.103, 0.103)             (0.284, 0.296)                
## 
##    0.23         23:77            31.988              0.091                      0.289                 0   
##                             (30.853, 32.999)     (0.091, 0.091)             (0.283, 0.295)                
## 
##    0.24          6:19            31.988              0.079                      0.289                 0   
##                             (30.82, 32.838)      (0.079, 0.079)             (0.282, 0.295)                
## 
##    0.25          1:3             31.831              0.067                      0.289                 0   
##                             (30.759, 32.777)     (0.067, 0.067)             (0.281, 0.295)                
## 
##    0.26         13:37            31.831              0.054                      0.288                 0   
##                             (30.759, 32.715)     (0.054, 0.054)             (0.28, 0.295)                 
## 
##    0.27         27:73            31.831              0.041                      0.288                 0   
##                             (30.634, 32.619)     (0.041, 0.041)             (0.28, 0.295)                 
## 
##    0.28          7:18            31.39               0.028                      0.288                 0   
##                             (30.602, 32.493)     (0.028, 0.028)             (0.279, 0.294)                
## 
##    0.29         29:71            31.264              0.014                      0.286                 0   
##                             (30.602, 32.461)     (0.014, 0.014)             (0.278, 0.294)                
## 
##     0.3          3:7             31.264                0                        0.286                 0   
##                             (30.54, 32.461)          (0, 0)                 (0.278, 0.293)                
## 
##    0.31         31:69            31.139              -0.014                     0.284                 0   
##                             (30.511, 32.336)    (-0.014, -0.014)            (0.277, 0.294)                
## 
##    0.32          8:17            31.139              -0.029                     0.284                 0   
##                             (30.415, 32.274)    (-0.029, -0.029)            (0.276, 0.293)                
## 
##    0.33         33:67            31.139              -0.045                     0.283                 0   
##                             (30.412, 32.178)    (-0.045, -0.045)            (0.275, 0.293)                
## 
##    0.34         17:33            31.139              -0.061                     0.283                 0   
##                             (30.383, 32.149)    (-0.061, -0.061)            (0.274, 0.293)                
## 
##    0.35          7:13            31.139              -0.077                     0.282                 0   
##                             (30.38, 32.114)     (-0.077, -0.077)            (0.273, 0.292)                
## 
##    0.36          9:16            31.139              -0.094                     0.282                 0   
##                             (30.289, 31.959)    (-0.094, -0.094)            (0.272, 0.292)                
## 
##    0.37         37:63            31.139              -0.111                     0.281                 0   
##                             (30.286, 31.895)    (-0.111, -0.111)            (0.271, 0.292)                
## 
##    0.38         19:31            30.981              -0.129                     0.282                 0   
##                             (30.254, 31.895)    (-0.129, -0.129)            (0.27, 0.291)                 
## 
##    0.39         39:61            30.981              -0.148                     0.281                 0   
##                             (30.164, 31.799)    (-0.148, -0.148)            (0.269, 0.29)                 
## 
##     0.4          2:3             30.981              -0.167                     0.281                 0   
##                             (30.099, 31.708)    (-0.167, -0.167)            (0.269, 0.29)                 
## 
##    0.41         41:59            30.856              -0.186                     0.279                 0   
##                             (30.099, 31.676)    (-0.186, -0.186)            (0.268, 0.29)                 
## 
##    0.42         21:29            30.856              -0.207                     0.279                 0   
##                             (30.067, 31.612)    (-0.207, -0.207)            (0.267, 0.29)                 
## 
##    0.43         43:57            30.698              -0.228                     0.279                 0   
##                             (30.067, 31.521)    (-0.228, -0.228)            (0.265, 0.29)                 
## 
##    0.44         11:14            30.698              -0.25                      0.279                 0   
##                             (29.881, 31.483)     (-0.25, -0.25)             (0.263, 0.289)                
## 
##    0.45          9:11            30.698              -0.273                     0.278                 0   
##                             (29.877, 31.393)    (-0.273, -0.273)            (0.262, 0.289)                
## 
##    0.46         23:27            30.572              -0.296                     0.277                 0   
##                             (29.813, 31.361)    (-0.296, -0.296)            (0.26, 0.288)                 
## 
##    0.47         47:53            30.572              -0.321                     0.276                 0   
##                             (29.726, 31.329)    (-0.321, -0.321)            (0.259, 0.288)                
## 
##    0.48         12:13            30.572              -0.346                     0.275                 0   
##                             (29.723, 31.264)    (-0.346, -0.346)            (0.258, 0.288)                
## 
##    0.49         49:51            30.572              -0.373                     0.275                 0   
##                             (29.629, 31.203)    (-0.373, -0.373)            (0.255, 0.287)                
## 
##     0.5          1:1             30.321               -0.4                      0.272                 0   
##                             (29.597, 31.171)      (-0.4, -0.4)              (0.254, 0.287)                
## 
##    0.51         51:49            30.196              -0.429                      0.27                 0   
##                             (29.504, 31.139)    (-0.429, -0.429)            (0.252, 0.286)                
## 
##    0.52         13:12            30.07               -0.458                     0.268                 0   
##                             (29.443, 31.045)    (-0.458, -0.458)            (0.251, 0.286)                
## 
##    0.53         53:47            29.945              -0.489                     0.266                 0   
##                             (29.408, 30.981)    (-0.489, -0.489)            (0.249, 0.286)                
## 
##    0.54         27:23            29.819              -0.522                     0.264                 0   
##                             (29.288, 30.888)    (-0.522, -0.522)            (0.247, 0.286)                
## 
##    0.55          11:9            29.819              -0.556                     0.263                 0   
##                             (29.253, 30.826)    (-0.556, -0.556)            (0.247, 0.285)                
## 
##    0.56         14:11            29.819              -0.591                     0.262                 0   
##                             (29.192, 30.73)     (-0.591, -0.591)            (0.245, 0.284)                
## 
##    0.57         57:43            29.694              -0.628                      0.26                 0   
##                             (29.098, 30.64)     (-0.628, -0.628)            (0.243, 0.284)                
## 
##    0.58         29:21            29.443              -0.667                     0.257                 0   
##                             (29.034, 30.605)    (-0.667, -0.667)            (0.24, 0.283)                 
## 
##    0.59         59:41            29.317              -0.707                     0.255                 0   
##                             (29.005, 30.511)    (-0.707, -0.707)            (0.24, 0.282)                 
## 
##     0.6          3:2             29.317              -0.75                      0.254                 0   
##                             (28.908, 30.482)     (-0.75, -0.75)             (0.238, 0.282)                
## 
##    0.61         61:39            29.317              -0.795                     0.253                 0   
##                             (28.786, 30.418)    (-0.795, -0.795)            (0.235, 0.281)                
## 
##    0.62         31:19            29.192              -0.842                      0.25                 0   
##                             (28.754, 30.353)    (-0.842, -0.842)            (0.234, 0.28)                 
## 
##    0.63         63:37            29.066              -0.892                     0.248                 0   
##                             (28.657, 30.228)    (-0.892, -0.892)            (0.231, 0.279)                
## 
##    0.64          16:9            28.941              -0.944                     0.246                 0   
##                             (28.532, 30.135)    (-0.944, -0.944)            (0.229, 0.279)                
## 
##    0.65          13:7            28.941                -1                       0.244                 0   
##                             (28.374, 30.064)        (-1, -1)                (0.228, 0.278)                
## 
##    0.66         33:17            28.783              -1.059                     0.246                 0   
##                             (28.22, 29.948)     (-1.059, -1.059)            (0.225, 0.278)                
## 
##    0.67         67:33            28.657              -1.121                     0.244                 0   
##                             (28.152, 29.881)    (-1.121, -1.121)            (0.221, 0.277)                
## 
##    0.68          17:8            28.657              -1.188                     0.242                 0   
##                             (27.969, 29.787)    (-1.188, -1.188)            (0.219, 0.277)                
## 
##    0.69         69:31            28.657              -1.258                     0.241                 0   
##                             (27.875, 29.723)    (-1.258, -1.258)            (0.215, 0.275)                
## 
##     0.7          7:3             28.657              -1.333                     0.239                 0   
##                             (27.717, 29.629)    (-1.333, -1.333)            (0.213, 0.274)                
## 
##    0.71         71:29            28.532              -1.414                     0.236                 0   
##                             (27.528, 29.594)    (-1.414, -1.414)            (0.21, 0.273)                 
## 
##    0.72          18:7            28.532               -1.5                      0.235                 0   
##                             (27.463, 29.504)      (-1.5, -1.5)              (0.208, 0.274)                
## 
##    0.73         73:27            28.406              -1.593                     0.232                 0   
##                             (27.338, 29.472)    (-1.593, -1.593)            (0.203, 0.273)                
## 
##    0.74         37:13            28.406              -1.692                     0.229                 0   
##                             (27.122, 29.443)    (-1.692, -1.692)             (0.2, 0.271)                 
## 
##    0.75          3:1             28.281               -1.8                      0.226                 0   
##                             (26.993, 29.346)      (-1.8, -1.8)              (0.197, 0.27)                 
## 
##    0.76          19:6            28.281              -1.917                     0.224                 0   
##                             (26.932, 29.282)    (-1.917, -1.917)            (0.192, 0.27)                 
## 
##    0.77         77:23            28.281              -2.043                     0.221                 0   
##                             (26.713, 29.218)    (-2.043, -2.043)            (0.191, 0.269)                
## 
##    0.78         39:11            28.281              -2.182                     0.218                 0   
##                             (26.617, 29.031)    (-2.182, -2.182)            (0.188, 0.267)                
## 
##    0.79         79:21            27.904              -2.333                     0.211                 0   
##                             (26.427, 28.938)    (-2.333, -2.333)            (0.184, 0.266)                
## 
##     0.8          4:1             27.621               -2.5                      0.213                 0   
##                             (26.147, 28.812)      (-2.5, -2.5)              (0.179, 0.265)                
## 
##    0.81         81:19            27.37               -2.684                     0.207                 0   
##                             (25.957, 28.748)    (-2.684, -2.684)            (0.174, 0.264)                
## 
##    0.82          41:9            27.119              -2.889                     0.201                 0   
##                             (25.738, 28.654)    (-2.889, -2.889)            (0.166, 0.262)                
## 
##    0.83         83:17            27.119              -3.118                     0.197                 0   
##                             (25.642, 28.432)    (-3.118, -3.118)            (0.163, 0.262)                
## 
##    0.84          21:4            26.993              -3.375                     0.191                 0   
##                             (25.265, 28.31)     (-3.375, -3.375)            (0.157, 0.261)                
## 
##    0.85          17:3            26.617              -3.667                     0.182                 0   
##                             (25.075, 28.307)    (-3.667, -3.667)            (0.149, 0.258)                
## 
##    0.86          43:7            26.459                -4                       0.186                 0   
##                             (24.763, 28.12)         (-4, -4)                (0.143, 0.254)                
## 
##    0.87         87:13            26.083              -4.385                     0.176                 0   
##                             (24.451, 28.056)    (-4.385, -4.385)            (0.133, 0.251)                
## 
##    0.88          22:3            26.083              -4.833                     0.169                 0   
##                             (24.042, 27.963)    (-4.833, -4.833)            (0.124, 0.248)                
## 
##    0.89         89:11            25.832              -5.364                     0.158                 0   
##                             (23.698, 27.711)    (-5.364, -5.364)            (0.114, 0.247)                
## 
##     0.9          9:1             25.423                -6                        0.16                 0   
##                             (23.196, 27.46)         (-6, -6)                (0.099, 0.245)                
## 
##    0.91          91:9            24.921              -6.778                     0.144                 0   
##                             (22.912, 27.274)    (-6.778, -6.778)            (0.08, 0.239)                 
## 
##    0.92          23:2            24.795              -7.75                       0.13                 0   
##                             (22.285, 27.052)     (-7.75, -7.75)             (0.066, 0.235)                
## 
##    0.93          93:7            24.419                -9                       0.109                 0   
##                             (21.905, 26.958)        (-9, -9)                (0.05, 0.229)                 
## 
##    0.94          47:3            24.01              -10.667                     0.109                 0   
##                             (20.84, 26.675)    (-10.667, -10.667)           (0.033, 0.221)                
## 
##    0.95          19:1            23.193               -13                       0.137                 0   
##                             (19.836, 26.173)       (-13, -13)               (0.002, 0.219)                
## 
##    0.96          24:1            22.314              -16.5                      0.105                 0   
##                             (18.738, 25.671)     (-16.5, -16.5)            (-0.031, 0.214)                
## 
##    0.97          97:3            21.31              -22.333                     0.055                 0   
##                             (16.92, 25.169)    (-22.333, -22.333)           (-0.08, 0.204)                
## 
##    0.98          49:1            19.395               -34                       0.036                 0   
##                             (14.345, 24.509)       (-34, -34)              (-0.202, 0.211)                
## 
##    0.99          99:1            15.253               -69                       -0.163                0   
##                             (10.418, 23.128)       (-69, -69)               (-0.34, 0.213)                
## 
##      1          Inf:1              0                   NA                         NA                  NA  
##                                  (0, 0)             (NA, NA)                   (NA, NA)                   
## ----------------------------------------------------------------------------------------------------------

summary(curve1,measure = "sNB")

## 
## Standardized Net Benefit (95% Confidence Intervals):
## ------------------------------------------------------------------------------------------------------------
##    risk      cost:benefit       percent                All                   status ~ size +           None 
##  threshold      ratio          high risk                                  epithelial_cell_size +            
##                                                                       thickness + shape + adhesion +        
##                                                                       nuclei + chromatin + nucleoli         
##                                                                                 + mitoses                   
## ----------- -------------- ------------------ ---------------------- -------------------------------- ------
##      0           0:1              100                   1                           1                   0   
##                                (100, 100)             (1, 1)                      (1, 1)                    
## 
##    0.01          1:99            51.599               0.976                       0.993                 0   
##                             (36.149, 55.383)      (0.976, 0.976)              (0.991, 0.998)                
## 
##    0.02          1:49            40.405               0.952                       0.993                 0   
##                             (34.572, 43.401)      (0.952, 0.952)              (0.989, 0.997)                
## 
##    0.03          3:97            35.518               0.928                       0.994                 0   
##                             (33.626, 39.775)      (0.928, 0.928)              (0.985, 0.996)                
## 
##    0.04          1:24            34.887               0.903                       0.993                 0   
##                             (32.995, 38.041)      (0.903, 0.903)              (0.983, 0.996)                
## 
##    0.05          1:19            34.447               0.877                       0.988                 0   
##                             (32.68, 36.622)       (0.877, 0.877)              (0.981, 0.995)                
## 
##    0.06          3:47            33.974               0.851                       0.987                 0   
##                             (32.365, 36.023)      (0.851, 0.851)              (0.979, 0.995)                
## 
##    0.07          7:93            33.816               0.824                       0.986                 0   
##                             (32.082, 35.392)      (0.824, 0.824)              (0.978, 0.994)                
## 
##    0.08          2:23            33.816               0.797                       0.984                 0   
##                             (31.892, 35.077)      (0.797, 0.797)              (0.976, 0.994)                
## 
##    0.09          9:91            33.343               0.769                       0.984                 0   
##                             (31.766, 34.762)      (0.769, 0.769)              (0.973, 0.993)                
## 
##     0.1          1:9             33.028               0.741                       0.984                 0   
##                             (31.609, 34.572)      (0.741, 0.741)              (0.972, 0.993)                
## 
##    0.11         11:89            32.87                0.712                       0.983                 0   
##                             (31.451, 34.353)      (0.712, 0.712)              (0.97, 0.992)                 
## 
##    0.12          3:22            32.87                0.682                       0.982                 0   
##                             (31.451, 34.131)      (0.682, 0.682)              (0.967, 0.992)                
## 
##    0.13         13:87            32.87                0.651                       0.981                 0   
##                             (31.419, 33.912)      (0.651, 0.651)              (0.965, 0.991)                
## 
##    0.14          7:43            32.87                 0.62                        0.98                 0   
##                             (31.326, 33.755)       (0.62, 0.62)               (0.964, 0.991)                
## 
##    0.15          3:17            32.555               0.588                        0.98                 0   
##                             (31.293, 33.69)       (0.588, 0.588)              (0.962, 0.99)                 
## 
##    0.16          4:21            32.429               0.556                       0.975                 0   
##                              (31.2, 33.565)       (0.556, 0.556)              (0.96, 0.989)                 
## 
##    0.17         17:83            32.271               0.522                       0.974                 0   
##                             (31.168, 33.375)      (0.522, 0.522)              (0.958, 0.988)                
## 
##    0.18          9:41            32.271               0.488                       0.973                 0   
##                             (31.136, 33.217)      (0.488, 0.488)              (0.956, 0.987)                
## 
##    0.19         19:81            32.146               0.453                       0.968                 0   
##                             (31.042, 33.217)      (0.453, 0.453)              (0.954, 0.987)                
## 
##     0.2          1:4             32.146               0.417                       0.966                 0   
##                             (30.978, 33.124)      (0.417, 0.417)              (0.95, 0.987)                 
## 
##    0.21         21:79            32.146                0.38                       0.965                 0   
##                             (30.885, 33.092)       (0.38, 0.38)               (0.947, 0.986)                
## 
##    0.22         11:39            31.988               0.342                       0.965                 0   
##                             (30.856, 33.06)       (0.342, 0.342)              (0.945, 0.985)                
## 
##    0.23         23:77            31.988               0.303                       0.964                 0   
##                             (30.853, 32.999)      (0.303, 0.303)              (0.942, 0.983)                
## 
##    0.24          6:19            31.988               0.263                       0.963                 0   
##                             (30.82, 32.838)       (0.263, 0.263)              (0.941, 0.984)                
## 
##    0.25          1:3             31.831               0.222                       0.963                 0   
##                             (30.759, 32.777)      (0.222, 0.222)              (0.938, 0.984)                
## 
##    0.26         13:37            31.831                0.18                       0.962                 0   
##                             (30.759, 32.715)       (0.18, 0.18)               (0.934, 0.983)                
## 
##    0.27         27:73            31.831               0.137                        0.96                 0   
##                             (30.634, 32.619)      (0.137, 0.137)              (0.934, 0.982)                
## 
##    0.28          7:18            31.39                0.093                       0.959                 0   
##                             (30.602, 32.493)      (0.093, 0.093)              (0.93, 0.981)                 
## 
##    0.29         29:71            31.264               0.047                       0.953                 0   
##                             (30.602, 32.461)      (0.047, 0.047)              (0.928, 0.979)                
## 
##     0.3          3:7             31.264                 0                         0.952                 0   
##                             (30.54, 32.461)           (0, 0)                  (0.926, 0.978)                
## 
##    0.31         31:69            31.139               -0.048                      0.947                 0   
##                             (30.511, 32.336)     (-0.048, -0.048)             (0.923, 0.979)                
## 
##    0.32          8:17            31.139               -0.098                      0.945                 0   
##                             (30.415, 32.274)     (-0.098, -0.098)             (0.92, 0.977)                 
## 
##    0.33         33:67            31.139               -0.149                      0.944                 0   
##                             (30.412, 32.178)     (-0.149, -0.149)             (0.917, 0.976)                
## 
##    0.34         17:33            31.139               -0.202                      0.942                 0   
##                             (30.383, 32.149)     (-0.202, -0.202)             (0.912, 0.975)                
## 
##    0.35          7:13            31.139               -0.256                      0.941                 0   
##                             (30.38, 32.114)      (-0.256, -0.256)             (0.91, 0.973)                 
## 
##    0.36          9:16            31.139               -0.312                      0.939                 0   
##                             (30.289, 31.959)     (-0.312, -0.312)             (0.907, 0.973)                
## 
##    0.37         37:63            31.139               -0.37                       0.938                 0   
##                             (30.286, 31.895)      (-0.37, -0.37)              (0.904, 0.972)                
## 
##    0.38         19:31            30.981               -0.43                       0.939                 0   
##                             (30.254, 31.895)      (-0.43, -0.43)               (0.9, 0.969)                 
## 
##    0.39         39:61            30.981               -0.492                      0.938                 0   
##                             (30.164, 31.799)     (-0.492, -0.492)             (0.898, 0.968)                
## 
##     0.4          2:3             30.981               -0.556                      0.936                 0   
##                             (30.099, 31.708)     (-0.556, -0.556)             (0.896, 0.967)                
## 
##    0.41         41:59            30.856               -0.621                      0.931                 0   
##                             (30.099, 31.676)     (-0.621, -0.621)             (0.892, 0.966)                
## 
##    0.42         21:29            30.856               -0.69                       0.929                 0   
##                             (30.067, 31.612)      (-0.69, -0.69)              (0.889, 0.965)                
## 
##    0.43         43:57            30.698               -0.76                       0.931                 0   
##                             (30.067, 31.521)      (-0.76, -0.76)              (0.882, 0.967)                
## 
##    0.44         11:14            30.698               -0.833                      0.929                 0   
##                             (29.881, 31.483)     (-0.833, -0.833)             (0.875, 0.963)                
## 
##    0.45          9:11            30.698               -0.909                      0.928                 0   
##                             (29.877, 31.393)     (-0.909, -0.909)             (0.872, 0.962)                
## 
##    0.46         23:27            30.572               -0.988                      0.922                 0   
##                             (29.813, 31.361)     (-0.988, -0.988)             (0.866, 0.961)                
## 
##    0.47         47:53            30.572               -1.069                       0.92                 0   
##                             (29.726, 31.329)     (-1.069, -1.069)             (0.862, 0.96)                 
## 
##    0.48         12:13            30.572               -1.154                      0.918                 0   
##                             (29.723, 31.264)     (-1.154, -1.154)             (0.859, 0.959)                
## 
##    0.49         49:51            30.572               -1.242                      0.916                 0   
##                             (29.629, 31.203)     (-1.242, -1.242)             (0.851, 0.957)                
## 
##     0.5          1:1             30.321               -1.333                      0.906                 0   
##                             (29.597, 31.171)     (-1.333, -1.333)             (0.847, 0.956)                
## 
##    0.51         51:49            30.196               -1.429                      0.899                 0   
##                             (29.504, 31.139)     (-1.429, -1.429)             (0.841, 0.955)                
## 
##    0.52         13:12            30.07                -1.528                      0.893                 0   
##                             (29.443, 31.045)     (-1.528, -1.528)             (0.837, 0.953)                
## 
##    0.53         53:47            29.945               -1.631                      0.886                 0   
##                             (29.408, 30.981)     (-1.631, -1.631)             (0.831, 0.952)                
## 
##    0.54         27:23            29.819               -1.739                       0.88                 0   
##                             (29.288, 30.888)     (-1.739, -1.739)             (0.824, 0.952)                
## 
##    0.55          11:9            29.819               -1.852                      0.877                 0   
##                             (29.253, 30.826)     (-1.852, -1.852)             (0.824, 0.951)                
## 
##    0.56         14:11            29.819               -1.97                       0.875                 0   
##                             (29.192, 30.73)       (-1.97, -1.97)              (0.815, 0.948)                
## 
##    0.57         57:43            29.694               -2.093                      0.868                 0   
##                             (29.098, 30.64)      (-2.093, -2.093)             (0.81, 0.947)                 
## 
##    0.58         29:21            29.443               -2.222                      0.856                 0   
##                             (29.034, 30.605)     (-2.222, -2.222)             (0.801, 0.942)                
## 
##    0.59         59:41            29.317               -2.358                      0.849                 0   
##                             (29.005, 30.511)     (-2.358, -2.358)              (0.8, 0.94)                  
## 
##     0.6          3:2             29.317                -2.5                       0.846                 0   
##                             (28.908, 30.482)       (-2.5, -2.5)               (0.793, 0.939)                
## 
##    0.61         61:39            29.317               -2.65                       0.842                 0   
##                             (28.786, 30.418)      (-2.65, -2.65)              (0.785, 0.938)                
## 
##    0.62         31:19            29.192               -2.807                      0.835                 0   
##                             (28.754, 30.353)     (-2.807, -2.807)             (0.779, 0.932)                
## 
##    0.63         63:37            29.066               -2.973                      0.827                 0   
##                             (28.657, 30.228)     (-2.973, -2.973)             (0.77, 0.931)                 
## 
##    0.64          16:9            28.941               -3.148                      0.819                 0   
##                             (28.532, 30.135)     (-3.148, -3.148)             (0.764, 0.93)                 
## 
##    0.65          13:7            28.941               -3.333                      0.815                 0   
##                             (28.374, 30.064)     (-3.333, -3.333)             (0.76, 0.927)                 
## 
##    0.66         33:17            28.783               -3.529                       0.82                 0   
##                             (28.22, 29.948)      (-3.529, -3.529)             (0.749, 0.926)                
## 
##    0.67         67:33            28.657               -3.737                      0.812                 0   
##                             (28.152, 29.881)     (-3.737, -3.737)             (0.738, 0.924)                
## 
##    0.68          17:8            28.657               -3.958                      0.807                 0   
##                             (27.969, 29.787)     (-3.958, -3.958)             (0.729, 0.922)                
## 
##    0.69         69:31            28.657               -4.194                      0.803                 0   
##                             (27.875, 29.723)     (-4.194, -4.194)             (0.718, 0.916)                
## 
##     0.7          7:3             28.657               -4.444                      0.798                 0   
##                             (27.717, 29.629)     (-4.444, -4.444)             (0.711, 0.913)                
## 
##    0.71         71:29            28.532               -4.713                      0.788                 0   
##                             (27.528, 29.594)     (-4.713, -4.713)             (0.699, 0.911)                
## 
##    0.72          18:7            28.532                 -5                        0.782                 0   
##                             (27.463, 29.504)         (-5, -5)                 (0.694, 0.913)                
## 
##    0.73         73:27            28.406               -5.309                      0.772                 0   
##                             (27.338, 29.472)     (-5.309, -5.309)             (0.677, 0.911)                
## 
##    0.74         37:13            28.406               -5.641                      0.765                 0   
##                             (27.122, 29.443)     (-5.641, -5.641)             (0.667, 0.903)                
## 
##    0.75          3:1             28.281                 -6                        0.754                 0   
##                             (26.993, 29.346)         (-6, -6)                 (0.657, 0.902)                
## 
##    0.76          19:6            28.281               -6.389                      0.746                 0   
##                             (26.932, 29.282)     (-6.389, -6.389)              (0.641, 0.9)                 
## 
##    0.77         77:23            28.281               -6.812                      0.737                 0   
##                             (26.713, 29.218)     (-6.812, -6.812)             (0.638, 0.897)                
## 
##    0.78         39:11            28.281               -7.273                      0.728                 0   
##                             (26.617, 29.031)     (-7.273, -7.273)             (0.626, 0.89)                 
## 
##    0.79         79:21            27.904               -7.778                      0.705                 0   
##                             (26.427, 28.938)     (-7.778, -7.778)             (0.612, 0.886)                
## 
##     0.8          4:1             27.621               -8.333                       0.71                 0   
##                             (26.147, 28.812)     (-8.333, -8.333)             (0.597, 0.883)                
## 
##    0.81         81:19            27.37                -8.947                      0.691                 0   
##                             (25.957, 28.748)     (-8.947, -8.947)             (0.579, 0.878)                
## 
##    0.82          41:9            27.119               -9.63                        0.67                 0   
##                             (25.738, 28.654)      (-9.63, -9.63)              (0.554, 0.874)                
## 
##    0.83         83:17            27.119              -10.392                      0.657                 0   
##                             (25.642, 28.432)    (-10.392, -10.392)            (0.542, 0.873)                
## 
##    0.84          21:4            26.993               -11.25                      0.637                 0   
##                             (25.265, 28.31)      (-11.25, -11.25)             (0.525, 0.87)                 
## 
##    0.85          17:3            26.617              -12.222                      0.607                 0   
##                             (25.075, 28.307)    (-12.222, -12.222)            (0.497, 0.861)                
## 
##    0.86          43:7            26.459              -13.333                      0.619                 0   
##                             (24.763, 28.12)     (-13.333, -13.333)            (0.476, 0.848)                
## 
##    0.87         87:13            26.083              -14.615                      0.586                 0   
##                             (24.451, 28.056)    (-14.615, -14.615)            (0.443, 0.838)                
## 
##    0.88          22:3            26.083              -16.111                      0.563                 0   
##                             (24.042, 27.963)    (-16.111, -16.111)            (0.415, 0.828)                
## 
##    0.89         89:11            25.832              -17.879                      0.527                 0   
##                             (23.698, 27.711)    (-17.879, -17.879)            (0.379, 0.824)                
## 
##     0.9          9:1             25.423                -20                        0.532                 0   
##                             (23.196, 27.46)         (-20, -20)                (0.332, 0.818)                
## 
##    0.91          91:9            24.921              -22.593                       0.48                 0   
##                             (22.912, 27.274)    (-22.593, -22.593)            (0.267, 0.797)                
## 
##    0.92          23:2            24.795              -25.833                      0.432                 0   
##                             (22.285, 27.052)    (-25.833, -25.833)            (0.219, 0.783)                
## 
##    0.93          93:7            24.419                -30                        0.364                 0   
##                             (21.905, 26.958)        (-30, -30)                (0.165, 0.764)                
## 
##    0.94          47:3            24.01               -35.556                      0.362                 0   
##                             (20.84, 26.675)     (-35.556, -35.556)            (0.11, 0.738)                 
## 
##    0.95          19:1            23.193              -43.333                      0.458                 0   
##                             (19.836, 26.173)    (-43.333, -43.333)            (0.008, 0.729)                
## 
##    0.96          24:1            22.314                -55                         0.35                 0   
##                             (18.738, 25.671)        (-55, -55)               (-0.104, 0.715)                
## 
##    0.97          97:3            21.31               -74.444                      0.185                 0   
##                             (16.92, 25.169)     (-74.444, -74.444)           (-0.268, 0.679)                
## 
##    0.98          49:1            19.395              -113.333                     0.121                 0   
##                             (14.345, 24.509)   (-113.333, -113.333)          (-0.672, 0.703)                
## 
##    0.99          99:1            15.253                -230                       -0.543                0   
##                             (10.418, 23.128)       (-230, -230)              (-1.134, 0.711)                
## 
##      1          Inf:1              0                    NA                          NA                  NA  
##                                  (0, 0)              (NA, NA)                    (NA, NA)                   
## ------------------------------------------------------------------------------------------------------------

##绘制单个临床决策曲线
plot_decision_curve(curve1,
                    curve.names=c('modle1'),
                    cost.benefit.axis =FALSE,col= 'red',
                    confidence.intervals=FALSE,
                    standardize = FALSE)

plot_decision_curve(curve1,
                    curve.names=c('modle1'),
                    cost.benefit.axis =T,
                    n.cost.benefits= 8,
                    col= 'red',
                    confidence.intervals=FALSE,
                    standardize = FALSE)

plot_decision_curve(curve1,curve.names=c('modle1'),
                    cost.benefit.axis =T,col= 'red',
                    confidence.intervals=T,
                    standardize = T)

##绘制多个临床决策曲线在同一图上
List<-list(curve1,curve2,curve3)
plot_decision_curve(List,
                    curve.names=c('modle1','modle2',"modle3"),
                    cost.benefit.axis =FALSE,col= c('red','blue','green'),
                    confidence.intervals=FALSE,
                    standardize = FALSE)

## Note: When multiple decision curves are plotted, decision curves for 'All' are calculated using the prevalence from the first DecisionCurve object in the list provided.

plot_decision_curve(List,
                    curve.names=c('modle1','modle2',"modle3"),
                    cost.benefit.axis =T,
                    n.cost.benefits = 8,
                    col= c('red','blue','green'),
                    confidence.intervals=FALSE,
                    standardize = FALSE)

## Note: When multiple decision curves are plotted, decision curves for 'All' are calculated using the prevalence from the first DecisionCurve object in the list provided.

##绘制临床影响曲线(Clinical Impact Curve)

plot_clinical_impact(curve1,population.size= 1000,
                     cost.benefit.axis = T,
                     n.cost.benefits= 8,
                     col =c('red','blue'),
                     confidence.intervals= T,
                     ylim=c(0,1000),
                     legend.position="topright")

######方法7 净重新分类指数（NRI，用于比较两个模型的准确度）NRI=(新模型特异度+敏感度)-（旧模型特异度+敏感度） ##综合判别改善指数（IDI） ##NRI/IDI>0,提示新模型改善

rm(list = ls())
load("./data.Rdata")
##T检验/LASSO回归
formula1<-as.formula(status ~ size + epithelial_cell_size+thickness + shape +adhesion+ nuclei + chromatin + 
    nucleoli + mitoses)
##单因素logistic回归、逐步回归
formula2<-as.formula(status ~ size + thickness + shape + nuclei + chromatin + 
    nucleoli + mitoses)

library(nricens)
modle1<-glm(formula1,family = binomial(),data = data, x = T)
modle2<-glm(formula2,family = binomial(),data = data, x = T)

#predicted risk，分别计算两个模型的预测风险
p.std= modle1$fitted.values
p.new= modle2$fitted.values

#以上Logistic回归模型拟合完毕。

##分类c(0.2, 0.4)
result<-nribin(mdl.std= modle1, mdl.new = modle2, cut = c(0.2, 0.4),
       niter = 1000, updown = 'category')

## 
## UP and DOWN calculation:

##   #of total, case, and control subjects at t0:  683 239 444

## 
##   Reclassification Table for all subjects:

##         New
## Standard < 0.2 < 0.4 >= 0.4
##   < 0.2    428     2      0
##   < 0.4      2     5      1
##   >= 0.4     0     1    244

## 
##   Reclassification Table for case:

##         New
## Standard < 0.2 < 0.4 >= 0.4
##   < 0.2      2     0      0
##   < 0.4      1     2      1
##   >= 0.4     0     1    232

## 
##   Reclassification Table for control:

##         New
## Standard < 0.2 < 0.4 >= 0.4
##   < 0.2    426     2      0
##   < 0.4      1     3      0
##   >= 0.4     0     0     12

## 
## NRI estimation:

## Point estimates:

##                   Estimate
## NRI           -0.006436353
## NRI+          -0.004184100
## NRI-          -0.002252252
## Pr(Up|Case)    0.004184100
## Pr(Down|Case)  0.008368201
## Pr(Down|Ctrl)  0.002252252
## Pr(Up|Ctrl)    0.004504505

## 
## Now in bootstrap..

## 
## Point & Interval estimates:

##                   Estimate   Std.Error       Lower       Upper
## NRI           -0.006436353 0.010419128 -0.02897500 0.012466155
## NRI+          -0.004184100 0.008744322 -0.02489627 0.008869223
## NRI-          -0.002252252 0.005363130 -0.01141558 0.009270300
## Pr(Up|Case)    0.004184100 0.004747980  0.00000000 0.016494915
## Pr(Down|Case)  0.008368201 0.008396143  0.00000000 0.030501234
## Pr(Down|Ctrl)  0.002252252 0.003906260  0.00000000 0.013274336
## Pr(Up|Ctrl)    0.004504505 0.004610237  0.00000000 0.015927211

classfication<-result$nri
classfication$z<-abs(classfication$Estimate/classfication$Std.Error)
classfication$pvalue<-(1-pnorm(classfication$z))*2

##新模型较旧模型重分类正确的比例提高(NRI)-0.64%，p=0.5562935
##阳性结局中新模型较旧模型重分类正确的比例提高(NRI)-0.42%，p=0.6346839
##阴性结局中新模型较旧模型重分类正确的比例提高(NRI)-0.22%，p=0.6828429

##无分类cut=0-1之间任何固定数
result2<-nribin(mdl.std= modle1, mdl.new = modle2, 
                cut=0.02,#认为当预测的风险在新旧模型中相差2%时，即被认为是重新分类了
                niter = 1000, updown = 'category')

## 
## UP and DOWN calculation:

##   #of total, case, and control subjects at t0:  683 239 444

## 
##   Reclassification Table for all subjects:

##          New
## Standard  < 0.02 >= 0.02
##   < 0.02     368       2
##   >= 0.02     11     302

## 
##   Reclassification Table for case:

##          New
## Standard  < 0.02 >= 0.02
##   < 0.02       0       0
##   >= 0.02      0     239

## 
##   Reclassification Table for control:

##          New
## Standard  < 0.02 >= 0.02
##   < 0.02     368       2
##   >= 0.02     11      63

## 
## NRI estimation:

## Point estimates:

##                  Estimate
## NRI           0.020270270
## NRI+          0.000000000
## NRI-          0.020270270
## Pr(Up|Case)   0.000000000
## Pr(Down|Case) 0.000000000
## Pr(Down|Ctrl) 0.024774775
## Pr(Up|Ctrl)   0.004504505

## 
## Now in bootstrap..

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## 
## Point & Interval estimates:

##                  Estimate    Std.Error        Lower       Upper
## NRI           0.020270270 0.0478373437 -0.154622568 0.031531691
## NRI+          0.000000000 0.0012882221 -0.004264412 0.000000000
## NRI-          0.020270270 0.0478386135 -0.154370148 0.031531691
## Pr(Up|Case)   0.000000000 0.0006802433  0.000000000 0.000000000
## Pr(Down|Case) 0.000000000 0.0010703749  0.000000000 0.004264412
## Pr(Down|Ctrl) 0.024774775 0.0142090886  0.000000000 0.044374121
## Pr(Up|Ctrl)   0.004504505 0.0444885224  0.000000000 0.159861111

classfication2<-result2$nri
classfication2$z<-abs(classfication2$Estimate/classfication2$Std.Error)
classfication2$pvalue<-(1-pnorm(classfication2$z))*2


##新模型较旧模型重分类正确的比例提高(NRI)2%，p=0.66520067
##阳性结局中新模型较旧模型重分类正确的比例提高(NRI)0%，p=1
##阴性结局中新模型较旧模型重分类正确的比例提高(NRI)2%，p=0.66506415

##IDI计算
#predicted risk，分别计算两个模型的预测风险
library(PredictABEL)
p.std= modle1$fitted.values
p.new= modle2$fitted.values
data$status<-as.numeric(as.character(data$status))
demo<-as.matrix(data)
reclassification(data = data,cOutcome = 1,predrisk1 = p.std,predrisk2 =p.new, cutoff = c(seq(0,1,by=0.2)))

##  _________________________________________
##  
##      Reclassification table    
##  _________________________________________
## 
##  Outcome: absent 
##   
##              Updated Model
## Initial Model [0,0.2) [0.2,0.4) [0.4,0.6) [0.6,0.8) [0.8,1]  % reclassified
##     [0,0.2)       426         2         0         0       0               0
##     [0.2,0.4)       1         3         0         0       0              25
##     [0.4,0.6)       0         0         2         0       0               0
##     [0.6,0.8)       0         0         1         0       0             100
##     [0.8,1]         0         0         0         1       8              11
## 
##  
##  Outcome: present 
##   
##              Updated Model
## Initial Model [0,0.2) [0.2,0.4) [0.4,0.6) [0.6,0.8) [0.8,1]  % reclassified
##     [0,0.2)         2         0         0         0       0               0
##     [0.2,0.4)       1         2         1         0       0              50
##     [0.4,0.6)       0         1         6         1       0              25
##     [0.6,0.8)       0         0         1         7       3              36
##     [0.8,1]         0         0         0         2     212               1
## 
##  
##  Combined Data 
##   
##              Updated Model
## Initial Model [0,0.2) [0.2,0.4) [0.4,0.6) [0.6,0.8) [0.8,1]  % reclassified
##     [0,0.2)       428         2         0         0       0               0
##     [0.2,0.4)       2         5         1         0       0              38
##     [0.4,0.6)       0         1         8         1       0              20
##     [0.6,0.8)       0         0         2         7       3              42
##     [0.8,1]         0         0         0         3     220               1
##  _________________________________________
## 
##  NRI(Categorical) [95% CI]: 0.0023 [ -0.0255 - 0.03 ] ; p-value: 0.8736 
##  NRI(Continuous) [95% CI]: -0.9611 [ -1.0995 - -0.8227 ] ; p-value: 0 
##  IDI [95% CI]: -8e-04 [ -0.0048 - 0.0032 ] ; p-value: 0.7055

##IDI [95% CI]: -8e-04 [ -0.0048 - 0.0032 ] ; p-value: 0.7055

######方法8 标准校准曲线

rm(list = ls())
load("./data.Rdata")

##单因素logistic回归、逐步回归
formula<-as.formula(status ~ size + thickness + shape + nuclei + chromatin + 
    nucleoli + mitoses)

library(rms)
dd<-datadist(data)
options(datadist="dd")

fit<-lrm(formula,data = data,x = T,y = T)

call1<-calibrate(fit,method = "boot",B = 1000)

##单个展示
plot(call1,xlim=c(0,1.0),ylim=c(0,1.0))

## 
## n=683   Mean absolute error=0.022   Mean squared error=0.00154
## 0.9 Quantile of absolute error=0.03

plot(call1,
     xlim = c(0,1),
     xlab = "Predicted Probability",
     ylab = "Observed Probability",
     legend = FALSE,
     subtitles = FALSE)

## 
## n=683   Mean absolute error=0.022   Mean squared error=0.00154
## 0.9 Quantile of absolute error=0.03

abline(0,1,col = "black",lty = 2,lwd = 2)
lines(call1[,c("predy","calibrated.orig")], type = "l",lwd = 2,col="red",pch =16)
lines(call1[,c("predy","calibrated.corrected")], type = "l",lwd = 2,col="green",pch =16)
legend(0.55,0.35,
       c("Apparent","Ideal","Bias-corrected"),
       lty = c(2,1,1),
       lwd = c(2,1,1),
       col = c("black","red","green"),
       bty = "n") # "o"为加边框

library(riskRegression)

## riskRegression version 2021.10.10

## 
## 载入程辑包：'riskRegression'

## The following objects are masked from 'package:PredictABEL':
## 
##     plotCalibration, plotROC

fit2 <- glm(formula,data = data,family = binomial())
xb <- Score(list("fit"=fit2),formula= status~1,
            null.model = FALSE,
            conf.int = TRUE,
            plots = c("calibration","ROC"),
            metrics = c("auc","brier"),
            B=1000,M = 50, # bootstrap；每次抽样50
            data= data)
plotCalibration(xb,col="red")

##多个模型展示
##T检验/LASSO回归
formula1<-as.formula(status ~ size + epithelial_cell_size+thickness + shape +adhesion+ nuclei + chromatin + 
    nucleoli + mitoses)
##单因素logistic回归、逐步回归
formula2<-as.formula(status ~ size + thickness + shape + nuclei + chromatin + 
    nucleoli + mitoses)
fit1 = glm(formula1, data=data,family = binomial())
fit2 = glm(formula2, data=data,family = binomial())
xb <- Score(list("fit1"=fit1,
                 "fit2"=fit2),
            formula=status~1,
            null.model = FALSE,
            conf.int =TRUE,
            plots =c("calibration","ROC"),
            metrics = c("auc","brier"),
            B=1000,M=50,
            data=data)
plotCalibration(xb)

###C指数，用来评价模型一致性

fit<-lrm(formula,data = data,x = T,y = T)
fit

## Logistic Regression Model
##  
##  lrm(formula = formula, data = data, x = T, y = T)
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs           683    LR chi2     755.93    R2       0.922    C       0.993    
##   0            444    d.f.             7    g        5.172    Dxy     0.986    
##   1            239    Pr(> chi2) <0.0001    gr     176.250    gamma   0.986    
##  max |deriv| 1e-06                          gp       0.448    tau-a   0.449    
##                                             Brier    0.024                     
##  
##                Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept      3.8738 1.2260  3.16  0.0016  
##  size           0.4466 0.1633  2.74  0.0062  
##  thickness=Low -1.6101 0.5286 -3.05  0.0023  
##  shape=Low     -1.3560 0.6372 -2.13  0.0333  
##  nuclei=Low    -2.3269 0.5031 -4.62  <0.0001 
##  chromatin=Low -1.9142 0.5365 -3.57  0.0004  
##  nucleoli=Low  -1.2903 0.5165 -2.50  0.0125  
##  mitoses=Low   -1.4256 0.6585 -2.17  0.0304  
##

#c-index=0.993

###Hosmer-Lemeshow，拟合优度检验用来评价模型

library(ResourceSelection)
mod <- glm(formula,data = data, family=binomial)
hl <- hoslem.test(mod$y, fitted(mod), g=10)
hl

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  mod$y, fitted(mod)
## X-squared = 12.897, df = 8, p-value = 0.1154

logisitic regression

葛斌

2021/10/28

方法4 ROC曲线，AUC曲线面积越大越好， ##c-index（一致性指数）0.5-1之间，c-index=0.5–0.7为低度准确，0.71–0.9中等准确，