##临床预测模型包括临床诊断模型及临床预测模型。临床诊断模型可通过R语言实现,从大量的自变量中筛选出有意义的自变量,找到最高效的 ##临床诊断公式。
##本人将继续在此公众号发表R语言相关的统计、TCGA、GEO等相关数据处理,欢迎大家关注并转发给需要的人,谢谢。
knitr::include_graphics(path = './1.jpg')
###logisitic regression ##step1筛选变量 ##step2模型评价 ##step3模型验证
##使用数据来源于MASS包中的biopsy数据集,了解更多请??biopsy
rm(list = ls())
library(MASS)
data<-biopsy
##??biopsy
##查看数据
class(data)
## [1] "data.frame"
dim(data)
## [1] 699 11
str(data)
## 'data.frame': 699 obs. of 11 variables:
## $ ID : chr "1000025" "1002945" "1015425" "1016277" ...
## $ V1 : int 5 5 3 6 4 8 1 2 2 4 ...
## $ V2 : int 1 4 1 8 1 10 1 1 1 2 ...
## $ V3 : int 1 4 1 8 1 10 1 2 1 1 ...
## $ V4 : int 1 5 1 1 3 8 1 1 1 1 ...
## $ V5 : int 2 7 2 3 2 7 2 2 2 2 ...
## $ V6 : int 1 10 2 4 1 10 10 1 1 1 ...
## $ V7 : int 3 3 3 3 3 9 3 3 1 2 ...
## $ V8 : int 1 2 1 7 1 7 1 1 1 1 ...
## $ V9 : int 1 1 1 1 1 1 1 1 5 1 ...
## $ class: Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
##数据整理 去NA
data<-data[,-1]
data<-na.omit(data)
##数据整理 更换列名
colnames(data)
## [1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9"
## [10] "class"
names(data)<-c("thickness","size","shape","adhesion","epithelial_cell_size","nuclei","chromatin"
,"nucleoli","mitoses","status")
##数据整理
table(data$status)
##
## benign malignant
## 444 239
data$status<-ifelse(data$status=="benign",0,1)
data$status<-factor(data$status)
class(data$status)
## [1] "factor"
library(dplyr)
##
## 载入程辑包:'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data<-select(data,status,size,epithelial_cell_size,everything())
data$thickness<-ifelse(data$thickness>=mean(data$thickness),"High","Low")
data$shape<-ifelse(data$shape>=mean(data$shape),"High","Low")
data$adhesion<-ifelse(data$adhesion>=mean(data$adhesion),"High","Low")
data$nuclei<-ifelse(data$nuclei>=mean(data$nuclei),"High","Low")
data$chromatin<-ifelse(data$chromatin>=mean(data$chromatin),"High","Low")
data$nucleoli<-ifelse(data$nucleoli>=mean(data$nucleoli),"High","Low")
data$mitoses<-ifelse(data$mitoses>=mean(data$mitoses),"High","Low")
str(data)
## 'data.frame': 683 obs. of 10 variables:
## $ status : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ size : int 1 4 1 8 1 10 1 1 1 2 ...
## $ epithelial_cell_size: int 2 7 2 3 2 7 2 2 2 2 ...
## $ thickness : chr "High" "High" "Low" "High" ...
## $ shape : chr "Low" "High" "Low" "High" ...
## $ adhesion : chr "Low" "High" "Low" "Low" ...
## $ nuclei : chr "Low" "High" "Low" "High" ...
## $ chromatin : chr "Low" "Low" "Low" "Low" ...
## $ nucleoli : chr "Low" "Low" "Low" "High" ...
## $ mitoses : chr "Low" "Low" "Low" "Low" ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
for (i in names(data)[c(2:3)]){data[,i] <- as.numeric(data[,i])}
for (i in names(data)[c(4:10)]) {data[,i] <- as.factor(data[,i])}
str(data)
## 'data.frame': 683 obs. of 10 variables:
## $ status : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ epithelial_cell_size: num 2 7 2 3 2 7 2 2 2 2 ...
## $ thickness : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
## $ shape : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
## $ adhesion : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
## $ nuclei : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
## $ chromatin : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
## $ nucleoli : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
## $ mitoses : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
save(data,file = "data.Rdata")
##step1筛选变量
#方法1 使用常见的检验方法(T、卡方)
rm(list = ls())
load('./data.Rdata')
names(data)
## [1] "status" "size" "epithelial_cell_size"
## [4] "thickness" "shape" "adhesion"
## [7] "nuclei" "chromatin" "nucleoli"
## [10] "mitoses"
##连续自变量(数值变量)
x1<-c("size","epithelial_cell_size")
##分类变量
x2<-c("thickness","shape","adhesion","nuclei","chromatin","nucleoli","mitoses")
library(tableone)
table1<-CreateTableOne(vars = c(x1,x2),strata = "status",data = data,
factorVars = x2,test = T,addOverall = T )
result1<-print(table1,showAllLevels = F)
## Stratified by status
## Overall 0 1
## n 683 444 239
## size (mean (SD)) 3.15 (3.07) 1.31 (0.86) 6.58 (2.72)
## epithelial_cell_size (mean (SD)) 3.23 (2.22) 2.11 (0.88) 5.33 (2.44)
## thickness = Low (%) 372 (54.5) 341 (76.8) 31 (13.0)
## shape = Low (%) 457 (66.9) 425 (95.7) 32 (13.4)
## adhesion = Low (%) 451 (66.0) 400 (90.1) 51 (21.3)
## nuclei = Low (%) 460 (67.3) 422 (95.0) 38 (15.9)
## chromatin = Low (%) 471 (69.0) 426 (95.9) 45 (18.8)
## nucleoli = Low (%) 468 (68.5) 421 (94.8) 47 (19.7)
## mitoses = Low (%) 563 (82.4) 431 (97.1) 132 (55.2)
## Stratified by status
## p test
## n
## size (mean (SD)) <0.001
## epithelial_cell_size (mean (SD)) <0.001
## thickness = Low (%) <0.001
## shape = Low (%) <0.001
## adhesion = Low (%) <0.001
## nuclei = Low (%) <0.001
## chromatin = Low (%) <0.001
## nucleoli = Low (%) <0.001
## mitoses = Low (%) <0.001
result1<-print(table1,showAllLevels = T)
## Stratified by status
## level Overall 0 1
## n 683 444 239
## size (mean (SD)) 3.15 (3.07) 1.31 (0.86) 6.58 (2.72)
## epithelial_cell_size (mean (SD)) 3.23 (2.22) 2.11 (0.88) 5.33 (2.44)
## thickness (%) High 311 (45.5) 103 (23.2) 208 (87.0)
## Low 372 (54.5) 341 (76.8) 31 (13.0)
## shape (%) High 226 (33.1) 19 ( 4.3) 207 (86.6)
## Low 457 (66.9) 425 (95.7) 32 (13.4)
## adhesion (%) High 232 (34.0) 44 ( 9.9) 188 (78.7)
## Low 451 (66.0) 400 (90.1) 51 (21.3)
## nuclei (%) High 223 (32.7) 22 ( 5.0) 201 (84.1)
## Low 460 (67.3) 422 (95.0) 38 (15.9)
## chromatin (%) High 212 (31.0) 18 ( 4.1) 194 (81.2)
## Low 471 (69.0) 426 (95.9) 45 (18.8)
## nucleoli (%) High 215 (31.5) 23 ( 5.2) 192 (80.3)
## Low 468 (68.5) 421 (94.8) 47 (19.7)
## mitoses (%) High 120 (17.6) 13 ( 2.9) 107 (44.8)
## Low 563 (82.4) 431 (97.1) 132 (55.2)
## Stratified by status
## p test
## n
## size (mean (SD)) <0.001
## epithelial_cell_size (mean (SD)) <0.001
## thickness (%) <0.001
##
## shape (%) <0.001
##
## adhesion (%) <0.001
##
## nuclei (%) <0.001
##
## chromatin (%) <0.001
##
## nucleoli (%) <0.001
##
## mitoses (%) <0.001
##
###同时也可以设定某个变量的计算方式
nonnormalvars = c("size")##非参数检验变量
exactvars <- c("epithelial_cell_size")##确切检验变量
result1<-print(table1,nonnormal = nonnormalvars, #指定非正态连续变量
exact = exactvars, #指定需要Fisher确切法统计的变量
showAllLevels = T, #TRUE则显示所有分类变量水平的频数和百分比
noSpaces = TRUE, #删除用于对齐的空格
printToggle = T) #不展示输出结果
## Stratified by status
## level Overall 0
## n 683 444
## size (median [IQR]) 1.00 [1.00, 5.00] 1.00 [1.00, 1.00]
## epithelial_cell_size (mean (SD)) 3.23 (2.22) 2.11 (0.88)
## thickness (%) High 311 (45.5) 103 (23.2)
## Low 372 (54.5) 341 (76.8)
## shape (%) High 226 (33.1) 19 (4.3)
## Low 457 (66.9) 425 (95.7)
## adhesion (%) High 232 (34.0) 44 (9.9)
## Low 451 (66.0) 400 (90.1)
## nuclei (%) High 223 (32.7) 22 (5.0)
## Low 460 (67.3) 422 (95.0)
## chromatin (%) High 212 (31.0) 18 (4.1)
## Low 471 (69.0) 426 (95.9)
## nucleoli (%) High 215 (31.5) 23 (5.2)
## Low 468 (68.5) 421 (94.8)
## mitoses (%) High 120 (17.6) 13 (2.9)
## Low 563 (82.4) 431 (97.1)
## Stratified by status
## 1 p test
## n 239
## size (median [IQR]) 6.00 [4.00, 10.00] <0.001 nonnorm
## epithelial_cell_size (mean (SD)) 5.33 (2.44) <0.001
## thickness (%) 208 (87.0) <0.001
## 31 (13.0)
## shape (%) 207 (86.6) <0.001
## 32 (13.4)
## adhesion (%) 188 (78.7) <0.001
## 51 (21.3)
## nuclei (%) 201 (84.1) <0.001
## 38 (15.9)
## chromatin (%) 194 (81.2) <0.001
## 45 (18.8)
## nucleoli (%) 192 (80.3) <0.001
## 47 (19.7)
## mitoses (%) 107 (44.8) <0.001
## 132 (55.2)
###由以上结果可以看出9个变量均有统计学差异,因此都可纳入模型验证。
###方法2 单因素logistic回归分析
rm(list = ls())
load("./data.Rdata")
library(rms)
## 载入需要的程辑包:Hmisc
## 载入需要的程辑包:lattice
## 载入需要的程辑包:survival
## 载入需要的程辑包:Formula
## 载入需要的程辑包:ggplot2
##
## 载入程辑包:'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
## 载入需要的程辑包:SparseM
##
## 载入程辑包:'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
modle<-glm(status~.,data = data,family = binomial())
summary(modle)
##
## Call:
## glm(formula = status ~ ., family = binomial(), data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.4566 -0.1030 -0.0817 0.0589 2.4288
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.12476 1.36180 3.029 0.00245 **
## size 0.36416 0.16403 2.220 0.02642 *
## epithelial_cell_size 0.09753 0.14239 0.685 0.49340
## thicknessLow -1.53354 0.53706 -2.855 0.00430 **
## shapeLow -1.27029 0.64410 -1.972 0.04859 *
## adhesionLow -0.67813 0.54362 -1.247 0.21224
## nucleiLow -2.22493 0.50767 -4.383 1.17e-05 ***
## chromatinLow -1.91294 0.53155 -3.599 0.00032 ***
## nucleoliLow -1.12753 0.52714 -2.139 0.03244 *
## mitosesLow -1.63792 0.68457 -2.393 0.01673 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 884.35 on 682 degrees of freedom
## Residual deviance: 126.18 on 673 degrees of freedom
## AIC: 146.18
##
## Number of Fisher Scoring iterations: 7
modle$coefficients
## (Intercept) size epithelial_cell_size
## 4.12476237 0.36415903 0.09752513
## thicknessLow shapeLow adhesionLow
## -1.53353629 -1.27029329 -0.67812671
## nucleiLow chromatinLow nucleoliLow
## -2.22492599 -1.91294051 -1.12752674
## mitosesLow
## -1.63792441
exp(cbind("OR"=coef(modle),confint(modle)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 61.8531093 4.41857754 947.9079276
## size 1.4393031 1.07466010 2.0589840
## epithelial_cell_size 1.1024391 0.83632218 1.4672771
## thicknessLow 0.2157713 0.07256867 0.6110435
## shapeLow 0.2807493 0.08020564 1.0109500
## adhesionLow 0.5075669 0.17741815 1.5259506
## nucleiLow 0.1080754 0.03906304 0.2907083
## chromatinLow 0.1476456 0.05091027 0.4167445
## nucleoliLow 0.3238332 0.11535887 0.9263018
## mitosesLow 0.1943831 0.04823036 0.7187466
##AIC: 146.18 ##解释,size每增加1cm,恶性风险增加1.4393031倍。 ##thickness Low是high组恶性风险的0.2157713倍。
colnames(data)
## [1] "status" "size" "epithelial_cell_size"
## [4] "thickness" "shape" "adhesion"
## [7] "nuclei" "chromatin" "nucleoli"
## [10] "mitoses"
modlenew<-glm(status~size+thickness+shape+nuclei+chromatin+nucleoli+mitoses,data = data,family = binomial())
summary(modlenew)
##
## Call:
## glm(formula = status ~ size + thickness + shape + nuclei + chromatin +
## nucleoli + mitoses, family = binomial(), data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3655 -0.1072 -0.0858 0.0667 2.5741
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.8738 1.2259 3.160 0.00158 **
## size 0.4466 0.1633 2.735 0.00623 **
## thicknessLow -1.6101 0.5286 -3.046 0.00232 **
## shapeLow -1.3560 0.6372 -2.128 0.03333 *
## nucleiLow -2.3269 0.5031 -4.625 3.75e-06 ***
## chromatinLow -1.9142 0.5365 -3.568 0.00036 ***
## nucleoliLow -1.2903 0.5165 -2.498 0.01249 *
## mitosesLow -1.4256 0.6585 -2.165 0.03038 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 884.35 on 682 degrees of freedom
## Residual deviance: 128.42 on 675 degrees of freedom
## AIC: 144.42
##
## Number of Fisher Scoring iterations: 7
modlenew$coefficients
## (Intercept) size thicknessLow shapeLow nucleiLow chromatinLow
## 3.8737617 0.4465546 -1.6101132 -1.3560239 -2.3268944 -1.9142429
## nucleoliLow mitosesLow
## -1.2902597 -1.4256451
exp(cbind("OR"=coef(modlenew),confint(modlenew)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 48.12307098 4.70500122 580.5895696
## size 1.56291798 1.16995577 2.2243591
## thicknessLow 0.19986500 0.06823304 0.5555434
## shapeLow 0.25768332 0.07410375 0.9089832
## nucleiLow 0.09759838 0.03555500 0.2598215
## chromatinLow 0.14745343 0.05040969 0.4199752
## nucleoliLow 0.27519932 0.09991303 0.7694625
## mitosesLow 0.24035335 0.06326596 0.8505815
##AIC: 144.42,较前下降
##如果已知某自变量x与应变量Y的关系,那其他的自变量如何纳入研究呢?
rm(list = ls())
load("./data.Rdata")
#colnames(data)
##连续自变量(数值变量)
#x1<-c("size","epithelial_cell_size")
##分类变量
#x2<-c("thickness","shape","adhesion","nuclei","chromatin","nucleoli","mitoses")
#假设已知size与status关系,需要研究其他的变量
trans<-function(x){
modle1<-glm(status~size,data = data,family = binomial())
coef1<-coef(modle1)[2]
form<-as.formula(paste0("status~size+",x))
modle2<-glm(form,data = data,family = binomial())
coef2<-coef(modle2)[2]
ratio<-abs(coef2-coef1)/coef1
if (ratio>0.1) {return(x)}
}
x<-colnames(data)
x<-x[-which(x==c("status","size"))]
#x<-as.character(x)
lapply(x,trans)
## [[1]]
## [1] "epithelial_cell_size"
##
## [[2]]
## [1] "thickness"
##
## [[3]]
## [1] "shape"
##
## [[4]]
## [1] "adhesion"
##
## [[5]]
## [1] "nuclei"
##
## [[6]]
## [1] "chromatin"
##
## [[7]]
## [1] "nucleoli"
##
## [[8]]
## NULL
##依据得到的结果可继续使用多因素逻辑回归
#"epithelial_cell_size" "thickness" "shape" "adhesion" "nuclei" "chromatin" "nucleoli"
###方法3 LASSO回归分析,一般临床少量自变量可不采用LASSO回归,而如果是组学数据,由于存在共线性的问题可选用LASSO回归,具有筛选变量的功能。
rm(list = ls())
library(MASS)
library(dplyr)
data<-biopsy%>%
na.omit()%>%
select(-1)
colnames(data)<-c("thickness","size","shape","adhesion","epithelial_cell_size","nuclei","chromatin"
,"nucleoli","mitoses","status")
data<-select(data,"status","size","epithelial_cell_size",everything())
data$thickness<-ifelse(data$thickness>=mean(data$thickness),1,0)
data$shape<-ifelse(data$shape>=mean(data$shape),1,0)
data$adhesion<-ifelse(data$adhesion>=mean(data$adhesion),1,0)
data$nuclei<-ifelse(data$nuclei>=mean(data$nuclei),1,0)
data$chromatin<-ifelse(data$chromatin>=mean(data$chromatin),1,0)
data$nucleoli<-ifelse(data$nucleoli>=mean(data$nucleoli),1,0)
data$mitoses<-ifelse(data$mitoses>=mean(data$mitoses),1,0)
library(glmnet)##LASS0/弹性网络
## 载入需要的程辑包:Matrix
## Loaded glmnet 4.1-2
library(caret)
##
## 载入程辑包:'caret'
## The following object is masked from 'package:survival':
##
## cluster
x<-as.matrix(data[,2:10])
y<-as.matrix(data[,1])
lasso<-glmnet(x,y,family = "binomial",
alpha = 1,#1为LASSO回归,0为岭回归
nlambda = 1000)
View(print(lasso)) #停止在580
##
## Call: glmnet(x = x, y = y, family = "binomial", alpha = 1, nlambda = 1000)
##
## Df %Dev Lambda
## 1 0 0.00 0.39800
## 2 1 0.98 0.39440
## 3 1 1.94 0.39080
## 4 1 2.88 0.38720
## 5 1 3.80 0.38360
## 6 1 4.70 0.38010
## 7 1 5.58 0.37660
## 8 1 6.45 0.37320
## 9 1 7.30 0.36970
## 10 1 8.13 0.36630
## 11 1 8.94 0.36300
## 12 2 9.81 0.35960
## 13 2 10.67 0.35630
## 14 2 11.51 0.35310
## 15 2 12.34 0.34980
## 16 2 13.15 0.34660
## 17 3 13.98 0.34340
## 18 3 14.84 0.34030
## 19 3 15.69 0.33720
## 20 3 16.53 0.33410
## 21 3 17.35 0.33100
## 22 3 18.15 0.32800
## 23 3 18.94 0.32500
## 24 3 19.72 0.32200
## 25 3 20.48 0.31900
## 26 3 21.23 0.31610
## 27 3 21.97 0.31320
## 28 4 22.72 0.31030
## 29 4 23.45 0.30750
## 30 4 24.19 0.30460
## 31 4 24.91 0.30190
## 32 4 25.61 0.29910
## 33 4 26.31 0.29630
## 34 4 27.00 0.29360
## 35 4 27.67 0.29090
## 36 4 28.33 0.28830
## 37 4 28.99 0.28560
## 38 4 29.63 0.28300
## 39 4 30.27 0.28040
## 40 4 30.89 0.27780
## 41 4 31.51 0.27530
## 42 4 32.11 0.27270
## 43 4 32.71 0.27020
## 44 4 33.30 0.26780
## 45 4 33.88 0.26530
## 46 4 34.45 0.26290
## 47 4 35.01 0.26050
## 48 4 35.56 0.25810
## 49 4 36.11 0.25570
## 50 4 36.65 0.25330
## 51 4 37.18 0.25100
## 52 4 37.71 0.24870
## 53 4 38.22 0.24640
## 54 4 38.73 0.24420
## 55 4 39.23 0.24190
## 56 4 39.73 0.23970
## 57 5 40.23 0.23750
## 58 5 40.72 0.23530
## 59 5 41.21 0.23320
## 60 5 41.70 0.23100
## 61 5 42.18 0.22890
## 62 5 42.65 0.22680
## 63 5 43.12 0.22470
## 64 5 43.58 0.22270
## 65 5 44.04 0.22060
## 66 5 44.48 0.21860
## 67 5 44.93 0.21660
## 68 5 45.36 0.21460
## 69 5 45.80 0.21260
## 70 5 46.22 0.21070
## 71 5 46.64 0.20880
## 72 5 47.06 0.20680
## 73 5 47.47 0.20490
## 74 5 47.87 0.20310
## 75 5 48.27 0.20120
## 76 5 48.67 0.19930
## 77 5 49.06 0.19750
## 78 5 49.44 0.19570
## 79 5 49.82 0.19390
## 80 5 50.20 0.19210
## 81 5 50.57 0.19040
## 82 5 50.93 0.18860
## 83 5 51.29 0.18690
## 84 5 51.65 0.18520
## 85 5 52.01 0.18350
## 86 5 52.35 0.18180
## 87 5 52.70 0.18010
## 88 5 53.04 0.17850
## 89 5 53.38 0.17680
## 90 5 53.71 0.17520
## 91 5 54.04 0.17360
## 92 5 54.36 0.17200
## 93 5 54.68 0.17040
## 94 5 55.00 0.16890
## 95 5 55.31 0.16730
## 96 5 55.62 0.16580
## 97 5 55.93 0.16430
## 98 5 56.23 0.16280
## 99 5 56.53 0.16130
## 100 5 56.83 0.15980
## 101 5 57.12 0.15830
## 102 5 57.41 0.15690
## 103 5 57.69 0.15540
## 104 5 57.97 0.15400
## 105 5 58.25 0.15260
## 106 5 58.53 0.15120
## 107 5 58.80 0.14980
## 108 5 59.07 0.14840
## 109 5 59.34 0.14710
## 110 5 59.60 0.14570
## 111 5 59.86 0.14440
## 112 5 60.12 0.14300
## 113 5 60.37 0.14170
## 114 5 60.63 0.14040
## 115 5 60.88 0.13910
## 116 5 61.12 0.13790
## 117 5 61.37 0.13660
## 118 5 61.61 0.13530
## 119 5 61.85 0.13410
## 120 5 62.08 0.13290
## 121 5 62.31 0.13170
## 122 5 62.54 0.13040
## 123 5 62.77 0.12920
## 124 5 63.00 0.12810
## 125 5 63.22 0.12690
## 126 5 63.44 0.12570
## 127 5 63.66 0.12460
## 128 5 63.88 0.12340
## 129 5 64.09 0.12230
## 130 5 64.30 0.12120
## 131 5 64.51 0.12010
## 132 5 64.72 0.11900
## 133 5 64.92 0.11790
## 134 5 65.12 0.11680
## 135 5 65.32 0.11570
## 136 5 65.52 0.11460
## 137 5 65.72 0.11360
## 138 5 65.91 0.11260
## 139 5 66.10 0.11150
## 140 5 66.29 0.11050
## 141 5 66.48 0.10950
## 142 5 66.66 0.10850
## 143 5 66.85 0.10750
## 144 5 67.03 0.10650
## 145 5 67.21 0.10550
## 146 5 67.39 0.10460
## 147 5 67.56 0.10360
## 148 5 67.74 0.10260
## 149 5 67.91 0.10170
## 150 5 68.08 0.10080
## 151 5 68.25 0.09984
## 152 5 68.41 0.09893
## 153 5 68.58 0.09802
## 154 5 68.74 0.09712
## 155 5 68.90 0.09623
## 156 5 69.06 0.09534
## 157 5 69.22 0.09447
## 158 5 69.38 0.09360
## 159 5 69.53 0.09274
## 160 5 69.69 0.09189
## 161 5 69.84 0.09105
## 162 5 69.99 0.09021
## 163 5 70.14 0.08939
## 164 5 70.28 0.08856
## 165 6 70.43 0.08775
## 166 6 70.59 0.08695
## 167 6 70.74 0.08615
## 168 6 70.89 0.08536
## 169 6 71.05 0.08457
## 170 6 71.20 0.08380
## 171 6 71.34 0.08303
## 172 6 71.49 0.08227
## 173 6 71.64 0.08151
## 174 6 71.78 0.08076
## 175 6 71.92 0.08002
## 176 6 72.06 0.07929
## 177 6 72.20 0.07856
## 178 6 72.34 0.07784
## 179 6 72.48 0.07713
## 180 6 72.61 0.07642
## 181 6 72.75 0.07572
## 182 6 72.88 0.07502
## 183 6 73.01 0.07433
## 184 6 73.14 0.07365
## 185 6 73.27 0.07298
## 186 6 73.39 0.07231
## 187 6 73.52 0.07164
## 188 6 73.64 0.07098
## 189 6 73.77 0.07033
## 190 6 73.89 0.06969
## 191 6 74.01 0.06905
## 192 6 74.13 0.06841
## 193 6 74.25 0.06779
## 194 6 74.36 0.06716
## 195 6 74.48 0.06655
## 196 6 74.59 0.06594
## 197 6 74.71 0.06533
## 198 6 74.82 0.06473
## 199 6 74.93 0.06414
## 200 6 75.04 0.06355
## 201 6 75.15 0.06297
## 202 6 75.26 0.06239
## 203 6 75.36 0.06182
## 204 6 75.47 0.06125
## 205 6 75.57 0.06069
## 206 6 75.67 0.06013
## 207 6 75.78 0.05958
## 208 6 75.88 0.05903
## 209 6 75.98 0.05849
## 210 6 76.08 0.05795
## 211 7 76.18 0.05742
## 212 7 76.27 0.05689
## 213 7 76.37 0.05637
## 214 7 76.47 0.05585
## 215 7 76.56 0.05534
## 216 7 76.66 0.05483
## 217 7 76.75 0.05433
## 218 7 76.84 0.05383
## 219 7 76.94 0.05334
## 220 7 77.03 0.05285
## 221 7 77.12 0.05236
## 222 7 77.21 0.05188
## 223 7 77.29 0.05141
## 224 7 77.38 0.05094
## 225 7 77.47 0.05047
## 226 7 77.55 0.05001
## 227 7 77.64 0.04955
## 228 7 77.72 0.04909
## 229 8 77.81 0.04864
## 230 8 77.89 0.04819
## 231 8 77.98 0.04775
## 232 8 78.06 0.04731
## 233 8 78.15 0.04688
## 234 8 78.23 0.04645
## 235 8 78.31 0.04602
## 236 8 78.39 0.04560
## 237 8 78.48 0.04518
## 238 8 78.56 0.04477
## 239 8 78.63 0.04436
## 240 8 78.71 0.04395
## 241 8 78.79 0.04355
## 242 8 78.87 0.04315
## 243 8 78.94 0.04275
## 244 8 79.02 0.04236
## 245 8 79.09 0.04197
## 246 8 79.17 0.04158
## 247 8 79.24 0.04120
## 248 8 79.31 0.04082
## 249 8 79.38 0.04045
## 250 8 79.45 0.04008
## 251 8 79.52 0.03971
## 252 8 79.59 0.03935
## 253 8 79.66 0.03899
## 254 8 79.73 0.03863
## 255 8 79.80 0.03827
## 256 8 79.86 0.03792
## 257 8 79.93 0.03757
## 258 8 80.00 0.03723
## 259 8 80.06 0.03689
## 260 8 80.12 0.03655
## 261 8 80.19 0.03621
## 262 8 80.25 0.03588
## 263 8 80.31 0.03555
## 264 8 80.37 0.03523
## 265 8 80.43 0.03490
## 266 8 80.49 0.03458
## 267 8 80.55 0.03426
## 268 8 80.61 0.03395
## 269 8 80.67 0.03364
## 270 8 80.73 0.03333
## 271 8 80.79 0.03302
## 272 8 80.84 0.03272
## 273 8 80.90 0.03242
## 274 8 80.95 0.03212
## 275 8 81.01 0.03183
## 276 8 81.06 0.03154
## 277 8 81.12 0.03125
## 278 8 81.17 0.03096
## 279 8 81.22 0.03068
## 280 8 81.27 0.03039
## 281 8 81.33 0.03012
## 282 8 81.38 0.02984
## 283 8 81.43 0.02957
## 284 8 81.48 0.02929
## 285 8 81.53 0.02903
## 286 8 81.57 0.02876
## 287 8 81.62 0.02850
## 288 8 81.67 0.02823
## 289 8 81.72 0.02797
## 290 8 81.76 0.02772
## 291 8 81.81 0.02746
## 292 8 81.86 0.02721
## 293 8 81.90 0.02696
## 294 8 81.95 0.02671
## 295 8 81.99 0.02647
## 296 8 82.04 0.02623
## 297 8 82.08 0.02599
## 298 8 82.12 0.02575
## 299 8 82.16 0.02551
## 300 8 82.21 0.02528
## 301 8 82.25 0.02504
## 302 8 82.29 0.02481
## 303 8 82.33 0.02459
## 304 8 82.37 0.02436
## 305 8 82.41 0.02414
## 306 8 82.45 0.02392
## 307 8 82.49 0.02370
## 308 8 82.53 0.02348
## 309 9 82.57 0.02326
## 310 9 82.60 0.02305
## 311 9 82.64 0.02284
## 312 9 82.68 0.02263
## 313 9 82.72 0.02242
## 314 9 82.75 0.02222
## 315 9 82.79 0.02201
## 316 9 82.83 0.02181
## 317 9 82.86 0.02161
## 318 9 82.90 0.02141
## 319 9 82.93 0.02121
## 320 9 82.97 0.02102
## 321 9 83.00 0.02083
## 322 9 83.03 0.02064
## 323 9 83.07 0.02045
## 324 9 83.10 0.02026
## 325 9 83.13 0.02007
## 326 9 83.16 0.01989
## 327 9 83.20 0.01971
## 328 9 83.23 0.01953
## 329 9 83.26 0.01935
## 330 9 83.29 0.01917
## 331 9 83.32 0.01899
## 332 9 83.35 0.01882
## 333 9 83.38 0.01865
## 334 9 83.41 0.01847
## 335 9 83.44 0.01831
## 336 9 83.47 0.01814
## 337 9 83.49 0.01797
## 338 9 83.52 0.01781
## 339 9 83.55 0.01764
## 340 9 83.58 0.01748
## 341 9 83.61 0.01732
## 342 9 83.63 0.01716
## 343 9 83.66 0.01700
## 344 9 83.69 0.01685
## 345 9 83.71 0.01669
## 346 9 83.74 0.01654
## 347 9 83.76 0.01639
## 348 9 83.79 0.01624
## 349 9 83.81 0.01609
## 350 9 83.84 0.01594
## 351 9 83.86 0.01579
## 352 9 83.89 0.01565
## 353 9 83.91 0.01551
## 354 9 83.93 0.01536
## 355 9 83.96 0.01522
## 356 9 83.98 0.01508
## 357 9 84.00 0.01494
## 358 9 84.02 0.01481
## 359 9 84.05 0.01467
## 360 9 84.07 0.01454
## 361 9 84.09 0.01440
## 362 9 84.11 0.01427
## 363 9 84.13 0.01414
## 364 9 84.15 0.01401
## 365 9 84.17 0.01388
## 366 9 84.19 0.01375
## 367 9 84.22 0.01363
## 368 9 84.24 0.01350
## 369 9 84.25 0.01338
## 370 9 84.27 0.01326
## 371 9 84.29 0.01314
## 372 9 84.31 0.01301
## 373 9 84.33 0.01290
## 374 9 84.35 0.01278
## 375 9 84.37 0.01266
## 376 9 84.39 0.01254
## 377 9 84.40 0.01243
## 378 9 84.42 0.01231
## 379 9 84.44 0.01220
## 380 9 84.46 0.01209
## 381 9 84.47 0.01198
## 382 9 84.49 0.01187
## 383 9 84.51 0.01176
## 384 9 84.52 0.01165
## 385 9 84.54 0.01154
## 386 9 84.56 0.01144
## 387 9 84.57 0.01133
## 388 9 84.59 0.01123
## 389 9 84.60 0.01113
## 390 9 84.62 0.01102
## 391 9 84.64 0.01092
## 392 9 84.65 0.01082
## 393 9 84.67 0.01072
## 394 9 84.68 0.01063
## 395 9 84.69 0.01053
## 396 9 84.71 0.01043
## 397 9 84.72 0.01034
## 398 9 84.74 0.01024
## 399 9 84.75 0.01015
## 400 9 84.76 0.01005
## 401 9 84.78 0.00996
## 402 9 84.79 0.00987
## 403 9 84.80 0.00978
## 404 9 84.82 0.00969
## 405 9 84.83 0.00960
## 406 9 84.84 0.00951
## 407 9 84.85 0.00942
## 408 9 84.87 0.00934
## 409 9 84.88 0.00925
## 410 9 84.89 0.00917
## 411 9 84.90 0.00908
## 412 9 84.91 0.00900
## 413 9 84.93 0.00892
## 414 9 84.94 0.00884
## 415 9 84.95 0.00876
## 416 9 84.96 0.00868
## 417 9 84.97 0.00860
## 418 9 84.98 0.00852
## 419 9 84.99 0.00844
## 420 9 85.00 0.00836
## 421 9 85.01 0.00828
## 422 9 85.02 0.00821
## 423 9 85.03 0.00813
## 424 9 85.04 0.00806
## 425 9 85.05 0.00798
## 426 9 85.06 0.00791
## 427 9 85.07 0.00784
## 428 9 85.08 0.00777
## 429 9 85.09 0.00769
## 430 9 85.10 0.00762
## 431 9 85.11 0.00755
## 432 9 85.12 0.00749
## 433 9 85.13 0.00742
## 434 9 85.14 0.00735
## 435 9 85.14 0.00728
## 436 9 85.15 0.00721
## 437 9 85.16 0.00715
## 438 9 85.17 0.00708
## 439 9 85.18 0.00702
## 440 9 85.19 0.00695
## 441 9 85.19 0.00689
## 442 9 85.20 0.00683
## 443 9 85.21 0.00676
## 444 9 85.22 0.00670
## 445 9 85.23 0.00664
## 446 9 85.23 0.00658
## 447 9 85.24 0.00652
## 448 9 85.25 0.00646
## 449 9 85.25 0.00640
## 450 9 85.26 0.00634
## 451 9 85.27 0.00628
## 452 9 85.28 0.00622
## 453 9 85.28 0.00617
## 454 9 85.29 0.00611
## 455 9 85.30 0.00605
## 456 9 85.30 0.00600
## 457 9 85.31 0.00594
## 458 9 85.32 0.00589
## 459 9 85.32 0.00584
## 460 9 85.33 0.00578
## 461 9 85.33 0.00573
## 462 9 85.34 0.00568
## 463 9 85.35 0.00562
## 464 9 85.35 0.00557
## 465 9 85.36 0.00552
## 466 9 85.36 0.00547
## 467 9 85.37 0.00542
## 468 9 85.37 0.00537
## 469 9 85.38 0.00532
## 470 9 85.38 0.00527
## 471 9 85.39 0.00522
## 472 9 85.40 0.00518
## 473 9 85.40 0.00513
## 474 9 85.41 0.00508
## 475 9 85.41 0.00504
## 476 9 85.42 0.00499
## 477 9 85.42 0.00494
## 478 9 85.43 0.00490
## 479 9 85.43 0.00485
## 480 9 85.43 0.00481
## 481 9 85.44 0.00476
## 482 9 85.44 0.00472
## 483 9 85.45 0.00468
## 484 9 85.45 0.00463
## 485 9 85.46 0.00459
## 486 9 85.46 0.00455
## 487 9 85.47 0.00451
## 488 9 85.47 0.00447
## 489 9 85.47 0.00442
## 490 9 85.48 0.00438
## 491 9 85.48 0.00434
## 492 9 85.49 0.00430
## 493 9 85.49 0.00426
## 494 9 85.49 0.00423
## 495 9 85.50 0.00419
## 496 9 85.50 0.00415
## 497 9 85.50 0.00411
## 498 9 85.51 0.00407
## 499 9 85.51 0.00404
## 500 9 85.51 0.00400
## 501 9 85.52 0.00396
## 502 9 85.52 0.00393
## 503 9 85.52 0.00389
## 504 9 85.53 0.00385
## 505 9 85.53 0.00382
## 506 9 85.53 0.00378
## 507 9 85.54 0.00375
## 508 9 85.54 0.00371
## 509 9 85.54 0.00368
## 510 9 85.55 0.00365
## 511 9 85.55 0.00361
## 512 9 85.55 0.00358
## 513 9 85.56 0.00355
## 514 9 85.56 0.00351
## 515 9 85.56 0.00348
## 516 9 85.56 0.00345
## 517 9 85.57 0.00342
## 518 9 85.57 0.00339
## 519 9 85.57 0.00336
## 520 9 85.57 0.00332
## 521 9 85.58 0.00330
## 522 9 85.58 0.00327
## 523 9 85.58 0.00324
## 524 9 85.58 0.00320
## 525 9 85.59 0.00318
## 526 9 85.59 0.00315
## 527 9 85.59 0.00312
## 528 9 85.59 0.00309
## 529 9 85.60 0.00306
## 530 9 85.60 0.00303
## 531 9 85.60 0.00300
## 532 9 85.60 0.00298
## 533 9 85.60 0.00295
## 534 9 85.61 0.00292
## 535 9 85.61 0.00290
## 536 9 85.61 0.00287
## 537 9 85.61 0.00284
## 538 9 85.61 0.00282
## 539 9 85.62 0.00279
## 540 9 85.62 0.00277
## 541 9 85.62 0.00274
## 542 9 85.62 0.00271
## 543 9 85.62 0.00269
## 544 9 85.63 0.00266
## 545 9 85.63 0.00264
## 546 9 85.63 0.00262
## 547 9 85.63 0.00259
## 548 9 85.63 0.00257
## 549 9 85.63 0.00254
## 550 9 85.64 0.00252
## 551 9 85.64 0.00250
## 552 9 85.64 0.00248
## 553 9 85.64 0.00245
## 554 9 85.64 0.00243
## 555 9 85.64 0.00241
## 556 9 85.64 0.00239
## 557 9 85.65 0.00236
## 558 9 85.65 0.00234
## 559 9 85.65 0.00232
## 560 9 85.65 0.00230
## 561 9 85.65 0.00228
## 562 9 85.65 0.00226
## 563 9 85.65 0.00224
## 564 9 85.66 0.00222
## 565 9 85.66 0.00220
## 566 9 85.66 0.00218
## 567 9 85.66 0.00216
## 568 9 85.66 0.00214
## 569 9 85.66 0.00212
## 570 9 85.66 0.00210
## 571 9 85.66 0.00208
## 572 9 85.67 0.00206
## 573 9 85.67 0.00204
## 574 9 85.67 0.00202
## 575 9 85.67 0.00200
## 576 9 85.67 0.00198
## 577 9 85.67 0.00197
## 578 9 85.67 0.00195
## 579 9 85.67 0.00193
## 580 9 85.67 0.00191
NROW(lasso$lambda)
## [1] 580
lasso$lambda[580]
## [1] 0.001912477
##最优lambda=0.001912477
plot(lasso)
plot(lasso,xvar = "lambda",label = T)
####最优lambda=0.001912477
lasso.coef<-coef(lasso,s=0.001912477)
lasso.coef
## 10 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) -5.85678193
## size 0.34354513
## epithelial_cell_size 0.08104042
## thickness 1.39801994
## shape 1.25179405
## adhesion 0.60230200
## nuclei 2.12058803
## chromatin 1.80059787
## nucleoli 1.08774083
## mitoses 1.42551555
##可以看出自变量都没有被筛选出来
##方法4逐步回归
rm(list = ls())
load('./data.Rdata')
library(MASS)
#首先建立一个模型
modle<-glm(status~.,data = data,family = binomial())
summary(modle)
##
## Call:
## glm(formula = status ~ ., family = binomial(), data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.4566 -0.1030 -0.0817 0.0589 2.4288
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.12476 1.36180 3.029 0.00245 **
## size 0.36416 0.16403 2.220 0.02642 *
## epithelial_cell_size 0.09753 0.14239 0.685 0.49340
## thicknessLow -1.53354 0.53706 -2.855 0.00430 **
## shapeLow -1.27029 0.64410 -1.972 0.04859 *
## adhesionLow -0.67813 0.54362 -1.247 0.21224
## nucleiLow -2.22493 0.50767 -4.383 1.17e-05 ***
## chromatinLow -1.91294 0.53155 -3.599 0.00032 ***
## nucleoliLow -1.12753 0.52714 -2.139 0.03244 *
## mitosesLow -1.63792 0.68457 -2.393 0.01673 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 884.35 on 682 degrees of freedom
## Residual deviance: 126.18 on 673 degrees of freedom
## AIC: 146.18
##
## Number of Fisher Scoring iterations: 7
exp(cbind("OR"=coef(modle),confint(modle)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 61.8531093 4.41857754 947.9079276
## size 1.4393031 1.07466010 2.0589840
## epithelial_cell_size 1.1024391 0.83632218 1.4672771
## thicknessLow 0.2157713 0.07256867 0.6110435
## shapeLow 0.2807493 0.08020564 1.0109500
## adhesionLow 0.5075669 0.17741815 1.5259506
## nucleiLow 0.1080754 0.03906304 0.2907083
## chromatinLow 0.1476456 0.05091027 0.4167445
## nucleoliLow 0.3238332 0.11535887 0.9263018
## mitosesLow 0.1943831 0.04823036 0.7187466
##再使用逐步回归
###向前法逐步回归
modle.forward<-stepAIC(modle,direction = "forward")
## Start: AIC=146.18
## status ~ size + epithelial_cell_size + thickness + shape + adhesion +
## nuclei + chromatin + nucleoli + mitoses
##得到的结果是全部变量,无改变。
###向后法逐步回归
modle.back<-stepAIC(modle,direction = "backward")
## Start: AIC=146.18
## status ~ size + epithelial_cell_size + thickness + shape + adhesion +
## nuclei + chromatin + nucleoli + mitoses
##
## Df Deviance AIC
## - epithelial_cell_size 1 126.65 144.65
## - adhesion 1 127.68 145.68
## <none> 126.18 146.18
## - shape 1 129.96 147.96
## - nucleoli 1 130.58 148.58
## - mitoses 1 132.26 150.26
## - size 1 132.39 150.39
## - thickness 1 134.48 152.48
## - chromatin 1 139.03 157.03
## - nuclei 1 145.15 163.15
##
## Step: AIC=144.65
## status ~ size + thickness + shape + adhesion + nuclei + chromatin +
## nucleoli + mitoses
##
## Df Deviance AIC
## - adhesion 1 128.42 144.42
## <none> 126.65 144.65
## - shape 1 131.04 147.04
## - nucleoli 1 131.27 147.27
## - mitoses 1 132.50 148.50
## - size 1 134.79 150.79
## - thickness 1 135.47 151.47
## - chromatin 1 139.76 155.76
## - nuclei 1 145.39 161.39
##
## Step: AIC=144.42
## status ~ size + thickness + shape + nuclei + chromatin + nucleoli +
## mitoses
##
## Df Deviance AIC
## <none> 128.42 144.42
## - shape 1 132.86 146.86
## - mitoses 1 133.33 147.33
## - nucleoli 1 134.40 148.40
## - thickness 1 137.91 151.91
## - size 1 138.59 152.59
## - chromatin 1 141.11 155.11
## - nuclei 1 149.62 163.62
#Step: AIC=144.42
#status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses
###向前向后法逐步回归
modle.both<-stepAIC(modle,direction = "both")
## Start: AIC=146.18
## status ~ size + epithelial_cell_size + thickness + shape + adhesion +
## nuclei + chromatin + nucleoli + mitoses
##
## Df Deviance AIC
## - epithelial_cell_size 1 126.65 144.65
## - adhesion 1 127.68 145.68
## <none> 126.18 146.18
## - shape 1 129.96 147.96
## - nucleoli 1 130.58 148.58
## - mitoses 1 132.26 150.26
## - size 1 132.39 150.39
## - thickness 1 134.48 152.48
## - chromatin 1 139.03 157.03
## - nuclei 1 145.15 163.15
##
## Step: AIC=144.65
## status ~ size + thickness + shape + adhesion + nuclei + chromatin +
## nucleoli + mitoses
##
## Df Deviance AIC
## - adhesion 1 128.42 144.42
## <none> 126.65 144.65
## + epithelial_cell_size 1 126.18 146.18
## - shape 1 131.04 147.04
## - nucleoli 1 131.27 147.27
## - mitoses 1 132.50 148.50
## - size 1 134.79 150.79
## - thickness 1 135.47 151.47
## - chromatin 1 139.76 155.76
## - nuclei 1 145.39 161.39
##
## Step: AIC=144.42
## status ~ size + thickness + shape + nuclei + chromatin + nucleoli +
## mitoses
##
## Df Deviance AIC
## <none> 128.42 144.42
## + adhesion 1 126.65 144.65
## + epithelial_cell_size 1 127.68 145.68
## - shape 1 132.86 146.86
## - mitoses 1 133.33 147.33
## - nucleoli 1 134.40 148.40
## - thickness 1 137.91 151.91
## - size 1 138.59 152.59
## - chromatin 1 141.11 155.11
## - nuclei 1 149.62 163.62
#Step: AIC=144.42
#status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses
#结果与向后法一致。
###方法5 最优子集
rm(list = ls())
load('./data.Rdata')
library(bestglm)
## 载入需要的程辑包:leaps
#需要把应变量放在最后一行
data$staus1<-data$status
data<-data[,-1]
bestAIC<-bestglm(data,family = binomial(),IC = "BIC")
## Morgan-Tatar search since family is non-gaussian.
attr(bestAIC$BestModel$terms,"term.labels")
## [1] "size" "thickness" "nuclei" "chromatin" "nucleoli"
##筛选出来的自变量包括"size" "thickness" "nuclei" "chromatin" "nucleoli"
glm(staus1~ size+thickness+nuclei+chromatin+nucleoli,data = data,family = binomial())
##
## Call: glm(formula = staus1 ~ size + thickness + nuclei + chromatin +
## nucleoli, family = binomial(), data = data)
##
## Coefficients:
## (Intercept) size thicknessLow nucleiLow chromatinLow
## 1.3892 0.7065 -1.8117 -2.4841 -1.7778
## nucleoliLow
## -1.5696
##
## Degrees of Freedom: 682 Total (i.e. Null); 677 Residual
## Null Deviance: 884.4
## Residual Deviance: 136.6 AIC: 148.6
##AIC: 148.6
###方法6 随机森林
rm(list = ls())
library(MASS)
data<-biopsy
data<-data[,-1]
data<-na.omit(data)
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## 载入程辑包:'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
set.seed(1234)
random<-randomForest(class~.,data = data)
random
##
## Call:
## randomForest(formula = class ~ ., data = data)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 2.49%
## Confusion matrix:
## benign malignant class.error
## benign 433 11 0.02477477
## malignant 6 233 0.02510460
##OOB estimate of error rate: 2.49%
##benign class.error 0.02477477
##malignant class.error 0.02510460
plot(random)
##红绿黑线分别表示:良性、恶性、整体
which.min(random$err.rate[,1])
## [1] 24
set.seed(1234)
random1<-randomForest(class~.,data = data,ntree=24)
random1
##
## Call:
## randomForest(formula = class ~ ., data = data, ntree = 24)
## Type of random forest: classification
## Number of trees: 24
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 2.49%
## Confusion matrix:
## benign malignant class.error
## benign 433 11 0.02477477
## malignant 6 233 0.02510460
##OOB estimate of error rate: 2.49%
##benign class.error 0.02477477
##malignant class.error 0.02510460
##变量重要性评分
varImpPlot(random1)
##自变量V2/3/6/7/5
#通过以上方法就可以得到筛选出来的变量进行模型构建了
rm(list = ls())
load("./data.Rdata")
library(MASS)
modle<-glm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
data = data,family = binomial())
summary(modle)
##
## Call:
## glm(formula = status ~ size + thickness + shape + nuclei + chromatin +
## nucleoli + mitoses, family = binomial(), data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3655 -0.1072 -0.0858 0.0667 2.5741
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.8738 1.2259 3.160 0.00158 **
## size 0.4466 0.1633 2.735 0.00623 **
## thicknessLow -1.6101 0.5286 -3.046 0.00232 **
## shapeLow -1.3560 0.6372 -2.128 0.03333 *
## nucleiLow -2.3269 0.5031 -4.625 3.75e-06 ***
## chromatinLow -1.9142 0.5365 -3.568 0.00036 ***
## nucleoliLow -1.2903 0.5165 -2.498 0.01249 *
## mitosesLow -1.4256 0.6585 -2.165 0.03038 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 884.35 on 682 degrees of freedom
## Residual deviance: 128.42 on 675 degrees of freedom
## AIC: 144.42
##
## Number of Fisher Scoring iterations: 7
modle$coefficients
## (Intercept) size thicknessLow shapeLow nucleiLow chromatinLow
## 3.8737617 0.4465546 -1.6101132 -1.3560239 -2.3268944 -1.9142429
## nucleoliLow mitosesLow
## -1.2902597 -1.4256451
exp(cbind("OR"=coef(modle),confint(modle)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 48.12307098 4.70500122 580.5895696
## size 1.56291798 1.16995577 2.2243591
## thicknessLow 0.19986500 0.06823304 0.5555434
## shapeLow 0.25768332 0.07410375 0.9089832
## nucleiLow 0.09759838 0.03555500 0.2598215
## chromatinLow 0.14745343 0.05040969 0.4199752
## nucleoliLow 0.27519932 0.09991303 0.7694625
## mitosesLow 0.24035335 0.06326596 0.8505815
##二元logistic回归通过列线图展示结果
library(rms)
dd<-datadist(data)
options(datadist="dd")
fit<-lrm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
data = data,x = T,y = T)
nom<-nomogram(fit, fun=plogis,
fun.at=c(0.0001,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.9999),
lp=T,
funlabel="Risk of hypertension")
plot(nom)
##模型评价 ###方法1 BRIER score评分 BRIER score越小校准效果越好
rm(list = ls())
load("./data.Rdata")
library(rms)
dd<-datadist(data)
options(datadist="dd")
str(data)
## 'data.frame': 683 obs. of 10 variables:
## $ status : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ epithelial_cell_size: num 2 7 2 3 2 7 2 2 2 2 ...
## $ thickness : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
## $ shape : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
## $ adhesion : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
## $ nuclei : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
## $ chromatin : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
## $ nucleoli : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
## $ mitoses : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
data$status<-as.numeric(as.character(data$status))
str(data)
## 'data.frame': 683 obs. of 10 variables:
## $ status : num 0 0 0 0 0 1 0 0 0 0 ...
## $ size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ epithelial_cell_size: num 2 7 2 3 2 7 2 2 2 2 ...
## $ thickness : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
## $ shape : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
## $ adhesion : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
## $ nuclei : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
## $ chromatin : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
## $ nucleoli : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
## $ mitoses : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
#使用向后法结果展示
f <- lrm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
data = data,x = T,y = T)
pred.logit <- predict(f)
phat <- 1/(1+exp(-pred.logit))
val.prob(phat, data$status, cex=0.8)
## Dxy C (ROC) R2 D D:Chi-sq
## 9.861095e-01 9.930548e-01 9.219452e-01 1.105311e+00 7.559274e+02
## D:p U U:Chi-sq U:p Q
## NA -2.928258e-03 -5.684342e-13 1.000000e+00 1.108239e+00
## Brier Intercept Slope Emax E90
## 2.373327e-02 -2.880692e-09 1.000000e+00 1.866464e-01 2.045948e-02
## Eavg S:z S:p
## 1.867196e-02 -1.822993e-01 8.553479e-01
###BRIER score=1.024
#####方法2 AIC AIC越小校准效果越好
rm(list = ls())
load("./data.Rdata")
library(rms)
dd<-datadist(data)
options(datadist="dd")
str(data)
## 'data.frame': 683 obs. of 10 variables:
## $ status : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ epithelial_cell_size: num 2 7 2 3 2 7 2 2 2 2 ...
## $ thickness : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
## $ shape : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
## $ adhesion : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
## $ nuclei : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
## $ chromatin : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
## $ nucleoli : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
## $ mitoses : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
modle<-glm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
data = data,family = binomial())
AIC(modle)
## [1] 144.4228
#####方法3 BIC BIC越小校准效果越好,同AIC
rm(list = ls())
load("./data.Rdata")
library(rms)
dd<-datadist(data)
options(datadist="dd")
str(data)
## 'data.frame': 683 obs. of 10 variables:
## $ status : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ epithelial_cell_size: num 2 7 2 3 2 7 2 2 2 2 ...
## $ thickness : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
## $ shape : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
## $ adhesion : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
## $ nuclei : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
## $ chromatin : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
## $ nucleoli : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
## $ mitoses : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
modle<-glm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
data = data,family = binomial())
BIC(modle)
## [1] 180.6348
##>0.91非常准确
rm(list = ls())
load("./data.Rdata")
library(rms)
dd<-datadist(data)
options(datadist="dd")
str(data)
## 'data.frame': 683 obs. of 10 variables:
## $ status : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ epithelial_cell_size: num 2 7 2 3 2 7 2 2 2 2 ...
## $ thickness : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
## $ shape : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
## $ adhesion : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
## $ nuclei : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
## $ chromatin : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
## $ nucleoli : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
## $ mitoses : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
##使用向后法结果展示
modle<-glm(status ~ size + thickness + shape + nuclei + chromatin + nucleoli + mitoses,
data = data,family = binomial())
##AUC计算
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## 载入程辑包:'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
data$pred<-predict(modle,type = "response")
ROC<-roc(data$status,data$pred)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(ROC)
## Area under the curve: 0.993
ci.auc(ROC)
## 95% CI: 0.9882-0.9979 (DeLong)
##绘制ROC
plot(ROC)#最简单版
plot(ROC, print.auc=TRUE, auc.polygon=TRUE,
grid=c(0.1, 0.2),
#grid.col=c("green", "red"),
max.auc.polygon=TRUE,
auc.polygon.col="skyblue",
print.thres=TRUE)
plot(ROC, print.auc=TRUE, auc.polygon=TRUE,
partial.auc=c(1, 0.8),partial.auc.focus="sp",
grid=c(0.1, 0.2),grid.col=c("green", "red"),
max.auc.polygon=TRUE,
auc.polygon.col="skyblue",
print.thres=TRUE, reuse.auc=FALSE)
##同一AUC曲线下同时展现多条曲线
modle2<-glm(status ~ size + thickness,
data = data,family = binomial())
data$pred2<-predict(modle2,type = "response")
ROC2<-roc(data$status,data$pred2)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(ROC2)
## Area under the curve: 0.9828
ci.auc(ROC2)
## 95% CI: 0.974-0.9917 (DeLong)
plot(ROC, col="red")
plot(ROC2, col="blue", add=TRUE)
legend("bottomright",legend = c(paste("AUC of modle1 =",round(auc(ROC),3)),
paste("AUC of modle2 =",round(auc(ROC2),3))),
col=c("red","blue"),
lty= 1 ,lwd= 2)
##cindex 计算,二元logistics回归分析中,cindex与AUC值一样
fit<-lrm(status ~ size + thickness,
data = data,x = T,y = T)
####计算方法1
data$predict<-predict(fit)
library(pROC)
modleroc<-roc(data$status,data$predict)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(modleroc)
## Area under the curve: 0.9828
ci.auc(modleroc)
## 95% CI: 0.974-0.9917 (DeLong)
####计算方法2
library(Hmisc)
str(data)
## 'data.frame': 683 obs. of 13 variables:
## $ status : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ epithelial_cell_size: num 2 7 2 3 2 7 2 2 2 2 ...
## $ thickness : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
## $ shape : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
## $ adhesion : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
## $ nuclei : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
## $ chromatin : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
## $ nucleoli : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
## $ mitoses : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
## $ pred : num 0.01811 0.73688 0.00367 0.9838 0.00367 ...
## $ pred2 : num 0.0755 0.8536 0.0112 0.9994 0.0112 ...
## $ predict : num -2.51 1.76 -4.48 7.45 -4.48 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
data$status<-as.numeric(as.character(data$status))
str(data)
## 'data.frame': 683 obs. of 13 variables:
## $ status : num 0 0 0 0 0 1 0 0 0 0 ...
## $ size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ epithelial_cell_size: num 2 7 2 3 2 7 2 2 2 2 ...
## $ thickness : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
## $ shape : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
## $ adhesion : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
## $ nuclei : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
## $ chromatin : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
## $ nucleoli : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
## $ mitoses : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
## $ pred : num 0.01811 0.73688 0.00367 0.9838 0.00367 ...
## $ pred2 : num 0.0755 0.8536 0.0112 0.9994 0.0112 ...
## $ predict : num -2.51 1.76 -4.48 7.45 -4.48 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
somers2(data$predict,data$status)
## C Dxy n Missing
## 0.9828348 0.9656696 683.0000000 0.0000000
####计算方法3
fit
## Logistic Regression Model
##
## lrm(formula = status ~ size + thickness, data = data, x = T,
## y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 683 LR chi2 659.13 R2 0.853 C 0.983
## 0 444 d.f. 2 g 5.153 Dxy 0.965
## 1 239 Pr(> chi2) <0.0001 gr 173.033 gamma 0.976
## max |deriv| 7e-09 gp 0.434 tau-a 0.440
## Brier 0.043
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -3.9280 0.4160 -9.44 <0.0001
## size 1.4228 0.1343 10.60 <0.0001
## thickness=Low -1.9774 0.3804 -5.20 <0.0001
##
###方法5 拟合优度检验
##依据之前方法得到的自变量
rm(list = ls())
load("./data.Rdata")
##T检验/LASSO回归
formula1<-as.formula(status ~ size + epithelial_cell_size+thickness + shape +adhesion+ nuclei + chromatin +
nucleoli + mitoses)
##单因素logistic回归、逐步回归
formula2<-as.formula(status ~ size + thickness + shape + nuclei + chromatin +
nucleoli + mitoses)
##最优子集
formula3<-as.formula(status~ size+thickness+nuclei+chromatin+nucleoli)
library(rms)
dd<-datadist(data)
options(datadist = "dd")
library(ResourceSelection)
## ResourceSelection 0.3-5 2019-07-22
fit1<-glm(formula1,data = data,family = binomial())
h1<-hoslem.test(fit1$y,fitted(fit1),g=10)
h1
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: fit1$y, fitted(fit1)
## X-squared = 17.03, df = 8, p-value = 0.0298
fit2<-glm(formula2,data = data,family = binomial())
h2<-hoslem.test(fit2$y,fitted(fit2),g=10)
h2
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: fit2$y, fitted(fit2)
## X-squared = 12.897, df = 8, p-value = 0.1154
##X-squared = 12.897, df = 8, p-value = 0.1154 说明模型拟合优度好
fit3<-glm(formula3,data = data,family = binomial())
h3<-hoslem.test(fit3$y,fitted(fit3),g=10)
h3
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: fit3$y, fitted(fit3)
## X-squared = 27.02, df = 8, p-value = 0.0007015
######方法6 DCA曲线(临床决策曲线)
##依据之前方法得到的自变量
rm(list = ls())
load("./data.Rdata")
##T检验/LASSO回归
formula1<-as.formula(status ~ size + epithelial_cell_size+thickness + shape +adhesion+ nuclei + chromatin +
nucleoli + mitoses)
##单因素logistic回归、逐步回归
formula2<-as.formula(status ~ size + thickness + shape + nuclei + chromatin +
nucleoli + mitoses)
##最优子集
formula3<-as.formula(status~ size+thickness+nuclei+chromatin+nucleoli)
library(rms)
dd<-datadist(data)
options(datadist = "dd")
str(data)
## 'data.frame': 683 obs. of 10 variables:
## $ status : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ epithelial_cell_size: num 2 7 2 3 2 7 2 2 2 2 ...
## $ thickness : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
## $ shape : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
## $ adhesion : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
## $ nuclei : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
## $ chromatin : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
## $ nucleoli : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
## $ mitoses : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
data$status<-as.numeric(as.character(data$status))
str(data)
## 'data.frame': 683 obs. of 10 variables:
## $ status : num 0 0 0 0 0 1 0 0 0 0 ...
## $ size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ epithelial_cell_size: num 2 7 2 3 2 7 2 2 2 2 ...
## $ thickness : Factor w/ 2 levels "High","Low": 1 1 2 1 2 1 2 2 2 2 ...
## $ shape : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 2 2 2 2 ...
## $ adhesion : Factor w/ 2 levels "High","Low": 2 1 2 2 1 1 2 2 2 2 ...
## $ nuclei : Factor w/ 2 levels "High","Low": 2 1 2 1 2 1 1 2 2 2 ...
## $ chromatin : Factor w/ 2 levels "High","Low": 2 2 2 2 2 1 2 2 2 2 ...
## $ nucleoli : Factor w/ 2 levels "High","Low": 2 2 2 1 2 1 2 2 2 2 ...
## $ mitoses : Factor w/ 2 levels "High","Low": 2 2 2 2 2 2 2 2 1 2 ...
## - attr(*, "na.action")= 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
library(rmda)
curve1<-decision_curve(formula1,data = data,family = binomial(link = "logit"),
thresholds = seq(0, 1, by = 0.01),
confidence.intervals = 0.95,study.design = "case-control",
population.prevalence = 0.3)
## Calculating net benefit curves for case-control data. All calculations are done conditional on the outcome prevalence provided.
## Note: The data provided is used to both fit a prediction model and to estimate the respective decision curve. This may cause bias in decision curve estimates leading to over-confidence in model performance.
curve2<-decision_curve(formula2,data = data,family = binomial(link = "logit"),
thresholds = seq(0, 1, by = 0.01),
confidence.intervals = 0.95,study.design = "case-control",
population.prevalence = 0.3)
## Calculating net benefit curves for case-control data. All calculations are done conditional on the outcome prevalence provided.
## Note: The data provided is used to both fit a prediction model and to estimate the respective decision curve. This may cause bias in decision curve estimates leading to over-confidence in model performance.
curve3<-decision_curve(formula3,data = data,family = binomial(link = "logit"),
thresholds = seq(0, 1, by = 0.01),
confidence.intervals = 0.95,study.design = "case-control",
population.prevalence = 0.3)
## Calculating net benefit curves for case-control data. All calculations are done conditional on the outcome prevalence provided.
## Note: The data provided is used to both fit a prediction model and to estimate the respective decision curve. This may cause bias in decision curve estimates leading to over-confidence in model performance.
summary(curve1,measure = "NB")
##
## Net Benefit (95% Confidence Intervals):
## ----------------------------------------------------------------------------------------------------------
## risk cost:benefit percent All status ~ size + None
## threshold ratio high risk epithelial_cell_size +
## thickness + shape + adhesion +
## nuclei + chromatin + nucleoli
## + mitoses
## ----------- -------------- ------------------ -------------------- -------------------------------- ------
## 0 0:1 100 0.3 0.3 0
## (100, 100) (0.3, 0.3) (0.3, 0.3)
##
## 0.01 1:99 51.599 0.293 0.298 0
## (36.149, 55.383) (0.293, 0.293) (0.297, 0.299)
##
## 0.02 1:49 40.405 0.286 0.298 0
## (34.572, 43.401) (0.286, 0.286) (0.297, 0.299)
##
## 0.03 3:97 35.518 0.278 0.298 0
## (33.626, 39.775) (0.278, 0.278) (0.296, 0.299)
##
## 0.04 1:24 34.887 0.271 0.298 0
## (32.995, 38.041) (0.271, 0.271) (0.295, 0.299)
##
## 0.05 1:19 34.447 0.263 0.296 0
## (32.68, 36.622) (0.263, 0.263) (0.294, 0.299)
##
## 0.06 3:47 33.974 0.255 0.296 0
## (32.365, 36.023) (0.255, 0.255) (0.294, 0.298)
##
## 0.07 7:93 33.816 0.247 0.296 0
## (32.082, 35.392) (0.247, 0.247) (0.293, 0.298)
##
## 0.08 2:23 33.816 0.239 0.295 0
## (31.892, 35.077) (0.239, 0.239) (0.293, 0.298)
##
## 0.09 9:91 33.343 0.231 0.295 0
## (31.766, 34.762) (0.231, 0.231) (0.292, 0.298)
##
## 0.1 1:9 33.028 0.222 0.295 0
## (31.609, 34.572) (0.222, 0.222) (0.292, 0.298)
##
## 0.11 11:89 32.87 0.213 0.295 0
## (31.451, 34.353) (0.213, 0.213) (0.291, 0.298)
##
## 0.12 3:22 32.87 0.205 0.295 0
## (31.451, 34.131) (0.205, 0.205) (0.29, 0.298)
##
## 0.13 13:87 32.87 0.195 0.294 0
## (31.419, 33.912) (0.195, 0.195) (0.29, 0.297)
##
## 0.14 7:43 32.87 0.186 0.294 0
## (31.326, 33.755) (0.186, 0.186) (0.289, 0.297)
##
## 0.15 3:17 32.555 0.176 0.294 0
## (31.293, 33.69) (0.176, 0.176) (0.289, 0.297)
##
## 0.16 4:21 32.429 0.167 0.292 0
## (31.2, 33.565) (0.167, 0.167) (0.288, 0.297)
##
## 0.17 17:83 32.271 0.157 0.292 0
## (31.168, 33.375) (0.157, 0.157) (0.287, 0.296)
##
## 0.18 9:41 32.271 0.146 0.292 0
## (31.136, 33.217) (0.146, 0.146) (0.287, 0.296)
##
## 0.19 19:81 32.146 0.136 0.29 0
## (31.042, 33.217) (0.136, 0.136) (0.286, 0.296)
##
## 0.2 1:4 32.146 0.125 0.29 0
## (30.978, 33.124) (0.125, 0.125) (0.285, 0.296)
##
## 0.21 21:79 32.146 0.114 0.29 0
## (30.885, 33.092) (0.114, 0.114) (0.284, 0.296)
##
## 0.22 11:39 31.988 0.103 0.29 0
## (30.856, 33.06) (0.103, 0.103) (0.284, 0.296)
##
## 0.23 23:77 31.988 0.091 0.289 0
## (30.853, 32.999) (0.091, 0.091) (0.283, 0.295)
##
## 0.24 6:19 31.988 0.079 0.289 0
## (30.82, 32.838) (0.079, 0.079) (0.282, 0.295)
##
## 0.25 1:3 31.831 0.067 0.289 0
## (30.759, 32.777) (0.067, 0.067) (0.281, 0.295)
##
## 0.26 13:37 31.831 0.054 0.288 0
## (30.759, 32.715) (0.054, 0.054) (0.28, 0.295)
##
## 0.27 27:73 31.831 0.041 0.288 0
## (30.634, 32.619) (0.041, 0.041) (0.28, 0.295)
##
## 0.28 7:18 31.39 0.028 0.288 0
## (30.602, 32.493) (0.028, 0.028) (0.279, 0.294)
##
## 0.29 29:71 31.264 0.014 0.286 0
## (30.602, 32.461) (0.014, 0.014) (0.278, 0.294)
##
## 0.3 3:7 31.264 0 0.286 0
## (30.54, 32.461) (0, 0) (0.278, 0.293)
##
## 0.31 31:69 31.139 -0.014 0.284 0
## (30.511, 32.336) (-0.014, -0.014) (0.277, 0.294)
##
## 0.32 8:17 31.139 -0.029 0.284 0
## (30.415, 32.274) (-0.029, -0.029) (0.276, 0.293)
##
## 0.33 33:67 31.139 -0.045 0.283 0
## (30.412, 32.178) (-0.045, -0.045) (0.275, 0.293)
##
## 0.34 17:33 31.139 -0.061 0.283 0
## (30.383, 32.149) (-0.061, -0.061) (0.274, 0.293)
##
## 0.35 7:13 31.139 -0.077 0.282 0
## (30.38, 32.114) (-0.077, -0.077) (0.273, 0.292)
##
## 0.36 9:16 31.139 -0.094 0.282 0
## (30.289, 31.959) (-0.094, -0.094) (0.272, 0.292)
##
## 0.37 37:63 31.139 -0.111 0.281 0
## (30.286, 31.895) (-0.111, -0.111) (0.271, 0.292)
##
## 0.38 19:31 30.981 -0.129 0.282 0
## (30.254, 31.895) (-0.129, -0.129) (0.27, 0.291)
##
## 0.39 39:61 30.981 -0.148 0.281 0
## (30.164, 31.799) (-0.148, -0.148) (0.269, 0.29)
##
## 0.4 2:3 30.981 -0.167 0.281 0
## (30.099, 31.708) (-0.167, -0.167) (0.269, 0.29)
##
## 0.41 41:59 30.856 -0.186 0.279 0
## (30.099, 31.676) (-0.186, -0.186) (0.268, 0.29)
##
## 0.42 21:29 30.856 -0.207 0.279 0
## (30.067, 31.612) (-0.207, -0.207) (0.267, 0.29)
##
## 0.43 43:57 30.698 -0.228 0.279 0
## (30.067, 31.521) (-0.228, -0.228) (0.265, 0.29)
##
## 0.44 11:14 30.698 -0.25 0.279 0
## (29.881, 31.483) (-0.25, -0.25) (0.263, 0.289)
##
## 0.45 9:11 30.698 -0.273 0.278 0
## (29.877, 31.393) (-0.273, -0.273) (0.262, 0.289)
##
## 0.46 23:27 30.572 -0.296 0.277 0
## (29.813, 31.361) (-0.296, -0.296) (0.26, 0.288)
##
## 0.47 47:53 30.572 -0.321 0.276 0
## (29.726, 31.329) (-0.321, -0.321) (0.259, 0.288)
##
## 0.48 12:13 30.572 -0.346 0.275 0
## (29.723, 31.264) (-0.346, -0.346) (0.258, 0.288)
##
## 0.49 49:51 30.572 -0.373 0.275 0
## (29.629, 31.203) (-0.373, -0.373) (0.255, 0.287)
##
## 0.5 1:1 30.321 -0.4 0.272 0
## (29.597, 31.171) (-0.4, -0.4) (0.254, 0.287)
##
## 0.51 51:49 30.196 -0.429 0.27 0
## (29.504, 31.139) (-0.429, -0.429) (0.252, 0.286)
##
## 0.52 13:12 30.07 -0.458 0.268 0
## (29.443, 31.045) (-0.458, -0.458) (0.251, 0.286)
##
## 0.53 53:47 29.945 -0.489 0.266 0
## (29.408, 30.981) (-0.489, -0.489) (0.249, 0.286)
##
## 0.54 27:23 29.819 -0.522 0.264 0
## (29.288, 30.888) (-0.522, -0.522) (0.247, 0.286)
##
## 0.55 11:9 29.819 -0.556 0.263 0
## (29.253, 30.826) (-0.556, -0.556) (0.247, 0.285)
##
## 0.56 14:11 29.819 -0.591 0.262 0
## (29.192, 30.73) (-0.591, -0.591) (0.245, 0.284)
##
## 0.57 57:43 29.694 -0.628 0.26 0
## (29.098, 30.64) (-0.628, -0.628) (0.243, 0.284)
##
## 0.58 29:21 29.443 -0.667 0.257 0
## (29.034, 30.605) (-0.667, -0.667) (0.24, 0.283)
##
## 0.59 59:41 29.317 -0.707 0.255 0
## (29.005, 30.511) (-0.707, -0.707) (0.24, 0.282)
##
## 0.6 3:2 29.317 -0.75 0.254 0
## (28.908, 30.482) (-0.75, -0.75) (0.238, 0.282)
##
## 0.61 61:39 29.317 -0.795 0.253 0
## (28.786, 30.418) (-0.795, -0.795) (0.235, 0.281)
##
## 0.62 31:19 29.192 -0.842 0.25 0
## (28.754, 30.353) (-0.842, -0.842) (0.234, 0.28)
##
## 0.63 63:37 29.066 -0.892 0.248 0
## (28.657, 30.228) (-0.892, -0.892) (0.231, 0.279)
##
## 0.64 16:9 28.941 -0.944 0.246 0
## (28.532, 30.135) (-0.944, -0.944) (0.229, 0.279)
##
## 0.65 13:7 28.941 -1 0.244 0
## (28.374, 30.064) (-1, -1) (0.228, 0.278)
##
## 0.66 33:17 28.783 -1.059 0.246 0
## (28.22, 29.948) (-1.059, -1.059) (0.225, 0.278)
##
## 0.67 67:33 28.657 -1.121 0.244 0
## (28.152, 29.881) (-1.121, -1.121) (0.221, 0.277)
##
## 0.68 17:8 28.657 -1.188 0.242 0
## (27.969, 29.787) (-1.188, -1.188) (0.219, 0.277)
##
## 0.69 69:31 28.657 -1.258 0.241 0
## (27.875, 29.723) (-1.258, -1.258) (0.215, 0.275)
##
## 0.7 7:3 28.657 -1.333 0.239 0
## (27.717, 29.629) (-1.333, -1.333) (0.213, 0.274)
##
## 0.71 71:29 28.532 -1.414 0.236 0
## (27.528, 29.594) (-1.414, -1.414) (0.21, 0.273)
##
## 0.72 18:7 28.532 -1.5 0.235 0
## (27.463, 29.504) (-1.5, -1.5) (0.208, 0.274)
##
## 0.73 73:27 28.406 -1.593 0.232 0
## (27.338, 29.472) (-1.593, -1.593) (0.203, 0.273)
##
## 0.74 37:13 28.406 -1.692 0.229 0
## (27.122, 29.443) (-1.692, -1.692) (0.2, 0.271)
##
## 0.75 3:1 28.281 -1.8 0.226 0
## (26.993, 29.346) (-1.8, -1.8) (0.197, 0.27)
##
## 0.76 19:6 28.281 -1.917 0.224 0
## (26.932, 29.282) (-1.917, -1.917) (0.192, 0.27)
##
## 0.77 77:23 28.281 -2.043 0.221 0
## (26.713, 29.218) (-2.043, -2.043) (0.191, 0.269)
##
## 0.78 39:11 28.281 -2.182 0.218 0
## (26.617, 29.031) (-2.182, -2.182) (0.188, 0.267)
##
## 0.79 79:21 27.904 -2.333 0.211 0
## (26.427, 28.938) (-2.333, -2.333) (0.184, 0.266)
##
## 0.8 4:1 27.621 -2.5 0.213 0
## (26.147, 28.812) (-2.5, -2.5) (0.179, 0.265)
##
## 0.81 81:19 27.37 -2.684 0.207 0
## (25.957, 28.748) (-2.684, -2.684) (0.174, 0.264)
##
## 0.82 41:9 27.119 -2.889 0.201 0
## (25.738, 28.654) (-2.889, -2.889) (0.166, 0.262)
##
## 0.83 83:17 27.119 -3.118 0.197 0
## (25.642, 28.432) (-3.118, -3.118) (0.163, 0.262)
##
## 0.84 21:4 26.993 -3.375 0.191 0
## (25.265, 28.31) (-3.375, -3.375) (0.157, 0.261)
##
## 0.85 17:3 26.617 -3.667 0.182 0
## (25.075, 28.307) (-3.667, -3.667) (0.149, 0.258)
##
## 0.86 43:7 26.459 -4 0.186 0
## (24.763, 28.12) (-4, -4) (0.143, 0.254)
##
## 0.87 87:13 26.083 -4.385 0.176 0
## (24.451, 28.056) (-4.385, -4.385) (0.133, 0.251)
##
## 0.88 22:3 26.083 -4.833 0.169 0
## (24.042, 27.963) (-4.833, -4.833) (0.124, 0.248)
##
## 0.89 89:11 25.832 -5.364 0.158 0
## (23.698, 27.711) (-5.364, -5.364) (0.114, 0.247)
##
## 0.9 9:1 25.423 -6 0.16 0
## (23.196, 27.46) (-6, -6) (0.099, 0.245)
##
## 0.91 91:9 24.921 -6.778 0.144 0
## (22.912, 27.274) (-6.778, -6.778) (0.08, 0.239)
##
## 0.92 23:2 24.795 -7.75 0.13 0
## (22.285, 27.052) (-7.75, -7.75) (0.066, 0.235)
##
## 0.93 93:7 24.419 -9 0.109 0
## (21.905, 26.958) (-9, -9) (0.05, 0.229)
##
## 0.94 47:3 24.01 -10.667 0.109 0
## (20.84, 26.675) (-10.667, -10.667) (0.033, 0.221)
##
## 0.95 19:1 23.193 -13 0.137 0
## (19.836, 26.173) (-13, -13) (0.002, 0.219)
##
## 0.96 24:1 22.314 -16.5 0.105 0
## (18.738, 25.671) (-16.5, -16.5) (-0.031, 0.214)
##
## 0.97 97:3 21.31 -22.333 0.055 0
## (16.92, 25.169) (-22.333, -22.333) (-0.08, 0.204)
##
## 0.98 49:1 19.395 -34 0.036 0
## (14.345, 24.509) (-34, -34) (-0.202, 0.211)
##
## 0.99 99:1 15.253 -69 -0.163 0
## (10.418, 23.128) (-69, -69) (-0.34, 0.213)
##
## 1 Inf:1 0 NA NA NA
## (0, 0) (NA, NA) (NA, NA)
## ----------------------------------------------------------------------------------------------------------
summary(curve1,measure = "sNB")
##
## Standardized Net Benefit (95% Confidence Intervals):
## ------------------------------------------------------------------------------------------------------------
## risk cost:benefit percent All status ~ size + None
## threshold ratio high risk epithelial_cell_size +
## thickness + shape + adhesion +
## nuclei + chromatin + nucleoli
## + mitoses
## ----------- -------------- ------------------ ---------------------- -------------------------------- ------
## 0 0:1 100 1 1 0
## (100, 100) (1, 1) (1, 1)
##
## 0.01 1:99 51.599 0.976 0.993 0
## (36.149, 55.383) (0.976, 0.976) (0.991, 0.998)
##
## 0.02 1:49 40.405 0.952 0.993 0
## (34.572, 43.401) (0.952, 0.952) (0.989, 0.997)
##
## 0.03 3:97 35.518 0.928 0.994 0
## (33.626, 39.775) (0.928, 0.928) (0.985, 0.996)
##
## 0.04 1:24 34.887 0.903 0.993 0
## (32.995, 38.041) (0.903, 0.903) (0.983, 0.996)
##
## 0.05 1:19 34.447 0.877 0.988 0
## (32.68, 36.622) (0.877, 0.877) (0.981, 0.995)
##
## 0.06 3:47 33.974 0.851 0.987 0
## (32.365, 36.023) (0.851, 0.851) (0.979, 0.995)
##
## 0.07 7:93 33.816 0.824 0.986 0
## (32.082, 35.392) (0.824, 0.824) (0.978, 0.994)
##
## 0.08 2:23 33.816 0.797 0.984 0
## (31.892, 35.077) (0.797, 0.797) (0.976, 0.994)
##
## 0.09 9:91 33.343 0.769 0.984 0
## (31.766, 34.762) (0.769, 0.769) (0.973, 0.993)
##
## 0.1 1:9 33.028 0.741 0.984 0
## (31.609, 34.572) (0.741, 0.741) (0.972, 0.993)
##
## 0.11 11:89 32.87 0.712 0.983 0
## (31.451, 34.353) (0.712, 0.712) (0.97, 0.992)
##
## 0.12 3:22 32.87 0.682 0.982 0
## (31.451, 34.131) (0.682, 0.682) (0.967, 0.992)
##
## 0.13 13:87 32.87 0.651 0.981 0
## (31.419, 33.912) (0.651, 0.651) (0.965, 0.991)
##
## 0.14 7:43 32.87 0.62 0.98 0
## (31.326, 33.755) (0.62, 0.62) (0.964, 0.991)
##
## 0.15 3:17 32.555 0.588 0.98 0
## (31.293, 33.69) (0.588, 0.588) (0.962, 0.99)
##
## 0.16 4:21 32.429 0.556 0.975 0
## (31.2, 33.565) (0.556, 0.556) (0.96, 0.989)
##
## 0.17 17:83 32.271 0.522 0.974 0
## (31.168, 33.375) (0.522, 0.522) (0.958, 0.988)
##
## 0.18 9:41 32.271 0.488 0.973 0
## (31.136, 33.217) (0.488, 0.488) (0.956, 0.987)
##
## 0.19 19:81 32.146 0.453 0.968 0
## (31.042, 33.217) (0.453, 0.453) (0.954, 0.987)
##
## 0.2 1:4 32.146 0.417 0.966 0
## (30.978, 33.124) (0.417, 0.417) (0.95, 0.987)
##
## 0.21 21:79 32.146 0.38 0.965 0
## (30.885, 33.092) (0.38, 0.38) (0.947, 0.986)
##
## 0.22 11:39 31.988 0.342 0.965 0
## (30.856, 33.06) (0.342, 0.342) (0.945, 0.985)
##
## 0.23 23:77 31.988 0.303 0.964 0
## (30.853, 32.999) (0.303, 0.303) (0.942, 0.983)
##
## 0.24 6:19 31.988 0.263 0.963 0
## (30.82, 32.838) (0.263, 0.263) (0.941, 0.984)
##
## 0.25 1:3 31.831 0.222 0.963 0
## (30.759, 32.777) (0.222, 0.222) (0.938, 0.984)
##
## 0.26 13:37 31.831 0.18 0.962 0
## (30.759, 32.715) (0.18, 0.18) (0.934, 0.983)
##
## 0.27 27:73 31.831 0.137 0.96 0
## (30.634, 32.619) (0.137, 0.137) (0.934, 0.982)
##
## 0.28 7:18 31.39 0.093 0.959 0
## (30.602, 32.493) (0.093, 0.093) (0.93, 0.981)
##
## 0.29 29:71 31.264 0.047 0.953 0
## (30.602, 32.461) (0.047, 0.047) (0.928, 0.979)
##
## 0.3 3:7 31.264 0 0.952 0
## (30.54, 32.461) (0, 0) (0.926, 0.978)
##
## 0.31 31:69 31.139 -0.048 0.947 0
## (30.511, 32.336) (-0.048, -0.048) (0.923, 0.979)
##
## 0.32 8:17 31.139 -0.098 0.945 0
## (30.415, 32.274) (-0.098, -0.098) (0.92, 0.977)
##
## 0.33 33:67 31.139 -0.149 0.944 0
## (30.412, 32.178) (-0.149, -0.149) (0.917, 0.976)
##
## 0.34 17:33 31.139 -0.202 0.942 0
## (30.383, 32.149) (-0.202, -0.202) (0.912, 0.975)
##
## 0.35 7:13 31.139 -0.256 0.941 0
## (30.38, 32.114) (-0.256, -0.256) (0.91, 0.973)
##
## 0.36 9:16 31.139 -0.312 0.939 0
## (30.289, 31.959) (-0.312, -0.312) (0.907, 0.973)
##
## 0.37 37:63 31.139 -0.37 0.938 0
## (30.286, 31.895) (-0.37, -0.37) (0.904, 0.972)
##
## 0.38 19:31 30.981 -0.43 0.939 0
## (30.254, 31.895) (-0.43, -0.43) (0.9, 0.969)
##
## 0.39 39:61 30.981 -0.492 0.938 0
## (30.164, 31.799) (-0.492, -0.492) (0.898, 0.968)
##
## 0.4 2:3 30.981 -0.556 0.936 0
## (30.099, 31.708) (-0.556, -0.556) (0.896, 0.967)
##
## 0.41 41:59 30.856 -0.621 0.931 0
## (30.099, 31.676) (-0.621, -0.621) (0.892, 0.966)
##
## 0.42 21:29 30.856 -0.69 0.929 0
## (30.067, 31.612) (-0.69, -0.69) (0.889, 0.965)
##
## 0.43 43:57 30.698 -0.76 0.931 0
## (30.067, 31.521) (-0.76, -0.76) (0.882, 0.967)
##
## 0.44 11:14 30.698 -0.833 0.929 0
## (29.881, 31.483) (-0.833, -0.833) (0.875, 0.963)
##
## 0.45 9:11 30.698 -0.909 0.928 0
## (29.877, 31.393) (-0.909, -0.909) (0.872, 0.962)
##
## 0.46 23:27 30.572 -0.988 0.922 0
## (29.813, 31.361) (-0.988, -0.988) (0.866, 0.961)
##
## 0.47 47:53 30.572 -1.069 0.92 0
## (29.726, 31.329) (-1.069, -1.069) (0.862, 0.96)
##
## 0.48 12:13 30.572 -1.154 0.918 0
## (29.723, 31.264) (-1.154, -1.154) (0.859, 0.959)
##
## 0.49 49:51 30.572 -1.242 0.916 0
## (29.629, 31.203) (-1.242, -1.242) (0.851, 0.957)
##
## 0.5 1:1 30.321 -1.333 0.906 0
## (29.597, 31.171) (-1.333, -1.333) (0.847, 0.956)
##
## 0.51 51:49 30.196 -1.429 0.899 0
## (29.504, 31.139) (-1.429, -1.429) (0.841, 0.955)
##
## 0.52 13:12 30.07 -1.528 0.893 0
## (29.443, 31.045) (-1.528, -1.528) (0.837, 0.953)
##
## 0.53 53:47 29.945 -1.631 0.886 0
## (29.408, 30.981) (-1.631, -1.631) (0.831, 0.952)
##
## 0.54 27:23 29.819 -1.739 0.88 0
## (29.288, 30.888) (-1.739, -1.739) (0.824, 0.952)
##
## 0.55 11:9 29.819 -1.852 0.877 0
## (29.253, 30.826) (-1.852, -1.852) (0.824, 0.951)
##
## 0.56 14:11 29.819 -1.97 0.875 0
## (29.192, 30.73) (-1.97, -1.97) (0.815, 0.948)
##
## 0.57 57:43 29.694 -2.093 0.868 0
## (29.098, 30.64) (-2.093, -2.093) (0.81, 0.947)
##
## 0.58 29:21 29.443 -2.222 0.856 0
## (29.034, 30.605) (-2.222, -2.222) (0.801, 0.942)
##
## 0.59 59:41 29.317 -2.358 0.849 0
## (29.005, 30.511) (-2.358, -2.358) (0.8, 0.94)
##
## 0.6 3:2 29.317 -2.5 0.846 0
## (28.908, 30.482) (-2.5, -2.5) (0.793, 0.939)
##
## 0.61 61:39 29.317 -2.65 0.842 0
## (28.786, 30.418) (-2.65, -2.65) (0.785, 0.938)
##
## 0.62 31:19 29.192 -2.807 0.835 0
## (28.754, 30.353) (-2.807, -2.807) (0.779, 0.932)
##
## 0.63 63:37 29.066 -2.973 0.827 0
## (28.657, 30.228) (-2.973, -2.973) (0.77, 0.931)
##
## 0.64 16:9 28.941 -3.148 0.819 0
## (28.532, 30.135) (-3.148, -3.148) (0.764, 0.93)
##
## 0.65 13:7 28.941 -3.333 0.815 0
## (28.374, 30.064) (-3.333, -3.333) (0.76, 0.927)
##
## 0.66 33:17 28.783 -3.529 0.82 0
## (28.22, 29.948) (-3.529, -3.529) (0.749, 0.926)
##
## 0.67 67:33 28.657 -3.737 0.812 0
## (28.152, 29.881) (-3.737, -3.737) (0.738, 0.924)
##
## 0.68 17:8 28.657 -3.958 0.807 0
## (27.969, 29.787) (-3.958, -3.958) (0.729, 0.922)
##
## 0.69 69:31 28.657 -4.194 0.803 0
## (27.875, 29.723) (-4.194, -4.194) (0.718, 0.916)
##
## 0.7 7:3 28.657 -4.444 0.798 0
## (27.717, 29.629) (-4.444, -4.444) (0.711, 0.913)
##
## 0.71 71:29 28.532 -4.713 0.788 0
## (27.528, 29.594) (-4.713, -4.713) (0.699, 0.911)
##
## 0.72 18:7 28.532 -5 0.782 0
## (27.463, 29.504) (-5, -5) (0.694, 0.913)
##
## 0.73 73:27 28.406 -5.309 0.772 0
## (27.338, 29.472) (-5.309, -5.309) (0.677, 0.911)
##
## 0.74 37:13 28.406 -5.641 0.765 0
## (27.122, 29.443) (-5.641, -5.641) (0.667, 0.903)
##
## 0.75 3:1 28.281 -6 0.754 0
## (26.993, 29.346) (-6, -6) (0.657, 0.902)
##
## 0.76 19:6 28.281 -6.389 0.746 0
## (26.932, 29.282) (-6.389, -6.389) (0.641, 0.9)
##
## 0.77 77:23 28.281 -6.812 0.737 0
## (26.713, 29.218) (-6.812, -6.812) (0.638, 0.897)
##
## 0.78 39:11 28.281 -7.273 0.728 0
## (26.617, 29.031) (-7.273, -7.273) (0.626, 0.89)
##
## 0.79 79:21 27.904 -7.778 0.705 0
## (26.427, 28.938) (-7.778, -7.778) (0.612, 0.886)
##
## 0.8 4:1 27.621 -8.333 0.71 0
## (26.147, 28.812) (-8.333, -8.333) (0.597, 0.883)
##
## 0.81 81:19 27.37 -8.947 0.691 0
## (25.957, 28.748) (-8.947, -8.947) (0.579, 0.878)
##
## 0.82 41:9 27.119 -9.63 0.67 0
## (25.738, 28.654) (-9.63, -9.63) (0.554, 0.874)
##
## 0.83 83:17 27.119 -10.392 0.657 0
## (25.642, 28.432) (-10.392, -10.392) (0.542, 0.873)
##
## 0.84 21:4 26.993 -11.25 0.637 0
## (25.265, 28.31) (-11.25, -11.25) (0.525, 0.87)
##
## 0.85 17:3 26.617 -12.222 0.607 0
## (25.075, 28.307) (-12.222, -12.222) (0.497, 0.861)
##
## 0.86 43:7 26.459 -13.333 0.619 0
## (24.763, 28.12) (-13.333, -13.333) (0.476, 0.848)
##
## 0.87 87:13 26.083 -14.615 0.586 0
## (24.451, 28.056) (-14.615, -14.615) (0.443, 0.838)
##
## 0.88 22:3 26.083 -16.111 0.563 0
## (24.042, 27.963) (-16.111, -16.111) (0.415, 0.828)
##
## 0.89 89:11 25.832 -17.879 0.527 0
## (23.698, 27.711) (-17.879, -17.879) (0.379, 0.824)
##
## 0.9 9:1 25.423 -20 0.532 0
## (23.196, 27.46) (-20, -20) (0.332, 0.818)
##
## 0.91 91:9 24.921 -22.593 0.48 0
## (22.912, 27.274) (-22.593, -22.593) (0.267, 0.797)
##
## 0.92 23:2 24.795 -25.833 0.432 0
## (22.285, 27.052) (-25.833, -25.833) (0.219, 0.783)
##
## 0.93 93:7 24.419 -30 0.364 0
## (21.905, 26.958) (-30, -30) (0.165, 0.764)
##
## 0.94 47:3 24.01 -35.556 0.362 0
## (20.84, 26.675) (-35.556, -35.556) (0.11, 0.738)
##
## 0.95 19:1 23.193 -43.333 0.458 0
## (19.836, 26.173) (-43.333, -43.333) (0.008, 0.729)
##
## 0.96 24:1 22.314 -55 0.35 0
## (18.738, 25.671) (-55, -55) (-0.104, 0.715)
##
## 0.97 97:3 21.31 -74.444 0.185 0
## (16.92, 25.169) (-74.444, -74.444) (-0.268, 0.679)
##
## 0.98 49:1 19.395 -113.333 0.121 0
## (14.345, 24.509) (-113.333, -113.333) (-0.672, 0.703)
##
## 0.99 99:1 15.253 -230 -0.543 0
## (10.418, 23.128) (-230, -230) (-1.134, 0.711)
##
## 1 Inf:1 0 NA NA NA
## (0, 0) (NA, NA) (NA, NA)
## ------------------------------------------------------------------------------------------------------------
##绘制单个临床决策曲线
plot_decision_curve(curve1,
curve.names=c('modle1'),
cost.benefit.axis =FALSE,col= 'red',
confidence.intervals=FALSE,
standardize = FALSE)
plot_decision_curve(curve1,
curve.names=c('modle1'),
cost.benefit.axis =T,
n.cost.benefits= 8,
col= 'red',
confidence.intervals=FALSE,
standardize = FALSE)
plot_decision_curve(curve1,curve.names=c('modle1'),
cost.benefit.axis =T,col= 'red',
confidence.intervals=T,
standardize = T)
##绘制多个临床决策曲线在同一图上
List<-list(curve1,curve2,curve3)
plot_decision_curve(List,
curve.names=c('modle1','modle2',"modle3"),
cost.benefit.axis =FALSE,col= c('red','blue','green'),
confidence.intervals=FALSE,
standardize = FALSE)
## Note: When multiple decision curves are plotted, decision curves for 'All' are calculated using the prevalence from the first DecisionCurve object in the list provided.
plot_decision_curve(List,
curve.names=c('modle1','modle2',"modle3"),
cost.benefit.axis =T,
n.cost.benefits = 8,
col= c('red','blue','green'),
confidence.intervals=FALSE,
standardize = FALSE)
## Note: When multiple decision curves are plotted, decision curves for 'All' are calculated using the prevalence from the first DecisionCurve object in the list provided.
##绘制临床影响曲线(Clinical Impact Curve)
plot_clinical_impact(curve1,population.size= 1000,
cost.benefit.axis = T,
n.cost.benefits= 8,
col =c('red','blue'),
confidence.intervals= T,
ylim=c(0,1000),
legend.position="topright")
######方法7 净重新分类指数(NRI,用于比较两个模型的准确度)NRI=(新模型特异度+敏感度)-(旧模型特异度+敏感度) ##综合判别改善指数(IDI) ##NRI/IDI>0,提示新模型改善
rm(list = ls())
load("./data.Rdata")
##T检验/LASSO回归
formula1<-as.formula(status ~ size + epithelial_cell_size+thickness + shape +adhesion+ nuclei + chromatin +
nucleoli + mitoses)
##单因素logistic回归、逐步回归
formula2<-as.formula(status ~ size + thickness + shape + nuclei + chromatin +
nucleoli + mitoses)
library(nricens)
modle1<-glm(formula1,family = binomial(),data = data, x = T)
modle2<-glm(formula2,family = binomial(),data = data, x = T)
#predicted risk,分别计算两个模型的预测风险
p.std= modle1$fitted.values
p.new= modle2$fitted.values
#以上Logistic回归模型拟合完毕。
##分类c(0.2, 0.4)
result<-nribin(mdl.std= modle1, mdl.new = modle2, cut = c(0.2, 0.4),
niter = 1000, updown = 'category')
##
## UP and DOWN calculation:
## #of total, case, and control subjects at t0: 683 239 444
##
## Reclassification Table for all subjects:
## New
## Standard < 0.2 < 0.4 >= 0.4
## < 0.2 428 2 0
## < 0.4 2 5 1
## >= 0.4 0 1 244
##
## Reclassification Table for case:
## New
## Standard < 0.2 < 0.4 >= 0.4
## < 0.2 2 0 0
## < 0.4 1 2 1
## >= 0.4 0 1 232
##
## Reclassification Table for control:
## New
## Standard < 0.2 < 0.4 >= 0.4
## < 0.2 426 2 0
## < 0.4 1 3 0
## >= 0.4 0 0 12
##
## NRI estimation:
## Point estimates:
## Estimate
## NRI -0.006436353
## NRI+ -0.004184100
## NRI- -0.002252252
## Pr(Up|Case) 0.004184100
## Pr(Down|Case) 0.008368201
## Pr(Down|Ctrl) 0.002252252
## Pr(Up|Ctrl) 0.004504505
##
## Now in bootstrap..
##
## Point & Interval estimates:
## Estimate Std.Error Lower Upper
## NRI -0.006436353 0.010419128 -0.02897500 0.012466155
## NRI+ -0.004184100 0.008744322 -0.02489627 0.008869223
## NRI- -0.002252252 0.005363130 -0.01141558 0.009270300
## Pr(Up|Case) 0.004184100 0.004747980 0.00000000 0.016494915
## Pr(Down|Case) 0.008368201 0.008396143 0.00000000 0.030501234
## Pr(Down|Ctrl) 0.002252252 0.003906260 0.00000000 0.013274336
## Pr(Up|Ctrl) 0.004504505 0.004610237 0.00000000 0.015927211
classfication<-result$nri
classfication$z<-abs(classfication$Estimate/classfication$Std.Error)
classfication$pvalue<-(1-pnorm(classfication$z))*2
##新模型较旧模型重分类正确的比例提高(NRI)-0.64%,p=0.5562935
##阳性结局中新模型较旧模型重分类正确的比例提高(NRI)-0.42%,p=0.6346839
##阴性结局中新模型较旧模型重分类正确的比例提高(NRI)-0.22%,p=0.6828429
##无分类cut=0-1之间任何固定数
result2<-nribin(mdl.std= modle1, mdl.new = modle2,
cut=0.02,#认为当预测的风险在新旧模型中相差2%时,即被认为是重新分类了
niter = 1000, updown = 'category')
##
## UP and DOWN calculation:
## #of total, case, and control subjects at t0: 683 239 444
##
## Reclassification Table for all subjects:
## New
## Standard < 0.02 >= 0.02
## < 0.02 368 2
## >= 0.02 11 302
##
## Reclassification Table for case:
## New
## Standard < 0.02 >= 0.02
## < 0.02 0 0
## >= 0.02 0 239
##
## Reclassification Table for control:
## New
## Standard < 0.02 >= 0.02
## < 0.02 368 2
## >= 0.02 11 63
##
## NRI estimation:
## Point estimates:
## Estimate
## NRI 0.020270270
## NRI+ 0.000000000
## NRI- 0.020270270
## Pr(Up|Case) 0.000000000
## Pr(Down|Case) 0.000000000
## Pr(Down|Ctrl) 0.024774775
## Pr(Up|Ctrl) 0.004504505
##
## Now in bootstrap..
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Point & Interval estimates:
## Estimate Std.Error Lower Upper
## NRI 0.020270270 0.0478373437 -0.154622568 0.031531691
## NRI+ 0.000000000 0.0012882221 -0.004264412 0.000000000
## NRI- 0.020270270 0.0478386135 -0.154370148 0.031531691
## Pr(Up|Case) 0.000000000 0.0006802433 0.000000000 0.000000000
## Pr(Down|Case) 0.000000000 0.0010703749 0.000000000 0.004264412
## Pr(Down|Ctrl) 0.024774775 0.0142090886 0.000000000 0.044374121
## Pr(Up|Ctrl) 0.004504505 0.0444885224 0.000000000 0.159861111
classfication2<-result2$nri
classfication2$z<-abs(classfication2$Estimate/classfication2$Std.Error)
classfication2$pvalue<-(1-pnorm(classfication2$z))*2
##新模型较旧模型重分类正确的比例提高(NRI)2%,p=0.66520067
##阳性结局中新模型较旧模型重分类正确的比例提高(NRI)0%,p=1
##阴性结局中新模型较旧模型重分类正确的比例提高(NRI)2%,p=0.66506415
##IDI计算
#predicted risk,分别计算两个模型的预测风险
library(PredictABEL)
p.std= modle1$fitted.values
p.new= modle2$fitted.values
data$status<-as.numeric(as.character(data$status))
demo<-as.matrix(data)
reclassification(data = data,cOutcome = 1,predrisk1 = p.std,predrisk2 =p.new, cutoff = c(seq(0,1,by=0.2)))
## _________________________________________
##
## Reclassification table
## _________________________________________
##
## Outcome: absent
##
## Updated Model
## Initial Model [0,0.2) [0.2,0.4) [0.4,0.6) [0.6,0.8) [0.8,1] % reclassified
## [0,0.2) 426 2 0 0 0 0
## [0.2,0.4) 1 3 0 0 0 25
## [0.4,0.6) 0 0 2 0 0 0
## [0.6,0.8) 0 0 1 0 0 100
## [0.8,1] 0 0 0 1 8 11
##
##
## Outcome: present
##
## Updated Model
## Initial Model [0,0.2) [0.2,0.4) [0.4,0.6) [0.6,0.8) [0.8,1] % reclassified
## [0,0.2) 2 0 0 0 0 0
## [0.2,0.4) 1 2 1 0 0 50
## [0.4,0.6) 0 1 6 1 0 25
## [0.6,0.8) 0 0 1 7 3 36
## [0.8,1] 0 0 0 2 212 1
##
##
## Combined Data
##
## Updated Model
## Initial Model [0,0.2) [0.2,0.4) [0.4,0.6) [0.6,0.8) [0.8,1] % reclassified
## [0,0.2) 428 2 0 0 0 0
## [0.2,0.4) 2 5 1 0 0 38
## [0.4,0.6) 0 1 8 1 0 20
## [0.6,0.8) 0 0 2 7 3 42
## [0.8,1] 0 0 0 3 220 1
## _________________________________________
##
## NRI(Categorical) [95% CI]: 0.0023 [ -0.0255 - 0.03 ] ; p-value: 0.8736
## NRI(Continuous) [95% CI]: -0.9611 [ -1.0995 - -0.8227 ] ; p-value: 0
## IDI [95% CI]: -8e-04 [ -0.0048 - 0.0032 ] ; p-value: 0.7055
##IDI [95% CI]: -8e-04 [ -0.0048 - 0.0032 ] ; p-value: 0.7055
######方法8 标准校准曲线
rm(list = ls())
load("./data.Rdata")
##单因素logistic回归、逐步回归
formula<-as.formula(status ~ size + thickness + shape + nuclei + chromatin +
nucleoli + mitoses)
library(rms)
dd<-datadist(data)
options(datadist="dd")
fit<-lrm(formula,data = data,x = T,y = T)
call1<-calibrate(fit,method = "boot",B = 1000)
##单个展示
plot(call1,xlim=c(0,1.0),ylim=c(0,1.0))
##
## n=683 Mean absolute error=0.022 Mean squared error=0.00154
## 0.9 Quantile of absolute error=0.03
plot(call1,
xlim = c(0,1),
xlab = "Predicted Probability",
ylab = "Observed Probability",
legend = FALSE,
subtitles = FALSE)
##
## n=683 Mean absolute error=0.022 Mean squared error=0.00154
## 0.9 Quantile of absolute error=0.03
abline(0,1,col = "black",lty = 2,lwd = 2)
lines(call1[,c("predy","calibrated.orig")], type = "l",lwd = 2,col="red",pch =16)
lines(call1[,c("predy","calibrated.corrected")], type = "l",lwd = 2,col="green",pch =16)
legend(0.55,0.35,
c("Apparent","Ideal","Bias-corrected"),
lty = c(2,1,1),
lwd = c(2,1,1),
col = c("black","red","green"),
bty = "n") # "o"为加边框
library(riskRegression)
## riskRegression version 2021.10.10
##
## 载入程辑包:'riskRegression'
## The following objects are masked from 'package:PredictABEL':
##
## plotCalibration, plotROC
fit2 <- glm(formula,data = data,family = binomial())
xb <- Score(list("fit"=fit2),formula= status~1,
null.model = FALSE,
conf.int = TRUE,
plots = c("calibration","ROC"),
metrics = c("auc","brier"),
B=1000,M = 50, # bootstrap;每次抽样50
data= data)
plotCalibration(xb,col="red")
##多个模型展示
##T检验/LASSO回归
formula1<-as.formula(status ~ size + epithelial_cell_size+thickness + shape +adhesion+ nuclei + chromatin +
nucleoli + mitoses)
##单因素logistic回归、逐步回归
formula2<-as.formula(status ~ size + thickness + shape + nuclei + chromatin +
nucleoli + mitoses)
fit1 = glm(formula1, data=data,family = binomial())
fit2 = glm(formula2, data=data,family = binomial())
xb <- Score(list("fit1"=fit1,
"fit2"=fit2),
formula=status~1,
null.model = FALSE,
conf.int =TRUE,
plots =c("calibration","ROC"),
metrics = c("auc","brier"),
B=1000,M=50,
data=data)
plotCalibration(xb)
###C指数,用来评价模型一致性
fit<-lrm(formula,data = data,x = T,y = T)
fit
## Logistic Regression Model
##
## lrm(formula = formula, data = data, x = T, y = T)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 683 LR chi2 755.93 R2 0.922 C 0.993
## 0 444 d.f. 7 g 5.172 Dxy 0.986
## 1 239 Pr(> chi2) <0.0001 gr 176.250 gamma 0.986
## max |deriv| 1e-06 gp 0.448 tau-a 0.449
## Brier 0.024
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept 3.8738 1.2260 3.16 0.0016
## size 0.4466 0.1633 2.74 0.0062
## thickness=Low -1.6101 0.5286 -3.05 0.0023
## shape=Low -1.3560 0.6372 -2.13 0.0333
## nuclei=Low -2.3269 0.5031 -4.62 <0.0001
## chromatin=Low -1.9142 0.5365 -3.57 0.0004
## nucleoli=Low -1.2903 0.5165 -2.50 0.0125
## mitoses=Low -1.4256 0.6585 -2.17 0.0304
##
#c-index=0.993
###Hosmer-Lemeshow,拟合优度检验用来评价模型
library(ResourceSelection)
mod <- glm(formula,data = data, family=binomial)
hl <- hoslem.test(mod$y, fitted(mod), g=10)
hl
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: mod$y, fitted(mod)
## X-squared = 12.897, df = 8, p-value = 0.1154