Example
#install.packages("PredictABEL")
library(PredictABEL)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
## Loading required package: ROCR
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
## Loading required package: epitools
##
## Attaching package: 'epitools'
## The following object is masked from 'package:survival':
##
## ratetable
## Loading required package: PBSmodelling
##
## -----------------------------------------------------------
## PBS Modelling 2.67.266 -- Copyright (C) 2005-2016 Fisheries and Oceans Canada
##
## A complete user guide 'PBSmodelling-UG.pdf' is located at
## /home/ccwang-letsnote/R/x86_64-pc-linux-gnu-library/3.3/PBSmodelling/doc/PBSmodelling-UG.pdf
##
## Packaged on 2015-01-23
## Pacific Biological Station, Nanaimo
##
## All available PBS packages can be found at
## http://code.google.com/p/pbs-software/
## -----------------------------------------------------------
# Let's try the sample data
# specify dataset with outcome and predictor variables
data(ExampleData)
head(ExampleData)
## ID outcome(AMD) Age Sex Education BaselineAMD Smoking BMI
## 1 1 0 1 0 1 2 0 2
## 2 2 0 0 0 0 0 1 0
## 3 3 1 0 1 0 1 1 0
## 4 4 0 0 0 0 1 1 1
## 5 5 0 1 1 0 0 0 2
## 6 6 0 1 0 0 2 1 2
## AntioxidentGroup ZincGroup CFHrs1061170 LOCrs10490924 CFHrs1410996
## 1 1 0 1 1 2
## 2 0 1 1 2 1
## 3 0 1 1 0 2
## 4 1 1 2 1 2
## 5 1 1 1 1 0
## 6 0 0 0 1 2
## C2rs9332739 CFBrs641153 CFHrs2230199
## 1 1 1 1
## 2 1 1 0
## 3 1 1 1
## 4 1 1 1
## 5 1 1 1
## 6 1 1 1
# specify column number of the outcome variable
cOutcome <- 2
# fit logistic regression models
# all steps needed to construct a logistic regression model are written in a function
# called 'ExampleModels', which is described on page 4-5
riskmodel1 <- ExampleModels()$riskModel1
riskmodel2 <- ExampleModels()$riskModel2
predRisk1 <- predRisk(riskmodel1)
predRisk2 <- predRisk(riskmodel2)
cutoff <- c(0, .10, .30, 1)
reclassification(data = ExampleData, cOutcome = cOutcome, predrisk1 = predRisk1, predrisk2 = predRisk2, cutoff)
## _________________________________________
##
## Reclassification table
## _________________________________________
##
## Outcome: absent
##
## Updated Model
## Initial Model [0,0.1) [0.1,0.3) [0.3,1] % reclassified
## [0,0.1) 2899 76 0 3
## [0.1,0.3) 714 2398 657 36
## [0.3,1] 34 469 766 40
##
##
## Outcome: present
##
## Updated Model
## Initial Model [0,0.1) [0.1,0.3) [0.3,1] % reclassified
## [0,0.1) 85 11 0 11
## [0.1,0.3) 63 542 457 49
## [0.3,1] 1 118 710 14
##
##
## Combined Data
##
## Updated Model
## Initial Model [0,0.1) [0.1,0.3) [0.3,1] % reclassified
## [0,0.1) 2984 87 0 3
## [0.1,0.3) 777 2940 1114 39
## [0.3,1] 35 587 1476 30
## _________________________________________
##
## NRI(Categorical) [95% CI]: 0.2043 [ 0.1777 - 0.2309 ] ; p-value: 0
## NRI(Continuous) [95% CI]: 0.5114 [ 0.4636 - 0.5591 ] ; p-value: 0
## IDI [95% CI]: 0.0672 [ 0.0612 - 0.0732 ] ; p-value: 0
Your data
setwd("~/pCloudDrive/pCloud Sync/鄔娜/2016-7-13")
# Read the data:
data <- read.csv("data.csv", header = TRUE, colClasses = "character")
head(data)
## ID sex age weight height BMI LDL HDL TG TC Abnor_TC smk
## 1 4,425,248 0 61 59.0 167 21.16 2.89 1.16 0.93 4.61 0 0
## 2 4,579,791 1 51 69.0 160 26.95 4.26 1.58 1.53 6.45 1 0
## 3 3,852,936 1 62 62.0 165 22.77 1.94 1.37 1.29 3.60 0 0
## 4 4,413,993 0 49 64.8 168 22.96 2.75 0.77 0.98 3.98 0 1
## 5 7,125,819 1 50 52.0 156 21.37 4.95 1.91 0.93 8.32 1 0
## 6 7,821,748 1 63 #NULL! #NULL! #NULL! 3.39 1.39 1.75 5.71 1 0
## hyt dm CDK med_hyt med_dm med_sta IMT_l IMT_r soft hard mix total
## 1 0 0 0 0 0 0 0.8 0.0 0.00 0.00 0.00 0
## 2 1 1 0 1 1 0 0.8 0.1 1.60 5.00 3.60 3
## 3 1 0 0 1 0 0 0.5 0.4 0.00 17.10 13.60 3
## 4 1 0 0 1 0 0 0.5 0.4 20.16 11.44 0.00 3
## 5 1 0 0 0 0 0 0.4 0.4 0.00 14.58 0.00 1
## 6 1 1 0 1 0 0 0.7 0.4 10.44 4.34 0.00 2
## cad IMT_neck rf RFCIMT RFnum RFsoft RFhard RFmix
## 1 0 0.40 0.24584 0.19397 0.16795 0.23031 0.24010 0.23225
## 2 0 0.44 0.29219 0.24216 0.45996 0.28610 0.30553 0.29613
## 3 1 0.45 0.19730 0.16024 0.31973 0.18555 0.24473 0.24933
## 4 0 0.45 0.28127 0.23402 0.44496 0.35877 0.32545 0.25958
## 5 0 0.40 0.13499 0.10632 0.12007 0.12528 0.16822 0.12448
## 6 1 0.55 #NULL! #NULL! #NULL! #NULL! #NULL! #NULL!
## RFnumsoftmix RFnumsoftmixhard RFnumsoft RFnummixed
## 1 0.16835 0.16807 0.16660 0.16986
## 2 0.42933 0.42883 0.44209 0.44335
## 3 0.31850 0.30620 0.29945 0.33859
## 4 0.44389 0.43390 0.47647 0.41241
## 5 0.11370 0.10693 0.11653 0.11649
## 6 #NULL! #NULL! #NULL! #NULL!
# Data manipulation:
data[data == "#NULL!"] <- NA
data$CAD <- as.numeric(data$cad)
cOutcome <- 37 #Specify column number of the outcome variable
# categorize smoking, dyslip, hp, dm
data[, c("smk", "hyt", "dm", "Abnor_TC")] <-
lapply(data[,c("smk", "hyt", "dm", "Abnor_TC")],
function(var) {
var <- factor(var, levels = 0:1, labels = c("No", "Yes"))
})
# recoding
data <- within(data, {
Sex <- factor(sex, levels = 0:1, labels = c("men", "women"))
Age <- as.numeric(age)
BMI <- as.numeric(BMI)
CIMT <- as.numeric(RFCIMT)
soft <- as.numeric(RFsoft)
hard <- as.numeric(RFhard)
mix <- as.numeric(RFmix)
num <- as.numeric(RFnum)
})
# Fit logistic regression models
# create two models
# 1. basic model with gender, age, BMI, dyslip, smoking, hp, dm
# 2. new model with basic model + soft, hard, mix, num, CIMT
newdata <- na.omit(data) # remove the missing values
logistic.model.list <-
list(Basic_m = glm(CAD ~ Sex + Age + BMI + Abnor_TC + smk + hyt + dm, data = newdata, family = binomial),
New_m = glm(CAD ~ Sex + Age + BMI + Abnor_TC + smk + hyt + dm +
soft + hard + mix + num + CIMT, data = newdata, family = binomial)
)
library(PredictABEL)
reclassification(data = newdata, cOutcome = 37,
predrisk1 = fitted(logistic.model.list[["Basic_m"]]),
predrisk2 = fitted(logistic.model.list[["New_m"]]),
cutoff = c(0, 0.40, 1)
)
## _________________________________________
##
## Reclassification table
## _________________________________________
##
## Outcome: absent
##
## Updated Model
## Initial Model [0,0.4) [0.4,1] % reclassified
## [0,0.4) 1356 85 6
## [0.4,1] 117 141 45
##
##
## Outcome: present
##
## Updated Model
## Initial Model [0,0.4) [0.4,1] % reclassified
## [0,0.4) 310 112 27
## [0.4,1] 65 204 24
##
##
## Combined Data
##
## Updated Model
## Initial Model [0,0.4) [0.4,1] % reclassified
## [0,0.4) 1666 197 11
## [0.4,1] 182 345 35
## _________________________________________
##
## NRI(Categorical) [95% CI]: 0.0869 [ 0.046 - 0.1277 ] ; p-value: 3e-05
## NRI(Continuous) [95% CI]: 0.4723 [ 0.3861 - 0.5585 ] ; p-value: 0
## IDI [95% CI]: 0.0667 [ 0.0546 - 0.0788 ] ; p-value: 0