library(gtx)
## Loading required package: survival
#approximate Bayes factor (ABF) for normal prior
data(agtstats)
agtstats$pval <- with(agtstats, pchisq((beta/se.GC)^2, df = 1, lower.tail = FALSE))
max1 <- function(bf) return(bf/max(bf, na.rm = TRUE))
agtstats$BF.normal <- with(agtstats, max1(abf.Wakefield(beta, se.GC, 0.05)))
agtstats$BF.numeric <- with(agtstats, max1(abf.normal(beta, se.GC, 0.05)))
with(agtstats, plot(BF.normal, BF.numeric)) # excellent agreement
#approximate Bayes factor (ABF) for t distribution prior
data(agtstats)
agtstats$pval <- with(agtstats, pchisq((beta/se.GC)^2, df = 1, lower.tail = FALSE))
max1 <- function(bf) return(bf/max(bf, na.rm = TRUE))
agtstats$BF.normal <- with(agtstats, max1(abf.Wakefield(beta, se.GC, 0.05)))
agtstats$BF.t <- with(agtstats, max1(abf.t(beta, se.GC, 0.0208)))
with(agtstats, plot(-log10(pval), log(BF.normal)))
with(agtstats, plot(-log10(pval), log(BF.t)))
Calculate approximate Bayes factor (ABF) using method of Wakefield
data(agtstats)
agtstats$pval <- with(agtstats, pchisq((beta/se.GC)^2, df = 1, lower.tail = FALSE))
max1 <- function(bf) return(bf/max(bf, na.rm = TRUE))
agtstats$BF.normal <- with(agtstats, max1(abf.Wakefield(beta, se.GC, 0.05)))
agtstats$BF.t <- with(agtstats, max1(abf.t(beta, se.GC, 0.0208)))
with(agtstats, plot(-log10(pval), log(BF.normal)))
with(agtstats, plot(-log10(pval), log(BF.t)))
genetic risk scores
data(bp.scores)
head(subset(bp.scores, score == "SBP2011A"))
## locus snp coded.allele noncoded.allele coded.freq score
## 88 MTHFR-NPPB rs17367504 G A 0.15 SBP2011A
## 89 MOV10 rs2932538 G A 0.75 SBP2011A
## 90 SLC4A7 rs13082711 T C 0.78 SBP2011A
## 91 ULK4 rs3774372 T C 0.83 SBP2011A
## 92 MECOM rs419076 T C 0.47 SBP2011A
## 93 FGF5 rs1458038 T C 0.29 SBP2011A
## coef coef.se
## 88 -0.950 0.1079
## 89 0.321 0.0768
## 90 -0.318 0.0787
## 91 -0.073 0.0830
## 92 0.355 0.0678
## 93 0.732 0.0792
sbp <- subset(bp.scores, score == "SBP2011A")
dbp <- subset(bp.scores, score == "DBP2011A")
dbp <- dbp[match(sbp$locus, dbp$locus), ]
plot(sbp$coef/sign(sbp$coef), dbp$coef/sign(sbp$coef),
xlim = c(0, max(sbp$coef/sign(sbp$coef))),
ylim = c(0, max(dbp$coef/sign(sbp$coef))),
xlab = "SBP effect [mmHg]", ylab = "DBP effect [mmHg]",
las = 1)
data(cad.scores)
cad.scores$MAF <- pmin(cad.scores$coded.freq, 1-cad.scores$coded.freq)
cad.scores$OR <- exp(abs(cad.scores$coef))
plot(cad.scores$MAF, cad.scores$OR,
xlim = c(0, 0.5), ylim = c(1, max(cad.scores$OR)),
xlab = "Minor allele frequency",
ylab = "Odds ratio effect", las = 1)
text(cad.scores$MAF, cad.scores$OR, cad.scores$name, pos = 1, cex = 0.5)
#non-centrality parameter of chi squared distribution
## 0.80 power for 0.05 size test
chi2ncp(.05, .8)
## [1] 7.848861
## 0.80 power for genome-wide significance
chi2ncp(5e-08, .8)
## [1] 39.60099
## test
critval <- qchisq(5e-08, lower.tail = FALSE, df = 1)
pchisq(critval, ncp = chi2ncp(5e-08, .8), lower.tail = FALSE, df = 1)
## [1] 0.8000001
data(mthfrex)
## artifical example with two datasets obtained by
## splitting mthfrex by the HTN variable
xtx.hi <- make.moments2(mthfr.params, c("SBP", "SexC", "Age"),
as.snpdata(list(snpinfo = mthfrex$snpinfo,
data = subset(mthfrex$data, HTN == 1))))
xtx.lo <- make.moments2(mthfr.params, c("SBP", "SexC", "Age"),
as.snpdata(list(snpinfo = mthfrex$snpinfo,
data = subset(mthfrex$data, HTN == 0))))
## make list of X'X matrices
xtx.list <- list(hi = xtx.hi, lo = xtx.lo)
## combine for outcome SBP and fixed effects for all SNPs
## other variables SexC and Age will be treated as study-specific
fixed <- paste(mthfr.params$snp, mthfr.params$coded.allele, sep = "_")
xtx.comb <- combine.moments2(xtx.list, c("SBP", fixed))
## fit regression model
n.comb <- sum(diag(xtx.comb)[1:length(xtx.list)])
lm.moments2(xtx.comb, "SBP", c("hi_ONE", "lo_ONE", "rs4846052_T"), n = n.comb)
## $betahat
## hi_ONE lo_ONE rs4846052_T
## 147.820107 111.696958 5.509798
##
## $se
## hi_ONE lo_ONE rs4846052_T
## 1.4596039 1.5757237 0.8589874
##
## $prec
## hi_ONE lo_ONE rs4846052_T
## hi_ONE 2.798619 0.0000000 4.338370
## lo_ONE 0.000000 0.9884162 1.395683
## rs4846052_T 4.338370 1.3956830 10.051296
##
## $ssr
## [1] 1054651
## equivalent regression directly using subject-specific data
coeff.extract(lm(SBP ~ HTN + rs4846052_C, data = mthfrex$data))
## Estimate Std. Error
## (Intercept) 122.716554 1.1255201
## HTN 36.123150 1.1760610
## rs4846052_C -5.509798 0.8589874
## contrasting colours suitable for Manhattan plot for 22 autosomes
plot(1:22, rep(1, 22), pch = 22, cex = 2, ann = FALSE,
yaxt = "n", bg = contrasting.rainbow(22))
data(mthfrex)
mthfrex <- gls.approx.logistic(mthfrex, "HTN", c("SexC", "Age"))
## HTN ~ 1 + SexC + Age
## <environment: 0x000000000ea11e98>
xtwx <- make.moments2(mthfr.params, c("HTNstar", "SexC", "Age"), mthfrex,
weightvar = "weight")
myglm <- est.moments2(xtwx, "HTNstar", c("ONE", "rs6668659_T", "rs4846049_T",
"rs1801133_G", "SexC", "Age"), vscale=1)
myglm$z <- myglm$betahat/myglm$se
cbind(beta = myglm$betahat, se = myglm$se, z = myglm$z,
pval = pnorm(-abs(myglm$z))*2)
## beta se z pval
## ONE 1.025734203 0.391724849 2.61850686 0.008831552
## rs6668659_T -0.711174571 0.976728427 -0.72811905 0.466540716
## rs4846049_T 0.213609014 1.025834374 0.20822953 0.835049749
## rs1801133_G -0.131553799 0.760876811 -0.17289763 0.862731893
## SexC -0.006874618 0.102401166 -0.06713418 0.946474883
## Age 0.001348874 0.005407132 0.24946194 0.803003481
## Compare against results from glm
## Note have to use coded alleles used in original data
mycheck <- glm(HTN ~ rs6668659_G+rs4846049_G+rs1801133_A+Sex+Age,
family="binomial", data = mthfrex$data)
coef(summary(mycheck))
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.55392240 0.320025189 4.8556253 1.200074e-06
## rs6668659_G 0.80808967 1.016737620 0.7947868 4.267375e-01
## rs4846049_G -0.22721934 1.117444003 -0.2033385 8.388705e-01
## rs1801133_A 0.14872575 0.886512614 0.1677650 8.667682e-01
## SexMale 0.26426598 0.103266899 2.5590579 1.049562e-02
## Age -0.01410404 0.005366367 -2.6282296 8.583055e-03
## Note in results Sex factor coded differently than SexC
## Coefficients for covariates used in null model are different,
## because xtwx approximates around the fitted null model
## Look at pairwise correlations
cor(subset(mthfrex$data, select = c("rs6668659_G", "rs4846049_G",
"rs1801133_A")))^2
## rs6668659_G rs4846049_G rs1801133_A
## rs6668659_G 1.0000000 0.9863988 0.9739446
## rs4846049_G 0.9863988 1.0000000 0.9768849
## rs1801133_A 0.9739446 0.9768849 1.0000000
## SNP coefficients well approximated (given very high
## inter-SNP correlations) but signs ALL inverted by coded allele flips
## check error less than 10percent
stopifnot(all(-1*myglm$z[2:4]/coef(summary(mycheck))[2:4,3] > 0.9))
stopifnot(all(-1*myglm$z[2:4]/coef(summary(mycheck))[2:4,3] < 1.1))
data(mthfrex)
mthfrex <- gls.approx.logistic(mthfrex, "HTN", c("SexC", "Age"))
## HTN ~ 1 + SexC + Age
## <environment: 0x000000000f2e9d38>
xtwx <- make.moments2(mthfr.params, c("HTNstar", "SexC", "Age"), mthfrex,
weightvar = "weight")
est.moments2(xtwx, "HTNstar", c("ONE", "rs6668659_T", "rs4846049_T",
"rs1801133_G", "SexC", "Age"), vscale=1)
## $betahat
## ONE rs6668659_T rs4846049_T rs1801133_G SexC
## 1.025734203 -0.711174571 0.213609014 -0.131553799 -0.006874618
## Age
## 0.001348874
##
## $se
## ONE rs6668659_T rs4846049_T rs1801133_G SexC Age
## 0.391724849 0.976728427 1.025834374 0.760876811 0.102401166 0.005407132
##
## $prec
## ONE rs6668659_T rs4846049_T rs1801133_G SexC
## ONE 382.9513 665.2416 665.1568 665.7749 580.3467
## rs6668659_T 665.2416 1240.5727 1239.9402 1240.0392 1007.3162
## rs4846049_T 665.1568 1239.9402 1240.4802 1240.0939 1007.7027
## rs1801133_G 665.7749 1240.0392 1240.0939 1241.6532 1008.3018
## SexC 580.3467 1007.3162 1007.7027 1008.3018 975.1375
## Age 22083.6678 38435.6732 38430.0807 38465.0319 33503.0910
## Age
## ONE 22083.67
## rs6668659_T 38435.67
## rs4846049_T 38430.08
## rs1801133_G 38465.03
## SexC 33503.09
## Age 1307780.38
##
## $ssr
## [1] NA
#Filter SNPs for inclusion in genetic risk score using heterogeneity test
data(magic.scores)
score1 <- subset(magic.scores, score == "FG2010")
score1 <- within(score1, okay <- grs.filter.Qrs(coef, beta_TG, se_TG))
with(score1, {grs.plot(coef, beta_TG, se_TG, locus);
title(xlab = "FG effect", ylab = "TG effect")})
with(score1, locus[!okay]) # loci removed
## [1] "GCKR" "FADS1" "SLC2A2"
with(subset(score1, okay), {grs.plot(coef, beta_TG, se_TG, locus);
title(xlab = "FG effect", ylab = "TG effect")})
Make genetic risk scores from individual-level data
Convenience tool to fit a series of single-SNP models
library(survival)
data(t2d.scores)
data(t2dex)
mycoxph <- coxph(Surv(FollowupDays,FollowupT2D) ~ Overweight,
data = t2dex$data) # fit null model
assoc1 <- grs.onesnp.apply(t2d.scores, mycoxph) # single SNP association
## risk score fit from single SNPs
unlist(grs.summary(t2d.scores$coef, assoc1$beta, assoc1$se,
n = length(residuals(mycoxph))))
## m n X2m R2m ahat
## 3.100000e+01 1.605000e+03 6.494297e+01 3.965521e-02 5.660855e-01
## aSE X2rs R2rs pval Qrs
## 9.704573e-02 3.402603e+01 2.097688e-02 5.437979e-09 3.091694e+01
## phet
## 4.194988e-01
## compare direct analysis of subject-specific data
t2dex <- grs.make.scores(t2d.scores, t2dex)
coxph(Surv(FollowupDays,FollowupT2D) ~ Overweight + T2D2010.score,
data = t2dex$data)
## Call:
## coxph(formula = Surv(FollowupDays, FollowupT2D) ~ Overweight +
## T2D2010.score, data = t2dex$data)
##
## coef exp(coef) se(coef) z p
## Overweight 0.5532 1.7388 0.0970 5.70 1.2e-08
## T2D2010.score 0.5756 1.7782 0.0979 5.88 4.1e-09
##
## Likelihood ratio test=61.1 on 2 df, p=5.42e-14
## n= 1605, number of events= 464
## (395 observations deleted due to missingness)
Diagnostic plot for genetic risk score calculation from summary statistics.
data(t2dex)
library(survival)
mycoxph <- coxph(Surv(FollowupDays,FollowupT2D) ~ Overweight,
data = t2dex$data) # fit null model
data(t2d.scores)
assoc1 <- grs.onesnp.apply(t2d.scores, mycoxph) # single SNP association
## risk score fit from single SNPs
grs.plot(t2d.scores$coef, assoc1$beta, assoc1$se, t2d.scores$name)
title(xlab = "risk score weight", ylab = "estimated effect size")
Genetic risk score calculation from summary statistics.
data(liver.scores)
with(subset(liver.scores, score == "ALP2011"), {
plot(pmin(coded.freq, 1 - coded.freq), coef,
xlab = "MAF", ylab = "ALP effect",
xlim = c(0, 0.5), ylim = c(0, 10), las = 1)
text(pmin(coded.freq, 1 - coded.freq), coef, name,
pos = 3, cex = 0.5)})
## compare unweighted score used by Chambers et al. 2011
## with weighted score:
with(subset(liver.scores, score == "ALP2011"),
rbind(unlist(grs.summary(rep(1, length(coef)), beta_replication,
se_replication, n = 8112)),
unlist(grs.summary(coef, beta_replication,
se_replication, n = 8112))))
## m n X2m R2m ahat aSE X2rs R2rs
## [1,] 14 8112 468.9801 0.05617369 2.5035835 0.14244639 308.9024 0.03736377
## [2,] 14 8112 468.9801 0.05617369 0.9136978 0.04335698 444.1064 0.05327521
## pval Qrs phet
## [1,] 3.787348e-69 160.07771 1.901086e-27
## [2,] 1.382751e-98 24.87369 2.398158e-02
#Fit normal linear model using pre-built matrix of second moments.
data(mthfrex)
xtx <- make.moments2(mthfr.params, c("SBP", "DBP", "SexC", "Age"), mthfrex)
lm.moments2(xtx, "SBP", c("ONE", "rs6668659_T", "rs4846049_T",
"rs1801133_G", "SexC", "Age"))
## $betahat
## ONE rs6668659_T rs4846049_T rs1801133_G SexC
## 172.60377218 -3.67807898 -8.48305715 1.82092249 -3.79938368
## Age
## -0.03999416
##
## $se
## ONE rs6668659_T rs4846049_T rs1801133_G SexC Age
## 4.67202357 11.86845291 12.58429705 9.13955632 1.24769373 0.06447682
##
## $prec
## ONE rs6668659_T rs4846049_T rs1801133_G SexC
## ONE 2.579943 4.480063 4.479177 4.483566 3.829926
## rs6668659_T 4.480063 8.352945 8.348321 8.348801 6.645447
## rs4846049_T 4.479177 8.348321 8.351534 8.348929 6.647476
## rs1801133_G 4.483566 8.348801 8.348929 8.359640 6.651883
## SexC 3.829926 6.645447 6.647476 6.651883 6.329891
## Age 146.996147 255.619712 255.558744 255.810405 218.549589
## Age
## ONE 146.9961
## rs6668659_T 255.6197
## rs4846049_T 255.5587
## rs1801133_G 255.8104
## SexC 218.5496
## Age 8616.2874
##
## $ssr
## [1] 1545770
## Compare against results from lm
## Note have to use coded alleles in original data
lm(SBP ~ rs6668659_G+rs4846049_G+rs1801133_A+Sex+Age, data = mthfrex$data)
##
## Call:
## lm(formula = SBP ~ rs6668659_G + rs4846049_G + rs1801133_A +
## Sex + Age, data = mthfrex$data)
##
## Coefficients:
## (Intercept) rs6668659_G rs4846049_G rs1801133_A SexMale
## 144.32458 3.67808 8.48306 -1.82092 3.79938
## Age
## -0.03999
## Note in results Sex factor coded differently than SexC
#Genetic risk scores for glucose/insulin traits.
data(magic.scores)
with(subset(magic.scores, score = "FG2010"),
grs.plot(beta_FG, beta_TG, se_TG, text = locus))
Build matrix of second moments from subject-specific data.
data(mthfrex)
xtx <- make.moments2(mthfr.params, c("SBP", "DBP", "SexC", "Age"), mthfrex)
#compute minimum size of cover of overlapping intervals.
mincover(c(1, 5, 10, 11, 22), c(8, 17, 13, 19, 25))
## [1] 23
## first to fourth intervals all overlap
## third interval 10:13 entirely inside second interval 5:17
nsubj <- 400 # number of subjects
nsnp <- 4000 # intended number of SNPs in GWAS
snps <- replicate(nsnp, rbinom(nsubj, 2, rbeta(1, 1.2, 1.2)))
## simulate with random allele frequencies
snps <- snps[ , apply(snps, 2, var) > 0]
nsnp <- ncol(snps) # number after filtering monomorphic
beta <- matrix(rnorm(30, 0, 0.1), ncol = 3)
## matrix of effects of 10 snps on 3 phenotypes
y1 <- rnorm(nsubj)
y2 <- .1*y1 + rnorm(nsubj)
y3 <- .1*y1 + .3*y2 + rnorm(nsubj) # simulate correlated errors
y <- cbind(y1, y2, y3) + snps[ , 1:10] %*% beta
## wlog the truely associated snps are 1:10
rm(y1, y2, y3)
astats <- function(ii) {
with(list(snp = snps[ , ii]),
c(coef(summary(lm(y[ , 1] ~ snp)))["snp", 3],
coef(summary(lm(y[ , 2] ~ snp)))["snp", 3],
coef(summary(lm(y[ , 3] ~ snp)))["snp", 3],
summary(manova(y ~ snp))$stats["snp", 6]))
}
system.time(gwas <- t(sapply(1:nsnp, astats)))
## user system elapsed
## 43.39 0.08 48.12
## columns 1-3 contain single phenotype Z statistics
## column 4 contains manova P-value
pv <- multipheno.T2(gwas[ , 1:3])$pval
plot(-log10(gwas[ , 4]), -log10(pv), type = "n",
xlab = "MANOVA -log10(P)", ylab = "Summary statistic -log10(P)", las = 1)
points(-log10(gwas[-(1:10), 4]), -log10(pv[-(1:10)]))
points(-log10(gwas[1:10, 4]), -log10(pv[1:10]), cex = 2, pch = 21, bg = "red")
legend("topleft", pch = c(1, 21), pt.cex = c(1, 2), pt.bg = c("white", "red"),
legend = c("null SNPs", "associated SNPs"))
## nb approximation gets better as nsnp becomes large, even with small nsubj
Parse text representation of a SNP embedded in flanking sequences.
data(snps.BRCA1)
parse.snps(snps.BRCA1$SourceSeq[1:10])
## seq5p poly
## 1 GGGTAGAAGTCTTGTTGTTTCCTTCTGCCCCAACAGGCCCTCCACGCCCTTCAGGAGGAG [C/G]
## 2 AAGTCTTGTTGTTTCCTTCTGCCCCAACAGGCCCTCCACGCCCTTCAGGAGGAGCAGGCC [A/G]
## 3 GCAGGCCAGACTCAAGATGAGGCTGTGGGACCTGCAGCAGCTGAGAAAGGAGCTCGGGGA [T/C]
## 4 GTATTCCGAGGACACACCCAGCAGGACCCGGAAGTGCCTAAGTCTTTAGTTTCCAATTTG [T/C]
## 5 AGGACCCGGAAGTGCCTAAGTCTTTAGTTTCCAATTTGCGGATCCACTGCCCTCTGCTTG [T/C]
## 6 GGAGTGCCGGCTGCGGGTGCAGGTCCAGCCCTTGGAGCTGCCCATGGTCACCACCATCCA [C/G]
## 7 GCACCTGGGCATGGGGAAGTGGAGCTGTGTCTGGGACCACCCCTTGCTGTCTCCCCCTAG [A/G]
## 8 TCACTGGATTTCCTGCCAGCCTCAGGCTGAGTGAGGAGGAGCTGCTGGACAAGCTAGAGA [T/C]
## 9 AGGCTGAGTGAGGAGGAGCTGCTGGACAAGCTAGAGATCTTCTTTGGCAAGACTAGGAAC [A/G]
## 10 TGGACAAGCTAGAGATCTTCTTTGGCAAGACTAGGAACGGAGGTGGCGATGTGGACGTTC [A/G]
## alleleA alleleB
## 1 C G
## 2 A G
## 3 T C
## 4 T C
## 5 T C
## 6 C G
## 7 A G
## 8 T C
## 9 A G
## 10 A G
## seq3p len5p
## 1 AGGCCAGACTCAAGATGAGGCTGTGGGACCTGCAGCAGCTGAGAAAGGAGCTCGGGGACT 60
## 2 GACTCAAGATGAGGCTGTGGGACCTGCAGCAGCTGAGAAAGGAGCTCGGGGACTCCCCCA 60
## 3 TCCCCCAAAGACAAGGTAAGGTGGGAGATCTGGTGTTGTTTGGTAAAAACGAGCTGGCGA 60
## 4 GGATCCACTGCCCTCTGCTTGCGGGCTCTGCTCTGATCACCTTTGATGACCCCAAAGGTA 60
## 5 GGGCTCTGCTCTGATCACCTTTGATGACCCCAAAGGTAAGCTCATGGGGAGCCTCAGGAG 60
## 6 GTGATGGTATGACAGAATCCTGGGGCATGCAAAGCATGCCATGCACCTGGGCATGGGGAA 60
## 7 TGTCCAGCCAGTTGAGTGGCCGGAGGGTGTTGGTCACTGGATTTCCTGCCAGCCTCAGGC 60
## 8 CTTCTTTGGCAAGACTAGGAACGGAGGTGGCGATGTGGACGTTCGGGAGCTACTGCCAGG 60
## 9 GAGGTGGCGATGTGGACGTTCGGGAGCTACTGCCAGGGAGTGTCATGCTGGGGTTTGCTA 60
## 10 GGAGCTACTGCCAGGGAGTGTCATGCTGGGGTTTGCTAGGGATGGAGGTGAGGGCTATGC 60
## len3p
## 1 60
## 2 60
## 3 60
## 4 60
## 5 60
## 6 60
## 7 60
## 8 60
## 9 60
## 10 60
data(mthfrex)
xtx <- make.moments2(mthfr.params, c("SBP", "DBP", "SexC", "Age"), mthfrex)
allsnps <- paste(mthfr.params$snp, mthfr.params$coded.allele, sep = "_")
myfit <- stepdown.moments2(xtx, "SBP", allsnps, c("ONE", "SexC", "Age"))
## Dropping rs17421462_G P-value = 0.993
## Dropping rs12121543_C P-value = 0.943
## Dropping rs9651118_T P-value = 0.882
## Dropping rs5065_G P-value = 0.874
## Dropping rs1009591_T P-value = 0.869
## Dropping rs4846049_T P-value = 0.861
## Dropping rs6668659_T P-value = 0.884
## Dropping rs198388_T P-value = 0.823
## Dropping rs13306558_T P-value = 0.816
## Dropping rs198375_T P-value = 0.721
## Dropping rs1801131_T P-value = 0.705
## Dropping rs7535669_G P-value = 0.701
## Dropping rs2066463_T P-value = 0.645
## Dropping rs17376426_T P-value = 0.634
## Dropping rs2066466_T P-value = 0.59
## Dropping rs1931226_C P-value = 0.587
## Dropping rs3753583_G P-value = 0.538
## Dropping rs12562819_G P-value = 0.538
## Dropping rs6541007_G P-value = 0.561
## Dropping rs17037425_G P-value = 0.492
## Dropping rs2274976_T P-value = 0.46
## Dropping rs1413355_T P-value = 0.451
## Dropping rs2066470_G P-value = 0.447
## Dropping rs5064_G P-value = 0.374
## Dropping rs1801133_G P-value = 0.359
## Dropping rs34840945_G P-value = 0.325
## Dropping rs868014_G P-value = 0.318
## Dropping rs6694164_T P-value = 0.321
## Dropping rs4846051_G P-value = 0.28
## Dropping rs5227_C P-value = 0.274
## Dropping rs5229_T P-value = 0.272
## Dropping rs198358_T P-value = 0.262
## Dropping rs2066465_T P-value = 0.249
## Dropping rs17037397_C P-value = 0.224
## Dropping rs7525338_T P-value = 0.167
## Dropping rs11802855_T P-value = 0.118
## Dropping rs13306553_G P-value = 0.0944
## Dropping rs5068_G P-value = 0.0863
## Dropping rs198372_G P-value = 0.06
## Dropping rs17375901_T P-value = 0.0606
## Dropping rs7555034_T P-value = 0.0562
## much faster than stepAIC but only steps to bigger models
cbind(beta = myfit$betahat, se = myfit$se, t = myfit$beta/myfit$se)
## beta se t
## ONE 110.1931342 37.895541 2.907813
## SexC -5.1022999 1.072420 -4.757742
## Age 0.3183264 0.111901 2.844713
## rs1537514_G 16.8410119 4.525585 3.721289
## rs3818762_G 14.9129432 2.010188 7.418681
## rs13306556_T -19.5698794 4.627593 -4.228954
## rs1476413_T -12.4331352 2.597824 -4.785981
## rs1994798_G 3.9919299 1.187325 3.362120
## rs17421511_G -18.8726973 3.014396 -6.260856
## rs4846052_T 6.6284976 1.742221 3.804625
## rs17037388_G -5.8092998 2.500043 -2.323680
## rs11121832_T 6.1477959 1.991041 3.087729
## rs17037390_G 13.6237356 2.405258 5.664147
## rs17037396_T -11.7738169 3.607161 -3.264012
## rs17367504_G -11.5052536 2.438761 -4.717663
## rs13306561_G -5.8285741 2.720583 -2.142399
## rs13306560_T -17.2103810 6.639754 -2.592021
## rs3737964_T -15.8326222 1.763742 -8.976722
## rs17350396_G 14.4289719 2.066664 6.981769
## rs14078_G 9.4308889 3.849477 2.449914
## rs5063_T 25.6984043 7.361210 3.491057
## rs198381_G 5.4733139 2.777182 1.970816
## rs1318408_G -22.7012244 3.265255 -6.952358
## rs12562952_T 12.5508022 3.810200 3.294001
## rs11801879_T 13.4001764 5.599225 2.393220
## rs11803049_G 13.6914841 6.957667 1.967827
data(mthfrex)
xtx <- make.moments2(mthfr.params, c("SBP", "DBP", "SexC", "Age"), mthfrex)
allsnps <- paste(mthfr.params$snp, mthfr.params$coded.allele, sep = "_")
myfit <- stepup.moments2(xtx, "SBP", allsnps, c("ONE", "SexC", "Age"))
## Adding rs3818762_G P-value = 7.49e-43
## Adding rs3737964_T P-value = 2.42e-19
## Adding rs17421511_G P-value = 6.61e-19
## Adding rs17350396_G P-value = 6.47e-13
## Adding rs1318408_G P-value = 4.09e-12
## Adding rs17367504_G P-value = 5.43e-08
## Adding rs17037390_G P-value = 0.000149
## Adding rs12562952_T P-value = 0.000156
## Adding rs1476413_T P-value = 0.000221
## Adding rs17037396_T P-value = 0.000451
## Adding rs5063_T P-value = 0.00387
## Adding rs13306556_T P-value = 0.00415
## Adding rs1537514_G P-value = 0.00709
## Adding rs1009591_T P-value = 0.00488
## Adding rs1994798_G P-value = 0.0205
## Adding rs11121832_T P-value = 0.0175
## much faster than stepAIC but only steps to bigger models
cbind(beta = myfit$betahat, se = myfit$se, t = myfit$beta/myfit$se)
## beta se t
## ONE 112.5475696 28.7358322 3.916628
## SexC -5.2126505 1.0766330 -4.841622
## Age 0.2564697 0.1078969 2.376990
## rs3818762_G 20.9841475 1.5322119 13.695330
## rs3737964_T -15.8165598 1.7059818 -9.271236
## rs17421511_G -13.9229376 2.3275950 -5.981684
## rs17350396_G 14.3966040 2.0771825 6.930832
## rs1318408_G -22.8589016 3.2607777 -7.010261
## rs17367504_G -11.9505242 2.4412599 -4.895228
## rs17037390_G 12.8233214 2.3982414 5.346969
## rs12562952_T 12.8266033 3.8292774 3.349615
## rs1476413_T -8.1231489 2.2918821 -3.544314
## rs17037396_T -11.4475132 3.5794753 -3.198098
## rs5063_T 25.4000450 7.4008979 3.432022
## rs13306556_T -20.4748423 4.7411484 -4.318541
## rs1537514_G 16.8169176 4.5274007 3.714475
## rs1009591_T 7.8149262 1.8810695 4.154512
## rs1994798_G 3.7875003 1.1090340 3.415134
## rs11121832_T 5.4801858 1.9549277 2.803268
data(t2dex)
summary(subset(t2dex$data, select = c("Age", "Overweight", "T2D",
"FollowupDays", "FollowupT2D")))
## Age Overweight T2D FollowupDays
## Min. :18.00 Min. :0.0000 Min. :0.0000 Min. : 15
## 1st Qu.:22.00 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 3885
## Median :26.00 Median :0.0000 Median :0.0000 Median : 5912
## Mean :26.39 Mean :0.2925 Mean :0.1975 Mean : 6137
## 3rd Qu.:31.00 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.: 8308
## Max. :35.00 Max. :1.0000 Max. :1.0000 Max. :15242
##
## FollowupT2D
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2891
## 3rd Qu.:1.0000
## Max. :1.0000
## NA's :395
library(survival)
plot(survfit(Surv(FollowupDays,FollowupT2D) ~ Overweight,
data = t2dex$data), col = c("green", "red"))
mycoxph <- coxph(Surv(FollowupDays,FollowupT2D) ~ Overweight,
data = t2dex$data) # fit null model
data(t2d.scores)
assoc1 <- grs.onesnp.apply(t2d.scores, mycoxph) # single SNP association
## risk score fit from single SNPs
unlist(grs.summary(t2d.scores$coef, assoc1$beta, assoc1$se,
n = length(residuals(mycoxph))))
## m n X2m R2m ahat
## 3.100000e+01 1.605000e+03 6.494297e+01 3.965521e-02 5.660855e-01
## aSE X2rs R2rs pval Qrs
## 9.704573e-02 3.402603e+01 2.097688e-02 5.437979e-09 3.091694e+01
## phet
## 4.194988e-01
## compare direct analysis of subject-specific data
t2dex <- grs.make.scores(t2d.scores, t2dex)
coxph(Surv(FollowupDays,FollowupT2D) ~ Overweight + T2D2010.score,
data = t2dex$data)
## Call:
## coxph(formula = Surv(FollowupDays, FollowupT2D) ~ Overweight +
## T2D2010.score, data = t2dex$data)
##
## coef exp(coef) se(coef) z p
## Overweight 0.5532 1.7388 0.0970 5.70 1.2e-08
## T2D2010.score 0.5756 1.7782 0.0979 5.88 4.1e-09
##
## Likelihood ratio test=61.1 on 2 df, p=5.42e-14
## n= 1605, number of events= 464
## (395 observations deleted due to missingness)