ALL IMPORTED FILES MUST BE IN THE SAME DIRECTORY AS THIS SCRIPT
High-performance computing cluster (e.g. using RStudio Server) strongly recommended, due to large memory requirements
Load required packages
library("tidyverse")
library("readbulk")
library("glmmTMB")
library("betareg")
library("DHARMa")
Import data; each file comprises 1000 SIGNAL bootstrap solutions (generated using https://signal.mutationalsignatures.com/analyse2) using the OvCa-specific signature set (OVARY_A/B/C/D/E/F/G- see https://signal.mutationalsignatures.com/explore/studyTissueType/1-15) for the 12 samples comprising the pooled reference group (‘HGSrefMutSig’), along with either the 2-3 samples comprising each pooled test group (‘ATM/PALB2a/PALB2b/LLGL2/SCYL3’) or an individual sample (‘sample’); refer to Chapters 4.2.2.3-4 for further details
mut_sig_SIGNAL_exposures_ATM <- read_tsv("masterfile_tumour_samples_SIGNAL_HGSrefMutSig_ATM.tsv")
mut_sig_SIGNAL_exposures_PALB2a <- read_tsv("masterfile_tumour_samples_SIGNAL_HGSrefMutSig_PALB2a.tsv")
mut_sig_SIGNAL_exposures_PALB2b <- read_tsv("masterfile_tumour_samples_SIGNAL_HGSrefMutSig_PALB2b.tsv")
mut_sig_SIGNAL_exposures_LLGL2 <- read_tsv("masterfile_tumour_samples_SIGNAL_HGSrefMutSig_LLGL2.tsv")
mut_sig_SIGNAL_exposures_SCYL3 <- read_tsv("masterfile_tumour_samples_SIGNAL_HGSrefMutSig_SCYL3.tsv")
mut_sig_SIGNAL_exposures_sample <- read_tsv("masterfile_tumour_samples_SIGNAL_HGSrefMutSig_sample.tsv")
Round down 1s and convert data into factors
mut_sig_SIGNAL_exposures_ATM$Exposures <- replace(mut_sig_SIGNAL_exposures_ATM$Exposures, mut_sig_SIGNAL_exposures_ATM$Exposures==1, 0.9999999)
mut_sig_SIGNAL_exposures_ATM$Group <- as.factor(mut_sig_SIGNAL_exposures_ATM$Group)
mut_sig_SIGNAL_exposures_ATM$Tumour_Sample <- as.factor(mut_sig_SIGNAL_exposures_ATM$Tumour_Sample)
mut_sig_SIGNAL_exposures_ATM$Solution <- as.factor(mut_sig_SIGNAL_exposures_ATM$Solution)
mut_sig_SIGNAL_exposures_ATM$Solu_Sample <- paste(mut_sig_SIGNAL_exposures_ATM$Solution,
mut_sig_SIGNAL_exposures_ATM$Tumour_Sample,
sep=":") %>%
as.factor()
mut_sig_SIGNAL_exposures_ATM$Signature <- as.factor(mut_sig_SIGNAL_exposures_ATM$Signature)
mut_sig_SIGNAL_exposures_PALB2a$Exposures <- replace(mut_sig_SIGNAL_exposures_PALB2a$Exposures, mut_sig_SIGNAL_exposures_PALB2a$Exposures==1, 0.9999999)
mut_sig_SIGNAL_exposures_PALB2a$Group <- as.factor(mut_sig_SIGNAL_exposures_PALB2a$Group)
mut_sig_SIGNAL_exposures_PALB2a$Tumour_Sample <- as.factor(mut_sig_SIGNAL_exposures_PALB2a$Tumour_Sample)
mut_sig_SIGNAL_exposures_PALB2a$Solution <- as.factor(mut_sig_SIGNAL_exposures_PALB2a$Solution)
mut_sig_SIGNAL_exposures_PALB2a$Solu_Sample <- paste(mut_sig_SIGNAL_exposures_PALB2a$Solution,
mut_sig_SIGNAL_exposures_PALB2a$Tumour_Sample,
sep=":") %>%
as.factor()
mut_sig_SIGNAL_exposures_PALB2a$Signature <- as.factor(mut_sig_SIGNAL_exposures_PALB2a$Signature)
mut_sig_SIGNAL_exposures_PALB2b$Exposures <- replace(mut_sig_SIGNAL_exposures_PALB2b$Exposures, mut_sig_SIGNAL_exposures_PALB2b$Exposures==1, 0.9999999)
mut_sig_SIGNAL_exposures_PALB2b$Group <- as.factor(mut_sig_SIGNAL_exposures_PALB2b$Group)
mut_sig_SIGNAL_exposures_PALB2b$Tumour_Sample <- as.factor(mut_sig_SIGNAL_exposures_PALB2b$Tumour_Sample)
mut_sig_SIGNAL_exposures_PALB2b$Solution <- as.factor(mut_sig_SIGNAL_exposures_PALB2b$Solution)
mut_sig_SIGNAL_exposures_PALB2b$Solu_Sample <- paste(mut_sig_SIGNAL_exposures_PALB2b$Solution,
mut_sig_SIGNAL_exposures_PALB2b$Tumour_Sample,
sep=":") %>%
as.factor()
mut_sig_SIGNAL_exposures_PALB2b$Signature <- as.factor(mut_sig_SIGNAL_exposures_PALB2b$Signature)
mut_sig_SIGNAL_exposures_LLGL2$Exposures <- replace(mut_sig_SIGNAL_exposures_LLGL2$Exposures, mut_sig_SIGNAL_exposures_LLGL2$Exposures==1, 0.9999999)
mut_sig_SIGNAL_exposures_LLGL2$Group <- as.factor(mut_sig_SIGNAL_exposures_LLGL2$Group)
mut_sig_SIGNAL_exposures_LLGL2$Tumour_Sample <- as.factor(mut_sig_SIGNAL_exposures_LLGL2$Tumour_Sample)
mut_sig_SIGNAL_exposures_LLGL2$Solution <- as.factor(mut_sig_SIGNAL_exposures_LLGL2$Solution)
mut_sig_SIGNAL_exposures_LLGL2$Solu_Sample <- paste(mut_sig_SIGNAL_exposures_LLGL2$Solution,
mut_sig_SIGNAL_exposures_LLGL2$Tumour_Sample,
sep=":") %>%
as.factor()
mut_sig_SIGNAL_exposures_LLGL2$Signature <- as.factor(mut_sig_SIGNAL_exposures_LLGL2$Signature)
mut_sig_SIGNAL_exposures_SCYL3$Exposures <- replace(mut_sig_SIGNAL_exposures_SCYL3$Exposures, mut_sig_SIGNAL_exposures_SCYL3$Exposures==1, 0.9999999)
mut_sig_SIGNAL_exposures_SCYL3$Group <- as.factor(mut_sig_SIGNAL_exposures_SCYL3$Group)
mut_sig_SIGNAL_exposures_SCYL3$Tumour_Sample <- as.factor(mut_sig_SIGNAL_exposures_SCYL3$Tumour_Sample)
mut_sig_SIGNAL_exposures_SCYL3$Solution <- as.factor(mut_sig_SIGNAL_exposures_SCYL3$Solution)
mut_sig_SIGNAL_exposures_SCYL3$Solu_Sample <- paste(mut_sig_SIGNAL_exposures_SCYL3$Solution,
mut_sig_SIGNAL_exposures_SCYL3$Tumour_Sample,
sep=":") %>%
as.factor()
mut_sig_SIGNAL_exposures_SCYL3$Signature <- as.factor(mut_sig_SIGNAL_exposures_SCYL3$Signature)
mut_sig_SIGNAL_exposures_SCYL3$Exposures <- replace(mut_sig_SIGNAL_exposures_SCYL3$Exposures, mut_sig_SIGNAL_exposures_SCYL3$Exposures==1, 0.9999999)
mut_sig_SIGNAL_exposures_SCYL3$Group <- as.factor(mut_sig_SIGNAL_exposures_SCYL3$Group)
mut_sig_SIGNAL_exposures_SCYL3$Tumour_Sample <- as.factor(mut_sig_SIGNAL_exposures_SCYL3$Tumour_Sample)
mut_sig_SIGNAL_exposures_SCYL3$Solution <- as.factor(mut_sig_SIGNAL_exposures_SCYL3$Solution)
mut_sig_SIGNAL_exposures_SCYL3$Solu_Sample <- paste(mut_sig_SIGNAL_exposures_SCYL3$Solution,
mut_sig_SIGNAL_exposures_SCYL3$Tumour_Sample,
sep=":") %>%
as.factor()
mut_sig_SIGNAL_exposures_SCYL3$Signature <- as.factor(mut_sig_SIGNAL_exposures_SCYL3$Signature)
mut_sig_SIGNAL_exposures_sample$Exposures <- replace(mut_sig_SIGNAL_exposures_sample$Exposures, mut_sig_SIGNAL_exposures_sample$Exposures==1, 0.9999999)
mut_sig_SIGNAL_exposures_sample$Group <- as.factor(mut_sig_SIGNAL_exposures_sample$Group)
mut_sig_SIGNAL_exposures_sample$Tumour_Sample <- as.factor(mut_sig_SIGNAL_exposures_sample$Tumour_Sample)
mut_sig_SIGNAL_exposures_sample$Solution <- as.factor(mut_sig_SIGNAL_exposures_sample$Solution)
mut_sig_SIGNAL_exposures_sample$Solu_Sample <- paste(mut_sig_SIGNAL_exposures_sample$Solution,
mut_sig_SIGNAL_exposures_sample$Tumour_Sample,
sep=":") %>%
as.factor()
mut_sig_SIGNAL_exposures_sample$Signature <- as.factor(mut_sig_SIGNAL_exposures_sample$Signature)
Run GLMM with beta-regression for combined HGSrefMutSig vs combined and individual samples of interest, and simulate residuals for model using DHARMa
mut_sig_mixed_lmer_bin_ATM <- glmmTMB(Exposures ~ Age + Group +
(1|Solu_Sample) + (1|Signature),
data = mut_sig_SIGNAL_exposures_ATM,
family=beta_family(),
ziformula = ~1,
control = glmmTMBControl(parallel = 48) #parallel = no. of available processor cores/threads
)
simres_mut_sig_mixed_lmer_bin_ATM <- simulateResiduals(mut_sig_mixed_lmer_bin_ATM)
mut_sig_mixed_lmer_bin_PALB2a <- glmmTMB(Exposures ~ Age + Group +
(1|Solu_Sample) + (1|Signature),
data = mut_sig_SIGNAL_exposures_PALB2a,
family=beta_family(),
ziformula = ~1,
control = glmmTMBControl(parallel = 48)
)
simres_mut_sig_mixed_lmer_bin_PALB2a <- simulateResiduals(mut_sig_mixed_lmer_bin_PALB2a)
mut_sig_mixed_lmer_bin_PALB2b <- glmmTMB(Exposures ~ Age + Group +
(1|Solu_Sample) + (1|Signature),
data = mut_sig_SIGNAL_exposures_PALB2b,
family=beta_family(),
ziformula = ~1,
control = glmmTMBControl(parallel = 48)
)
simres_mut_sig_mixed_lmer_bin_PALB2b <- simulateResiduals(mut_sig_mixed_lmer_bin_PALB2b)
mut_sig_mixed_lmer_bin_LLGL2 <- glmmTMB(Exposures ~ Age + Group +
(1|Solu_Sample) + (1|Signature),
data = mut_sig_SIGNAL_exposures_LLGL2,
family=beta_family(),
ziformula = ~1,
control = glmmTMBControl(parallel = 48)
)
simres_mut_sig_mixed_lmer_bin_LLGL2 <- simulateResiduals(mut_sig_mixed_lmer_bin_LLGL2)
mut_sig_mixed_lmer_bin_SCYL3 <- glmmTMB(Exposures ~ Age + Group +
(1|Solu_Sample) + (1|Signature),
data = mut_sig_SIGNAL_exposures_SCYL3,
family=beta_family(),
ziformula = ~1,
control = glmmTMBControl(parallel = 48)
)
simres_mut_sig_mixed_lmer_bin_SCYL3 <- simulateResiduals(mut_sig_mixed_lmer_bin_SCYL3)
mut_sig_mixed_lmer_bin_sample <- glmmTMB(Exposures ~ Age + Group +
(1|Solu_Sample) + (1|Signature),
data = mut_sig_SIGNAL_exposures_sample,
family=beta_family(),
ziformula = ~1,
control = glmmTMBControl(parallel = 48)
)
simres_mut_sig_mixed_lmer_bin_sample <- simulateResiduals(mut_sig_mixed_lmer_bin_sample)
Summary of GLMM results
summary(mut_sig_mixed_lmer_bin_ATM)
Family: beta ( logit )
Formula: Exposures ~ Age + Group + (1 | Solu_Sample) + (1 | Signature)
Zero inflation: ~1
Data: mut_sig_SIGNAL_exposures_ATM
AIC BIC logLik deviance df.resid
44048.1 44115.0 -22017.0 44034.1 104993
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
Solu_Sample (Intercept) 0.05436 0.2332
Signature (Intercept) 0.17581 0.4193
Number of obs: 105000, groups: Solu_Sample, 15000; Signature, 7
Dispersion parameter for beta family (): 5.39
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.1272493 0.1632022 -13.03 <2e-16 ***
Age 0.0129747 0.0005831 22.25 <2e-16 ***
GroupHGSrefMutSig -0.1288221 0.0101442 -12.70 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Zero-inflation model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.633850 0.006485 -97.75 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(mut_sig_mixed_lmer_bin_PALB2a)
Family: beta ( logit )
Formula: Exposures ~ Age + Group + (1 | Solu_Sample) + (1 | Signature)
Zero inflation: ~1
Data: mut_sig_SIGNAL_exposures_PALB2a
AIC BIC logLik deviance df.resid
30004.1 30070.5 -14995.0 29990.1 97993
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
Solu_Sample (Intercept) 0.04277 0.2068
Signature (Intercept) 0.19685 0.4437
Number of obs: 98000, groups: Solu_Sample, 14000; Signature, 7
Dispersion parameter for beta family (): 5.82
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.2596816 0.1711655 -13.202 < 2e-16 ***
Age 0.0124926 0.0005535 22.571 < 2e-16 ***
GroupPALB2a 0.0528082 0.0120234 4.392 1.12e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Zero-inflation model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.732795 0.006822 -107.4 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(mut_sig_mixed_lmer_bin_PALB2b)
Family: beta ( logit )
Formula: Exposures ~ Age + Group + (1 | Solu_Sample) + (1 | Signature)
Zero inflation: ~1
Data: mut_sig_SIGNAL_exposures_PALB2b
AIC BIC logLik deviance df.resid
31932.4 31999.3 -15959.2 31918.4 104993
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
Solu_Sample (Intercept) 0.04491 0.2119
Signature (Intercept) 0.17691 0.4206
Number of obs: 105000, groups: Solu_Sample, 15000; Signature, 7
Dispersion parameter for beta family (): 5.69
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.2644555 0.1626662 -13.921 < 2e-16 ***
Age 0.0128562 0.0005562 23.116 < 2e-16 ***
GroupPALB2b 0.0764734 0.0110615 6.913 4.73e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Zero-inflation model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.750185 0.006611 -113.5 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(mut_sig_mixed_lmer_bin_LLGL2)
Family: beta ( logit )
Formula: Exposures ~ Age + Group + (1 | Solu_Sample) + (1 | Signature)
Zero inflation: ~1
Data: mut_sig_SIGNAL_exposures_LLGL2
AIC BIC logLik deviance df.resid
60057.5 60124.4 -30021.7 60043.5 104993
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
Solu_Sample (Intercept) 0.05634 0.2374
Signature (Intercept) 0.21470 0.4634
Number of obs: 105000, groups: Solu_Sample, 15000; Signature, 7
Dispersion parameter for beta family (): 4.08
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.0859905 0.1788675 -11.66 <2e-16 ***
Age 0.0112367 0.0005847 19.22 <2e-16 ***
GroupLLGL2b 0.1460556 0.0115641 12.63 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Zero-inflation model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.531563 0.006391 -83.17 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(mut_sig_mixed_lmer_bin_SCYL3)
Family: beta ( logit )
Formula: Exposures ~ Age + Group + (1 | Solu_Sample) + (1 | Signature)
Zero inflation: ~1
Data: mut_sig_SIGNAL_exposures_SCYL3
AIC BIC logLik deviance df.resid
35547.5 35613.9 -17766.7 35533.5 97993
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
Solu_Sample (Intercept) 0.04149 0.2037
Signature (Intercept) 0.21897 0.4679
Number of obs: 98000, groups: Solu_Sample, 14000; Signature, 7
Dispersion parameter for beta family (): 5.67
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.1432706 0.1797911 -11.921 <2e-16 ***
Age 0.0105856 0.0005209 20.322 <2e-16 ***
GroupSCYL3 0.0187499 0.0110561 1.696 0.0899 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Zero-inflation model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.664613 0.006745 -98.54 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# summary(mut_sig_mixed_lmer_bin_sample)
Plots of simulated residuals
plot(simres_mut_sig_mixed_lmer_bin_ATM)
plot(simres_mut_sig_mixed_lmer_bin_PALB2a)
plot(simres_mut_sig_mixed_lmer_bin_PALB2b)
plot(simres_mut_sig_mixed_lmer_bin_LLGL2)
plot(simres_mut_sig_mixed_lmer_bin_SCYL3)
Summary and plots of dispersion
testDispersion(simres_mut_sig_mixed_lmer_bin_ATM)
DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated
data: simulationOutput
dispersion = 1.0582, p-value = 0.616
alternative hypothesis: two.sided
testDispersion(simres_mut_sig_mixed_lmer_bin_PALB2a)
DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated
data: simulationOutput
dispersion = 1.0424, p-value = 0.744
alternative hypothesis: two.sided
testDispersion(simres_mut_sig_mixed_lmer_bin_PALB2b)
DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated
data: simulationOutput
dispersion = 1.0152, p-value = 0.84
alternative hypothesis: two.sided
testDispersion(simres_mut_sig_mixed_lmer_bin_LLGL2)
DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated
data: simulationOutput
dispersion = 1.0324, p-value = 0.816
alternative hypothesis: two.sided
testDispersion(simres_mut_sig_mixed_lmer_bin_SCYL3)
DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated
data: simulationOutput
dispersion = 1.0557, p-value = 0.792
alternative hypothesis: two.sided
# testDispersion(simres_mut_sig_mixed_lmer_bin_sample)
Summary and plots of uniformity
testUniformity(simres_mut_sig_mixed_lmer_bin_ATM)
One-sample Kolmogorov-Smirnov test
data: simulationOutput$scaledResiduals
D = 0.048133, p-value < 2.2e-16
alternative hypothesis: two-sided
testUniformity(simres_mut_sig_mixed_lmer_bin_PALB2a)
One-sample Kolmogorov-Smirnov test
data: simulationOutput$scaledResiduals
D = 0.056776, p-value < 2.2e-16
alternative hypothesis: two-sided
testUniformity(simres_mut_sig_mixed_lmer_bin_PALB2b)
One-sample Kolmogorov-Smirnov test
data: simulationOutput$scaledResiduals
D = 0.0542, p-value < 2.2e-16
alternative hypothesis: two-sided
testUniformity(simres_mut_sig_mixed_lmer_bin_LLGL2)
One-sample Kolmogorov-Smirnov test
data: simulationOutput$scaledResiduals
D = 0.052019, p-value < 2.2e-16
alternative hypothesis: two-sided
testUniformity(simres_mut_sig_mixed_lmer_bin_SCYL3)
One-sample Kolmogorov-Smirnov test
data: simulationOutput$scaledResiduals
D = 0.054255, p-value < 2.2e-16
alternative hypothesis: two-sided
# testUniformity(simres_mut_sig_mixed_lmer_bin_sample)
Summary and plots of outliers
testOutliers(simres_mut_sig_mixed_lmer_bin_ATM,type="bootstrap")
DHARMa bootstrapped outlier test
data: simres_mut_sig_mixed_lmer_bin_ATM
outliers at both margin(s) = 543, observations = 105000, p-value = 0.76
alternative hypothesis: two.sided
percent confidence interval:
0.002842143 0.014148333
sample estimates:
outlier frequency (expected: 0.00645638095238095 )
0.005171429
testOutliers(simres_mut_sig_mixed_lmer_bin_PALB2a,type="bootstrap")
DHARMa bootstrapped outlier test
data: simres_mut_sig_mixed_lmer_bin_PALB2a
outliers at both margin(s) = 427, observations = 98000, p-value = 0.6
alternative hypothesis: two.sided
percent confidence interval:
0.002389541 0.016099745
sample estimates:
outlier frequency (expected: 0.00667224489795918 )
0.004357143
testOutliers(simres_mut_sig_mixed_lmer_bin_PALB2b,type="bootstrap")
DHARMa bootstrapped outlier test
data: simres_mut_sig_mixed_lmer_bin_PALB2b
outliers at both margin(s) = 441, observations = 105000, p-value = 0.54
alternative hypothesis: two.sided
percent confidence interval:
0.002544524 0.015225476
sample estimates:
outlier frequency (expected: 0.00646295238095238 )
0.0042
testOutliers(simres_mut_sig_mixed_lmer_bin_LLGL2,type="bootstrap")
DHARMa bootstrapped outlier test
data: simres_mut_sig_mixed_lmer_bin_LLGL2
outliers at both margin(s) = 1094, observations = 105000, p-value = 0.2
alternative hypothesis: two.sided
percent confidence interval:
0.002551429 0.012666667
sample estimates:
outlier frequency (expected: 0.00629619047619048 )
0.01041905
testOutliers(simres_mut_sig_mixed_lmer_bin_SCYL3,type="bootstrap")
DHARMa bootstrapped outlier test
data: simres_mut_sig_mixed_lmer_bin_SCYL3
outliers at both margin(s) = 463, observations = 98000, p-value = 0.92
alternative hypothesis: two.sided
percent confidence interval:
0.002066071 0.012079082
sample estimates:
outlier frequency (expected: 0.00571051020408163 )
0.00472449
# testOutliers(simres_mut_sig_mixed_lmer_bin_sample,type="bootstrap")
Summary and plots of zero-inflation
testZeroInflation(simres_mut_sig_mixed_lmer_bin_ATM)
DHARMa zero-inflation test via comparison to expected zeros with simulation under H0 = fitted model
data: simulationOutput
ratioObsSim = 1.0004, p-value = 0.92
alternative hypothesis: two.sided
testZeroInflation(simres_mut_sig_mixed_lmer_bin_PALB2a)
DHARMa zero-inflation test via comparison to expected zeros with simulation under H0 = fitted model
data: simulationOutput
ratioObsSim = 1.0002, p-value = 0.952
alternative hypothesis: two.sided
testZeroInflation(simres_mut_sig_mixed_lmer_bin_PALB2b)
DHARMa zero-inflation test via comparison to expected zeros with simulation under H0 = fitted model
data: simulationOutput
ratioObsSim = 1.0002, p-value = 0.984
alternative hypothesis: two.sided
testZeroInflation(simres_mut_sig_mixed_lmer_bin_LLGL2)
DHARMa zero-inflation test via comparison to expected zeros with simulation under H0 = fitted model
data: simulationOutput
ratioObsSim = 1.0005, p-value = 0.888
alternative hypothesis: two.sided
testZeroInflation(simres_mut_sig_mixed_lmer_bin_SCYL3)
DHARMa zero-inflation test via comparison to expected zeros with simulation under H0 = fitted model
data: simulationOutput
ratioObsSim = 0.99979, p-value = 0.904
alternative hypothesis: two.sided
# testZeroInflation(simres_mut_sig_mixed_lmer_bin_sample)
Plots of residuals against sample/group predictors
plotResiduals(simres_mut_sig_mixed_lmer_bin_ATM, form=mut_sig_SIGNAL_exposures_ATM$Group)
plotResiduals(simres_mut_sig_mixed_lmer_bin_PALB2a, form=mut_sig_SIGNAL_exposures_PALB2a$Group)
plotResiduals(simres_mut_sig_mixed_lmer_bin_PALB2b, form=mut_sig_SIGNAL_exposures_PALB2b$Group)
plotResiduals(simres_mut_sig_mixed_lmer_bin_LLGL2, form=mut_sig_SIGNAL_exposures_LLGL2$Group)
plotResiduals(simres_mut_sig_mixed_lmer_bin_SCYL3, form=mut_sig_SIGNAL_exposures_SCYL3$Group)
# plotResiduals(simres_mut_sig_mixed_lmer_bin_sample, form=mut_sig_SIGNAL_exposures_sample$Group)
Save results
sink(file="pooled_vs_HGSrefMutSig_SIGNAL_lmer_bin_all_p_values.txt",append = FALSE)
print("HGS_refMutSig_vs_ATM_lmer_bin_results")
print(coef(summary(mut_sig_mixed_lmer_bin_ATM))$cond)
print("HGS_refMutSig_vs_PALB2a_lmer_bin_results")
print(coef(summary(mut_sig_mixed_lmer_bin_PALB2a))$cond)
print("HGS_refMutSig_vs_PALB2b_lmer_bin_results")
print(coef(summary(mut_sig_mixed_lmer_bin_PALB2b))$cond)
print("HGS_refMutSig_vs_LLGL2_lmer_bin_results")
print(coef(summary(mut_sig_mixed_lmer_bin_LLGL2))$cond)
print("HGS_refMutSig_vs_SCYL3_lmer_bin_results")
print(coef(summary(mut_sig_mixed_lmer_bin_SCYL3))$cond)
# print("HGS_refMutSig_vs_sample_lmer_bin_results")
# print(coef(summary(mut_sig_mixed_lmer_bin_sample))$cond)
sink()
pdf("HGS_refMutSig_vs_ATM_SIGNAL_lmer_bin_simRes_results.pdf")
sink(file="HGS_refMutSig_vs_ATM_SIGNAL_lmer_bin_simRes_results.txt",append = FALSE)
summary(mut_sig_mixed_lmer_bin_ATM)
plot(simres_mut_sig_mixed_lmer_bin_ATM)
testDispersion(simres_mut_sig_mixed_lmer_bin_ATM)
testUniformity(simres_mut_sig_mixed_lmer_bin_ATM)
testOutliers(simres_mut_sig_mixed_lmer_bin_ATM,type="bootstrap")
testZeroInflation(simres_mut_sig_mixed_lmer_bin_ATM)
plotResiduals(simres_mut_sig_mixed_lmer_bin_ATM, mut_sig_SIGNAL_exposures_ATM$Group)
dev.off()
sink()
pdf("HGS_refMutSig_vs_PALB2a_SIGNAL_lmer_bin_simRes_results.pdf")
sink(file="HGS_refMutSig_vs_PALB2a_SIGNAL_lmer_bin_simRes_results.txt",append = FALSE)
summary(mut_sig_mixed_lmer_bin_PALB2a)
plot(simres_mut_sig_mixed_lmer_bin_PALB2a)
testDispersion(simres_mut_sig_mixed_lmer_bin_PALB2a)
testUniformity(simres_mut_sig_mixed_lmer_bin_PALB2a)
testOutliers(simres_mut_sig_mixed_lmer_bin_PALB2a,type="bootstrap")
testZeroInflation(simres_mut_sig_mixed_lmer_bin_PALB2a)
plotResiduals(simres_mut_sig_mixed_lmer_bin_PALB2a, mut_sig_SIGNAL_exposures_PALB2a$Group)
dev.off()
sink()
pdf("HGS_refMutSig_vs_PALB2b_SIGNAL_lmer_bin_simRes_results.pdf")
sink(file="HGS_refMutSig_vs_PALB2b_SIGNAL_lmer_bin_simRes_results.txt",append = FALSE)
summary(mut_sig_mixed_lmer_bin_PALB2b)
plot(simres_mut_sig_mixed_lmer_bin_PALB2b)
testDispersion(simres_mut_sig_mixed_lmer_bin_PALB2b)
testUniformity(simres_mut_sig_mixed_lmer_bin_PALB2b)
testOutliers(simres_mut_sig_mixed_lmer_bin_PALB2b,type="bootstrap")
testZeroInflation(simres_mut_sig_mixed_lmer_bin_PALB2b)
plotResiduals(simres_mut_sig_mixed_lmer_bin_PALB2b, mut_sig_SIGNAL_exposures_PALB2b$Group)
dev.off()
sink()
pdf("HGS_refMutSig_vs_LLGL2_SIGNAL_lmer_bin_simRes_results.pdf")
sink(file="HGS_refMutSig_vs_LLGL2_SIGNAL_lmer_bin_simRes_results.txt",append = FALSE)
summary(mut_sig_mixed_lmer_bin_LLGL2)
plot(simres_mut_sig_mixed_lmer_bin_LLGL2)
testDispersion(simres_mut_sig_mixed_lmer_bin_LLGL2)
testUniformity(simres_mut_sig_mixed_lmer_bin_LLGL2)
testOutliers(simres_mut_sig_mixed_lmer_bin_LLGL2,type="bootstrap")
testZeroInflation(simres_mut_sig_mixed_lmer_bin_LLGL2)
plotResiduals(simres_mut_sig_mixed_lmer_bin_LLGL2, mut_sig_SIGNAL_exposures_LLGL2$Group)
dev.off()
sink()
pdf("HGS_refMutSig_vs_SCYL3_SIGNAL_lmer_bin_simRes_results.pdf")
sink(file="HGS_refMutSig_vs_SCYL3_SIGNAL_lmer_bin_simRes_results.txt",append = FALSE)
summary(mut_sig_mixed_lmer_bin_SCYL3)
plot(simres_mut_sig_mixed_lmer_bin_SCYL3)
testDispersion(simres_mut_sig_mixed_lmer_bin_SCYL3)
testUniformity(simres_mut_sig_mixed_lmer_bin_SCYL3)
testOutliers(simres_mut_sig_mixed_lmer_bin_SCYL3,type="bootstrap")
testZeroInflation(simres_mut_sig_mixed_lmer_bin_SCYL3)
plotResiduals(simres_mut_sig_mixed_lmer_bin_SCYL3, mut_sig_SIGNAL_exposures_SCYL3$Group)
dev.off()
sink()
# pdf("HGS_refMutSig_vs_sample_SIGNAL_lmer_bin_simRes_results.pdf")
# sink(file="HGS_refMutSig_vs_sample_SIGNAL_lmer_bin_simRes_results.txt",append = FALSE)
# summary(mut_sig_mixed_lmer_bin_sample)
# plot(simres_mut_sig_mixed_lmer_bin_sample)
# testDispersion(simres_mut_sig_mixed_lmer_bin_sample)
# testUniformity(simres_mut_sig_mixed_lmer_bin_sample)
# testOutliers(simres_mut_sig_mixed_lmer_bin_sample,type="bootstrap")
# testZeroInflation(simres_mut_sig_mixed_lmer_bin_sample)
# plotResiduals(simres_mut_sig_mixed_lmer_bin_sample, mut_sig_SIGNAL_exposures_sample$Group)
# dev.off()
# sink()
closeAllConnections()