This code describes the performed pilot study with interpretation of results
#At random selected samples from dataframe to check for proper read-in
DATA[sample(nrow(DATA), 10), ]
## TRAIT SNP NON.RISK.RISK.ALLELE log.OR. OR
## 23 PIR rs73416724 A>G 0.037426498 1.090
## 14 PIR rs28541419 C>G -0.003300638 0.992
## 159 SDR rs2367245 G 0.681842337 4.807
## 112 PDR rs80278479 C>G -1.161150909 0.069
## 60 SIR rs243505 G>A 0.033423755 1.080
## 178 SDR rs7443270 T>C 0.017371779 1.041
## 127 PDR rs17158930 A>G -0.169027412 0.678
## 50 PIR rs34972666 A>G 0.099887731 1.259
## 7 PIR rs112348907 A>G 0.003083491 1.007
## 37 PIR rs4700060 C>T 0.091201841 1.234
boxplot(log.OR.~TRAIT, data = DATA)
Possible outliers in PDR and SDR. Statistical testing for outliers with RosnerTest.
rosnerTest(PDR, k=4, warn=F)
##
## Results of Outlier Test
## -------------------------
##
## Test Method: Rosner's Test for Outliers
##
## Hypothesized Distribution: Normal
##
## Data: PDR
##
## Sample Size: 36
##
## Test Statistics: R.1 = 2.620486
## R.2 = 2.772525
## R.3 = 3.080248
## R.4 = 2.234773
##
## Test Statistic Parameter: k = 4
##
## Alternative Hypothesis: Up to 4 observations are not
## from the same Distribution.
##
## Type I Error: 5%
##
## Number of Outliers Detected: 3
##
## i Mean.i SD.i Value Obs.Num R.i+1 lambda.i+1 Outlier
## 1 0 -0.05055808 0.4554755 -1.2441251 7 2.620486 2.990585 TRUE
## 2 1 -0.01645617 0.4128709 -1.1611509 12 2.772525 2.978183 TRUE
## 3 2 0.01721133 0.3670875 -1.1135093 3 3.080248 2.965315 TRUE
## 4 3 0.05147559 0.3127291 0.7503541 6 2.234773 2.951949 FALSE
rosnerTest(SDR, k=6, warn=F)
##
## Results of Outlier Test
## -------------------------
##
## Test Method: Rosner's Test for Outliers
##
## Hypothesized Distribution: Normal
##
## Data: SDR
##
## Sample Size: 42
##
## Test Statistics: R.1 = 3.166320
## R.2 = 2.426619
## R.3 = 2.307888
## R.4 = 2.305706
## R.5 = 2.340861
## R.6 = 2.307768
##
## Test Statistic Parameter: k = 6
##
## Alternative Hypothesis: Up to 6 observations are not
## from the same Distribution.
##
## Type I Error: 5%
##
## Number of Outliers Detected: 1
##
## i Mean.i SD.i Value Obs.Num R.i+1 lambda.i+1 Outlier
## 1 0 -0.09899846 0.7601507 -2.505879 25 3.166320 3.056723 TRUE
## 2 1 -0.04029405 0.6662699 1.576489 22 2.426619 3.046571 FALSE
## 3 2 -0.08071363 0.6217693 -1.515688 33 2.307888 3.036097 FALSE
## 4 3 -0.04391942 0.5841174 1.302883 20 2.305706 3.025284 FALSE
## 5 4 -0.07936160 0.5478145 1.202996 37 2.340861 3.014109 FALSE
## 6 5 -0.11401991 0.5113936 -1.294198 21 2.307768 3.002552 FALSE
Removal of the outliers detected by OUTLIER = TRUE
DATA <- DATA[-c(103, 107, 112, 161),]
p <- ggplot(data = DATA, mapping = aes(x=reorder(TRAIT, log.OR., median, na.rm = TRUE), y=log.OR.,
backgroundColor = "white")) +
geom_boxplot(fill="#EBF7FF", colour="#003C69", size=0.5) +
coord_flip() +
ylim(-1.5,1.8) +
labs(x="Trait", y="Log Odds Ratio")
p + theme (panel.background = element_rect(fill = "white", colour = "#848484",
size = 0.5, linetype = "solid"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
##
## Shapiro-Wilk normality test
##
## data: PIR
## W = 0.92866, p-value = 0.004917
##
## Shapiro-Wilk normality test
##
## data: SIR
## W = 0.78849, p-value = 4.81e-07
##
## Shapiro-Wilk normality test
##
## data: PDR
## W = 0.94582, p-value = 0.1004
##
## Shapiro-Wilk normality test
##
## data: SDR
## W = 0.93529, p-value = 0.02171
All distribution are non-normal. Further statistical analyses will thus be performed with non-parametric tests.
median_test(log.OR. ~ TRAIT, data = DATA, exact = FALSE) #GENERAL
##
## Asymptotic K-Sample Brown-Mood Median Test
##
## data: log.OR. by TRAIT (PDR, PIR, SDR, SIR)
## chi-squared = 11.44, df = 3, p-value = 0.009569
pairwiseMedianTest(log.OR. ~ TRAIT, data = DATA,
exact = NULL, method = "fdr") #POST-HOC
## Comparison p.value p.adjust
## 1 PDR - PIR = 0 0.5631 0.675700
## 2 PDR - SDR = 0 0.8163 0.816300
## 3 PDR - SIR = 0 0.2605 0.390800
## 4 PIR - SDR = 0 0.2364 0.390800
## 5 PIR - SIR = 0 0.2321 0.390800
## 6 SDR - SIR = 0 0.001036 0.006216
Only difference in medians is detected between SDR and SIR (p.adjust <0.05)
kruskal.test(log.OR. ~ TRAIT, data = DATA) #GENERAL
##
## Kruskal-Wallis rank sum test
##
## data: log.OR. by TRAIT
## Kruskal-Wallis chi-squared = 5.9421, df = 3, p-value = 0.1145
#No difference detected, but a post-hoc analysis is performed for more security
pairwise.wilcox.test(DATA$log.OR., DATA$TRAIT,
p.adjust.method = "fdr") #POST-HOC
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: DATA$log.OR. and DATA$TRAIT
##
## PDR PIR SDR
## PIR 0.849 - -
## SDR 0.849 0.207 -
## SIR 0.849 0.678 0.025
##
## P value adjustment method: fdr
Only difference in distributions is detected between SDR and SIR (p.adjust <0.05)