R Markdown
setup
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v dplyr 1.0.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(extrafont)
## Registering fonts with R
library(skimr)
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(corrplot)
## corrplot 0.84 loaded
# NOTE(review): hard-coded, machine-specific absolute path. The read_csv()
# calls in this file use bare file names, so they presumably rely on this
# directory being the working directory - confirm before changing.
setwd('C:/Users/megha/OneDrive/Desktop/workshop/')
# Shared ggplot2 theme for every figure in this document: white background,
# dashed grey major grid lines, no minor grid or panel border, and dark grey
# ("grey5") text and ticks.
# NOTE(review): the "Palatino Linotype" family presumably has to be
# registered on the machine (extrafont is loaded above) - confirm.
mytheme <- local({
  ink <- "grey5"
  axis_label <- element_text(colour = ink, size = 10)

  theme(
    panel.border = element_blank(),
    panel.grid.major = element_line(
      colour = "grey60",
      linetype = "dashed",
      size = 0.5
    ),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(colour = "white", fill = "white"),
    axis.line = element_line(colour = "white"),
    text = element_text(family = "Palatino Linotype", colour = ink),
    axis.text.x = axis_label,
    axis.text.y = axis_label,
    axis.ticks = element_line(colour = ink),
    title = element_text(colour = ink, size = 10, vjust = -4)
  )
})
Read in files using tidyverse
# Load the raw workshop data; readr guesses the column types and reports them.
raw <- read_csv(file = "thePerfectdata_raw.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## SampleID = col_character(),
## Strain = col_character(),
## Replicate = col_double(),
## Trial = col_character(),
## Exposed_CFU = col_double(),
## NoExposure_CFU = col_double()
## )
# Load the cleaned version of the same data set.
clean <- read_csv(file = "thePerfectdata_clean.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## SampleID = col_character(),
## Strain = col_character(),
## Replicate = col_double(),
## Trial = col_double(),
## Exposed_CFU = col_double(),
## NoExposure_CFU = col_double()
## )
Qualitative
# Histogram (15 bins) of the exposed CFU counts in the raw data.
raw %>%
  ggplot(aes(x = Exposed_CFU)) +
  geom_histogram(bins = 15) +
  mytheme
## Warning: Removed 2 rows containing non-finite values (stat_bin).

# Same histogram on the natural-log scale; values whose log is not finite
# are dropped by stat_bin with a warning.
raw %>%
  ggplot(aes(x = log(Exposed_CFU))) +
  geom_histogram(bins = 15) +
  mytheme
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# Stack the two CFU columns into long form (`study` holds the source column
# name, `CFU` the count), then draw dodged log(CFU) histograms coloured by
# strain with one panel per trial.
raw %>%
  pivot_longer(
    cols = c(Exposed_CFU, NoExposure_CFU),
    names_to = "study",
    values_to = "CFU"
  ) %>%
  ggplot(aes(log(CFU), fill = Strain)) +
  # alpha must be set on the layer: ggplot() does not use extra arguments,
  # so passing it there had no effect on the bars.
  geom_histogram(bins = 10, position = "dodge", alpha = 0.2) +
  facet_wrap(~Trial) +
  mytheme
## Warning: Removed 6 rows containing non-finite values (stat_bin).

# NOTE(review): this chunk appears to repeat the preceding raw-data plot -
# confirm whether a different data set or variable was intended here.
# Long-format raw data drawn as dodged log(CFU) histograms by strain,
# one panel per trial.
raw %>%
  pivot_longer(
    cols = c(Exposed_CFU, NoExposure_CFU),
    names_to = "study",
    values_to = "CFU"
  ) %>%
  ggplot(aes(log(CFU), fill = Strain)) +
  # alpha must be set on the layer: ggplot() does not use extra arguments,
  # so passing it there had no effect on the bars.
  geom_histogram(bins = 10, position = "dodge", alpha = 0.2) +
  facet_wrap(~Trial) +
  mytheme
## Warning: Removed 6 rows containing non-finite values (stat_bin).

Quantitative
# Preview the cleaned data in long form: the two CFU columns are stacked so
# that `study` names the source column and `CFU` holds the count.
pivot_longer(
  clean,
  cols = c(Exposed_CFU, NoExposure_CFU),
  names_to = "study",
  values_to = "CFU"
)
## # A tibble: 72 x 6
## SampleID Strain Replicate Trial study CFU
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Sample1 WT 1 1 Exposed_CFU 100
## 2 Sample1 WT 1 1 NoExposure_CFU 67000
## 3 Sample2 WT 2 1 Exposed_CFU 249
## 4 Sample2 WT 2 1 NoExposure_CFU 8500
## 5 Sample3 WT 3 1 Exposed_CFU 75
## 6 Sample3 WT 3 1 NoExposure_CFU 10000
## 7 Sample4 mutPD 1 1 Exposed_CFU 10000
## 8 Sample4 mutPD 1 1 NoExposure_CFU 11000
## 9 Sample5 mutPD 2 1 Exposed_CFU 11900
## 10 Sample5 mutPD 2 1 NoExposure_CFU 15000
## # ... with 62 more rows
# Cleaned data in long form, drawn as dodged log(CFU) histograms coloured by
# strain with one panel per trial.
clean %>%
  pivot_longer(
    cols = c(Exposed_CFU, NoExposure_CFU),
    names_to = "study",
    values_to = "CFU"
  ) %>%
  ggplot(aes(log(CFU), fill = Strain)) +
  # alpha must be set on the layer: ggplot() does not use extra arguments,
  # so passing it there had no effect on the bars.
  geom_histogram(bins = 10, position = "dodge", alpha = 0.2) +
  facet_wrap(~Trial) +
  mytheme

t.test
# Long-format copy of the cleaned data (`study` = exposure condition,
# `CFU` = count), kept for the statistical tests.
clean_q <- pivot_longer(
  clean,
  cols = c(Exposed_CFU, NoExposure_CFU),
  names_to = "study",
  values_to = "CFU"
)

# Two-sample t-test comparing untransformed CFU between the two conditions.
t.test(CFU ~ study, data = clean_q)
##
## Welch Two Sample t-test
##
## data: CFU by study
## t = 1.8121, df = 69.776, p-value = 0.07428
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -17897.59 373320.54
## sample estimates:
## mean in group Exposed_CFU mean in group NoExposure_CFU
## 299923.6 122212.1
# Same comparison on the natural-log scale.
clean_q %>%
  t.test(log(CFU) ~ study, data = .)
##
## Welch Two Sample t-test
##
## data: log(CFU) by study
## t = 1.1484, df = 66.569, p-value = 0.2549
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.633364 2.349046
## sample estimates:
## mean in group Exposed_CFU mean in group NoExposure_CFU
## 9.421546 8.563705
ANOVA and post-hoc Tukey's HSD
# One-way ANOVA of log(CFU) on exposure condition, followed by its table.
aov_test <- aov(formula = log(CFU) ~ study, data = clean_q)
aov_test %>% summary()
## Df Sum Sq Mean Sq F value Pr(>F)
## study 1 13.2 13.25 1.319 0.255
## Residuals 70 703.1 10.04
# Tukey honest-significant-difference comparisons for the fitted model.
aov_test %>% TukeyHSD()
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = log(CFU) ~ study, data = clean_q)
##
## $study
## diff lwr upr p adj
## NoExposure_CFU-Exposed_CFU -0.8578411 -2.347695 0.632013 0.2547233
# Two-way ANOVA with interaction; this overwrites the earlier `aov_test`.
# The main effects are listed explicitly although `study * Strain` alone
# already expands to both main effects plus their interaction.
aov_test <- aov(
  formula = log(CFU) ~ study + Strain + study * Strain,
  data = clean_q
)
aov_test %>% summary()
## Df Sum Sq Mean Sq F value Pr(>F)
## study 1 13.2 13.25 6.604 0.012439 *
## Strain 2 35.5 17.74 8.845 0.000395 ***
## study:Strain 2 535.2 267.62 133.429 < 2e-16 ***
## Residuals 66 132.4 2.01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise Tukey comparisons for every term of the current `aov_test` fit.
aov_test %>% TukeyHSD()
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = log(CFU) ~ study + Strain + study * Strain, data = clean_q)
##
## $study
## diff lwr upr p adj
## NoExposure_CFU-Exposed_CFU -0.8578411 -1.524312 -0.1913702 0.0124394
##
## $Strain
## diff lwr upr p adj
## mutPDa-mutPD 0.2120216 -0.7682327 1.1922760 0.8625496
## WT-mutPD -1.3717902 -2.3520446 -0.3915358 0.0037224
## WT-mutPDa -1.5838118 -2.5640662 -0.6035574 0.0007191
##
## $`study:Strain`
## diff lwr upr
## NoExposure_CFU:mutPD-Exposed_CFU:mutPD 0.72527496 -0.9717201 2.422270
## Exposed_CFU:mutPDa-Exposed_CFU:mutPD 4.66753018 2.9705351 6.364525
## NoExposure_CFU:mutPDa-Exposed_CFU:mutPD -3.51821192 -5.2152070 -1.821217
## Exposed_CFU:WT-Exposed_CFU:mutPD -3.45262467 -5.1496198 -1.755630
## NoExposure_CFU:WT-Exposed_CFU:mutPD 1.43431924 -0.2626759 3.131314
## Exposed_CFU:mutPDa-NoExposure_CFU:mutPD 3.94225522 2.2452601 5.639250
## NoExposure_CFU:mutPDa-NoExposure_CFU:mutPD -4.24348688 -5.9404820 -2.546492
## Exposed_CFU:WT-NoExposure_CFU:mutPD -4.17789963 -5.8748947 -2.480905
## NoExposure_CFU:WT-NoExposure_CFU:mutPD 0.70904428 -0.9879508 2.406039
## NoExposure_CFU:mutPDa-Exposed_CFU:mutPDa -8.18574210 -9.8827372 -6.488747
## Exposed_CFU:WT-Exposed_CFU:mutPDa -8.12015485 -9.8171499 -6.423160
## NoExposure_CFU:WT-Exposed_CFU:mutPDa -3.23321094 -4.9302060 -1.536216
## Exposed_CFU:WT-NoExposure_CFU:mutPDa 0.06558725 -1.6314078 1.762582
## NoExposure_CFU:WT-NoExposure_CFU:mutPDa 4.95253116 3.2555361 6.649526
## NoExposure_CFU:WT-Exposed_CFU:WT 4.88694391 3.1899488 6.583939
## p adj
## NoExposure_CFU:mutPD-Exposed_CFU:mutPD 0.8081742
## Exposed_CFU:mutPDa-Exposed_CFU:mutPD 0.0000000
## NoExposure_CFU:mutPDa-Exposed_CFU:mutPD 0.0000010
## Exposed_CFU:WT-Exposed_CFU:mutPD 0.0000015
## NoExposure_CFU:WT-Exposed_CFU:mutPD 0.1448298
## Exposed_CFU:mutPDa-NoExposure_CFU:mutPD 0.0000001
## NoExposure_CFU:mutPDa-NoExposure_CFU:mutPD 0.0000000
## Exposed_CFU:WT-NoExposure_CFU:mutPD 0.0000000
## NoExposure_CFU:WT-NoExposure_CFU:mutPD 0.8224164
## NoExposure_CFU:mutPDa-Exposed_CFU:mutPDa 0.0000000
## Exposed_CFU:WT-Exposed_CFU:mutPDa 0.0000000
## NoExposure_CFU:WT-Exposed_CFU:mutPDa 0.0000067
## Exposed_CFU:WT-NoExposure_CFU:mutPDa 0.9999972
## NoExposure_CFU:WT-NoExposure_CFU:mutPDa 0.0000000
## NoExposure_CFU:WT-Exposed_CFU:WT 0.0000000
heatmaps
# Base-R heatmap of the two CFU columns. They are selected by name instead
# of by position (5:6) so the call keeps working if columns are reordered.
heatmap(as.matrix(clean[c("Exposed_CFU", "NoExposure_CFU")]))

# gplots heatmap of the two CFU columns. They are selected by name instead
# of by position (5:6) so the call keeps working if columns are reordered.
heatmap.2(as.matrix(clean[c("Exposed_CFU", "NoExposure_CFU")]))

# Pearson correlation matrix of the four numeric iris columns, drawn as a
# circle-style corrplot.
# NOTE(review): the name `all` shadows base::all(); it is kept because the
# correlation tile plot further down refers to it.
all <- cor(
  x = iris[1:4],
  use = "pairwise.complete.obs",
  method = "pearson"
)
corrplot(all, method = "circle")

heatmap basics for ggplot
# Minimal ggplot2 heatmap: iris is reshaped to long form (default `name` and
# `value` columns) and each species/measurement pair is drawn as a tile
# filled by value.
iris %>%
  pivot_longer(cols = -Species) %>%
  ggplot(aes(x = Species, y = name, fill = value)) +
  geom_tile() +
  ylab("measurement")
### as a correlation plot
# Correlation matrix `all` drawn as a ggplot2 tile heatmap.
# The row names are kept as their own column before reshaping: a plain
# as_tibble() drops them, and mapping the same column to both axes only
# drew the diagonal cells.
all %>%
  as_tibble(rownames = "row_var") %>%
  pivot_longer(
    cols = -row_var,
    names_to = "col_var",
    values_to = "value"
  ) %>%
  ggplot(aes(x = row_var, y = col_var, fill = value)) +
  geom_tile() +
  xlab("measurement") +
  ylab("measurement")
