library(dplyr)
library(tidyverse)
library(ggplot2)
library(ggpubr)
library(ggthemes)
library(stats)
library(moments)
library(grid)
library(formattable)
library(gridExtra)
library(ggsignif)
library(hrbrthemes)
library(plotrix)
library(rstatix)
library(car)
library(plotly)
library(postHoc)
library(knitr)
knitr::opts_chunk$set(echo = TRUE)
data <- read.csv("/Users/nuriteliash/Documents/GitHub/Sofia-varroa-project/EAG_data_all.csv")
the tested effect (x) is the Essential oil “EO” dissolved in acetone, the “response” (y) is the electrophysiological response of the leg measured in mV. Each leg was stimulated by different stimuli in the following order:Air > Air > Air > Acetone (solvent) > 0.1 > 0.25 > 0.5 > 1 > 2.5 > 5 > Acetone (solvent) > Air
the response was normalized…. [please complete in here, by copy-pasting from the method section:)]
to test the difference in response amplitude to the different stimuli, we will use One way ANOVA. then a post-hoc Tukey test, to see which of the stimuli are significantly differnet. we followed this tutorial for ANOVA in R
# our data contains:
str(data)
## 'data.frame': 664 obs. of 4 variables:
## $ leg : Factor w/ 83 levels "VF1","VF10","VF11",..: 1 1 1 1 1 1 1 1 12 12 ...
## $ stimuli : Factor w/ 8 levels "0.1","0.25","0.5",..: 8 1 2 3 4 5 6 7 8 1 ...
## $ EO : Factor w/ 9 levels "EO4309","EO4444",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ response: num 109.5 118.5 86.6 131.9 108.5 ...
table(data$stimuli, data$EO)
##
## EO4309 EO4444 EO4891 EO4899 EO5068 EO5663 EO5714 EO5749 EO5763
## 0.1 8 10 10 10 11 9 9 9 7
## 0.25 8 10 10 10 11 9 9 9 7
## 0.5 8 10 10 10 11 9 9 9 7
## 1 8 10 10 10 11 9 9 9 7
## 2.5 8 10 10 10 11 9 9 9 7
## 5 8 10 10 10 11 9 9 9 7
## acet_after 8 10 10 10 11 9 9 9 7
## acet_before 8 10 10 10 11 9 9 9 7
We have tested a total of 9 essential oils, using 83 legs. Each essential oil was tested on at least 7 different legs.
# plot it to detect outliers by specific leg
# first sort the order of stimuli:
data$stimuli <- factor(data$stimuli,levels = c("acet_before", "0.1", "0.25", "0.5", "1", "2.5", "5","acet_after"))
box <- ggplot(data, aes(x = stimuli, y = response)) +
geom_boxplot(aes(colour = EO)) +
facet_wrap( ~ EO) +
theme_linedraw() +
ggtitle("Varroa foreleg response to different essential oils") +
xlab("Stimuli amount (microgram)") +
ylab("Normalized response (%)") +
theme(axis.text.x=element_text(angle=45, hjust=1))
ggplotly(box, tooltip = "leg")
Remove outliered legs: “VF71”, “VF1”, “VF2”, “VF4”, “VF77”, “VF82”, “VF24”, “VF26”, “VF37”, “VF39”, “VF46”, “VF49”, “VF17”, “VF19”, “VF50”, “VF62”
data <- data %>%
dplyr::filter(!leg %in% c("VF71", "VF1", "VF2", "VF4", "VF77", "VF82", "VF24", "VF26", "VF37", "VF39", "VF46", "VF49", "VF17", "VF19", "VF50", "VF62"))
After excluding the outliers, we proceed with the rest of the tests:
#the dependent variable should be approximately normally distributed in each cell of the design. This can be checked using the Shapiro-Wilk normality test (shapiro_test()
normality <- data %>%
group_by(stimuli,EO) %>%
shapiro_test(response)
normality %>%
dplyr::filter(p<0.05)
## # A tibble: 4 x 5
## stimuli EO variable statistic p
## <fct> <fct> <chr> <dbl> <dbl>
## 1 acet_before EO4309 response 0.761 0.0163
## 2 0.1 EO5663 response 0.792 0.0344
## 3 1 EO5068 response 0.783 0.0130
## 4 acet_after EO4891 response 0.738 0.00607
For the above four EO x stimuli combinations the response dont distribute normally (p<0.05)
# Build the linear model
model <- lapply(split(data, data$EO), function(i){
lm(response ~ stimuli, data = i)
})
# now you can Create a QQ plot of residuals of each EO, for example, essential oil EO4309:
ggqqplot(residuals(model$EO4309))
plot(model$EO4309, 1)
After checking the 3 assumptions, we decided to go for a non-parametric test, to test the significant difference of each stimuli concentration from the solvent.
we used a non-parametric post hoc test for multiple comparisons.
# (1) for each leg, calculate the avg response to solvent (before and after the stimuli to the essential oils)
EOs <- unique(data$EO) %>% as.character()
list_solvent <- list()
for (i in EOs) {
# choose one EO
EO <- data %>% filter(EO == i)
# calculate the avg response to solvent per EO
before <- EO %>% filter(stimuli == "acet_before")
after <- EO %>% filter(stimuli == "acet_after")
sol<-left_join(before, after, by="leg")
sol$response = rowMeans(sol[,c("response.x","response.y")] )
# add the mean response to solvent
sol <- sol %>% dplyr::select(c("leg","response","EO"="EO.x")) %>% mutate("stimuli"="solvent")
list_solvent[[i]] = sol
}
# (2) for each EO, compare each dose, to the avg of "acet_before","acet_after", then correct using BH for each EO. so the correction is only for these 6 comparisons, per EO
#make a loop for Wilcox test of avg solvent vs each of the doses
EOs <- unique(data$EO) %>% as.character()
dose = c("0.1","0.25","0.5","1","2.5","5")
wilcox_list = list()
for (i in EOs) {
for (j in dose) {
EO_df <- data %>% filter(EO == i) %>% filter(stimuli == j)
EO_df <- bind_rows(EO_df,list_solvent[[i]])
pval <- wilcox.test(data=EO_df , response ~ stimuli)$p.value # test wilcoxon
wilcox <- data_frame("EO"=EO_df[1,3], "dose" = EO_df[1,2], "pval" = pval)
l_name <- paste(i, j, sep = "_")
wilcox_list[[l_name]] <- wilcox
}
}
# combine all data together
df = do.call("rbind", wilcox_list)%>% group_by(EO) %>% mutate(padj = p.adjust(pval, "BH"))
#save it
#write_csv(df, "/Users/nuriteliash/Documents/GitHub/Sofia-varroa-project/results/wilcoxon.csv")
| EO | dose | pval | padj |
|---|---|---|---|
| EO4444 | 0.1 | 0.7103730 | 0.8047786 |
| EO4444 | 0.25 | 0.8047786 | 0.8047786 |
| EO4444 | 0.5 | 0.4557110 | 0.6835664 |
| EO4444 | 1 | 0.4557110 | 0.6835664 |
| EO4444 | 2.5 | 0.4557110 | 0.6835664 |
| EO4444 | 5 | 0.0530303 | 0.3181818 |
| EO5714 | 0.1 | 0.8047786 | 0.8047786 |
| EO5714 | 0.25 | 0.3175991 | 0.4763986 |
| EO5714 | 0.5 | 0.2593240 | 0.4763986 |
| EO5714 | 1 | 0.1282051 | 0.3846154 |
| EO5714 | 2.5 | 0.4557110 | 0.5468531 |
| EO5714 | 5 | 0.0530303 | 0.3181818 |
| EO4899 | 0.1 | 0.2344988 | 0.4689977 |
| EO4899 | 0.25 | 0.8784771 | 0.8784771 |
| EO4899 | 0.5 | 0.0829837 | 0.3911422 |
| EO4899 | 1 | 0.1303807 | 0.3911422 |
| EO4899 | 2.5 | 0.3282051 | 0.4923077 |
| EO4899 | 5 | 0.6453768 | 0.7744522 |
| EO5068 | 0.1 | 0.5457014 | 0.9072193 |
| EO5068 | 0.25 | 0.1614973 | 0.8919786 |
| EO5068 | 0.5 | 0.6048128 | 0.9072193 |
| EO5068 | 1 | 0.9314274 | 0.9314274 |
| EO5068 | 2.5 | 0.9314274 | 0.9314274 |
| EO5068 | 5 | 0.2973262 | 0.8919786 |
| EO5663 | 0.1 | 0.9015152 | 0.9015152 |
| EO5663 | 0.25 | 0.3828671 | 0.9015152 |
| EO5663 | 0.5 | 0.9015152 | 0.9015152 |
| EO5663 | 1 | 0.8047786 | 0.9015152 |
| EO5663 | 2.5 | 0.7103730 | 0.9015152 |
| EO5663 | 5 | 0.9015152 | 0.9015152 |
| EO5749 | 0.1 | 0.8784771 | 0.9591298 |
| EO5749 | 0.25 | 0.9591298 | 0.9591298 |
| EO5749 | 0.5 | 0.9591298 | 0.9591298 |
| EO5749 | 1 | 0.9591298 | 0.9591298 |
| EO5749 | 2.5 | 0.6453768 | 0.9591298 |
| EO5749 | 5 | 0.1605284 | 0.9591298 |
| EO5763 | 0.1 | 0.4848485 | 0.9818182 |
| EO5763 | 0.25 | 0.3095238 | 0.9818182 |
| EO5763 | 0.5 | 0.6991342 | 0.9818182 |
| EO5763 | 1 | 0.6991342 | 0.9818182 |
| EO5763 | 2.5 | 0.8181818 | 0.9818182 |
| EO5763 | 5 | 1.0000000 | 1.0000000 |
| EO4309 | 0.1 | 1.0000000 | 1.0000000 |
| EO4309 | 0.25 | 0.4557110 | 0.8524476 |
| EO4309 | 0.5 | 0.7103730 | 0.8524476 |
| EO4309 | 1 | 0.2593240 | 0.8524476 |
| EO4309 | 2.5 | 0.7103730 | 0.8524476 |
| EO4309 | 5 | 0.6200466 | 0.8524476 |
| EO4891 | 0.1 | 0.5737374 | 0.5737374 |
| EO4891 | 0.25 | 0.4418026 | 0.5737374 |
| EO4891 | 0.5 | 0.0829837 | 0.3210567 |
| EO4891 | 1 | 0.5737374 | 0.5737374 |
| EO4891 | 2.5 | 0.1605284 | 0.3210567 |
| EO4891 | 5 | 0.1303807 | 0.3210567 |
for some reason, i cannot add the leg ID to the hovering text in the box plot, but i can do it in the dot-plot. so in order to detect outliers, we can detect them in the first plot (the box plot), then identify their ID leg in the dot plot:)
dot <- ggplot(data, aes(x = stimuli, y = response, colour = EO, leg=leg)) +
geom_point() +
facet_wrap( ~ EO) +
theme_linedraw() +
ggtitle("Varroa foreleg response to different essential oils") +
xlab("Stimuli amount (microgram)") +
ylab("Normalized response (%)") +
theme(axis.text.x=element_text(angle=45, hjust=1)) +
stat_summary(aes(group=EO), fun=mean, geom="line", colour="black")
ggplotly(dot, tooltip = c("leg","response"))