Load libraries

library(dplyr)
library(tidyverse)
library(ggplot2)
library(ggpubr)
library(ggthemes)
library(stats) 
library(moments)
library(grid)
library(formattable)
library(gridExtra)
library(ggsignif)
library(hrbrthemes)
library(plotrix)
library(rstatix)
library(car)
library(plotly)
library(postHoc)
library(knitr)
knitr::opts_chunk$set(echo = TRUE)

Load data

data <- read.csv("/Users/nuriteliash/Documents/GitHub/Sofia-varroa-project/EAG_data_all.csv")

EAG stat analysis

the tested effect (x) is the Essential oil “EO” dissolved in acetone, the “response” (y) is the electrophysiological response of the leg measured in mV. Each leg was stimulated by different stimuli in the following order:Air > Air > Air > Acetone (solvent) > 0.1 > 0.25 > 0.5 > 1 > 2.5 > 5 > Acetone (solvent) > Air

the response was normalized…. [please complete in here, by copy-pasting from the method section:)]

to test the difference in response amplitude to the different stimuli, we will use One way ANOVA. then a post-hoc Tukey test, to see which of the stimuli are significantly differnet. we followed this tutorial for ANOVA in R

Data summary

# our data contains: 
str(data)
## 'data.frame':    664 obs. of  4 variables:
##  $ leg     : Factor w/ 83 levels "VF1","VF10","VF11",..: 1 1 1 1 1 1 1 1 12 12 ...
##  $ stimuli : Factor w/ 8 levels "0.1","0.25","0.5",..: 8 1 2 3 4 5 6 7 8 1 ...
##  $ EO      : Factor w/ 9 levels "EO4309","EO4444",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ response: num  109.5 118.5 86.6 131.9 108.5 ...
table(data$stimuli, data$EO)
##              
##               EO4309 EO4444 EO4891 EO4899 EO5068 EO5663 EO5714 EO5749 EO5763
##   0.1              8     10     10     10     11      9      9      9      7
##   0.25             8     10     10     10     11      9      9      9      7
##   0.5              8     10     10     10     11      9      9      9      7
##   1                8     10     10     10     11      9      9      9      7
##   2.5              8     10     10     10     11      9      9      9      7
##   5                8     10     10     10     11      9      9      9      7
##   acet_after       8     10     10     10     11      9      9      9      7
##   acet_before      8     10     10     10     11      9      9      9      7

We have tested a total of 9 essential oils, using 83 legs. Each essential oil was tested on at least 7 different legs.

Prior to analysis, we gonna check the ANOVA assumptions:

(1) Outliers

# plot it to detect outliers by specific leg
# first sort the order of stimuli:
data$stimuli <- factor(data$stimuli,levels = c("acet_before", "0.1", "0.25", "0.5", "1", "2.5", "5","acet_after"))

box <- ggplot(data, aes(x = stimuli, y = response)) +
  geom_boxplot(aes(colour = EO)) +
  facet_wrap( ~ EO) +
  theme_linedraw() +
        ggtitle("Varroa foreleg response to different essential oils") +
        xlab("Stimuli amount (microgram)") +
        ylab("Normalized response (%)") +
        theme(axis.text.x=element_text(angle=45, hjust=1))
  
ggplotly(box, tooltip = "leg")

Remove outliered legs: “VF71”, “VF1”, “VF2”, “VF4”, “VF77”, “VF82”, “VF24”, “VF26”, “VF37”, “VF39”, “VF46”, “VF49”, “VF17”, “VF19”, “VF50”, “VF62”

data <- data %>%
  dplyr::filter(!leg %in% c("VF71", "VF1", "VF2", "VF4", "VF77", "VF82", "VF24", "VF26", "VF37", "VF39", "VF46", "VF49", "VF17", "VF19", "VF50", "VF62"))

After excluding the outliers, we proceed with the rest of the tests:

(2) Normality:

#the dependent variable should be approximately normally distributed in each cell of the design. This can be checked using the Shapiro-Wilk normality test (shapiro_test()
normality <- data %>%
  group_by(stimuli,EO) %>%
  shapiro_test(response)

normality %>%
  dplyr::filter(p<0.05)
## # A tibble: 4 x 5
##   stimuli     EO     variable statistic       p
##   <fct>       <fct>  <chr>        <dbl>   <dbl>
## 1 acet_before EO4309 response     0.761 0.0163 
## 2 0.1         EO5663 response     0.792 0.0344 
## 3 1           EO5068 response     0.783 0.0130 
## 4 acet_after  EO4891 response     0.738 0.00607

For the above four EO x stimuli combinations the response dont distribute normally (p<0.05)

(3) Homogneity of variance

# Build the linear model
model <- lapply(split(data, data$EO), function(i){
  lm(response ~ stimuli, data = i)
  })

# now you can Create a QQ plot of residuals of each EO, for example, essential oil EO4309:
ggqqplot(residuals(model$EO4309))

plot(model$EO4309, 1)

After checking the 3 assumptions, we decided to go for a non-parametric test, to test the significant difference of each stimuli concentration from the solvent.

we used a non-parametric post hoc test for multiple comparisons.

Post hoc non-parametric test for all pairwise comparisons, with Benjamini-Hochberg p-value

Post hoc Wilcoxon test, for specific 6 comparisons, and correct using BH

# (1) for each leg, calculate the avg response to solvent (before and after the stimuli to the essential oils)
EOs <- unique(data$EO) %>% as.character()
list_solvent <- list()

for (i in EOs) {
# choose one EO
EO <- data %>% filter(EO == i) 

# calculate the avg response to solvent per EO
before <- EO  %>% filter(stimuli == "acet_before") 
after <- EO  %>% filter(stimuli == "acet_after") 
sol<-left_join(before, after, by="leg") 
sol$response = rowMeans(sol[,c("response.x","response.y")] )

# add the mean response to solvent
sol <- sol %>% dplyr::select(c("leg","response","EO"="EO.x")) %>% mutate("stimuli"="solvent")

list_solvent[[i]] = sol
}

# (2) for each EO, compare each dose, to the avg of "acet_before","acet_after", then correct using BH for each EO. so the correction is only for these 6 comparisons, per EO 

#make a loop for Wilcox test of avg solvent vs each of the doses
EOs <- unique(data$EO) %>% as.character()
dose = c("0.1","0.25","0.5","1","2.5","5")
wilcox_list = list()

for (i in EOs) {
  for (j in dose) {
  EO_df <- data %>% filter(EO == i) %>% filter(stimuli == j) 
  EO_df <- bind_rows(EO_df,list_solvent[[i]])   
  pval <-  wilcox.test(data=EO_df , response ~ stimuli)$p.value # test wilcoxon
  wilcox <- data_frame("EO"=EO_df[1,3], "dose" = EO_df[1,2], "pval" = pval)
  
  l_name <- paste(i, j, sep = "_")
  wilcox_list[[l_name]] <- wilcox
  }
}

# combine all data together
df = do.call("rbind", wilcox_list)%>% group_by(EO) %>% mutate(padj = p.adjust(pval, "BH")) 

#save it
#write_csv(df, "/Users/nuriteliash/Documents/GitHub/Sofia-varroa-project/results/wilcoxon.csv")
Post hoc Wilcoxon test, comparing the response of 6 doses to the mean response to solvent. pvalues corrected using Benjamini-Hochberg method
EO dose pval padj
EO4444 0.1 0.7103730 0.8047786
EO4444 0.25 0.8047786 0.8047786
EO4444 0.5 0.4557110 0.6835664
EO4444 1 0.4557110 0.6835664
EO4444 2.5 0.4557110 0.6835664
EO4444 5 0.0530303 0.3181818
EO5714 0.1 0.8047786 0.8047786
EO5714 0.25 0.3175991 0.4763986
EO5714 0.5 0.2593240 0.4763986
EO5714 1 0.1282051 0.3846154
EO5714 2.5 0.4557110 0.5468531
EO5714 5 0.0530303 0.3181818
EO4899 0.1 0.2344988 0.4689977
EO4899 0.25 0.8784771 0.8784771
EO4899 0.5 0.0829837 0.3911422
EO4899 1 0.1303807 0.3911422
EO4899 2.5 0.3282051 0.4923077
EO4899 5 0.6453768 0.7744522
EO5068 0.1 0.5457014 0.9072193
EO5068 0.25 0.1614973 0.8919786
EO5068 0.5 0.6048128 0.9072193
EO5068 1 0.9314274 0.9314274
EO5068 2.5 0.9314274 0.9314274
EO5068 5 0.2973262 0.8919786
EO5663 0.1 0.9015152 0.9015152
EO5663 0.25 0.3828671 0.9015152
EO5663 0.5 0.9015152 0.9015152
EO5663 1 0.8047786 0.9015152
EO5663 2.5 0.7103730 0.9015152
EO5663 5 0.9015152 0.9015152
EO5749 0.1 0.8784771 0.9591298
EO5749 0.25 0.9591298 0.9591298
EO5749 0.5 0.9591298 0.9591298
EO5749 1 0.9591298 0.9591298
EO5749 2.5 0.6453768 0.9591298
EO5749 5 0.1605284 0.9591298
EO5763 0.1 0.4848485 0.9818182
EO5763 0.25 0.3095238 0.9818182
EO5763 0.5 0.6991342 0.9818182
EO5763 1 0.6991342 0.9818182
EO5763 2.5 0.8181818 0.9818182
EO5763 5 1.0000000 1.0000000
EO4309 0.1 1.0000000 1.0000000
EO4309 0.25 0.4557110 0.8524476
EO4309 0.5 0.7103730 0.8524476
EO4309 1 0.2593240 0.8524476
EO4309 2.5 0.7103730 0.8524476
EO4309 5 0.6200466 0.8524476
EO4891 0.1 0.5737374 0.5737374
EO4891 0.25 0.4418026 0.5737374
EO4891 0.5 0.0829837 0.3210567
EO4891 1 0.5737374 0.5737374
EO4891 2.5 0.1605284 0.3210567
EO4891 5 0.1303807 0.3210567

EAG plot

for some reason, i cannot add the leg ID to the hovering text in the box plot, but i can do it in the dot-plot. so in order to detect outliers, we can detect them in the first plot (the box plot), then identify their ID leg in the dot plot:)

dot <- ggplot(data, aes(x = stimuli, y = response, colour = EO, leg=leg)) +
  geom_point() + 
  facet_wrap( ~ EO) +
  theme_linedraw() +
        ggtitle("Varroa foreleg response to different essential oils") +
        xlab("Stimuli amount (microgram)") +
        ylab("Normalized response (%)") +
        theme(axis.text.x=element_text(angle=45, hjust=1)) +
        stat_summary(aes(group=EO), fun=mean, geom="line", colour="black") 

ggplotly(dot, tooltip = c("leg","response"))