load libraries

library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggpubr)
#library(reshape2)
library(hrbrthemes)
library("gridExtra")
#library(cowplot)
library(plotly)
library(scales) # to calculate percentages, and put into dataframe
library(ggrepel)
knitr::opts_chunk$set(echo = TRUE)
data <- read.csv("/Users/nuriteliash/Documents/GitHub/ToBe_Ayelet/data/Tobe_Aayelet.csv") %>%
  select(c(Yard, time, hive, populated_frames,treatment, brood_frames,varroa,supers, spring_harvest)) %>%
  mutate(treatment = as.factor(treatment))

# Convert necessary columns to factors if not already done
data$hive <- as.factor(data$hive)
data$treatment <- as.factor(data$treatment)
data$time <- as.factor(data$time)

colony strength

data %>% 
ggplot( aes(x=as.factor(treatment), y=populated_frames)) + 
    geom_boxplot() + 
 geom_point() +
  geom_jitter()+
  xlab("Treatment") +
  ggtitle("Colony strength (populated_frames)") +    
 facet_wrap( vars(time), nrow = 1)

data %>% filter(time!= "T3") %>%
 ggplot( aes(x=as.factor(treatment), y=brood_frames)) + 
    geom_boxplot() + 
 geom_point() +
  geom_jitter()+
  xlab("Treatment") +
  ggtitle("Colony strength (brood_frames)") +    
 facet_wrap( vars(time), nrow = 1)

data %>% filter(time %in% c("T2","T3")) %>%
ggplot( aes(x=as.factor(treatment), y=supers)) + 
    geom_boxplot() + 
 geom_point() +
  geom_jitter()+
  xlab("Treatment") +
  ggtitle("Colony strength (supers)") +    
 facet_wrap( vars(time), nrow = 1)

brood frames stats

# check paired T-test for all times
# Subset the data based on the treatment levels
treatment1 <- data$brood_frames[data$treatment == levels(data$treatment)[1]]
treatment2 <- data$brood_frames[data$treatment == levels(data$treatment)[2]]

# Perform the t-test
t.test(treatment1, treatment2,  alternative = c("two.sided"))
## 
##  Welch Two Sample t-test
## 
## data:  treatment1 and treatment2
## t = -0.44232, df = 87.548, p-value = 0.6593
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.233975  0.784700
## sample estimates:
## mean of x mean of y 
##  4.416667  4.641304
#Independent t-test for Each Time Point
#Subset the data for each time point.
#Perform an independent t-test for each subset, as there are not enough observations. that is, each hive does not apear in two times

# Get the levels of the time factor
time_levels <- levels(data$time)

# Function to perform independent t-test for each time point
perform_independent_ttest <- function(time_point) {
  # Subset data for the specific time point
  subset_data <- data %>% filter(time == time_point)
  
  # Perform independent t-test
  t_test_result <- tryCatch({
    t.test(brood_frames ~ treatment, data = subset_data)
  }, error = function(e) {
    return(paste("Error for time point:", time_point, "-", e$message))
  })
  
  return(t_test_result)
}

# Perform independent t-test for each time point and store results
t_test_results <- lapply(time_levels, perform_independent_ttest)

# Filter out errors and print the remaining results for each time point
valid_results <- t_test_results[sapply(t_test_results, function(x) !is.character(x))]
invalid_results <- t_test_results[sapply(t_test_results, function(x) is.character(x))]

# Print valid results
names(valid_results) <- time_levels[!sapply(t_test_results, is.character)]
print("Valid results:")
## [1] "Valid results:"
print(valid_results)
## $T0
## 
##  Welch Two Sample t-test
## 
## data:  brood_frames by treatment
## t = -1.2314, df = 31.319, p-value = 0.2273
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
##  -1.1715561  0.2892031
## sample estimates:
## mean in group Apiraz   mean in group ToBe 
##             2.352941             2.794118 
## 
## 
## $T1
## 
##  Welch Two Sample t-test
## 
## data:  brood_frames by treatment
## t = 0.71395, df = 18.658, p-value = 0.4841
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
##  -0.7005442  1.4245261
## sample estimates:
## mean in group Apiraz   mean in group ToBe 
##             4.323529             3.961538 
## 
## 
## $T2
## 
##  Welch Two Sample t-test
## 
## data:  brood_frames by treatment
## t = -0.15348, df = 25.002, p-value = 0.8793
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
##  -1.737969  1.496897
## sample estimates:
## mean in group Apiraz   mean in group ToBe 
##             7.035714             7.156250

populated frames stats

# check paired T-test for all times
# Subset the data based on the treatment levels
treatment1 <- data$populated_frames[data$treatment == levels(data$treatment)[1]]
treatment2 <- data$populated_frames[data$treatment == levels(data$treatment)[2]]

# Perform the t-test
t.test(treatment1, treatment2,  alternative = c("two.sided"))
## 
##  Welch Two Sample t-test
## 
## data:  treatment1 and treatment2
## t = -0.93652, df = 122.79, p-value = 0.3508
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.983739  1.424855
## sample estimates:
## mean of x mean of y 
##  11.89516  13.17460
#Independent t-test for Each Time Point
#Subset the data for each time point.
#Perform an independent t-test for each subset, as there are not enough observations. that is, each hive does not apear in two times

# Get the levels of the time factor
time_levels <- levels(data$time)

# Function to perform independent t-test for each time point
perform_independent_ttest <- function(time_point) {
  # Subset data for the specific time point
  subset_data <- data %>% filter(time == time_point)
  
  # Perform independent t-test
  t_test_result <- tryCatch({
    t.test(populated_frames ~ treatment, data = subset_data)
  }, error = function(e) {
    return(paste("Error for time point:", time_point, "-", e$message))
  })
  
  return(t_test_result)
}

# Perform independent t-test for each time point and store results
t_test_results <- lapply(time_levels, perform_independent_ttest)

# Filter out errors and print the remaining results for each time point
valid_results <- t_test_results[sapply(t_test_results, function(x) !is.character(x))]
invalid_results <- t_test_results[sapply(t_test_results, function(x) is.character(x))]

# Print valid results
names(valid_results) <- time_levels[!sapply(t_test_results, is.character)]
print("Valid results:")
## [1] "Valid results:"
print(valid_results)
## $T0
## 
##  Welch Two Sample t-test
## 
## data:  populated_frames by treatment
## t = -1.0777, df = 30.637, p-value = 0.2896
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
##  -1.7019445  0.5254739
## sample estimates:
## mean in group Apiraz   mean in group ToBe 
##             6.176471             6.764706 
## 
## 
## $T1
## 
##  Welch Two Sample t-test
## 
## data:  populated_frames by treatment
## t = 0.11871, df = 20.894, p-value = 0.9066
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
##  -1.532711  1.718232
## sample estimates:
## mean in group Apiraz   mean in group ToBe 
##             6.323529             6.230769 
## 
## 
## $T2
## 
##  Welch Two Sample t-test
## 
## data:  populated_frames by treatment
## t = -0.89234, df = 22.857, p-value = 0.3815
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
##  -4.741499  1.884356
## sample estimates:
## mean in group Apiraz   mean in group ToBe 
##             16.07143             17.50000 
## 
## 
## $T3
## 
##  Welch Two Sample t-test
## 
## data:  populated_frames by treatment
## t = 0.25256, df = 26.099, p-value = 0.8026
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
##  -4.318444  5.528528
## sample estimates:
## mean in group Apiraz   mean in group ToBe 
##             21.42857             20.82353

Varroa infestation

data %>% filter(time!= "T3") %>%
  #na.omit() %>% 
  ggplot(aes(x=as.factor(treatment), y=varroa)) + 
    geom_boxplot() + 
 geom_point() +
  geom_jitter()+
  xlab("Treatment") +
  ggtitle("Varroa infestation
  % in 300 bees") +    
 facet_wrap( vars(time), nrow = 1)

# remove two outliers
data %>% filter(time!= "T3") %>%
filter(varroa < 20) %>%  
  ggplot(aes(x=as.factor(treatment), y=varroa)) + 
    geom_boxplot() + 
 geom_point() +
  geom_jitter()+
  xlab("Treatment") +
  ggtitle("Varroa infestation
  % in 300 bees (remove two outliers)") +    
 facet_wrap( vars(time), nrow = 1)

varroa infestation stats

# check paired T-test for all times
# Subset the data based on the treatment levels
treatment1 <- data$varroa[data$treatment == levels(data$treatment)[1]]
treatment2 <- data$varroa[data$treatment == levels(data$treatment)[2]]

# Perform the t-test
t.test(treatment1, treatment2,  alternative = c("two.sided"))
## 
##  Welch Two Sample t-test
## 
## data:  treatment1 and treatment2
## t = 1.3104, df = 61.011, p-value = 0.195
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4932839  2.3688339
## sample estimates:
## mean of x mean of y 
##  2.284375  1.346600
#Independent t-test for Each Time Point
#Subset the data for each time point.
#Perform an independent t-test for each subset, as there are not enough observations. that is, each hive does not apear in two times

# Get the levels of the time factor
time_levels <- levels(data$time)

# Function to perform independent t-test for each time point
perform_independent_ttest <- function(time_point) {
  # Subset data for the specific time point
  subset_data <- data %>% filter(time == time_point)
  
  # Perform independent t-test
  t_test_result <- tryCatch({
    t.test(varroa ~ treatment, data = subset_data)
  }, error = function(e) {
    return(paste("Error for time point:", time_point, "-", e$message))
  })
  
  return(t_test_result)
}

# Perform independent t-test for each time point and store results
t_test_results <- lapply(time_levels, perform_independent_ttest)

# Filter out errors and print the remaining results for each time point
valid_results <- t_test_results[sapply(t_test_results, function(x) !is.character(x))]
invalid_results <- t_test_results[sapply(t_test_results, function(x) is.character(x))]

# Print valid results
names(valid_results) <- time_levels[!sapply(t_test_results, is.character)]
print("Valid results:")
## [1] "Valid results:"
print(valid_results)
## $T0
## 
##  Welch Two Sample t-test
## 
## data:  varroa by treatment
## t = -0.81321, df = 31.982, p-value = 0.4221
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
##  -1.7833617  0.7657146
## sample estimates:
## mean in group Apiraz   mean in group ToBe 
##             1.392353             1.901176 
## 
## 
## $T1
## 
##  Welch Two Sample t-test
## 
## data:  varroa by treatment
## t = -0.6913, df = 29.937, p-value = 0.4947
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
##  -2.019137  0.997961
## sample estimates:
## mean in group Apiraz   mean in group ToBe 
##            0.6664706            1.1770588 
## 
## 
## $T2
## 
##  Welch Two Sample t-test
## 
## data:  varroa by treatment
## t = 2.218, df = 13.319, p-value = 0.04452
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
##  0.1245979 8.6646878
## sample estimates:
## mean in group Apiraz   mean in group ToBe 
##             5.332143             0.937500

Spring harvest

data %>% filter(time == "T3") %>%
  #na.omit() %>% 
  ggplot(aes(x=as.factor(treatment), y=spring_harvest)) + 
    geom_boxplot() + 
 geom_point() +
  geom_jitter()+
  xlab("Treatment") +
  ggtitle("spring_harvest") 

spring_harvest stats

# check paired T-test for all times
# Subset the data based on the treatment levels
treatment1 <- data$spring_harvest[data$treatment == levels(data$treatment)[1]]
treatment2 <- data$spring_harvest[data$treatment == levels(data$treatment)[2]]

# Perform the t-test
t.test(treatment1, treatment2,  alternative = c("two.sided"))
## 
##  Welch Two Sample t-test
## 
## data:  treatment1 and treatment2
## t = -0.4057, df = 21.852, p-value = 0.6889
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.445952  3.664439
## sample estimates:
## mean of x mean of y 
##  11.28571  12.17647

colony loss