load libraries
library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggpubr)
#library(reshape2)
library(hrbrthemes)
library("gridExtra")
#library(cowplot)
library(plotly)
library(scales) # to calculate percentages, and put into dataframe
library(ggrepel)
knitr::opts_chunk$set(echo = TRUE)
data <- read.csv("/Users/nuriteliash/Documents/GitHub/ToBe_Ayelet/data/Tobe_Aayelet.csv") %>%
select(c(Yard, time, hive, populated_frames,treatment, brood_frames,varroa,supers, spring_harvest)) %>%
mutate(treatment = as.factor(treatment))
# Convert necessary columns to factors if not already done
data$hive <- as.factor(data$hive)
data$treatment <- as.factor(data$treatment)
data$time <- as.factor(data$time)
data %>%
ggplot( aes(x=as.factor(treatment), y=populated_frames)) +
geom_boxplot() +
geom_point() +
geom_jitter()+
xlab("Treatment") +
ggtitle("Colony strength (populated_frames)") +
facet_wrap( vars(time), nrow = 1)
data %>% filter(time!= "T3") %>%
ggplot( aes(x=as.factor(treatment), y=brood_frames)) +
geom_boxplot() +
geom_point() +
geom_jitter()+
xlab("Treatment") +
ggtitle("Colony strength (brood_frames)") +
facet_wrap( vars(time), nrow = 1)
data %>% filter(time %in% c("T2","T3")) %>%
ggplot( aes(x=as.factor(treatment), y=supers)) +
geom_boxplot() +
geom_point() +
geom_jitter()+
xlab("Treatment") +
ggtitle("Colony strength (supers)") +
facet_wrap( vars(time), nrow = 1)
# check paired T-test for all times
# Subset the data based on the treatment levels
treatment1 <- data$brood_frames[data$treatment == levels(data$treatment)[1]]
treatment2 <- data$brood_frames[data$treatment == levels(data$treatment)[2]]
# Perform the t-test
t.test(treatment1, treatment2, alternative = c("two.sided"))
##
## Welch Two Sample t-test
##
## data: treatment1 and treatment2
## t = -0.44232, df = 87.548, p-value = 0.6593
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.233975 0.784700
## sample estimates:
## mean of x mean of y
## 4.416667 4.641304
#Independent t-test for Each Time Point
#Subset the data for each time point.
#Perform an independent t-test for each subset, as there are not enough observations. that is, each hive does not apear in two times
# Get the levels of the time factor
time_levels <- levels(data$time)
# Function to perform independent t-test for each time point
perform_independent_ttest <- function(time_point) {
# Subset data for the specific time point
subset_data <- data %>% filter(time == time_point)
# Perform independent t-test
t_test_result <- tryCatch({
t.test(brood_frames ~ treatment, data = subset_data)
}, error = function(e) {
return(paste("Error for time point:", time_point, "-", e$message))
})
return(t_test_result)
}
# Perform independent t-test for each time point and store results
t_test_results <- lapply(time_levels, perform_independent_ttest)
# Filter out errors and print the remaining results for each time point
valid_results <- t_test_results[sapply(t_test_results, function(x) !is.character(x))]
invalid_results <- t_test_results[sapply(t_test_results, function(x) is.character(x))]
# Print valid results
names(valid_results) <- time_levels[!sapply(t_test_results, is.character)]
print("Valid results:")
## [1] "Valid results:"
print(valid_results)
## $T0
##
## Welch Two Sample t-test
##
## data: brood_frames by treatment
## t = -1.2314, df = 31.319, p-value = 0.2273
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
## -1.1715561 0.2892031
## sample estimates:
## mean in group Apiraz mean in group ToBe
## 2.352941 2.794118
##
##
## $T1
##
## Welch Two Sample t-test
##
## data: brood_frames by treatment
## t = 0.71395, df = 18.658, p-value = 0.4841
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
## -0.7005442 1.4245261
## sample estimates:
## mean in group Apiraz mean in group ToBe
## 4.323529 3.961538
##
##
## $T2
##
## Welch Two Sample t-test
##
## data: brood_frames by treatment
## t = -0.15348, df = 25.002, p-value = 0.8793
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
## -1.737969 1.496897
## sample estimates:
## mean in group Apiraz mean in group ToBe
## 7.035714 7.156250
# check paired T-test for all times
# Subset the data based on the treatment levels
treatment1 <- data$populated_frames[data$treatment == levels(data$treatment)[1]]
treatment2 <- data$populated_frames[data$treatment == levels(data$treatment)[2]]
# Perform the t-test
t.test(treatment1, treatment2, alternative = c("two.sided"))
##
## Welch Two Sample t-test
##
## data: treatment1 and treatment2
## t = -0.93652, df = 122.79, p-value = 0.3508
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.983739 1.424855
## sample estimates:
## mean of x mean of y
## 11.89516 13.17460
#Independent t-test for Each Time Point
#Subset the data for each time point.
#Perform an independent t-test for each subset, as there are not enough observations. that is, each hive does not apear in two times
# Get the levels of the time factor
time_levels <- levels(data$time)
# Function to perform independent t-test for each time point
perform_independent_ttest <- function(time_point) {
# Subset data for the specific time point
subset_data <- data %>% filter(time == time_point)
# Perform independent t-test
t_test_result <- tryCatch({
t.test(populated_frames ~ treatment, data = subset_data)
}, error = function(e) {
return(paste("Error for time point:", time_point, "-", e$message))
})
return(t_test_result)
}
# Perform independent t-test for each time point and store results
t_test_results <- lapply(time_levels, perform_independent_ttest)
# Filter out errors and print the remaining results for each time point
valid_results <- t_test_results[sapply(t_test_results, function(x) !is.character(x))]
invalid_results <- t_test_results[sapply(t_test_results, function(x) is.character(x))]
# Print valid results
names(valid_results) <- time_levels[!sapply(t_test_results, is.character)]
print("Valid results:")
## [1] "Valid results:"
print(valid_results)
## $T0
##
## Welch Two Sample t-test
##
## data: populated_frames by treatment
## t = -1.0777, df = 30.637, p-value = 0.2896
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
## -1.7019445 0.5254739
## sample estimates:
## mean in group Apiraz mean in group ToBe
## 6.176471 6.764706
##
##
## $T1
##
## Welch Two Sample t-test
##
## data: populated_frames by treatment
## t = 0.11871, df = 20.894, p-value = 0.9066
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
## -1.532711 1.718232
## sample estimates:
## mean in group Apiraz mean in group ToBe
## 6.323529 6.230769
##
##
## $T2
##
## Welch Two Sample t-test
##
## data: populated_frames by treatment
## t = -0.89234, df = 22.857, p-value = 0.3815
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
## -4.741499 1.884356
## sample estimates:
## mean in group Apiraz mean in group ToBe
## 16.07143 17.50000
##
##
## $T3
##
## Welch Two Sample t-test
##
## data: populated_frames by treatment
## t = 0.25256, df = 26.099, p-value = 0.8026
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
## -4.318444 5.528528
## sample estimates:
## mean in group Apiraz mean in group ToBe
## 21.42857 20.82353
data %>% filter(time!= "T3") %>%
#na.omit() %>%
ggplot(aes(x=as.factor(treatment), y=varroa)) +
geom_boxplot() +
geom_point() +
geom_jitter()+
xlab("Treatment") +
ggtitle("Varroa infestation
% in 300 bees") +
facet_wrap( vars(time), nrow = 1)
# remove two outliers
data %>% filter(time!= "T3") %>%
filter(varroa < 20) %>%
ggplot(aes(x=as.factor(treatment), y=varroa)) +
geom_boxplot() +
geom_point() +
geom_jitter()+
xlab("Treatment") +
ggtitle("Varroa infestation
% in 300 bees (remove two outliers)") +
facet_wrap( vars(time), nrow = 1)
# check paired T-test for all times
# Subset the data based on the treatment levels
treatment1 <- data$varroa[data$treatment == levels(data$treatment)[1]]
treatment2 <- data$varroa[data$treatment == levels(data$treatment)[2]]
# Perform the t-test
t.test(treatment1, treatment2, alternative = c("two.sided"))
##
## Welch Two Sample t-test
##
## data: treatment1 and treatment2
## t = 1.3104, df = 61.011, p-value = 0.195
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4932839 2.3688339
## sample estimates:
## mean of x mean of y
## 2.284375 1.346600
#Independent t-test for Each Time Point
#Subset the data for each time point.
#Perform an independent t-test for each subset, as there are not enough observations. that is, each hive does not apear in two times
# Get the levels of the time factor
time_levels <- levels(data$time)
# Function to perform independent t-test for each time point
perform_independent_ttest <- function(time_point) {
# Subset data for the specific time point
subset_data <- data %>% filter(time == time_point)
# Perform independent t-test
t_test_result <- tryCatch({
t.test(varroa ~ treatment, data = subset_data)
}, error = function(e) {
return(paste("Error for time point:", time_point, "-", e$message))
})
return(t_test_result)
}
# Perform independent t-test for each time point and store results
t_test_results <- lapply(time_levels, perform_independent_ttest)
# Filter out errors and print the remaining results for each time point
valid_results <- t_test_results[sapply(t_test_results, function(x) !is.character(x))]
invalid_results <- t_test_results[sapply(t_test_results, function(x) is.character(x))]
# Print valid results
names(valid_results) <- time_levels[!sapply(t_test_results, is.character)]
print("Valid results:")
## [1] "Valid results:"
print(valid_results)
## $T0
##
## Welch Two Sample t-test
##
## data: varroa by treatment
## t = -0.81321, df = 31.982, p-value = 0.4221
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
## -1.7833617 0.7657146
## sample estimates:
## mean in group Apiraz mean in group ToBe
## 1.392353 1.901176
##
##
## $T1
##
## Welch Two Sample t-test
##
## data: varroa by treatment
## t = -0.6913, df = 29.937, p-value = 0.4947
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
## -2.019137 0.997961
## sample estimates:
## mean in group Apiraz mean in group ToBe
## 0.6664706 1.1770588
##
##
## $T2
##
## Welch Two Sample t-test
##
## data: varroa by treatment
## t = 2.218, df = 13.319, p-value = 0.04452
## alternative hypothesis: true difference in means between group Apiraz and group ToBe is not equal to 0
## 95 percent confidence interval:
## 0.1245979 8.6646878
## sample estimates:
## mean in group Apiraz mean in group ToBe
## 5.332143 0.937500
data %>% filter(time == "T3") %>%
#na.omit() %>%
ggplot(aes(x=as.factor(treatment), y=spring_harvest)) +
geom_boxplot() +
geom_point() +
geom_jitter()+
xlab("Treatment") +
ggtitle("spring_harvest")
# check paired T-test for all times
# Subset the data based on the treatment levels
treatment1 <- data$spring_harvest[data$treatment == levels(data$treatment)[1]]
treatment2 <- data$spring_harvest[data$treatment == levels(data$treatment)[2]]
# Perform the t-test
t.test(treatment1, treatment2, alternative = c("two.sided"))
##
## Welch Two Sample t-test
##
## data: treatment1 and treatment2
## t = -0.4057, df = 21.852, p-value = 0.6889
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.445952 3.664439
## sample estimates:
## mean of x mean of y
## 11.28571 12.17647