load libraries

library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggpubr)
#library(reshape2)
library(hrbrthemes)
library("gridExtra")
#library(cowplot)
library(plotly)
library(scales) # to calculate percentages, and put into dataframe
library(ggrepel)
knitr::opts_chunk$set(echo = TRUE)
data <- read.csv("/Users/nuriteliash/Documents/GitHub/Hive-monitoring/data/nov_R.csv")

df = data %>% 
  dplyr::mutate(date = as.Date(date, format = "%d/%m/%Y")) %>% # Convert to Date object 
  dplyr::mutate(floors = as.numeric(floors)) %>%
  dplyr::mutate(hive = as.character(hive)) %>%
  dplyr::mutate(populated = as.numeric(populated)) %>%
  dplyr::mutate(brood = as.numeric(brood)) %>%
  dplyr::mutate(infestation_control = as.numeric(infestation_control)) %>%
    dplyr::mutate(infestation_apivar = as.numeric(infestation_apivar)) %>%
      dplyr::mutate(infestation_ethanol = as.numeric(infestation_ethanol)) %>%
    dplyr::mutate(apivar_efficacy = as.numeric(apivar_efficacy)) %>%
      dplyr::mutate(control_mites = as.numeric(control_mites)) %>%
  dplyr::select(c(date,hive, floors, brood, populated, infestation_ethanol, apivar_efficacy,total_mites_apivar,total_mites_control, group, sensitivity_level))

colony strength

df %>%
  select(c(hive, brood, populated, infestation_ethanol)) %>%
  na.omit() %>%
  mutate(hive = factor(hive, levels = sort(unique(as.numeric(as.character(hive)))))) %>%
  ggplot(aes(x=hive, y=populated)) +
  geom_col() +
  geom_hline(yintercept = mean(df$populated, na.rm = TRUE), linetype = "dashed", color = "red") +
  ggtitle("Colony strength (# populated frames)\nNov junction") +    
  theme_ipsum()

df %>%
  select(c(hive, brood, populated, infestation_ethanol)) %>%
  na.omit() %>%
  mutate(hive = factor(hive, levels = sort(unique(as.numeric(as.character(hive)))))) %>%
  ggplot(aes(x=hive, y=brood)) +
  geom_col() +
  geom_hline(yintercept = mean(df$brood, na.rm = TRUE), linetype = "dashed", color = "red") +
  ggtitle("Colony strength (# brood frames)\nNov junction") +    
  theme_ipsum()

df %>%select(c(hive, brood, populated,infestation_ethanol )) %>%
  na.omit() %>% 
  ggplot(aes(x=brood, y=populated, label=hive)) + 
  geom_point() +
  #geom_text(nudge_x = 1, nudge_y = 0, check_overlap = F) +
    geom_text_repel() +
  xlab("Bee brood (# brood frames)") +
    ylab("Bee population (# populated frames)") +
ggtitle("Bee population") +    
  theme_ipsum() 

df %>%select(c(hive, brood, populated,infestation_ethanol )) %>%
  na.omit() %>%
  ggplot(aes(x=populated)) + 
 geom_histogram( binwidth=0.5, fill="#69b3a2", color="#e9ecef", alpha=0.9) +
  ggtitle("Bee population (# populated frames)") +    
  theme_ipsum()

df %>%select(c(hive, brood, populated,infestation_ethanol )) %>%
  na.omit() %>% 
  ggplot(aes(x=populated)) + 
    geom_density(fill="#69b3a2", color="#e9ecef", alpha=0.8) +
  ggtitle("Bee population (# populated frames)") +    
  theme_ipsum()

varroa infestation and colony strength

df %>%
  select(c(hive, brood, populated, infestation_ethanol)) %>%
  na.omit() %>%
  mutate(hive = factor(hive, levels = sort(unique(as.numeric(as.character(hive)))))) %>%  # Convert to factor with numeric order
  ggplot(aes(x=hive, y=infestation_ethanol)) +
  geom_col() +
  ggtitle("Varroa infestation level (mite/bee)
Nov junction") +    
  theme_ipsum()

df %>%select(c(hive, brood, populated,infestation_ethanol )) %>%
  na.omit() %>%
  ggplot(aes(x=infestation_ethanol)) + 
 geom_histogram( binwidth=0.005, fill="#69b3a2", color="#e9ecef", alpha=0.9) +
  ggtitle("Varroa infestation level") +    
  theme_ipsum()

df %>%select(c(hive, brood, populated,infestation_ethanol )) %>%
  na.omit() %>% ggplot(aes(x=infestation_ethanol)) + 
    geom_density(fill="#69b3a2", color="#e9ecef", alpha=0.8) +
  ggtitle("Varroa infestation level") +    
  theme_ipsum()

df %>%select(c(hive, brood, populated,infestation_ethanol,floors )) %>%
  na.omit() %>% ggplot( aes(x=as.factor(floors), y=infestation_ethanol, label=hive)) + 
    geom_boxplot(fill="#69b3a2") + 
 geom_point() +
  geom_jitter()+
  xlab("floors") +
  ggtitle("Varroa infestation level") +    
  theme_ipsum()
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

df %>%select(c(hive, brood, populated,infestation_ethanol )) %>%
  na.omit() %>% 
  ggplot(aes(x=brood, y=infestation_ethanol, label=hive)) + 
  geom_point() +
  #geom_text(nudge_x = 1, nudge_y = 0, check_overlap = F) +
    geom_text_repel() +
  xlab("Bee brood (# brood frames)") +
    ylab("Varroa infestation (mite/bee)") +
ggtitle("Varroa infestation and bee brood frames") +    
  theme_ipsum()

df %>%select(c(hive, brood, populated,infestation_ethanol )) %>%
  na.omit() %>% 
  ggplot(aes(x=populated, y=infestation_ethanol, label=hive)) + 
  geom_point() +
  #geom_text(nudge_x = 1, nudge_y = 0, check_overlap = F) +
    geom_text_repel() +
  xlab("Bee population (# populated frames)") +
    ylab("Varroa infestation (mite/bee)") +
ggtitle("Varroa infestation and bee population") +    
  theme_ipsum()

varroa resistance

df %>%
  select(c(hive, brood, populated, infestation_ethanol, apivar_efficacy,total_mites_apivar,total_mites_control)) %>%
  na.omit() %>%
  filter(total_mites_apivar >=10) %>%
  mutate(hive = factor(hive, levels = sort(unique(as.numeric(as.character(hive)))))) %>%
  ggplot(aes(x=hive, y=apivar_efficacy)) +
  geom_col() +
  geom_hline(yintercept = mean(df$apivar_efficacy, na.rm = TRUE), linetype = "dashed", color = "red") +
  ggtitle("Apivar efficacy \nNov junction") +    
  theme_ipsum()

df %>%
  select(c(hive, brood, populated, infestation_ethanol, apivar_efficacy,total_mites_apivar,total_mites_control)) %>%
  na.omit() %>%
  filter(total_mites_apivar >=10) %>%
  ggplot(aes(x=apivar_efficacy)) + 
 geom_histogram( binwidth=0.05, fill="#69b3a2", color="#e9ecef", alpha=0.9) +
  ggtitle("Apivar efficacy \nNov junction") +    
  theme_ipsum()

segregate into 2 groups with similar variance

efficacy variance

var_efficacy = data %>%
  filter(total_mites_apivar >=10) 

var_efficacy %>% select(c(hive, group, populated, brood, infestation_ethanol,apivar_efficacy))
##    hive group populated brood infestation_ethanol apivar_efficacy
## 1     1     A        15     6                0.10             0.8
## 2     3     B        18     5                0.12             0.8
## 3     5     A        24     8                0.03             0.8
## 4     9     B        15     6                0.06             0.8
## 5    10     A        19     5                0.05             0.8
## 6    12     B        10     6                0.12             0.7
## 7    14     A        10     6                0.05             0.5
## 8    16     A        11     6                0.09             0.7
## 9    17     B        22     8                0.03             0.8
## 10   20     A        14     4                0.02             0.8
## 11   29     B         9     5                0.02             0.9
## 12   39     B         8     4                0.05             0.7
## 13   42     B         8     4                0.06             0.8
data %>%
 # select(c(hive, brood, populated, infestation_ethanol, apivar_efficacy,total_mites_apivar,total_mites_control)) %>%
  #na.omit() %>%
  filter(total_mites_apivar >=10) %>%
  ggplot( aes(x=as.factor(group), y=apivar_efficacy)) + 
    geom_boxplot(fill="#69b3a2") + 
 geom_point() +
  geom_jitter()+
  xlab("group") +
  ggtitle("Apivar efficacy") +    
  theme_ipsum()

# Perform the F-test to compare variances
var.test(apivar_efficacy ~ group, data = var_efficacy)
## 
##  F test to compare two variances
## 
## data:  apivar_efficacy by group
## F = 3.08, num df = 5, denom df = 6, p-value = 0.2034
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.5143994 21.4913217
## sample estimates:
## ratio of variances 
##               3.08

population variance

data %>%
  filter(group %in% c("A", "B")) %>%  
 # na.omit() %>%
  ggplot( aes(x=as.factor(group), y=populated)) + 
    geom_boxplot(fill="#69b3a2") + 
 geom_point() +
  geom_jitter()+
  xlab("group") +
  ggtitle("populated frames") +    
  theme_ipsum()
## Warning: Removed 14 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 14 rows containing missing values (`geom_point()`).
## Removed 14 rows containing missing values (`geom_point()`).

# Perform the F-test to compare variances
var.test(populated ~ group, data = data)
## 
##  F test to compare two variances
## 
## data:  populated by group
## F = 0.84883, num df = 19, denom df = 20, p-value = 0.7242
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3419822 2.1296540
## sample estimates:
## ratio of variances 
##          0.8488253

brood variance

data %>%
 # select(c(hive, brood, populated, infestation_ethanol, apivar_efficacy,total_mites_apivar,total_mites_control)) %>%
  #filter(total_mites_apivar >=10) %>%
  #filter(group %in% c("A", "B")) %>%  
 # na.omit() %>%
  ggplot( aes(x=as.factor(group), y=brood)) + 
    geom_boxplot(fill="#69b3a2") + 
 geom_point() +
  geom_jitter()+
  xlab("group") +
  ggtitle("brood frames") +    
  theme_ipsum()
## Warning: Removed 22 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_point()`).

# Perform the F-test to compare variances
var.test(brood ~ group, data = data)
## 
##  F test to compare two variances
## 
## data:  brood by group
## F = 0.84265, num df = 15, denom df = 16, p-value = 0.7448
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3022927 2.3897843
## sample estimates:
## ratio of variances 
##          0.8426463

varroa infestation variance

data %>%
 # select(c(hive, brood, populated, infestation_ethanol, apivar_efficacy,total_mites_apivar,total_mites_control)) %>%
  #filter(total_mites_apivar >=10) %>%
 # filter(group %in% c("A", "B")) %>%  
 # na.omit() %>%
  ggplot( aes(x=as.factor(group), y=infestation_ethanol)) + 
    geom_boxplot(fill="#69b3a2") + 
 geom_point() +
  geom_jitter()+
  xlab("group") +
  ggtitle("varroa infestation_ethanol") +    
  theme_ipsum()
## Warning: Removed 22 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 22 rows containing missing values (`geom_point()`).
## Removed 22 rows containing missing values (`geom_point()`).

# Perform the F-test to compare variances
var.test(infestation_ethanol ~ group, data = data)
## 
##  F test to compare two variances
## 
## data:  infestation_ethanol by group
## F = 0.63245, num df = 15, denom df = 16, p-value = 0.3813
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2268876 1.7936665
## sample estimates:
## ratio of variances 
##          0.6324531