Varroa amitraz dose reponse

library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggpubr)
#library(reshape2)
#library(hrbrthemes)
library("gridExtra")
#library(cowplot)
library(plotly)
library(scales) # to calculate percentages, and put into dataframe
library(multcompView) # to correct for multiple comparisons after Tukey HSD
library("kableExtra")
library("knitr")
library(patchwork)
library(grid)
library(lubridate)

knitr::opts_chunk$set(echo = TRUE)

Here we followed Varroa mites viability in glass vials, after 3 hours exposure to Amitraz in differnet doses, and the solvent as control.

The main aim was to test the effect of Amitraz contact exposure on varroa viability.

First, we tested two parameters important for the establishment of the experimental protocol:

Does the solvent (Acetone) affects the mites’ viability, in glass vials?
Does the physiological stage of the mite affects its viability, in glass vials?

Following the preliminary experiments, we have concluded that there’s no difference between the two physiological stages, and that the solvent, Acetone confer no harm to the mite’s viability.
we therefore proceeded to test amitraz effect on the varroa mite, asking:

Is there a dose dependent effect of Amitraz on varroa mite viability, after 3 hours contact exposure?

Results summary

to be written later

Statistical Methods

To analyze the data, we employed three complementary statistical approaches:

Chi-squared Test of Independence: To assess whether the overall distribution of mite states (Live, Paralyzed/Abnormal, Dead) was dependent on the experimental group (e.g., Control vs. Acetone, or different physiological stages), we used the Chi-squared (χ²) test. This non-parametric test is ideal for comparing categorical outcomes across different groups to determine if there is a statistically significant association between them. A significant p-value (p < 0.05) indicates that the distribution of outcomes is not the same across the groups being compared.
Parametric Tests on Proportions: To specifically compare the mean viability (i.e., the percentage of live mites per vial) between groups, we used parametric tests.
- Student’s t-test was used for comparing two groups (Control vs. Acetone).
- Analysis of Variance (ANOVA) was used for comparing more than two groups (Phoretic vs. Reproductive stages). To meet the assumptions of these tests, the proportion data was first transformed using the arcsin square root transformation, a standard procedure for stabilizing the variance of percentage data. When the overall ANOVA was significant, we performed a Tukey’s HSD post-hoc test to identify which specific groups differed from each other.
Logistic Regression: To analyze the dose-response relationship of Amitraz, we used a binomial logistic regression model (a type of Generalized Linear Model, or GLM). This model allowed us to test whether the log of the Amitraz dose had a significant effect on the probability of mite mortality. From the model’s coefficients, we calculated the Lethal Dose 50 (LD50), representing the estimated dose required to kill 50% of the mite population under the experimental conditions.

# Upload data
df <- read.csv("/Users/nuriteliash/Documents/GitHub/Glass-vials-short-term/data/amitraz/amitraz_all_yards.csv") %>%
  mutate(Vial =as.character(Vial)) %>% 
  mutate(Tested_compound = factor(Tested_compound,levels = c("Control" ,"Acetone", "Amitraz"))) %>%
  mutate(Dose =as.numeric(Dose)) %>% 
  mutate(Total =as.numeric(Total)) %>% 
  mutate(Viable =as.numeric(Viable)) %>% 
  mutate(Paralyzed =as.numeric(Paralyzed)) %>% 
mutate(Dancing =as.numeric(Dancing)) %>% 
  mutate(Abnormal_behaviour =as.numeric(Abnormal_behaviour)) %>% 
  mutate(Dead =as.numeric(Dead)) %>%
  mutate(Date = format(dmy(Date), "%d-%m-%y")) %>%  # Parse as dmy and format as d-m-y
  filter(Exposure == 3)  # Only include 3-hour exposure data

# First, calculate control viability for each experiment to identify which ones to keep
# We need to correctly identify the control compound for each experiment.
# For some experiments, it's "Control"; for others, it's "Acetone".
# We'll consider "Control" if it exists, otherwise "Acetone".
exps_with_control <- df %>% filter(Tested_compound == "Control") %>% pull(Date) %>% unique()

control_data <- df %>%
  filter( (Tested_compound == "Control") | (Tested_compound == "Acetone" & !Date %in% exps_with_control) )

control_viability <- control_data %>%
  group_by(Date, Vial) %>%
  summarise(
    Total_Mites = sum(Total, na.rm = TRUE),
    Viable_Mites = sum(Viable, na.rm = TRUE),
    Viability_Percent = (Viable_Mites / Total_Mites) * 100,
    .groups = 'drop'
  )

# Find experiments where AT LEAST ONE control vial has < 90% viability
experiments_to_remove <- control_viability %>%
  filter(Viability_Percent < 80) %>%
  distinct(Date) %>%
  pull(Date)

all_experiments <- unique(df$Date)
experiments_to_keep <- setdiff(all_experiments, experiments_to_remove)

# Print information about filtering
cat("### Experiment Filtering Information\n")

## ### Experiment Filtering Information

cat("Experiments with any control vial < 80% viability (REMOVED):\n")

## Experiments with any control vial < 80% viability (REMOVED):

if(length(experiments_to_remove) > 0) {
  removed_info <- control_viability %>% filter(Date %in% experiments_to_remove)
  for(exp in unique(removed_info$Date)) {
    vial_info <- removed_info %>% filter(Date == exp)
    cat(paste0("  - Experiment ", exp, " (vials: ", paste(vial_info$Vial, collapse=", "), " with ", paste(round(vial_info$Viability_Percent,1), collapse="%, "), "%)\n"))
  }
} else {
  cat("  None\n")
}

##   - Experiment 17-03-25 (vials: 7, 8, 8.1 with 70%, 100%, 22.2%)
##   - Experiment 27-05-25 (vials: 1 with 50%)
##   - Experiment 28-04-25 (vials: 17, 7 with 0%, 0%)
##   - Experiment 29-06-25 (vials: 1, 8 with 80%, 0%)

cat("\nExperiments with all control vials >= 80% viability (KEPT):\n")

## 
## Experiments with all control vials >= 80% viability (KEPT):

if(length(experiments_to_keep) > 0) {
    kept_info <- control_viability %>% filter(Date %in% experiments_to_keep)
    # Check if kept_info is not empty before proceeding
    if(nrow(kept_info) > 0) {
        for(exp in unique(kept_info$Date)) {
            vial_info <- kept_info %>% filter(Date == exp)
            cat(paste0("  - Experiment ", exp, " (vials: ", paste(vial_info$Vial, collapse=", "), " with ", paste(round(vial_info$Viability_Percent,1), collapse="%, "), "%)\n"))
        }
    } else {
        cat("  None with recorded control viability meeting criteria.\n")
    }
} else {
    cat("  None\n")
}

##   - Experiment 04-03-25 (vials: 1, 13 with 100%, 90%)
##   - Experiment 06-03-25 (vials: 1 with 100%)
##   - Experiment 09-03-25 (vials: 1, 2, 3, 4 with 100%, 100%, 100%, 88.9%)
##   - Experiment 12-03-25 (vials: 7, 8 with 100%, 90%)
##   - Experiment 13-05-25 (vials: 1 with 100%)
##   - Experiment 14-07-25 (vials: 1 with 100%)
##   - Experiment 16-07-25 (vials: 1 with 100%)
##   - Experiment 21-04-25 (vials: 1 with 100%)
##   - Experiment 21-05-25 (vials: 10 with 100%)
##   - Experiment 24-03-25 (vials: 7, 8 with 100%, 100%)
##   - Experiment 27-03-25 (vials: 3, 4, 7, 8 with 100%, 100%, 100%, 100%)
##   - Experiment 31-03-25 (vials: 3, 7, 8 with 80%, 100%, 100%)

cat(paste0("\nTotal experiments kept: ", length(experiments_to_keep), "\n"))

## 
## Total experiments kept: 17

cat(paste0("Total experiments removed: ", length(experiments_to_remove), "\n\n"))

## Total experiments removed: 4

Does the solvent (Acetone) affects the mites’ viability?

Phoretic mites’ state after 3 hours exposure to the solvent (“Acetone”), compared to an empty vial (“Control”).
Only freshly prepared acetone vials were used for the analysis.

# Analysis 1: 4-category analysis for experiments 8-13 (with dancing) - Control and Acetone only
df_solvent <- df %>%
    filter(Yard == "Shamir") %>% 
filter(Physiological_stage == "Phoretic") %>% 
    filter(Vial_Freshness == "Fresh") %>% 
    filter(Tested_compound %in% c("Control", "Acetone")) %>%
filter(Date %in% experiments_to_keep) %>%  # Filter by control viability
   filter(Date %in% c("12-03-25", "24-03-25", "27-03-25", "17-03-25")) %>% # include experiments with both acetone and control
  select(Date, Physiological_stage, Tested_compound, Viable, Paralyzed, Dead) %>%
  pivot_longer(
    cols = c(Viable, Paralyzed, Dead),
    names_to = "State",
    values_to = "Count"
  ) %>%
  mutate(
    State = factor(State, levels = c("Viable", "Paralyzed", "Dead")),
    State = recode(State, 
                  "Viable" = "Live",
                  "Paralyzed" = "Paralyzed",
                  "Dead" = "Dead"))


# Calculate summary df
if (nrow(df_solvent) > 0) {
  df_summary_solvent <- df_solvent %>%
    group_by(Tested_compound, State) %>%
    summarise(
      Total_Count = sum(Count, na.rm = TRUE),
      .groups = 'drop'
    ) %>%
    group_by(Tested_compound) %>%
    mutate(
      Percentage = Total_Count / sum(Total_Count) * 100,
      Total_Count_Per_Condition = sum(Total_Count)  # Add total count per condition
    ) %>%
    ungroup()
} else {
  df_summary_solvent <- tibble()
}


# Create the 4-category plot (experiments 1,3,4)
  p1 <- ggplot(df_summary_solvent, aes(x = Tested_compound, y = Percentage, fill = State)) +
    geom_bar(stat = "identity", position = "stack", alpha = 0.8, color = "black", linewidth = 0.5) +
    #facet_wrap(~ Vial_Freshness, labeller = label_wrap_gen(width = 15), scales = "free_x") +
    scale_fill_manual(values = c("Live" = "#FFFFFF", 
                                 "Paralyzed" = "#A0A0A0", 
                                 "Dead" = "#000000")) +
    scale_x_discrete(labels = function(x) {
      sapply(x, function(val) {
        count <- df_summary_solvent$Total_Count_Per_Condition[df_summary_solvent$Tested_compound == val][1]
        paste0(val, "\n(n = ", count, ")")
      })
    }) +
    labs(
      title = "Distribution of Varroa Mite state - Control vs Acetone",
      subtitle = paste0("Experiments 1, 3 and 4 (Filtered: >= 80% control viability)"),
      x = "Treatment",
      y = "Percentage (%)",
      fill = "State"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 14, face = "bold"),
      plot.subtitle = element_text(size = 12),
      axis.text.x = element_text(angle = 0, hjust = 0.5),
      axis.title = element_text(size = 11, face = "bold"),
      legend.title = element_text(size = 11, face = "bold"),
      strip.text = element_text(size = 10, face = "bold"),
      panel.grid.major.x = element_blank(),
      panel.grid.minor = element_blank()
    )
  
  print(p1)

cat("\n\n### Mean and Standard Error Plots\n")

## 
## 
## ### Mean and Standard Error Plots

cat("These plots show the mean percentage of mites per vial for each state, with standard error bars.\n")

## These plots show the mean percentage of mites per vial for each state, with standard error bars.

# --- Mean analysis for 4-category data ---
df_for_mean_solvent <- df %>%
  filter(Physiological_stage == "Phoretic") %>% 
    filter(Vial_Freshness == "Fresh") %>% 
    filter(Tested_compound %in% c("Control", "Acetone")) %>%
filter(Date %in% experiments_to_keep) %>%  # Filter by control viability
   filter(Date %in% c("12-03-25", "23-03-25", "27-03-25"))
  
if (nrow(df_for_mean_solvent) > 0) {
    # Calculate proportions per vial
    df_proportions_solvent <- df_for_mean_solvent %>%
        filter(Total > 0) %>%
        mutate(
            Live = Viable / Total,
            Paralyzed = Paralyzed / Total,
            Dancing = Dancing / Total,
            Dead = Dead / Total
        ) %>%
        select(Vial, Tested_compound, Vial_Freshness, Total, Live, Paralyzed, Dancing, Dead) %>%
        pivot_longer(
            cols = c(Live, Paralyzed, Dancing, Dead),
            names_to = "State",
            values_to = "Proportion"
        )

    # Calculate summary stats (mean, se)
    summary_mean_solvent <- df_proportions_solvent %>%
        group_by(Tested_compound, State) %>%
        summarise(
            Mean_Percentage = mean(Proportion, na.rm = TRUE) * 100,
            SE = (sd(Proportion, na.rm = TRUE) / sqrt(n())) * 100,
            .groups = 'drop'
        )

    # Get total counts (vials and mites)
    totals_solvent <- df_for_mean_solvent %>%
        group_by(Tested_compound) %>%
        summarise(
            n_vials = n_distinct(paste(Date, Vial)),
            n_mites = sum(Total, na.rm = TRUE),
            .groups = 'drop'
        ) %>%
        mutate(x_label = paste0(Tested_compound, "\nvials: ", n_vials, "\nmites: ", n_mites))

    # Combine for plotting
    plot_data_mean_solvent <- summary_mean_solvent %>%
        left_join(totals_solvent, by = "Tested_compound") %>%
        mutate(State = factor(State, levels = c("Live", "Paralyzed", "Dead"))) %>%
      na.omit()

    # Create the plot
    p3 <- ggplot(plot_data_mean_solvent, aes(x = x_label, y = Mean_Percentage, fill = State)) +
        geom_bar(stat = "identity", position = position_dodge(), color = "black") +
        geom_errorbar(
            aes(ymin = Mean_Percentage - SE, ymax = pmin(100, Mean_Percentage + SE)),
            position = position_dodge(0.9),
            width = 0.25
        ) +
        scale_fill_manual(values = c("Live" = "#FFFFFF", "Paralyzed" = "#A0A0A0",  "Dead" = "#000000")) +
        labs(
          title = "Mean Mite States per Vial - Control vs Acetone",
          subtitle = paste0("Experiments 1, 3 and 4(Filtered: >= 80% control viability)"),
          x = "Treatment",
          y = "Mean Percentage per Vial"
        ) +
        theme_minimal() +
        theme(
          plot.title = element_text(size = 14, face = "bold"),
          plot.subtitle = element_text(size = 12),
          axis.text.x = element_text(angle = 0, hjust = 0.5),
          axis.title = element_text(size = 11, face = "bold"),
          legend.title = element_text(size = 11, face = "bold"),
          strip.text = element_text(size = 10, face = "bold")
        )
    print(p3)
}

cat("\n\n### Statistical Analysis: Control vs. Acetone\n")

## 
## 
## ### Statistical Analysis: Control vs. Acetone

# --- Chi-squared Test of Independence ---
# This test determines if there is a significant association between the treatment
# (Control/Acetone) and the mite's state (Live/Paralyzed/Dead).

# First, create a contingency table of counts
contingency_table_solvent <- df_summary_solvent %>%
  select(Tested_compound, State, Total_Count) %>%
  pivot_wider(names_from = State, values_from = Total_Count, values_fill = 0) %>%
  column_to_rownames(var = "Tested_compound")

# Perform the Chi-squared test
chi_test_result <- chisq.test(contingency_table_solvent)

cat("\n**1. Chi-squared Test Results:**\n")

## 
## **1. Chi-squared Test Results:**

cat("This test compares the overall distribution of mite states between the Control and Acetone groups.\n\n")

## This test compares the overall distribution of mite states between the Control and Acetone groups.

# Print the results in a readable format
print(chi_test_result)

## 
##  Pearson's Chi-squared test
## 
## data:  contingency_table_solvent
## X-squared = 7.2345, df = 2, p-value = 0.02686

cat(paste0("\n**Interpretation:**\n",
"The p-value is ", round(chi_test_result$p.value, 4), ". ",
"If this value is less than your significance level (e.g., 0.05), you can conclude that there is a statistically significant association between the treatment and the mites' state distribution. In other words, the distribution of Live, Paralyzed, and Dead mites is different for the Acetone group compared to the Control group.\n"))

## 
## **Interpretation:**
## The p-value is 0.0269. If this value is less than your significance level (e.g., 0.05), you can conclude that there is a statistically significant association between the treatment and the mites' state distribution. In other words, the distribution of Live, Paralyzed, and Dead mites is different for the Acetone group compared to the Control group.

# --- T-test on Arcsin-Transformed Proportions of Live Mites ---
# This test compares the mean proportion of LIVE mites per vial between the two groups.
# Arcsin transformation is used to stabilize the variance of proportion data.

cat("\n**2. T-test on the Proportion of Live Mites:**\n")

## 
## **2. T-test on the Proportion of Live Mites:**

cat("This test specifically checks if the mean percentage of *Live* mites per vial is different between the Control and Acetone groups.\n\n")

## This test specifically checks if the mean percentage of *Live* mites per vial is different between the Control and Acetone groups.

if (nrow(df_for_mean_solvent) > 0) {
    # Calculate proportion of live mites per vial
    proportions_live <- df_for_mean_solvent %>%
        filter(Total > 0) %>%
        mutate(Proportion_Live = Viable / Total)

    # Apply arcsin square root transformation
    proportions_live$Transformed_Live <- asin(sqrt(proportions_live$Proportion_Live))

    # Perform Welch's t-test (does not assume equal variances)
    t_test_result <- t.test(Transformed_Live ~ Tested_compound, data = proportions_live)

    print(t_test_result)
    
    cat(paste0("\n**Interpretation:**\n",
    "The p-value is ", round(t_test_result$p.value, 4), ". ",
    "If this value is less than your significance level, it indicates a significant difference in the mean proportion of live mites per vial between the Control and Acetone treatments.\n"))

} else {
    cat("Not enough data to perform the t-test.\n")
}

## 
##  Welch Two Sample t-test
## 
## data:  Transformed_Live by Tested_compound
## t = 0.81636, df = 5.318, p-value = 0.4493
## alternative hypothesis: true difference in means between group Control and group Acetone is not equal to 0
## 95 percent confidence interval:
##  -0.2426092  0.4744330
## sample estimates:
## mean in group Control mean in group Acetone 
##              1.490359              1.374447 
## 
## 
## **Interpretation:**
## The p-value is 0.4493. If this value is less than your significance level, it indicates a significant difference in the mean proportion of live mites per vial between the Control and Acetone treatments.

Does the physiological stage of the mite affects its viability?

Comparing phoretic and reproductive mites’ state after 3 hours exposure to the solvent (“Acetone”).
Only freshly prepared acetone vials were used for the analysis.

# --- Data Filtering and Preparation ---
df_physio <- df %>%
  filter(Date %in% c("27-03-25", "31-03-25")) %>%
  filter(Tested_compound == "Acetone") %>%
  filter(Vial_Freshness == "Fresh") %>%
  filter(Physiological_stage %in% c("Phoretic", "Reproductive")) %>%
  mutate(
    Physiological_stage = factor(Physiological_stage, levels = c("Phoretic", "Reproductive"))
  )

# --- Stacked Percentage Plot ---
cat("### Overall Distribution by Physiological Stage\n")

## ### Overall Distribution by Physiological Stage

# Reshape the data to long format to correctly calculate summary statistics
df_physio_long_counts <- df_physio %>%
  select(Physiological_stage, Viable, Paralyzed, Dead) %>%
  pivot_longer(
      cols = c(Viable, Paralyzed, Dead),
      names_to = "State",
      values_to = "Count"
  ) %>%
  mutate(
      State = recode(factor(State, levels = c("Viable", "Paralyzed", "Dead")),
                     "Viable" = "Live",
                     "Paralyzed" = "Paralyzed",
                     "Dead" = "Dead")
  )

# Create the summary table from the correctly shaped data
df_summary_physio <- df_physio_long_counts %>%
  group_by(Physiological_stage, State) %>%
  summarise(Total_Count = sum(Count, na.rm = TRUE), .groups = 'drop') %>%
  complete(Physiological_stage, State, fill = list(Total_Count = 0)) %>% # Ensure all state/group combos exist
  group_by(Physiological_stage) %>%
  mutate(
    Total_Count_Per_Condition = sum(Total_Count),
    Percentage = ifelse(Total_Count_Per_Condition > 0, (Total_Count / Total_Count_Per_Condition) * 100, 0)
  ) %>%
  ungroup()


if (nrow(df_summary_physio) > 0) {
    p_physio_stacked <- ggplot(df_summary_physio, aes(x = Physiological_stage, y = Percentage, fill = State)) +
        geom_bar(stat = "identity", position = "stack", color = "black") +
        scale_x_discrete(labels = function(x) {
            sapply(x, function(val) {
                total_n <- df_summary_physio %>% 
                    filter(Physiological_stage == val) %>% 
                    pull(Total_Count_Per_Condition) %>% 
                    unique()
                paste0(gsub("_", "\n", val), "\n(n=", total_n, ")")
            })
        }) +
        scale_fill_manual(values = c("Live" = "#FFFFFF", "Paralyzed" = "#A0A0A0", "Dead" = "#000000")) +
        labs(
            title = "Mite State Distribution by Physiological Stage (Acetone Only)",
            subtitle = "Experiments 3, 4, 5 on Fresh Vials",
            x = "Physiological Stage",
            y = "Percentage (%)"
        ) +
        theme_minimal() +
        theme(plot.title = element_text(size = 14, face = "bold"))
    print(p_physio_stacked)
}

# --- Mean Percentage per Vial Plot ---
cat("\n\n### Mean Percentage per Vial by Physiological Stage\n")

## 
## 
## ### Mean Percentage per Vial by Physiological Stage

df_proportions_physio <- df_physio %>%
  filter(Total > 0) %>%
  mutate(
    Live = Viable / Total,
    Paralyzed = Paralyzed / Total,
    Dead = Dead / Total
  ) %>%
  select(Date, Vial, Physiological_stage, Live, Paralyzed, Dead) %>%
  pivot_longer(cols = c("Live", "Paralyzed", "Dead"), names_to = "State", values_to = "Proportion")

summary_mean_physio <- df_proportions_physio %>%
  group_by(Physiological_stage, State) %>%
  summarise(
    Mean_Percentage = mean(Proportion, na.rm = TRUE) * 100,
    SE = (sd(Proportion, na.rm = TRUE) / sqrt(n())) * 100,
    .groups = 'drop'
  )

totals_physio <- df_physio %>%
  group_by(Physiological_stage) %>%
  summarise(
    n_vials = n_distinct(paste(Date, Vial)),
    n_mites = sum(Total, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  mutate(x_label = paste0(gsub("_", "\n", Physiological_stage), "\nvials: ", n_vials, "\nmites: ", n_mites))

plot_data_mean_physio <- summary_mean_physio %>%
  left_join(totals_physio, by = "Physiological_stage") %>%
  mutate(State = factor(State, levels = c("Live", "Paralyzed", "Dead")))

if (nrow(plot_data_mean_physio) > 0) {
    p_physio_mean <- ggplot(plot_data_mean_physio, aes(x = x_label, y = Mean_Percentage, fill = State)) +
        geom_bar(stat = "identity", position = position_dodge(), color = "black") +
        geom_errorbar(aes(ymin = Mean_Percentage - SE, ymax = pmin(100, Mean_Percentage + SE)), position = position_dodge(0.9), width = 0.25) +
        scale_fill_manual(values = c("Live" = "#FFFFFF", "Paralyzed" = "#A0A0A0", "Dead" = "#000000")) +
        labs(
          title = "Mean Mite States per Vial by Physiological Stage",
          x = "Physiological Stage",
          y = "Mean Percentage per Vial"
        ) +
        theme_minimal() +
        theme(plot.title = element_text(size = 14, face = "bold"))
    print(p_physio_mean)
}

# --- Statistical Analysis ---
cat("\n\n### Statistical Analysis: Physiological Stage\n")

## 
## 
## ### Statistical Analysis: Physiological Stage

# Chi-squared Test
# Build the contingency table from the correctly summarized data
contingency_table_physio <- df_summary_physio %>%
  select(Physiological_stage, State, Total_Count) %>%
  pivot_wider(names_from = State, values_from = Total_Count, values_fill = 0) %>%
  column_to_rownames(var = "Physiological_stage")

# Check for low expected counts which might invalidate the chi-squared test
chi_test_physio <- chisq.test(contingency_table_physio)
cat("\n**1. Chi-squared Test Results:**\n")

## 
## **1. Chi-squared Test Results:**

print(chi_test_physio)

## 
##  Pearson's Chi-squared test
## 
## data:  contingency_table_physio
## X-squared = 1.79, df = 2, p-value = 0.4086

if (any(chi_test_physio$expected < 5)) {
    cat("\n*Warning: Chi-squared test may be inaccurate because some expected counts are less than 5.*\n")
}

## 
## *Warning: Chi-squared test may be inaccurate because some expected counts are less than 5.*

cat(paste0("**Interpretation:** A p-value less than 0.05 suggests that the overall distribution of mite states is significantly different across the physiological stages.\n"))

## **Interpretation:** A p-value less than 0.05 suggests that the overall distribution of mite states is significantly different across the physiological stages.

# ANOVA on Arcsin-Transformed Proportions of Live Mites
cat("\n**2. ANOVA on the Proportion of Live Mites:**\n")

## 
## **2. ANOVA on the Proportion of Live Mites:**

if (nrow(df_proportions_physio) > 0) {
    live_proportions_physio <- df_proportions_physio %>%
      filter(State == "Live") %>%
      mutate(Transformed_Live = asin(sqrt(Proportion)))
    
    anova_result <- aov(Transformed_Live ~ Physiological_stage, data = live_proportions_physio)
    print(summary(anova_result))

    # Perform Tukey's HSD post-hoc test if ANOVA is significant
    if (summary(anova_result)[[1]][["Pr(>F)"]][1] < 0.05) {
        cat("\n**Tukey's HSD Post-Hoc Test:**\n")
        tukey_result <- TukeyHSD(anova_result)
        print(tukey_result)
        cat(paste0("**Interpretation:** This table shows pairwise comparisons between the groups. A 'p adj' value less than 0.05 for a pair indicates a significant difference in the mean proportion of live mites between those two stages.\n"))
    } else {
        cat(paste0("**Interpretation:** The overall ANOVA p-value is not significant, so we do not proceed with post-hoc tests. There is no significant difference in the mean proportion of live mites among the physiological stages.\n"))
    }
} else {
    cat("Not enough data to perform ANOVA.\n")
}

##                     Df  Sum Sq Mean Sq F value Pr(>F)
## Physiological_stage  1 0.07711 0.07711   1.519  0.264
## Residuals            6 0.30466 0.05078               
## **Interpretation:** The overall ANOVA p-value is not significant, so we do not proceed with post-hoc tests. There is no significant difference in the mean proportion of live mites among the physiological stages.

Is there a dose dependent effect of Amitraz on varroa mite viability?

To answer this question, we’ll use only phoretic mites from fresh vials. The analysis was done on Abbott -corrected values.

we tested mite’s state after three hours exposure to different doses of amitraz, in glass vials.
Experiment was conducted in threee yards, in a few hives in each yard

# --- Data Filtering and Preparation ---
df_dose <- df %>%
  filter(Date %in% experiments_to_keep) %>%
  filter(Physiological_stage == "Phoretic") %>%
  # Include Acetone (dose 0) for Abbott correction; we will exclude dose 0 in modeling later
  filter(Tested_compound %in% c("Acetone", "Amitraz")) %>%
  filter(Vial_Freshness == "Fresh") %>%
  mutate(
    Dose_numeric = ifelse(Tested_compound == "Acetone", 0, as.numeric(Dose)))

# --- Abbott correction using same-experiment controls (Dose = 0) ---
# For each Date and State, compute the mean control proportion from dose 0 vials
df_control_means <- df_dose %>%
  filter(Total > 0) %>%
  filter(Dose_numeric == 0) %>%
  mutate(
    Abnormal_behaviour = (ifelse(is.na(Paralyzed), 0, Paralyzed) + ifelse(is.na(Dancing), 0, Dancing)) / Total,
    Live = Viable / Total,
    Dead = Dead / Total
  ) %>%
  select(Date, Yard, Live, Abnormal_behaviour, Dead) %>%
  pivot_longer(cols = c(Live, Abnormal_behaviour, Dead), names_to = "State", values_to = "Control_Prop") %>%
  group_by(Date, Yard, State) %>%
  summarise(Control_Proportion = mean(Control_Prop, na.rm = TRUE), .groups = 'drop')

# Build base proportions table once (do not overwrite later)
df_proportions_dose_base <- df_dose %>%
  filter(Total > 0) %>%
  mutate(
    Abnormal_behaviour = (ifelse(is.na(Paralyzed), 0, Paralyzed) + ifelse(is.na(Dancing), 0, Dancing)) / Total,
    Live = Viable / Total,
    Dead = Dead / Total
  ) %>%
  select(Date, Yard, Hive, Vial, Dose_numeric, Live, Abnormal_behaviour, Dead) %>%
  pivot_longer(
      cols = c(Live, Abnormal_behaviour, Dead),
      names_to = "State",
      values_to = "Proportion")

# Add control means and Abbott-corrected proportions to a new table
df_proportions_dose <- df_proportions_dose_base %>%
  left_join(df_control_means, by = c("Date", "Yard", "State")) %>%
  mutate(
    Abbott_corrected = case_when(
      # Robust Abbott for Live: correct mortality then convert back to viability
      State == "Live" & !is.na(Control_Proportion) ~ {
        mort_test <- pmin(pmax(1 - Proportion, 0), 1)
        mort_ctrl <- pmin(pmax(1 - Control_Proportion, 0), 1)
        ifelse((1 - mort_ctrl) > 0,
               pmin(pmax(1 - ((mort_test - mort_ctrl) / (1 - mort_ctrl)), 0), 1),
               NA_real_)
      },
      # Default Abbott for other states (e.g., Dead, Abnormal)
      !is.na(Control_Proportion) & (1 - Control_Proportion) > 0 ~
        pmin(pmax((Proportion - Control_Proportion) / (1 - Control_Proportion), 0), 1),
      TRUE ~ NA_real_
    )
  )

# Prepare a reusable table for Live-state Abbott-corrected viability with per-vial totals
df_viability_corrected <- df_proportions_dose %>%
  filter(State == "Live") %>%
  select(Date, Yard, Hive, Vial, Dose_numeric, Abbott_corrected) %>%
  left_join(
    df_dose %>%
      group_by(Date, Yard, Hive, Vial) %>%
      summarise(Total = sum(Total, na.rm = TRUE), .groups = 'drop'),
    by = c("Date", "Yard", "Hive", "Vial")
  ) %>%
  mutate(Corrected_Viability = pmin(pmax(Abbott_corrected, 0), 1)) %>%
  filter(!is.na(Corrected_Viability))



## (Removed global logistic regression section to focus on yard/hive-specific analyses)

# Build per-vial table of corrected viability for the Live state and attach weights (Total mites)
live_corrected <- df_proportions_dose %>%
  filter(State == "Live") %>%
  select(Date, Yard, Hive, Vial, Dose_numeric, Proportion, Control_Proportion, Abbott_corrected) %>%
  left_join(
    df_dose %>%
      group_by(Date, Yard, Hive, Vial) %>%
      summarise(Total = sum(Total, na.rm = TRUE), .groups = 'drop'),
    by = c("Date", "Yard", "Hive", "Vial")
  ) %>%
  mutate(
    Corrected_Viability = pmin(pmax(Abbott_corrected, 0), 1)
  ) %>%
  filter(!is.na(Corrected_Viability))

# Fit GLM on corrected viability with binomial family and weights = Total, excluding dose 0
if (nrow(live_corrected %>% filter(Dose_numeric > 0)) > 2) {
  dose_response_model <- glm(
    Corrected_Viability ~ log10(Dose_numeric),
    data = live_corrected %>% filter(Dose_numeric > 0),
    weights = Total,
    family = binomial(link = "logit")
  )

  # suppress printing of the full model summary in the HTML output

  # Helper to compute LDx given target viability proportion v (0<v<1)
  compute_LD_from_viability <- function(v, coef_vec) {
    a <- coef_vec[1]
    b <- coef_vec[2]
    if (is.na(a) || is.na(b) || b == 0) return(NA_real_)
    log10_d <- (qlogis(v) - a) / b
    10^log10_d
  }

  coefs <- coef(dose_response_model)
  # LDx where viability equals 5%, 10%, 50%
  ld5  <- compute_LD_from_viability(0.05, coefs)
  ld10 <- compute_LD_from_viability(0.10, coefs)
  ld50 <- compute_LD_from_viability(0.50, coefs)

  # suppress printing raw LDs here (reported later per yard/hive)

  # Prepare per‑yard prediction lines for plotting
  pred_lines_all <- live_corrected %>%
    filter(Dose_numeric > 0) %>%  # Exclude dose 0 for GLM fitting
    group_by(Yard) %>%
    group_modify(~{
      df_h <- .x
      if (nrow(df_h) < 3 || length(unique(df_h$Dose_numeric)) < 2) return(tibble(Dose_numeric = numeric(0), Predicted_Viability = numeric(0), Yard = df_h$Yard[1]))
      m <- tryCatch(glm(Corrected_Viability ~ log10(Dose_numeric), data = df_h, weights = Total, family = binomial(link = "logit")), error=function(e) NULL)
      if (is.null(m)) return(tibble(Dose_numeric = numeric(0), Predicted_Viability = numeric(0), Yard = df_h$Yard[1]))
      dmin <- min(df_h$Dose_numeric); dmax <- max(df_h$Dose_numeric)
      dseq <- 10^seq(log10(dmin), log10(dmax), length.out = 200)
      tibble(Dose_numeric = dseq, Predicted_Viability = predict(m, newdata = data.frame(Dose_numeric = dseq), type = "response"), Yard = df_h$Yard[1])
    })

  # Calculate per-yard LDs
  yard_ld_calculations <- live_corrected %>%
    filter(Dose_numeric > 0) %>%
    group_by(Yard) %>%
    group_modify(~{
      df_h <- .x
      if (nrow(df_h) < 3 || length(unique(df_h$Dose_numeric)) < 2) {
        return(tibble(LD5 = NA_real_, LD10 = NA_real_, LD50 = NA_real_, n_points = nrow(df_h), dose_range = paste(range(df_h$Dose_numeric), collapse = " to ")))
      }
      
      m <- tryCatch(
        glm(Corrected_Viability ~ log10(Dose_numeric), data = df_h, weights = Total, family = binomial(link = "logit")),
        error = function(e) NULL
      )
      
      if (is.null(m)) {
        return(tibble(LD5 = NA_real_, LD10 = NA_real_, LD50 = NA_real_, n_points = nrow(df_h), dose_range = paste(range(df_h$Dose_numeric), collapse = " to ")))
      }
      
      coefs <- coef(m)
      a <- coefs[1]; b <- coefs[2]
      
      if (is.na(a) || is.na(b) || b == 0) {
        return(tibble(LD5 = NA_real_, LD10 = NA_real_, LD50 = NA_real_, n_points = nrow(df_h), dose_range = paste(range(df_h$Dose_numeric), collapse = " to ")))
      }
      
      ld5 <- 10^((qlogis(0.05) - a) / b)
      ld10 <- 10^((qlogis(0.10) - a) / b)
      ld50 <- 10^((qlogis(0.50) - a) / b)
      
      tibble(LD5 = ld5, LD10 = ld10, LD50 = ld50, n_points = nrow(df_h), dose_range = paste(range(df_h$Dose_numeric), collapse = " to "))
    }) %>%
    ungroup()

  # Prepare mean ± SE data for all yards
  all_yards_summary <- live_corrected %>%
    filter(Dose_numeric > 0) %>%
    group_by(Yard, Dose_numeric) %>%
    summarise(
      n_vials = n(),
      mean_viability = mean(Corrected_Viability, na.rm = TRUE),
      se_viability = sd(Corrected_Viability, na.rm = TRUE) / sqrt(n()),
      .groups = 'drop'
    ) %>%
    arrange(Yard, Dose_numeric)

  p_fit <- ggplot() +
    geom_point(
      data = all_yards_summary,
      aes(x = Dose_numeric, y = mean_viability, color = Yard),
      size = 3, shape = 16
    ) +
    geom_errorbar(
      data = all_yards_summary,
      aes(x = Dose_numeric, ymin = pmax(0, mean_viability - se_viability),
          ymax = pmin(1, mean_viability + se_viability), color = Yard),
      width = 0.1, linewidth = 0.8
    ) +
    geom_line(
      data = pred_lines_all,
      aes(x = Dose_numeric, y = Predicted_Viability, color = Yard),
      linewidth = 1.2
    ) +
    geom_hline(yintercept = c(0.05, 0.10, 0.50), linetype = "dashed", color = "grey50") +
    scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000), labels = c("0.01", "0.1", "1", "10", "100", "1000")) +
    scale_y_continuous(limits = c(0, 1), labels = scales::percent_format(accuracy = 1)) +
    labs(
      title = "Amitraz Dose–Response on Abbott-corrected Viability (Live)",
      x = "Dose (µg/vial, log scale)",
      y = "Abbott-corrected Viability",
      color = "Yard"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 14, face = "bold"),
      axis.title = element_text(size = 12, face = "bold"),
      axis.text = element_text(size = 10)
    )

  print(p_fit)
  
  cat("\n**Figure 1.** Dose-response curves for Abbott-corrected viability of *Varroa destructor* mites exposed to Amitraz for 3 hours. Data points represent mean ± standard error of viability at each dose tested. Solid lines show fitted logistic regression models for each apiary. Horizontal dashed lines indicate reference viability levels (5%, 10%, 50%). Zur_Suheita (blue) shows highest sensitivity with rapid viability decline, Raabane_Ein_Kinya (red) exhibits intermediate sensitivity, and Shamir (green) demonstrates lowest sensitivity (highest resistance) to Amitraz treatment.\n")
  
  # Create a nicely formatted LD summary table
  cat("\n**Viable Dose Summary by Yard:**\n")
  cat("VD values represent doses (µg/vial) where VIABILITY equals 5%, 10%, and 50% (VD5=dose at 5% viability, etc.)\n\n")
  
  ld_summary_table <- yard_ld_calculations %>%
    mutate(
      across(c(LD5, LD10, LD50), ~round(., 2)),
      dose_range = paste0("(", dose_range, ")")
    ) %>%
    select(Yard, LD5, LD10, LD50, n_points, dose_range) %>%
    rename(VD5 = LD5, VD10 = LD10, VD50 = LD50, n = n_points, `Dose range` = dose_range)
  
  # Render as styled HTML table
  if (knitr::is_html_output()) {
    html_tbl <- knitr::kable(
      ld_summary_table,
      format = "html",
      col.names = c("Yard", "VD5", "VD10", "VD50", "n", "Dose range"),
      align = c('l','r','r','r','r','l')
    ) %>%
      kableExtra::kable_styling(
        full_width = FALSE,
        bootstrap_options = c("striped", "hover", "condensed")
      ) %>%
      kableExtra::row_spec(0, bold = TRUE)
    knitr::asis_output(html_tbl)
  } else {
    print(ld_summary_table)
  }
} else {
  cat("\nNot enough corrected data points to fit the logistic regression model.\n")
}

## 
## **Figure 1.** Dose-response curves for Abbott-corrected viability of *Varroa destructor* mites exposed to Amitraz for 3 hours. Data points represent mean ± standard error of viability at each dose tested. Solid lines show fitted logistic regression models for each apiary. Horizontal dashed lines indicate reference viability levels (5%, 10%, 50%). Zur_Suheita (blue) shows highest sensitivity with rapid viability decline, Raabane_Ein_Kinya (red) exhibits intermediate sensitivity, and Shamir (green) demonstrates lowest sensitivity (highest resistance) to Amitraz treatment.
## 
## **Viable Dose Summary by Yard:**
## VD values represent doses (µg/vial) where VIABILITY equals 5%, 10%, and 50% (VD5=dose at 5% viability, etc.)

Yard	VD5	VD10	VD50	n	Dose range
Dan_Zaatar	NA	NA	NA	3	(10 to 10)
Raabane_Ein_Kinya	8.92	1.40	0.01	28	(0.01 to 5)
Shamir	55147.21	6254.96	10.39	60	(0.01 to 2000)
Zur_Suheita	40.16	4.85	0.01	50	(0.01 to 10)

Shamir yard: dose–response and LDs per hive

# Focus on Shamir yard; reuse Abbott-corrected viability prepared earlier

# Build per-vial corrected viability for Live state in Shamir and attach per-vial totals
live_corrected_shamir <- df_viability_corrected %>%
  filter(Yard == "Shamir", Dose_numeric > 0, Total > 0, Hive != "3_4")  # Exclude mixed-hive experiments

# Fit GLM per hive
fit_per_hive <- live_corrected_shamir %>%
  group_by(Hive) %>%
  group_modify(~{
    df_h <- .x
    if (nrow(df_h) < 3 || length(unique(df_h$Dose_numeric)) < 2) return(tibble(model = list(NULL)))
    model <- tryCatch(
      glm(Corrected_Viability ~ log10(Dose_numeric), data = df_h, weights = Total, family = binomial(link = "logit")),
      error = function(e) NULL
    )
    tibble(model = list(model))
  })

# Compute LDs per hive
compute_LD_v <- function(model, v) {
  if (is.null(model)) return(NA_real_)
  coefs <- coef(model)
  a <- coefs[1]; b <- coefs[2]
  if (is.na(a) || is.na(b) || b == 0) return(NA_real_)
  10^((qlogis(v) - a) / b)
}

# Compute Viable Doses (with reasonable extrapolation)
compute_VD_extrapolated <- function(model, v, dmin, dmax) {
  if (is.null(model) || is.na(dmin) || is.na(dmax) || dmin <= 0 || dmax <= 0 || dmin >= dmax) return(NA_real_)
  
  # First try within observed range
  f <- function(logd) {
    predict(model, newdata = data.frame(Dose_numeric = 10^logd), type = "response") - v
  }
  
  # Check if target is achievable within observed range
  f_lo <- tryCatch(f(log10(dmin)), error = function(e) NA_real_)
  f_hi <- tryCatch(f(log10(dmax)), error = function(e) NA_real_)
  
  if (any(is.na(c(f_lo, f_hi)))) return(NA_real_)
  
  # If target is within range, use uniroot
  if (sign(f_lo) != sign(f_hi)) {
    root <- tryCatch(uniroot(f, interval = c(log10(dmin), log10(dmax)))$root, error = function(e) NA_real_)
    if (!is.na(root)) return(10^root)
  }
  
  # If not achievable within range, try reasonable extrapolation
  # Extend range by 2 log units in both directions for more aggressive extrapolation
  extended_min <- max(0.0001, dmin / 100)  # Don't go below 0.0001
  extended_max <- min(100000, dmax * 100)  # Don't go above 100000
  
  # Check if target is achievable in extended range
  f_ext_lo <- tryCatch(f(log10(extended_min)), error = function(e) NA_real_)
  f_ext_hi <- tryCatch(f(log10(extended_max)), error = function(e) NA_real_)
  
  if (any(is.na(c(f_ext_lo, f_ext_hi)))) return(NA_real_)
  
  if (sign(f_ext_lo) != sign(f_ext_hi)) {
    root <- tryCatch(uniroot(f, interval = c(log10(extended_min), log10(extended_max)))$root, error = function(e) NA_real_)
    if (!is.na(root)) return(10^root)
  }
  
  # If still not achievable, try even more aggressive extrapolation
  # This handles cases where the model asymptote is above the target viability
  very_extended_min <- max(0.00001, dmin / 1000)  # Very low doses
  very_extended_max <- min(1000000, dmax * 1000)  # Very high doses
  
  f_very_lo <- tryCatch(f(log10(very_extended_min)), error = function(e) NA_real_)
  f_very_hi <- tryCatch(f(log10(very_extended_max)), error = function(e) NA_real_)
  
  if (any(is.na(c(f_very_lo, f_very_hi)))) return(NA_real_)
  
  if (sign(f_very_lo) != sign(f_very_hi)) {
    root <- tryCatch(uniroot(f, interval = c(log10(very_extended_min), log10(very_extended_max)))$root, error = function(e) NA_real_)
    if (!is.na(root)) return(10^root)
  }
  
  # If still not achievable, try the simple formula as last resort
  # This handles cases where the model is well-behaved but extrapolation fails
  coefs <- coef(model)
  if (length(coefs) >= 2 && !is.na(coefs[2]) && coefs[2] != 0) {
    simple_result <- 10^((qlogis(v) - coefs[1]) / coefs[2])
    # Only use if result is reasonable (between 0.00001 and 1000000)
    if (!is.na(simple_result) && simple_result > 0.00001 && simple_result < 1000000) {
      return(simple_result)
    }
  }
  
  # If all methods fail, return NA
  return(NA_real_)
}

# Join dose ranges per hive
shamir_ranges <- live_corrected_shamir %>% group_by(Hive) %>% summarise(dmin = min(Dose_numeric), dmax = max(Dose_numeric), .groups = 'drop')

# Debug: Check Hive 3 data specifically
cat("\n**Debug: Hive 3 Shamir data:**\n")

## 
## **Debug: Hive 3 Shamir data:**

hive3_data <- live_corrected_shamir %>% filter(Hive == "3")
hive3_model <- fit_per_hive %>% filter(Hive == "3") %>% pull(model) %>% .[[1]]
hive3_range <- shamir_ranges %>% filter(Hive == "3")

cat("Dose range:", hive3_range$dmin, "to", hive3_range$dmax, "\n")

## Dose range: 0.01 to 2000

cat("Number of data points:", nrow(hive3_data), "\n")

## Number of data points: 22

cat("Model exists:", !is.null(hive3_model), "\n")

## Model exists: TRUE

if (!is.null(hive3_model)) {
  # Check viability range
  cat("Viability range:", round(min(hive3_data$Corrected_Viability), 3), "to", round(max(hive3_data$Corrected_Viability), 3), "\n")
  
  # Test predictions at dose range
  pred_min <- predict(hive3_model, newdata = data.frame(Dose_numeric = hive3_range$dmin), type = "response")
  pred_max <- predict(hive3_model, newdata = data.frame(Dose_numeric = hive3_range$dmax), type = "response")
  cat("Predicted viability at min dose:", round(pred_min, 3), "\n")
  cat("Predicted viability at max dose:", round(pred_max, 3), "\n")
  
  # Test VD calculations manually
  vd5_manual <- compute_VD_extrapolated(hive3_model, 0.05, hive3_range$dmin, hive3_range$dmax)
  vd10_manual <- compute_VD_extrapolated(hive3_model, 0.10, hive3_range$dmin, hive3_range$dmax)
  cat("Manual VD5:", vd5_manual, "\n")
  cat("Manual VD10:", vd10_manual, "\n")
}

## Viability range: 0 to 1 
## Predicted viability at min dose: 0.864 
## Predicted viability at max dose: 0.151 
## Manual VD5: 129753 
## Manual VD10: 10078.94

ld_table <- fit_per_hive %>%
  left_join(shamir_ranges, by = "Hive") %>%
  mutate(
    VD5  = pmap_dbl(list(model, dmin, dmax), ~compute_VD_extrapolated(..1, 0.05, ..2, ..3)),
    VD10 = pmap_dbl(list(model, dmin, dmax), ~compute_VD_extrapolated(..1, 0.10, ..2, ..3)),
    VD50 = pmap_dbl(list(model, dmin, dmax), ~compute_VD_extrapolated(..1, 0.50, ..2, ..3))
  ) %>%
  select(Hive, VD5, VD10, VD50)

cat("\n**VD estimates (viability = 5%, 10%, 50%) by hive (Shamir):**\n")

## 
## **VD estimates (viability = 5%, 10%, 50%) by hive (Shamir):**

ld_table_html <- ld_table
if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(ld_table_html, format = "html", align = c('l','r','r','r'),
                           col.names = c("Hive","VD5","VD10","VD50")) %>%
    kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover","condensed")) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(ld_table_html)
}

Hive	VD5	VD10	VD50
1	991532.9093	125748.9670	289.993684
3	129753.0356	10078.9433	5.499867
4	525.1695	288.8162	49.768309
6	2556.1228	625.5240	9.965903

# Summary statistics per hive (compute then render)
shamir_summary <- live_corrected_shamir %>%
  group_by(Hive) %>%
  summarise(
    n_vials = n(),
    total_mites = sum(Total, na.rm = TRUE),
    mean_mites_per_vial = round(mean(Total, na.rm = TRUE), 1),
    dose_range = paste0("(", round(min(Dose_numeric), 3), " - ", round(max(Dose_numeric), 1), ")"),
    .groups = 'drop'
  )
cat("\n**Shamir yard summary statistics per hive:**\n")

## 
## **Shamir yard summary statistics per hive:**

if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(shamir_summary, format = "html", align = c('l','r','r','r','l')) %>%
    kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover","condensed")) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(shamir_summary)
}

Hive	n_vials	total_mites	mean_mites_per_vial	dose_range
1	10	100	10	(1 - 2000)
3	22	220	10	(0.01 - 2000)
4	6	60	10	(1 - 2000)
6	18	180	10	(1 - 2000)

# Dose-level summary for Shamir yard (compute then render)
shamir_dose_summary <- live_corrected_shamir %>%
  group_by(Dose_numeric) %>%
  summarise(
    n_vials = n(),
    total_mites = sum(Total, na.rm = TRUE),
    mean_viability = round(mean(Corrected_Viability, na.rm = TRUE) * 100, 1),
    se_viability = round((sd(Corrected_Viability, na.rm = TRUE) / sqrt(n())) * 100, 1),
    .groups = 'drop'
  ) %>%
  arrange(Dose_numeric)
cat("\n**Shamir yard dose-level summary:**\n")

## 
## **Shamir yard dose-level summary:**

if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(shamir_dose_summary, format = "html", align = c('r','r','r','r')) %>%
    kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover","condensed")) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(shamir_dose_summary)
}

Dose_numeric	n_vials	total_mites	mean_viability	se_viability
1e-02	1	10	40.0	NA
1e-01	2	20	90.0	10.0
1e+00	10	101	78.4	8.6
1e+01	14	139	55.2	8.9
5e+01	7	70	45.1	9.8
1e+02	8	80	28.3	7.1
1e+03	7	70	11.2	8.5
2e+03	7	70	14.3	14.3

# Prepare LD lines (long format) for plotting
ld_lines_shamir <- ld_table %>%
  tidyr::pivot_longer(cols = c(VD5, VD10, VD50), names_to = "LD", values_to = "dose") %>%
  filter(!is.na(dose))

# Create prediction grid per hive for plotting
pred_lines <- live_corrected_shamir %>%
  group_by(Hive) %>%
  summarise(dose_min = min(Dose_numeric), dose_max = max(Dose_numeric), .groups = 'drop') %>%
  inner_join(fit_per_hive, by = "Hive") %>%
  mutate(grid = pmap(list(dose_min, dose_max, model), function(dmin, dmax, m) {
    if (is.null(m)) return(tibble(Dose_numeric = numeric(0), Pred = numeric(0)))
    dseq <- 10^seq(log10(dmin), log10(dmax), length.out = 200)
    tibble(Dose_numeric = dseq, Pred = predict(m, newdata = data.frame(Dose_numeric = dseq), type = "response"))
  })) %>%
  select(Hive, grid) %>%
  unnest(grid)

# Prepare mean ± SE data for Shamir hives
shamir_hives_summary <- live_corrected_shamir %>%
  group_by(Hive, Dose_numeric) %>%
  summarise(
    n_vials = n(),
    mean_viability = mean(Corrected_Viability, na.rm = TRUE),
    se_viability = sd(Corrected_Viability, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  ) %>%
  arrange(Hive, Dose_numeric)

# Plot per-hive regression lines and observations
p_shamir <- ggplot() +
  geom_point(
    data = shamir_hives_summary,
    aes(x = Dose_numeric, y = mean_viability, color = Hive),
    size = 3, shape = 16
  ) +
  geom_errorbar(
    data = shamir_hives_summary,
    aes(x = Dose_numeric, ymin = pmax(0, mean_viability - se_viability),
        ymax = pmin(1, mean_viability + se_viability), color = Hive),
    width = 0.1, linewidth = 0.8
  ) +
  geom_line(
    data = pred_lines,
    aes(x = Dose_numeric, y = Pred, color = Hive),
    linewidth = 1.2
  ) +
  geom_hline(yintercept = c(0.05, 0.10, 0.50), linetype = "dashed", color = "grey50") +
  scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000), labels = c("0.01", "0.1", "1", "10", "100", "1000")) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent_format(accuracy = 1)) +
  labs(
    title = "Shamir yard: Abbott-corrected viability by hive",
    x = "Dose (µg/vial, log scale)",
    y = "Abbott-corrected Viability",
    color = "Hive"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(size = 14, face = "bold"))

print(p_shamir)

# Publication-style dose-response plot for all hives in Shamir yard with separate regression lines
cat("\n**Publication-style dose-response plot for all hives in Shamir yard:**\n")

## 
## **Publication-style dose-response plot for all hives in Shamir yard:**

# Prepare mean ± SE data for all hives in Shamir
shamir_hives_summary <- live_corrected_shamir %>%
  group_by(Hive, Dose_numeric) %>%
  summarise(
    n_vials = n(),
    mean_viability = mean(Corrected_Viability, na.rm = TRUE),
    se_viability = sd(Corrected_Viability, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  ) %>%
  arrange(Hive, Dose_numeric)

# Create publication-style plot for all hives in Shamir
p_shamir_hives_pub <- ggplot(shamir_hives_summary, aes(x = log10(Dose_numeric), y = mean_viability, color = Hive)) +
  geom_point(size = 3, shape = 16) +
  geom_errorbar(
    aes(ymin = pmax(0, mean_viability - se_viability), 
        ymax = pmin(1, mean_viability + se_viability)),
    width = 0.1,
    linewidth = 0.8
  ) +
  geom_line(
    data = pred_lines %>% 
      mutate(log_dose = log10(Dose_numeric)),
    aes(x = log_dose, y = Pred, color = Hive),
    linewidth = 1.2
  ) +
  scale_x_continuous(
    breaks = seq(-1, 3, 1),
    labels = c("0.1", "1", "10", "100", "1000"),
    limits = c(-1, 3)
  ) +
  scale_y_continuous(
    limits = c(0, 1),
    labels = c("0", "0.25", "0.5", "0.75", "1.0")
  ) +
  labs(
    title = "Shamir - All Hives Dose-Response",
    x = "log Concentration (µg/vial)",
    y = "Viability (proportion)",
    color = "Hive"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    legend.position = "right"
  )

print(p_shamir_hives_pub)

# Individual hive plots for Shamir arranged in one row
cat("\n**Individual hive plots for Shamir yard:**\n")

## 
## **Individual hive plots for Shamir yard:**

# Get unique hives in Shamir
shamir_hives <- unique(live_corrected_shamir$Hive)

# Create individual plots for each hive
shamir_individual_plots <- list()
for (hive_id in shamir_hives) {
  hive_data <- live_corrected_shamir %>% filter(Hive == hive_id)
  hive_model <- fit_per_hive %>% filter(Hive == hive_id) %>% pull(model) %>% .[[1]]
  
  if (!is.null(hive_model)) {
    # Create prediction line for this hive
    dmin <- min(hive_data$Dose_numeric)
    dmax <- max(hive_data$Dose_numeric)
    dseq <- 10^seq(log10(dmin), log10(dmax), length.out = 200)
    pred_hive <- data.frame(
      Dose_numeric = dseq,
      Predicted_Viability = predict(hive_model, newdata = data.frame(Dose_numeric = dseq), type = "response")
    )
    
    # Create individual hive plot
    p_hive <- ggplot() +
      geom_point(
        data = hive_data,
        aes(x = Dose_numeric, y = Corrected_Viability),
        color = "blue", alpha = 0.7, shape = 16, size = 2
      ) +
      geom_line(
        data = pred_hive,
        aes(x = Dose_numeric, y = Predicted_Viability),
        color = "blue", linewidth = 1.0
      ) +
      geom_hline(yintercept = c(0.05, 0.10, 0.50), linetype = "dashed", color = "grey50") +
      scale_x_log10() +
      scale_y_continuous(limits = c(0, 1), labels = scales::percent_format(accuracy = 1)) +
      labs(
        title = paste0("Hive ", hive_id),
        x = "Dose (µg/vial)",
        y = "Viability"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 12, face = "bold"),
        axis.title = element_text(size = 10),
        axis.text = element_text(size = 8),
        plot.margin = margin(5, 5, 5, 5)
      )
    
    shamir_individual_plots[[hive_id]] <- p_hive
  }
}

# Arrange individual plots in one row
if (length(shamir_individual_plots) > 0) {
  combined_shamir_plots <- do.call(gridExtra::grid.arrange, c(shamir_individual_plots, ncol = length(shamir_individual_plots)))
  print(combined_shamir_plots)
}

## TableGrob (1 x 4) "arrange": 4 grobs
##   z     cells    name           grob
## 3 1 (1-1,1-1) arrange gtable[layout]
## 4 2 (1-1,2-2) arrange gtable[layout]
## 6 3 (1-1,3-3) arrange gtable[layout]
## 1 4 (1-1,4-4) arrange gtable[layout]

# Publication-style dose-response plot for hive 4 in Shamir yard by date with separate regression lines
cat("\n**Publication-style dose-response plot for Shamir - Hive 4 by Date:**\n")

## 
## **Publication-style dose-response plot for Shamir - Hive 4 by Date:**

# Prepare mean ± SE data for hive 4 by date
hive4_by_date_summary <- live_corrected_shamir %>%
  filter(Hive == "4") %>%
  group_by(Date, Dose_numeric) %>%
  summarise(
    n_vials = n(),
    mean_viability = mean(Corrected_Viability, na.rm = TRUE),
    se_viability = sd(Corrected_Viability, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  ) %>%
  arrange(Date, Dose_numeric)

# Fit GLM per date for hive 4
fit_per_date_hive4 <- live_corrected_shamir %>%
  filter(Hive == "4") %>%
  group_by(Date) %>%
  group_modify(~{
    df_d <- .x
    if (nrow(df_d) < 3 || length(unique(df_d$Dose_numeric)) < 2) return(tibble(model = list(NULL)))
    model <- tryCatch(
      glm(Corrected_Viability ~ log10(Dose_numeric), data = df_d, weights = Total, family = binomial(link = "logit")),
      error = function(e) NULL
    )
    tibble(model = list(model))
  })

# Create prediction lines for each date
pred_lines_hive4_by_date <- live_corrected_shamir %>%
  filter(Hive == "4") %>%
  group_by(Date) %>%
  summarise(dose_min = min(Dose_numeric), dose_max = max(Dose_numeric), .groups = 'drop') %>%
  inner_join(fit_per_date_hive4, by = "Date") %>%
  mutate(grid = pmap(list(dose_min, dose_max, model), function(dmin, dmax, m) {
    if (is.null(m)) return(tibble(Dose_numeric = numeric(0), Pred = numeric(0)))
    dseq <- 10^seq(log10(dmin), log10(dmax), length.out = 200)
    tibble(Dose_numeric = dseq, Pred = predict(m, newdata = data.frame(Dose_numeric = dseq), type = "response"))
  })) %>%
  select(Date, grid) %>%
  unnest(grid)

# Create publication-style plot for hive 4 by date
p_hive4_by_date_pub <- ggplot(hive4_by_date_summary, aes(x = log10(Dose_numeric), y = mean_viability, color = Date)) +
  geom_point(size = 3, shape = 16) +
  geom_errorbar(
    aes(ymin = pmax(0, mean_viability - se_viability), 
        ymax = pmin(1, mean_viability + se_viability)),
    width = 0.1,
    linewidth = 0.8
  ) +
  geom_line(
    data = pred_lines_hive4_by_date %>% 
      mutate(log_dose = log10(Dose_numeric)),
    aes(x = log_dose, y = Pred, color = Date),
    linewidth = 1.2
  ) +
  scale_x_continuous(
    breaks = seq(-1, 3, 1),
    labels = c("0.1", "1", "10", "100", "1000"),
    limits = c(-1, 3)
  ) +
  scale_y_continuous(
    limits = c(0, 1),
    labels = c("0", "0.25", "0.5", "0.75", "1.0")
  ) +
  labs(
    title = "Shamir - Hive 4 Dose-Response by Date",
    x = "log Concentration (µg/vial)",
    y = "Viability (proportion)",
    color = "Date"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    legend.position = "right"
  )

print(p_hive4_by_date_pub)

# Publication-style dose-response plot for hive 6 in Shamir yard by date with separate regression lines
cat("\n**Publication-style dose-response plot for Shamir - Hive 6 by Date:**\n")

## 
## **Publication-style dose-response plot for Shamir - Hive 6 by Date:**

# Prepare mean ± SE data for hive 6 by date
hive6_by_date_summary <- live_corrected_shamir %>%
  filter(Hive == "6") %>%
  group_by(Date, Dose_numeric) %>%
  summarise(
    n_vials = n(),
    mean_viability = mean(Corrected_Viability, na.rm = TRUE),
    se_viability = sd(Corrected_Viability, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  ) %>%
  arrange(Date, Dose_numeric)

# Fit GLM per date for hive 6
fit_per_date_hive6 <- live_corrected_shamir %>%
  filter(Hive == "6") %>%
  group_by(Date) %>%
  group_modify(~{
    df_d <- .x
    if (nrow(df_d) < 3 || length(unique(df_d$Dose_numeric)) < 2) return(tibble(model = list(NULL)))
    model <- tryCatch(
      glm(Corrected_Viability ~ log10(Dose_numeric), data = df_d, weights = Total, family = binomial(link = "logit")),
      error = function(e) NULL
    )
    tibble(model = list(model))
  })

# Create prediction lines for each date
pred_lines_hive6_by_date <- live_corrected_shamir %>%
  filter(Hive == "6") %>%
  group_by(Date) %>%
  summarise(dose_min = min(Dose_numeric), dose_max = max(Dose_numeric), .groups = 'drop') %>%
  inner_join(fit_per_date_hive6, by = "Date") %>%
  mutate(grid = pmap(list(dose_min, dose_max, model), function(dmin, dmax, m) {
    if (is.null(m)) return(tibble(Dose_numeric = numeric(0), Pred = numeric(0)))
    dseq <- 10^seq(log10(dmin), log10(dmax), length.out = 200)
    tibble(Dose_numeric = dseq, Pred = predict(m, newdata = data.frame(Dose_numeric = dseq), type = "response"))
  })) %>%
  select(Date, grid) %>%
  unnest(grid)

# Create publication-style plot for hive 6 by date
p_hive6_by_date_pub <- ggplot(hive6_by_date_summary, aes(x = log10(Dose_numeric), y = mean_viability, color = Date)) +
  geom_point(size = 3, shape = 16) +
  geom_errorbar(
    aes(ymin = pmax(0, mean_viability - se_viability), 
        ymax = pmin(1, mean_viability + se_viability)),
    width = 0.1,
    linewidth = 0.8
  ) +
  geom_line(
    data = pred_lines_hive6_by_date %>% 
      mutate(log_dose = log10(Dose_numeric)),
    aes(x = log_dose, y = Pred, color = Date),
    linewidth = 1.2
  ) +
  scale_x_continuous(
    breaks = seq(-1, 3, 1),
    labels = c("0.1", "1", "10", "100", "1000"),
    limits = c(-1, 3)
  ) +
  scale_y_continuous(
    limits = c(0, 1),
    labels = c("0", "0.25", "0.5", "0.75", "1.0")
  ) +
  labs(
    title = "Shamir - Hive 6 Dose-Response by Date",
    x = "log Concentration (µg/vial)",
    y = "Viability (proportion)",
    color = "Date"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    legend.position = "right"
  )

print(p_hive6_by_date_pub)

Raabane_Ein_Kinya yard: dose–response and LDs per hive

# Focus on Raabane_Ein_Kinya yard; reuse Abbott-corrected viability prepared earlier

# Build per-vial corrected viability for Live state in Raabane_Ein_Kinya and attach per-vial totals
live_corrected_raabane <- df_viability_corrected %>%
  filter(Yard == "Raabane_Ein_Kinya", Dose_numeric > 0, Total > 0)

# Fit GLM per hive
fit_per_hive_raabane <- live_corrected_raabane %>%
  group_by(Hive) %>%
  group_modify(~{
    df_h <- .x
    if (nrow(df_h) < 3 || length(unique(df_h$Dose_numeric)) < 2) return(tibble(model = list(NULL)))
    model <- tryCatch(
      glm(Corrected_Viability ~ log10(Dose_numeric), data = df_h, weights = Total, family = binomial(link = "logit")),
      error = function(e) NULL
    )
    tibble(model = list(model))
  })

# Compute LDs per hive
ld_table_raabane <- fit_per_hive_raabane %>%
  mutate(
    LD5  = map_dbl(model, ~compute_LD_v(.x, 0.05)),
    LD10 = map_dbl(model, ~compute_LD_v(.x, 0.10)),
    LD50 = map_dbl(model, ~compute_LD_v(.x, 0.50))
  ) %>%
  select(Hive, LD5, LD10, LD50)

cat("\n**VD estimates (viability = 5%, 10%, 50%) by hive (Raabane_Ein_Kinya):**\n")

## 
## **VD estimates (viability = 5%, 10%, 50%) by hive (Raabane_Ein_Kinya):**

ld_table_raabane_html <- ld_table_raabane
if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(ld_table_raabane_html, format = "html", align = c('l','r','r','r')) %>%
    kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover","condensed")) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(ld_table_raabane_html)
}

Hive	LD5	LD10	LD50
2	40.173334	2.8910395	0.0012599
3	3.454109	0.8518582	0.0138868

# Summary statistics per hive (compute then render)
raabane_summary <- live_corrected_raabane %>%
  group_by(Hive) %>%
  summarise(
    n_vials = n(),
    total_mites = sum(Total, na.rm = TRUE),
    mean_mites_per_vial = round(mean(Total, na.rm = TRUE), 1),
    dose_range = paste0("(", round(min(Dose_numeric), 3), " - ", round(max(Dose_numeric), 1), ")"),
    .groups = 'drop'
  )
cat("\n**Raabane_Ein_Kinya yard summary statistics per hive:**\n")

## 
## **Raabane_Ein_Kinya yard summary statistics per hive:**

if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(raabane_summary, format = "html", align = c('l','r','r','r','l')) %>%
    kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover","condensed")) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(raabane_summary)
}

Hive	n_vials	total_mites	mean_mites_per_vial	dose_range
2	15	149	9.9	(0.01 - 5)
3	13	130	10.0	(0.01 - 5)

# Dose-level summary for Raabane_Ein_Kinya yard (compute then render)
raabane_dose_summary <- live_corrected_raabane %>%
  group_by(Dose_numeric) %>%
  summarise(
    n_vials = n(),
    total_mites = sum(Total, na.rm = TRUE),
    mean_viability = round(mean(Corrected_Viability, na.rm = TRUE) * 100, 1),
    se_viability = round((sd(Corrected_Viability, na.rm = TRUE) / sqrt(n())) * 100, 1),
    .groups = 'drop'
  ) %>%
  arrange(Dose_numeric)
cat("\n**Raabane_Ein_Kinya yard dose-level summary:**\n")

## 
## **Raabane_Ein_Kinya yard dose-level summary:**

if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(raabane_dose_summary, format = "html", align = c('r','r','r','r')) %>%
    kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover","condensed")) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(raabane_dose_summary)
}

Dose_numeric	n_vials	total_mites	mean_viability	se_viability
0.01	6	60	48.3	7.9
0.10	6	60	18.3	5.4
0.50	2	20	10.0	0.0
1.00	4	40	20.0	9.1
2.50	4	40	2.5	2.5
5.00	6	59	8.3	4.0

# Create prediction grid per hive for plotting
pred_lines_raabane <- live_corrected_raabane %>%
  group_by(Hive) %>%
  summarise(dose_min = min(Dose_numeric), dose_max = max(Dose_numeric), .groups = 'drop') %>%
  inner_join(fit_per_hive_raabane, by = "Hive") %>%
  mutate(grid = pmap(list(dose_min, dose_max, model), function(dmin, dmax, m) {
    if (is.null(m)) return(tibble(Dose_numeric = numeric(0), Pred = numeric(0)))
    dseq <- 10^seq(log10(dmin), log10(dmax), length.out = 200)
    tibble(Dose_numeric = dseq, Pred = predict(m, newdata = data.frame(Dose_numeric = dseq), type = "response"))
  })) %>%
  select(Hive, grid) %>%
  unnest(grid)

# Prepare mean ± SE data for Raabane hives
raabane_hives_summary <- live_corrected_raabane %>%
  group_by(Hive, Dose_numeric) %>%
  summarise(
    n_vials = n(),
    mean_viability = mean(Corrected_Viability, na.rm = TRUE),
    se_viability = sd(Corrected_Viability, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  ) %>%
  arrange(Hive, Dose_numeric)

# Plot per-hive regression lines and observations
p_raabane <- ggplot() +
  geom_point(
    data = raabane_hives_summary,
    aes(x = Dose_numeric, y = mean_viability, color = Hive),
    size = 3, shape = 16
  ) +
  geom_errorbar(
    data = raabane_hives_summary,
    aes(x = Dose_numeric, ymin = pmax(0, mean_viability - se_viability),
        ymax = pmin(1, mean_viability + se_viability), color = Hive),
    width = 0.1, linewidth = 0.8
  ) +
  geom_line(
    data = pred_lines_raabane,
    aes(x = Dose_numeric, y = Pred, color = Hive),
    linewidth = 1.2
  ) +
  geom_hline(yintercept = c(0.05, 0.10, 0.50), linetype = "dashed", color = "grey50") +
  scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000), labels = c("0.01", "0.1", "1", "10", "100", "1000")) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent_format(accuracy = 1)) +
  labs(
    title = "Raabane_Ein_Kinya yard: Abbott-corrected viability by hive",
    x = "Dose (µg/vial, log scale)",
    y = "Abbott-corrected Viability",
    color = "Hive"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(size = 14, face = "bold"))

print(p_raabane)

# Individual hive plots for Raabane arranged in one row
cat("\n**Individual hive plots for Raabane_Ein_Kinya yard:**\n")

## 
## **Individual hive plots for Raabane_Ein_Kinya yard:**

# Get unique hives in Raabane
raabane_hives <- unique(live_corrected_raabane$Hive)

# Create individual plots for each hive
raabane_individual_plots <- list()
for (hive_id in raabane_hives) {
  hive_data <- live_corrected_raabane %>% filter(Hive == hive_id)
  hive_model <- fit_per_hive_raabane %>% filter(Hive == hive_id) %>% pull(model) %>% .[[1]]
  
  if (!is.null(hive_model)) {
    # Create prediction line for this hive
    dmin <- min(hive_data$Dose_numeric)
    dmax <- max(hive_data$Dose_numeric)
    dseq <- 10^seq(log10(dmin), log10(dmax), length.out = 200)
    pred_hive <- data.frame(
      Dose_numeric = dseq,
      Predicted_Viability = predict(hive_model, newdata = data.frame(Dose_numeric = dseq), type = "response")
    )
    
    # Create individual hive plot
    p_hive <- ggplot() +
      geom_point(
        data = hive_data,
        aes(x = Dose_numeric, y = Corrected_Viability),
        color = "red", alpha = 0.7, shape = 16, size = 2
      ) +
      geom_line(
        data = pred_hive,
        aes(x = Dose_numeric, y = Predicted_Viability),
        color = "red", linewidth = 1.0
      ) +
      geom_hline(yintercept = c(0.05, 0.10, 0.50), linetype = "dashed", color = "grey50") +
      scale_x_log10() +
      scale_y_continuous(limits = c(0, 1), labels = scales::percent_format(accuracy = 1)) +
      labs(
        title = paste0("Hive ", hive_id),
        x = "Dose (µg/vial)",
        y = "Viability"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 12, face = "bold"),
        axis.title = element_text(size = 10),
        axis.text = element_text(size = 8),
        plot.margin = margin(5, 5, 5, 5)
      )
    
    raabane_individual_plots[[hive_id]] <- p_hive
  }
}

# Arrange individual plots in one row
if (length(raabane_individual_plots) > 0) {
  combined_raabane_plots <- do.call(gridExtra::grid.arrange, c(raabane_individual_plots, ncol = length(raabane_individual_plots)))
  print(combined_raabane_plots)
}

## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 2 1 (1-1,1-1) arrange gtable[layout]
## 3 2 (1-1,2-2) arrange gtable[layout]

# Compute VD bounded for Raabane
raabane_ranges <- live_corrected_raabane %>% group_by(Hive) %>% summarise(dmin = min(Dose_numeric), dmax = max(Dose_numeric), .groups = 'drop')
ld_table_raabane <- fit_per_hive_raabane %>%
  left_join(raabane_ranges, by = "Hive") %>%
  mutate(
    VD5  = pmap_dbl(list(model, dmin, dmax), ~compute_VD_extrapolated(..1, 0.05, ..2, ..3)),
    VD10 = pmap_dbl(list(model, dmin, dmax), ~compute_VD_extrapolated(..1, 0.10, ..2, ..3)),
    VD50 = pmap_dbl(list(model, dmin, dmax), ~compute_VD_extrapolated(..1, 0.50, ..2, ..3))
  ) %>%
  select(Hive, VD5, VD10, VD50)

# (Removed premature Zur VD computation; handled in Zur chunk below)

Zur_Suheita yard: dose–response and LDs per hive

# Focus on Zur_Suheita yard; reuse Abbott-corrected viability prepared earlier

# Build per-vial corrected viability for Live state in Zur_Suheita and attach per-vial totals
live_corrected_zur <- df_viability_corrected %>%
  filter(Yard == "Zur_Suheita", Dose_numeric > 0, Total > 0)# %>%
#  filter(Hive == "3") #%>% # keep only hive 3, as hives 7 and 9 were tested 24 hours post collection, and the mites were not in good conditon
 # filter(Date == "05-08-25") # keep only the date of collection. for the same reason

# Fit GLM per hive
fit_per_hive_zur <- live_corrected_zur %>%
  group_by(Hive) %>%
  group_modify(~{
    df_h <- .x
    if (nrow(df_h) < 3 || length(unique(df_h$Dose_numeric)) < 2) return(tibble(model = list(NULL)))
    model <- tryCatch(
      glm(Corrected_Viability ~ log10(Dose_numeric), data = df_h, weights = Total, family = binomial(link = "logit")),
      error = function(e) NULL
    )
    tibble(model = list(model))
  })

# Compute bounded VD per hive (within observed dose range)
zur_ranges <- live_corrected_zur %>% group_by(Hive) %>% summarise(dmin = min(Dose_numeric), dmax = max(Dose_numeric), .groups = 'drop')
ld_table_zur <- fit_per_hive_zur %>%
  left_join(zur_ranges, by = "Hive") %>%
  mutate(
    VD5  = pmap_dbl(list(model, dmin, dmax), ~compute_VD_extrapolated(..1, 0.05, ..2, ..3)),
    VD10 = pmap_dbl(list(model, dmin, dmax), ~compute_VD_extrapolated(..1, 0.10, ..2, ..3)),
    VD50 = pmap_dbl(list(model, dmin, dmax), ~compute_VD_extrapolated(..1, 0.50, ..2, ..3))
  ) %>%
  select(Hive, VD5, VD10, VD50)

cat("\n**VD estimates (viability = 5%, 10%, 50%) by hive (Zur_Suheita):**\n")

## 
## **VD estimates (viability = 5%, 10%, 50%) by hive (Zur_Suheita):**

ld_table_zur_html <- ld_table_zur
if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(ld_table_zur_html, format = "html", align = c('l','r','r','r')) %>%
    kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover","condensed")) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(ld_table_zur_html)
}

Hive	VD5	VD10	VD50
3	70.3834129	9.6730175	0.0282546
5	NA	NA	NA
7	0.0148820	0.0126596	0.0078677
9	0.0163165	0.0145443	0.0103696

# Summary statistics per hive (compute then render)
zur_summary <- live_corrected_zur %>%
  group_by(Hive) %>%
  summarise(
    n_vials = n(),
    total_mites = sum(Total, na.rm = TRUE),
    mean_mites_per_vial = round(mean(Total, na.rm = TRUE), 1),
    "dose_range_ug/vial" = paste0("(", round(min(Dose_numeric), 3), " - ", round(max(Dose_numeric), 1), ")"),
    .groups = 'drop'
  )
cat("\n**Zur_Suheita yard summary statistics per hive:**\n")

## 
## **Zur_Suheita yard summary statistics per hive:**

if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(zur_summary, format = "html", align = c('l','r','r','r','l')) %>%
    kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover","condensed")) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(zur_summary)
}

Hive	n_vials	total_mites	mean_mites_per_vial	dose_range_ug/vial
3	37	370	10	(0.01 - 10)
5	2	20	10	(0.01 - 10)
7	7	70	10	(0.01 - 5)
9	4	40	10	(0.01 - 10)

# Dose-level summary for Zur_Suheita yard (compute then render)
zur_dose_summary <- live_corrected_zur %>%
  group_by(Dose_numeric) %>%
  summarise(
    n_vials = n(),
    total_mites = sum(Total, na.rm = TRUE),
    mean_viability = round(mean(Corrected_Viability, na.rm = TRUE) * 100, 1),
    se_viability = round((sd(Corrected_Viability, na.rm = TRUE) / sqrt(n())) * 100, 1),
    .groups = 'drop'
  ) %>%
  arrange(Dose_numeric)
cat("\n**Zur_Suheita yard dose-level summary:**\n")

## 
## **Zur_Suheita yard dose-level summary:**

if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(zur_dose_summary, format = "html", align = c('r','r','r','r')) %>%
    kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped","hover","condensed")) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(zur_dose_summary)
}

Dose_numeric	n_vials	total_mites	mean_viability	se_viability
0.01	13	130	45.2	11.4
0.10	8	80	51.7	18.3
0.50	6	60	0.0	0.0
1.00	5	50	27.0	8.7
2.50	6	60	0.0	0.0
5.00	6	60	0.0	0.0
10.00	6	60	22.5	10.8

# Create prediction grid per hive for plotting
pred_lines_zur <- live_corrected_zur %>%
  group_by(Hive) %>%
  summarise(dose_min = min(Dose_numeric), dose_max = max(Dose_numeric), .groups = 'drop') %>%
  inner_join(fit_per_hive_zur, by = "Hive") %>%
  mutate(grid = pmap(list(dose_min, dose_max, model), function(dmin, dmax, m) {
    if (is.null(m)) return(tibble(Dose_numeric = numeric(0), Pred = numeric(0)))
    dseq <- 10^seq(log10(dmin), log10(dmax), length.out = 200)
    tibble(Dose_numeric = dseq, Pred = predict(m, newdata = data.frame(Dose_numeric = dseq), type = "response"))
  })) %>%
  select(Hive, grid) %>%
  unnest(grid)

# Prepare mean ± SE data for Zur hives
zur_hives_summary <- live_corrected_zur %>%
  group_by(Hive, Dose_numeric) %>%
  summarise(
    n_vials = n(),
    mean_viability = mean(Corrected_Viability, na.rm = TRUE),
    se_viability = sd(Corrected_Viability, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  ) %>%
  arrange(Hive, Dose_numeric)

# Plot per-hive regression lines and observations
p_zur <- ggplot() +
  geom_point(
    data = zur_hives_summary,
    aes(x = Dose_numeric, y = mean_viability, color = Hive),
    size = 3, shape = 16
  ) +
  geom_errorbar(
    data = zur_hives_summary,
    aes(x = Dose_numeric, ymin = pmax(0, mean_viability - se_viability),
        ymax = pmin(1, mean_viability + se_viability), color = Hive),
    width = 0.1, linewidth = 0.8
  ) +
  geom_line(
    data = pred_lines_zur,
    aes(x = Dose_numeric, y = Pred, color = Hive),
    linewidth = 1.2
  ) +
  geom_hline(yintercept = c(0.05, 0.10, 0.50), linetype = "dashed", color = "grey50") +
  scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000), labels = c("0.01", "0.1", "1", "10", "100", "1000")) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent_format(accuracy = 1)) +
  labs(
    title = "Zur_Suheita yard: Abbott-corrected viability by hive",
    x = "Dose (µg/vial, log scale)",
    y = "Abbott-corrected Viability",
    color = "Hive"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(size = 14, face = "bold"))

print(p_zur)

# Individual hive plots for Zur_Suheita arranged in one row
cat("\n**Individual hive plots for Zur_Suheita yard:**\n")

## 
## **Individual hive plots for Zur_Suheita yard:**

# Get unique hives in Zur_Suheita
zur_hives <- unique(live_corrected_zur$Hive)

# Create individual plots for each hive
zur_individual_plots <- list()
for (hive_id in zur_hives) {
  hive_data <- live_corrected_zur %>% filter(Hive == hive_id)
  hive_model <- fit_per_hive_zur %>% filter(Hive == hive_id) %>% pull(model) %>% .[[1]]
  
  if (!is.null(hive_model)) {
    # Create prediction line for this hive
    dmin <- min(hive_data$Dose_numeric)
    dmax <- max(hive_data$Dose_numeric)
    dseq <- 10^seq(log10(dmin), log10(dmax), length.out = 200)
    pred_hive <- data.frame(
      Dose_numeric = dseq,
      Predicted_Viability = predict(hive_model, newdata = data.frame(Dose_numeric = dseq), type = "response")
    )
    
    # Color coding: hive 3 by date, others in blue
    if (hive_id == "3") {
      # Hive 3: color by date
      p_hive <- ggplot() +
        geom_point(
          data = hive_data,
          aes(x = Dose_numeric, y = Corrected_Viability, color = Date),
          alpha = 0.7, shape = 16, size = 2
        ) +
        geom_line(
          data = pred_hive,
          aes(x = Dose_numeric, y = Predicted_Viability),
          color = "blue", linewidth = 1.0
        ) +
        geom_hline(yintercept = c(0.05, 0.10, 0.50), linetype = "dashed", color = "grey50") +
        scale_x_log10() +
        scale_y_continuous(limits = c(0, 1), labels = scales::percent_format(accuracy = 1)) +
        labs(
          title = paste0("Hive ", hive_id),
          x = "Dose (µg/vial)",
          y = "Viability",
          color = "Date"
        ) +
        theme_minimal() +
        theme(
          plot.title = element_text(size = 12, face = "bold"),
          axis.title = element_text(size = 10),
          axis.text = element_text(size = 8),
          plot.margin = margin(5, 5, 5, 5),
          legend.position = "none"  # Remove legend for cleaner individual plots
        )
    } else {
      # Other hives: blue color
      p_hive <- ggplot() +
        geom_point(
          data = hive_data,
          aes(x = Dose_numeric, y = Corrected_Viability),
          color = "blue", alpha = 0.7, shape = 16, size = 2
        ) +
        geom_line(
          data = pred_hive,
          aes(x = Dose_numeric, y = Predicted_Viability),
          color = "blue", linewidth = 1.0
        ) +
        geom_hline(yintercept = c(0.05, 0.10, 0.50), linetype = "dashed", color = "grey50") +
        scale_x_log10() +
        scale_y_continuous(limits = c(0, 1), labels = scales::percent_format(accuracy = 1)) +
        labs(
          title = paste0("Hive ", hive_id),
          x = "Dose (µg/vial)",
          y = "Viability"
        ) +
        theme_minimal() +
        theme(
          plot.title = element_text(size = 12, face = "bold"),
          axis.title = element_text(size = 10),
          axis.text = element_text(size = 8),
          plot.margin = margin(5, 5, 5, 5)
        )
    }
    
    zur_individual_plots[[hive_id]] <- p_hive
  }
}

# Arrange individual plots in one row
if (length(zur_individual_plots) > 0) {
  combined_zur_plots <- do.call(gridExtra::grid.arrange, c(zur_individual_plots, ncol = length(zur_individual_plots)))
  print(combined_zur_plots)
}

## TableGrob (1 x 3) "arrange": 3 grobs
##   z     cells    name           grob
## 3 1 (1-1,1-1) arrange gtable[layout]
## 9 2 (1-1,2-2) arrange gtable[layout]
## 7 3 (1-1,3-3) arrange gtable[layout]

# Publication-style dose-response plot for hive 3 (Zur yard) with mean ± SE
cat("\n**Publication-style dose-response plot for Zur_Suheita - Hive 3:**\n")

## 
## **Publication-style dose-response plot for Zur_Suheita - Hive 3:**

# Prepare mean ± SE data for hive 3
hive3_summary <- live_corrected_zur %>%
  filter(Hive == "3") %>%
  group_by(Dose_numeric) %>%
  summarise(
    n_vials = n(),
    mean_viability = mean(Corrected_Viability, na.rm = TRUE),
    se_viability = sd(Corrected_Viability, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  ) %>%
  arrange(Dose_numeric)

# Create publication-style plot
p_hive3_pub <- ggplot(hive3_summary, aes(x = log10(Dose_numeric), y = mean_viability)) +
  geom_point(size = 3, color = "blue", shape = 16) +
  geom_errorbar(
    aes(ymin = pmax(0, mean_viability - se_viability), 
        ymax = pmin(1, mean_viability + se_viability)),
    width = 0.1,
    color = "blue",
    linewidth = 0.8
  ) +
  geom_line(
    data = pred_lines_zur %>% 
      filter(Hive == "3") %>% 
      mutate(log_dose = log10(Dose_numeric)),
    aes(x = log_dose, y = Pred),
    color = "blue",
    linewidth = 1.2
  ) +
  scale_x_continuous(
    breaks = seq(-3, 1, 1),
    labels = c("0.001", "0.01", "0.1", "1", "10"),
    limits = c(-3, 1)
  ) +
  scale_y_continuous(
    limits = c(0, 1),
    labels = c("0", "0.25", "0.5", "0.75", "1.0")
  ) +
  labs(
    title = "Zur_Suheita - Hive 3 Dose-Response",
    x = "log Concentration (µg/vial)",
    y = "Viability (proportion)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5)
  )

print(p_hive3_pub)

# Publication-style dose-response plot for hive 3 in Zur yard by date with separate regression lines
cat("\n**Publication-style dose-response plot for Zur_Suheita - Hive 3 by Date:**\n")

## 
## **Publication-style dose-response plot for Zur_Suheita - Hive 3 by Date:**

# Prepare mean ± SE data for hive 3 by date
hive3_by_date_summary <- live_corrected_zur %>%
  filter(Hive == "3") %>%
  group_by(Date, Dose_numeric) %>%
  summarise(
    n_vials = n(),
    mean_viability = mean(Corrected_Viability, na.rm = TRUE),
    se_viability = sd(Corrected_Viability, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  ) %>%
  arrange(Date, Dose_numeric)

# Fit GLM per date for hive 3
fit_per_date_hive3 <- live_corrected_zur %>%
  filter(Hive == "3") %>%
  group_by(Date) %>%
  group_modify(~{
    df_d <- .x
    if (nrow(df_d) < 3 || length(unique(df_d$Dose_numeric)) < 2) return(tibble(model = list(NULL)))
    model <- tryCatch(
      glm(Corrected_Viability ~ log10(Dose_numeric), data = df_d, weights = Total, family = binomial(link = "logit")),
      error = function(e) NULL
    )
    tibble(model = list(model))
  })

# Create prediction lines for each date
pred_lines_hive3_by_date <- live_corrected_zur %>%
  filter(Hive == "3") %>%
  group_by(Date) %>%
  summarise(dose_min = min(Dose_numeric), dose_max = max(Dose_numeric), .groups = 'drop') %>%
  inner_join(fit_per_date_hive3, by = "Date") %>%
  mutate(grid = pmap(list(dose_min, dose_max, model), function(dmin, dmax, m) {
    if (is.null(m)) return(tibble(Dose_numeric = numeric(0), Pred = numeric(0)))
    dseq <- 10^seq(log10(dmin), log10(dmax), length.out = 200)
    tibble(Dose_numeric = dseq, Pred = predict(m, newdata = data.frame(Dose_numeric = dseq), type = "response"))
  })) %>%
  select(Date, grid) %>%
  unnest(grid)

# Create publication-style plot for hive 3 by date
p_hive3_by_date_pub <- ggplot(hive3_by_date_summary, aes(x = log10(Dose_numeric), y = mean_viability, color = Date)) +
  geom_point(size = 3, shape = 16) +
  geom_errorbar(
    aes(ymin = pmax(0, mean_viability - se_viability), 
        ymax = pmin(1, mean_viability + se_viability)),
    width = 0.1,
    linewidth = 0.8
  ) +
  geom_line(
    data = pred_lines_hive3_by_date %>% 
      mutate(log_dose = log10(Dose_numeric)),
    aes(x = log_dose, y = Pred, color = Date),
    linewidth = 1.2
  ) +
  scale_x_continuous(
    breaks = seq(-3, 1, 1),
    labels = c("0.001", "0.01", "0.1", "1", "10"),
    limits = c(-3, 1)
  ) +
  scale_y_continuous(
    limits = c(0, 1),
    labels = c("0", "0.25", "0.5", "0.75", "1.0")
  ) +
  labs(
    title = "Zur_Suheita - Hive 3 Dose-Response by Date",
    x = "log Concentration (µg/vial)",
    y = "Viability (proportion)",
    color = "Date"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    legend.position = "right"
  )

print(p_hive3_by_date_pub)

Hive Classification Analysis

# Analysis to classify hives into sensitive, benign, and resistant categories
# Based on their overall dose-response characteristics

cat("## Hive Classification Analysis\n")

## ## Hive Classification Analysis

cat("**Objective:** Classify all hives into sensitivity categories based on dose-response patterns\n")

## **Objective:** Classify all hives into sensitivity categories based on dose-response patterns

cat("**Categories:** Sensitive, Benign, Resistant\n\n")

## **Categories:** Sensitive, Benign, Resistant

# 1. Calculate summary statistics for each hive
cat("**1. Hive-level summary statistics:**\n")

## **1. Hive-level summary statistics:**

hive_summary <- df_viability_corrected %>%
  filter(Dose_numeric > 0) %>%
  group_by(Yard, Hive) %>%
  summarise(
    n_vials = n(),
    n_doses = n_distinct(Dose_numeric),
    dose_range = paste0(round(min(Dose_numeric), 3), " - ", round(max(Dose_numeric), 1)),
    mean_viability = mean(Corrected_Viability, na.rm = TRUE),
    median_viability = median(Corrected_Viability, na.rm = TRUE),
    min_viability = min(Corrected_Viability, na.rm = TRUE),
    max_viability = max(Corrected_Viability, na.rm = TRUE),
    sd_viability = sd(Corrected_Viability, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  mutate(
    mean_viability_pct = round(mean_viability * 100, 1),
    median_viability_pct = round(median_viability * 100, 1),
    min_viability_pct = round(min_viability * 100, 1),
    max_viability_pct = round(max_viability * 100, 1),
    cv_viability = round((sd_viability / mean_viability) * 100, 1)
  ) %>%
  arrange(Yard, Hive)

# Render hive summary as styled HTML table
if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(
    hive_summary,
    format = "html",
    align = c('l','l','r','r','l','r','r','r','r','r','r','r','r','r','r')
  ) %>%
    kableExtra::kable_styling(
      full_width = FALSE,
      bootstrap_options = c("striped", "hover", "condensed")
    ) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(hive_summary)
}

Yard	Hive	n_vials	n_doses	dose_range	mean_viability	median_viability	max_viability	sd_viability	mean_viability_pct	median_viability_pct	max_viability_pct	cv_viability
Dan_Zaatar	21	1	1	10 - 10	0.0000000	0.0000000	0.0000000	NA	0.0	0.0	0.0	NA
Dan_Zaatar	29	2	1	10 - 10	0.0000000	0.0000000	0.0000000	0.0000000	0.0	0.0	0.0	NaN
Raabane_Ein_Kinya	2	15	6	0.01 - 5	0.1866667	0.1000000	0.5000000	0.1552264	18.7	10.0	50.0	83.2
Raabane_Ein_Kinya	3	13	5	0.01 - 5	0.2153846	0.1000000	0.7000000	0.2577019	21.5	10.0	70.0	119.6
Shamir	1	10	6	1 - 2000	0.6272727	0.7272727	1.0000000	0.3722465	62.7	72.7	100.0	59.3
Shamir	3	22	8	0.01 - 2000	0.4634298	0.4000000	1.0000000	0.3372881	46.3	40.0	100.0	72.8
Shamir	3_4	4	2	1 - 10	0.2894737	0.2105263	0.7368421	0.3258627	28.9	21.1	73.7	112.6
Shamir	4	6	6	1 - 2000	0.4500000	0.4500000	1.0000000	0.4183300	45.0	45.0	100.0	93.0
Shamir	6	18	6	1 - 2000	0.3222222	0.2500000	1.0000000	0.3606458	32.2	25.0	100.0	111.9
Zur_Suheita	3	37	7	0.01 - 10	0.3082310	0.1363636	1.0000000	0.3907326	30.8	13.6	100.0	126.8
Zur_Suheita	5	2	2	0.01 - 10	0.0000000	0.0000000	0.0000000	0.0000000	0.0	0.0	0.0	NaN
Zur_Suheita	7	7	4	0.01 - 5	0.1064039	0.0000000	0.7448276	0.2815184	10.6	0.0	74.5	264.6
Zur_Suheita	9	4	4	0.01 - 10	0.1396552	0.0000000	0.5586207	0.2793103	14.0	0.0	55.9	200.0

# 2. Calculate LD50 for each hive (if possible)
cat("\n**2. LD50 calculations for each hive:**\n")

## 
## **2. LD50 calculations for each hive:**

# Function to calculate LD50 for a hive
calculate_hive_ld50 <- function(hive_data) {
  if (nrow(hive_data) < 3 || length(unique(hive_data$Dose_numeric)) < 2) {
    return(NA)
  }
  
  model <- tryCatch(
    glm(Corrected_Viability ~ log10(Dose_numeric), 
        data = hive_data, 
        weights = Total, 
        family = binomial(link = "logit")),
    error = function(e) NULL
  )
  
  if (is.null(model)) return(NA)
  
  # Calculate LD50
  coefs <- coef(model)
  if (length(coefs) >= 2 && !is.na(coefs[2]) && coefs[2] != 0) {
    # logit(0.5) = 0, so the formula simplifies
    ld50 <- 10^(-coefs[1] / coefs[2])
    return(ld50)
} else {
    return(NA)
  }
}

# Calculate LD50 for each hive
hive_ld50 <- df_viability_corrected %>%
  filter(Dose_numeric > 0) %>%
  group_by(Yard, Hive) %>%
  group_modify(~ {
    ld50 <- calculate_hive_ld50(.x)
    tibble(LD50 = ld50)
  }) %>%
  ungroup()

# Combine with hive summary
hive_classification <- hive_summary %>%
  left_join(hive_ld50, by = c("Yard", "Hive")) %>%
  mutate(
    LD50_log = ifelse(!is.na(LD50), round(log10(LD50), 2), NA),
    LD50_display = ifelse(!is.na(LD50), round(LD50, 3), "N/A")
  )

# Render hive classification as styled HTML table
if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(
    hive_classification,
    format = "html",
    align = c('l','l','r','r','l','r','r','r','r','r','r','r','r','r','r','r','l','l')
  ) %>%
    kableExtra::kable_styling(
      full_width = FALSE,
      bootstrap_options = c("striped", "hover", "condensed")
    ) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(hive_classification)
}

Yard	Hive	n_vials	n_doses	dose_range	mean_viability	median_viability	max_viability	sd_viability	mean_viability_pct	median_viability_pct	max_viability_pct	cv_viability	LD50	LD50_log	LD50_display
Dan_Zaatar	21	1	1	10 - 10	0.0000000	0.0000000	0.0000000	NA	0.0	0.0	0.0	NA	NA	NA	N/A
Dan_Zaatar	29	2	1	10 - 10	0.0000000	0.0000000	0.0000000	0.0000000	0.0	0.0	0.0	NaN	NA	NA	N/A
Raabane_Ein_Kinya	2	15	6	0.01 - 5	0.1866667	0.1000000	0.5000000	0.1552264	18.7	10.0	50.0	83.2	0.0012599	-2.90	0.001
Raabane_Ein_Kinya	3	13	5	0.01 - 5	0.2153846	0.1000000	0.7000000	0.2577019	21.5	10.0	70.0	119.6	0.0138868	-1.86	0.014
Shamir	1	10	6	1 - 2000	0.6272727	0.7272727	1.0000000	0.3722465	62.7	72.7	100.0	59.3	289.9987998	2.46	289.999
Shamir	3	22	8	0.01 - 2000	0.4634298	0.4000000	1.0000000	0.3372881	46.3	40.0	100.0	72.8	5.4998710	0.74	5.5
Shamir	3_4	4	2	1 - 10	0.2894737	0.2105263	0.7368421	0.3258627	28.9	21.1	73.7	112.6	1.0843518	0.04	1.084
Shamir	4	6	6	1 - 2000	0.4500000	0.4500000	1.0000000	0.4183300	45.0	45.0	100.0	93.0	49.7662056	1.70	49.766
Shamir	6	18	6	1 - 2000	0.3222222	0.2500000	1.0000000	0.3606458	32.2	25.0	100.0	111.9	9.9659035	1.00	9.966
Zur_Suheita	3	37	7	0.01 - 10	0.3082310	0.1363636	1.0000000	0.3907326	30.8	13.6	100.0	126.8	0.0282545	-1.55	0.028
Zur_Suheita	5	2	2	0.01 - 10	0.0000000	0.0000000	0.0000000	0.0000000	0.0	0.0	0.0	NaN	NA	NA	N/A
Zur_Suheita	7	7	4	0.01 - 5	0.1064039	0.0000000	0.7448276	0.2815184	10.6	0.0	74.5	264.6	0.0078678	-2.10	0.008
Zur_Suheita	9	4	4	0.01 - 10	0.1396552	0.0000000	0.5586207	0.2793103	14.0	0.0	55.9	200.0	0.0103693	-1.98	0.01

# 3. Classify hives based on multiple criteria
cat("\n**3. Hive classification based on dose-response characteristics:**\n")

## 
## **3. Hive classification based on dose-response characteristics:**

# Define classification criteria
hive_classification <- hive_classification %>%
  mutate(
    # Classification based on mean viability and LD50
    sensitivity_class = case_when(
      # Resistant: High mean viability (>30%) OR high LD50 (>10 µg/vial)
      mean_viability > 0.30 | (!is.na(LD50) & LD50 > 10) ~ "Resistant",
      
      # Sensitive: Low mean viability (<15%) AND low LD50 (<1 µg/vial)
      mean_viability < 0.15 & (!is.na(LD50) & LD50 < 1) ~ "Sensitive",
      
      # Benign: Everything else (intermediate response)
      TRUE ~ "Benign"
    ),
    
    # Additional classification based on viability range
    viability_class = case_when(
      max_viability < 0.20 ~ "Low variability",
      max_viability > 0.60 ~ "High variability", 
      TRUE ~ "Moderate variability"
    )
  )

# Display classification results
classification_summary <- hive_classification %>%
  select(Yard, Hive, mean_viability_pct, LD50_display, sensitivity_class, viability_class) %>%
  arrange(sensitivity_class, Yard, Hive)

# Render classification summary as styled HTML table
if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(
    classification_summary,
    format = "html",
    align = c('l','l','r','l','l','l')
  ) %>%
    kableExtra::kable_styling(
      full_width = FALSE,
      bootstrap_options = c("striped", "hover", "condensed")
    ) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(classification_summary)
}

Yard	Hive	mean_viability_pct	LD50_display	sensitivity_class	viability_class
Dan_Zaatar	21	0.0	N/A	Benign	Low variability
Dan_Zaatar	29	0.0	N/A	Benign	Low variability
Raabane_Ein_Kinya	2	18.7	0.001	Benign	Moderate variability
Raabane_Ein_Kinya	3	21.5	0.014	Benign	High variability
Shamir	3_4	28.9	1.084	Benign	High variability
Zur_Suheita	5	0.0	N/A	Benign	Low variability
Shamir	1	62.7	289.999	Resistant	High variability
Shamir	3	46.3	5.5	Resistant	High variability
Shamir	4	45.0	49.766	Resistant	High variability
Shamir	6	32.2	9.966	Resistant	High variability
Zur_Suheita	3	30.8	0.028	Resistant	High variability
Zur_Suheita	7	10.6	0.008	Sensitive	High variability
Zur_Suheita	9	14.0	0.01	Sensitive	Moderate variability

# 4. Summary statistics by classification
cat("\n**4. Summary by sensitivity classification:**\n")

## 
## **4. Summary by sensitivity classification:**

class_summary <- hive_classification %>%
  group_by(sensitivity_class) %>%
  summarise(
    n_hives = n(),
    mean_viability_pct = round(mean(mean_viability_pct, na.rm = TRUE), 1),
    median_viability_pct = round(median(mean_viability_pct, na.rm = TRUE), 1),
    mean_ld50 = round(mean(LD50, na.rm = TRUE), 3),
    median_ld50 = round(median(LD50, na.rm = TRUE), 3),
    .groups = 'drop'
  )

# Render class summary as styled HTML table
if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(
    class_summary,
    format = "html",
    align = c('l','r','r','r','r','r')
  ) %>%
    kableExtra::kable_styling(
      full_width = FALSE,
      bootstrap_options = c("striped", "hover", "condensed")
    ) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(class_summary)
}

sensitivity_class	n_hives	mean_viability_pct	median_viability_pct	mean_ld50	median_ld50
Benign	6	11.5	11.5	0.366	0.014
Resistant	5	43.4	43.4	71.052	9.966
Sensitive	2	12.3	12.3	0.009	0.009

# 5. Visualization of hive classification
cat("\n**5. Hive classification visualization:**\n")

## 
## **5. Hive classification visualization:**

# Create scatter plot of mean viability vs LD50
p_classification <- ggplot(hive_classification, aes(x = LD50_log, y = mean_viability_pct, color = sensitivity_class)) +
  geom_point(size = 4, alpha = 0.8) +
  geom_text(aes(label = paste(Yard, Hive, sep = "-")), 
            hjust = -0.2, vjust = 0.5, size = 3) +
  scale_color_manual(values = c("Sensitive" = "blue", "Benign" = "green", "Resistant" = "red")) +
  scale_x_continuous(breaks = seq(-2, 2, 1), labels = c("0.01", "0.1", "1", "10", "100")) +
  labs(
    title = "Hive Classification by Sensitivity",
    subtitle = "Based on mean viability and LD50 values",
    x = "LD50 (µg/vial, log scale)",
    y = "Mean Viability (%)",
    color = "Sensitivity Class"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    legend.position = "bottom"
  )

print(p_classification)

# 6. Detailed breakdown by yard
cat("\n**6. Classification breakdown by yard:**\n")

## 
## **6. Classification breakdown by yard:**

yard_classification <- hive_classification %>%
  group_by(Yard, sensitivity_class) %>%
  summarise(
    n_hives = n(),
    hives = paste(Hive, collapse = ", "),
    .groups = 'drop'
  ) %>%
  pivot_wider(names_from = sensitivity_class, values_from = c(n_hives, hives), 
              names_sep = "_", values_fill = list(n_hives = 0, hives = ""))

# Render yard classification as styled HTML table
if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(
    yard_classification,
    format = "html",
    align = c('l','r','l','r','l','r','l')
  ) %>%
    kableExtra::kable_styling(
      full_width = FALSE,
      bootstrap_options = c("striped", "hover", "condensed")
    ) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(yard_classification)
}

Yard	n_hives_Benign	n_hives_Resistant	n_hives_Sensitive	hives_Benign	hives_Resistant	hives_Sensitive
Dan_Zaatar	2	0	0	21, 29
Raabane_Ein_Kinya	2	0	0	2, 3
Shamir	1	4	0	3_4	1, 3, 4, 6
Zur_Suheita	1	1	2	5	3	7, 9

cat("\n**Analysis complete.**\n")

## 
## **Analysis complete.**

# 7. Diagnostic dose analysis based on sensitive hives
cat("\n**7. Diagnostic dose analysis for sensitive hives:**\n")

## 
## **7. Diagnostic dose analysis for sensitive hives:**

# Get sensitive hives
sensitive_hives <- hive_classification %>%
  filter(sensitivity_class == "Sensitive") %>%
  select(Yard, Hive)

if (nrow(sensitive_hives) > 0) {
  cat(paste0("Found ", nrow(sensitive_hives), " sensitive hives:\n"))
  print(sensitive_hives)
  
  # Get data for sensitive hives only
  sensitive_data <- df_viability_corrected %>%
    filter(Dose_numeric > 0) %>%
    inner_join(sensitive_hives, by = c("Yard", "Hive"))
  
  # Analyze dose-response for sensitive hives
  cat("\n**Dose-response analysis for sensitive hives:**\n")
  sensitive_dose_analysis <- sensitive_data %>%
    group_by(Dose_numeric) %>%
    summarise(
      n_hives = n_distinct(paste(Yard, Hive)),
      n_vials = n(),
      mean_viability = mean(Corrected_Viability, na.rm = TRUE),
      se_viability = sd(Corrected_Viability, na.rm = TRUE) / sqrt(n()),
      min_viability = min(Corrected_Viability, na.rm = TRUE),
      max_viability = max(Corrected_Viability, na.rm = TRUE),
      .groups = 'drop'
    ) %>%
    mutate(
      mean_viability_pct = round(mean_viability * 100, 1),
      se_viability_pct = round(se_viability * 100, 1),
      min_viability_pct = round(min_viability * 100, 1),
      max_viability_pct = round(max_viability * 100, 1)
    ) %>%
    arrange(Dose_numeric)
  
  # Render sensitive dose analysis as styled HTML table
  if (knitr::is_html_output()) {
    html_tbl <- knitr::kable(
      sensitive_dose_analysis,
      format = "html",
      align = c('r','r','r','r','r','r','r','r','r','r')
    ) %>%
      kableExtra::kable_styling(
        full_width = FALSE,
        bootstrap_options = c("striped", "hover", "condensed")
      ) %>%
      kableExtra::row_spec(0, bold = TRUE)
    knitr::asis_output(html_tbl)
  } else {
    print(sensitive_dose_analysis)
  }
  
  # Find doses that give 0.01-0.1 viability (1-10%) in sensitive hives
  cat("\n**Doses giving 0.01-0.1 viability (1-10%) in sensitive hives:**\n")
  diagnostic_candidates <- sensitive_dose_analysis %>%
    filter(mean_viability >= 0.01 & mean_viability <= 0.1) %>%
    arrange(desc(n_hives), desc(n_vials))
  
  if (nrow(diagnostic_candidates) > 0) {
    # Render diagnostic candidates as styled HTML table
    if (knitr::is_html_output()) {
      html_tbl <- knitr::kable(
        diagnostic_candidates,
        format = "html",
        align = c('r','r','r','r','r','r','r','r','r','r')
      ) %>%
        kableExtra::kable_styling(
          full_width = FALSE,
          bootstrap_options = c("striped", "hover", "condensed")
        ) %>%
        kableExtra::row_spec(0, bold = TRUE)
      knitr::asis_output(html_tbl)
    } else {
      print(diagnostic_candidates)
    }
    
    # Select the best diagnostic dose
    best_diagnostic <- diagnostic_candidates[1, ]
    cat("\n**Recommended diagnostic dose based on sensitive hives:**\n")
    cat(paste0("Dose: ", best_diagnostic$Dose_numeric, " µg/vial\n"))
    cat(paste0("Mean viability in sensitive hives: ", round(best_diagnostic$mean_viability, 3), " (SE: ±", round(best_diagnostic$se_viability, 3), ")\n"))
    cat(paste0("Range: ", round(best_diagnostic$min_viability, 3), " - ", round(best_diagnostic$max_viability, 3), "\n"))
    cat(paste0("Tested in: ", best_diagnostic$n_hives, " sensitive hives, ", best_diagnostic$n_vials, " vials\n"))
    
    # Create visualization for diagnostic dose
    cat("\n**Diagnostic dose performance in sensitive hives:**\n")
    
    diagnostic_dose_data <- sensitive_data %>%
      filter(Dose_numeric == best_diagnostic$Dose_numeric) %>%
      mutate(
        Yard_Hive = paste(Yard, Hive, sep = "-")
      )
    
    p_diagnostic_sensitive <- ggplot(diagnostic_dose_data, aes(x = Yard_Hive, y = Corrected_Viability, fill = Yard)) +
      geom_boxplot(alpha = 0.7) +
      geom_jitter(width = 0.2, alpha = 0.6, size = 3) +
      geom_hline(yintercept = c(0.01, 0.1), linetype = "dashed", color = "red", linewidth = 1) +
      geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = 0.1, ymax = Inf), 
                fill = "red", alpha = 0.1, inherit.aes = FALSE) +
      annotate("text", x = length(unique(diagnostic_dose_data$Yard_Hive))/2, y = 0.15, 
               label = "RESISTANCE ZONE\n(>0.1 viability)", 
               color = "red", fontface = "bold", size = 3) +
      scale_y_continuous(limits = c(0, 0.4), breaks = seq(0, 0.4, 0.1), 
                         labels = c("0", "0.1", "0.2", "0.3", "0.4")) +
      labs(
        title = paste0("Diagnostic Dose Performance in Sensitive Hives: ", best_diagnostic$Dose_numeric, " µg/vial"),
        subtitle = paste0("Mean viability: ", round(best_diagnostic$mean_viability, 3), " (n = ", best_diagnostic$n_vials, " vials)"),
        x = "Sensitive Hives",
        y = "Viability (proportion)",
        fill = "Yard"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 14, face = "bold"),
        plot.subtitle = element_text(size = 12),
        axis.title = element_text(size = 12, face = "bold"),
        axis.text = element_text(size = 10),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "bottom"
      )
    
    print(p_diagnostic_sensitive)
    
    # Protocol recommendation
    cat("\n**Recommended protocol based on sensitive hive analysis:**\n")
    cat("1. Use dose of", best_diagnostic$Dose_numeric, "µg/vial for screening\n")
    cat("2. Expected viability in sensitive populations: 0.01-0.1 (1-10%)\n")
    cat("3. If viability > 0.1, population shows potential resistance\n")
    cat("4. This dose is validated on", best_diagnostic$n_hives, "sensitive hives\n")
    
  } else {
    cat("No doses found that give 0.01-0.1 viability in sensitive hives.\n")
    cat("Expanding search to 0.02-0.3 range:\n")
    
    diagnostic_candidates_expanded <- sensitive_dose_analysis %>%
      filter(mean_viability >= 0.02 & mean_viability <= 0.3) %>%
      arrange(desc(n_hives), desc(n_vials))
    
    if (nrow(diagnostic_candidates_expanded) > 0) {
      print(diagnostic_candidates_expanded)
      best_diagnostic <- diagnostic_candidates_expanded[1, ]
      cat("\n**Best alternative diagnostic dose:**\n")
      cat(paste0("Dose: ", best_diagnostic$Dose_numeric, " µg/vial\n"))
      cat(paste0("Mean viability: ", round(best_diagnostic$mean_viability, 3), "\n"))
    } else {
      cat("No suitable diagnostic dose found in sensitive hives.\n")
    }
  }
  
  # 8. Model-based diagnostic dose estimation
  cat("\n**8. Model-based diagnostic dose estimation for 1-10% viability:**\n")
  
  # Function to calculate dose for specific viability level
  calculate_dose_for_viability <- function(hive_data, target_viability) {
    if (nrow(hive_data) < 3 || length(unique(hive_data$Dose_numeric)) < 2) {
      return(NA)
    }
    
    model <- tryCatch(
      glm(Corrected_Viability ~ log10(Dose_numeric), 
          data = hive_data, 
          weights = Total, 
          family = binomial(link = "logit")),
      error = function(e) NULL
    )
    
    if (is.null(model)) return(NA)
    
    # Calculate dose for target viability
    coefs <- coef(model)
    if (length(coefs) >= 2 && !is.na(coefs[2]) && coefs[2] != 0) {
      # logit(target_viability) = coefs[1] + coefs[2] * log10(dose)
      # log10(dose) = (logit(target_viability) - coefs[1]) / coefs[2]
      # dose = 10^((logit(target_viability) - coefs[1]) / coefs[2])
      logit_target <- log(target_viability / (1 - target_viability))
      dose <- 10^((logit_target - coefs[1]) / coefs[2])
      return(dose)
    } else {
      return(NA)
    }
  }
  
  # Calculate doses for 1%, 5%, and 10% viability for each sensitive hive
  model_based_doses <- sensitive_data %>%
    group_by(Yard, Hive) %>%
    group_modify(~ {
      hive_data <- .x
      dose_1pct <- calculate_dose_for_viability(hive_data, 0.01)
      dose_5pct <- calculate_dose_for_viability(hive_data, 0.05)
      dose_10pct <- calculate_dose_for_viability(hive_data, 0.10)
      
      tibble(
        dose_1pct = dose_1pct,
        dose_5pct = dose_5pct,
        dose_10pct = dose_10pct
      )
    }) %>%
    ungroup() %>%
    filter(!is.na(dose_5pct))  # Only include hives where we can calculate doses
  
  if (nrow(model_based_doses) > 0) {
    cat("Model-based dose estimates for 1%, 5%, and 10% viability:\n")
    # Render model-based doses as styled HTML table
    if (knitr::is_html_output()) {
      html_tbl <- knitr::kable(
        model_based_doses,
        format = "html",
        align = c('l','l','r','r','r')
      ) %>%
        kableExtra::kable_styling(
          full_width = FALSE,
          bootstrap_options = c("striped", "hover", "condensed")
        ) %>%
        kableExtra::row_spec(0, bold = TRUE)
      knitr::asis_output(html_tbl)
    } else {
      print(model_based_doses)
    }
    
    # Calculate summary statistics for recommended diagnostic doses
    cat("\n**Summary of model-based diagnostic doses:**\n")
    dose_summary <- model_based_doses %>%
      summarise(
        n_hives = n(),
        mean_dose_1pct = round(mean(dose_1pct, na.rm = TRUE), 3),
        mean_dose_5pct = round(mean(dose_5pct, na.rm = TRUE), 3),
        mean_dose_10pct = round(mean(dose_10pct, na.rm = TRUE), 3),
        median_dose_5pct = round(median(dose_5pct, na.rm = TRUE), 3),
        range_dose_5pct = paste0(round(min(dose_5pct, na.rm = TRUE), 3), " - ", round(max(dose_5pct, na.rm = TRUE), 3)),
        .groups = 'drop'
      )
    
    # Render dose summary as styled HTML table
    if (knitr::is_html_output()) {
      html_tbl <- knitr::kable(
        dose_summary,
        format = "html",
        align = c('r','r','r','r','r','l')
      ) %>%
        kableExtra::kable_styling(
          full_width = FALSE,
          bootstrap_options = c("striped", "hover", "condensed")
        ) %>%
        kableExtra::row_spec(0, bold = TRUE)
      knitr::asis_output(html_tbl)
    } else {
      print(dose_summary)
    }
    
    # Recommend the median dose for 5% viability as diagnostic dose
    recommended_dose <- dose_summary$median_dose_5pct
    cat("\n**Recommended model-based diagnostic dose:**\n")
    cat(paste0("Dose: ", recommended_dose, " µg/vial (median dose for 5% viability)\n"))
    cat(paste0("This dose should give approximately 5% viability in sensitive populations\n"))
    cat(paste0("Based on models from ", dose_summary$n_hives, " sensitive hives\n"))
    cat(paste0("Dose range for 5% viability: ", dose_summary$range_dose_5pct, " µg/vial\n"))
    
  } else {
    cat("Unable to calculate model-based doses for sensitive hives.\n")
  }
  
} else {
  cat("No sensitive hives found in the dataset.\n")
  cat("Cannot determine diagnostic dose based on sensitive population.\n")
}

## Found 2 sensitive hives:
## # A tibble: 2 × 2
##   Yard        Hive 
##   <chr>       <chr>
## 1 Zur_Suheita 7    
## 2 Zur_Suheita 9    
## 
## **Dose-response analysis for sensitive hives:**
## 
## **Doses giving 0.01-0.1 viability (1-10%) in sensitive hives:**
## No doses found that give 0.01-0.1 viability in sensitive hives.
## Expanding search to 0.02-0.3 range:
## No suitable diagnostic dose found in sensitive hives.
## 
## **8. Model-based diagnostic dose estimation for 1-10% viability:**
## Model-based dose estimates for 1%, 5%, and 10% viability:
## 
## **Summary of model-based diagnostic doses:**
## 
## **Recommended model-based diagnostic dose:**
## Dose: 0.016 µg/vial (median dose for 5% viability)
## This dose should give approximately 5% viability in sensitive populations
## Based on models from 2 sensitive hives
## Dose range for 5% viability: 0.015 - 0.016 µg/vial

Focused comparison plots: selected hives with LD lines

# Hives: 7, 9, 3 from Zur_Suheita; 3 from Shamir

# Helper to build per-hive summaries with mean ± SE
summarise_hive_means <- function(data) {
  data %>%
    group_by(Yard, Hive, Dose_numeric) %>%
    summarise(
      n_vials = n(),
      mean_viability = mean(Corrected_Viability, na.rm = TRUE),
      se_viability = sd(Corrected_Viability, na.rm = TRUE) / sqrt(n()),
      .groups = 'drop'
    )
}

# Helper to fit GLM and compute LDs, returning prediction lines and LDs
fit_hive_glm <- function(df_hive) {
  if (nrow(df_hive) < 3 || length(unique(df_hive$Dose_numeric)) < 2) {
    return(list(pred = tibble(Dose_numeric = numeric(0), Pred = numeric(0)),
                LD5 = NA_real_, LD10 = NA_real_, LD50 = NA_real_))
  }
  model <- tryCatch(
    glm(Corrected_Viability ~ log10(Dose_numeric), data = df_hive, weights = Total,
        family = binomial(link = "logit")),
    error = function(e) NULL
  )
  if (is.null(model)) {
    return(list(pred = tibble(Dose_numeric = numeric(0), Pred = numeric(0)),
                LD5 = NA_real_, LD10 = NA_real_, LD50 = NA_real_))
  }
  # prediction grid within observed range
  dmin <- min(df_hive$Dose_numeric); dmax <- max(df_hive$Dose_numeric)
  dseq <- 10^seq(log10(dmin), log10(dmax), length.out = 400)
  pred <- tibble(
    Dose_numeric = dseq,
    Pred = predict(model, newdata = data.frame(Dose_numeric = dseq), type = "response")
  )
  co <- coef(model)
  inv_ld <- function(v) 10^((log(v/(1-v)) - co[1]) / co[2])
  out <- list(pred = pred,
              LD5 = tryCatch(inv_ld(0.05), error = function(e) NA_real_),
              LD10 = tryCatch(inv_ld(0.10), error = function(e) NA_real_),
              LD50 = tryCatch(inv_ld(0.50), error = function(e) NA_real_))
  out
}

# Select data
selected <- df_viability_corrected %>%
  filter((Yard == "Zur_Suheita" & Hive %in% c("7", "9", "3")) |
         (Yard == "Shamir" & Hive %in% c("3"))) %>%
  filter(Dose_numeric > 0, Total > 0)

# Per-hive means for error bars
selected_means <- summarise_hive_means(selected)

# Fit per-hive models for prediction lines and LDs
model_outputs <- selected %>%
  group_by(Yard, Hive) %>%
  group_modify(~{
    res <- fit_hive_glm(.x %>% filter(Dose_numeric > 0, Total > 0))
    tibble(LD5 = res$LD5, LD10 = res$LD10, LD50 = res$LD50,
           pred_list = list(res$pred))
  }) %>%
  ungroup()

pred_lines_sel <- model_outputs %>%
  select(Yard, Hive, pred_list) %>%
  tidyr::unnest(pred_list)

lds_sel <- model_outputs %>% select(Yard, Hive, LD5, LD10, LD50) %>%
  mutate(
    LD5 = ifelse(is.infinite(LD5) | LD5 <= 0, NA, LD5),
    LD10 = ifelse(is.infinite(LD10) | LD10 <= 0, NA, LD10),
    LD50 = ifelse(is.infinite(LD50) | LD50 <= 0, NA, LD50)
  )

# Plot
p_selected <- ggplot() +
  geom_point(
    data = selected_means,
    aes(x = Dose_numeric, y = mean_viability, color = interaction(Yard, Hive)),
    size = 3, shape = 16
  ) +
  geom_errorbar(
    data = selected_means,
    aes(x = Dose_numeric, ymin = pmax(0, mean_viability - se_viability),
        ymax = pmin(1, mean_viability + se_viability), color = interaction(Yard, Hive)),
    width = 0.1, linewidth = 0.8
  ) +
  geom_line(
    data = pred_lines_sel,
    aes(x = Dose_numeric, y = Pred, color = interaction(Yard, Hive)),
    linewidth = 1.2
  ) +
  # Horizontal reference lines
  geom_hline(yintercept = c(0.05, 0.10, 0.50), linetype = "dashed", color = "grey50") +
  scale_x_log10(breaks = c(0.01, 0.1, 1, 10, 100, 1000), labels = c("0.01", "0.1", "1", "10", "100", "1000")) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent_format(accuracy = 1)) +
  labs(
    title = "Selected hives dose–response with reference lines",
    subtitle = "Zur_Suheita: hives 7, 9, 3; Shamir: hive 3",
    x = "Dose (µg/vial, log scale)",
    y = "Abbott-corrected Viability",
    color = "Yard-Hive"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    legend.position = "right"
  )

print(p_selected)

# Build summary table: vial count, mite count, and LDs per hive
summary_sel <- selected %>%
  group_by(Yard, Hive) %>%
  summarise(
    n_vials = n(),
    total_mites = sum(Total, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  left_join(lds_sel, by = c("Yard", "Hive")) %>%
  mutate(
    LD5 = round(LD5, 4),
    LD10 = round(LD10, 4),
    LD50 = round(LD50, 4)
  ) %>%
  arrange(Yard, Hive)

cat("\n**Summary for selected hives (vials, mites, LDs):**\n")

## 
## **Summary for selected hives (vials, mites, LDs):**

# Render selected hives summary as styled HTML table
if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(
    summary_sel,
    format = "html",
    align = c('l','l','r','r','r','r','r')
  ) %>%
    kableExtra::kable_styling(
      full_width = FALSE,
      bootstrap_options = c("striped", "hover", "condensed")
    ) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(summary_sel)
}

Yard	Hive	n_vials	total_mites	LD5	LD10	LD50
Shamir	3	22	220	129744.6783	10078.9189	5.4999
Zur_Suheita	3	37	370	70.3827	9.6730	0.0283
Zur_Suheita	7	7	70	0.0149	0.0127	0.0079
Zur_Suheita	9	4	40	0.0163	0.0145	0.0104

Correlation between lab and field assays

# Load Apivar efficacy data (field assay)
apivar_data <- read.csv("/Users/nuriteliash/Documents/GitHub/Glass-vials-short-term/data/amitraz/apivar_efficacy.csv") %>%
  mutate(
    yard = as.character(yard),
    hive = as.character(hive),
    # Clean up efficacy values (remove #DIV/0! and convert to numeric)
    Apivar_effic_abbotCor = case_when(
      Apivar_effic_abbotCor == "#DIV/0!" ~ NA_real_,
      Apivar_effic_abbotCor == "" ~ NA_real_,
      TRUE ~ as.numeric(Apivar_effic_abbotCor)
    ),
    # Clean up other numeric columns
    infestation_ethanol = as.numeric(infestation_ethanol),
    total_mites_apivar = as.numeric(total_mites_apivar)
  ) %>%
  # Filter out rows with missing efficacy data
  filter(!is.na(Apivar_effic_abbotCor))

cat("**Apivar efficacy data summary:**\n")

## **Apivar efficacy data summary:**

cat("Total records with efficacy data:", nrow(apivar_data), "\n")

## Total records with efficacy data: 105

cat("Unique yards:", length(unique(apivar_data$yard)), "\n")

## Unique yards: 20

cat("Unique hives:", length(unique(paste(apivar_data$yard, apivar_data$hive, sep = "-"))), "\n")

## Unique hives: 83

# Get lab assay data for specific doses (1 and 10 µg/vial) - Shamir yard, hive 3 only
lab_data_dose1 <- df_viability_corrected %>%
  filter(Yard == "Shamir", Hive == "3", Dose_numeric == 1) %>%
  select(Yard, Hive, Corrected_Viability) %>%
  rename(Corrected_Viability_1ug = Corrected_Viability)

lab_data_dose10 <- df_viability_corrected %>%
  filter(Yard == "Shamir", Hive == "3", Dose_numeric == 10) %>%
  select(Yard, Hive, Corrected_Viability) %>%
  rename(Corrected_Viability_10ug = Corrected_Viability)

# Combine lab data
lab_data_combined <- lab_data_dose1 %>%
  full_join(lab_data_dose10, by = c("Yard", "Hive"))

cat("\n**Lab assay data summary:**\n")

## 
## **Lab assay data summary:**

cat("Records with dose 1 µg/vial:", nrow(lab_data_dose1), "\n")

## Records with dose 1 µg/vial: 4

cat("Records with dose 10 µg/vial:", nrow(lab_data_dose10), "\n")

## Records with dose 10 µg/vial: 8

cat("Records with both doses:", nrow(lab_data_combined %>% filter(!is.na(Corrected_Viability_1ug) & !is.na(Corrected_Viability_10ug))), "\n")

## Records with both doses: 32

# Join field and lab data
correlation_data <- apivar_data %>%
  # Standardize yard names to match lab data
  mutate(
    Yard = case_when(
      yard == "Shamir" ~ "Shamir",
      yard == "Raabane_Ein_Kinya" ~ "Raabane_Ein_Kinya", 
      yard == "Zur_Suheita" ~ "Zur_Suheita",
      TRUE ~ yard  # Keep other yards as is
    )
  ) %>%
  # Join with lab data
  left_join(lab_data_combined, by = c("Yard", "hive" = "Hive")) %>%
  # Filter to only include records that have both field and lab data
  filter(!is.na(Apivar_effic_abbotCor) & (!is.na(Corrected_Viability_1ug) | !is.na(Corrected_Viability_10ug))) %>%
  # Select and rename columns as requested
  select(
    Yard,
    Hive = hive,
    Apivar_effic_abbotCor,
    infestation_ethanol,
    total_mites_apivar,
    Corrected_Viability_1ug,
    Corrected_Viability_10ug
  ) %>%
  # Remove any remaining rows with all NA lab values
  filter(!(is.na(Corrected_Viability_1ug) & is.na(Corrected_Viability_10ug)))

cat("\n**Combined dataset summary:**\n")

## 
## **Combined dataset summary:**

cat("Records with both field and lab data:", nrow(correlation_data), "\n")

## Records with both field and lab data: 224

cat("Yards represented:", paste(unique(correlation_data$Yard), collapse = ", "), "\n")

## Yards represented: Shamir

# Display the correlation table
cat("\n**Correlation dataset:**\n")

## 
## **Correlation dataset:**

if (knitr::is_html_output()) {
  html_tbl <- knitr::kable(
    correlation_data,
    format = "html",
    align = c('l','l','r','r','r','r','r')
  ) %>%
    kableExtra::kable_styling(
      full_width = FALSE,
      bootstrap_options = c("striped", "hover", "condensed")
    ) %>%
    kableExtra::row_spec(0, bold = TRUE)
  knitr::asis_output(html_tbl)
} else {
  print(correlation_data)
}

Yard	Hive	Apivar_effic_abbotCor	infestation_ethanol	total_mites_apivar	Corrected_Viability_1ug	Corrected_Viability_10ug
Shamir	3	0.79	0.02	14	0.9000000	0.8000000
Shamir	3	0.79	0.02	14	0.9000000	0.3000000
Shamir	3	0.79	0.02	14	0.9000000	0.4000000
Shamir	3	0.79	0.02	14	0.9000000	0.1000000
Shamir	3	0.79	0.02	14	0.9000000	1.0000000
Shamir	3	0.79	0.02	14	0.9000000	0.9090909
Shamir	3	0.79	0.02	14	0.9000000	0.2500000
Shamir	3	0.79	0.02	14	0.9000000	0.4000000
Shamir	3	0.79	0.02	14	0.2000000	0.8000000
Shamir	3	0.79	0.02	14	0.2000000	0.3000000
Shamir	3	0.79	0.02	14	0.2000000	0.4000000
Shamir	3	0.79	0.02	14	0.2000000	0.1000000
Shamir	3	0.79	0.02	14	0.2000000	1.0000000
Shamir	3	0.79	0.02	14	0.2000000	0.9090909
Shamir	3	0.79	0.02	14	0.2000000	0.2500000
Shamir	3	0.79	0.02	14	0.2000000	0.4000000
Shamir	3	0.79	0.02	14	0.7000000	0.8000000
Shamir	3	0.79	0.02	14	0.7000000	0.3000000
Shamir	3	0.79	0.02	14	0.7000000	0.4000000
Shamir	3	0.79	0.02	14	0.7000000	0.1000000
Shamir	3	0.79	0.02	14	0.7000000	1.0000000
Shamir	3	0.79	0.02	14	0.7000000	0.9090909
Shamir	3	0.79	0.02	14	0.7000000	0.2500000
Shamir	3	0.79	0.02	14	0.7000000	0.4000000
Shamir	3	0.79	0.02	14	0.6363636	0.8000000
Shamir	3	0.79	0.02	14	0.6363636	0.3000000
Shamir	3	0.79	0.02	14	0.6363636	0.4000000
Shamir	3	0.79	0.02	14	0.6363636	0.1000000
Shamir	3	0.79	0.02	14	0.6363636	1.0000000
Shamir	3	0.79	0.02	14	0.6363636	0.9090909
Shamir	3	0.79	0.02	14	0.6363636	0.2500000
Shamir	3	0.79	0.02	14	0.6363636	0.4000000
Shamir	3	0.53	0.03	15	0.9000000	0.8000000
Shamir	3	0.53	0.03	15	0.9000000	0.3000000
Shamir	3	0.53	0.03	15	0.9000000	0.4000000
Shamir	3	0.53	0.03	15	0.9000000	0.1000000
Shamir	3	0.53	0.03	15	0.9000000	1.0000000
Shamir	3	0.53	0.03	15	0.9000000	0.9090909
Shamir	3	0.53	0.03	15	0.9000000	0.2500000
Shamir	3	0.53	0.03	15	0.9000000	0.4000000
Shamir	3	0.53	0.03	15	0.2000000	0.8000000
Shamir	3	0.53	0.03	15	0.2000000	0.3000000
Shamir	3	0.53	0.03	15	0.2000000	0.4000000
Shamir	3	0.53	0.03	15	0.2000000	0.1000000
Shamir	3	0.53	0.03	15	0.2000000	1.0000000
Shamir	3	0.53	0.03	15	0.2000000	0.9090909
Shamir	3	0.53	0.03	15	0.2000000	0.2500000
Shamir	3	0.53	0.03	15	0.2000000	0.4000000
Shamir	3	0.53	0.03	15	0.7000000	0.8000000
Shamir	3	0.53	0.03	15	0.7000000	0.3000000
Shamir	3	0.53	0.03	15	0.7000000	0.4000000
Shamir	3	0.53	0.03	15	0.7000000	0.1000000
Shamir	3	0.53	0.03	15	0.7000000	1.0000000
Shamir	3	0.53	0.03	15	0.7000000	0.9090909
Shamir	3	0.53	0.03	15	0.7000000	0.2500000
Shamir	3	0.53	0.03	15	0.7000000	0.4000000
Shamir	3	0.53	0.03	15	0.6363636	0.8000000
Shamir	3	0.53	0.03	15	0.6363636	0.3000000
Shamir	3	0.53	0.03	15	0.6363636	0.4000000
Shamir	3	0.53	0.03	15	0.6363636	0.1000000
Shamir	3	0.53	0.03	15	0.6363636	1.0000000
Shamir	3	0.53	0.03	15	0.6363636	0.9090909
Shamir	3	0.53	0.03	15	0.6363636	0.2500000
Shamir	3	0.53	0.03	15	0.6363636	0.4000000
Shamir	3	0.68	0.05	19	0.9000000	0.8000000
Shamir	3	0.68	0.05	19	0.9000000	0.3000000
Shamir	3	0.68	0.05	19	0.9000000	0.4000000
Shamir	3	0.68	0.05	19	0.9000000	0.1000000
Shamir	3	0.68	0.05	19	0.9000000	1.0000000
Shamir	3	0.68	0.05	19	0.9000000	0.9090909
Shamir	3	0.68	0.05	19	0.9000000	0.2500000
Shamir	3	0.68	0.05	19	0.9000000	0.4000000
Shamir	3	0.68	0.05	19	0.2000000	0.8000000
Shamir	3	0.68	0.05	19	0.2000000	0.3000000
Shamir	3	0.68	0.05	19	0.2000000	0.4000000
Shamir	3	0.68	0.05	19	0.2000000	0.1000000
Shamir	3	0.68	0.05	19	0.2000000	1.0000000
Shamir	3	0.68	0.05	19	0.2000000	0.9090909
Shamir	3	0.68	0.05	19	0.2000000	0.2500000
Shamir	3	0.68	0.05	19	0.2000000	0.4000000
Shamir	3	0.68	0.05	19	0.7000000	0.8000000
Shamir	3	0.68	0.05	19	0.7000000	0.3000000
Shamir	3	0.68	0.05	19	0.7000000	0.4000000
Shamir	3	0.68	0.05	19	0.7000000	0.1000000
Shamir	3	0.68	0.05	19	0.7000000	1.0000000
Shamir	3	0.68	0.05	19	0.7000000	0.9090909
Shamir	3	0.68	0.05	19	0.7000000	0.2500000
Shamir	3	0.68	0.05	19	0.7000000	0.4000000
Shamir	3	0.68	0.05	19	0.6363636	0.8000000
Shamir	3	0.68	0.05	19	0.6363636	0.3000000
Shamir	3	0.68	0.05	19	0.6363636	0.4000000
Shamir	3	0.68	0.05	19	0.6363636	0.1000000
Shamir	3	0.68	0.05	19	0.6363636	1.0000000
Shamir	3	0.68	0.05	19	0.6363636	0.9090909
Shamir	3	0.68	0.05	19	0.6363636	0.2500000
Shamir	3	0.68	0.05	19	0.6363636	0.4000000
Shamir	3	1.00	0.01	3	0.9000000	0.8000000
Shamir	3	1.00	0.01	3	0.9000000	0.3000000
Shamir	3	1.00	0.01	3	0.9000000	0.4000000
Shamir	3	1.00	0.01	3	0.9000000	0.1000000
Shamir	3	1.00	0.01	3	0.9000000	1.0000000
Shamir	3	1.00	0.01	3	0.9000000	0.9090909
Shamir	3	1.00	0.01	3	0.9000000	0.2500000
Shamir	3	1.00	0.01	3	0.9000000	0.4000000
Shamir	3	1.00	0.01	3	0.2000000	0.8000000
Shamir	3	1.00	0.01	3	0.2000000	0.3000000
Shamir	3	1.00	0.01	3	0.2000000	0.4000000
Shamir	3	1.00	0.01	3	0.2000000	0.1000000
Shamir	3	1.00	0.01	3	0.2000000	1.0000000
Shamir	3	1.00	0.01	3	0.2000000	0.9090909
Shamir	3	1.00	0.01	3	0.2000000	0.2500000
Shamir	3	1.00	0.01	3	0.2000000	0.4000000
Shamir	3	1.00	0.01	3	0.7000000	0.8000000
Shamir	3	1.00	0.01	3	0.7000000	0.3000000
Shamir	3	1.00	0.01	3	0.7000000	0.4000000
Shamir	3	1.00	0.01	3	0.7000000	0.1000000
Shamir	3	1.00	0.01	3	0.7000000	1.0000000
Shamir	3	1.00	0.01	3	0.7000000	0.9090909
Shamir	3	1.00	0.01	3	0.7000000	0.2500000
Shamir	3	1.00	0.01	3	0.7000000	0.4000000
Shamir	3	1.00	0.01	3	0.6363636	0.8000000
Shamir	3	1.00	0.01	3	0.6363636	0.3000000
Shamir	3	1.00	0.01	3	0.6363636	0.4000000
Shamir	3	1.00	0.01	3	0.6363636	0.1000000
Shamir	3	1.00	0.01	3	0.6363636	1.0000000
Shamir	3	1.00	0.01	3	0.6363636	0.9090909
Shamir	3	1.00	0.01	3	0.6363636	0.2500000
Shamir	3	1.00	0.01	3	0.6363636	0.4000000
Shamir	3	0.91	0.12	46	0.9000000	0.8000000
Shamir	3	0.91	0.12	46	0.9000000	0.3000000
Shamir	3	0.91	0.12	46	0.9000000	0.4000000
Shamir	3	0.91	0.12	46	0.9000000	0.1000000
Shamir	3	0.91	0.12	46	0.9000000	1.0000000
Shamir	3	0.91	0.12	46	0.9000000	0.9090909
Shamir	3	0.91	0.12	46	0.9000000	0.2500000
Shamir	3	0.91	0.12	46	0.9000000	0.4000000
Shamir	3	0.91	0.12	46	0.2000000	0.8000000
Shamir	3	0.91	0.12	46	0.2000000	0.3000000
Shamir	3	0.91	0.12	46	0.2000000	0.4000000
Shamir	3	0.91	0.12	46	0.2000000	0.1000000
Shamir	3	0.91	0.12	46	0.2000000	1.0000000
Shamir	3	0.91	0.12	46	0.2000000	0.9090909
Shamir	3	0.91	0.12	46	0.2000000	0.2500000
Shamir	3	0.91	0.12	46	0.2000000	0.4000000
Shamir	3	0.91	0.12	46	0.7000000	0.8000000
Shamir	3	0.91	0.12	46	0.7000000	0.3000000
Shamir	3	0.91	0.12	46	0.7000000	0.4000000
Shamir	3	0.91	0.12	46	0.7000000	0.1000000
Shamir	3	0.91	0.12	46	0.7000000	1.0000000
Shamir	3	0.91	0.12	46	0.7000000	0.9090909
Shamir	3	0.91	0.12	46	0.7000000	0.2500000
Shamir	3	0.91	0.12	46	0.7000000	0.4000000
Shamir	3	0.91	0.12	46	0.6363636	0.8000000
Shamir	3	0.91	0.12	46	0.6363636	0.3000000
Shamir	3	0.91	0.12	46	0.6363636	0.4000000
Shamir	3	0.91	0.12	46	0.6363636	0.1000000
Shamir	3	0.91	0.12	46	0.6363636	1.0000000
Shamir	3	0.91	0.12	46	0.6363636	0.9090909
Shamir	3	0.91	0.12	46	0.6363636	0.2500000
Shamir	3	0.91	0.12	46	0.6363636	0.4000000
Shamir	3	0.33	0.07	30	0.9000000	0.8000000
Shamir	3	0.33	0.07	30	0.9000000	0.3000000
Shamir	3	0.33	0.07	30	0.9000000	0.4000000
Shamir	3	0.33	0.07	30	0.9000000	0.1000000
Shamir	3	0.33	0.07	30	0.9000000	1.0000000
Shamir	3	0.33	0.07	30	0.9000000	0.9090909
Shamir	3	0.33	0.07	30	0.9000000	0.2500000
Shamir	3	0.33	0.07	30	0.9000000	0.4000000
Shamir	3	0.33	0.07	30	0.2000000	0.8000000
Shamir	3	0.33	0.07	30	0.2000000	0.3000000
Shamir	3	0.33	0.07	30	0.2000000	0.4000000
Shamir	3	0.33	0.07	30	0.2000000	0.1000000
Shamir	3	0.33	0.07	30	0.2000000	1.0000000
Shamir	3	0.33	0.07	30	0.2000000	0.9090909
Shamir	3	0.33	0.07	30	0.2000000	0.2500000
Shamir	3	0.33	0.07	30	0.2000000	0.4000000
Shamir	3	0.33	0.07	30	0.7000000	0.8000000
Shamir	3	0.33	0.07	30	0.7000000	0.3000000
Shamir	3	0.33	0.07	30	0.7000000	0.4000000
Shamir	3	0.33	0.07	30	0.7000000	0.1000000
Shamir	3	0.33	0.07	30	0.7000000	1.0000000
Shamir	3	0.33	0.07	30	0.7000000	0.9090909
Shamir	3	0.33	0.07	30	0.7000000	0.2500000
Shamir	3	0.33	0.07	30	0.7000000	0.4000000
Shamir	3	0.33	0.07	30	0.6363636	0.8000000
Shamir	3	0.33	0.07	30	0.6363636	0.3000000
Shamir	3	0.33	0.07	30	0.6363636	0.4000000
Shamir	3	0.33	0.07	30	0.6363636	0.1000000
Shamir	3	0.33	0.07	30	0.6363636	1.0000000
Shamir	3	0.33	0.07	30	0.6363636	0.9090909
Shamir	3	0.33	0.07	30	0.6363636	0.2500000
Shamir	3	0.33	0.07	30	0.6363636	0.4000000
Shamir	3	0.10	0.12	52	0.9000000	0.8000000
Shamir	3	0.10	0.12	52	0.9000000	0.3000000
Shamir	3	0.10	0.12	52	0.9000000	0.4000000
Shamir	3	0.10	0.12	52	0.9000000	0.1000000
Shamir	3	0.10	0.12	52	0.9000000	1.0000000
Shamir	3	0.10	0.12	52	0.9000000	0.9090909
Shamir	3	0.10	0.12	52	0.9000000	0.2500000
Shamir	3	0.10	0.12	52	0.9000000	0.4000000
Shamir	3	0.10	0.12	52	0.2000000	0.8000000
Shamir	3	0.10	0.12	52	0.2000000	0.3000000
Shamir	3	0.10	0.12	52	0.2000000	0.4000000
Shamir	3	0.10	0.12	52	0.2000000	0.1000000
Shamir	3	0.10	0.12	52	0.2000000	1.0000000
Shamir	3	0.10	0.12	52	0.2000000	0.9090909
Shamir	3	0.10	0.12	52	0.2000000	0.2500000
Shamir	3	0.10	0.12	52	0.2000000	0.4000000
Shamir	3	0.10	0.12	52	0.7000000	0.8000000
Shamir	3	0.10	0.12	52	0.7000000	0.3000000
Shamir	3	0.10	0.12	52	0.7000000	0.4000000
Shamir	3	0.10	0.12	52	0.7000000	0.1000000
Shamir	3	0.10	0.12	52	0.7000000	1.0000000
Shamir	3	0.10	0.12	52	0.7000000	0.9090909
Shamir	3	0.10	0.12	52	0.7000000	0.2500000
Shamir	3	0.10	0.12	52	0.7000000	0.4000000
Shamir	3	0.10	0.12	52	0.6363636	0.8000000
Shamir	3	0.10	0.12	52	0.6363636	0.3000000
Shamir	3	0.10	0.12	52	0.6363636	0.4000000
Shamir	3	0.10	0.12	52	0.6363636	0.1000000
Shamir	3	0.10	0.12	52	0.6363636	1.0000000
Shamir	3	0.10	0.12	52	0.6363636	0.9090909
Shamir	3	0.10	0.12	52	0.6363636	0.2500000
Shamir	3	0.10	0.12	52	0.6363636	0.4000000

# Create correlation plots
if (nrow(correlation_data) > 0) {
  cat("\n**Correlation Analysis:**\n")
  
  # Plot 1: Field efficacy vs Lab viability at 1 µg/vial
  if (sum(!is.na(correlation_data$Corrected_Viability_1ug)) > 2) {
    p_corr_1ug <- ggplot(correlation_data, aes(x = Corrected_Viability_1ug, y = Apivar_effic_abbotCor)) +
      geom_point(aes(color = Yard), size = 3, alpha = 0.7) +
      geom_smooth(method = "lm", se = TRUE, color = "black", linewidth = 1) +
      labs(
        title = "Field Efficacy vs Lab Viability (1 µg/vial)",
        x = "Lab Viability (1 µg/vial)",
        y = "Field Efficacy (Apivar)",
        color = "Yard"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 14, face = "bold"),
        axis.title = element_text(size = 12, face = "bold"),
        axis.text = element_text(size = 10)
      )
    
    print(p_corr_1ug)
    
    # Calculate correlation coefficient
    cor_1ug <- cor(correlation_data$Corrected_Viability_1ug, correlation_data$Apivar_effic_abbotCor, use = "complete.obs")
    cat("Correlation coefficient (1 µg/vial):", round(cor_1ug, 3), "\n")
  }
  
  # Plot 2: Field efficacy vs Lab viability at 10 µg/vial
  if (sum(!is.na(correlation_data$Corrected_Viability_10ug)) > 2) {
    p_corr_10ug <- ggplot(correlation_data, aes(x = Corrected_Viability_10ug, y = Apivar_effic_abbotCor)) +
      geom_point(aes(color = Yard), size = 3, alpha = 0.7) +
      geom_smooth(method = "lm", se = TRUE, color = "black", linewidth = 1) +
      labs(
        title = "Field Efficacy vs Lab Viability (10 µg/vial)",
        x = "Lab Viability (10 µg/vial)",
        y = "Field Efficacy (Apivar)",
        color = "Yard"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 14, face = "bold"),
        axis.title = element_text(size = 12, face = "bold"),
        axis.text = element_text(size = 10)
      )
    
    print(p_corr_10ug)
    
    # Calculate correlation coefficient
    cor_10ug <- cor(correlation_data$Corrected_Viability_10ug, correlation_data$Apivar_effic_abbotCor, use = "complete.obs")
    cat("Correlation coefficient (10 µg/vial):", round(cor_10ug, 3), "\n")
  }
  
  # Summary statistics
  cat("\n**Summary Statistics:**\n")
  summary_stats <- correlation_data %>%
    summarise(
      n_records = n(),
      n_yards = n_distinct(Yard),
      mean_field_efficacy = round(mean(Apivar_effic_abbotCor, na.rm = TRUE), 3),
      mean_lab_viability_1ug = round(mean(Corrected_Viability_1ug, na.rm = TRUE), 3),
      mean_lab_viability_10ug = round(mean(Corrected_Viability_10ug, na.rm = TRUE), 3),
      .groups = 'drop'
    )
  
  if (knitr::is_html_output()) {
    html_tbl <- knitr::kable(
      summary_stats,
      format = "html",
      align = c('r','r','r','r','r')
    ) %>%
      kableExtra::kable_styling(
        full_width = FALSE,
        bootstrap_options = c("striped", "hover", "condensed")
      ) %>%
      kableExtra::row_spec(0, bold = TRUE)
    knitr::asis_output(html_tbl)
  } else {
    print(summary_stats)
  }
  
} else {
  cat("No overlapping data found between field and lab assays.\n")
  cat("This could be due to:\n")
  cat("- Different yard names between datasets\n")
  cat("- Different hive numbering systems\n")
  cat("- No lab data available for the doses tested in field trials\n")
}

## 
## **Correlation Analysis:**

## Correlation coefficient (1 µg/vial): 0

## Correlation coefficient (10 µg/vial): 0 
## 
## **Summary Statistics:**

n_records	n_yards	mean_field_efficacy	mean_lab_viability_1ug	mean_lab_viability_10ug
224	1	0.62	0.609	0.52