#Boxplot of total fertilized by Cross ID
#Numbers are based by each individuals of each group by total fertilized (whole #)

#load data set
library(readr)
larvae_total_fert <- read_csv("~/Desktop/Larvae hybrid counts /larvae total fert.csv")
## Rows: 104 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Cross ID
## dbl (1): Total Fertilized
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load necessary libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(multcompView)

# Extract the Cross ID group for labeling
larvae_total_fert <- larvae_total_fert %>%
  mutate(Cross_Group = sub("-.*", "", `Cross ID`)) # Extracts only the letter part before the hyphen

# Perform one-way ANOVA
anova_result <- aov(`Total Fertilized` ~ Cross_Group, data = larvae_total_fert)

# Display the summary of the ANOVA test
summary(anova_result)
##             Df    Sum Sq   Mean Sq F value  Pr(>F)   
## Cross_Group  8 1.346e+10 1.682e+09   2.932 0.00585 **
## Residuals   91 5.222e+10 5.738e+08                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 4 observations deleted due to missingness
# Plot residuals to check assumptions of ANOVA
par(mfrow = c(2, 2))  # Create 2x2 grid for diagnostic plots
plot(anova_result)

# Perform Tukey's HSD post-hoc test for group comparisons
tukey_result <- TukeyHSD(anova_result)
print(tukey_result)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = `Total Fertilized` ~ Cross_Group, data = larvae_total_fert)
## 
## $Cross_Group
##             diff        lwr       upr     p adj
## B-A  -4074.00000 -39957.310 31809.310 0.9999910
## C-A -20319.08333 -53884.846 13246.679 0.5994830
## D-A -27901.22222 -63784.532  7982.088 0.2601032
## E-A  16214.00000 -19669.310 52097.310 0.8808363
## G-A   3693.33333 -29872.429 37259.096 0.9999930
## H-A -10658.41667 -44224.179 22907.346 0.9841142
## J-A     20.41667 -33545.346 33586.179 1.0000000
## K-A  -3220.20833 -34936.873 28496.456 0.9999962
## C-B -16245.08333 -49810.846 17320.679 0.8345385
## D-B -23827.22222 -59710.532 12056.088 0.4731515
## E-B  20288.00000 -15595.310 56171.310 0.6842208
## G-B   7767.33333 -25798.429 41333.096 0.9981246
## H-B  -6584.41667 -40150.179 26981.346 0.9994308
## J-B   4094.41667 -29471.346 37660.179 0.9999844
## K-B    853.79167 -30862.873 32570.456 1.0000000
## D-C  -7582.13889 -41147.902 25983.624 0.9984202
## E-C  36533.08333   2967.321 70098.846 0.0223949
## G-C  24012.41667  -7063.441 55088.274 0.2678113
## H-C   9660.66667 -21415.191 40736.524 0.9861259
## J-C  20339.50000 -10736.358 51415.358 0.4933399
## K-C  17098.87500 -11969.928 46167.678 0.6362630
## E-D  44115.22222   8231.912 79998.532 0.0053933
## G-D  31594.55556  -1971.207 65160.318 0.0816456
## H-D  17242.80556 -16322.957 50808.568 0.7844811
## J-D  27921.63889  -5644.124 61487.402 0.1836860
## K-D  24681.01389  -7035.651 56397.678 0.2591322
## G-E -12520.66667 -46086.429 21045.096 0.9576572
## H-E -26872.41667 -60438.179  6693.346 0.2254623
## J-E -16193.58333 -49759.346 17372.179 0.8369414
## K-E -19434.20833 -51150.873 12282.456 0.5834946
## H-G -14351.75000 -45427.608 16724.108 0.8673771
## J-G  -3672.91667 -34748.774 27402.941 0.9999878
## K-G  -6913.54167 -35982.345 22155.262 0.9977243
## J-H  10678.83333 -20397.024 41754.691 0.9740190
## K-H   7438.20833 -21630.595 36507.012 0.9962114
## K-J  -3240.62500 -32309.428 25828.178 0.9999922
# Visualize Tukey's HSD results
plot(tukey_result, las = 2)

# Filter the dataset to include only specified Cross ID groups
selected_cross_groups <- c("A", "B", "C", "D", "E", "G", "H", "J", "K")
filtered_data <- larvae_total_fert %>%
  filter(Cross_Group %in% selected_cross_groups)

# Create the boxplot 
ggplot(filtered_data, aes(x = Cross_Group, y = `Total Fertilized`, fill = Cross_Group)) +
  geom_boxplot(color = "black", outlier.color = "red", outlier.shape = 21) +  
  labs(
    x = "Cross ID",
    y = "Fertilization Rates",
    title = "Fertilization Rates by Cross Group"
  ) +
  theme_minimal() + 
  theme(
    plot.title = element_text(hjust = 0.5, vjust = -1, size = 14, face = "bold"),  
    panel.border = element_rect(color = "black", fill = NA, size = 1),  
    axis.title = element_text(size = 12), 
    axis.text = element_text(size = 10),  
    legend.position = "right"  
  ) +
  scale_fill_brewer(palette = "Set3")  
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.