# Load necessary libraries
pacman::p_load(pacman, tidyverse, reshape2, gridExtra)  

# Define the roles of each bacterial genus 
bacterial_roles <- data.frame(
  Bacterial_Genera = c("Methylobacterium", "Hymenobacter", "Spirosoma", "Zymomonas", 
                       "Bdellovibrio", "Kineococcus", "Deinococcus", "Amnibacterium", 
                       "Paenibacillus", "Sorangium", "Rickettsiaceae sp.", "Oligoflexales sp.", 
                       "Modestobacter", "Cohnella", "Chitinophaga", "Burkholderia", 
                       "Pantoea", "Erwinia", "Acinetobacter", "Chryseobacterium", 
                       "Reyranella", "Holophagaceae sp.", "Caulobacter", "Moraxella", 
                       "Arcicella", "Rhizobacter", "Micrococcus", "Rhodospirillaceae sp.", 
                       "Enterobacteriaceae sp.", "Mycobacterium", "Escherichia-Shigella", 
                       "Polynucleobacter", "Sphingomonadaceae sp.", "Rhodococcus", "Anaerococcus",
                       "Brevundimonas", "Rickettsiales sp.", "Flavobacterium", "Nocardia",
                       "Kineosporia", "Nakamurella", "Microbacteriaceae sp.", "Armatimonadetes sp.", 
                       "Pontibacter", "Mucilaginibacter", "Methylotenera", "Niabella", "Pseudoclavibacter", 
                       "Nocardioides", "Sphingobacteriaceae sp.", "Pedobacter", "Devosia", "Chloroplast"),
  Role = c("Good", "Neutral", "Neutral", "Good", "Good", "Neutral", "Good", "Neutral",
           "Good", "Neutral", "Bad", "Neutral", "Neutral", "Neutral", "Good", "Mixed", 
           "Mixed", "Bad", "Bad", "Bad", "Bad", "Neutral", "Neutral", "Neutral", "Bad", 
           "Neutral", "Good", "Neutral", "Good", "Mixed", "Bad", "Bad", "Neutral", 
           "Neutral", "Bad", "Bad", "Bad", "Bad", "Bad", "Neutral", "Neutral", "Neutral",
           "Neutral", "Neutral", "Neutral", "Neutral", "Neutral", "Neutral", "Neutral", 
           "Neutral", "Neutral", "Neutral", "Neutral")
)

# Load the CSV file
apples <- read_csv("apples.csv", col_names = TRUE)
## Rows: 34 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Bacterial_Genera, Bacterial_Orders
## dbl (4): Organic_Genera, Conventional_Genera, Organic_Orders, Conventional_O...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Heatmap Plot
# Reshape the data for heatmap
heatmap_data <- apples %>%
  pivot_longer(cols = c("Organic_Genera", "Conventional_Genera"), names_to = "Management", values_to = "Abundance")

ggplot(heatmap_data, aes(x = Bacterial_Genera, y = Management, fill = Abundance)) +
  geom_tile() +
  scale_fill_gradient(low = "#ffff99", high = "#cc0000") +
  labs(x = "Bacterial Genera", y = "Management Type", fill = "Relative Abundance",
       title = "Heatmap of Relative Abundance in Organic vs Conventional Apples") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Merge the roles into the apples dataset
apples <- apples %>%
  left_join(bacterial_roles, by = "Bacterial_Genera")

# Function to create bar plots
create_bar_plot <- function(data, role) {
  
  # Filter data for the specific role
  data_filtered <- data %>% filter(Role == role) 
  
  # Remove rows with missing values in Organic_Genera or Conventional_Genera
  data_filtered <- data_filtered %>% 
    filter(!is.na(Organic_Genera), !is.na(Conventional_Genera)) 
  
  # Melt the dataframe to long format for plotting
  genera_data <- melt(data_filtered[, c("Bacterial_Genera", "Organic_Genera", "Conventional_Genera")],
                      id.vars = "Bacterial_Genera")
  
  # Create the bar chart with adjusted y-axis limits
  ggplot(genera_data, aes(x = Bacterial_Genera, y = value, fill = variable)) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_y_continuous(limits = c(0, NA)) + # Adjust y-axis to start from 0 and automatically determine the upper limit
    theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1, margin = margin(t = 10))) +
    labs(x = "Bacterial Genera", y = "Relative Abundance", fill = "Management Type",
         title = paste("Relative Abundance of", role, "Bacterial Genera in Organic vs Conventional Apples"))
}

# Create plots for each role
good_plot <- create_bar_plot(apples, "Good")
bad_plot <- create_bar_plot(apples, "Bad")
neutral_plot <- create_bar_plot(apples, "Neutral")
mixed_plot <- create_bar_plot(apples, "Mixed")

# Combine the four plots
grid.arrange(good_plot, bad_plot, neutral_plot, mixed_plot, nrow = 2, ncol = 2)