#' ---
#' title: "BOXPLOT Script based on groups"
#' author: "Upasna Srivastava"
#' date: "2024-09-25"
#' output: html_document
#' ---

# Expression table: one row per sample, one numeric column per gene.
# Values are laid out three per line, i.e. one line per experimental group.
data <- data.frame(
  Sample = c(
    "BV2_1", "BV2_2", "BV2_3",
    "BV2_LPS1", "BV2_LPS2", "BV2_LPS3",
    "BV2_73_1", "BV2_73_2", "BV2_73_3",
    "BV2_73_LPS_1", "BV2_73_LPS_2", "BV2_73_LPS_3",
    "BV2_74_1", "BV2_74_2", "BV2_74_3",
    "BV2_74_LPS_1", "BV2_74_LPS_2", "BV2_74_LPS_3"
  ),
  Stat1 = c(
    15.59302, 16.67622, 16.94814,
    17.03402, 17.32190, 18.34435,
    22.44578, 18.40087, 18.97113,
    18.82411, 22.40567, 23.43608,
    19.70951, 19.22592, 19.24438,
    22.95850, 23.26308, 22.88869
  ),
  Stat3 = c(
    17.61255, 17.07630, 16.67930,
    18.12450, 17.81749, 17.98019,
    20.98712, 21.97632, 22.21805,
    22.31064, 21.69203, 22.25930,
    20.29302, 21.48824, 20.61366,
    21.33336, 21.52830, 21.48058
  ),
  Kcna3 = c(
    17.60650, 16.98960, 16.59115,
    17.47375, 16.33725, 17.32704,
    27.40778, 27.42072, 27.90793,
    27.81759, 27.32015, 27.44740,
    27.48709, 27.22643, 26.90194,
    27.10410, 27.37716, 26.60006
  )
)

# Sample IDs belonging to each group (three replicates per group)
group1 <- paste0("BV2_", seq_len(3))
group2 <- paste0("BV2_LPS", seq_len(3))
group3 <- paste0("BV2_73_", seq_len(3))
group4 <- paste0("BV2_73_LPS_", seq_len(3))
group5 <- paste0("BV2_74_", seq_len(3))
group6 <- paste0("BV2_74_LPS_", seq_len(3))

# Label every sample with its group; samples found in no group become 'Other'.
# The first group containing a sample wins, which match() guarantees because it
# returns the first position in the concatenated membership vector.
data$Group <- local({
  membership <- list(
    "Group 1" = group1, "Group 2" = group2, "Group 3" = group3,
    "Group 4" = group4, "Group 5" = group5, "Group 6" = group6
  )
  label_per_id <- rep(names(membership), lengths(membership))
  position <- match(data$Sample, unlist(membership, use.names = FALSE))
  assigned <- label_per_id[position]
  assigned[is.na(position)] <- "Other"
  assigned
})

# Reshape wide -> long: one row per (sample, gene) pair, with the gene name in
# 'Gene' and its value in 'Expression'; 'Sample' and 'Group' are kept as IDs.
library(reshape2)
library(ggplot2)
melted_data <- melt(
  data,
  id.vars = c("Sample", "Group"),
  variable.name = "Gene",
  value.name = "Expression"
)

# Boxplot of expression per group, drawn from the long-format `melted_data`.
# One facet per gene; facets share the same x and y scales and sit side by
# side in a single row of three panels.
ggplot(melted_data, aes(x = Group, y = Expression, fill = Group)) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Expression of Stat1, Stat3, and Kcna3 by Group",
    x = "Group",
    y = "Expression"
  ) +
  # Fixed colour per group, keyed by the values of the `Group` column
  scale_fill_manual(values = c(
    "Group 1" = "#1f77b4", # blue
    "Group 2" = "#2ca02c", # green
    "Group 3" = "#d62728", # red
    "Group 4" = "#ff7f0e", # orange
    "Group 5" = "#9467bd", # purple
    "Group 6" = "#7f7f7f"  # gray
  )) +
  facet_wrap(~ Gene, scales = "fixed", ncol = 3) +
  theme(
    # Outer box around each panel. `linewidth` replaces the `size` argument,
    # which is deprecated for element_rect() as of ggplot2 3.4.0.
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
    panel.grid.major = element_blank(), # remove major grid lines
    panel.grid.minor = element_blank(), # remove minor grid lines
    plot.title = element_text(hjust = 0.5), # centre the title
    strip.background = element_rect(fill = "white"), # facet label background
    strip.text = element_text(face = "bold") # bold facet labels
  )
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Rebuild the expression table (one row per sample, one column per gene)
data <- data.frame(
  Sample = c(
    "BV2_1", "BV2_2", "BV2_3", "BV2_LPS1", "BV2_LPS2", "BV2_LPS3",
    "BV2_73_1", "BV2_73_2", "BV2_73_3",
    "BV2_73_LPS_1", "BV2_73_LPS_2", "BV2_73_LPS_3",
    "BV2_74_1", "BV2_74_2", "BV2_74_3",
    "BV2_74_LPS_1", "BV2_74_LPS_2", "BV2_74_LPS_3"
  ),
  Stat1 = c(
    15.59302, 16.67622, 16.94814, 17.03402, 17.32190, 18.34435,
    22.44578, 18.40087, 18.97113, 18.82411, 22.40567, 23.43608,
    19.70951, 19.22592, 19.24438, 22.95850, 23.26308, 22.88869
  ),
  Stat3 = c(
    17.61255, 17.07630, 16.67930, 18.12450, 17.81749, 17.98019,
    20.98712, 21.97632, 22.21805, 22.31064, 21.69203, 22.25930,
    20.29302, 21.48824, 20.61366, 21.33336, 21.52830, 21.48058
  ),
  Kcna3 = c(
    17.60650, 16.98960, 16.59115, 17.47375, 16.33725, 17.32704,
    27.40778, 27.42072, 27.90793, 27.81759, 27.32015, 27.44740,
    27.48709, 27.22643, 26.90194, 27.10410, 27.37716, 26.60006
  )
)

# Replicate sample IDs that make up each of the six groups
group1 <- c("BV2_1", "BV2_2", "BV2_3")
group2 <- c("BV2_LPS1", "BV2_LPS2", "BV2_LPS3")
group3 <- c("BV2_73_1", "BV2_73_2", "BV2_73_3")
group4 <- c("BV2_73_LPS_1", "BV2_73_LPS_2", "BV2_73_LPS_3")
group5 <- c("BV2_74_1", "BV2_74_2", "BV2_74_3")
group6 <- c("BV2_74_LPS_1", "BV2_74_LPS_2", "BV2_74_LPS_3")

# Tag each row with 'Group 1' ... 'Group 6', defaulting to 'Other'.
# Groups are applied from last to first so that, should a sample appear in
# more than one group, the lowest-numbered group takes precedence.
data$Group <- local({
  members <- list(group1, group2, group3, group4, group5, group6)
  tags <- rep("Other", nrow(data))
  for (k in rev(seq_along(members))) {
    tags[data$Sample %in% members[[k]]] <- paste("Group", k)
  }
  tags
})

# Convert to long format for ggplot: the gene columns are stacked into a
# 'Gene' / 'Expression' pair while 'Sample' and 'Group' identify each row.
library(reshape2)
library(ggplot2)
melted_data <- melt(
  data,
  value.name = "Expression",
  variable.name = "Gene",
  id.vars = c("Sample", "Group")
)

# Faceted boxplot (one panel per gene, three panels in a row) that shows the
# groups under descriptive condition names.
#
# The `Group` column of `melted_data` holds 'Group 1' ... 'Group 6', so the
# manual fill scale has to be keyed by those values; keying it by the display
# names matches nothing and leaves every box uncoloured ("No shared levels
# found" warning). The display names are supplied through `labels` instead.
# NOTE(review): the Group -> condition mapping below is inferred from the
# sample IDs in each group (e.g. BV2_73_* -> SR73) -- confirm it is correct.
group_labels <- c(
  "Group 1" = "BV2",
  "Group 2" = "BV2-LPS",
  "Group 3" = "SR73",
  "Group 4" = "SR73-LPS",
  "Group 5" = "SR74",
  "Group 6" = "SR74-LPS"
)
group_colours <- c(
  "Group 1" = "#1f77b4", # blue
  "Group 2" = "#2ca02c", # green
  "Group 3" = "#d62728", # red
  "Group 4" = "#ff7f0e", # orange
  "Group 5" = "#9467bd", # purple
  "Group 6" = "#7f7f7f"  # gray
)

ggplot(melted_data, aes(x = Group, y = Expression, fill = Group)) +
  geom_boxplot(
    outlier.colour = "black",
    outlier.shape = 16,
    outlier.size = 1.5
  ) +
  facet_wrap(~ Gene, scales = "fixed", ncol = 3) + # three panels in one row
  theme_classic() + # no background grid; the panel box is added below
  labs(title = "", x = "", y = "Expression") +
  # Show condition names on the x axis and in the legend
  scale_x_discrete(labels = group_labels) +
  scale_fill_manual(values = group_colours, labels = group_labels) +
  theme(
    strip.text = element_text(face = "bold", size = 12), # bold gene names
    axis.text.x = element_text(angle = 45, hjust = 1), # rotated for legibility
    axis.title.y = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.position = "right",
    # Outer box around each panel. `linewidth` replaces the `size` argument,
    # which is deprecated for element_rect() as of ggplot2 3.4.0.
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)
  )
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## No shared levels found between `names(values)` of the manual scale and the
## data's fill values.