# Set the default chunk option `echo = TRUE` for the knitr document.
knitr::opts_chunk$set(echo = TRUE)

library(dplyr)
library(reshape2)
library(ggplot2)
library(plyr)
library(readr)
library(ggthemes)

# Transposed; all commas, spaces and semicolons were removed in Notepad++. Insert the metabolite data categories into the first row.

# Import annotated files and clean up the data ----
# Read the semicolon-separated annotation table; the first row supplies the
# column names.
Data_Curated_Annotations_transpose <- read.csv(
  file = "Full_Data_Raw_Transpose_Name.csv",
  sep = ";",
  header = TRUE
)
metab_data <- Data_Curated_Annotations_transpose

# Sample-annotation columns that are converted to factors. Base R is used
# here instead of the deprecated dplyr::mutate_each_() / funs() pair; the
# result is the same in-place conversion of exactly these columns.
factor_columns <- c("ionActive", "Extraction", "ET", "Mols", "Mouse",
                    "Diet", "Sex", "Age", "Age.Cohort")
metab_data[factor_columns] <- lapply(metab_data[factor_columns], as.factor)

# Add indexes for replicates using the mod function on the sample index.

# Label the rows alternately "rep_1" / "rep_2" in their current order.
# rep_len() recycles the 1, 2 pattern to exactly nrow(metab_data) values, so
# an odd number of rows no longer produces a vector of the wrong length.
replicate_labels <- paste0("rep_", rep_len(c(1, 2), nrow(metab_data)))

# Two-column lookup table: sample key plus its replicate label.
replicate_index <- data.frame(
  ionActive = metab_data$ionActive,
  replicate = replicate_labels,
  stringsAsFactors = FALSE
)

# Join the labels back onto the full table; the result has ionActive and
# replicate as its first two columns.
# You need unique values in the `by` column of the merge: merge() pairs every
# matching combination of keys, so duplicated keys multiply rows.
# Uniqueness can also be achieved by merging on multiple columns.
metab_data <- merge(replicate_index, metab_data, by = "ionActive")

# Order the metabolite data frame by cohorts and get group-wise means for later data analysis.

# Row-wise mean over columns 12 through the last column, stored as a new
# column, followed by a copy of the table sorted by that mean (ascending).
# NOTE(review): row_mean becomes the last column of metab_data, so any later
# `12:ncol(metab_data)` selection also includes it -- confirm this is intended.
metab_data[["row_mean"]] <- rowMeans(metab_data[, seq(12, ncol(metab_data))])
metab_data_ordered <- metab_data[order(metab_data[["row_mean"]]), ]

# Per-strain (Mouse) mean of every column from column 12 onward.
# The derived "row_mean" column is dropped by name so that it is not averaged
# and melted as if it were a metabolite.
strain_metabolite_columns <- setdiff(
  names(metab_data)[12:ncol(metab_data)],
  "row_mean"
)
metab_strain_mean <- aggregate(
  metab_data[, strain_metabolite_columns, drop = FALSE],
  by = list(metab_data$Mouse),
  FUN = mean
)

# Long format: one row per (strain, metabolite) pair. The id column is named
# explicitly instead of relying on melt() guessing it.
melt_metab_strain_mean <- melt(metab_strain_mean, id.vars = "Group.1")

# Wide format: one row per metabolite, one column per strain. value.var is
# given explicitly instead of relying on dcast() guessing it.
cast_metab_strain_mean <- dcast(
  melt_metab_strain_mean,
  formula = variable ~ Group.1,
  value.var = "value"
)


# Sort the long table by value (ties broken by strain), then use the order in
# which each strain first appears in the sorted table as the level order of
# the strain factor.
melt_metab_strain_mean <- arrange(melt_metab_strain_mean, value, Group.1)
strain_order <- unique(as.character(melt_metab_strain_mean[["Group.1"]]))

melt_metab_strain_mean[["Group.1"]] <- ordered(
  melt_metab_strain_mean[["Group.1"]],
  levels = strain_order
)


# Box plot of the per-strain metabolite means: one box per strain on a log10
# y axis limited to 1e2..1e8, with the strain labels rotated by 90 degrees.
# Columns are referenced by bare name inside aes() so they are always taken
# from the data frame handed to ggplot(); `df$col` inside aes() bypasses that
# data argument and only works while both happen to be in the same row order.
Strain_Boxplot <- ggplot(
  arrange(melt_metab_strain_mean, value, Group.1),
  aes(x = Group.1, y = value)
) +
  scale_x_discrete() +
  scale_y_log10(limits = c(1e2, 1e8)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Strain_Boxplot + geom_boxplot()

# Per-diet mean of every column from column 12 onward. The derived "row_mean"
# column is dropped by name so it is not treated as a metabolite.
diet_metabolite_columns <- setdiff(
  names(metab_data)[12:ncol(metab_data)],
  "row_mean"
)
metab_Diet_mean <- aggregate(
  metab_data[, diet_metabolite_columns, drop = FALSE],
  by = list(metab_data$Diet),
  FUN = mean
)
melt_metab_Diet_mean <- melt(metab_Diet_mean, id.vars = "Group.1")
# Keep only the two diets that are compared below.
melt_metab_Diet_mean <- melt_metab_Diet_mean %>%
  filter(Group.1 %in% c("CD", "HF"))

# One row per metabolite with the CD and HF means side by side.
cast_metab_Diet_mean <- dcast(
  melt_metab_Diet_mean,
  formula = variable ~ Group.1,
  value.var = "value"
)

# The diet columns are addressed by name and extracted with [[ ]] so the new
# columns are plain numeric vectors; `df[2] - df[3]` yields a one-column data
# frame and would nest a data frame inside the column.
diet_mean_cd <- cast_metab_Diet_mean[["CD"]]
diet_mean_hf <- cast_metab_Diet_mean[["HF"]]
cast_metab_Diet_mean$Differance <- diet_mean_cd - diet_mean_hf
# A log2 fold change is the log2 of the ratio of the two group means (CD over
# HF, the same direction as the difference above), not of their difference.
cast_metab_Diet_mean$Fold.Change.log2 <- log2(diet_mean_cd / diet_mean_hf)

# The data viewer is only opened in an interactive session.
if (interactive()) {
  View(cast_metab_Diet_mean)
}
# Long-format table keyed by Mouse, Diet and Sex. Columns 1-5, 9 and 10 are
# dropped by position; the derived "row_mean" column is dropped by name so it
# does not show up as a metabolite.
melt_input <- metab_data[-c(1:5, 9, 10)]
melt_input <- melt_input[setdiff(names(melt_input), "row_mean")]
melted_metab_data <- melt(melt_input, id.vars = c("Mouse", "Diet", "Sex"))

# Mean value per metabolite. Only the numeric `value` column is aggregated;
# averaging the whole data frame also ran mean() over the factor columns,
# which only produced NA and warnings. The result has the columns
# `variable` and `value`.
metabolite_means <- aggregate(
  melted_metab_data["value"],
  by = list(variable = melted_metab_data$variable),
  FUN = mean
)
metabolite_means <- arrange(metabolite_means, value)

# Order the metabolite factor levels by ascending mean value and sort the
# long table accordingly.
metabolite_order <- as.character(metabolite_means$variable)
melted_metab_data$variable <- factor(melted_metab_data$variable,
                                     levels = metabolite_order,
                                     ordered = TRUE)
melted_metab_data <- arrange(melted_metab_data, variable)
# Scatter plot of every measurement: metabolites on the x axis (in factor
# level order), values on a log10 y axis, points coloured by mouse strain.
# Columns are referenced by bare name inside aes() so they are resolved in
# the data frame given to ggplot() rather than in the global object.
All_Metabolites_Plot <- ggplot(
  melted_metab_data,
  aes(x = variable, y = value, colour = factor(Mouse))
) +
  scale_x_discrete("Metabolites") +
  scale_y_log10("Log Metabolite Intensities a.u")
All_Metabolites_Plot + geom_point()

# Single.Metabolite.Boxplot() (defined below) takes a metabolite name such as
# those listed by the following command (head() shows the first six levels):
head(unique(melted_metab_data$variable))
## [1] DG.14.0.20.1.11Z..0.0. Arachidonyl.carnitine  Deoxyhypusine         
## [4] Cysteine.S.sulfate     LysoPC.22.2.13Z.16Z..  Linoleoyl.ethanolamide
## 367 Levels: DG.14.0.20.1.11Z..0.0. < ... < Taurocholic.acid
# It generates a box plot per strain, with the strains ranked in ascending
# order of their aggregated value (the function aggregates with `median`).

# Box plot of one metabolite across mouse strains, split by diet.
#
# Filters the long-format table to the requested metabolite and to the
# "CD" / "HF" diets, takes the median value per strain and diet, orders the
# strains by first appearance in the ascending list of those medians, and
# draws one box per strain and diet on a log10 y axis.
#
# Arguments:
#   Query.Metabolite  Metabolite name; matched against the `variable` column.
#   Melted.Data       Long-format data frame with the columns Mouse, Diet,
#                     variable and value. Defaults to the global
#                     `melted_metab_data`, as before.
#   Y.Limits          Length-2 numeric limits of the log10 y axis; values
#                     outside the limits are removed by the scale. Defaults
#                     to the previously hard-coded c(100, 100000).
#
# Returns the fully labelled ggplot object; it is drawn when printed.
# Stops with an error if no CD/HF rows exist for the metabolite.
Single.Metabolite.Boxplot <- function(Query.Metabolite,
                                      Melted.Data = melted_metab_data,
                                      Y.Limits = c(100, 100000)) {
  Query.Metabolite <- as.character(Query.Metabolite)

  metabolite_data <- Melted.Data %>%
    dplyr::filter(variable == Query.Metabolite) %>%
    dplyr::filter(Diet %in% c("CD", "HF"))

  # Fail with a clear message instead of an obscure aggregate() error.
  if (nrow(metabolite_data) == 0) {
    stop("No CD/HF measurements found for metabolite: ", Query.Metabolite,
         call. = FALSE)
  }

  strain_summary <- aggregate(value ~ Mouse * Diet,
                              data = metabolite_data,
                              FUN = median)
  strain_summary <- dplyr::arrange(strain_summary, value)

  # The data viewer is a debugging aid; only open it in interactive sessions
  # so that knitting / batch runs are not affected.
  if (interactive()) {
    View(metabolite_data)
    View(strain_summary)
  }

  # NOTE(review): the ranking uses the median although the title says "Mean";
  # confirm which statistic is intended.
  strain_order <- as.character(unique(strain_summary$Mouse))
  metabolite_data$Mouse <- factor(metabolite_data$Mouse,
                                  levels = strain_order,
                                  ordered = TRUE)

  # colour has to be mapped inside aes(); passed as a plain ggplot() argument
  # it is silently ignored. The labels are part of the returned object
  # (previously they were added to a copy that was thrown away).
  ggplot(metabolite_data, aes(x = Mouse, y = value, colour = Diet)) +
    geom_boxplot(outlier.colour = "red", outlier.size = 1) +
    scale_x_discrete() +
    scale_y_log10(limits = Y.Limits) +
    theme_fivethirtyeight() +
    # theme_fivethirtyeight() blanks the axis titles; switch them back on so
    # the x / y labels below are actually shown.
    theme(axis.title = element_text(),
          axis.text.x = element_text(angle = 90, hjust = 0)) +
    labs(title = paste("Ascending Mean Boxplot of", Query.Metabolite),
         x = "BXD Mouse Strains",
         y = paste(Query.Metabolite, "Log Metabolite Intensities a.u"))
}


# Example box plots for the 1st, 50th, ..., 300th metabolite in order of first
# appearance in melted_metab_data$variable.
example_metabolites <- as.character(unique(melted_metab_data$variable))
for (metabolite_index in c(1, 50, 100, 150, 200, 250, 300)) {
  # print() is required: values are not auto-printed inside a loop.
  print(Single.Metabolite.Boxplot(example_metabolites[metabolite_index]))
}

# Diet-coloured box plot of a single metabolite across strains.
# NOTE(review): `Query.Metabolite.melted.metab.data` and `Query.Metabolite`
# are only created inside Single.Metabolite.Boxplot() in the code shown here;
# they must exist in this scope before the statement below can run.
# Typographic (curly) quotes were replaced by plain double quotes, and the
# final print of the plot was moved onto its own line; both were syntax errors.
Query.Metabolite.ScatterPlot <- ggplot(
  Query.Metabolite.melted.metab.data,
  aes(y = value, x = Mouse, colour = Diet)
) +
  theme_hc() +
  geom_boxplot() +
  scale_y_log10() +
  scale_x_discrete() +
  labs(title = paste("Ascending Mean Boxplot of",
                     as.character(Query.Metabolite))) +
  ylab(paste(as.character(Query.Metabolite),
             "Log Metabolite Intensities a.u")) +
  xlab("BXD Mouse Strains") +
  theme(axis.text.x = element_text(angle = 90, hjust = 0))
Query.Metabolite.ScatterPlot