title: “Metabolomics data analysis script for plasma samples” author: “Marat Kasakin” date: “2022-06-12” output: html_document — ## Install required packages
read.csv("~/20220406_MiceSPF_plasma.csv", stringsAsFactors = FALSE, skip = 0, header = TRUE) -> tf.polar.m
dim(tf.polar.m)
## [1] 40641 67
40641 67
read.xlsx('/Users/marat.kasakin/Desktop/CRC project/Experimental/Mina Collaboration/2021_03_Diettary mouse study SPF/GC-MS_LCMS/20210226_3TMVJ_MT001_PLASMA_results.xlsx', sheet = 4, startRow=1, colNames = TRUE, rowNames = FALSE, detectDates = FALSE, skipEmptyRows = TRUE,
skipEmptyCols = TRUE, rows = NULL, cols = NULL, check.names = FALSE,
namedRegion = NULL, na.strings = "NA", fillMergedCells = FALSE) -> mouse_data1
dim(mouse_data1) # 92 17
## [1] 92 17
92 17
read.xlsx('/Users/marat.kasakin/Desktop/CRC project/Experimental/Mina Collaboration/2021_03_Diettary mouse study SPF/GC-MS_LCMS/annot.data.spf.xlsx', sheet = 1, startRow=1, colNames = TRUE, rowNames = FALSE, detectDates = FALSE, skipEmptyRows = TRUE,
skipEmptyCols = TRUE, rows = NULL, cols = NULL, check.names = FALSE,
namedRegion = NULL, na.strings = "NA", fillMergedCells = FALSE) -> anot.data1
dim(anot.data1) # 174 metabolites in the rows in annotated data 5 columns
## [1] 174 5
colnames(anot.data1) # [1] "compound"
## [1] "compound"
## [2] "annotation.level"
## [3] "ms2.(yes.or.no)"
## [4] "Requires.additional.verification.with.std,.yes.or.no"
## [5] "X5"
# [2] "annotation.level"
# [3] "ms2.(yes.or.no)"
# [4] "Requires.additional.verification.with.std,.yes.or.no"
# [5] "X5"
tf.polar.m.2 <- tf.polar.m.1 %>% select(Filename, contains(filter(anot.data1, annotation.level %in% 1:3 & `ms2.(yes.or.no)`==1)[, 1]))
tf.polar.m.2[, 1] # remove first 3 rows: standard mix, pooled sample and spiked pooled sample
## # A tibble: 19 × 1
## Filename
## <chr>
## 1 LC-EMunt_mix_50uM_01
## 2 MT001_LC-EMunt_Plasma_Pool_01
## 3 MT001_LC-EMunt_Plasma_Pool_spiked
## 4 MT001_LC-EMunt_Plasma01_Std-Diet_1
## 5 MT001_LC-EMunt_Plasma02_Std-Diet_1
## 6 MT001_LC-EMunt_Plasma03_Std-Diet_1
## 7 MT001_LC-EMunt_Plasma04_Std-Diet_1
## 8 MT001_LC-EMunt_Plasma11_HighFat_LowCarb_1
## 9 MT001_LC-EMunt_Plasma12_HighFat_LowCarb_1
## 10 MT001_LC-EMunt_Plasma13_HighFat_LowCarb_1
## 11 MT001_LC-EMunt_Plasma16_Std-Diet_1
## 12 MT001_LC-EMunt_Plasma17_Std-Diet_1
## 13 MT001_LC-EMunt_Plasma18_Std-Diet_1
## 14 MT001_LC-EMunt_Plasma19_Std-Diet_1
## 15 MT001_LC-EMunt_Plasma20_Std-Diet_1
## 16 MT001_LC-EMunt_Plasma21_HighFat_LowCarb_1
## 17 MT001_LC-EMunt_Plasma22_HighFat_LowCarb_1
## 18 MT001_LC-EMunt_Plasma23_HighFat_LowCarb_1
## 19 MT001_LC-EMunt_Plasma24_HighFat_LowCarb_1
tf.polar.m.2[-1:-3, ] -> tf.polar.m.3
tf.polar.m.4 <- tf.polar.m.3 %>% add_column(c(rep("Standard_diet", 4), rep("HighFat_LowCarb_diet", 3), rep("Standard_diet", 5), rep("HighFat_LowCarb_diet", 4)), .before = "Filename")
tf.polar.m.5 <- tf.polar.m.4 %>% rename(Sample.Type = colnames(tf.polar.m.4)[1], Sample.Name = colnames(tf.polar.m.4)[2])
dim(tf.polar.m.5) # 16 129
## [1] 16 129
tf.polar.m.6 <- tf.polar.m.5 %>% select_if(~ !any(is.na(.)))
dim(tf.polar.m.6) # 16 89
## [1] 16 89
tf.polar.m.5-> tf.polar.m.5a
## NA
tf.polar.m.5a[, -which(colMeans(is.na(tf.polar.m.5a)) > 0.20)] -> tf.polar.m.5b # removed variables with more that 20% of NAs
dim(tf.polar.m.5b) # 16 100
## [1] 16 100
rownames(tf.polar.m.5b) <- tf.polar.m.5b$Sample.Name
## Warning: Setting row names on a tibble is deprecated.
as.data.frame(t(tf.polar.m.5b[, 3:ncol(tf.polar.m.5b)])) -> tf.polar.m.7
names(tf.polar.m.7) <- tf.polar.m.5b$Sample.Name
rownames(tf.polar.m.7) -> Metabolites
colSums(tf.polar.m.7, na.rm = T) -> Metabolite.sums
mean(Metabolite.sums) -> mean.Sums
Metabolite.sums/mean.Sums -> metabolite.sums.rel
mapply(`/`, tf.polar.m.7, metabolite.sums.rel) -> tf.polar.m.7a
as.data.frame(tf.polar.m.7a) -> tf.polar.m.8
rownames(tf.polar.m.8) <- Metabolites
dim(tf.polar.m.8) # 98 16
## [1] 98 16
as.data.frame(t(tf.polar.m.8)) -> tf.polar.m.9
dim(tf.polar.m.9) # 16 98
## [1] 16 98
cbind(tf.polar.m.5[, 1:2], tf.polar.m.9) -> tf.polar.m.10
tf.polar.m.10[, 1:2] # ok
## Sample.Type
## MT001_LC-EMunt_Plasma01_Std-Diet_1 Standard_diet
## MT001_LC-EMunt_Plasma02_Std-Diet_1 Standard_diet
## MT001_LC-EMunt_Plasma03_Std-Diet_1 Standard_diet
## MT001_LC-EMunt_Plasma04_Std-Diet_1 Standard_diet
## MT001_LC-EMunt_Plasma11_HighFat_LowCarb_1 HighFat_LowCarb_diet
## MT001_LC-EMunt_Plasma12_HighFat_LowCarb_1 HighFat_LowCarb_diet
## MT001_LC-EMunt_Plasma13_HighFat_LowCarb_1 HighFat_LowCarb_diet
## MT001_LC-EMunt_Plasma16_Std-Diet_1 Standard_diet
## MT001_LC-EMunt_Plasma17_Std-Diet_1 Standard_diet
## MT001_LC-EMunt_Plasma18_Std-Diet_1 Standard_diet
## MT001_LC-EMunt_Plasma19_Std-Diet_1 Standard_diet
## MT001_LC-EMunt_Plasma20_Std-Diet_1 Standard_diet
## MT001_LC-EMunt_Plasma21_HighFat_LowCarb_1 HighFat_LowCarb_diet
## MT001_LC-EMunt_Plasma22_HighFat_LowCarb_1 HighFat_LowCarb_diet
## MT001_LC-EMunt_Plasma23_HighFat_LowCarb_1 HighFat_LowCarb_diet
## MT001_LC-EMunt_Plasma24_HighFat_LowCarb_1 HighFat_LowCarb_diet
## Sample.Name
## MT001_LC-EMunt_Plasma01_Std-Diet_1 MT001_LC-EMunt_Plasma01_Std-Diet_1
## MT001_LC-EMunt_Plasma02_Std-Diet_1 MT001_LC-EMunt_Plasma02_Std-Diet_1
## MT001_LC-EMunt_Plasma03_Std-Diet_1 MT001_LC-EMunt_Plasma03_Std-Diet_1
## MT001_LC-EMunt_Plasma04_Std-Diet_1 MT001_LC-EMunt_Plasma04_Std-Diet_1
## MT001_LC-EMunt_Plasma11_HighFat_LowCarb_1 MT001_LC-EMunt_Plasma11_HighFat_LowCarb_1
## MT001_LC-EMunt_Plasma12_HighFat_LowCarb_1 MT001_LC-EMunt_Plasma12_HighFat_LowCarb_1
## MT001_LC-EMunt_Plasma13_HighFat_LowCarb_1 MT001_LC-EMunt_Plasma13_HighFat_LowCarb_1
## MT001_LC-EMunt_Plasma16_Std-Diet_1 MT001_LC-EMunt_Plasma16_Std-Diet_1
## MT001_LC-EMunt_Plasma17_Std-Diet_1 MT001_LC-EMunt_Plasma17_Std-Diet_1
## MT001_LC-EMunt_Plasma18_Std-Diet_1 MT001_LC-EMunt_Plasma18_Std-Diet_1
## MT001_LC-EMunt_Plasma19_Std-Diet_1 MT001_LC-EMunt_Plasma19_Std-Diet_1
## MT001_LC-EMunt_Plasma20_Std-Diet_1 MT001_LC-EMunt_Plasma20_Std-Diet_1
## MT001_LC-EMunt_Plasma21_HighFat_LowCarb_1 MT001_LC-EMunt_Plasma21_HighFat_LowCarb_1
## MT001_LC-EMunt_Plasma22_HighFat_LowCarb_1 MT001_LC-EMunt_Plasma22_HighFat_LowCarb_1
## MT001_LC-EMunt_Plasma23_HighFat_LowCarb_1 MT001_LC-EMunt_Plasma23_HighFat_LowCarb_1
## MT001_LC-EMunt_Plasma24_HighFat_LowCarb_1 MT001_LC-EMunt_Plasma24_HighFat_LowCarb_1
tf.polar.m.10$Sample.Type -> Sample.Type
c("Fructose", "Galactose", "Glucose 1-phosphate", "Glucose-6-phosphate", "Isocitric acid", "Isoleucine", "Mannose") -> excl.list.lc # this data will be taken from GCMS data
tf.polar.m.10[, -which(colnames(tf.polar.m.10) %in% excl.list.lc)] -> tf.polar.m.12
dim(tf.polar.m.12) # 16 93
## [1] 16 93
t_test <-function(x) {
t.test(x~Sample.Type, data=tf.polar.m.10, na.rm=T)
}
apply(tf.polar.m.12[, -1:-2], 2, t_test) -> t.test.data
as.data.frame(t(sapply(t.test.data, '[', c("statistic","p.value")))) -> ttest_stat
filter(tf.polar.m.12, Sample.Type == "Standard_diet") -> Std.m
filter(tf.polar.m.12, Sample.Type == "HighFat_LowCarb_diet") -> HighF.m
apply(Std.m[, -1:-2], 2, mean, na.rm =T) -> mean.std
apply(HighF.m[ -1:-2], 2, mean, na.rm =T) -> mean.hf
as.data.frame(cbind(mean.std, mean.hf)) -> means.SPF
transform(means.SPF, fold.changes = mean.std / mean.hf) -> fold.changes
ttest_stat[order(as.data.frame(ttest_stat$p.value), decreasing = F), ] -> ttest.res
## Warning in xtfrm.data.frame(x): cannot xtfrm data frames
p.adjust(ttest.res$p.value, method = "fdr") -> p.fdr
cbind(rownames(ttest.res), ttest.res, p.fdr) -> ttest.stat
merge(ttest.stat, fold.changes, by="row.names") -> ttest.stat.1
ttest.stat.1[order(as.data.frame(ttest.stat.1$p.value), decreasing = F), ] -> ttest.stat.2
## Warning in xtfrm.data.frame(x): cannot xtfrm data frames
ttest.stat.2[, c(1, 3:8)] -> ttest.stat.3
names(ttest.stat.3) <- c("Metabolites", "statistic", "p.value", "p.fdr", "mean.StdDiet", "mean.HighFat.LowCarbDiet", "Fold.changes")
rownames(ttest.stat.3) <- ttest.stat.3$Metabolites
dim(ttest.stat.3) #91 7
## [1] 91 7
write.xlsx(ttest.stat.3, "~/ttest-SPF_tr.xlsx")
rownames(mouse_data1) <- mouse_data1$Metabolite # Set rownames as metabolites
as.data.frame(t(mouse_data1[, -1])) -> mouse_data2
dim(mouse_data2) # 16 observations and 92 features
## [1] 16 92
mouse_data2[, -grep(c('\\JP_|IS_|No match|No direct'), colnames(mouse_data2))] -> mouse_data3
dim(mouse_data3) # 16 observations and 37 metabolites
## [1] 16 37
read.xlsx('/Users/marat.kasakin/Desktop/CRC project/Experimental/Mina Collaboration/2021_03_Diettary mouse study SPF/GC-MS_LCMS/Sample_condition_2.xlsx', sheet = 1, startRow=1, colNames = TRUE, rowNames = FALSE, detectDates = FALSE, skipEmptyRows = TRUE,
skipEmptyCols = TRUE, rows = NULL, cols = NULL, check.names = FALSE,
namedRegion = NULL, na.strings = "NA", fillMergedCells = FALSE) -> sample_condition
rownames(sample_condition) <- sample_condition$Sample
merge(sample_condition, mouse_data3, by = "row.names") -> mouse_data4
rownames(mouse_data4) <- mouse_data4$Row.names
mouse_data4[, -c(1:2)] -> mouse_data5
dim(mouse_data5) # 16 38
## [1] 16 38
colnames(mouse_data5) # GC-MS data related to metabolite derivatives with TMS, some has duplicates in GC-MS data, some metabolites has duplicates in LC-MS data
## [1] "Diet" "Lactic_acid_2TMS"
## [3] "Glycine_2TMS" "Glycolic_acid_2TMS"
## [5] "Alanine_2TMS" "2-Hydroxypyridine_1TMS"
## [7] "Pyruvic_acid_1MEOX_1TMS" "2-Hydroxybutyric_acid_2TMS"
## [9] "3-Hydroxybutyric_acid_2TMS" "Carbonic_acid_1MeOX_2TMS"
## [11] "Oxalic_acid_2TMS" "Valine_2TMS"
## [13] "Glycerol_3TMS" "Leucine_2TMS"
## [15] "Isoleucine_2TMS" "Glycine_3TMS"
## [17] "Phosphoric_acid_3TMS" "Glyceric_acid_3TMS"
## [19] "Serine_3TMS" "Succinic_acid_2TMS"
## [21] "Threonine_3TMS" "Fumaric_acid_2TMS"
## [23] "Malic_acid_3TMS" "Erythronic_acid_4TMS"
## [25] "Threonic_acid_4TMS" "Methionine_2TMS"
## [27] "Lyxose_1MeOX_4TMS_BP" "Pyroglutamic_acid_2TMS"
## [29] "2-oxo-glutaric_acid_1MeOX_2TMS" "1,5-Anhydro-D-glucitol_4TMS"
## [31] "Fructose_1MEOX_5TMS_MP" "Mannose_1MEOX_5TMS_MP"
## [33] "Fructose_1MEOX_5TMS_BP" "Citric_acid_4TMS"
## [35] "Lysine_4TMS" "Gluconic_acid_6TMS"
## [37] "Inositol_6TMS" "Hexadecanoic_acid_1TMS"
t_test <-function(x) {
t.test(x~Diet, data=mouse_data5)
}
apply(mouse_data5[, -1], 2, t_test) -> t.test.data
as.data.frame(t(sapply(t.test.data, '[', c("statistic","p.value")))) -> ttest_stat.gc
filter(mouse_data5, Diet == "high fat low carb diet") -> GC.HighF.m
filter(mouse_data5, Diet == "standard diet") -> GC.Std.m
apply(GC.Std.m[, -1], 2, mean, na.rm =T) -> mean.std.gc
apply(GC.HighF.m[, -1], 2, mean, na.rm =T) -> mean.hf.gc
as.data.frame(cbind(mean.std.gc, mean.hf.gc)) -> means.gc.SPF
transform(means.gc.SPF, fold.changes = mean.std.gc / means.gc.SPF) -> fold.changes.gc
p.adjust(ttest_stat.gc$p.value, method = "fdr") -> p.fdr
cbind(ttest_stat.gc, p.fdr) -> ttest_stat.gc.1
merge(ttest_stat.gc.1, fold.changes.gc[, -3], by="row.names") -> ttest.stat.1.gc
ttest.stat.1.gc[order(as.data.frame(ttest.stat.1.gc$p.value), decreasing = F), ] -> ttest.stat.2.gc
## Warning in xtfrm.data.frame(x): cannot xtfrm data frames
names(ttest.stat.2.gc) <- c("Metabolites", "statistic", "p.value", "p.fdr", "mean.StdDiet", "mean.HighFat.LowCarbDiet", "Fold.changes")
rownames(ttest.stat.2.gc) <- ttest.stat.2.gc$Metabolites
write.xlsx(ttest.stat.2.gc, "~/ttest-SPF_gc.xlsx")
read.xlsx('/Users/marat.kasakin/Desktop/CRC project/Experimental/Mina Collaboration/2021_03_Diettary mouse study SPF/GC-MS_LCMS/ttest-SPF_tr1.xlsx', sheet = 1, startRow=1, colNames = TRUE, rowNames = FALSE, detectDates = FALSE, skipEmptyRows = TRUE,
skipEmptyCols = TRUE, rows = NULL, cols = NULL, check.names = FALSE,
namedRegion = NULL, na.strings = "NA", fillMergedCells = FALSE) -> sel.Tr
read.xlsx('/Users/marat.kasakin/Desktop/CRC project/Experimental/Mina Collaboration/2021_03_Diettary mouse study SPF/GC-MS_LCMS/ttest-SPF_gc1.xlsx', sheet = 1, startRow=1, colNames = TRUE, rowNames = FALSE, detectDates = FALSE, skipEmptyRows = TRUE,
skipEmptyCols = TRUE, rows = NULL, cols = NULL, check.names = FALSE,
namedRegion = NULL, na.strings = "NA", fillMergedCells = FALSE) -> sel.GC
dim(mouse_data5) # 16 38
## [1] 16 38
mouse_data5[, c(1, which(colnames(mouse_data5) %in% sel.GC[complete.cases(sel.GC), 2]))] -> mouse_data6
match(sel.GC[complete.cases(sel.GC), 2], colnames(mouse_data6)) -> index.gc
names(mouse_data6)[index.gc] <- sel.GC[complete.cases(sel.GC), 1]
dim(mouse_data6) # 16 27
## [1] 16 27
colnames(tf.polar.m.12)[1] <- "Diet"
dim(tf.polar.m.12) # 16 93
## [1] 16 93
tf.polar.m.12[, c(1, which(colnames(tf.polar.m.12) %in% sel.Tr[complete.cases(sel.Tr), 2]))] -> tf.polar.m.13
match(sel.Tr[complete.cases(sel.Tr), 2], colnames(tf.polar.m.13)) -> index.lc
names(tf.polar.m.13)[index.lc] <- sel.Tr[complete.cases(sel.Tr), 1]
dim(tf.polar.m.13) # 16 75
## [1] 16 75
### unification of sample names and diet abbreviations:
rownames(tf.polar.m.13) -> rows.df.tr
rows.df.tr1 <- rows.df.tr %>% str_replace_all("MT001_LC-EMunt_Plasma", "") %>%
str_replace_all("_Std-Diet_1", "") %>%
str_replace_all("_HighFat_LowCarb_1", "")
rows.df.tr1 -> rownames(tf.polar.m.13)
rownames(mouse_data6) -> rows.df.gc
rows.df.gc1 <- rows.df.gc %>% str_replace_all("MT001_MousePlasma_HighFatLowCarb_", "") %>%
str_replace_all("MT001_MousePlasma_Standard_", "") %>%
str_replace_all("^3", "03") %>%
str_replace_all("^4", "04")
rows.df.gc1[8] <- "01"
rows.df.gc1[13] <- "02"
rows.df.gc1 -> rownames(mouse_data6)
# now rownames are unified, merge table, rename rownames column
merge(mouse_data6, tf.polar.m.13, by="row.names") -> united.data.SPF
united.data.SPF[, -which(colnames(united.data.SPF) == "Diet.y")] -> united.SPF
names(united.SPF)[1:2] <- c("SampleID", "Diet")
rownames(united.SPF) <- united.SPF$SampleID
dim(united.SPF) #16 102
## [1] 16 102
dim(na.omit(united.SPF[, -1:-2])) # 7 101 too low number of observations remain
## [1] 7 100
# variant 1 remove metabolites with NA values:
united.SPF1<- united.SPF %>%
select_if(~ !any(is.na(.)))
prcomp(united.SPF1[, -1:-2], scale = T, center = T) -> pca.SPF
diet <- as.factor(united.SPF1$Diet)
library(factoextra)
fviz_pca_ind(pca.SPF,
palette = c("red", "blue"),
geom.ind = "point",
col.ind = united.SPF1$Diet,
addEllipses = TRUE, # Concentration ellipses
ellipse.type = "t", ellipse.level = 0.67,
legend.title = "Diet",
repel = TRUE,
labelsize = 4, pointsize = 2) +
theme(text = element_text(size = 20), legend.position = c(0.8, 0.15)) -> ind.mouse
ggpubr::ggpar(ind.mouse, subtitle = "t-distribution, 67% CI", xlab = paste("PC1=",
round((get_eigenvalue(pca.SPF)$variance.percent[1]), 2), "%"),
ylab = paste("PC2=", round((get_eigenvalue(pca.SPF)$variance.percent[2]), 2), "%")) -> p
pdf("pca_ind_SPF_v1.pdf", width = 8, height = 8)
p
dev.off()
## quartz_off_screen
## 2
p
# variant 2 imputation of NA values (substitution by mean values)
Meanf=function(x){
x<-as.numeric(as.character(x)) #first convert each column into numeric from factor
x[is.na(x)] =mean(x, na.rm=TRUE) #convert the item with NA to mean value from the column
x #display the column
}
imp.data.SPF=data.frame(apply(united.SPF[, -1:-2], 2, Meanf))
cbind(united.SPF[1:2], imp.data.SPF) -> imp.data.SPF.1
prcomp(imp.data.SPF.1[, -1:-2], scale = T, center = T) -> pca.SPF
fviz_pca_ind(pca.SPF,
palette = c("red", "blue"),
geom.ind = "point",
col.ind = imp.data.SPF.1$Diet,
addEllipses = TRUE, # Concentration ellipses
ellipse.type = "t", ellipse.level = 0.67,
legend.title = "Diet",
repel = TRUE,
labelsize = 4, pointsize = 2) +
theme(text = element_text(size = 20), legend.position = c(0.8, 0.15)) -> ind.mouse
ggpubr::ggpar(ind.mouse, subtitle = "t-distribution, 67% CI", xlab = paste("PC1=",
round((get_eigenvalue(pca.SPF)$variance.percent[1]), 2), "%"),
ylab = paste("PC2=", round((get_eigenvalue(pca.SPF)$variance.percent[2]), 2), "%")) -> p
pdf("pca_ind_SPF_v2.pdf", width = 8, height = 8)
p
dev.off()
## quartz_off_screen
## 2
p
## PCA variant 3: NA cleaning and with exclusion list of metabolites
# variant 3 selected metabolites
# list of exclusions:
c("2-oxoglutaric acid", "Dopamine", "Indole", "Indole-3-propionic acid", "Niacin", "Quinolinic acid",
"3-Hydroxyanthranilic acid", "3-(4-Hydroxyphenyl)propionic acid", "Anthranilic acid",
"Indole-3-carboxylic acid", "Quinaldic acid", "3-Methoxytyramine", "Indole-3-glyoxylic acid") -> exclusion.list
united.SPF1[, -which(colnames(united.SPF1) %in% exclusion.list)] -> united.SPF2
dim(united.SPF2) # 16 84
## [1] 16 83
prcomp(united.SPF2[, -1:-2], scale = T, center = T) -> pca.SPF.sel
diet <- as.factor(pca.SPF.sel$Diet)
pdf("pca_ind_SPF_v3.pdf", width = 8, height = 8)
fviz_pca_ind(pca.SPF.sel,
palette = c("red", "blue"),
geom.ind = "point",
col.ind = united.SPF2$Diet,
addEllipses = TRUE, # Concentration ellipses
ellipse.type = "t", ellipse.level = 0.67,
legend.title = "Diet",
repel = TRUE,
labelsize = 4, pointsize = 2) +
theme(text = element_text(size = 20), legend.position = c(0.8, 0.15)) -> ind.mouse
ggpubr::ggpar(ind.mouse, subtitle = "t-distribution, 67% CI", xlab = paste("PC1=",
round((get_eigenvalue(pca.SPF.sel)$variance.percent[1]), 2), "%"),
ylab = paste("PC2=", round((get_eigenvalue(pca.SPF.sel)$variance.percent[2]), 2), "%")) -> p
p
dev.off()
## quartz_off_screen
## 2
p
## HCA and complex Heatmap construction for samples
library(RColorBrewer)
library(GlobalOptions)
library(shape)
library(ComplexHeatmap)
df.m <- scale(united.SPF2[, -1:-2]) # all means are equal to zero, all sd equal to 1
cbind(united.SPF2[, 1:2], df.m) -> united.SPF3
distmat_mouse <- dist(df.m[, -1:-2], method = "euclidean")
hcldat_mouse <- hclust(distmat_mouse, method = "average")
cor(distmat_mouse, cophenetic(hcldat_mouse)) # 0.7748869
## [1] 0.7748869
#####################
# Create the heatmap annotation for samples diet:
brewer.pal(2, "Spectral") -> color.key1
## Warning in brewer.pal(2, "Spectral"): minimal value for n is 3, returning requested palette with 3 different levels
annot_df <- data.frame(Diet = united.SPF3$Diet, stringsAsFactors = FALSE)
color_map = list(Diet = c("high fat low carb diet" = "red", "standard diet" = "blue"))
ha <- HeatmapAnnotation(df = annot_df, name = colnames(annot_df),
col = color_map, border = TRUE, gap = unit(1, "cm"), simple_anno_size = unit(2, "cm"))
library(circlize)
## ========================================
## circlize version 0.4.14
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
max(apply(united.SPF3[, -1:-2] , 2, max)) # +3.6984
## [1] 3.698494
min(apply(united.SPF3[, -1:-2] , 2, min)) # -2.6111
## [1] -2.611114
colnames(united.SPF[, -1:-2]) -> Metabolites
write.xlsx(data.frame(Metabolites), "~Metabolites.xlsx") # manually assigned metabolite groups based on KEGG and HMDB databases
read.xlsx("/Users/marat.kasakin/rworkshop-MKasakin/Metabolites.xlsx", startRow=1, colNames = TRUE, rowNames = FALSE, detectDates = FALSE, skipEmptyRows = TRUE, skipEmptyCols = TRUE, rows = NULL, cols = NULL, check.names = FALSE,
namedRegion = NULL, na.strings = "NA", fillMergedCells = FALSE) -> Metabolites
Metabolites
## Metabolites Pathways.group
## 1 Glycolic acid Carbohydrate metabolism
## 2 2-Hydroxypyridine unspecified
## 3 Pyruvic acid Carbohydrate metabolism
## 4 3-Hydroxybutyric acid ketone bodies
## 5 Carbonic acid unspecified
## 6 Oxalic acid unspecified
## 7 Glycerol unspecified
## 8 Leucine BCAA metabolism
## 9 Isoleucine BCAA metabolism
## 10 Phosphoric acid unspecified
## 11 Glyceric acid unspecified
## 12 Fumaric acid TCA cycle
## 13 Malic acid TCA cycle
## 14 Erythronic acid Carbohydrate metabolism
## 15 Threonic acid Carbohydrate metabolism
## 16 Methionine Gly, Thr, Ser, Met, Cys metabolism
## 17 Lyxose Carbohydrate metabolism
## 18 Pyroglutamic acid Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 19 2-oxoglutaric acid Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 20 1,5-Anhydro-D-glucitol Carbohydrate metabolism
## 21 Fructose Carbohydrate metabolism
## 22 Mannose Carbohydrate metabolism
## 23 Citric acid TCA cycle
## 24 Gluconic acid Carbohydrate metabolism
## 25 Inositol Carbohydrate metabolism
## 26 Hexadecanoic acid unspecified
## 27 2-Hydroxybutyric acid Gly, Thr, Ser, Met, Cys metabolism
## 28 2-Hydroxyglutaric acid Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 29 3-(4-Hydroxyphenyl)propionic acid unspecified
## 30 3-Aminobutanoic acid unspecified
## 31 3-Hydroxyanthranilic acid Trp, Indole, Kynurenine metabolism
## 32 3-Hydroxykyunrenine Trp, Indole, Kynurenine metabolism
## 33 3-Methoxytyramine Phe, Tyr metabolism
## 34 Acetylcarnitine unspecified
## 35 Aconitic Acid TCA cycle
## 36 Adenine Nucleotides metabolism
## 37 Alanine Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 38 Phenylalanine Phe, Tyr metabolism
## 39 alpha-Ketoglutaric acid TCA cycle
## 40 Anthranilic acid Trp, Indole, Kynurenine metabolism
## 41 Arginine Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 42 Asparagine Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 43 Aspartic acid Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 44 N-Acetylaspartic acid Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 45 Valine BCAA metabolism
## 46 Carnitine Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 47 Choline Gly, Thr, Ser, Met, Cys metabolism
## 48 Isocitric acid TCA cycle
## 49 Citrulline Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 50 Creatine Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 51 Creatinine Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 52 Cystine Gly, Thr, Ser, Met, Cys metabolism
## 53 Cytidine monophosphate Nucleotides metabolism
## 54 Cytosine Nucleotides metabolism
## 55 Dopamine Phe, Tyr metabolism
## 56 D-PANTOTHENIC ACID unspecified
## 57 gamma-Aminobutyric acid Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 58 Glucose Carbohydrate metabolism
## 59 Glutamic acid Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 60 Glutamine Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 61 Glutathione, oxidized (GSSG) Gly, Thr, Ser, Met, Cys metabolism
## 62 Glutathione, reduced (GSH) Gly, Thr, Ser, Met, Cys metabolism
## 63 Glycine Gly, Thr, Ser, Met, Cys metabolism
## 64 Isobutyrylglycine unspecified
## 65 HIAA Trp, Indole, Kynurenine metabolism
## 66 Histidine unspecified
## 67 Hydroxyproline Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 68 Indole Trp, Indole, Kynurenine metabolism
## 69 Indole-3-acetaldehyde Trp, Indole, Kynurenine metabolism
## 70 Indole-3-acetamide Trp, Indole, Kynurenine metabolism
## 71 Indole-3-carboxaldehyde Trp, Indole, Kynurenine metabolism
## 72 Indole-3-carboxylic acid Trp, Indole, Kynurenine metabolism
## 73 Indole-3-ethanol Trp, Indole, Kynurenine metabolism
## 74 Indole-3-glyoxylic acid Trp, Indole, Kynurenine metabolism
## 75 Indole-3-propionic acid Trp, Indole, Kynurenine metabolism
## 76 Kynurenic acid Trp, Indole, Kynurenine metabolism
## 77 Kynurenine Trp, Indole, Kynurenine metabolism
## 78 Lactic Acid Carbohydrate metabolism
## 79 Lysine Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 80 Methylmalonic acid unspecified
## 81 N-Acetylserotonin Trp, Indole, Kynurenine metabolism
## 82 Niacin Trp, Indole, Kynurenine metabolism
## 83 Nicotinamide Trp, Indole, Kynurenine metabolism
## 84 Ornithine Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 85 Proline Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 86 Putrescine Arg, Ala, Asp, Glu, Pro, Lys metabolism
## 87 Quinaldic acid Trp, Indole, Kynurenine metabolism
## 88 Quinolinic acid Trp, Indole, Kynurenine metabolism
## 89 Sarcosine Gly, Thr, Ser, Met, Cys metabolism
## 90 Serine Gly, Thr, Ser, Met, Cys metabolism
## 91 Serotonin Trp, Indole, Kynurenine metabolism
## 92 Succinic acid TCA cycle
## 93 Taurine Gly, Thr, Ser, Met, Cys metabolism
## 94 Threonine Gly, Thr, Ser, Met, Cys metabolism
## 95 Thymine Nucleotides metabolism
## 96 Tryptophan Trp, Indole, Kynurenine metabolism
## 97 Tyrosine Phe, Tyr metabolism
## 98 Uracil Nucleotides metabolism
## 99 Uridine Nucleotides metabolism
## 100 Betaine Gly, Thr, Ser, Met, Cys metabolism
dim(Metabolites) # 100 2
## [1] 100 2
filter(Metabolites, Metabolites %in% colnames(united.SPF3)) -> Metabolites
dim(Metabolites) # 81 2
## [1] 81 2
names(Metabolites)[2] <- "Pathways_group"
col_fun1 = colorRamp2(breaks =c(-2, -0.25, 0, 0.25, 2), colors=rev(brewer.pal(5, "RdYlBu")))
col <- colorRampPalette(bias =1, rev(brewer.pal(10, "RdYlBu")))(8)
color_map2 = list(Pathways = c("ketone bodies" = "red",
"Carbohydrate metabolism" = "blue",
"Gly, Thr, Ser, Met, Cys metabolism" = "green",
"Arg, Ala, Asp, Glu, Pro, Lys metabolism" = "darkgreen",
"BCAA metabolism" = "yellowgreen",
"Phe, Tyr metabolism" = "lightgreen",
"TCA cycle" = "yellow",
"Nucleotides metabolism" = "purple",
"Trp, Indole, Kynurenine metabolism" = "orange",
"unspecified" = "grey"))
annot_df2 <- data.frame(Pathways = Metabolites$Pathways_group, stringsAsFactors = FALSE)
ha1 <- HeatmapAnnotation(df = annot_df2, name = colnames(annot_df2),
col = color_map2, border = TRUE, gap = unit(1, "cm"), simple_anno_size = unit(2, "cm"), which = "row")
set.seed(4321)
Heatmap(as.matrix(t(united.SPF3[, -1:-2])),
name = "Relative changes", column_title = "Samples",
column_title_gp = gpar(fontsize = 20), km = 2,
row_title_gp = gpar(fontsize = 20),
row_names_gp = gpar(fontsize = 6, fontface = 1, fontfamily = "sans"),
row_title = "Metabolites", col = col_fun1, column_dend_height = unit(4, "cm"),
row_dend_width = unit(3, "cm"),
column_km = 2,
column_names_gp = gpar(fontsize = 10, fontface = 1, fontfamily = "sans"),
column_names_rot = 0,
show_column_names = T,
heatmap_height = unit(32, "cm"),
top_annotation = ha,
row_dend_reorder = TRUE,
column_dend_reorder = TRUE,
right_annotation = ha1) -> p
pdf("HCA_heatmap_mSPF_sel_2e.pdf", width = 14, height = 14)
p
dev.off()
## quartz_off_screen
## 2
p