library(mixOmics) # import the mixOmics library
library(tidyverse)
gene.methylation <- read.csv("/home/sam/gitrepos/ceabigr/output/40-gene-methylaiton.csv")
gene.fpkm <- read.csv("/home/sam/gitrepos/2018_L18-adult-methylation/analyses/gene_fpkm.csv")
metadata <- read.csv("/home/sam/gitrepos/2018_L18-adult-methylation/data/adult-meta.csv")

metadata <- metadata %>% remove_rownames %>% column_to_rownames(var="Sample.ID")

rows <- metadata$Sample.ID
row.names(gene.fpkm) <- rows
gene.methylation.only <- gene.methylation %>% select(-(1:2))
gene.methylation.no_NA_genes <- gene.methylation.only %>% 
  select_if(~ !any(is.na(.)))
gene.fpkm.only <- gene.fpkm %>% select(-(1:6))
gene.methylation.rownames <- gene.methylation %>% remove_rownames %>% column_to_rownames(var="sample_id") %>% select(., -"X")
MyResult.pca <- pca(gene.methylation.no_NA_genes)   # 1 Run the method
plotIndiv(MyResult.pca)    # 2 Plot the samples

plotVar(MyResult.pca, cutoff = 0.8)      # 3 Plot the variables

plotIndiv(MyResult.pca, group = metadata$Sex, 
          legend = TRUE)

plotIndiv(MyResult.pca, ind.names = FALSE,
          group = metadata$Sex,
          pch = as.factor(metadata$Treatment),
          legend = TRUE, title = 'Gene methylation: PCA comp 1 - 2',
          legend.title = 'Sex', legend.title.pch = 'Exposure')

plot(MyResult.pca)

MyResult.pca
##   Eigenvalues for the first 2 principal components, see object$sdev^2: 
##       PC1       PC2 
## 623395.46  76732.05 
##   
##   Proportion of  explained variance for the first 2 principal components, see object$prop_expl_var: 
##        PC1             PC2      
## 0.33543662      0.04128798      
##   
##   Cumulative proportion of  explained variance for the first 2 principal components, see object$cum.var: 
##       PC1            PC2      
## 0.3354366      0.3767246      
##   
##   Other available components: 
##  -------------------- 
##   loading vectors: see object$rotation 
##   Other functions: 
##  -------------------- 
##   plotIndiv, plot, plotVar, selectVar, biplot
plotLoadings(MyResult.pca)

gene.methylation.no_NA_genes.non_distinct.only <- gene.methylation.no_NA_genes %>% select(where(~n_distinct(.) > 1 ))
MyResult.spca <- spca(gene.methylation.no_NA_genes.non_distinct.only, ncomp = 3, keepX = c(21,21,21)) # Max is 21 for each component

plotIndiv(MyResult.spca, group = metadata$Sex,   # 2 Plot the samples
          pch = as.factor(metadata$Treatment),
          legend = TRUE, title = 'Gene methylation, sPCA comp 1 - 2',
          legend.title = 'Sex', legend.title.pch = 'Exposure')

plotVar(MyResult.spca, cex = 1)                                        # 3 Plot the variables

selectVar(MyResult.spca, comp = 1)$value
##                     value.var
## gene.LOC111103309 -0.46711549
## gene.LOC111119712 -0.43006898
## gene.LOC111130064 -0.37260339
## gene.LOC111125856 -0.23223912
## gene.LOC111124634 -0.22763040
## gene.LOC111135417 -0.22537165
## gene.LOC111105773 -0.21035349
## gene.LOC111124843 -0.20890579
## gene.LOC111122495 -0.19782694
## gene.LOC111121296 -0.19731563
## gene.LOC111107792 -0.19714801
## gene.LOC111134426 -0.16529297
## gene.LOC111129084 -0.15717639
## gene.LOC111122401 -0.11466815
## gene.LOC111100582 -0.10146390
## gene.LOC111123892 -0.09984532
## gene.LOC111134568 -0.08551034
## gene.LOC111103502 -0.03946499
## gene.LOC111118013 -0.03735791
## gene.LOC111118072 -0.02215079
## gene.LOC111137051 -0.02112462
plotLoadings(MyResult.spca, comp=1)

plotLoadings(MyResult.spca, comp=2)

MyResult.spls <- spls(gene.fpkm.only,gene.methylation.no_NA_genes, keepX = c(25, 25), keepY = c(25,25))
## Warning in cor(A[[k]], variates.A[[k]]): the standard deviation is zero
plotIndiv(MyResult.spls)     ## sample plot

plotVar(MyResult.spls)       ## variable plot

plotIndiv(MyResult.spls, group = metadata$Treatment,
          rep.space = "XY-variate", legend = TRUE,
          legend.title = 'Treatment',
          ind.names = metadata$OldSample.ID,
          title = 'C.virginica gonad OA: sPLS')

plotIndiv(MyResult.spls, group = metadata$Sex,
          rep.space = "XY-variate", legend = TRUE,
          legend.title = 'Sex',
          ind.names = metadata$OldSample.ID,
          title = 'C.virginica gonad OA: sPLS')

plotIndiv(MyResult.spls, group=metadata$Treatment,
          pch = metadata$Sex,
          rep.space = "XY-variate",  legend = TRUE,
          legend.title = 'Treatment', legend.title.pch = 'Sex',
          ind.names = FALSE, 
          title = 'C.virginica gonad OA: sPLS')

# png("~/Downloads/spls-heatmap-01.png", width=1900, height=1900)

cim(MyResult.spls, comp = 1)
## Error in cim plot: figure margins too large. See ?cim for help.
plotLoadings(MyResult.spls, comp = 1, size.name = rel(0.5))