module-covariates-regression

Regression analysis of covariates (categorical), on eigengenes (numerial).

a module-covariate correlation for each covariate to see if it interacts with the modules’ eigengenes

set the libraries

set the directory and relevant files

## [1] "for_modules" "viralTraits"

## [1] "MEs"          "moduleLabels" "moduleColors" "geneTree"

PCAs of libraries, for each covariate, colored by the different covariate level:

# join the MEs and the ModTraitFac into one data.frame: "Modules_Covar" 
MEs2 <- rownames_to_column(MEs, var = "Library")
Modules_Covar <- left_join(MEs2, ModTraitFac, by = "Library")

## Warning: Column `Library` joining character vector and factor, coercing into
## character vector

#plot PCA, for example - "mite stage" 

PC <- prcomp(MEs)
PCi<-data.frame(PC$x,Stage=Modules_Covar$mite_stage)

p <- autoplotly(ggplot(PCi,aes(x=PC1,y=PC2,col=Stage), dynamicTicks = TRUE, tooltip = "all")+
   geom_point(size=3,alpha=0.5)+ #Size and alpha just for fun
   scale_color_manual(values = c("#FF1BB3","#A7FF5B","#99554D", "#1AB2FF"))+ #your colors here
   theme_classic())
p

run regression analysis for each pair of module ~ covariate (15 x 6)

#make a function that takes variables: x, y and "lable", runs lm(y~x), and returns a dataframe with the desired parameters
corFun <- function(x, y, label) {
  s <- summary(lm(y~x))
  return(data.frame(coefficient = rownames(s$coefficients),
             estimate = s$coefficients[,1],
             p = s$coefficients[,4],
             r.sqr = s$r.squared,
             name = label))
}

# run the function (corFun) where x = one of the 6 covariates,  y = one of the 15 modules, lable = covariate name.

# first make a list , "dat", for the final output. 
dat <- list()
# now we run the "corFun" for each of the covariates (x), 
# the function runs in a loop against all 15 modules (for each "i" in the "MEs" dataframe)
for (i in 1:(ncol(MEs))) {
  mite_stage <- corFun(x = ModTraitFac$mite_stage, y = MEs[,i], label = "mite_stage")
  bee_sp <- corFun(x = ModTraitFac$bee_sp, y = MEs[,i], label = "bee_sp")
  mite_sp <- corFun(x = ModTraitFac$mite_sp, y = MEs[,i], label = "mite_sp")
  collec_method <- corFun(x = ModTraitFac$collec_method, y = MEs[,i], label = "collec_method")
  Lib_select <- corFun(x = ModTraitFac$Lib_select, y = MEs[,i], label = "Lib_select")
  Lib_treat <- corFun(x = ModTraitFac$Lib_treat, y = MEs[,i], label = "Lib_treat")
  # now bind all the correlations outputs in "dat" list 
  dat[[i]] <- bind_rows(mite_stage, bee_sp, mite_sp, collec_method, Lib_select, Lib_treat)
}

# add the modules names to each analysis (list)
names(dat) <- names(MEs)

# the "dat" is a list of 15 lists. each contains the dataframe generated by the "corFun" function. 
# you can look at each of the modules results, by "dat$___".
# e.g. Magenta module:
dat$MEmagenta

##    coefficient     estimate          p       r.sqr          name
## 1  (Intercept) -0.003229689 0.84588930 0.032364175    mite_stage
## 2  xadult_Male -0.025128892 0.66777471 0.032364175    mite_stage
## 3         xegg  0.054722527 0.66562193 0.032364175    mite_stage
## 4       xnymph  0.094693809 0.20544369 0.032364175    mite_stage
## 5  (Intercept)  0.065030987 0.29329783 0.081727287        bee_sp
## 6          xAm -0.064528553 0.31354766 0.081727287        bee_sp
## 7       xAm_In -0.258901603 0.06386964 0.081727287        bee_sp
## 8       xAm_Sy -0.171251924 0.11219010 0.081727287        bee_sp
## 9          xNI -0.025680245 0.78497690 0.081727287        bee_sp
## 10 (Intercept) -0.002208115 0.89421422 0.002038073       mite_sp
## 11         xVj  0.016192845 0.71889503 0.002038073       mite_sp
## 12 (Intercept) -0.041282918 0.22425125 0.073435375 collec_method
## 13      xbrood  0.040900522 0.28592734 0.073435375 collec_method
## 14         xNI  0.133724676 0.02902938 0.073435375 collec_method
## 15 (Intercept) -0.005548055 0.75040098 0.028011849    Lib_select
## 16        xPCR -0.074125210 0.55973478 0.028011849    Lib_select
## 17      xPolyA  0.128183197 0.31449903 0.028011849    Lib_select
## 18     xRANDOM  0.026009471 0.51897082 0.028011849    Lib_select
## 19 (Intercept) -0.014851889 0.41139790 0.044865911     Lib_treat
## 20         xNI  0.072069511 0.09776502 0.044865911     Lib_treat
## 21   xrRNA_dpl  0.028836619 0.52213024 0.044865911     Lib_treat

plot the Padjust of the module-covariates Regression

code example: Magenta module

# in this chunk, replace ____ with the desired module

# pick each of the modules, by "dat$___".
# e.g. Magenta module:
module <- dat$MEmagenta

# calculate the p.adjust for each p-value, and add the column of adjusted pvalues
Padjust <- p.adjust(module$p, method = "fdr")
module <- module %>% add_column(Padjust) 
module

##    coefficient     estimate          p       r.sqr          name   Padjust
## 1  (Intercept) -0.003229689 0.84588930 0.032364175    mite_stage 0.8881838
## 2  xadult_Male -0.025128892 0.66777471 0.032364175    mite_stage 0.8676060
## 3         xegg  0.054722527 0.66562193 0.032364175    mite_stage 0.8676060
## 4       xnymph  0.094693809 0.20544369 0.032364175    mite_stage 0.6604480
## 5  (Intercept)  0.065030987 0.29329783 0.081727287        bee_sp 0.6604480
## 6          xAm -0.064528553 0.31354766 0.081727287        bee_sp 0.6604480
## 7       xAm_In -0.258901603 0.06386964 0.081727287        bee_sp 0.5889980
## 8       xAm_Sy -0.171251924 0.11219010 0.081727287        bee_sp 0.5889980
## 9          xNI -0.025680245 0.78497690 0.081727287        bee_sp 0.8676060
## 10 (Intercept) -0.002208115 0.89421422 0.002038073       mite_sp 0.8942142
## 11         xVj  0.016192845 0.71889503 0.002038073       mite_sp 0.8676060
## 12 (Intercept) -0.041282918 0.22425125 0.073435375 collec_method 0.6604480
## 13      xbrood  0.040900522 0.28592734 0.073435375 collec_method 0.6604480
## 14         xNI  0.133724676 0.02902938 0.073435375 collec_method 0.5889980
## 15 (Intercept) -0.005548055 0.75040098 0.028011849    Lib_select 0.8676060
## 16        xPCR -0.074125210 0.55973478 0.028011849    Lib_select 0.8396022
## 17      xPolyA  0.128183197 0.31449903 0.028011849    Lib_select 0.6604480
## 18     xRANDOM  0.026009471 0.51897082 0.028011849    Lib_select 0.8396022
## 19 (Intercept) -0.014851889 0.41139790 0.044865911     Lib_treat 0.7853960
## 20         xNI  0.072069511 0.09776502 0.044865911     Lib_treat 0.5889980
## 21   xrRNA_dpl  0.028836619 0.52213024 0.044865911     Lib_treat 0.8396022

# make a wider table to plot
plotmodule <- module %>% pivot_wider(id_cols = c("name", "coefficient"), values_from = Padjust)
plotmodule

## # A tibble: 14 x 7
##    coefficient mite_stage bee_sp mite_sp collec_method Lib_select Lib_treat
##    <chr>            <dbl>  <dbl>   <dbl>         <dbl>      <dbl>     <dbl>
##  1 (Intercept)      0.888  0.660   0.894         0.660      0.868     0.785
##  2 xadult_Male      0.868 NA      NA            NA         NA        NA    
##  3 xegg             0.868 NA      NA            NA         NA        NA    
##  4 xnymph           0.660 NA      NA            NA         NA        NA    
##  5 xAm             NA      0.660  NA            NA         NA        NA    
##  6 xAm_In          NA      0.589  NA            NA         NA        NA    
##  7 xAm_Sy          NA      0.589  NA            NA         NA        NA    
##  8 xNI             NA      0.868  NA             0.589     NA         0.589
##  9 xVj             NA     NA       0.868        NA         NA        NA    
## 10 xbrood          NA     NA      NA             0.660     NA        NA    
## 11 xPCR            NA     NA      NA            NA          0.840    NA    
## 12 xPolyA          NA     NA      NA            NA          0.660    NA    
## 13 xRANDOM         NA     NA      NA            NA          0.840    NA    
## 14 xrRNA_dpl       NA     NA      NA            NA         NA         0.840

#make the variables into numeric
plotmodule[is.na(x = plotmodule)] <- 0
cols.num <- plotmodule[,c(2:7)]  
plotmodule <- plotmodule %>% column_to_rownames(var = "coefficient") 
plotmodule <- data.matrix(plotmodule)
plotmodule

##             mite_stage   bee_sp   mite_sp collec_method Lib_select Lib_treat
## (Intercept)  0.8881838 0.660448 0.8942142      0.660448  0.8676060 0.7853960
## xadult_Male  0.8676060 0.000000 0.0000000      0.000000  0.0000000 0.0000000
## xegg         0.8676060 0.000000 0.0000000      0.000000  0.0000000 0.0000000
## xnymph       0.6604480 0.000000 0.0000000      0.000000  0.0000000 0.0000000
## xAm          0.0000000 0.660448 0.0000000      0.000000  0.0000000 0.0000000
## xAm_In       0.0000000 0.588998 0.0000000      0.000000  0.0000000 0.0000000
## xAm_Sy       0.0000000 0.588998 0.0000000      0.000000  0.0000000 0.0000000
## xNI          0.0000000 0.867606 0.0000000      0.588998  0.0000000 0.5889980
## xVj          0.0000000 0.000000 0.8676060      0.000000  0.0000000 0.0000000
## xbrood       0.0000000 0.000000 0.0000000      0.660448  0.0000000 0.0000000
## xPCR         0.0000000 0.000000 0.0000000      0.000000  0.8396022 0.0000000
## xPolyA       0.0000000 0.000000 0.0000000      0.000000  0.6604480 0.0000000
## xRANDOM      0.0000000 0.000000 0.0000000      0.000000  0.8396022 0.0000000
## xrRNA_dpl    0.0000000 0.000000 0.0000000      0.000000  0.0000000 0.8396022

#plot it (dont forget to change the title "____ reg Padjust")
heatmap.2(plotmodule, main = "Magenta reg Padjust", xlab = "Covariates", ylab = "Levels", cexRow=1,cexCol=1,margins=c(5,8),srtCol=30, breaks=c(0,0.001,0.05,1), col=c("white", "orange", "blue"),
# aiming for >0.05 is blue
dendrogram="none", trace="none", key = FALSE) 

legend(x="left", legend=c("NA", "<0.05", ">0.05"), 
     fill=c("white", "orange", "blue"))

salmon

yellow

purple

black

green

turquoise

blue

greenyellow

brown

cyan

red

pink

tan

grey

other codes, not used for this analysis: ### DONT run - specifically for “MEmagenta” module and “mite_stage” covariate: