perturb.met workflow

#library(perturb.met)
devtools::load_all()
#> Loading perturb.met
library(dplyr)
#> Warning: package 'dplyr' was built under R version 3.5.2
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
ad_sc<-read.table(system.file("extdata", "neuron_scRNA_rawCounts.tsv", package = "perturb.met"),header=T,sep="\t")
sc_meta<-read.table(system.file("extdata", "neuron_scRNA_metadata.tsv", package = "perturb.met"),header=T,sep="\t")
ad_genes<-ad_sc$geneName
ad_sc<-ad_sc[,-1]

bad<-sc_meta$cellType %in% c("doublet","unID")
ad_sc<-ad_sc[,!bad]
sc_meta<-sc_meta[!bad,]

total<-colSums(ad_sc)
total<-total/median(total)
ad_sc<-ad_sc/matrix(rep(total,nrow(ad_sc)),nrow=nrow(ad_sc),ncol=ncol(ad_sc),byrow = T)

Run perturb-Met analysis on each neuron sub-type separately

The results are saved separately in a R data file.

#cell_types<-c("n2","n3","n4","n5")
cell_types<-"n5" #only run one subtype to save time.  
for (i in seq_along(cell_types)){
  #good<- rowSums(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='ct']>0)>=5 | rowSums(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='AD']>0)>=5
  ref_freq<-rowSums(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='ct']>0)
  ad_freq<- rowSums(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='AD']>0)
  expression_level_final<-data.frame(genes=ad_genes,
                                     exprs_ref=100*rowMeans(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='ct']),
                                     exprs_treat=100*rowMeans(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='AD']))
  good<- expression_level_final$exprs_ref>0 | expression_level_final$exprs_treat>0
  expression_level_final<-expression_level_final[good,]
  ref_freq<-ref_freq[good]
  ad_freq<-ad_freq[good]
  good<- (expression_level_final$exprs_ref>expression_level_final$exprs_treat & ref_freq>=10) |(expression_level_final$exprs_ref<expression_level_final$exprs_treat & ad_freq>=10) #require a gene to be expressed in at least 10 cells in either condition.  
  expression_level_final<-expression_level_final[good,]
  
  expression_level_final<-na.omit(expression_level_final)
  result_ad_sc<-findPerturbMet(expression_level_final,mets2genes,4)
  save(result_ad_sc,file=paste("human_AD_sc_perturbMet_neuron_subcluster_filtered_",cell_types[i],".RData",sep=""))
}

Illustrate how to study the output of perturb-Met

load("human_AD_sc_perturbMet_neuron_subcluster_filtered_n5.RData")

perturbed_mets<-result_ad_sc$perturbed_mets
met_gene_pairs<-result_ad_sc$met_gene_pairs
head(perturbed_mets[,c("mets","combined_pval","names")],n=10)
#>        mets combined_pval                         names
#> 1   hc02057   0.003584326 1,2-Diacylglycerol-Ld-Pe-Pool
#> 2     gam6p   0.004148028     D-Glucosamine-6-Phosphate
#> 3  chsterol   0.004178578                   Cholesterol
#> 4       itp   0.005265598      Iinosine-5'-Triphosphate
#> 5       idp   0.006318889        Inosine-5'-Diphosphate
#> 6   hc02054   0.006526106      Phosphatidate-Ld-Sm-Pool
#> 7       f6p   0.008702573        D-Fructose 6-Phosphate
#> 8   hc02060   0.009705944 1,2-Diacylglycerol-Ld-Sm-Pool
#> 9      dadp   0.010979746    Deoxyadenosine Diphosphate
#> 10     datp   0.014149049   Deoxyadenosine Triphosphate
met2look<-'chsterol'
sub_met_genes<-met_gene_pairs[met_gene_pairs$mets==met2look,]
sub_met_genes$exprs_ref<-round(sub_met_genes$exprs_ref,digits = 1)
sub_met_genes$exprs_treat<-round(sub_met_genes$exprs_treat,digits = 1)
sub_met_genes$pct_ref<-(sub_met_genes$exprs_ref+1)/sum(sub_met_genes$exprs_ref+1)
sub_met_genes$pct_treat<-(sub_met_genes$exprs_treat+1)/sum(sub_met_genes$exprs_treat+1)
sub_met_genes$contribution<- sub_met_genes$pct_treat*log2(sub_met_genes$pct_treat/sub_met_genes$pct_ref)
sub_met_genes<-arrange(sub_met_genes,desc(abs(contribution)))

sub_met_genes
#>       mets       names   kegg      hmdb   genes exprs_ref exprs_treat   pct_ref
#> 1 chsterol Cholesterol C00187 HMDB00067   HDLBP       5.7        23.0 0.4060606
#> 2 chsterol Cholesterol C00187 HMDB00067 CYP46A1       8.8         5.2 0.5939394
#>   pct_treat contribution
#> 1  0.794702    0.7698428
#> 2  0.205298   -0.3146389

source( "~/bin/R_scripts/plot_pct_change.R")
plot_pct_change(met_gene_pairs,"chsterol",c(0,1),6,100) #c(0,1) means placing the legend at top left corner; 6 means maximum number of genes to plot; 100 means y axis limit
#> Loading required package: ggplot2

Another example.

plot_pct_change(met_gene_pairs,"hc02057",c(1,1),6,50) #legend at top right; y axis limit at 50.