#library(perturb.met)
devtools::load_all()
#> Loading perturb.met
library(dplyr)
#> Warning: package 'dplyr' was built under R version 3.5.2
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
ad_sc<-read.table(system.file("extdata", "neuron_scRNA_rawCounts.tsv", package = "perturb.met"),header=T,sep="\t")
sc_meta<-read.table(system.file("extdata", "neuron_scRNA_metadata.tsv", package = "perturb.met"),header=T,sep="\t")ad_genes<-ad_sc$geneName
ad_sc<-ad_sc[,-1]
bad<-sc_meta$cellType %in% c("doublet","unID")
ad_sc<-ad_sc[,!bad]
sc_meta<-sc_meta[!bad,]
total<-colSums(ad_sc)
total<-total/median(total)
ad_sc<-ad_sc/matrix(rep(total,nrow(ad_sc)),nrow=nrow(ad_sc),ncol=ncol(ad_sc),byrow = T)The results are saved separately in a R data file.
#cell_types<-c("n2","n3","n4","n5")
cell_types<-"n5" #only run one subtype to save time.
for (i in seq_along(cell_types)){
#good<- rowSums(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='ct']>0)>=5 | rowSums(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='AD']>0)>=5
ref_freq<-rowSums(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='ct']>0)
ad_freq<- rowSums(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='AD']>0)
expression_level_final<-data.frame(genes=ad_genes,
exprs_ref=100*rowMeans(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='ct']),
exprs_treat=100*rowMeans(ad_sc[,sc_meta$subIDn==cell_types[i] & sc_meta$batchCond=='AD']))
good<- expression_level_final$exprs_ref>0 | expression_level_final$exprs_treat>0
expression_level_final<-expression_level_final[good,]
ref_freq<-ref_freq[good]
ad_freq<-ad_freq[good]
good<- (expression_level_final$exprs_ref>expression_level_final$exprs_treat & ref_freq>=10) |(expression_level_final$exprs_ref<expression_level_final$exprs_treat & ad_freq>=10) #require a gene to be expressed in at least 10 cells in either condition.
expression_level_final<-expression_level_final[good,]
expression_level_final<-na.omit(expression_level_final)
result_ad_sc<-findPerturbMet(expression_level_final,mets2genes,4)
save(result_ad_sc,file=paste("human_AD_sc_perturbMet_neuron_subcluster_filtered_",cell_types[i],".RData",sep=""))
}load("human_AD_sc_perturbMet_neuron_subcluster_filtered_n5.RData")
perturbed_mets<-result_ad_sc$perturbed_mets
met_gene_pairs<-result_ad_sc$met_gene_pairs
head(perturbed_mets[,c("mets","combined_pval","names")],n=10)
#> mets combined_pval names
#> 1 hc02057 0.003584326 1,2-Diacylglycerol-Ld-Pe-Pool
#> 2 gam6p 0.004148028 D-Glucosamine-6-Phosphate
#> 3 chsterol 0.004178578 Cholesterol
#> 4 itp 0.005265598 Iinosine-5'-Triphosphate
#> 5 idp 0.006318889 Inosine-5'-Diphosphate
#> 6 hc02054 0.006526106 Phosphatidate-Ld-Sm-Pool
#> 7 f6p 0.008702573 D-Fructose 6-Phosphate
#> 8 hc02060 0.009705944 1,2-Diacylglycerol-Ld-Sm-Pool
#> 9 dadp 0.010979746 Deoxyadenosine Diphosphate
#> 10 datp 0.014149049 Deoxyadenosine Triphosphatemet2look<-'chsterol'
sub_met_genes<-met_gene_pairs[met_gene_pairs$mets==met2look,]
sub_met_genes$exprs_ref<-round(sub_met_genes$exprs_ref,digits = 1)
sub_met_genes$exprs_treat<-round(sub_met_genes$exprs_treat,digits = 1)
sub_met_genes$pct_ref<-(sub_met_genes$exprs_ref+1)/sum(sub_met_genes$exprs_ref+1)
sub_met_genes$pct_treat<-(sub_met_genes$exprs_treat+1)/sum(sub_met_genes$exprs_treat+1)
sub_met_genes$contribution<- sub_met_genes$pct_treat*log2(sub_met_genes$pct_treat/sub_met_genes$pct_ref)
sub_met_genes<-arrange(sub_met_genes,desc(abs(contribution)))
sub_met_genes
#> mets names kegg hmdb genes exprs_ref exprs_treat pct_ref
#> 1 chsterol Cholesterol C00187 HMDB00067 HDLBP 5.7 23.0 0.4060606
#> 2 chsterol Cholesterol C00187 HMDB00067 CYP46A1 8.8 5.2 0.5939394
#> pct_treat contribution
#> 1 0.794702 0.7698428
#> 2 0.205298 -0.3146389
source( "~/bin/R_scripts/plot_pct_change.R")
plot_pct_change(met_gene_pairs,"chsterol",c(0,1),6,100) #c(0,1) means placing the legend at top left corner; 6 means maximum number of genes to plot; 100 means y axis limit
#> Loading required package: ggplot2Another example.