setwd("/Users/dongzeyuan/Desktop/TRGN_lab/")
library(limma) 
library(edgeR) 


counts <- read.table("ProstCa_030921_2.txt")
file<-"ProstCa_030921_2.txt"
dim(counts)
## [1] 58779     6
colnames(counts)<-c("C1_1","C1_2","C1_3","T1_1","T1_2","T1_3")#NEW
x<-DGEList(counts) #
group<-as.factor(c(rep("C1",3),rep("T1",3))) #NEW
x$samples$group<-group
library(limma)
library(Glimma)
library(edgeR)
library(Homo.sapiens)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
## 
##     plotMA
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: OrganismDbi
## Loading required package: GenomicFeatures
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: GO.db
## 
## Loading required package: org.Hs.eg.db
## 
## Loading required package: TxDb.Hsapiens.UCSC.hg19.knownGene
#presentation
library(stringr)
setwd("/Users/dongzeyuan/Desktop/TRGN_lab/")
#install.packages("Homo.sapiens")
library(Homo.sapiens)
geneid <- rownames(x) 
gene <- str_match(geneid, "(\\w*).*")
geneid <- gene[,2]

genes <- select(Homo.sapiens, keys=geneid,
                columns=c("SYMBOL","TXCHROM"),
                keytype="ENSEMBL")
## 'select()' returned many:many mapping between keys and columns
dim(genes)
## [1] 60213     3
genes <- genes[!duplicated(genes$ENSEMBL),]

x$genes <- genes

samplenames <- substring(colnames(x), 6, nchar(colnames(x))) 
cpm <- cpm(x)
lcpm <- cpm(x, log=TRUE)
L <- mean(x$samples$lib.size) * 1e-6
M <- median(x$samples$lib.size) * 1e-6
c(L, M)
## [1] 43.72306 40.43761
summary(lcpm)
##       C1_1              C1_2             C1_3             T1_1        
##  Min.   :-4.4503   Min.   :-4.450   Min.   :-4.450   Min.   :-4.4503  
##  1st Qu.:-4.4503   1st Qu.:-4.450   1st Qu.:-4.450   1st Qu.:-4.4503  
##  Median :-1.0828   Median :-2.849   Median :-2.514   Median :-1.9200  
##  Mean   :-0.4579   Mean   :-1.130   Mean   :-0.967   Mean   :-0.8838  
##  3rd Qu.: 2.2291   3rd Qu.: 2.017   3rd Qu.: 2.257   3rd Qu.: 2.1005  
##  Max.   :15.7358   Max.   :15.899   Max.   :16.464   Max.   :16.3088  
##       T1_2              T1_3        
##  Min.   :-4.4503   Min.   :-4.4503  
##  1st Qu.:-4.4503   1st Qu.:-4.4503  
##  Median : 0.6721   Median :-2.3416  
##  Mean   : 0.2754   Mean   :-0.9174  
##  3rd Qu.: 3.1734   3rd Qu.: 2.0058  
##  Max.   :15.5588   Max.   :16.3109
keep.exprs <- filterByExpr(x)  
x <- x[keep.exprs,, keep.lib.sizes=FALSE]
dim(x)
## [1] 30027     6
lcpm.cutoff <- log2(5/M + 2/L)
library(RColorBrewer)
nsamples <- ncol(x)
col <- brewer.pal(nsamples, "Paired")

par(mfrow=c(1,2))
plot(density(lcpm[,1]), col=col[1], lwd=2, ylim=c(0,0.26), las=2, main="", xlab="")
title(main="A. Raw data", xlab="Log-cpm")
abline(v=lcpm.cutoff, lty=3)
for (i in 2:nsamples){
  den <- density(lcpm[,i])
  lines(den$x, den$y, col=col[i], lwd=2)
}
legend("topright", samplenames, text.col=col, bty="n")
lcpm <- cpm(x, log=TRUE)
plot(density(lcpm[,1]), col=col[1], lwd=2, ylim=c(0,0.26), las=2, main="", xlab="")
title(main="B. Filtered data", xlab="Log-cpm")
abline(v=lcpm.cutoff, lty=3)
for (i in 2:nsamples){
  den <- density(lcpm[,i])
  lines(den$x, den$y, col=col[i], lwd=2)
}
legend("topright", samplenames, text.col=col, bty="n")

x <- calcNormFactors(x, method = "TMM")
x$samples$norm.factors
## [1] 1.0321479 0.9002265 1.0233137 0.9688055 1.3125091 0.8271010
x2 <- x
x2$samples$norm.factors <- 1
x2$counts[,1] <- ceiling(x2$counts[,1]*0.05)
x2$counts[,2] <- x2$counts[,2]*5

par(mfrow=c(1,2))
lcpm <- cpm(x2, log=TRUE)
boxplot(lcpm, las=2, col=col, main="")
title(main="A. Example: Unnormalised data",ylab="Log-cpm")
x2 <- calcNormFactors(x2)  
x2$samples$norm.factors
## [1] 0.0683339 5.7762475 1.2625805 1.2339692 1.5945590 1.0197978
lcpm <- cpm(x2, log=TRUE)
boxplot(lcpm, las=2, col=col, main="")
title(main="B. Example: Normalised data",ylab="Log-cpm")

design <- model.matrix(~0+group)
colnames(design) <- gsub("group", "", colnames(design))
design
##   C1 T1
## 1  1  0
## 2  1  0
## 3  1  0
## 4  0  1
## 5  0  1
## 6  0  1
## attr(,"assign")
## [1] 1 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
contr.matrix <- makeContrasts(
   C1vsT1= C1 - T1,
   levels = colnames(design))
contr.matrix
##       Contrasts
## Levels C1vsT1
##     C1      1
##     T1     -1
par(mfrow=c(1,2))
v <- voom(x, design, plot=TRUE)
v
## An object of class "EList"
## $genes
##           ENSEMBL   SYMBOL TXCHROM
## 1 ENSG00000000003   TSPAN6    chrX
## 2 ENSG00000000005     TNMD    chrX
## 3 ENSG00000000419     DPM1   chr20
## 4 ENSG00000000457    SCYL3    chr1
## 5 ENSG00000000460 C1orf112    chr1
## 30022 more rows ...
## 
## $targets
##      group lib.size norm.factors
## C1_1    C1 43410027    1.0321479
## C1_2    C1 57984280    0.9002265
## C1_3    C1 39514220    1.0233137
## T1_1    T1 53111013    0.9688055
## T1_2    T1 37219342    1.3125091
## T1_3    T1 27207086    0.8271010
## 
## $E
##                         C1_1      C1_2      C1_3       T1_1     T1_2      T1_3
## ENSG00000000003.14  4.500357  4.850201  4.849885  4.7402280 4.529373  4.933662
## ENSG00000000005.5  -1.013692 -1.024700 -1.446319 -0.6224147 1.182899 -1.517983
## ENSG00000000419.12  3.041843  3.653174  3.727746  3.6386582 1.864168  4.282576
## ENSG00000000457.13  4.567772  4.645739  5.375620  4.8703674 4.077788  5.097501
## ENSG00000000460.16  3.021523  2.675740  3.428715  2.7016027 2.818193  2.944896
## 30022 more rows ...
## 
## $weights
##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]
## [1,] 4.3771247 4.6329833 4.2740829 4.5676182 4.2003682 3.7596742
## [2,] 0.2440368 0.3078621 0.2274268 0.4766986 0.3471982 0.2671885
## [3,] 3.0855775 3.5718464 2.9251478 3.1786667 2.5825194 2.0985551
## [4,] 4.4684432 4.6956192 4.3726970 4.5352295 4.1557993 3.7030934
## [5,] 2.5848926 3.0686155 2.4332656 2.6636523 2.1106906 1.6864280
## 30022 more rows ...
## 
## $design
##   C1 T1
## 1  1  0
## 2  1  0
## 3  1  0
## 4  0  1
## 5  0  1
## 6  0  1
## attr(,"assign")
## [1] 1 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"

vfit <- lmFit(v, design)
vfit <- contrasts.fit(vfit, contrasts=contr.matrix)
efit <- eBayes(vfit)
plotSA(efit)

summary(decideTests(efit,p.value = 0.05))
##        C1vsT1
## Down        0
## NotSig  30027
## Up          0
tfit <- treat(vfit, lfc=log2(1.8))
dt <- decideTests(tfit, p.value = 0.05)
summary(dt)
##        C1vsT1
## Down        0
## NotSig  30027
## Up          0
plotMD(tfit, column=1, status=dt[,1], main=colnames(tfit)[1], 
       xlim=c(-8,13))

C1vsT1 <- topTreat(tfit, coef=1, n=Inf)
head(C1vsT1)
##                            ENSEMBL     SYMBOL TXCHROM     logFC  AveExpr
## ENSG00000273777.4  ENSG00000273855       <NA>    <NA> -2.900723 2.845955
## ENSG00000272732.1  ENSG00000272804  KRTAP10-7   chr21 -2.625514 3.242335
## ENSG00000243365.3  ENSG00000243466    IGKV1-5    <NA> -4.150990 1.424784
## ENSG00000210140.1  ENSG00000210839 RNU6ATAC5P    <NA> -2.683541 3.481430
## ENSG00000224271.5  ENSG00000224302       <NA>    <NA> -2.524138 2.353195
## ENSG00000134612.12 ENSG00000134640     MTNR1B   chr11 -2.796207 2.215427
##                            t     P.Value adj.P.Val
## ENSG00000273777.4  -3.564315 0.001769231 0.9999805
## ENSG00000272732.1  -3.259206 0.003163032 0.9999805
## ENSG00000243365.3  -3.051043 0.004941342 0.9999805
## ENSG00000210140.1  -2.996491 0.005244118 0.9999805
## ENSG00000224271.5  -2.919336 0.006067087 0.9999805
## ENSG00000134612.12 -2.911848 0.006183691 0.9999805
library(gplots) 
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
C1vsT1.topgenes <- C1vsT1$ENSEMBL[1:100]
i <- which(v$genes$ENSEMBL %in% C1vsT1.topgenes)
mycol <- colorpanel(1000,"blue","white","red")
heatmap.2(lcpm[i,], scale="row",
   labRow=v$genes$SYMBOL[i], labCol=colnames(x), 
   col=mycol, trace="none", density.info="none", 
   lhei=NULL, dendrogram="column")

load('/Users/dongzeyuan/Desktop/TRGN_lab/human_c2_v5p2.rdata')
idx <- ids2indices(Hs.c2,id=rownames(v$genes))
cam.RaceVS <- camera(v,idx,design,contrast=contr.matrix[,1])
head(cam.RaceVS,20)
##                                                  NGenes Direction       PValue
## KEGG_PANCREATIC_CANCER                               65        Up 0.0005183241
## BARRIER_CANCER_RELAPSE_NORMAL_SAMPLE_UP              26        Up 0.0006908885
## MAGRANGEAS_MULTIPLE_MYELOMA_IGG_VS_IGA_UP            18        Up 0.0011836084
## REACTOME_SIGNALING_BY_FGFR1_MUTANTS                  25        Up 0.0017610200
## MIKHAYLOVA_OXIDATIVE_STRESS_RESPONSE_VIA_VHL_UP       6        Up 0.0020272148
## KANG_GIST_WITH_PDGFRA_DN                              5      Down 0.0020852547
## PUIFFE_INVASION_INHIBITED_BY_ASCITES_UP              70        Up 0.0021603387
## MIKKELSEN_MEF_ICP_WITH_H3K4ME3_AND_H3K27ME3          22        Up 0.0023700218
## WU_HBX_TARGETS_3_UP                                  18        Up 0.0026958433
## REACTOME_RNA_POL_I_TRANSCRIPTION_INITIATION          18        Up 0.0034709207
## WOO_LIVER_CANCER_RECURRENCE_DN                       68        Up 0.0039063178
## WENDT_COHESIN_TARGETS_UP                             21        Up 0.0044888897
## MIKKELSEN_IPS_ICP_WITH_H3K4ME3_AND_H327ME3           69        Up 0.0045691588
## REACTOME_SIGNALING_BY_FGFR1_FUSION_MUTANTS           15        Up 0.0049215854
## REACTOME_G_ALPHA_Z_SIGNALLING_EVENTS                 36        Up 0.0050400801
## BHATI_G2M_ARREST_BY_2METHOXYESTRADIOL_UP             70        Up 0.0052058851
## REACTOME_APOPTOTIC_CLEAVAGE_OF_CELLULAR_PROTEINS     31        Up 0.0055022029
## MCMURRAY_TP53_HRAS_COOPERATION_RESPONSE_DN           43        Up 0.0058120290
## OXFORD_RALB_TARGETS_DN                                8        Up 0.0064671374
## FONTAINE_FOLLICULAR_THYROID_ADENOMA_UP               47        Up 0.0065464775
##                                                        FDR
## KEGG_PANCREATIC_CANCER                           0.8837534
## BARRIER_CANCER_RELAPSE_NORMAL_SAMPLE_UP          0.8837534
## MAGRANGEAS_MULTIPLE_MYELOMA_IGG_VS_IGA_UP        0.8837534
## REACTOME_SIGNALING_BY_FGFR1_MUTANTS              0.8837534
## MIKHAYLOVA_OXIDATIVE_STRESS_RESPONSE_VIA_VHL_UP  0.8837534
## KANG_GIST_WITH_PDGFRA_DN                         0.8837534
## PUIFFE_INVASION_INHIBITED_BY_ASCITES_UP          0.8837534
## MIKKELSEN_MEF_ICP_WITH_H3K4ME3_AND_H3K27ME3      0.8837534
## WU_HBX_TARGETS_3_UP                              0.8837534
## REACTOME_RNA_POL_I_TRANSCRIPTION_INITIATION      0.8837534
## WOO_LIVER_CANCER_RECURRENCE_DN                   0.8837534
## WENDT_COHESIN_TARGETS_UP                         0.8837534
## MIKKELSEN_IPS_ICP_WITH_H3K4ME3_AND_H327ME3       0.8837534
## REACTOME_SIGNALING_BY_FGFR1_FUSION_MUTANTS       0.8837534
## REACTOME_G_ALPHA_Z_SIGNALLING_EVENTS             0.8837534
## BHATI_G2M_ARREST_BY_2METHOXYESTRADIOL_UP         0.8837534
## REACTOME_APOPTOTIC_CLEAVAGE_OF_CELLULAR_PROTEINS 0.8837534
## MCMURRAY_TP53_HRAS_COOPERATION_RESPONSE_DN       0.8837534
## OXFORD_RALB_TARGETS_DN                           0.8837534
## FONTAINE_FOLLICULAR_THYROID_ADENOMA_UP           0.8837534
barcodeplot(efit$t[,1], index=idx$PUIFFE_INVASION_INHIBITED_BY_ASCITES_UP, 
            index2=idx$WOO_LIVER_CANCER_RECURRENCE_DN, main="C1vsT1")