Workflow

Grabbing protein coding normalized genes

# Load up packages
suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(biomaRt))
# Load in raw counts
setwd("/sc/arion/scratch/rompag01")
counts <- read.csv("amygdala.raw.counts.csv",header=T,row.names=1)

# Remove outlier
counts <- counts[,-26] # Rat.47

# Filter counts
f.counts <- counts[rowMeans(counts)>10,]

# Filtering for protein-coding genes
ensembl = useMart("ensembl", dataset="rnorvegicus_gene_ensembl")
values <- unique(rownames(f.counts))
data <- getBM(attributes=c("external_gene_name","description","gene_biotype",
                           "ensembl_gene_id"), filters = "ensembl_gene_id", values = values, mart= ensembl)
temp <- f.counts
temp$ID <- rownames(f.counts)
f.counts.anno <- merge(temp,data,by.x="ID",by.y="ensembl_gene_id",all.x=T)
rownames(f.counts.anno) <- f.counts.anno$ID

f.counts.protein <- f.counts.anno[f.counts.anno$gene_biotype=="protein_coding",]
f.counts.final <- f.counts.protein[,2:45]  # Stripping away annotation columns


vst.counts <- vst(as.matrix(f.counts.final))
vst.all.genes <- vst(as.matrix(f.counts))
str(vst.counts)
##  num [1:14383, 1:44] 13 10.65 8.02 7.12 9.87 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:14383] "ENSRNOG00000000007" "ENSRNOG00000000010" "ENSRNOG00000000012" "ENSRNOG00000000017" ...
##   ..$ : chr [1:44] "merged_AdoTHC.Rat.10_S9" "merged_AdoTHC.Rat.11_S10" "merged_AdoTHC.Rat.12_S11" "merged_AdoTHC.Rat.13_S12" ...

PCA computation

# visualizing PCA result with library(ggfortify)
suppressPackageStartupMessages(library(ggfortify))
PCA <- prcomp(t(vst.counts))

Amount of variance accounted for by each PC and each PC for all subjects

summary(PCA) # Amount of variance accounted for by each PC
## Importance of components:
##                            PC1    PC2   PC3     PC4     PC5     PC6
## Standard deviation     10.5862 9.2665 7.575 6.55400 5.76714 4.40159
## Proportion of Variance  0.2208 0.1692 0.113 0.08462 0.06552 0.03817
## Cumulative Proportion   0.2208 0.3899 0.503 0.58761 0.65313 0.69130
##                            PC7     PC8     PC9    PC10    PC11    PC12
## Standard deviation     3.74006 3.57772 3.14019 2.94796 2.72347 2.62148
## Proportion of Variance 0.02756 0.02522 0.01943 0.01712 0.01461 0.01354
## Cumulative Proportion  0.71886 0.74407 0.76350 0.78062 0.79523 0.80877
##                           PC13    PC14    PC15    PC16    PC17    PC18
## Standard deviation     2.45560 2.35465 2.28727 2.22636 2.21569 2.05682
## Proportion of Variance 0.01188 0.01092 0.01031 0.00976 0.00967 0.00833
## Cumulative Proportion  0.82065 0.83157 0.84188 0.85164 0.86131 0.86965
##                           PC19    PC20    PC21    PC22    PC23    PC24
## Standard deviation     2.01286 1.95298 1.90845 1.88096 1.82618 1.77834
## Proportion of Variance 0.00798 0.00751 0.00718 0.00697 0.00657 0.00623
## Cumulative Proportion  0.87763 0.88514 0.89232 0.89929 0.90586 0.91209
##                           PC25    PC26    PC27    PC28    PC29    PC30
## Standard deviation     1.76929 1.72245 1.70411 1.63914 1.63310 1.62056
## Proportion of Variance 0.00617 0.00584 0.00572 0.00529 0.00525 0.00517
## Cumulative Proportion  0.91825 0.92410 0.92982 0.93511 0.94037 0.94554
##                           PC31    PC32    PC33    PC34    PC35    PC36
## Standard deviation     1.58578 1.54910 1.54823 1.51994 1.49723 1.48686
## Proportion of Variance 0.00495 0.00473 0.00472 0.00455 0.00442 0.00436
## Cumulative Proportion  0.95049 0.95522 0.95994 0.96449 0.96891 0.97327
##                           PC37    PC38    PC39    PC40    PC41    PC42
## Standard deviation     1.45470 1.43508 1.41762 1.38676 1.36369 1.34384
## Proportion of Variance 0.00417 0.00406 0.00396 0.00379 0.00366 0.00356
## Cumulative Proportion  0.97743 0.98149 0.98545 0.98924 0.99290 0.99646
##                           PC43      PC44
## Standard deviation     1.34041 5.806e-14
## Proportion of Variance 0.00354 0.000e+00
## Cumulative Proportion  1.00000 1.000e+00
PCA$x[,1:3] # PCA1-3 for all subjects
##                                   PC1         PC2          PC3
## merged_AdoTHC.Rat.10_S9   -0.04495532   9.4985823  10.35550903
## merged_AdoTHC.Rat.11_S10   6.10431407   1.9113113  -7.68124992
## merged_AdoTHC.Rat.12_S11  -0.30315701   9.1784156  10.24655887
## merged_AdoTHC.Rat.13_S12  -3.65850052  -0.2914637  -3.68235085
## merged_AdoTHC.Rat.14_S13  11.49073108  12.7722465  -3.22839720
## merged_AdoTHC.Rat.16_S14  -0.07448627   3.4484713  -7.42749936
## merged_AdoTHC.Rat.18_S15   1.87603765   1.7719211  -6.70255607
## merged_AdoTHC.Rat.19_S16 -10.25012700  -7.4042240  -1.88358264
## merged_AdoTHC.Rat.1_S1    -6.02313886  -7.8772728   1.30127202
## merged_AdoTHC.Rat.20_S17   7.44807949   5.7478109  -8.87495577
## merged_AdoTHC.Rat.21_S18  -0.27104306   7.2616326   3.42512081
## merged_AdoTHC.Rat.22_S19  19.00414822   8.4847194 -14.08922625
## merged_AdoTHC.Rat.23_S20   5.62384134  -1.9037748  -9.09907439
## merged_AdoTHC.Rat.24_S21  -0.47358155   4.0173297  -1.99115668
## merged_AdoTHC.Rat.25_S22  -7.34699838 -12.4683422 -11.65794755
## merged_AdoTHC.Rat.26_S23  -7.96672928   3.2079177   9.28810994
## merged_AdoTHC.Rat.27_S24  -7.85611117   2.5409066   6.11097822
## merged_AdoTHC.Rat.28_S25  14.85376366  14.0313015  -5.18095785
## merged_AdoTHC.Rat.29_S26  -4.15621677 -10.7557851 -10.05308525
## merged_AdoTHC.Rat.2_S2   -10.16976959   6.0715420  17.96380930
## merged_AdoTHC.Rat.30_S27  12.80606171  14.0248858  -0.52501633
## merged_AdoTHC.Rat.43_S28  -4.00580492  -3.1261192  -1.88288696
## merged_AdoTHC.Rat.44_S29  -9.40693620  -1.2174964   7.19210350
## merged_AdoTHC.Rat.45_S30 -12.14862414  -7.7562423  -1.51639210
## merged_AdoTHC.Rat.46_S31   7.06435421  11.5927328  -0.88760981
## merged_AdoTHC.Rat.48_S33  -1.03017866   3.1092773   0.06286498
## merged_AdoTHC.Rat.49_S34  -6.86215842  -6.6248704  -5.27730401
## merged_AdoTHC.Rat.4_S3   -15.69191200 -11.9792391  -4.45170105
## merged_AdoTHC.Rat.50_S35 -14.20638048  -5.0952628   7.97846840
## merged_AdoTHC.Rat.51_S36   0.50266468   3.5143549   1.95151847
## merged_AdoTHC.Rat.52_S37  -4.96369424  -1.4138371   0.62605297
## merged_AdoTHC.Rat.53_S38  -2.96962188  -8.8782299 -11.16240750
## merged_AdoTHC.Rat.54_S39   0.86948210   7.6725451   4.47264843
## merged_AdoTHC.Rat.55_S40  36.08643082 -34.5921518  13.64224206
## merged_AdoTHC.Rat.56_S41   1.54804374   2.1554268  12.68245673
## merged_AdoTHC.Rat.57_S42  29.39661389 -14.1972979   9.73738517
## merged_AdoTHC.Rat.58_S43  -2.55694178  -5.5107342  -3.58647018
## merged_AdoTHC.Rat.59_S44  -6.21537543  -6.9010186  -6.15592557
## merged_AdoTHC.Rat.5_S4   -13.32810317  -4.4498611   5.83711626
## merged_AdoTHC.Rat.60_S45   1.29780642   3.0489899  -1.58337904
## merged_AdoTHC.Rat.6_S5    -5.86906995   3.8974399  10.26737599
## merged_AdoTHC.Rat.7_S6    -6.79477540  -4.6559995  -3.71242406
## merged_AdoTHC.Rat.8_S7     1.68560128   5.1299618  -3.05254715
## merged_AdoTHC.Rat.9_S8     6.98641708  13.0095003   2.20451238

PCA plot for first two principal components

pca.plot <- autoplot(PCA) # can add metadata and assign colors- data='meta' colour = 'Group')
pca.plot

importance <- summary(PCA)
write.csv(importance$importance,file="Amygdala.RNAseq.PCA.summary.csv")
write.csv(PCA$x,file="Amygdala.RNAseq.PCA.values.for.each.subject.csv")

For contrast, with all genes

PCA.all <- prcomp(t(vst.all.genes))
pca.plot <- autoplot(PCA.all) # can add metadata and assign colors- data='meta' colour = 'Group')
pca.plot