Grabbing protein coding normalized genes
# Load up packages
suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(biomaRt))
# Load in raw counts
setwd("/sc/arion/scratch/rompag01")
counts <- read.csv("amygdala.raw.counts.csv",header=T,row.names=1)
# Remove outlier
counts <- counts[,-26] # Rat.47
# Filter counts
f.counts <- counts[rowMeans(counts)>10,]
# Filtering for protein-coding genes
ensembl = useMart("ensembl", dataset="rnorvegicus_gene_ensembl")
values <- unique(rownames(f.counts))
data <- getBM(attributes=c("external_gene_name","description","gene_biotype",
"ensembl_gene_id"), filters = "ensembl_gene_id", values = values, mart= ensembl)
temp <- f.counts
temp$ID <- rownames(f.counts)
f.counts.anno <- merge(temp,data,by.x="ID",by.y="ensembl_gene_id",all.x=T)
rownames(f.counts.anno) <- f.counts.anno$ID
f.counts.protein <- f.counts.anno[f.counts.anno$gene_biotype=="protein_coding",]
f.counts.final <- f.counts.protein[,2:45] # Stripping away annotation columns
vst.counts <- vst(as.matrix(f.counts.final))
vst.all.genes <- vst(as.matrix(f.counts))
str(vst.counts)
## num [1:14383, 1:44] 13 10.65 8.02 7.12 9.87 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:14383] "ENSRNOG00000000007" "ENSRNOG00000000010" "ENSRNOG00000000012" "ENSRNOG00000000017" ...
## ..$ : chr [1:44] "merged_AdoTHC.Rat.10_S9" "merged_AdoTHC.Rat.11_S10" "merged_AdoTHC.Rat.12_S11" "merged_AdoTHC.Rat.13_S12" ...
PCA computation
# visualizing PCA result with library(ggfortify)
suppressPackageStartupMessages(library(ggfortify))
PCA <- prcomp(t(vst.counts))
Amount of variance accounted for by each PC and each PC for all subjects
summary(PCA) # Amount of variance accounted for by each PC
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 10.5862 9.2665 7.575 6.55400 5.76714 4.40159
## Proportion of Variance 0.2208 0.1692 0.113 0.08462 0.06552 0.03817
## Cumulative Proportion 0.2208 0.3899 0.503 0.58761 0.65313 0.69130
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 3.74006 3.57772 3.14019 2.94796 2.72347 2.62148
## Proportion of Variance 0.02756 0.02522 0.01943 0.01712 0.01461 0.01354
## Cumulative Proportion 0.71886 0.74407 0.76350 0.78062 0.79523 0.80877
## PC13 PC14 PC15 PC16 PC17 PC18
## Standard deviation 2.45560 2.35465 2.28727 2.22636 2.21569 2.05682
## Proportion of Variance 0.01188 0.01092 0.01031 0.00976 0.00967 0.00833
## Cumulative Proportion 0.82065 0.83157 0.84188 0.85164 0.86131 0.86965
## PC19 PC20 PC21 PC22 PC23 PC24
## Standard deviation 2.01286 1.95298 1.90845 1.88096 1.82618 1.77834
## Proportion of Variance 0.00798 0.00751 0.00718 0.00697 0.00657 0.00623
## Cumulative Proportion 0.87763 0.88514 0.89232 0.89929 0.90586 0.91209
## PC25 PC26 PC27 PC28 PC29 PC30
## Standard deviation 1.76929 1.72245 1.70411 1.63914 1.63310 1.62056
## Proportion of Variance 0.00617 0.00584 0.00572 0.00529 0.00525 0.00517
## Cumulative Proportion 0.91825 0.92410 0.92982 0.93511 0.94037 0.94554
## PC31 PC32 PC33 PC34 PC35 PC36
## Standard deviation 1.58578 1.54910 1.54823 1.51994 1.49723 1.48686
## Proportion of Variance 0.00495 0.00473 0.00472 0.00455 0.00442 0.00436
## Cumulative Proportion 0.95049 0.95522 0.95994 0.96449 0.96891 0.97327
## PC37 PC38 PC39 PC40 PC41 PC42
## Standard deviation 1.45470 1.43508 1.41762 1.38676 1.36369 1.34384
## Proportion of Variance 0.00417 0.00406 0.00396 0.00379 0.00366 0.00356
## Cumulative Proportion 0.97743 0.98149 0.98545 0.98924 0.99290 0.99646
## PC43 PC44
## Standard deviation 1.34041 5.806e-14
## Proportion of Variance 0.00354 0.000e+00
## Cumulative Proportion 1.00000 1.000e+00
PCA$x[,1:3] # PCA1-3 for all subjects
## PC1 PC2 PC3
## merged_AdoTHC.Rat.10_S9 -0.04495532 9.4985823 10.35550903
## merged_AdoTHC.Rat.11_S10 6.10431407 1.9113113 -7.68124992
## merged_AdoTHC.Rat.12_S11 -0.30315701 9.1784156 10.24655887
## merged_AdoTHC.Rat.13_S12 -3.65850052 -0.2914637 -3.68235085
## merged_AdoTHC.Rat.14_S13 11.49073108 12.7722465 -3.22839720
## merged_AdoTHC.Rat.16_S14 -0.07448627 3.4484713 -7.42749936
## merged_AdoTHC.Rat.18_S15 1.87603765 1.7719211 -6.70255607
## merged_AdoTHC.Rat.19_S16 -10.25012700 -7.4042240 -1.88358264
## merged_AdoTHC.Rat.1_S1 -6.02313886 -7.8772728 1.30127202
## merged_AdoTHC.Rat.20_S17 7.44807949 5.7478109 -8.87495577
## merged_AdoTHC.Rat.21_S18 -0.27104306 7.2616326 3.42512081
## merged_AdoTHC.Rat.22_S19 19.00414822 8.4847194 -14.08922625
## merged_AdoTHC.Rat.23_S20 5.62384134 -1.9037748 -9.09907439
## merged_AdoTHC.Rat.24_S21 -0.47358155 4.0173297 -1.99115668
## merged_AdoTHC.Rat.25_S22 -7.34699838 -12.4683422 -11.65794755
## merged_AdoTHC.Rat.26_S23 -7.96672928 3.2079177 9.28810994
## merged_AdoTHC.Rat.27_S24 -7.85611117 2.5409066 6.11097822
## merged_AdoTHC.Rat.28_S25 14.85376366 14.0313015 -5.18095785
## merged_AdoTHC.Rat.29_S26 -4.15621677 -10.7557851 -10.05308525
## merged_AdoTHC.Rat.2_S2 -10.16976959 6.0715420 17.96380930
## merged_AdoTHC.Rat.30_S27 12.80606171 14.0248858 -0.52501633
## merged_AdoTHC.Rat.43_S28 -4.00580492 -3.1261192 -1.88288696
## merged_AdoTHC.Rat.44_S29 -9.40693620 -1.2174964 7.19210350
## merged_AdoTHC.Rat.45_S30 -12.14862414 -7.7562423 -1.51639210
## merged_AdoTHC.Rat.46_S31 7.06435421 11.5927328 -0.88760981
## merged_AdoTHC.Rat.48_S33 -1.03017866 3.1092773 0.06286498
## merged_AdoTHC.Rat.49_S34 -6.86215842 -6.6248704 -5.27730401
## merged_AdoTHC.Rat.4_S3 -15.69191200 -11.9792391 -4.45170105
## merged_AdoTHC.Rat.50_S35 -14.20638048 -5.0952628 7.97846840
## merged_AdoTHC.Rat.51_S36 0.50266468 3.5143549 1.95151847
## merged_AdoTHC.Rat.52_S37 -4.96369424 -1.4138371 0.62605297
## merged_AdoTHC.Rat.53_S38 -2.96962188 -8.8782299 -11.16240750
## merged_AdoTHC.Rat.54_S39 0.86948210 7.6725451 4.47264843
## merged_AdoTHC.Rat.55_S40 36.08643082 -34.5921518 13.64224206
## merged_AdoTHC.Rat.56_S41 1.54804374 2.1554268 12.68245673
## merged_AdoTHC.Rat.57_S42 29.39661389 -14.1972979 9.73738517
## merged_AdoTHC.Rat.58_S43 -2.55694178 -5.5107342 -3.58647018
## merged_AdoTHC.Rat.59_S44 -6.21537543 -6.9010186 -6.15592557
## merged_AdoTHC.Rat.5_S4 -13.32810317 -4.4498611 5.83711626
## merged_AdoTHC.Rat.60_S45 1.29780642 3.0489899 -1.58337904
## merged_AdoTHC.Rat.6_S5 -5.86906995 3.8974399 10.26737599
## merged_AdoTHC.Rat.7_S6 -6.79477540 -4.6559995 -3.71242406
## merged_AdoTHC.Rat.8_S7 1.68560128 5.1299618 -3.05254715
## merged_AdoTHC.Rat.9_S8 6.98641708 13.0095003 2.20451238
PCA plot for first two principal components
pca.plot <- autoplot(PCA) # can add metadata and assign colors- data='meta' colour = 'Group')
pca.plot
importance <- summary(PCA)
write.csv(importance$importance,file="Amygdala.RNAseq.PCA.summary.csv")
write.csv(PCA$x,file="Amygdala.RNAseq.PCA.values.for.each.subject.csv")
For contrast, with all genes
PCA.all <- prcomp(t(vst.all.genes))
pca.plot <- autoplot(PCA.all) # can add metadata and assign colors- data='meta' colour = 'Group')
pca.plot