library(gplots)
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
library(limma)
library(edgeR)
# NOTE(review): setwd() ties the script to one machine; it is kept because the
# relative "Raw_data" paths below depend on it.
setwd("~/Projects//2013-10_HTL_RNASeq")
# Read and fix data
# Both count tables are tab-separated; the first column becomes the row names.
featureC <- read.csv("Raw_data//STAR.merged.20131027.featureCounts", sep = "\t", row.names = 1)
HTSeqC <- read.csv("Raw_data//htseq-count_MergedFiles.txt", sep = "\t", row.names = 1)
# The tables are bound column-wise and share one set of sample labels, so they
# must have the same rows in the same order and the same number of sample
# columns; otherwise counts would be silently misaligned or mislabelled.
stopifnot(
  ncol(HTSeqC) == ncol(featureC),
  identical(rownames(HTSeqC), rownames(featureC))
)
# Sample name = third dot-separated field of each featureCounts column name.
Sample_names <- vapply(strsplit(colnames(featureC), "\\."), `[`, character(1), 3)
# NOTE(review): the labels derived from the featureCounts header are applied to
# the htseq-count columns as well, which presumes both files list the samples
# in the same order -- confirm against the raw headers.
colnames(featureC) <- paste0(Sample_names, "_featureC")
colnames(HTSeqC) <- paste0(Sample_names, "_HTSeqC")
# htseq-count columns first, featureCounts columns second.
merge <- cbind(HTSeqC, featureC)
# Remove failed samples: columns with fewer than 10 million counted reads.
# Names are taken by indexing colnames() directly; subsetting the data frame
# first would drop to a plain vector (colnames NULL) when exactly one column
# fails, and that failed sample would then silently be kept.
libsize_millions <- colSums(merge) / 1000000
remove_to_few_reads <- colnames(merge)[libsize_millions < 10]
# drop = FALSE keeps a data frame even if only one column survives.
MergeQC1 <- merge[, !(colnames(merge) %in% remove_to_few_reads), drop = FALSE]
# The filter is applied per column, so a sample can be removed for one counter
# and kept for the other.
# Label each remaining column by the counting tool encoded in its name suffix.
Treatment <- ifelse(grepl("featureC", colnames(MergeQC1)), "featureC", "HTSeqC")
# Sample-to-sample Spearman correlation matrix across all retained columns.
corlog <- cor(MergeQC1, method = "spearman")
# Colour the column side bar by counting tool (HTSeqC = red, featureC = blue).
# Derived from Treatment rather than a fixed per-group count, so the bar always
# has one entry per column of corlog whatever the QC filter removed.
side_colors <- ifelse(Treatment == "HTSeqC", "red", "blue")
heatmap.2(corlog, scale = "none", symkey = FALSE, density.info = "none",
          col = redblue, trace = "none", cexRow = 0.3, cexCol = 0.3,
          ColSideColors = side_colors)
# Scatter of per-gene mean counts (log10, offset 0.1 to avoid log10(0)):
# HTSeq-count on the y axis against featureCounts on the x axis.
# The same Treatment masks feed both the points and the correlation in the
# title, so the two cannot disagree when the number of retained columns per
# counter is not exactly 182.
mean_htseq <- log10(rowMeans(MergeQC1[, Treatment == "HTSeqC", drop = FALSE]) + .1)
mean_featurec <- log10(rowMeans(MergeQC1[, Treatment == "featureC", drop = FALSE]) + .1)
plot(y = mean_htseq, x = mean_featurec, pch = 20, ylab = "HtSeqC", xlab = "featureC",
     main = paste("Cor: ", cor(mean_htseq, mean_featurec, method = "spearman")))
# de-genes
# Differential "expression" between the two counting tools (limma-voom).
# Pin the factor levels explicitly: the default level order comes from sorting
# the mixed-case labels, which depends on the locale's collation rules. With
# featureC as the reference, coefficient 2 is HTSeqC minus featureC.
group <- factor(Treatment, levels = c("featureC", "HTSeqC"))
design <- model.matrix(~group)
colnames(design) <- c("(Intercept)", "TreatmentHTSeqC")
# NOTE(review): every sample appears twice in MergeQC1 (once per counter) and
# this design treats the columns as independent; consider blocking on sample.
y <- DGEList(counts = MergeQC1)
y <- calcNormFactors(y)
v <- voom(y, design, plot = TRUE)
fit <- lmFit(v, design)
fit <- eBayes(fit)
# Global option: affects all subsequent printing in the session.
options(digits = 3)
# Full argument names instead of partial matching; number = Inf returns every
# gene rather than relying on an arbitrary cap.
tt <- topTable(fit, coef = 2, sort.by = "p", number = Inf)
tt$ID <- rownames(tt)
# Count of genes below / above the 5% adjusted p-value threshold.
table(tt$adj.P.Val < .05)
##
## FALSE TRUE
## 20594 1741
# Annotate top table with GC content and gene length
GC <- read.csv("~/Dropbox/LUDC/HTL/GC_INS/gc.csv", sep = ";")
# Number of repeated gene symbols in the GC table (printed for inspection).
sum(duplicated(GC$HGNC.symbol))
## [1] 221
# Collapse repeated symbols to one value per gene: the median GC content.
GC_per_gene <- aggregate(X..GC.content ~ HGNC.symbol, data = GC, FUN = median)
# Headerless tab-separated table: V1 is matched against the gene IDs and V2 is
# taken as the gene length.
GeneLength <- read.csv("~/Dropbox/LUDC/HTL/GC_INS/ExonsLengthPerGene.txt", sep = "\t", header = FALSE)
# match() yields NA for genes missing from an annotation table, so those genes
# get NA in the new column. Columns are selected by name, not by position.
tt$GC <- GC_per_gene$X..GC.content[match(tt$ID, GC_per_gene$HGNC.symbol)]
tt$GeneLength <- GeneLength$V2[match(tt$ID, GeneLength$V1)]
# Two panels side by side: logFC against gene length and against GC content,
# each with a loess smooth, to look for length/GC-related bias.
# NOTE: the mfrow layout stays in effect for any later plots.
par(mfrow = c(1, 2))
# The stray empty argument (",,") from the original call is removed; it only
# worked because it happened to fall on the optional `y` parameter.
scatter.smooth(tt$logFC ~ log10(tt$GeneLength), pch = 20, ylab = "logFC",
               xlab = "Gene length (log 10)", main = "Gene length", col = "grey")
scatter.smooth(tt$logFC ~ tt$GC, pch = 20, ylab = "logFC", xlab = "GC %",
               main = "GC-content (%)", col = "grey")
# Print the 100 top-ranked genes together with their annotations.
head(tt, n = 100)
## logFC AveExpr t P.Value adj.P.Val B
## CD99 -14.64 2.4213 -146.9 0.00e+00 0.00e+00 739
## DHRSX -10.73 -0.1431 -214.4 0.00e+00 0.00e+00 873
## GPR89C -9.79 -0.5413 -218.4 0.00e+00 0.00e+00 879
## VAMP7 -12.24 0.6398 -227.5 0.00e+00 0.00e+00 894
## ZBED1 -11.85 0.4496 -220.7 0.00e+00 0.00e+00 883
## CD99P1 -7.96 -1.5260 -142.4 2.17e-322 8.09e-319 727
## ASMTL -10.70 -0.1828 -138.1 1.32e-317 4.22e-314 717
## PLCXD1 -9.41 -0.8115 -135.2 2.73e-314 7.62e-311 709
## IL3RA -8.33 -1.3641 -128.0 1.02e-305 2.53e-302 690
## SLC25A6 -14.09 1.5807 -122.5 5.25e-299 1.17e-295 675
## SPDYE8P -7.49 -1.4183 -121.6 7.56e-298 1.54e-294 672
## CBWD3 -4.66 3.3872 -119.8 1.90e-295 3.53e-292 667
## AKAP17A -11.56 0.2541 -114.8 6.11e-289 1.05e-285 652
## PDZK1P1 -9.01 -0.7391 -104.4 2.51e-274 4.01e-271 618
## GTF2IRD2P1 -6.71 0.4902 -98.3 5.24e-265 7.80e-262 597
## P2RY8 -7.11 -1.9523 -92.9 2.21e-256 3.08e-253 577
## RGPD3 -5.38 2.6417 -89.1 3.85e-250 5.05e-247 563
## TMED7-TICAM2 6.06 -2.4576 87.4 3.36e-247 4.17e-244 556
## LOC727849 -6.75 0.8892 -85.1 3.55e-243 4.18e-240 547
## CBWD6 -3.76 3.5223 -84.6 2.78e-242 3.10e-239 545
## GTF2IRD2 -2.63 3.9867 -83.7 1.18e-240 1.26e-237 541
## SPRY3 -6.91 -1.4380 -83.0 2.38e-239 2.41e-236 538
## GPR89A -2.50 3.7037 -81.4 2.26e-236 2.20e-233 531
## TBC1D3H -7.60 -1.7280 -79.9 1.43e-233 1.34e-230 525
## NBPF11 -5.92 2.6554 -78.6 4.08e-231 3.65e-228 519
## FAM35B -6.64 -0.7210 -78.4 1.18e-230 1.01e-227 518
## FAM35B2 -5.24 0.4202 -78.2 2.52e-230 2.09e-227 517
## GTF2IRD2B -2.43 4.2152 -77.3 1.41e-228 1.12e-225 513
## LOC728407 -6.77 -1.0271 -77.1 3.04e-228 2.34e-225 512
## MYCBP 6.03 -2.1498 75.8 9.35e-226 6.96e-223 507
## LOC440354 -3.99 2.7583 -73.9 6.75e-222 4.87e-219 498
## RPL17-C18ORF32 5.57 -2.7134 73.8 1.19e-221 8.31e-219 497
## SPIN2A -5.80 -1.2175 -70.7 2.21e-215 1.49e-212 483
## NBPF16 -4.61 3.7510 -69.8 2.31e-213 1.52e-210 478
## NSFP1 -6.77 1.9807 -69.7 2.73e-213 1.74e-210 478
## PMS2L2 -6.37 -0.1239 -69.5 1.00e-212 6.23e-210 477
## LOC349196 -6.24 -1.8351 -68.7 4.62e-211 2.79e-208 473
## NOMO2 -2.87 7.6567 -68.4 1.65e-210 9.69e-208 472
## CSF2RA -6.54 -2.1955 -68.2 5.58e-210 3.19e-207 471
## RPL17 4.95 5.0631 67.3 5.13e-208 2.87e-205 466
## FAM18B2 -2.83 4.3462 -66.4 3.73e-206 2.03e-203 462
## LOC641298 -4.55 2.5491 -66.3 7.98e-206 4.24e-203 461
## BCRP2 -6.05 -1.7643 -65.2 1.63e-203 8.48e-201 456
## RPL21 5.15 -2.2607 64.7 2.06e-202 1.04e-199 453
## NOMO3 -4.87 6.5580 -64.5 9.66e-202 4.79e-199 452
## TBC1D3C -7.71 -1.3718 -64.2 2.83e-201 1.38e-198 451
## ANKHD1-EIF4EBP3 3.77 0.1227 63.5 1.25e-199 5.94e-197 447
## LOC728855 -3.63 2.5326 -63.3 4.37e-199 2.04e-196 446
## PPP2R3B -9.01 -1.1043 -63.3 4.52e-199 2.06e-196 446
## GTPBP6 -9.97 -0.6289 -62.7 1.11e-197 4.94e-195 442
## GTF2H2B -4.76 3.3653 -62.0 3.24e-196 1.42e-193 439
## PRSS2 8.99 -0.9416 61.6 2.80e-195 1.20e-192 437
## CEP170P1 -5.45 -0.5929 -61.5 6.27e-195 2.64e-192 436
## RPL36A 4.77 3.0921 61.2 3.04e-194 1.26e-191 434
## CTAGE6P -5.99 -0.7441 -61.2 3.16e-194 1.28e-191 434
## GATSL1 -5.59 -0.0118 -60.6 8.55e-193 3.41e-190 431
## FAM45B -2.42 2.3269 -60.1 1.10e-191 4.31e-189 429
## PGAM4 -6.76 0.7103 -59.9 3.09e-191 1.19e-188 427
## C15orf38-AP3S2 4.64 -3.1851 59.1 3.58e-189 1.35e-186 423
## ASAH2 -4.62 -0.2458 -59.1 3.62e-189 1.35e-186 423
## POM121L8P -5.33 -1.9357 -58.6 4.56e-188 1.67e-185 420
## RANGRF 2.52 1.8231 57.7 1.12e-185 4.03e-183 415
## FLJ39739 -5.15 0.4226 -57.5 3.96e-185 1.40e-182 413
## RGPD4 -4.77 2.7762 -56.6 4.58e-183 1.60e-180 409
## LOC613037 -6.58 1.3361 -56.4 1.34e-182 4.60e-180 408
## C7orf28B -2.34 5.0102 -56.3 3.89e-182 1.32e-179 407
## TBC1D3B -7.25 -1.7997 -56.1 9.50e-182 3.17e-179 406
## GNG10 5.87 1.1885 55.8 4.51e-181 1.48e-178 404
## ZNF322B -5.43 1.7945 -55.5 4.35e-180 1.41e-177 402
## LOC151009 -3.95 2.5098 -55.4 7.09e-180 2.26e-177 401
## LRRC37A -4.14 1.7566 -55.2 1.62e-179 5.08e-177 401
## CCZ1 -2.27 5.0598 -55.0 7.93e-179 2.46e-176 399
## DHX40P1 -3.78 1.3194 -54.5 1.28e-177 3.92e-175 396
## LOC728875 -3.86 1.9639 -54.2 5.68e-177 1.71e-174 395
## GTF2I -2.09 7.7809 -53.5 4.98e-175 1.48e-172 390
## NME1-NME2 4.81 -3.0719 53.3 2.05e-174 6.01e-172 389
## OR2A4 -5.50 -2.6669 -53.3 2.30e-174 6.67e-172 389
## LOC100271836 -3.77 2.9847 -53.1 4.93e-174 1.41e-171 388
## LOC285074 -2.34 2.9096 -52.9 2.24e-173 6.34e-171 386
## LOC653075 -5.03 -0.4749 -52.4 5.03e-172 1.40e-169 383
## CFHR1 -5.20 -2.4724 -52.1 3.43e-171 9.46e-169 381
## LOC100190986 -6.67 -0.5364 -52.0 4.36e-171 1.19e-168 381
## GOLGA6L10 -7.09 0.2877 -51.7 2.79e-170 7.50e-168 379
## FAM157B -5.02 -2.6283 -51.4 1.72e-169 4.58e-167 377
## UBE2Q2P3 -4.63 -3.0490 -51.0 2.89e-168 7.58e-166 375
## FAM133B 2.82 0.5736 50.9 3.78e-168 9.81e-166 374
## LOC100132352 -2.10 2.2202 -50.9 4.55e-168 1.17e-165 374
## RRN3P2 -3.08 0.8851 -50.1 8.67e-166 2.20e-163 369
## NBPF24 -5.05 -2.6497 -49.5 3.48e-164 8.73e-162 365
## FAM27B -5.21 -0.7242 -49.5 4.29e-164 1.07e-161 365
## GATSL2 -3.92 1.1644 -49.5 4.44e-164 1.09e-161 365
## FOXO3B -2.01 3.6972 -49.1 6.44e-163 1.56e-160 362
## POLR2J2 -6.23 0.9036 -48.8 2.65e-162 6.36e-160 361
## SMA5 -6.13 -0.4897 -48.7 6.20e-162 1.47e-159 360
## LIMS3 -5.90 -0.0221 -48.2 1.55e-160 3.64e-158 357
## C9orf30-TMEFF1 4.05 -3.4879 48.2 2.13e-160 4.95e-158 357
## FAM27C -5.17 -0.9976 -48.0 5.10e-160 1.17e-157 356
## LOC440297 -6.59 0.7126 -47.6 9.79e-159 2.23e-156 353
## PGAM1 -1.46 7.5198 -47.5 2.05e-158 4.63e-156 352
## NOMO1 -1.87 8.0733 -47.2 1.07e-157 2.38e-155 350
## ID GC GeneLength
## CD99 CD99 47.4 2474
## DHRSX DHRSX 47.1 5138
## GPR89C GPR89C 39.0 3138
## VAMP7 VAMP7 38.8 5318
## ZBED1 ZBED1 49.6 9478
## CD99P1 CD99P1 NA 6632
## ASMTL ASMTL 52.2 4556
## PLCXD1 PLCXD1 53.8 10750
## IL3RA IL3RA 54.5 3420
## SLC25A6 SLC25A6 58.9 2968
## SPDYE8P SPDYE8P NA 8652
## CBWD3 CBWD3 36.0 3452
## AKAP17A AKAP17A 60.9 6556
## PDZK1P1 PDZK1P1 NA 6996
## GTF2IRD2P1 GTF2IRD2P1 NA 3010
## P2RY8 P2RY8 48.2 8468
## RGPD3 RGPD3 40.2 5873
## TMED7-TICAM2 TMED7-TICAM2 NA 3627
## LOC727849 LOC727849 NA 9890
## CBWD6 CBWD6 36.0 1719
## GTF2IRD2 GTF2IRD2 46.7 3551
## SPRY3 SPRY3 40.3 18076
## GPR89A GPR89A 38.9 2032
## TBC1D3H TBC1D3H 47.6 2305
## NBPF11 NBPF11 45.5 10934
## FAM35B FAM35B NA 3290
## FAM35B2 FAM35B2 NA 3294
## GTF2IRD2B GTF2IRD2B 46.6 3585
## LOC728407 LOC728407 NA 1420
## MYCBP MYCBP 43.5 2565
## LOC440354 LOC440354 NA 7745
## RPL17-C18ORF32 RPL17-C18ORF32 NA 2049
## SPIN2A SPIN2A 50.3 1319
## NBPF16 NBPF16 43.3 7662
## NSFP1 NSFP1 NA 2348
## PMS2L2 PMS2L2 NA 1522
## LOC349196 LOC349196 NA 8622
## NOMO2 NOMO2 45.5 4253
## CSF2RA CSF2RA 50.9 4596
## RPL17 RPL17 43.0 1163
## FAM18B2 FAM18B2 NA 5196
## LOC641298 LOC641298 NA 5010
## BCRP2 BCRP2 NA 2334
## RPL21 RPL21 42.0 566
## NOMO3 NOMO3 45.5 4315
## TBC1D3C TBC1D3C 47.5 4220
## ANKHD1-EIF4EBP3 ANKHD1-EIF4EBP3 NA 8349
## LOC728855 LOC728855 NA 2543
## PPP2R3B PPP2R3B 58.3 4852
## GTPBP6 GTPBP6 55.5 2360
## GTF2H2B GTF2H2B NA 4374
## PRSS2 PRSS2 52.0 408
## CEP170P1 CEP170P1 NA 1070
## RPL36A RPL36A 42.0 2911
## CTAGE6P CTAGE6P NA 2662
## GATSL1 GATSL1 52.4 990
## FAM45B FAM45B NA 4824
## PGAM4 PGAM4 50.6 1678
## C15orf38-AP3S2 C15orf38-AP3S2 45.4 6256
## ASAH2 ASAH2 37.1 2418
## POM121L8P POM121L8P NA 7712
## RANGRF RANGRF 59.6 1166
## FLJ39739 FLJ39739 NA 2130
## RGPD4 RGPD4 40.6 7172
## LOC613037 LOC613037 NA 3455
## C7orf28B C7orf28B NA 1836
## TBC1D3B TBC1D3B 59.9 2114
## GNG10 GNG10 46.6 1255
## ZNF322B ZNF322B NA 2374
## LOC151009 LOC151009 NA 2182
## LRRC37A LRRC37A 42.3 10351
## CCZ1 CCZ1 46.1 1824
## DHX40P1 DHX40P1 NA 1129
## LOC728875 LOC728875 NA 1540
## GTF2I GTF2I 42.8 4529
## NME1-NME2 NME1-NME2 48.9 1295
## OR2A4 OR2A4 48.8 933
## LOC100271836 LOC100271836 NA 4952
## LOC285074 LOC285074 NA 2201
## LOC653075 LOC653075 NA 3471
## CFHR1 CFHR1 33.7 1297
## LOC100190986 LOC100190986 NA 2432
## GOLGA6L10 GOLGA6L10 NA 7077
## FAM157B FAM157B NA 1278
## UBE2Q2P3 UBE2Q2P3 NA 2420
## FAM133B FAM133B 37.4 2468
## LOC100132352 LOC100132352 NA 1286
## RRN3P2 RRN3P2 NA 2396
## NBPF24 NBPF24 45.9 6818
## FAM27B FAM27B NA 703
## GATSL2 GATSL2 52.6 990
## FOXO3B FOXO3B NA 7259
## POLR2J2 POLR2J2 NA 1716
## SMA5 SMA5 NA 1759
## LIMS3 LIMS3 41.8 6258
## C9orf30-TMEFF1 C9orf30-TMEFF1 NA 2434
## FAM27C FAM27C NA 703
## LOC440297 LOC440297 NA 20420
## PGAM1 PGAM1 47.5 1720
## NOMO1 NOMO1 45.6 4247