library(gplots)
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
library(limma)
library(edgeR)

# Work from the project directory so the relative "Raw_data" paths resolve.
setwd("~/Projects//2013-10_HTL_RNASeq")


# Read and fix data
# Both count tables are tab-separated; the first column becomes the row names.
featureC <- read.csv("Raw_data//STAR.merged.20131027.featureCounts",
                     sep = "\t", row.names = 1)
HTSeqC <- read.csv("Raw_data//htseq-count_MergedFiles.txt",
                   sep = "\t", row.names = 1)

# The sample name is the third "."-separated field of each featureC column
# name. vapply() guarantees a character vector even for empty input.
Sample_names <- vapply(
  strsplit(colnames(featureC), "\\."),
  function(parts) parts[3],
  character(1)
)

# The featureC-derived names are reused for HTSeqC and the two tables are then
# bound side by side, so both must have the same number of columns and describe
# the same set of rows. Fail loudly instead of mislabelling or mispairing.
# NOTE(review): column ORDER of HTSeqC is assumed to match featureC; its
# original column names are overwritten, so this cannot be checked here.
stopifnot(
  ncol(HTSeqC) == ncol(featureC),
  setequal(rownames(HTSeqC), rownames(featureC))
)

# cbind() pairs rows by position, not by name: put featureC into HTSeqC's row
# order first (a no-op when the files are already in the same order).
featureC <- featureC[rownames(HTSeqC), , drop = FALSE]

colnames(featureC) <- paste0(Sample_names, "_featureC")
colnames(HTSeqC) <- paste0(Sample_names, "_HTSeqC")

# HTSeqC columns come first, featureC columns second.
merge <- cbind(HTSeqC, featureC)




# Remove failed samples
# A column counts as failed when its total count is below 10 million.
# Index the name vector directly: subsetting the data frame first collapses it
# to a bare vector when exactly one column matches, colnames() then returns
# NULL and that single failed sample would silently be kept.
remove_to_few_reads <- colnames(merge)[colSums(merge) / 1000000 < 10]

# drop = FALSE keeps a data frame even if only one column survives.
MergeQC1 <- merge[, !(colnames(merge) %in% remove_to_few_reads), drop = FALSE]

# Label every remaining column with the counting method encoded in its name.
Treatment <- ifelse(grepl("featureC", colnames(MergeQC1)), "featureC", "HTSeqC")

# sample to sample corr matrix
# Spearman correlation between every pair of columns of the filtered counts.
corlog <- cor(MergeQC1, method = "spearman")

# Side-bar colours are derived from Treatment (red = HTSeqC, blue = featureC)
# rather than a hard-coded 182 + 182 split, so the colour vector always has one
# entry per column even after the QC step has removed samples.
heatmap.2(corlog, scale = "none", symkey = FALSE, density.info = "none",
          col = redblue, trace = "none", cexRow = 0.3, cexCol = 0.3,
          ColSideColors = ifelse(Treatment == "HTSeqC", "red", "blue"))

## plot of chunk unnamed-chunk-1



# scatter
# Per-row mean count for each counting method on a log10 scale; the 0.1 offset
# avoids log10(0). Columns are selected through Treatment for both the points
# and the title correlation, so the two always describe the same data (the
# previous hard-coded 1:182 / 183:364 ranges were only right when no sample
# had been removed by QC).
htseq_log_mean <- log10(
  rowMeans(MergeQC1[, Treatment == "HTSeqC", drop = FALSE]) + .1
)
featurec_log_mean <- log10(
  rowMeans(MergeQC1[, Treatment == "featureC", drop = FALSE]) + .1
)

plot(y = htseq_log_mean, x = featurec_log_mean, pch = 20,
     ylab = "HtSeqC", xlab = "featureC",
     main = paste("Cor: ", cor(htseq_log_mean, featurec_log_mean,
                               method = "spearman")))

## plot of chunk unnamed-chunk-1


# de-genes

# Design matrix: intercept plus a single Treatment term.
# NOTE(review): Treatment is a character vector, so the reference level is
# whatever the default (locale-dependent) factor ordering puts first - confirm
# which counting method is the baseline before interpreting logFC signs.
# NOTE(review): each sample appears to be counted once per method; a paired
# design may be more appropriate - confirm.
design <- model.matrix(~ Treatment)

# Wrap the filtered counts in a DGEList and attach normalisation factors.
y <- calcNormFactors(DGEList(counts = MergeQC1))

# voom transformation against the design; plot = TRUE also draws its
# diagnostic plot.
v <- voom(y, design = design, plot = TRUE)

## plot of chunk unnamed-chunk-1



# Fit the linear model on the voom output and moderate it with eBayes.
fit <- lmFit(v, design)
fit <- eBayes(fit)

# Global print precision; affects every table printed from here on.
options(digits = 3)

# Full results for the second design column (the Treatment term), ordered by
# p-value. Arguments are spelled out instead of relying on partial matching
# (sort -> sort.by, n -> number), and number = Inf returns every row rather
# than stopping at an arbitrary 100000.
tt <- topTable(fit, coef = 2, sort.by = "p", number = Inf)
tt$ID <- rownames(tt)

# How many rows pass the 5% adjusted p-value threshold.
table(tt$adj.P.Val < .05)
## 
## FALSE  TRUE 
## 20594  1741

# Annotate top table with GC/gene length
GC <- read.csv("~/Dropbox/LUDC/HTL/GC_INS/gc.csv", sep = ";")
# Number of repeated HGNC symbols in the GC table.
sum(duplicated(GC$HGNC.symbol))
## [1] 221
# Symbols can occur more than once, so collapse to one GC value per symbol
# (the median).
GC_per_gene <- aggregate(X..GC.content ~ HGNC.symbol, data = GC, median)
# Headerless tab-separated file: V1 is matched against the gene ID, V2 is used
# as the gene length.
GeneLength <- read.csv("~/Dropbox/LUDC/HTL/GC_INS/ExonsLengthPerGene.txt",
                       sep = "\t", header = FALSE)

# Look annotations up by gene ID; IDs without a match become NA. Columns are
# addressed by name so a change in column order cannot pick the wrong one.
tt$GC <- GC_per_gene$X..GC.content[match(tt$ID, GC_per_gene$HGNC.symbol)]
tt$GeneLength <- GeneLength$V2[match(tt$ID, GeneLength$V1)]


# Two panels side by side; remember the previous layout so it can be restored.
old_par <- par(mfrow = c(1, 2))

# logFC against log10 gene length with a loess smooth. The stray empty
# argument (",,") of the original call has been removed.
scatter.smooth(tt$logFC ~ log10(tt$GeneLength), pch = 20, ylab = "logFC",
               xlab = "Gene length (log 10)", main = "Gene length",
               col = "grey")

# logFC against GC content with a loess smooth.
scatter.smooth(tt$logFC ~ tt$GC, pch = 20, ylab = "logFC", xlab = "GC %",
               main = "GC-content (%)", col = "grey")

# Put the graphics layout back so later plots are not forced into this grid.
par(old_par)

## plot of chunk unnamed-chunk-1


# Show the first 100 rows of the annotated top table.
head(x = tt, n = 100L)
##                  logFC AveExpr      t   P.Value adj.P.Val   B
## CD99            -14.64  2.4213 -146.9  0.00e+00  0.00e+00 739
## DHRSX           -10.73 -0.1431 -214.4  0.00e+00  0.00e+00 873
## GPR89C           -9.79 -0.5413 -218.4  0.00e+00  0.00e+00 879
## VAMP7           -12.24  0.6398 -227.5  0.00e+00  0.00e+00 894
## ZBED1           -11.85  0.4496 -220.7  0.00e+00  0.00e+00 883
## CD99P1           -7.96 -1.5260 -142.4 2.17e-322 8.09e-319 727
## ASMTL           -10.70 -0.1828 -138.1 1.32e-317 4.22e-314 717
## PLCXD1           -9.41 -0.8115 -135.2 2.73e-314 7.62e-311 709
## IL3RA            -8.33 -1.3641 -128.0 1.02e-305 2.53e-302 690
## SLC25A6         -14.09  1.5807 -122.5 5.25e-299 1.17e-295 675
## SPDYE8P          -7.49 -1.4183 -121.6 7.56e-298 1.54e-294 672
## CBWD3            -4.66  3.3872 -119.8 1.90e-295 3.53e-292 667
## AKAP17A         -11.56  0.2541 -114.8 6.11e-289 1.05e-285 652
## PDZK1P1          -9.01 -0.7391 -104.4 2.51e-274 4.01e-271 618
## GTF2IRD2P1       -6.71  0.4902  -98.3 5.24e-265 7.80e-262 597
## P2RY8            -7.11 -1.9523  -92.9 2.21e-256 3.08e-253 577
## RGPD3            -5.38  2.6417  -89.1 3.85e-250 5.05e-247 563
## TMED7-TICAM2      6.06 -2.4576   87.4 3.36e-247 4.17e-244 556
## LOC727849        -6.75  0.8892  -85.1 3.55e-243 4.18e-240 547
## CBWD6            -3.76  3.5223  -84.6 2.78e-242 3.10e-239 545
## GTF2IRD2         -2.63  3.9867  -83.7 1.18e-240 1.26e-237 541
## SPRY3            -6.91 -1.4380  -83.0 2.38e-239 2.41e-236 538
## GPR89A           -2.50  3.7037  -81.4 2.26e-236 2.20e-233 531
## TBC1D3H          -7.60 -1.7280  -79.9 1.43e-233 1.34e-230 525
## NBPF11           -5.92  2.6554  -78.6 4.08e-231 3.65e-228 519
## FAM35B           -6.64 -0.7210  -78.4 1.18e-230 1.01e-227 518
## FAM35B2          -5.24  0.4202  -78.2 2.52e-230 2.09e-227 517
## GTF2IRD2B        -2.43  4.2152  -77.3 1.41e-228 1.12e-225 513
## LOC728407        -6.77 -1.0271  -77.1 3.04e-228 2.34e-225 512
## MYCBP             6.03 -2.1498   75.8 9.35e-226 6.96e-223 507
## LOC440354        -3.99  2.7583  -73.9 6.75e-222 4.87e-219 498
## RPL17-C18ORF32    5.57 -2.7134   73.8 1.19e-221 8.31e-219 497
## SPIN2A           -5.80 -1.2175  -70.7 2.21e-215 1.49e-212 483
## NBPF16           -4.61  3.7510  -69.8 2.31e-213 1.52e-210 478
## NSFP1            -6.77  1.9807  -69.7 2.73e-213 1.74e-210 478
## PMS2L2           -6.37 -0.1239  -69.5 1.00e-212 6.23e-210 477
## LOC349196        -6.24 -1.8351  -68.7 4.62e-211 2.79e-208 473
## NOMO2            -2.87  7.6567  -68.4 1.65e-210 9.69e-208 472
## CSF2RA           -6.54 -2.1955  -68.2 5.58e-210 3.19e-207 471
## RPL17             4.95  5.0631   67.3 5.13e-208 2.87e-205 466
## FAM18B2          -2.83  4.3462  -66.4 3.73e-206 2.03e-203 462
## LOC641298        -4.55  2.5491  -66.3 7.98e-206 4.24e-203 461
## BCRP2            -6.05 -1.7643  -65.2 1.63e-203 8.48e-201 456
## RPL21             5.15 -2.2607   64.7 2.06e-202 1.04e-199 453
## NOMO3            -4.87  6.5580  -64.5 9.66e-202 4.79e-199 452
## TBC1D3C          -7.71 -1.3718  -64.2 2.83e-201 1.38e-198 451
## ANKHD1-EIF4EBP3   3.77  0.1227   63.5 1.25e-199 5.94e-197 447
## LOC728855        -3.63  2.5326  -63.3 4.37e-199 2.04e-196 446
## PPP2R3B          -9.01 -1.1043  -63.3 4.52e-199 2.06e-196 446
## GTPBP6           -9.97 -0.6289  -62.7 1.11e-197 4.94e-195 442
## GTF2H2B          -4.76  3.3653  -62.0 3.24e-196 1.42e-193 439
## PRSS2             8.99 -0.9416   61.6 2.80e-195 1.20e-192 437
## CEP170P1         -5.45 -0.5929  -61.5 6.27e-195 2.64e-192 436
## RPL36A            4.77  3.0921   61.2 3.04e-194 1.26e-191 434
## CTAGE6P          -5.99 -0.7441  -61.2 3.16e-194 1.28e-191 434
## GATSL1           -5.59 -0.0118  -60.6 8.55e-193 3.41e-190 431
## FAM45B           -2.42  2.3269  -60.1 1.10e-191 4.31e-189 429
## PGAM4            -6.76  0.7103  -59.9 3.09e-191 1.19e-188 427
## C15orf38-AP3S2    4.64 -3.1851   59.1 3.58e-189 1.35e-186 423
## ASAH2            -4.62 -0.2458  -59.1 3.62e-189 1.35e-186 423
## POM121L8P        -5.33 -1.9357  -58.6 4.56e-188 1.67e-185 420
## RANGRF            2.52  1.8231   57.7 1.12e-185 4.03e-183 415
## FLJ39739         -5.15  0.4226  -57.5 3.96e-185 1.40e-182 413
## RGPD4            -4.77  2.7762  -56.6 4.58e-183 1.60e-180 409
## LOC613037        -6.58  1.3361  -56.4 1.34e-182 4.60e-180 408
## C7orf28B         -2.34  5.0102  -56.3 3.89e-182 1.32e-179 407
## TBC1D3B          -7.25 -1.7997  -56.1 9.50e-182 3.17e-179 406
## GNG10             5.87  1.1885   55.8 4.51e-181 1.48e-178 404
## ZNF322B          -5.43  1.7945  -55.5 4.35e-180 1.41e-177 402
## LOC151009        -3.95  2.5098  -55.4 7.09e-180 2.26e-177 401
## LRRC37A          -4.14  1.7566  -55.2 1.62e-179 5.08e-177 401
## CCZ1             -2.27  5.0598  -55.0 7.93e-179 2.46e-176 399
## DHX40P1          -3.78  1.3194  -54.5 1.28e-177 3.92e-175 396
## LOC728875        -3.86  1.9639  -54.2 5.68e-177 1.71e-174 395
## GTF2I            -2.09  7.7809  -53.5 4.98e-175 1.48e-172 390
## NME1-NME2         4.81 -3.0719   53.3 2.05e-174 6.01e-172 389
## OR2A4            -5.50 -2.6669  -53.3 2.30e-174 6.67e-172 389
## LOC100271836     -3.77  2.9847  -53.1 4.93e-174 1.41e-171 388
## LOC285074        -2.34  2.9096  -52.9 2.24e-173 6.34e-171 386
## LOC653075        -5.03 -0.4749  -52.4 5.03e-172 1.40e-169 383
## CFHR1            -5.20 -2.4724  -52.1 3.43e-171 9.46e-169 381
## LOC100190986     -6.67 -0.5364  -52.0 4.36e-171 1.19e-168 381
## GOLGA6L10        -7.09  0.2877  -51.7 2.79e-170 7.50e-168 379
## FAM157B          -5.02 -2.6283  -51.4 1.72e-169 4.58e-167 377
## UBE2Q2P3         -4.63 -3.0490  -51.0 2.89e-168 7.58e-166 375
## FAM133B           2.82  0.5736   50.9 3.78e-168 9.81e-166 374
## LOC100132352     -2.10  2.2202  -50.9 4.55e-168 1.17e-165 374
## RRN3P2           -3.08  0.8851  -50.1 8.67e-166 2.20e-163 369
## NBPF24           -5.05 -2.6497  -49.5 3.48e-164 8.73e-162 365
## FAM27B           -5.21 -0.7242  -49.5 4.29e-164 1.07e-161 365
## GATSL2           -3.92  1.1644  -49.5 4.44e-164 1.09e-161 365
## FOXO3B           -2.01  3.6972  -49.1 6.44e-163 1.56e-160 362
## POLR2J2          -6.23  0.9036  -48.8 2.65e-162 6.36e-160 361
## SMA5             -6.13 -0.4897  -48.7 6.20e-162 1.47e-159 360
## LIMS3            -5.90 -0.0221  -48.2 1.55e-160 3.64e-158 357
## C9orf30-TMEFF1    4.05 -3.4879   48.2 2.13e-160 4.95e-158 357
## FAM27C           -5.17 -0.9976  -48.0 5.10e-160 1.17e-157 356
## LOC440297        -6.59  0.7126  -47.6 9.79e-159 2.23e-156 353
## PGAM1            -1.46  7.5198  -47.5 2.05e-158 4.63e-156 352
## NOMO1            -1.87  8.0733  -47.2 1.07e-157 2.38e-155 350
##                              ID   GC GeneLength
## CD99                       CD99 47.4       2474
## DHRSX                     DHRSX 47.1       5138
## GPR89C                   GPR89C 39.0       3138
## VAMP7                     VAMP7 38.8       5318
## ZBED1                     ZBED1 49.6       9478
## CD99P1                   CD99P1   NA       6632
## ASMTL                     ASMTL 52.2       4556
## PLCXD1                   PLCXD1 53.8      10750
## IL3RA                     IL3RA 54.5       3420
## SLC25A6                 SLC25A6 58.9       2968
## SPDYE8P                 SPDYE8P   NA       8652
## CBWD3                     CBWD3 36.0       3452
## AKAP17A                 AKAP17A 60.9       6556
## PDZK1P1                 PDZK1P1   NA       6996
## GTF2IRD2P1           GTF2IRD2P1   NA       3010
## P2RY8                     P2RY8 48.2       8468
## RGPD3                     RGPD3 40.2       5873
## TMED7-TICAM2       TMED7-TICAM2   NA       3627
## LOC727849             LOC727849   NA       9890
## CBWD6                     CBWD6 36.0       1719
## GTF2IRD2               GTF2IRD2 46.7       3551
## SPRY3                     SPRY3 40.3      18076
## GPR89A                   GPR89A 38.9       2032
## TBC1D3H                 TBC1D3H 47.6       2305
## NBPF11                   NBPF11 45.5      10934
## FAM35B                   FAM35B   NA       3290
## FAM35B2                 FAM35B2   NA       3294
## GTF2IRD2B             GTF2IRD2B 46.6       3585
## LOC728407             LOC728407   NA       1420
## MYCBP                     MYCBP 43.5       2565
## LOC440354             LOC440354   NA       7745
## RPL17-C18ORF32   RPL17-C18ORF32   NA       2049
## SPIN2A                   SPIN2A 50.3       1319
## NBPF16                   NBPF16 43.3       7662
## NSFP1                     NSFP1   NA       2348
## PMS2L2                   PMS2L2   NA       1522
## LOC349196             LOC349196   NA       8622
## NOMO2                     NOMO2 45.5       4253
## CSF2RA                   CSF2RA 50.9       4596
## RPL17                     RPL17 43.0       1163
## FAM18B2                 FAM18B2   NA       5196
## LOC641298             LOC641298   NA       5010
## BCRP2                     BCRP2   NA       2334
## RPL21                     RPL21 42.0        566
## NOMO3                     NOMO3 45.5       4315
## TBC1D3C                 TBC1D3C 47.5       4220
## ANKHD1-EIF4EBP3 ANKHD1-EIF4EBP3   NA       8349
## LOC728855             LOC728855   NA       2543
## PPP2R3B                 PPP2R3B 58.3       4852
## GTPBP6                   GTPBP6 55.5       2360
## GTF2H2B                 GTF2H2B   NA       4374
## PRSS2                     PRSS2 52.0        408
## CEP170P1               CEP170P1   NA       1070
## RPL36A                   RPL36A 42.0       2911
## CTAGE6P                 CTAGE6P   NA       2662
## GATSL1                   GATSL1 52.4        990
## FAM45B                   FAM45B   NA       4824
## PGAM4                     PGAM4 50.6       1678
## C15orf38-AP3S2   C15orf38-AP3S2 45.4       6256
## ASAH2                     ASAH2 37.1       2418
## POM121L8P             POM121L8P   NA       7712
## RANGRF                   RANGRF 59.6       1166
## FLJ39739               FLJ39739   NA       2130
## RGPD4                     RGPD4 40.6       7172
## LOC613037             LOC613037   NA       3455
## C7orf28B               C7orf28B   NA       1836
## TBC1D3B                 TBC1D3B 59.9       2114
## GNG10                     GNG10 46.6       1255
## ZNF322B                 ZNF322B   NA       2374
## LOC151009             LOC151009   NA       2182
## LRRC37A                 LRRC37A 42.3      10351
## CCZ1                       CCZ1 46.1       1824
## DHX40P1                 DHX40P1   NA       1129
## LOC728875             LOC728875   NA       1540
## GTF2I                     GTF2I 42.8       4529
## NME1-NME2             NME1-NME2 48.9       1295
## OR2A4                     OR2A4 48.8        933
## LOC100271836       LOC100271836   NA       4952
## LOC285074             LOC285074   NA       2201
## LOC653075             LOC653075   NA       3471
## CFHR1                     CFHR1 33.7       1297
## LOC100190986       LOC100190986   NA       2432
## GOLGA6L10             GOLGA6L10   NA       7077
## FAM157B                 FAM157B   NA       1278
## UBE2Q2P3               UBE2Q2P3   NA       2420
## FAM133B                 FAM133B 37.4       2468
## LOC100132352       LOC100132352   NA       1286
## RRN3P2                   RRN3P2   NA       2396
## NBPF24                   NBPF24 45.9       6818
## FAM27B                   FAM27B   NA        703
## GATSL2                   GATSL2 52.6        990
## FOXO3B                   FOXO3B   NA       7259
## POLR2J2                 POLR2J2   NA       1716
## SMA5                       SMA5   NA       1759
## LIMS3                     LIMS3 41.8       6258
## C9orf30-TMEFF1   C9orf30-TMEFF1   NA       2434
## FAM27C                   FAM27C   NA        703
## LOC440297             LOC440297   NA      20420
## PGAM1                     PGAM1 47.5       1720
## NOMO1                     NOMO1 45.6       4247