#/////////////////////////////////////////////////////////////////////////////
# LIBRARIES AND CUSTOM FUNCTIONS
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
library(edgeR)
## Loading required package: limma
library(car)
library(gplots)
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
library(reshape)
library(stringr)
#library(affycoretools)
library(limma)
library(ggplot2)
require(biomaRt)
## Loading required package: biomaRt
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:biomaRt':
##
## select
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Run once #################################################################################################
library(biomaRt)
ensMart<-useMart("ensembl")
ensembl_hs_mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
#head(listAttributes(ensembl_hs_mart),n=157)
#grep("length",listAttributes(ensembl_hs_mart)$description,ignore.case = T,value=T)
ensembl_df <- getBM(attributes=c("ensembl_gene_id", "refseq_mrna","description",
"hgnc_symbol","chromosome_name"),mart=ensembl_hs_mart)
head(ensembl_df)
## ensembl_gene_id refseq_mrna
## 1 ENSG00000261657
## 2 ENSG00000223116
## 3 ENSG00000233440
## 4 ENSG00000207157
## 5 ENSG00000229483
## 6 ENSG00000252952 NR_046932
## description
## 1 solute carrier family 25 (S-adenosylmethionine carrier), member 26 [Source:HGNC Symbol;Acc:20661]
## 2
## 3 high mobility group AT-hook 1 pseudogene 6 [Source:HGNC Symbol;Acc:19121]
## 4 RNA, Ro-associated Y3 pseudogene 4 [Source:HGNC Symbol;Acc:42488]
## 5 long intergenic non-protein coding RNA 362 [Source:HGNC Symbol;Acc:42682]
## 6 RNA, U6 small nuclear 58, pseudogene [Source:HGNC Symbol;Acc:42548]
## hgnc_symbol chromosome_name
## 1 SLC25A26 HG991_PATCH
## 2 13
## 3 HMGA1P6 13
## 4 RNY3P4 13
## 5 LINC00362 13
## 6 RNU6-58P 13
# /Run once #################################################################################################
# Set wordking folder for results files
setwd("~/Projects/2014-08-19_GlucoseIslets_2014//Analysis")
#/////////////////////////////////////////////////////////////////////////////
# Read in data ------------------------------------------------------------
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
## Read in the data making the row names the first column
counttable_all <- read.table("~/Projects/2014-08-19_GlucoseIslets_2014/RawData/RNAseq_human_islets.featureCounts.nolastline.tsv", header=T, row.names=1)
#head(counttable_all)
colnames(counttable_all)<-sapply(strsplit(colnames(counttable_all), "\\."), `[`, 4)
# fix stupid naming
colnames(counttable_all)[55] <- "HTL0006"
#/////////////////////////////////////////////////////////////////////////////
# Bring in metadata
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
meta <- read.csv("~/Projects/2013-10_HTL_RNASeq/Metadata/20131020_phenotypes_from_intranet.csv",row.names=1,sep="\t")
meta$Glycemic<-factor(recode(meta$Hba1c,"0:6='Normo';6:6.5='IGT';6.5:1000='T2D'"))
meta$Glycemic <- relevel(meta$Glycemic,ref="Normo")
meta$Donor <- rownames(meta)
#/////////////////////////////////////////////////////////////////////////////
# Add information for high glucose
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
highcl <- colnames(counttable_all)[grep("High",colnames(counttable_all))]
HighSamples <- gsub("HighGlucose","",highcl)
meta$Treatment <- "Low"
meta$InGlucosePair <- 0
meta[HighSamples,]$InGlucosePair <- 1
MetaHigh <- meta[HighSamples,]
rownames(MetaHigh) <- highcl
MetaHigh$Treatment <- "High"
MetaComplete <- rbind(meta,MetaHigh)
#/////////////////////////////////////////////////////////////////////////////
# Final clean up
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
MetaSeqSamples <- MetaComplete[colnames(counttable_all),]
MetaSeqSamples$Batch <- ifelse(as.integer(substr(MetaSeqSamples$Donor,5,8)) > 110,"B2","B1")
#/////////////////////////////////////////////////////////////////////////////
# Remove samples with < 10M counts
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# Samples to remove
remove_to_few_reads<-colnames(counttable_all[,colSums(counttable_all) / 1000000 < 3])
kept_samples<-colnames(counttable_all[,colSums(counttable_all) / 1000000 > 3])
CounttableQC <- counttable_all[,kept_samples]
MetaSeqSamplesQC <- MetaSeqSamples[colnames(CounttableQC),]
rownames(CounttableQC) <- substr(rownames(CounttableQC),1,15)
#/////////////////////////////////////////////////////////////////////////////
# All low samples
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
par(mfrow=c(1,1))
design<-model.matrix(~Batch+Treatment,MetaSeqSamplesQC)
y <- DGEList(counts=CounttableQC, genes=rownames(CounttableQC))
#isexpr <- rowSums(cpm(y)>1) >= 10
#y <- y[isexpr,]
v <- voom(y,design,plot=T)

plotMDS(v,col=rainbow(2)[as.factor(MetaSeqSamplesQC$Treatment)],top=50,gene.selection="common")

plotMDS(v,col=topo.colors(2)[as.factor(MetaSeqSamplesQC$Gender)],top=50,gene.selection="common")
