#Opload packages and install libraries with pacman
#main study
z1 <- read.delim("quantifications_gencode.shipments.blood.nodups.txt", row.names = 1, stringsAsFactors=FALSE)
#Pilot
z2 <- read.delim("quantifications_gencode.nodups.txt", row.names = 1, stringsAsFactors=FALSE)
names(z2)
z2 <- subset(z2,
select=-c(X10A.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam,
X10B.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam,
X17A.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam,
X17B.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam,
X19A.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam,
X23A.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam,
X23B.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam))
names(z2)
colnames(z2) <- sub(".whole.blood", "", colnames(z2))
names(z2)
class(z1)
## [1] "data.frame"
dim(z1)
## [1] 67049 179
rownames(z1) <- make.names(cleanid(rownames(z1)), unique = TRUE)
class(z2)
## [1] "data.frame"
dim(z2)
## [1] 67049 8
rownames(z2) <- make.names(cleanid(rownames(z2)), unique = TRUE)
z3 <- cbind(z1, z2)
rownames(z3) <- make.names(cleanid(rownames(z3)), unique = TRUE)
head(z3)
indice_na_rows <- function(x){
out <- all(is.na(x))
#out <- x %>% is.na %>% all
return(out)
}
ind <- apply(X = z3, MARGIN = 1, FUN = indice_na_rows)
ind <- !ind
table(ind)
## ind
## TRUE
## 67049
count.blood <- z3[ind,]
rmZero.blood <- rowSums(count.blood)>0
count.blood <- count.blood[rmZero.blood,]
ind3 <- apply(X = count.blood, MARGIN = 1, FUN = indice_na_rows)
ind3 <- !ind3
count.blood <- count.blood[ind3,]
67049 - 51839
## [1] 15210
head(count.blood)
apply(count.blood, 2, function(x){which(is.na(x))})
names(count.blood)
colnames(count.blood) <- sub(".nodups.Aligned.bamProcessed.out.bam", "", colnames(count.blood))
names(count.blood)
colnames(count.blood) <- sub("a", "A", colnames(count.blood))
names(count.blood)
colnames(count.blood) <- sub("b", "B", colnames(count.blood))
names(count.blood)
colnames(count.blood) <- sub(".whole.blood", "", colnames(count.blood))
colnames(count.blood) <- sub("X", "", colnames(count.blood))
names(count.blood)
duplicated(colnames(count.blood))
count.blood = count.blood[,mixedsort(colnames(count.blood))]
names(count.blood)
## [1] "1A" "1B" "2A" "2B" "3A" "3B" "4A" "4B" "5A"
## [10] "5B" "6A" "6B" "7A" "7B" "8A" "8B" "9A" "10A"
## [19] "10B" "11A" "11B" "12A" "12B" "13A" "13B" "14A" "14B"
## [28] "15A" "15B" "16A" "16B" "17A" "17B" "18A" "18B" "19A"
## [37] "19B" "20A" "20B" "21A" "21B" "22A" "23A" "23B.1" "23B"
## [46] "24A" "24B" "25A" "25B" "26A" "26B" "27A" "27B" "28A"
## [55] "28B" "29A" "29B" "30A" "30B" "31A" "31B" "32A" "32B"
## [64] "33A" "33B" "34A" "34B" "34c" "35A" "35B" "36A" "36B"
## [73] "37A" "37B" "38A" "38B" "39A" "39B" "40A" "40B" "41A"
## [82] "41B" "43A" "43B" "44A" "44B" "45A" "45B" "46A" "46B"
## [91] "47A" "47B" "48A" "48B" "49A" "49B" "49c" "50A" "50B"
## [100] "51A" "51B" "52A" "52B" "53A" "53B" "54A" "54B" "55A"
## [109] "55B" "56A" "56B" "57A" "57B" "60A" "60B" "61A" "61B"
## [118] "63A" "63B" "64A" "64B" "65A" "65B" "66A" "66B" "67A"
## [127] "67B" "68A" "68B" "70A" "70B" "71A" "71B" "73A" "73B"
## [136] "74A" "74B" "75A" "75B" "76A" "76B" "77A" "77B" "78A"
## [145] "78B" "80A" "80B" "81A" "81B" "82A" "82B" "84A" "84B"
## [154] "85A" "85B" "86A" "86B" "87A" "87B" "88A" "88B" "90A"
## [163] "90B" "91A" "91B" "92A" "92B" "93A" "93B" "94A" "94B"
## [172] "95A" "95B" "96A" "96B" "97A" "97B" "99A" "99B" "100A"
## [181] "100B" "101A" "101B" "102A" "102B" "103A" "103B"
dir()
m1 <- read.csv2("Demographics.variables.blood.csv")
m2 <- read.csv2("INFECT.variables.longitudinal_blood_V2.csv")
m3 <- read.csv2("LABKA.variable.HBOmic.v1.csv" )
m4 <- read.csv2("INFECT_shock_and_death.csv")
names(m1)
meta.blood.temp.1 <- merge(x =m1, y= m2, by="Sample_ID")
meta.blood.temp.2 <- merge(x = meta.blood.temp.1, y = m3, by = "Sample_ID")
meta.blood.temp.3 <- merge(x = meta.blood.temp.2, y = m4, by = "Sample_ID")
names(meta.blood.temp.3)
meta.blood <- subset(meta.blood.temp.3, select=-c(Nr..HBO.y, Pre.or.post.HBO ))
names(meta.blood)
head(meta.blood)
meta.blood$Sample_ID
rownames(meta.blood) <- meta.blood[,1]
meta.blood = meta.blood[mixedsort(rownames(meta.blood)),]
rownames(meta.blood) <- sub("X", "", rownames(meta.blood))
meta.blood$Sample_ID
## [1] "1A" "1B" "2A" "2B" "3A" "3B" "4A" "4B" "5A"
## [10] "5B" "6A" "6B" "7A" "7B" "8A" "8B" "9A" "10A"
## [19] "10B" "11A" "11B" "12A" "12B" "13A" "13B" "14A" "14B"
## [28] "15A" "15B" "16A" "16B" "17A" "17B" "18A" "18B" "19A"
## [37] "19B" "20A" "20B" "21A" "21B" "22A" "23A" "23B.1" "23B"
## [46] "24A" "24B" "25A" "25B" "26A" "26B" "27A" "27B" "28A"
## [55] "28B" "29A" "29B" "30A" "30B" "31A" "31B" "32A" "32B"
## [64] "33A" "33B" "34A" "34B" "34C" "35A" "35B" "36A" "36B"
## [73] "37A" "37B" "38A" "38B" "39A" "39B" "40A" "40B" "41A"
## [82] "41B" "43A" "43B" "44A" "44B" "45A" "45B" "46A" "46B"
## [91] "47A" "47B" "48A" "48B" "49A" "49B" "49C" "50A" "50B"
## [100] "51A" "51B" "52A" "52B" "53A" "53B" "54A" "54B" "55A"
## [109] "55B" "56A" "56B" "57A" "57B" "60A" "60B" "61A" "61B"
## [118] "63A" "63B" "64A" "64B" "65A" "65B" "66A" "66B" "67A"
## [127] "67B" "68A" "68B" "70A" "70B" "71A" "71B" "73A" "73B"
## [136] "74A" "74B" "75A" "75B" "76A" "76B" "77A" "77B" "78A"
## [145] "78B" "80A" "80B" "81A" "81B" "82A" "82B" "84A" "84B"
## [154] "85A" "85B" "86A" "86B" "87A" "87B" "88A" "88B" "90A"
## [163] "90B" "91A" "91B" "92A" "92B" "93A" "93B" "94A" "94B"
## [172] "95A" "95B" "96A" "96B" "97A" "97B" "99A" "99B" "100A"
## [181] "100B" "101A" "101B" "102A" "102B" "103A" "103B"
y.blood <- DGEList(counts = count.blood, genes=rownames(count.blood), samples = meta.blood)
design <- model.matrix(~0 + patient + Bf.or.Af, meta.blood)
head(design)
## patient Bf.or.Afa Bf.or.Afax Bf.or.Afb Bf.or.Afbx
## 1A 1 1 0 0 0
## 1B 1 0 0 1 0
## 2A 2 1 0 0 0
## 2B 2 0 0 1 0
## 3A 3 1 0 0 0
## 3B 3 0 0 1 0
tail(design)
## patient Bf.or.Afa Bf.or.Afax Bf.or.Afb Bf.or.Afbx
## 101A 101 0 1 0 0
## 101B 101 0 0 0 1
## 102A 102 0 1 0 0
## 102B 102 0 0 0 1
## 103A 103 0 1 0 0
## 103B 103 0 0 0 1
#{r} #keep <- filterByExpr(y.blood, group = meta.blood$group) #y.blood <- y.blood[keep, ,keep.lib.sizes=FALSE] #table(keep) #
### We removed 31291 genes with low counts.
y.blood <- calcNormFactors(y.blood)
#(TMM) between each pair of samples. The product, the effective library size replaces the original #library size in all downsteam analyses
y.blood <- estimateDisp(y.blood, design, robust = T)
par(mfrow =c(1,1))
plotBCV(y.blood)
Differential expression
fit <- glmQLFit(y.blood, design, robust = TRUE)
plotQLDisp(fit)
# create contrasts to test
colnames(design)
## [1] "patient" "Bf.or.Afa" "Bf.or.Afax" "Bf.or.Afb" "Bf.or.Afbx"
con.B.all.HBO <- makeContrasts(HBO.af_vs_HBO.bf = Bf.or.Afb - Bf.or.Afa, levels = design)
con.B.all.HBO
## Contrasts
## Levels HBO.af_vs_HBO.bf
## patient 0
## Bf.or.Afa -1
## Bf.or.Afax 0
## Bf.or.Afb 1
## Bf.or.Afbx 0
qlf.B.HBO.all <- glmQLFTest(fit, contrast = con.B.all.HBO, coef=2)
summary(decideTests(qlf.B.HBO.all))
## -1*Bf.or.Afa 1*Bf.or.Afb
## Down 6
## NotSig 51832
## Up 1
dge.B.HBO.all <- topTags(qlf.B.HBO.all, n = Inf)$table
head(dge.B.HBO.all)
## genes logFC logCPM F PValue
## ENSG00000117000 ENSG00000117000 -0.4321854 5.522535 50.59806 2.112781e-11
## ENSG00000163631 ENSG00000163631 -3.1483896 -1.120656 35.27876 1.397073e-08
## ENSG00000171560 ENSG00000171560 -2.3275094 -2.116866 30.66241 9.856929e-08
## ENSG00000186352 ENSG00000186352 -0.6713196 0.398807 29.20358 1.891752e-07
## ENSG00000269586 ENSG00000269586 1.1898700 -2.491662 32.30901 2.963559e-07
## ENSG00000266445 ENSG00000266445 -0.6295410 1.525380 24.69570 1.466900e-06
## FDR
## ENSG00000117000 1.095244e-06
## ENSG00000163631 3.621143e-04
## ENSG00000171560 1.703245e-03
## ENSG00000186352 2.451663e-03
## ENSG00000269586 3.072559e-03
## ENSG00000266445 1.267377e-02
limma::plotMD(qlf.B.HBO.all, main = "Before vs after HBO2 treatment")