#Opload packages and install libraries with pacman

Loading data from the two counts tables produced by Leonor

#main study

z1 <- read.delim("quantifications_gencode.shipments.blood.nodups.txt", row.names = 1, stringsAsFactors=FALSE)

#Pilot

z2 <- read.delim("quantifications_gencode.nodups.txt", row.names = 1, stringsAsFactors=FALSE)

Removing non-blood samples from the pilot dataset

names(z2)
z2 <- subset(z2, 
          select=-c(X10A.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam, 
                    X10B.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam, 
                    X17A.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam, 
                    X17B.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam, 
                    X19A.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam, 
                    X23A.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam,
                    X23B.tissue.RNAlater.nodups.Aligned.bamProcessed.out.bam))
names(z2)

colnames(z2) <- sub(".whole.blood", "", colnames(z2))
names(z2)

Remove version number in row.names

class(z1)
## [1] "data.frame"
dim(z1)
## [1] 67049   179
rownames(z1) <- make.names(cleanid(rownames(z1)), unique = TRUE)
class(z2)
## [1] "data.frame"
dim(z2)
## [1] 67049     8
rownames(z2) <- make.names(cleanid(rownames(z2)), unique = TRUE)

Merging count table and clearining gene IDs

z3 <- cbind(z1, z2)

rownames(z3) <- make.names(cleanid(rownames(z3)), unique = TRUE)

head(z3)

Create function to detect NA (by row)

indice_na_rows <- function(x){
  out <- all(is.na(x))
  #out <- x %>% is.na %>% all
  return(out)
}

Apply function to count data to detect NA’s

ind <- apply(X = z3, MARGIN = 1, FUN = indice_na_rows)


ind <- !ind
table(ind)
## ind
##  TRUE 
## 67049

NO NA’s

count.blood <- z3[ind,]

Removing genes with 0 counts across all samples

rmZero.blood <- rowSums(count.blood)>0

count.blood <- count.blood[rmZero.blood,]
ind3 <- apply(X = count.blood, MARGIN = 1, FUN = indice_na_rows)
ind3 <- !ind3

count.blood <- count.blood[ind3,]

67049 - 51839
## [1] 15210

We removed 15210 genes with 0 counts in all rows

Making sure our count data subset without zero counts has no NA’s

head(count.blood)

apply(count.blood, 2, function(x){which(is.na(x))})

Fixing column names

names(count.blood)
colnames(count.blood) <- sub(".nodups.Aligned.bamProcessed.out.bam", "", colnames(count.blood))
names(count.blood)
colnames(count.blood) <- sub("a", "A", colnames(count.blood))
names(count.blood)
colnames(count.blood) <- sub("b", "B", colnames(count.blood))
names(count.blood)
colnames(count.blood) <- sub(".whole.blood", "", colnames(count.blood))

colnames(count.blood) <- sub("X", "", colnames(count.blood))
names(count.blood)

duplicated(colnames(count.blood))

Sort count data

count.blood = count.blood[,mixedsort(colnames(count.blood))]
names(count.blood)
##   [1] "1A"    "1B"    "2A"    "2B"    "3A"    "3B"    "4A"    "4B"    "5A"   
##  [10] "5B"    "6A"    "6B"    "7A"    "7B"    "8A"    "8B"    "9A"    "10A"  
##  [19] "10B"   "11A"   "11B"   "12A"   "12B"   "13A"   "13B"   "14A"   "14B"  
##  [28] "15A"   "15B"   "16A"   "16B"   "17A"   "17B"   "18A"   "18B"   "19A"  
##  [37] "19B"   "20A"   "20B"   "21A"   "21B"   "22A"   "23A"   "23B.1" "23B"  
##  [46] "24A"   "24B"   "25A"   "25B"   "26A"   "26B"   "27A"   "27B"   "28A"  
##  [55] "28B"   "29A"   "29B"   "30A"   "30B"   "31A"   "31B"   "32A"   "32B"  
##  [64] "33A"   "33B"   "34A"   "34B"   "34c"   "35A"   "35B"   "36A"   "36B"  
##  [73] "37A"   "37B"   "38A"   "38B"   "39A"   "39B"   "40A"   "40B"   "41A"  
##  [82] "41B"   "43A"   "43B"   "44A"   "44B"   "45A"   "45B"   "46A"   "46B"  
##  [91] "47A"   "47B"   "48A"   "48B"   "49A"   "49B"   "49c"   "50A"   "50B"  
## [100] "51A"   "51B"   "52A"   "52B"   "53A"   "53B"   "54A"   "54B"   "55A"  
## [109] "55B"   "56A"   "56B"   "57A"   "57B"   "60A"   "60B"   "61A"   "61B"  
## [118] "63A"   "63B"   "64A"   "64B"   "65A"   "65B"   "66A"   "66B"   "67A"  
## [127] "67B"   "68A"   "68B"   "70A"   "70B"   "71A"   "71B"   "73A"   "73B"  
## [136] "74A"   "74B"   "75A"   "75B"   "76A"   "76B"   "77A"   "77B"   "78A"  
## [145] "78B"   "80A"   "80B"   "81A"   "81B"   "82A"   "82B"   "84A"   "84B"  
## [154] "85A"   "85B"   "86A"   "86B"   "87A"   "87B"   "88A"   "88B"   "90A"  
## [163] "90B"   "91A"   "91B"   "92A"   "92B"   "93A"   "93B"   "94A"   "94B"  
## [172] "95A"   "95B"   "96A"   "96B"   "97A"   "97B"   "99A"   "99B"   "100A" 
## [181] "100B"  "101A"  "101B"  "102A"  "102B"  "103A"  "103B"

Loading metadata

dir()

m1 <- read.csv2("Demographics.variables.blood.csv")
m2 <- read.csv2("INFECT.variables.longitudinal_blood_V2.csv")
m3 <- read.csv2("LABKA.variable.HBOmic.v1.csv" )
m4 <- read.csv2("INFECT_shock_and_death.csv")

names(m1)
meta.blood.temp.1 <- merge(x =m1, y= m2, by="Sample_ID")
meta.blood.temp.2 <- merge(x = meta.blood.temp.1, y = m3, by = "Sample_ID")
meta.blood.temp.3 <- merge(x = meta.blood.temp.2, y = m4, by = "Sample_ID")

names(meta.blood.temp.3)
meta.blood <- subset(meta.blood.temp.3, select=-c(Nr..HBO.y, Pre.or.post.HBO ))
names(meta.blood)

Changing rownames of metadata

head(meta.blood)
meta.blood$Sample_ID

rownames(meta.blood) <- meta.blood[,1]

Sort metadata

meta.blood = meta.blood[mixedsort(rownames(meta.blood)),]

Fixing rownames

rownames(meta.blood) <- sub("X", "", rownames(meta.blood))
meta.blood$Sample_ID
##   [1] "1A"    "1B"    "2A"    "2B"    "3A"    "3B"    "4A"    "4B"    "5A"   
##  [10] "5B"    "6A"    "6B"    "7A"    "7B"    "8A"    "8B"    "9A"    "10A"  
##  [19] "10B"   "11A"   "11B"   "12A"   "12B"   "13A"   "13B"   "14A"   "14B"  
##  [28] "15A"   "15B"   "16A"   "16B"   "17A"   "17B"   "18A"   "18B"   "19A"  
##  [37] "19B"   "20A"   "20B"   "21A"   "21B"   "22A"   "23A"   "23B.1" "23B"  
##  [46] "24A"   "24B"   "25A"   "25B"   "26A"   "26B"   "27A"   "27B"   "28A"  
##  [55] "28B"   "29A"   "29B"   "30A"   "30B"   "31A"   "31B"   "32A"   "32B"  
##  [64] "33A"   "33B"   "34A"   "34B"   "34C"   "35A"   "35B"   "36A"   "36B"  
##  [73] "37A"   "37B"   "38A"   "38B"   "39A"   "39B"   "40A"   "40B"   "41A"  
##  [82] "41B"   "43A"   "43B"   "44A"   "44B"   "45A"   "45B"   "46A"   "46B"  
##  [91] "47A"   "47B"   "48A"   "48B"   "49A"   "49B"   "49C"   "50A"   "50B"  
## [100] "51A"   "51B"   "52A"   "52B"   "53A"   "53B"   "54A"   "54B"   "55A"  
## [109] "55B"   "56A"   "56B"   "57A"   "57B"   "60A"   "60B"   "61A"   "61B"  
## [118] "63A"   "63B"   "64A"   "64B"   "65A"   "65B"   "66A"   "66B"   "67A"  
## [127] "67B"   "68A"   "68B"   "70A"   "70B"   "71A"   "71B"   "73A"   "73B"  
## [136] "74A"   "74B"   "75A"   "75B"   "76A"   "76B"   "77A"   "77B"   "78A"  
## [145] "78B"   "80A"   "80B"   "81A"   "81B"   "82A"   "82B"   "84A"   "84B"  
## [154] "85A"   "85B"   "86A"   "86B"   "87A"   "87B"   "88A"   "88B"   "90A"  
## [163] "90B"   "91A"   "91B"   "92A"   "92B"   "93A"   "93B"   "94A"   "94B"  
## [172] "95A"   "95B"   "96A"   "96B"   "97A"   "97B"   "99A"   "99B"   "100A" 
## [181] "100B"  "101A"  "101B"  "102A"  "102B"  "103A"  "103B"

Constructing DGElist object for edgeR pipeline

edgeRUsersGuide()

The variable “Bf.or.Af” categorises the data in four groups;

a = HBO1 and HBO2 at baseline

b = HBO1 and HBO2 at follow-up

ax = non-HBO at baseline

bx = non-HBO at follow-up

y.blood <- DGEList(counts = count.blood, genes=rownames(count.blood), samples = meta.blood)

design <- model.matrix(~0 + patient + Bf.or.Af, meta.blood)
head(design)
##    patient Bf.or.Afa Bf.or.Afax Bf.or.Afb Bf.or.Afbx
## 1A       1         1          0         0          0
## 1B       1         0          0         1          0
## 2A       2         1          0         0          0
## 2B       2         0          0         1          0
## 3A       3         1          0         0          0
## 3B       3         0          0         1          0
tail(design)
##      patient Bf.or.Afa Bf.or.Afax Bf.or.Afb Bf.or.Afbx
## 101A     101         0          1         0          0
## 101B     101         0          0         0          1
## 102A     102         0          1         0          0
## 102B     102         0          0         0          1
## 103A     103         0          1         0          0
## 103B     103         0          0         0          1

Filtering our lowly expressed counts

#{r} #keep <- filterByExpr(y.blood, group = meta.blood$group) #y.blood <- y.blood[keep, ,keep.lib.sizes=FALSE] #table(keep) # ### We removed 31291 genes with low counts.

Normalization

calculate normalization factors

y.blood <- calcNormFactors(y.blood)

The default method for computing these scale factors uses a trimmed mean of M-values

#(TMM) between each pair of samples. The product, the effective library size replaces the original #library size in all downsteam analyses

Estimate common dispersions and tagwise dispersions in one run

y.blood <- estimateDisp(y.blood, design, robust =  T)

visualizing the dispersion estimates

par(mfrow =c(1,1))
plotBCV(y.blood)

Differential expression

Fit the model

fit <- glmQLFit(y.blood, design, robust = TRUE)
plotQLDisp(fit)

# create contrasts to test

colnames(design)
## [1] "patient"    "Bf.or.Afa"  "Bf.or.Afax" "Bf.or.Afb"  "Bf.or.Afbx"
con.B.all.HBO <- makeContrasts(HBO.af_vs_HBO.bf = Bf.or.Afb - Bf.or.Afa, levels = design)
con.B.all.HBO
##             Contrasts
## Levels       HBO.af_vs_HBO.bf
##   patient                   0
##   Bf.or.Afa                -1
##   Bf.or.Afax                0
##   Bf.or.Afb                 1
##   Bf.or.Afbx                0

Test for differentially expressed genes

qlf.B.HBO.all <- glmQLFTest(fit, contrast = con.B.all.HBO, coef=2)

summary(decideTests(qlf.B.HBO.all))
##        -1*Bf.or.Afa 1*Bf.or.Afb
## Down                          6
## NotSig                    51832
## Up                            1

Create DEG table

dge.B.HBO.all <- topTags(qlf.B.HBO.all, n = Inf)$table
head(dge.B.HBO.all)
##                           genes      logFC    logCPM        F       PValue
## ENSG00000117000 ENSG00000117000 -0.4321854  5.522535 50.59806 2.112781e-11
## ENSG00000163631 ENSG00000163631 -3.1483896 -1.120656 35.27876 1.397073e-08
## ENSG00000171560 ENSG00000171560 -2.3275094 -2.116866 30.66241 9.856929e-08
## ENSG00000186352 ENSG00000186352 -0.6713196  0.398807 29.20358 1.891752e-07
## ENSG00000269586 ENSG00000269586  1.1898700 -2.491662 32.30901 2.963559e-07
## ENSG00000266445 ENSG00000266445 -0.6295410  1.525380 24.69570 1.466900e-06
##                          FDR
## ENSG00000117000 1.095244e-06
## ENSG00000163631 3.621143e-04
## ENSG00000171560 1.703245e-03
## ENSG00000186352 2.451663e-03
## ENSG00000269586 3.072559e-03
## ENSG00000266445 1.267377e-02

MA plot

limma::plotMD(qlf.B.HBO.all, main = "Before vs after HBO2 treatment")