if (!require("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
## Warning: package 'BiocManager' was built under R version 4.2.1
BiocManager::install("edgeR")
## Bioconductor version 3.15 (BiocManager 1.30.18), R 4.2.0 (2022-04-22 ucrt)
## Warning: package(s) not installed when version(s) same as current; use `force = TRUE` to
##   re-install: 'edgeR'
## Installation paths not writeable, unable to update packages
##   path: C:/Program Files/R/R-4.2.0/library
##   packages:
##     MASS, nlme
## Old packages: 'htmltools', 'quantreg', 'rlang'
BiocManager::install("GO.db")
## Bioconductor version 3.15 (BiocManager 1.30.18), R 4.2.0 (2022-04-22 ucrt)
## Warning: package(s) not installed when version(s) same as current; use `force = TRUE` to
##   re-install: 'GO.db'
## Installation paths not writeable, unable to update packages
##   path: C:/Program Files/R/R-4.2.0/library
##   packages:
##     MASS, nlme
## Old packages: 'htmltools', 'quantreg', 'rlang'
BiocManager::install("org.Hs.eg.db")
## Bioconductor version 3.15 (BiocManager 1.30.18), R 4.2.0 (2022-04-22 ucrt)
## Warning: package(s) not installed when version(s) same as current; use `force = TRUE` to
##   re-install: 'org.Hs.eg.db'
## Installation paths not writeable, unable to update packages
##   path: C:/Program Files/R/R-4.2.0/library
##   packages:
##     MASS, nlme
## Old packages: 'htmltools', 'quantreg', 'rlang'
library(edgeR)
## Loading required package: limma
library(ggplot2)
library(pheatmap)
library(GO.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:limma':
## 
##     plotMA
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## 
library(org.Hs.eg.db)
## 

Load data

x <- read.csv("D:/ChIP seq and RNA seq analysis/E15 to E17 Foxg1 RNA seq/Foxg1_countsMatrix.csv")

Parse out the counts and gene names

x1 <- x[,-1]
rownames(x1) <- x[,1]
head(x1)
##               Ctrl_Ctx1 Ctrl_Ctx2 Ctrl_Ctx3 Mut_Ctx1 Mut_Ctx2 Mut_Ctx3 Mut_Ctx4
## Zc3h14             2631      2258      2946     2521     3300     1996     2643
## Troap               502       537       522      313      555      269      446
## Clca2                 0         0         1        0        1        0        0
## 1810013L24Rik      1732      1525      1969     2007     2481     2037     1943
## Srsf7              7693      6986      8574     9098    11508     6650     9552
## Fignl2               78        66        71       55       49       35       33
##               Ctrl_HC1 Ctrl_HC2 Ctrl_HC3 Mut_HC1 Mut_HC2 Mut_HC3 Mut_HC4
## Zc3h14            4200     3261     2990    3198    4076    2371    2747
## Troap              799      499      540     469     639     294     438
## Clca2                0        5        2       0       0       0       0
## 1810013L24Rik     2979     2313     1933    2292    2935    1585    2160
## Srsf7            10672     8991     9577   11042   12883    7658    9929
## Fignl2              74       58       25      25      71      15      29

Mention the different kinds of variables in the data in their specific order on the data frame

group <- factor(c(rep("Ctrl_Ctx",3),rep("Mut_Ctx",4), rep("Ctrl_HC", 3), rep("Mut_HC", 4)))

Load the info of counts and variable types as an EdgeR object “DGEList”

y <- DGEList(counts=x1, group=group)

Normalize the counts

y <- calcNormFactors(y)

Mention the variable within which comparison should be done

design <- model.matrix(~group)

Estimating dispersions for quantile-adjusted conditional maximum likelihood

(This analysis is only applicable on datasets with a single-factor design)

y <- estimateDisp(y,design)
y$samples
##              group lib.size norm.factors
## Ctrl_Ctx1 Ctrl_Ctx 25409337    1.0012619
## Ctrl_Ctx2 Ctrl_Ctx 22512259    1.0086813
## Ctrl_Ctx3 Ctrl_Ctx 28159040    0.9874403
## Mut_Ctx1   Mut_Ctx 25341282    0.9888326
## Mut_Ctx2   Mut_Ctx 33507884    1.0093675
## Mut_Ctx3   Mut_Ctx 22649157    0.9646663
## Mut_Ctx4   Mut_Ctx 26313877    0.9750238
## Ctrl_HC1   Ctrl_HC 43825848    0.9671781
## Ctrl_HC2   Ctrl_HC 34783186    1.0089196
## Ctrl_HC3   Ctrl_HC 28933516    1.0368129
## Mut_HC1     Mut_HC 33292670    1.0154158
## Mut_HC2     Mut_HC 40736924    1.0052011
## Mut_HC3     Mut_HC 23466510    1.0015300
## Mut_HC4     Mut_HC 31090524    1.0327587

Plot the biological dispersion estimates

plotBCV(y)

To set a level as a baseline value

y$samples$group <- relevel(y$samples$group, ref = "Ctrl_HC")

##Testing for Differentially Expressed genes

(To be used only for single-factor design datasets)

et <- exactTest(y)
out <- topTags(et, p.value=0.05, n=20000)
head(out, 10)
## Comparison of groups:  Ctrl_Ctx-Ctrl_HC 
##             logFC   logCPM       PValue          FDR
## Nhlh2   -4.079698 6.118688 5.036721e-55 1.270714e-50
## Nr4a3   -3.639890 6.204284 1.263256e-50 1.593534e-46
## Rmst    -3.801514 4.598996 4.868075e-38 4.093889e-34
## Lhx9    -6.147132 3.929907 2.893449e-37 1.824971e-33
## Col2a1  -2.384743 7.299961 2.136243e-36 1.077906e-32
## Slc16a2 -1.516636 6.953510 1.415101e-33 5.950263e-30
## Reln    -2.203911 8.653360 1.653303e-32 5.958740e-29
## Nhlh1   -1.625738 7.294928 1.529751e-31 4.824261e-28
## Klhl35  -1.245108 5.899472 4.462839e-31 1.251033e-27
## Pdlim3  -5.030091 4.917858 7.544504e-30 1.903403e-26

Generalized Linear Models for non-normally distributed data - used for >1 design factor comaparison

Using the quasi-likelihood F-test to get DE genes

1) glmQLFit() - Fits a negative binomial GLM and creates a DGELM object

2) glmQLFTest() - Does the QL F-test

Coefficient 2 means comparison of set 2 with set 1(selected as baseline)

fit <- glmQLFit(y,design)
#tr <- glmTreat(fit, coef=2, lfc=1)
qlf <- glmQLFTest(fit,coef=2)

## To compare 3 vs 1
#qlf.3vs1 <- glmQLFTest(fit, coef=3)

## To compare 3 vs 2
#qlf.3vs2 <- glmQLFTest(fit, contrast=c(0,-1,1,0))

## To compare all 4 elements
#qlf <- glmQLFTest(fit, coef=2:4)

To find top DE Genes between samples (n = Number of top DE genes you want to see)

sup <- topTags(qlf, n=30000)
write.csv(as.data.frame(sup), file="abc_qlf.csv")

If sample has more than single-factor comparisons

Model matrix without setting any sample type as baseline

Create contrasts for pair-wise comparisons

design <- model.matrix(~0+group, data = y$samples)
colnames(design) <- levels(y$samples$group)
my.contrasts <- makeContrasts(CtxMutvsWT=Ctrl_Ctx-Mut_Ctx,
                              HCMutvsWT=Ctrl_HC-Mut_HC,
                              CtxvsHC=Ctrl_Ctx-Ctrl_HC, levels = design)

Perform Quasi-likelihood model test on the samples according to the mentioned pairs

qlf.CtxMutvsWT <- glmQLFTest(fit, contrast = my.contrasts[,'CtxMutvsWT'], )
qlf.HCMutvsWT <- glmQLFTest(fit, contrast = my.contrasts[,'HCMutvsWT'])
qlf.CtxvsHC <- glmQLFTest(fit, contrast = my.contrasts[,'CtxvsHC'])

## Print top DE Genes
sup.Ctx <- topTags(qlf.CtxMutvsWT, n=30000)
sup.HC <- topTags(qlf.HCMutvsWT, n=30000)
sup.CtxvsHC <- topTags(qlf.CtxvsHC, n=30000)

# Write as a .csv file
write.csv(as.data.frame(sup.Ctx), file="Ctx_Ctrl_vs_Mut_qlf.csv")

Alternative

Likelihood Ratio Test

fit <- glmFit(y,design)
colnames(fit)
## [1] "Ctrl_HC"  "Ctrl_Ctx" "Mut_Ctx"  "Mut_HC"
lrt <- glmLRT(fit,coef=2)
sup1 <- topTags(lrt, n=30000)
write.csv(as.data.frame(sup1), file="abc_lrt.csv")

To get Normalised counts for a heatmap

logcpm <- cpm(y, log=TRUE)
# write.csv(logcpm, file = "logCPM_HC_vs_CTX_EdgeR.csv" )