Load Packages

library(tximport) library(DESeq2) library(biomaRt) library(vsn) library(dplyr) library(pheatmap) library(gplots) library(RColorBrewer) library(ComplexHeatmap) library(readr) library(circlize) library(EnhancedVolcano) library(ggrepel) library(edgeR)

Set Directory

dir1 <- “~/Desktop/Feature-Counts/PV1SN3/6-12_months/”

Read in Meta Data

file<- paste0(dir1, “PV1SN3-meta-6+9-female.csv”)

samples <- read.csv(file, stringsAsFactors = FALSE, header = TRUE, sep = “,”)

Assign Sample Variables as Factors

samples$Age<-factor(samples$Age, levels = c(“Months9”,“Months6”))

samples$Strain<-factor(samples$Strain, levels = c(“AD”,“WT”))

Set up Data Frame with Variables

sampleTable <- data.frame(sampleName = samples$Name, Age = samples$Age, Strain = samples$Strain)

full_model<- ~ Strain

Load Data into DESeq

data<-read.csv(paste0(dir1, “PV1SN3-counts-6+9-female.csv”), sep = “,”, stringsAsFactors = F, header = T) #read data into DESeq2 data.1<-as.matrix(data[,2:12]) rownames(data.1)<-data[,1] dds <-DESeqDataSetFromMatrix(data.1, sampleTable, full_model)

Drop Low Read Counts

smallestGroupSize <- 5 keep <- rowSums(counts(dds) >=10) >= smallestGroupSize dds <- dds[keep,]

Run DESeq

dds_strain<-DESeq(dds)

Exploratory Analysis

Visualize Gene Expression Variance and Dispersion

hist(counts(dds_strain, normalized=FALSE)[,5], breaks = 500, col=“blue”, xlab=“Raw expression counts”, ylab=“Number of genes”, main = “Count distribution for sample X”)

head(counts(dds_strain, normalized=FALSE))

mean_counts <- apply(counts(dds_strain, normalized=FALSE) [,1:3], 1, mean) variance_counts <- apply(counts(dds_strain, normalized=FALSE) [,1:3], 1, var)

plot(log10(mean_counts), log10(variance_counts), ylim=c(0,9), xlim=c(0,9), ylab = “log10 (mean counts)”, xlab = “log10 (variance)”, main = “Mean-variance trend”, las = 1)

Add line for x=y

abline(0,1,lwd=2,col=“red”)

Set the Plotting Window to 3 rows and 1 column

par(mfrow=c(3,1))

dispersion = 0.001

hist(rnbinom(n = 10000, mu = 100, size = 1/0.001), xlim = c(0, 300), xlab = ““, breaks = 500, main =” Dispersion 0.001”)

dispersion = 0.01

hist(rnbinom(n = 10000, mu = 100, size = 1/0.01), xlim = c(0, 300), xlab = ““, breaks = 500, main =” Dispersion 0.01”)

dispersion = 0.1

hist(rnbinom(n = 10000, mu = 100, size = 1/0.1), xlim = c(0, 300), xlab = ““, breaks = 500, main =” Dispersion 0.1”)

Set the Plotting Window Back to 1 row and 1 column

par(mfrow=c(1,1))

plotDispEsts(dds_strain)

DESeq Results

resultsNames(dds_strain) res <- results(dds_strain, contrast=c(“Strain”,“AD”,“WT”))

dim(head) head(res)

Order by adjusted p-value

res_ord <- res[order(res$padj),]

Check for how many DEGs with significance in either direction of fold

change

sum(res$padj < 0.05, na.rm=TRUE) sum(res$padj < 0.05 & res$log2FoldChange > 2, na.rm=TRUE) sum(res$padj < 0.05 & res$log2FoldChange < -2, na.rm=TRUE)

Write csv file for Complete Results

write.csv(as.data.frame(res_ord), file = paste0(dir1,“6F_9F_ADvWT_DE_results_PV1SN3_081823_3.csv”), quote = FALSE, row.names = T, col.names = T, sep = “,”)

res_file<- paste0(dir1, “6F_9F_ADvWT_DE_results_PV1SN3_081823_3.csv”) all_data <- read.csv(res_file, stringsAsFactors = FALSE, header = TRUE, sep = “,”)

Generate Volcano Plot

ppi=300

png(paste0(dir1, “6_9_F_ADvWT_volcanoPlot_DEG_PV1SN3_3”, “.png”), width=7.5ppi, height=6ppi, res=ppi) EnhancedVolcano(all_data, lab = NA, x=“log2FoldChange”,y=“padj”, #selectLab = labels, xlim=c(-4,4),ylim=c(0.5,5), axisLabSize=10, title= “AD vs. WT female 6-9 month old mice”, titleLabSize=10, subtitle = NULL, captionLabSize = 6, pCutoff=0.05, FCcutoff = 2, labSize = 2.5, legendPosition = “right”, legendLabSize = 6, #shape=c(6,6,19,16), caption = “FC cutoff, 2; p-value cutoff, 0.05”)

dev.off()

Generate Heatmap

Subset adjusted p-value significance level

res_order_FDR_05 <- subset(res_ord, padj <0.05) nrow(res_order_FDR_05)

Take the regularized log

rld <- rlog(dds_strain, blind = FALSE)

mat2 <- assay(rld)[rownames(res_order_FDR_05),] head(mat2)

Scale matrix by col. values

mat_scaled = t(apply(mat2, 1, scale)) print(mat_scaled)

Set up colors for heatmap

col = colorRamp2(c(-3, 0, 3), c(“blue”, “white”, “red”)) cols1 <- brewer.pal(11, “Paired”)

Subset coldata for samples in specific groups

colData_sub <- colData(dds_strain)

Set up annotation bar for samples

ha1 = HeatmapAnnotation(Strain = colData_sub$Strain, col = list(Strain = c(AD = “#926eae”, WT = “#AF8969”), show_legend = TRUE))

Set up column annotation labels (samples)

ha = columnAnnotation(x = anno_text(colData_sub$sampleName, which=“column”, rot = 45, gp = gpar(fontsize = 10)))

Generate Heatmap Object

ht1 = Heatmap(mat_scaled, name = “Expression”, col = col, top_annotation = c(ha1), bottom_annotation = c(ha), show_row_names = FALSE, show_column_names = TRUE, show_row_dend = FALSE)

Plot the Heatmap

png(paste0(dir1, “6_9F-ADvWT-heatmap_DEG_PV1SN3_3”, “.png”), width=7.5ppi, height=6ppi, res=ppi) draw(ht1, row_title = “Genes”, column_title = “Hierarchical clustering of DEGs (padj<0.05)”)

dev.off()