Differential Expression Analysis

library(DT)
## Warning: package 'DT' was built under R version 4.0.5
library(readr)
temp_df <- read_csv("C:/Users/billc/Downloads/20210513 AAM-BLG-1 SA25 Service Report.xlsx - Data Normalization.csv", col_names = TRUE)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_number(),
##   Normalization = col_character()
## )
## i Use `spec()` for the full column specifications.
row_labels <- temp_df[, 1]
map_df = row_labels
map_df['Gene'] <- length(c("NA", "NA", unlist(gene_symbel)))

temp_df <- temp_df[, -1]
head(temp_df)
## # A tibble: 6 x 25
##      SA1    SA2    SA3    SA4    SA5    SA6    SA7    SA8    SA9   SA10   SA11
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 120414 120414 120414 120414 120414 120414 120414 120414 120414 120414 120414
## 2      0      0      0      0      0      0      0      0      0      0      0
## 3      0      0      0      0      0      0      0      0      0    728   2003
## 4    517      0      0      0     57  93426   7081      0   1908   9512   4679
## 5      0      0      0      0      0      0      0      0      0      0      0
## 6      0      0      0      0      0      0      0    130   4388      1      0
## # ... with 14 more variables: SA12 <dbl>, SA13 <dbl>, SA14 <dbl>, SA15 <dbl>,
## #   SA16 <dbl>, SA17 <dbl>, SA18 <dbl>, SA19 <dbl>, SA20 <dbl>, SA21 <dbl>,
## #   SA22 <dbl>, SA23 <dbl>, SA24 <dbl>, SA25 <dbl>
# row_labels
temp_meta <- read_csv("C:/Users/billc/Downloads/study_MS038 meta.csv")
## Warning: Missing column names filled in: 'X1' [1]
## 
## -- Column specification --------------------------------------------------------
## cols(
##   X1 = col_character(),
##   condition = col_character()
## )
# unique(temp_meta$condition)
temp_meta
## # A tibble: 25 x 2
##    X1    condition 
##    <chr> <chr>     
##  1 SA1   Aged      
##  2 SA2   Aged      
##  3 SA3   Aged      
##  4 SA4   Aged      
##  5 SA5   Aged      
##  6 SA11  Aged+Combo
##  7 SA12  Aged+Combo
##  8 SA13  Aged+Combo
##  9 SA14  Aged+Combo
## 10 SA15  Aged+Combo
## # ... with 15 more rows
temp_data <- as.matrix(temp_df)
row.names(temp_data) <- row_labels$Normalization
datatable(head(temp_data))

Remove all 0’s rows.

temp_data <- temp_data[rowSums(temp_data)>1, ]
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.5
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.0.5
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## Warning: package 'matrixStats' was built under R version 4.0.5
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
dds = DESeqDataSetFromMatrix(countData=temp_data,
                             colData=temp_meta,
                             design=~condition)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
dds = DESeq(dds)
## estimating size factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
# dds

Age vs Aged+Combo

res = results(dds, contrast=c("condition", "Aged", "Aged+Combo"))
# res = res[order(res$pvalue),]

# res = res[order(res$padj),]
res <- data.frame(res)
datatable(res)

Age vs Young

rec_1 = results(dds, contrast=c("condition", "Aged", "Young"))
# rec_1 = rec_1[order(rec_1$pvalue),]
summary(rec_1)
## 
## out of 308 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 5, 1.6%
## LFC < 0 (down)     : 46, 15%
## outliers [1]       : 7, 2.3%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# rec_1 = rec_1[order(rec_1$padj),]
rec_1 <- data.frame(rec_1)

datatable(rec_1)

Aged+Combo vs Young

rec_2 = results(dds, contrast=c("condition", "Aged+Combo", "Young"))
# rec_2 = rec_2[order(rec_2$pvalue),]
summary(rec_2)
## 
## out of 308 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 0, 0%
## LFC < 0 (down)     : 2, 0.65%
## outliers [1]       : 7, 2.3%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# rec_2 = rec_2[order(rec_2$padj),]
rec_2 <- data.frame(rec_2)

datatable(rec_2)
result_df <- merge(res, rec_1, by='row.names', all=TRUE)
row.names(result_df) <- result_df[, 1]

result_df <- result_df[, -1]
result_df <- merge(result_df, rec_2, by='row.names', all=TRUE)

# write.csv(result_df, "./Raybio_df_result_df.csv")


# datatable(result_df)