library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
# Load the count data alread filtered
vst_count <-readRDS("F:/BAI HC/Research 2/Covid19/vst_count.RDS")
pca_data <-assay(vst_count)

pca_data <- as.data.frame(pca_data)
pca_data$geneID <-rownames(pca_data)
# Load gene_var

gene_var_sorted <- readRDS("F:/BAI HC/Research 2/Covid19/gene_var.RDS")

# Filter 100 gene with the most variance
gene_500 <-gene_var_sorted[1:500,]
gene_id <-gene_500$gene

the_500_count <- pca_data[match(gene_id,pca_data$geneID),]
the_500_count <- the_500_count[, 1:38]
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.6.2
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
patient_info_final <-readRDS("F:/BAI HC/Research 2/Covid19/patient_info_final.RDS")

patient_info_final <- as.data.frame(patient_info_final)
# Sample annotation for the heatmap
condition <- patient_info_final$condition

hm_name <- HeatmapAnnotation(df=data.frame(condition),
                             col = list(condition = c("covid19_case" = "red",
                                                      "covid19_control" = "green", 
                                                      "colon_case" = "black", 
                                                      "colon_control" = "yellow")))
# The heatmap for 100 gene with larger variance

Heatmap(the_500_count,
clustering_method_rows = "average",
clustering_method_columns = "average",
clustering_distance_rows = "euclidean",
cluster_rows = T,
cluster_columns = T,
show_row_dend = F,
gap=unit(8,"mm"),
show_row_names = F,
show_column_names = T,
top_annotation = hm_name)
## Warning: The input is a data frame, convert it to the matrix.

Figure 1. The heatmap of 38 samples by the 500 genes having larger variance

# Cases sample information
 male_covid19_case <- c("SRR12816736", "SRR12816724", "SRR12816722",
                        "SRR12816721", "SRR12816720")
 female_covid19_case <-c("SRR12816723", "SRR12816725", "SRR12816719",
                         "SRR12816735", "NA")
 male_colon_case <- c("SRR12951219","SRR12951216", "SRR12951213", 
                      "SRR12951217", "SRR12951215" )
 female_colon_case <- c("SRR12951220", "SRR12951212", "SRR12951218", 
                        "SRR12951214", "NA")
 cases_table <- data.frame(male_covid19_case, female_covid19_case,
                           male_colon_case, female_colon_case )
Gender information for case samples
male_covid19_case female_covid19_case male_colon_case female_colon_case
SRR12816736 SRR12816723 SRR12951219 SRR12951220
SRR12816724 SRR12816725 SRR12951216 SRR12951212
SRR12816722 SRR12816719 SRR12951213 SRR12951218
SRR12816721 SRR12816735 SRR12951217 SRR12951214
SRR12816720 NA SRR12951215 NA

From the heatmap, we see clear different expression patterns of sample from lung tissue and colon tissue. Male samples SRR12816736, SRR12816724, SRR12816722, SRR12816721“, SRR12816720 are clustered together while four female covid19 case samples separated into two group, with one group includes SRR12816719” and “SRR12816735” and two other samples somehow clusters with colon case sample.

In the covid19 control group, we see five sample clustering together SRR12816718, SRR12816728, SRR12816730, SRR12816732, and SRR12816726. The cluster has similar up-regulated expression of those genes at the bottom of the heatmap to the male covid19 case sample. This indicates that those are the five male control samples and the other five control samples are female.

In colon_case sample group, we have five male case samples also have up-regulated expression for genes at the bottom of the heatmap. In the colon control sample group, we also have 5 samples clustered together at the left-most of the heatmap showing the same expression pattern for those genes. Hence, The five left-most samples can be male control sample and the other five samples are from female.

Furthermore, our analysis compares the difference between control and case samples, which indicates that similarity of the cases and control groups is attributed to the biological nature of the samples.

Therefore, I think we have gender balance in the two control groups.