library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.1     v dplyr   1.0.5
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Directory holding the study inputs. Defined once so both reads stay in sync
# and the location can be changed in a single place.
data_dir <- "F:/BAI HC/Research 2/Covid19"

# Count table stored as a serialized R object.
count_matrix <- readRDS(file.path(data_dir, "data_df.RDS"))
# Sample metadata table; parsed as comma-separated despite the .txt extension.
patient_data <- read_csv(file.path(data_dir, "SraRunTable.txt"))
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_character(),
##   AvgSpotLen = col_double(),
##   Bases = col_double(),
##   Bytes = col_double(),
##   ReleaseDate = col_datetime(format = "")
## )
## i Use `spec()` for the full column specifications.
# Tibbles do not support row names (setting them is deprecated and warns), so
# convert the metadata to a base data.frame before keying its rows by Run.
patient_data <- as.data.frame(patient_data)
rownames(patient_data) <- patient_data$Run
# Match the column names of the count matrix with the `Library Name` column of patient_data

# Column labels for the count matrix, generated group by group:
# 9 case, 9 case-colon, 10 control and 10 control-colon samples,
# each numbered from 1 within its group.
sample_name <- c(
  paste0("Wuhan_case_", 1:9),
  paste0("Wuhan_case_colon_", 1:9),
  paste0("Wuhan_control_", 1:10),
  paste0("Wuhan_control_colon_", 1:10)
)

# The labels are applied by position, so the vector must supply exactly one
# name per column; on a data frame a shorter vector would leave NA column
# names behind instead of failing.
stopifnot(length(sample_name) == ncol(count_matrix))
colnames(count_matrix) <- sample_name

# The column order does not yet match the order of the library names
# (elementwise comparison, one value per sample)
patient_data$`Library Name` == colnames(count_matrix)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
## [37] FALSE FALSE
# Target column order: the library names, in the row order of patient_data

library_name <- patient_data[["Library Name"]]
library(data.table)
## Warning: package 'data.table' was built under R version 4.0.5
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
# Reorder the count columns so they follow the order of the library names.
# Every library name must map to exactly one column; otherwise the positional
# comparison and relabelling that follow would attach labels to the wrong
# samples.
stopifnot(
  anyDuplicated(library_name) == 0L,
  setequal(library_name, colnames(count_matrix))
)
# setcolorder() reorders its input by reference, so reorder a copy and leave
# count_matrix itself untouched.
new_count_matrix <- setcolorder(copy(count_matrix), library_name)

# Check the order of the count matrix columns against the library names
patient_data$`Library Name` == colnames(new_count_matrix)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Relabel the reordered count columns with the values of the Run column

colnames(new_count_matrix) <- patient_data[["Run"]]

# Confirm that the count column names agree with the row names of
# patient_data (elementwise, one value per sample)

colnames(new_count_matrix) == rownames(patient_data)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# SummarizedExperiment object

library(SummarizedExperiment)
## Warning: package 'SummarizedExperiment' was built under R version 4.0.3
## Loading required package: MatrixGenerics
## Warning: package 'MatrixGenerics' was built under R version 4.0.3
## Loading required package: matrixStats
## Warning: package 'matrixStats' was built under R version 4.0.5
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Warning: package 'GenomicRanges' was built under R version 4.0.3
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.5
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.0.3
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:data.table':
## 
##     first, second
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.0.3
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:data.table':
## 
##     shift
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.0.5
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.0.3
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
# Bundle the counts and the per-sample metadata into a single
# SummarizedExperiment, then display its summary.
# NOTE(review): new_count_matrix is passed with whatever class it has at this
# point; confirm downstream tools accept that, or convert it to a matrix first.
covid19_sum <- SummarizedExperiment::SummarizedExperiment(
  assays = new_count_matrix,
  colData = patient_data
)

covid19_sum
## class: SummarizedExperiment 
## dim: 60683 38 
## metadata(0):
## assays(1): ''
## rownames(60683): ENSG00000223972 ENSG00000227232 ... ENSG00000277475
##   ENSG00000268674
## rowData names(0):
## colnames(38): SRR12816720 SRR12816722 ... SRR12951228 SRR12951229
## colData names(29): Run AGE ... SRA Study Tissue