library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.1 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
count_matrix <-readRDS("F:/BAI HC/Research 2/Covid19/data_df.RDS")
patient_data <- read_csv("F:/BAI HC/Research 2/Covid19/SraRunTable.txt")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_character(),
## AvgSpotLen = col_double(),
## Bases = col_double(),
## Bytes = col_double(),
## ReleaseDate = col_datetime(format = "")
## )
## i Use `spec()` for the full column specifications.
rownames(patient_data) <-patient_data$Run
## Warning: Setting row names on a tibble is deprecated.
# Matcht column name of the count matrix with the $Library_name
sample_name <-c("Wuhan_case_1", "Wuhan_case_2", "Wuhan_case_3", "Wuhan_case_4", "Wuhan_case_5",
"Wuhan_case_6", "Wuhan_case_7", "Wuhan_case_8", "Wuhan_case_9", "Wuhan_case_colon_1",
"Wuhan_case_colon_2", "Wuhan_case_colon_3", "Wuhan_case_colon_4", "Wuhan_case_colon_5",
"Wuhan_case_colon_6", "Wuhan_case_colon_7", "Wuhan_case_colon_8", "Wuhan_case_colon_9",
"Wuhan_control_1", "Wuhan_control_2", "Wuhan_control_3", "Wuhan_control_4",
"Wuhan_control_5", "Wuhan_control_6", "Wuhan_control_7",
"Wuhan_control_8", "Wuhan_control_9", "Wuhan_control_10",
"Wuhan_control_colon_1", "Wuhan_control_colon_2", "Wuhan_control_colon_3",
"Wuhan_control_colon_4", "Wuhan_control_colon_5", "Wuhan_control_colon_6",
"Wuhan_control_colon_7", "Wuhan_control_colon_8", "Wuhan_control_colon_9",
"Wuhan_control_colon_10")
colnames(count_matrix) <- sample_name
# The order of column name does not resembles order of library name
patient_data$`Library Name` == colnames(count_matrix)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## [37] FALSE FALSE
# Arrange column order to the exact order of library name in Patient_data
library_name <-patient_data$`Library Name`
library(data.table)
## Warning: package 'data.table' was built under R version 4.0.5
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
# Arrange count_matrix column by the order of the samples in the library name
new_count_matrix <- setcolorder(count_matrix,library_name )
# Check the order of count_matrix column and the samples in library name
patient_data$`Library Name` == colnames(new_count_matrix)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Rename count_matrix column names as sample name
colnames(new_count_matrix) <- patient_data$Run
# Check if rownames of the patient_data is the same as
#the count column name
rownames(patient_data)== colnames(new_count_matrix)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# SummarizedExperiment object
library(SummarizedExperiment)
## Warning: package 'SummarizedExperiment' was built under R version 4.0.3
## Loading required package: MatrixGenerics
## Warning: package 'MatrixGenerics' was built under R version 4.0.3
## Loading required package: matrixStats
## Warning: package 'matrixStats' was built under R version 4.0.5
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Warning: package 'GenomicRanges' was built under R version 4.0.3
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.5
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.0.3
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:data.table':
##
## first, second
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.0.3
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:data.table':
##
## shift
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.0.5
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.0.3
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
covid19_sum <- SummarizedExperiment::SummarizedExperiment(new_count_matrix, colData = patient_data )
covid19_sum
## class: SummarizedExperiment
## dim: 60683 38
## metadata(0):
## assays(1): ''
## rownames(60683): ENSG00000223972 ENSG00000227232 ... ENSG00000277475
## ENSG00000268674
## rowData names(0):
## colnames(38): SRR12816720 SRR12816722 ... SRR12951228 SRR12951229
## colData names(29): Run AGE ... SRA Study Tissue