Setup: Load Packages

# (Consider adding non-interactive setup from previous discussion here)
# Example: options(repos = c(CRAN = "https://cloud.r-project.org/", BiocManager::repositories()))
# Example: if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools", ask = FALSE)

devtools::install_github("francescojm/CRISPRcleanR")
## Skipping install of 'CRISPRcleanR' from a github remote, the SHA1 (63112887) has not changed since last install.
##   Use `force = TRUE` to force installation
library("CRISPRcleanR")
## Loading required package: stringr
## Loading required package: DNAcopy
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
## Loading required package: withr
## Loading required package: pracma
## Loading required package: PRROC
## Loading required package: rlang
## 
## Attaching package: 'rlang'
## The following objects are masked from 'package:withr':
## 
##     local_options, with_options
## Loading required package: tools
## 
## Attaching package: 'tools'
## The following object is masked from 'package:withr':
## 
##     makevars_user
## Loading required package: BiocManager
## Loading required package: ShortRead
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:pROC':
## 
##     var
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: BiocParallel
## Loading required package: Biostrings
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: XVector
## Loading required package: GenomeInfoDb
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
## Loading required package: Rsamtools
## Loading required package: GenomicRanges
## Loading required package: GenomicAlignments
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:rlang':
## 
##     exprs
## Loading required package: Rsubread
## Loading required package: Rqc
## Loading required package: ggplot2
## Loading required package: jsonlite
## 
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:rlang':
## 
##     flatten, unbox
## Warning: replacing previous import 'withr::makevars_user' by
## 'tools::makevars_user' when loading 'CRISPRcleanR'
library(BSgenome)
## Loading required package: BiocIO
## Loading required package: rtracklayer
## 
## Attaching package: 'rtracklayer'
## The following object is masked from 'package:BiocIO':
## 
##     FileForFormat
## Importing Data
data(KY_Library_v1.0)

fn<-paste(system.file('extdata',package = 'CRISPRcleanR'), '/HT-29_counts.tsv',sep='')

head(KY_Library_v1.0)
##                                                                                            CODE
## A1BG_CCDS12976.1_ex3_19:58862927-58862950:-_5-1 A1BG_CCDS12976.1_ex3_19:58862927-58862950:-_5-1
## A1BG_CCDS12976.1_ex4_19:58863655-58863678:+_5-2 A1BG_CCDS12976.1_ex4_19:58863655-58863678:+_5-2
## A1BG_CCDS12976.1_ex4_19:58863697-58863720:-_5-3 A1BG_CCDS12976.1_ex4_19:58863697-58863720:-_5-3
## A1BG_CCDS12976.1_ex4_19:58863866-58863889:+_5-4 A1BG_CCDS12976.1_ex4_19:58863866-58863889:+_5-4
## A1BG_CCDS12976.1_ex5_19:58864367-58864390:-_5-5 A1BG_CCDS12976.1_ex5_19:58864367-58864390:-_5-5
## A1CF_CCDS7241.1_ex6_10:52588014-52588037:-_5-1   A1CF_CCDS7241.1_ex6_10:52588014-52588037:-_5-1
##                                                 GENES EXONE CHRM STRAND
## A1BG_CCDS12976.1_ex3_19:58862927-58862950:-_5-1  A1BG   ex3   19      -
## A1BG_CCDS12976.1_ex4_19:58863655-58863678:+_5-2  A1BG   ex4   19      +
## A1BG_CCDS12976.1_ex4_19:58863697-58863720:-_5-3  A1BG   ex4   19      -
## A1BG_CCDS12976.1_ex4_19:58863866-58863889:+_5-4  A1BG   ex4   19      +
## A1BG_CCDS12976.1_ex5_19:58864367-58864390:-_5-5  A1BG   ex5   19      -
## A1CF_CCDS7241.1_ex6_10:52588014-52588037:-_5-1   A1CF   ex6   10      -
##                                                 STARTpos   ENDpos
## A1BG_CCDS12976.1_ex3_19:58862927-58862950:-_5-1 58862927 58862950
## A1BG_CCDS12976.1_ex4_19:58863655-58863678:+_5-2 58863655 58863678
## A1BG_CCDS12976.1_ex4_19:58863697-58863720:-_5-3 58863697 58863720
## A1BG_CCDS12976.1_ex4_19:58863866-58863889:+_5-4 58863866 58863889
## A1BG_CCDS12976.1_ex5_19:58864367-58864390:-_5-5 58864367 58864390
## A1CF_CCDS7241.1_ex6_10:52588014-52588037:-_5-1  52588014 52588037
##                                                                 seq
## A1BG_CCDS12976.1_ex3_19:58862927-58862950:-_5-1 GACTTCCAGCTACGGCGCG
## A1BG_CCDS12976.1_ex4_19:58863655-58863678:+_5-2 TCAATGGTCACAGTAGCGC
## A1BG_CCDS12976.1_ex4_19:58863697-58863720:-_5-3 CTGCAGCTACCGGACCGAT
## A1BG_CCDS12976.1_ex4_19:58863866-58863889:+_5-4 CGGGGGTGATCCAGGACAC
## A1BG_CCDS12976.1_ex5_19:58864367-58864390:-_5-5 TGCTGACGGGTGACACCCA
## A1CF_CCDS7241.1_ex6_10:52588014-52588037:-_5-1  ACATGGTATTGCAGTAGAC
## Normalization and Log-Fold Change Calculation
normANDfcs<-ccr.NormfoldChanges(fn,
                                min_reads=30,
                                EXPname='HT-29',
                                libraryAnnotation=KY_Library_v1.0)

## Ordering sgRNAs by Genomic Position
gwSortedFCs<-
  ccr.logFCs2chromPos(normANDfcs$logFCs,KY_Library_v1.0)
## Genome-Wide Copy Number Correction
correctedFCs<-ccr.GWclean(gwSortedFCs,display=TRUE,label='HT-29')
## Analyzing: HT.29.Chr.1.sgRNA.FCs 
## Analyzing: HT.29.Chr.1.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.2.sgRNA.FCs 
## Analyzing: HT.29.Chr.2.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.3.sgRNA.FCs 
## Analyzing: HT.29.Chr.3.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.4.sgRNA.FCs 
## Analyzing: HT.29.Chr.4.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.5.sgRNA.FCs 
## Analyzing: HT.29.Chr.5.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.6.sgRNA.FCs 
## Analyzing: HT.29.Chr.6.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.7.sgRNA.FCs 
## Analyzing: HT.29.Chr.7.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.8.sgRNA.FCs 
## Analyzing: HT.29.Chr.8.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.9.sgRNA.FCs 
## Analyzing: HT.29.Chr.9.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.10.sgRNA.FCs 
## Analyzing: HT.29.Chr.10.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.11.sgRNA.FCs 
## Analyzing: HT.29.Chr.11.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.12.sgRNA.FCs 
## Analyzing: HT.29.Chr.12.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.13.sgRNA.FCs 
## Analyzing: HT.29.Chr.13.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.14.sgRNA.FCs 
## Analyzing: HT.29.Chr.14.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.15.sgRNA.FCs 
## Analyzing: HT.29.Chr.15.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.16.sgRNA.FCs 
## Analyzing: HT.29.Chr.16.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.17.sgRNA.FCs 
## Analyzing: HT.29.Chr.17.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.18.sgRNA.FCs 
## Analyzing: HT.29.Chr.18.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.19.sgRNA.FCs 
## Analyzing: HT.29.Chr.19.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.20.sgRNA.FCs 
## Analyzing: HT.29.Chr.20.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.21.sgRNA.FCs 
## Analyzing: HT.29.Chr.21.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.22.sgRNA.FCs 
## Analyzing: HT.29.Chr.22.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.23.sgRNA.FCs 
## Analyzing: HT.29.Chr.23.sgRNA.FCs...post.CRISPRcleanR

## Analyzing: HT.29.Chr.24.sgRNA.FCs 
## Analyzing: HT.29.Chr.24.sgRNA.FCs...post.CRISPRcleanR

## Loading Data for Performance Evaluation
data(EPLC.272HcorrectedFCs)

data(BAGEL_essential)

data(BAGEL_nonEssential)

FCs<-EPLC.272HcorrectedFCs$corrected_logFCs$avgFC

names(FCs)<-rownames(EPLC.272HcorrectedFCs$corrected_logFCs)

BAGEL_essential_sgRNAs<-
  ccr.genes2sgRNAs(KY_Library_v1.0,BAGEL_essential)
## Warning in ccr.genes2sgRNAs(KY_Library_v1.0, BAGEL_essential): No sgRNAs
## targeting the following genes in this library: PHF16, PPP2R2D, PRKDC, RPS17,
## RPS26, RPS3A
BAGEL_nonEssential_sgRNAs<-
  ccr.genes2sgRNAs(KY_Library_v1.0,BAGEL_nonEssential)
## Warning in ccr.genes2sgRNAs(KY_Library_v1.0, BAGEL_nonEssential): No sgRNAs
## targeting the following genes in this library: ANKRD60, AQP12A, BHLHE23, BPY2,
## C15orf55, C18orf26, C20orf203, C20orf79, C8orf17, C9orf53, CDY1, CDY2A, CT45A2,
## CT45A4, CT47A11, CTRB1, CXorf1, CYP4F8, DAZ1, DAZ2, DAZ3, DAZ4, DEFB106A, DGKK,
## DUX4, DUX4L7, FAM106A, FAM75A7, FAM75D1, FOXD4L3, FOXD4L4, FRG2, GAGE1, GAGE2C,
## GOLGA6L2, GPR111, GPR144, GRK1, HHLA1, HTN3, IL28A, IL28B, IL29, KRTAP10-7,
## MBD3L2, NOBOX, OR10A2, OR10A4, OR10A5, OR10H1, OR10H2, OR10H3, OR10J1, OR10R2,
## OR10S1, OR10X1, OR10Z1, OR11A1, OR12D2, OR12D3, OR13C3, OR13D1, OR14A16, OR1A1,
## OR1A2, OR1B1, OR1D2, OR1E1, OR1E2, OR1G1, OR1L6, OR1N2, OR1S1, OR1S2, OR2AK2,
## OR2AT4, OR2C1, OR2C3, OR2D2, OR2D3, OR2F1, OR2G2, OR2G3, OR2H1, OR2J2, OR2L3,
## OR2T1, OR2T10, OR2T12, OR2T2, OR2T27, OR2T33, OR2T4, OR2T5, OR2W1, OR3A1,
## OR3A2, OR3A3, OR4C11, OR4C3, OR4D1, OR4D10, OR4D11, OR4D9, OR4K17, OR51B6,
## OR51D1, OR51F2, OR51T1, OR51V1, OR52A1, OR52A5, OR52B2, OR52B6, OR52E8, OR52I2,
## OR52K2, OR52L1, OR52M1, OR52R1, OR52W1, OR56A1, OR56A4, OR56B1, OR5AU1, OR5C1,
## OR5I1, OR5M1, OR5M10, OR5P2, OR5P3, OR5R1, OR5T1, OR5T2, OR5T3, OR5V1, OR5W2,
## OR6A2, OR6K6, OR6S1, OR6V1, OR7A17, OR7C2, OR7D4, OR7G2, OR8A1, OR8B8, OR8G5,
## OR8U1, OR9Q2, PAGE3, PKD1L3, PLAC1L, PNLIPRP2, POTEA, POTEH, PRAMEF19, PRAMEF3,
## PRSS41, PRY2, RBMY1A1, RBMY1B, RBMY1D, RBMY1E, RBMY1F, RBMY1J, REXO1L1,
## SLC22A24, SPACA5, SSX8, SSX9, TAAR9, TCEB3C, TEX13A, TEX34, TRIM51, VCX3A,
## VHLL, VN1R5, ZAN
## Performance Assessment: ROC and Precision-Recall
sgRNA_level_ROC<-ccr.ROC_Curve(FCsprofile = FCs,
                               positives = BAGEL_essential_sgRNAs,
                               negatives = BAGEL_nonEssential_sgRNAs,
                               display = TRUE)

sgRNA_level_ROC<-ccr.PrRc_Curve(FCs,BAGEL_essential_sgRNAs,
                                BAGEL_nonEssential_sgRNAs)

geneFCs<-ccr.geneMeanFCs(FCs,KY_Library_v1.0)
head(geneFCs)
##       A1BG       A1CF        A2M      A2ML1    A3GALT2     A4GALT 
## -0.2474235 -0.1550534  0.2190111  0.3736683 -0.5151889 -0.1698269
gene_level_ROC<-ccr.ROC_Curve(geneFCs,
                              BAGEL_essential,
                              BAGEL_nonEssential,
                              FDRth = 0.05)

gene_level_PrRc<-ccr.PrRc_Curve(geneFCs,
                                BAGEL_essential,
                                BAGEL_nonEssential,
                                FDRth = 0.05)

gene_level_PrRc$sigthreshold
##  threshold 
## -0.7683409
sum(geneFCs<gene_level_PrRc$sigthreshold)
## [1] 1418
FCs<-EPLC.272HcorrectedFCs$corrected_logFCs$correctedFC
names(FCs)<-rownames(EPLC.272HcorrectedFCs$corrected_logFCs)
CgeneFCs<-ccr.geneMeanFCs(FCs,KY_Library_v1.0)

RES<-ccr.PrRc_Curve(CgeneFCs,
                    BAGEL_essential,
                    BAGEL_nonEssential,
                    FDRth = 0.05)

sum(CgeneFCs<RES$sigthreshold)
## [1] 1338
## Gene Set Enrichment Visualization
data(EssGenes.ribosomalProteins)
data(EssGenes.DNA_REPLICATION_cons)
data(EssGenes.KEGG_rna_polymerase)
data(EssGenes.PROTEASOME_cons)
data(EssGenes.SPLICEOSOME_cons)

SIGNATURES<-list(Ribosomal_Proteins=EssGenes.ribosomalProteins,
                 DNA_Replication = EssGenes.DNA_REPLICATION_cons,
                 RNA_polymerase = EssGenes.KEGG_rna_polymerase,
                 Proteasome = EssGenes.PROTEASOME_cons,
                 Spliceosome = EssGenes.SPLICEOSOME_cons,
                 CFE = BAGEL_essential,
                 non_essential = BAGEL_nonEssential)

Recall_scores<-ccr.VisDepAndSig(FCsprofile = geneFCs,
                                SIGNATURES = SIGNATURES,
                                TITLE = 'EPLC-272H',
                                pIs = 6,
                                nIs = 7)

cRecall_scores<-ccr.VisDepAndSig(FCsprofile = CgeneFCs,
                                 SIGNATURES = SIGNATURES,
                                 TITLE = 'EPLC-272H',
                                 pIs = 6,
                                 nIs = 7)

sort(abs(Recall_scores-cRecall_scores),decreasing = TRUE)[1]
## RNA_polymerase 
##      0.1153846
## Final Statistical Tests and Output
data(HT.29correctedFCs)

RES<-ccr.perf_statTests('HT-29',libraryAnnotation = KY_Library_v1.0,
                        correctedFCs = HT.29correctedFCs$corrected_logFCs,
                        GDSC.geneLevCNA = NULL,
                        CCLE.gisticCNA = NULL,
                        RNAseq.fpkms = NULL)
## [1] "Testing sgRNAs targeting: Dep (PN) genes"
## [1] "Testing sgRNAs targeting: Dep (Gistic) genes"
## [1] "Testing sgRNAs targeting: notExp genes"
## [1] "Testing sgRNAs targeting: Amp (Gistic +1) genes"
## [1] "Testing sgRNAs targeting: Amp (Gistic +2) genes"
## [1] "Testing sgRNAs targeting: Amp (Gistic +1) notExp genes"
## [1] "Testing sgRNAs targeting: Amp (Gistic +2) notExp genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 2) genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 4) genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 8) genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 10) genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 2) notExp genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 4) notExp genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 8) notExp genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 10) notExp genes"
## [1] "Testing sgRNAs targeting: Essential genes"
## [1] "Testing sgRNAs targeting: BAGEL Essential genes"
## [1] "Testing sgRNAs targeting: BAGEL Ess.Only genes"
## [1] "Testing sgRNAs targeting: BAGEL nonEssential genes"
RES<-ccr.perf_statTests('EPLC-272H',libraryAnnotation = KY_Library_v1.0,
                        correctedFCs = EPLC.272HcorrectedFCs$corrected_logFCs)
## [1] "No gistic CNA scores available for this cell line"
## [1] "Testing sgRNAs targeting: Dep (PN) genes"
## [1] "Testing sgRNAs targeting: Dep (Gistic) genes"
## [1] "Testing sgRNAs targeting: notExp genes"
## [1] "Testing sgRNAs targeting: Amp (Gistic +1) genes"
## [1] "Testing sgRNAs targeting: Amp (Gistic +2) genes"
## [1] "Testing sgRNAs targeting: Amp (Gistic +1) notExp genes"
## [1] "Testing sgRNAs targeting: Amp (Gistic +2) notExp genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 2) genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 4) genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 8) genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 10) genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 2) notExp genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 4) notExp genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 8) notExp genes"
## [1] "Testing sgRNAs targeting: Amp (PNs >= 10) notExp genes"
## [1] "Testing sgRNAs targeting: Essential genes"
## [1] "Testing sgRNAs targeting: BAGEL Essential genes"
## [1] "Testing sgRNAs targeting: BAGEL Ess.Only genes"
## [1] "Testing sgRNAs targeting: BAGEL nonEssential genes"
ccr.perf_distributions('HT-29',HT.29correctedFCs$corrected_logFCs,
                       libraryAnnotation = KY_Library_v1.0)

HT.29correctedFCs$corrected_logFCs$genes[order(HT.29correctedFCs$corrected_logFCs$correctedFC)[1:10]]
##  [1] "POLR2C"  "ATP6V0C" "POLR2A"  "SF3A1"   "HNRNPC"  "SRP9"    "SNRPB"  
##  [8] "BCAS2"   "SPC24"   "NHP2L1"
EPLC.272HcorrectedFCs$corrected_logFCs$genes[order(EPLC.272HcorrectedFCs$corrected_logFCs$correctedFC)[1:10]]
##  [1] "HIST1H2BE" "RRM1"      "CDC27"     "RRN3"      "SNRPA1"    "POLR2B"   
##  [7] "CKAP5"     "SNRPF"     "POLR2A"    "NACA"