library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Read barcodes
# Load the index-sequence spreadsheet and reduce it to two columns:
#   barcodeID  - the sheet's ID with "UDI0" rewritten to "NOVA"
#   barcodeSeq - the i7 and i5 sequences joined as "<i7>_<i5>"
nextflexBarcodes <-
  readxl::read_excel("UDI_Index_Sequences_v21.06_v1-New.xlsx") %>%
  # The first four columns are renamed by position, not by header text.
  rename(barcodeID = 1, i7 = 2, i5 = 3, i5RevComp = 4) %>%
  unite(col = "barcodeSeq", i7, i5, sep = "_") %>%
  select(barcodeID, barcodeSeq) %>%
  # Call str_replace() directly: passing extra arguments through across(...)
  # is deprecated in current dplyr, and only one column is touched anyway.
  mutate(barcodeID = str_replace(barcodeID, "UDI0", "NOVA"))
# Keep only the twelve barcodes of interest. The IDs are built from their
# numeric part, zero-padded to three digits (8 -> "NOVA008").
selectedBarcodes <- filter(
  nextflexBarcodes,
  barcodeID %in% sprintf(
    "NOVA%03d",
    c(8, 20, 32, 44, 56, 68, 76, 88, 77, 89, 78, 90)
  )
)
Validate assignment of barcodes
# Read the current fastq file names (one per line, no header) and derive the
# "<i7>_<i5>" barcode key embedded in each name.
old <-
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  read_tsv("oldNames", col_names = FALSE) %>%
  rename("fastqName" = 1) %>%
  # separate() splits on its default separator. When a name yields more than
  # the 11 fields listed here, the surplus pieces are discarded with a
  # warning. Only barcodei7 and barcodei5 are used below.
  separate(
    col = 1,
    into = c(
      "strain",
      "cell",
      "libraryType",
      "initials",
      "ldate",
      "barcodei7",
      "barcodei5",
      "samplesheet",
      "lane",
      "read",
      "extension"
    ),
    # Keep the full file name alongside the parsed fields.
    remove = FALSE
  ) %>%
  unite(col = "barcodeSeq", barcodei7:barcodei5, sep = "_") %>%
  select(fastqName, barcodeSeq)
## Rows: 12 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): X1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 11 pieces. Additional pieces discarded in 12 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12].
# Render the file-name / barcode table.
knitr::kable(old)
| mouse_PU1nuclei_ChIP_EZ_l20211112_CAATCGAA_GCACACGC_S12_L001_R1_001.fastq.gz |
CAATCGAA_GCACACGC |
| mouse_PU1nuclei_ChIP_EZ_l20211112_CAGAAGAT_GTACATTG_S11_L001_R1_001.fastq.gz |
CAGAAGAT_GTACATTG |
| mouse_PU1nuclei_ChIP_EZ_l20211112_CAGACCAC_GCTTCGGC_S8_L001_R1_001.fastq.gz |
CAGACCAC_GCTTCGGC |
| mouse_PU1nuclei_ChIP_EZ_l20211112_CATGATGA_GTTAGAGG_S7_L001_R1_001.fastq.gz |
CATGATGA_GTTAGAGG |
| mouse_PU1nuclei_ChIP_EZ_l20211112_CGCGCCAA_TGGCTAGG_S4_L001_R1_001.fastq.gz |
CGCGCCAA_TGGCTAGG |
| mouse_PU1nuclei_ChIP_EZ_l20211112_GAAGTCTT_ACTACGGT_S2_L001_R1_001.fastq.gz |
GAAGTCTT_ACTACGGT |
| mouse_PU1nuclei_ChIP_EZ_l20211112_GATGCGTC_GAGGCTGC_S1_L001_R1_001.fastq.gz |
GATGCGTC_GAGGCTGC |
| mouse_PU1nuclei_ChIP_EZ_l20211112_TCCTCGAT_GTAACTGC_S5_L001_R1_001.fastq.gz |
TCCTCGAT_GTAACTGC |
| mouse_PU1nuclei_ChIP_EZ_l20211112_TCGGCACC_CCAACGTC_S3_L001_R1_001.fastq.gz |
TCGGCACC_CCAACGTC |
| mouse_PU1nuclei_ChIP_EZ_l20211112_TGACTACT_CCTTACAG_S9_L001_R1_001.fastq.gz |
TGACTACT_CCTTACAG |
| mouse_PU1nuclei_ChIP_EZ_l20211112_TGGTGCAC_GGAATCAC_S6_L001_R1_001.fastq.gz |
TGGTGCAC_GGAATCAC |
| mouse_PU1nuclei_ChIP_EZ_l20211112_TTCTGGTG_CCAGTGGT_S10_L001_R1_001.fastq.gz |
TTCTGGTG_CCAGTGGT |
Verify that my barcode lists intersect with the barcodes in the fastq.gz file names stored in the variable ‘old’.
# Show the barcode rows picked by row position.
slice(nextflexBarcodes, 8, 20, 32, 44, 56, 68, 76, 88, 77, 89, 78, 90)
# Barcode keys shared by the position-based selection and the fastq names.
intersect(
  pull(
    slice(nextflexBarcodes, 8, 20, 32, 44, 56, 68, 76, 88, 77, 89, 78, 90),
    barcodeSeq
  ),
  old$barcodeSeq
)
## [1] "GATGCGTC_GAGGCTGC" "GAAGTCTT_ACTACGGT" "TCGGCACC_CCAACGTC"
## [4] "CGCGCCAA_TGGCTAGG" "TCCTCGAT_GTAACTGC" "TGGTGCAC_GGAATCAC"
## [7] "CATGATGA_GTTAGAGG" "CAGACCAC_GCTTCGGC" "TGACTACT_CCTTACAG"
## [10] "TTCTGGTG_CCAGTGGT" "CAGAAGAT_GTACATTG" "CAATCGAA_GCACACGC"
# Barcode keys shared by the ID-based selection and the fastq names.
selectedBarcodes %>%
  pull(barcodeSeq) %>%
  intersect(old$barcodeSeq)
## [1] "GATGCGTC_GAGGCTGC" "GAAGTCTT_ACTACGGT" "TCGGCACC_CCAACGTC"
## [4] "CGCGCCAA_TGGCTAGG" "TCCTCGAT_GTAACTGC" "TGGTGCAC_GGAATCAC"
## [7] "CATGATGA_GTTAGAGG" "TGACTACT_CCTTACAG" "CAGAAGAT_GTACATTG"
## [10] "CAGACCAC_GCTTCGGC" "TTCTGGTG_CCAGTGGT" "CAATCGAA_GCACACGC"
Read in the desired names, linked to the original sample names, that Enchen emailed to me
# Tab-separated sheet pairing each barcodeID with its desired sample name.
orig_desired <- read_tsv(file = "renameSamples")
## Rows: 12 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): originalName, barcodeID, desiredName
## dbl (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Update names
# Reads "renameSamples" into orig_desired, replacing any value it already holds.
orig_desired <- read_tsv(file = "renameSamples")
## Rows: 12 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): originalName, barcodeID, desiredName
## dbl (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach the desired names to the selected barcodes (by barcodeID), then match
# them to the existing fastq files (by barcodeSeq). `old` stays the left-hand
# table of the second join so its columns come first.
updateNames <-
  selectedBarcodes %>%
  inner_join(orig_desired, by = "barcodeID") %>%
  inner_join(x = old, y = ., by = "barcodeSeq")
Write bash script for renaming samples
# Preview the old-name / new-name pairs.
select(updateNames, fastqName, desiredName)
# Build one `mv <old> <new>` command per file, write the commands to
# renameScript.sh, then render the same commands as a table.
with(
  updateNames,
  str_c(
    "mv ",
    fastqName,
    " ",
    desiredName,
    "_",
    barcodeSeq,
    "_L001_R1_001.fastq.gz"
  )
) %>%
  write_lines("renameScript.sh") %>%
  knitr::kable()
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_CAATCGAA_GCACACGC_S12_L001_R1_001.fastq.gz mouse_balbcj_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep2_EZ_CAATCGAA_GCACACGC_L001_R1_001.fastq.gz |
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_CAGAAGAT_GTACATTG_S11_L001_R1_001.fastq.gz mouse_balbcj_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep1_EZ_CAGAAGAT_GTACATTG_L001_R1_001.fastq.gz |
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_CAGACCAC_GCTTCGGC_S8_L001_R1_001.fastq.gz mouse_c57bl6j_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep2_EZ_CAGACCAC_GCTTCGGC_L001_R1_001.fastq.gz |
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_CATGATGA_GTTAGAGG_S7_L001_R1_001.fastq.gz mouse_c57bl6j_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep1_EZ_CATGATGA_GTTAGAGG_L001_R1_001.fastq.gz |
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_CGCGCCAA_TGGCTAGG_S4_L001_R1_001.fastq.gz mouse_cb6f1j_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep2_EZ_CGCGCCAA_TGGCTAGG_L001_R1_001.fastq.gz |
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_GAAGTCTT_ACTACGGT_S2_L001_R1_001.fastq.gz mouse_c57bl6j_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep2_EZ_GAAGTCTT_ACTACGGT_L001_R1_001.fastq.gz |
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_GATGCGTC_GAGGCTGC_S1_L001_R1_001.fastq.gz mouse_c57bl6j_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep1_EZ_GATGCGTC_GAGGCTGC_L001_R1_001.fastq.gz |
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_TCCTCGAT_GTAACTGC_S5_L001_R1_001.fastq.gz mouse_balbcj_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep1_EZ_TCCTCGAT_GTAACTGC_L001_R1_001.fastq.gz |
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_TCGGCACC_CCAACGTC_S3_L001_R1_001.fastq.gz mouse_cb6f1j_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep1_EZ_TCGGCACC_CCAACGTC_L001_R1_001.fastq.gz |
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_TGACTACT_CCTTACAG_S9_L001_R1_001.fastq.gz mouse_cb6f1j_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep1_EZ_TGACTACT_CCTTACAG_L001_R1_001.fastq.gz |
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_TGGTGCAC_GGAATCAC_S6_L001_R1_001.fastq.gz mouse_balbcj_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep2_EZ_TGGTGCAC_GGAATCAC_L001_R1_001.fastq.gz |
| mv mouse_PU1nuclei_ChIP_EZ_l20211112_TTCTGGTG_CCAGTGGT_S10_L001_R1_001.fastq.gz mouse_cb6f1j_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep2_EZ_TTCTGGTG_CCAGTGGT_L001_R1_001.fastq.gz |
Finished!
# Record the R session details in a block that is open by default.
details::details(
  sessioninfo::session_info(pkgs = NULL),
  summary = "Current session info",
  open = TRUE
)
Current session info
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.1.0 (2021-05-18)
os macOS Big Sur 10.16
system x86_64, darwin17.0
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/New_York
date 2021-12-21
─ Packages ───────────────────────────────────────────────────────────────────
package * version date lib source
assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.1.0)
backports 1.2.1 2020-12-09 [2] CRAN (R 4.1.0)
bit 4.0.4 2020-08-04 [1] CRAN (R 4.1.0)
bit64 4.0.5 2020-08-30 [1] CRAN (R 4.1.0)
broom 0.7.9 2021-07-27 [2] CRAN (R 4.1.0)
bslib 0.2.5.1 2021-05-18 [1] CRAN (R 4.1.0)
cellranger 1.1.0 2016-07-27 [2] CRAN (R 4.1.0)
cli 3.0.1 2021-07-17 [2] CRAN (R 4.1.0)
clipr 0.7.1 2020-10-08 [2] CRAN (R 4.1.0)
colorspace 2.0-2 2021-06-24 [2] CRAN (R 4.1.0)
crayon 1.4.1 2021-02-08 [2] CRAN (R 4.1.0)
DBI 1.1.1 2021-01-15 [2] CRAN (R 4.1.0)
dbplyr 2.1.1 2021-04-06 [2] CRAN (R 4.1.0)
desc 1.3.0 2021-03-05 [1] CRAN (R 4.1.0)
details 0.2.1 2020-01-12 [2] CRAN (R 4.1.0)
digest 0.6.27 2020-10-24 [2] CRAN (R 4.1.0)
dplyr * 1.0.7 2021-06-18 [2] CRAN (R 4.1.0)
ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.1.0)
evaluate 0.14 2019-05-28 [2] CRAN (R 4.1.0)
fansi 0.5.0 2021-05-25 [2] CRAN (R 4.1.0)
fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.1.0)
forcats * 0.5.1 2021-01-27 [2] CRAN (R 4.1.0)
fs 1.5.0 2020-07-31 [2] CRAN (R 4.1.0)
generics 0.1.0 2020-10-31 [2] CRAN (R 4.1.0)
ggplot2 * 3.3.5 2021-06-25 [2] CRAN (R 4.1.0)
glue 1.4.2 2020-08-27 [2] CRAN (R 4.1.0)
gtable 0.3.0 2019-03-25 [2] CRAN (R 4.1.0)
haven 2.4.3 2021-08-04 [1] CRAN (R 4.1.0)
highr 0.9 2021-04-16 [2] CRAN (R 4.1.0)
hms 1.1.0 2021-05-17 [2] CRAN (R 4.1.0)
htmltools 0.5.2 2021-08-25 [2] CRAN (R 4.1.0)
httr 1.4.2 2020-07-20 [2] CRAN (R 4.1.0)
jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.1.0)
jsonlite 1.7.2 2020-12-09 [2] CRAN (R 4.1.0)
knitr 1.33 2021-04-24 [2] CRAN (R 4.1.0)
lifecycle 1.0.0 2021-02-15 [2] CRAN (R 4.1.0)
lubridate 1.7.10 2021-02-26 [2] CRAN (R 4.1.0)
magrittr 2.0.1 2020-11-17 [2] CRAN (R 4.1.0)
modelr 0.1.8 2020-05-19 [2] CRAN (R 4.1.0)
munsell 0.5.0 2018-06-12 [2] CRAN (R 4.1.0)
pillar 1.6.2 2021-07-29 [2] CRAN (R 4.1.0)
pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.1.0)
png 0.1-7 2013-12-03 [2] CRAN (R 4.1.0)
purrr * 0.3.4 2020-04-17 [2] CRAN (R 4.1.0)
R6 2.5.1 2021-08-19 [2] CRAN (R 4.1.0)
Rcpp 1.0.7 2021-07-07 [2] CRAN (R 4.1.0)
readr * 2.1.1 2021-11-30 [1] CRAN (R 4.1.0)
readxl 1.3.1 2019-03-13 [2] CRAN (R 4.1.0)
reprex 2.0.1 2021-08-05 [2] CRAN (R 4.1.0)
rlang 0.4.11 2021-04-30 [2] CRAN (R 4.1.0)
rmarkdown 2.11 2021-09-14 [1] CRAN (R 4.1.0)
rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.1.0)
rstudioapi 0.13 2020-11-12 [2] CRAN (R 4.1.0)
rvest 1.0.1 2021-07-26 [2] CRAN (R 4.1.0)
sass 0.4.0 2021-05-12 [1] CRAN (R 4.1.0)
scales 1.1.1 2020-05-11 [2] CRAN (R 4.1.0)
sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.1.0)
stringi 1.7.6 2021-11-29 [2] CRAN (R 4.1.0)
stringr * 1.4.0 2019-02-10 [2] CRAN (R 4.1.0)
tibble * 3.1.6 2021-11-07 [2] CRAN (R 4.1.0)
tidyr * 1.1.4 2021-09-27 [2] CRAN (R 4.1.0)
tidyselect 1.1.1 2021-04-30 [2] CRAN (R 4.1.0)
tidyverse * 1.3.1 2021-04-15 [2] CRAN (R 4.1.0)
tzdb 0.1.2 2021-07-20 [2] CRAN (R 4.1.0)
utf8 1.2.2 2021-07-24 [2] CRAN (R 4.1.0)
vctrs 0.3.8 2021-04-29 [2] CRAN (R 4.1.0)
vroom 1.5.7 2021-11-30 [1] CRAN (R 4.1.0)
withr 2.4.2 2021-04-18 [2] CRAN (R 4.1.0)
xfun 0.29 2021-12-14 [1] CRAN (R 4.1.0)
xml2 1.3.2 2020-04-23 [2] CRAN (R 4.1.0)
yaml 2.2.1 2020-02-01 [2] CRAN (R 4.1.0)
[1] /Users/tro3nr/Library/R/x86_64/4.1/library
[2] /Library/Frameworks/R.framework/Versions/4.1/Resources/library