library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Read barcodes

# Load the index table from the spreadsheet and reduce it to one row per
# barcode: an ID plus the combined "i7_i5" sequence.
nextflexBarcodes <-
  readxl::read_excel("UDI_Index_Sequences_v21.06_v1-New.xlsx") %>%
  # Columns are renamed by position, so this relies on the sheet's column order.
  rename(
    barcodeID = 1,
    i7 = 2,
    i5 = 3,
    i5RevComp = 4
  ) %>%
  unite(col = "barcodeSeq", i7, i5, sep = "_") %>%
  select(barcodeID, barcodeSeq) %>%
  # Relabel the IDs by swapping the "UDI0" text for "NOVA". A direct mutate()
  # avoids passing extra arguments through across(), which newer dplyr
  # releases deprecate.
  mutate(barcodeID = str_replace(barcodeID, "UDI0", "NOVA"))

# Keep only the twelve barcodes used in this run; the IDs are built from their
# numeric part, zero-padded to three digits (e.g. 8 -> "NOVA008").
selectedBarcodes <-
  filter(
    nextflexBarcodes,
    barcodeID %in% sprintf(
      "NOVA%03d",
      c(8L, 20L, 32L, 44L, 56L, 68L, 76L, 88L, 77L, 89L, 78L, 90L)
    )
  )

Validate assignment of barcodes

# Read the current fastq file names (one per line in "oldNames") and recover
# the "i7_i5" barcode pair embedded in each name.
old <-
  read_tsv("oldNames", col_names = FALSE) %>%
  rename(fastqName = 1) %>%
  separate(
    col = fastqName,
    into = c(
      "strain",
      "cell",
      "libraryType",
      "initials",
      "ldate",
      "barcodei7",
      "barcodei5",
      "samplesheet",
      "lane",
      "read",
      "extension"
    ),
    # Split on underscores only. The default separator also splits on ".",
    # which breaks "001.fastq.gz" into extra pieces that are then discarded
    # with a warning. With "_" the file names in this run give exactly the 11
    # fields named above, so a warning now signals a malformed file name.
    sep = "_",
    remove = FALSE
  ) %>%
  unite(col = "barcodeSeq", barcodei7:barcodei5, sep = "_") %>%
  select(fastqName, barcodeSeq)
## Rows: 12 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): X1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 11 pieces. Additional pieces discarded in 12 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12].
# Render the file-name / barcode table.
knitr::kable(old)
fastqName barcodeSeq
mouse_PU1nuclei_ChIP_EZ_l20211112_CAATCGAA_GCACACGC_S12_L001_R1_001.fastq.gz CAATCGAA_GCACACGC
mouse_PU1nuclei_ChIP_EZ_l20211112_CAGAAGAT_GTACATTG_S11_L001_R1_001.fastq.gz CAGAAGAT_GTACATTG
mouse_PU1nuclei_ChIP_EZ_l20211112_CAGACCAC_GCTTCGGC_S8_L001_R1_001.fastq.gz CAGACCAC_GCTTCGGC
mouse_PU1nuclei_ChIP_EZ_l20211112_CATGATGA_GTTAGAGG_S7_L001_R1_001.fastq.gz CATGATGA_GTTAGAGG
mouse_PU1nuclei_ChIP_EZ_l20211112_CGCGCCAA_TGGCTAGG_S4_L001_R1_001.fastq.gz CGCGCCAA_TGGCTAGG
mouse_PU1nuclei_ChIP_EZ_l20211112_GAAGTCTT_ACTACGGT_S2_L001_R1_001.fastq.gz GAAGTCTT_ACTACGGT
mouse_PU1nuclei_ChIP_EZ_l20211112_GATGCGTC_GAGGCTGC_S1_L001_R1_001.fastq.gz GATGCGTC_GAGGCTGC
mouse_PU1nuclei_ChIP_EZ_l20211112_TCCTCGAT_GTAACTGC_S5_L001_R1_001.fastq.gz TCCTCGAT_GTAACTGC
mouse_PU1nuclei_ChIP_EZ_l20211112_TCGGCACC_CCAACGTC_S3_L001_R1_001.fastq.gz TCGGCACC_CCAACGTC
mouse_PU1nuclei_ChIP_EZ_l20211112_TGACTACT_CCTTACAG_S9_L001_R1_001.fastq.gz TGACTACT_CCTTACAG
mouse_PU1nuclei_ChIP_EZ_l20211112_TGGTGCAC_GGAATCAC_S6_L001_R1_001.fastq.gz TGGTGCAC_GGAATCAC
mouse_PU1nuclei_ChIP_EZ_l20211112_TTCTGGTG_CCAGTGGT_S10_L001_R1_001.fastq.gz TTCTGGTG_CCAGTGGT

Verify that the barcodes I selected match the barcodes embedded in the fastq.gz file names stored in the variable ‘old’.

# Show the barcode rows picked by position, then list which of their
# sequences also occur among the fastq file names.
slice(nextflexBarcodes, c(8, 20, 32, 44, 56, 68, 76, 88, 77, 89, 78, 90))
nextflexBarcodes %>%
  slice(c(8, 20, 32, 44, 56, 68, 76, 88, 77, 89, 78, 90)) %>%
  pull(barcodeSeq) %>%
  intersect(old$barcodeSeq)
##  [1] "GATGCGTC_GAGGCTGC" "GAAGTCTT_ACTACGGT" "TCGGCACC_CCAACGTC"
##  [4] "CGCGCCAA_TGGCTAGG" "TCCTCGAT_GTAACTGC" "TGGTGCAC_GGAATCAC"
##  [7] "CATGATGA_GTTAGAGG" "CAGACCAC_GCTTCGGC" "TGACTACT_CCTTACAG"
## [10] "TTCTGGTG_CCAGTGGT" "CAGAAGAT_GTACATTG" "CAATCGAA_GCACACGC"
# Same check, this time starting from the barcodes selected by ID.
selectedBarcodes$barcodeSeq %>%
  intersect(old$barcodeSeq)
##  [1] "GATGCGTC_GAGGCTGC" "GAAGTCTT_ACTACGGT" "TCGGCACC_CCAACGTC"
##  [4] "CGCGCCAA_TGGCTAGG" "TCCTCGAT_GTAACTGC" "TGGTGCAC_GGAATCAC"
##  [7] "CATGATGA_GTTAGAGG" "TGACTACT_CCTTACAG" "CAGAAGAT_GTACATTG"
## [10] "CAGACCAC_GCTTCGGC" "TTCTGGTG_CCAGTGGT" "CAATCGAA_GCACACGC"

Read in the desired names, linked to the original sample names, that Enchen emailed to me

# Table linking each barcode ID to its original and desired sample name.
orig_desired <- read_tsv(file = "renameSamples")
## Rows: 12 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): originalName, barcodeID, desiredName
## dbl (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Update names

# NOTE(review): this repeats the read of "renameSamples" from the previous
# chunk; the second read looks redundant unless the file changed in between.
orig_desired <- read_tsv("renameSamples")
## Rows: 12 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): originalName, barcodeID, desiredName
## dbl (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach a barcode ID to every fastq file via its barcode sequence, then attach
# the desired sample name via that ID. Inner joins keep only files that match
# in both tables.
updateNames <-
  old %>%
  inner_join(selectedBarcodes, by = "barcodeSeq") %>%
  inner_join(orig_desired, by = "barcodeID")

Write bash script for renaming samples

# Show the old -> new name pairs.
select(updateNames, fastqName, desiredName)
# Build one "mv <old> <new>" command per file, save the commands to
# renameScript.sh and display them as a table.
with(
  updateNames,
  str_c(
    "mv ", fastqName,
    " ", desiredName, "_", barcodeSeq, "_L001_R1_001.fastq.gz"
  )
) %>%
  write_lines("renameScript.sh") %>%
  knitr::kable()
x
mv mouse_PU1nuclei_ChIP_EZ_l20211112_CAATCGAA_GCACACGC_S12_L001_R1_001.fastq.gz mouse_balbcj_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep2_EZ_CAATCGAA_GCACACGC_L001_R1_001.fastq.gz
mv mouse_PU1nuclei_ChIP_EZ_l20211112_CAGAAGAT_GTACATTG_S11_L001_R1_001.fastq.gz mouse_balbcj_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep1_EZ_CAGAAGAT_GTACATTG_L001_R1_001.fastq.gz
mv mouse_PU1nuclei_ChIP_EZ_l20211112_CAGACCAC_GCTTCGGC_S8_L001_R1_001.fastq.gz mouse_c57bl6j_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep2_EZ_CAGACCAC_GCTTCGGC_L001_R1_001.fastq.gz
mv mouse_PU1nuclei_ChIP_EZ_l20211112_CATGATGA_GTTAGAGG_S7_L001_R1_001.fastq.gz mouse_c57bl6j_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep1_EZ_CATGATGA_GTTAGAGG_L001_R1_001.fastq.gz
mv mouse_PU1nuclei_ChIP_EZ_l20211112_CGCGCCAA_TGGCTAGG_S4_L001_R1_001.fastq.gz mouse_cb6f1j_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep2_EZ_CGCGCCAA_TGGCTAGG_L001_R1_001.fastq.gz
mv mouse_PU1nuclei_ChIP_EZ_l20211112_GAAGTCTT_ACTACGGT_S2_L001_R1_001.fastq.gz mouse_c57bl6j_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep2_EZ_GAAGTCTT_ACTACGGT_L001_R1_001.fastq.gz
mv mouse_PU1nuclei_ChIP_EZ_l20211112_GATGCGTC_GAGGCTGC_S1_L001_R1_001.fastq.gz mouse_c57bl6j_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep1_EZ_GATGCGTC_GAGGCTGC_L001_R1_001.fastq.gz
mv mouse_PU1nuclei_ChIP_EZ_l20211112_TCCTCGAT_GTAACTGC_S5_L001_R1_001.fastq.gz mouse_balbcj_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep1_EZ_TCCTCGAT_GTAACTGC_L001_R1_001.fastq.gz
mv mouse_PU1nuclei_ChIP_EZ_l20211112_TCGGCACC_CCAACGTC_S3_L001_R1_001.fastq.gz mouse_cb6f1j_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep1_EZ_TCGGCACC_CCAACGTC_L001_R1_001.fastq.gz
mv mouse_PU1nuclei_ChIP_EZ_l20211112_TGACTACT_CCTTACAG_S9_L001_R1_001.fastq.gz mouse_cb6f1j_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep1_EZ_TGACTACT_CCTTACAG_L001_R1_001.fastq.gz
mv mouse_PU1nuclei_ChIP_EZ_l20211112_TGGTGCAC_GGAATCAC_S6_L001_R1_001.fastq.gz mouse_balbcj_male_KupfferPU1Nuclei_ChIP_p65_ChowNoFasting_rep2_EZ_TGGTGCAC_GGAATCAC_L001_R1_001.fastq.gz
mv mouse_PU1nuclei_ChIP_EZ_l20211112_TTCTGGTG_CCAGTGGT_S10_L001_R1_001.fastq.gz mouse_cb6f1j_male_KupfferPU1Nuclei_ChIP_Input_ChowNoFasting_rep2_EZ_TTCTGGTG_CCAGTGGT_L001_R1_001.fastq.gz

Finished!

# Record the R session and package versions in an expanded details block.
details::details(
  sessioninfo::session_info(pkgs = NULL),
  summary = "Current session info",
  open = TRUE
)
Current session info

─ Session info ───────────────────────────────────────────────────────────────
 setting  value                       
 version  R version 4.1.0 (2021-05-18)
 os       macOS Big Sur 10.16         
 system   x86_64, darwin17.0          
 ui       X11                         
 language (EN)                        
 collate  en_US.UTF-8                 
 ctype    en_US.UTF-8                 
 tz       America/New_York            
 date     2021-12-21                  

─ Packages ───────────────────────────────────────────────────────────────────
 package     * version date       lib source        
 assertthat    0.2.1   2019-03-21 [2] CRAN (R 4.1.0)
 backports     1.2.1   2020-12-09 [2] CRAN (R 4.1.0)
 bit           4.0.4   2020-08-04 [1] CRAN (R 4.1.0)
 bit64         4.0.5   2020-08-30 [1] CRAN (R 4.1.0)
 broom         0.7.9   2021-07-27 [2] CRAN (R 4.1.0)
 bslib         0.2.5.1 2021-05-18 [1] CRAN (R 4.1.0)
 cellranger    1.1.0   2016-07-27 [2] CRAN (R 4.1.0)
 cli           3.0.1   2021-07-17 [2] CRAN (R 4.1.0)
 clipr         0.7.1   2020-10-08 [2] CRAN (R 4.1.0)
 colorspace    2.0-2   2021-06-24 [2] CRAN (R 4.1.0)
 crayon        1.4.1   2021-02-08 [2] CRAN (R 4.1.0)
 DBI           1.1.1   2021-01-15 [2] CRAN (R 4.1.0)
 dbplyr        2.1.1   2021-04-06 [2] CRAN (R 4.1.0)
 desc          1.3.0   2021-03-05 [1] CRAN (R 4.1.0)
 details       0.2.1   2020-01-12 [2] CRAN (R 4.1.0)
 digest        0.6.27  2020-10-24 [2] CRAN (R 4.1.0)
 dplyr       * 1.0.7   2021-06-18 [2] CRAN (R 4.1.0)
 ellipsis      0.3.2   2021-04-29 [2] CRAN (R 4.1.0)
 evaluate      0.14    2019-05-28 [2] CRAN (R 4.1.0)
 fansi         0.5.0   2021-05-25 [2] CRAN (R 4.1.0)
 fastmap       1.1.0   2021-01-25 [1] CRAN (R 4.1.0)
 forcats     * 0.5.1   2021-01-27 [2] CRAN (R 4.1.0)
 fs            1.5.0   2020-07-31 [2] CRAN (R 4.1.0)
 generics      0.1.0   2020-10-31 [2] CRAN (R 4.1.0)
 ggplot2     * 3.3.5   2021-06-25 [2] CRAN (R 4.1.0)
 glue          1.4.2   2020-08-27 [2] CRAN (R 4.1.0)
 gtable        0.3.0   2019-03-25 [2] CRAN (R 4.1.0)
 haven         2.4.3   2021-08-04 [1] CRAN (R 4.1.0)
 highr         0.9     2021-04-16 [2] CRAN (R 4.1.0)
 hms           1.1.0   2021-05-17 [2] CRAN (R 4.1.0)
 htmltools     0.5.2   2021-08-25 [2] CRAN (R 4.1.0)
 httr          1.4.2   2020-07-20 [2] CRAN (R 4.1.0)
 jquerylib     0.1.4   2021-04-26 [1] CRAN (R 4.1.0)
 jsonlite      1.7.2   2020-12-09 [2] CRAN (R 4.1.0)
 knitr         1.33    2021-04-24 [2] CRAN (R 4.1.0)
 lifecycle     1.0.0   2021-02-15 [2] CRAN (R 4.1.0)
 lubridate     1.7.10  2021-02-26 [2] CRAN (R 4.1.0)
 magrittr      2.0.1   2020-11-17 [2] CRAN (R 4.1.0)
 modelr        0.1.8   2020-05-19 [2] CRAN (R 4.1.0)
 munsell       0.5.0   2018-06-12 [2] CRAN (R 4.1.0)
 pillar        1.6.2   2021-07-29 [2] CRAN (R 4.1.0)
 pkgconfig     2.0.3   2019-09-22 [2] CRAN (R 4.1.0)
 png           0.1-7   2013-12-03 [2] CRAN (R 4.1.0)
 purrr       * 0.3.4   2020-04-17 [2] CRAN (R 4.1.0)
 R6            2.5.1   2021-08-19 [2] CRAN (R 4.1.0)
 Rcpp          1.0.7   2021-07-07 [2] CRAN (R 4.1.0)
 readr       * 2.1.1   2021-11-30 [1] CRAN (R 4.1.0)
 readxl        1.3.1   2019-03-13 [2] CRAN (R 4.1.0)
 reprex        2.0.1   2021-08-05 [2] CRAN (R 4.1.0)
 rlang         0.4.11  2021-04-30 [2] CRAN (R 4.1.0)
 rmarkdown     2.11    2021-09-14 [1] CRAN (R 4.1.0)
 rprojroot     2.0.2   2020-11-15 [1] CRAN (R 4.1.0)
 rstudioapi    0.13    2020-11-12 [2] CRAN (R 4.1.0)
 rvest         1.0.1   2021-07-26 [2] CRAN (R 4.1.0)
 sass          0.4.0   2021-05-12 [1] CRAN (R 4.1.0)
 scales        1.1.1   2020-05-11 [2] CRAN (R 4.1.0)
 sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 4.1.0)
 stringi       1.7.6   2021-11-29 [2] CRAN (R 4.1.0)
 stringr     * 1.4.0   2019-02-10 [2] CRAN (R 4.1.0)
 tibble      * 3.1.6   2021-11-07 [2] CRAN (R 4.1.0)
 tidyr       * 1.1.4   2021-09-27 [2] CRAN (R 4.1.0)
 tidyselect    1.1.1   2021-04-30 [2] CRAN (R 4.1.0)
 tidyverse   * 1.3.1   2021-04-15 [2] CRAN (R 4.1.0)
 tzdb          0.1.2   2021-07-20 [2] CRAN (R 4.1.0)
 utf8          1.2.2   2021-07-24 [2] CRAN (R 4.1.0)
 vctrs         0.3.8   2021-04-29 [2] CRAN (R 4.1.0)
 vroom         1.5.7   2021-11-30 [1] CRAN (R 4.1.0)
 withr         2.4.2   2021-04-18 [2] CRAN (R 4.1.0)
 xfun          0.29    2021-12-14 [1] CRAN (R 4.1.0)
 xml2          1.3.2   2020-04-23 [2] CRAN (R 4.1.0)
 yaml          2.2.1   2020-02-01 [2] CRAN (R 4.1.0)

[1] /Users/tro3nr/Library/R/x86_64/4.1/library
[2] /Library/Frameworks/R.framework/Versions/4.1/Resources/library