Overview: Generate 5000 files per student to test students' skills in using grep, cut, sort, uniq, pipes, redirects, and wc.

Inputs:

1) number of students &

2) working directory

# User inputs ----
# N: number of students to generate test material for.
N <- 3

# work_dir: base directory of the test generator.
work_dir <- "~/Dropbox/cli-test-generator"

Constants #1

# Value pools sampled by the column generators.
# CHRS: chromosome labels "chr1" .. "chr22" followed by "chrx" and "chry".
CHRS <- c(paste0("chr", 1:22), "chrx", "chry")
# BASES: the four nucleotide letters used to build codons.
BASES <- c("A", "T", "C", "G")
# BOOLES: boolean flags stored as strings (not logicals).
BOOLES <- c("TRUE", "FALSE")

Generate MOCK Columnar Data

# Column 1: pick one chromosome label at random from CHRS.
# Returns a length-one character vector.
gen_chr <- function() {
  sample(x = CHRS, size = 1)
}

# Column 2: random start coordinate.
# Returns one integer drawn uniformly from 1 to 10^6.
gen_start <- function() {
  max_start <- 10^6
  sample(max_start, size = 1)
}

# Column 3: stop coordinate derived from the start coordinate.
# col2_val: the start coordinate (column 2 value).
# Returns col2_val plus a random span drawn uniformly from 1 to 10^3,
# so the result is always strictly greater than col2_val.
gen_stop <- function(col2_val) {
  span <- sample(10^3, size = 1)
  col2_val + span
}

# Column 4: random codon.
# Draws three letters from BASES, one independent draw per position, and
# joins them into a single three-character string such as "ATC".
gen_codon <- function() {
  draw_base <- function(position) sample(BASES, 1, replace = TRUE)
  letters_drawn <- vapply(seq_len(3), draw_base, character(1))
  paste(letters_drawn, collapse = "")
}

# Column 5: random boolean flag.
# Returns one element of BOOLES, i.e. the string "TRUE" or "FALSE"
# (a character value, not a logical).
gen_bool <- function() {
  sample(x = BOOLES, size = 1, replace = TRUE)
}

# Build one comma-separated mock data line with five fields:
# chromosome, start, stop, codon, boolean flag.
# Returns a length-one character vector, e.g. "chr1,322148,322752,ATC,TRUE".
gen_mock_data <- function() {
  chrom <- gen_chr()
  start_pos <- gen_start()
  stop_pos <- gen_stop(start_pos) # stop is derived from the start drawn above
  codon <- gen_codon()
  flag <- gen_bool()
  # Additional columns can be generated here and appended to the paste() call.
  paste(chrom, start_pos, stop_pos, codon, flag, sep = ",")
}

# Example call: prints one randomly generated data line. The output shown
# below is a single sample; each run draws new random values.
gen_mock_data()
## [1] "chr1,322148,322752,ATC,TRUE"

Create Header For MOCK Files

# Value pools used to build the header line of each mock file.
# DB_IDS: database identifier prefixes.
DB_IDS <- c(
  "gb", "emb", "dbj", "pir", "prf", "sp",
  "pdb", "pat", "bbs", "gnl", "ref", "lcl"
)
# ACCESSION: candidate accession numbers.
ACCESSION <- c(100, 200, 300, 400)
# LATIN: bracketed description strings; gen_header_line() alternates between them.
LATIN <- c("[Lehr und Kunst]", "[Illegitimi non carborundum]")

# Header field 1: pick one database identifier at random from DB_IDS.
# Returns a length-one character vector.
gen_db_id <- function() {
  sample(x = DB_IDS, size = 1)
}

# Header field 2: pick one accession number at random from ACCESSION.
# Returns a length-one numeric vector.
gen_accession <- function() {
  sample(x = ACCESSION, size = 1)
}

# Header field 3 and assembly of the full header line.
# i: file index; its parity selects the description string, so even i uses
#    LATIN[1] and odd i uses LATIN[2].
# Returns a length-one character vector of the form
# "<db_id|accession|[description]", e.g. "<gnl|200|[Lehr und Kunst]".
gen_header_line <- function(i) {
  database <- gen_db_id()
  acc_number <- gen_accession()
  description <- LATIN[(i %% 2) + 1]
  # Additional header fields can be generated here and appended below.
  paste0("<", database, "|", acc_number, "|", description)
}

# Example call: prints one header line. An even index (0 here) selects the
# first LATIN string; the database id and accession are random per call.
gen_header_line(0)
## [1] "<gnl|200|[Lehr und Kunst]"

Produce MOCK Data Files

Using ‘gen_mock_data’ & ‘gen_header_line’

#dir.create(path = "~/Dropbox/cli-test-generator/test_files/", showWarnings = TRUE)
#setwd("~/Dropbox/cli-test-generator/test_files")

# Write mock data files named "gene_name_<i>.csv" for i = 1..n_files.
#
# Each file contains exactly two lines:
#   1. a header line from gen_header_line(i)
#   2. a comma-separated data line from gen_mock_data()
#
# n_files: number of files to write (default 5000, the original behaviour).
#          Zero writes nothing.
# out_dir: existing directory to write into (default ".", the current
#          working directory, as in the original).
#
# Returns NULL invisibly; the function is called for its side effect.
gen_5000_files <- function(n_files = 5000, out_dir = ".") {
  stopifnot(
    is.numeric(n_files), length(n_files) == 1, !is.na(n_files), n_files >= 0,
    is.character(out_dir), length(out_dir) == 1, dir.exists(out_dir)
  )

  # TODO: insert random number generator for number of files to a student.
  # seq_len() yields an empty loop for n_files == 0 (1:0 would run twice).
  for (i in seq_len(n_files)) {
    # Header is generated before the data line so the random-number stream
    # is consumed in the same order as before.
    header_line <- gen_header_line(i)
    data_line <- gen_mock_data()

    # Passing a path (not an open connection) lets writeLines() open and
    # close the file itself, so no connection is leaked if a write fails.
    writeLines(
      c(header_line, data_line),
      con = file.path(out_dir, paste0("gene_name_", i, ".csv"))
    )
  }

  invisible(NULL)
} # Gen. mock data

Generate 5000 files

# Generate the mock files inside <work_dir>/test_files and time the run.
# The target directory is derived from the `work_dir` input so that changing
# that input actually redirects the output; it is created if it is missing.
test_dir <- file.path(work_dir, "test_files")
dir.create(test_dir, showWarnings = FALSE, recursive = TRUE)
setwd(test_dir)

# Start the clock!
ptm <- proc.time()

gen_5000_files()

# Stop the clock
proc.time() - ptm
##    user  system elapsed 
##   7.374   0.378   8.270

Compress the 5000 files into a bzip2-compressed tar archive, then delete the ‘*.csv’ files

# Archive the generated .csv files, then remove the originals, and time it.
# The directory is derived from the `work_dir` input rather than hard-coded.
setwd(file.path(work_dir, "test_files"))
# Start the clock!
ptm <- proc.time()

# Bundle every .csv in this directory into one bzip2-compressed tar archive.
a <- system("tar -cjf archive.tar.bz *.csv", intern = TRUE)
a
## character(0)

# With intern = TRUE a non-zero exit status is reported through the "status"
# attribute of the result. Stop before deleting anything if tar failed,
# otherwise the only copy of the data would be lost.
tar_status <- attr(a, "status")
if (!is.null(tar_status) && tar_status != 0) {
  stop("tar exited with status ", tar_status,
       "; the .csv files were not deleted", call. = FALSE)
}

# -maxdepth 1 limits deletion to this directory, i.e. to the same files that
# the "*.csv" glob handed to tar above.
b <- system("find . -maxdepth 1 -name '*.csv' -type f -delete", intern = TRUE)
b
## character(0)
# Stop the clock
proc.time() - ptm
##    user  system elapsed 
##   0.229   0.109   0.312
# For N students
    
# # Initialize the bzip2 compressed file.
# for (k in 1:N) {
#     
# }
# fileConn<-file("output.txt")
# writeLines(c("Hello","World"), fileConn)
# close(fileConn)

EOF