# Script-wide settings and the value pools the column generators draw from.
N <- 3 # Number of Students
work_dir <- "~/Dropbox/cli-test-generator"
# Chromosome labels: chr1 .. chr22, followed by chrx and chry.
CHRS <- c(paste0("chr", 1:22), "chrx", "chry")
# Nucleotide letters used to build codons.
BASES <- c("A", "T", "C", "G")
# Boolean flags, kept as strings because they are pasted into text records.
BOOLES <- c("TRUE", "FALSE")
# Col1: pick a single chromosome label at random from CHRS.
# Returns a length-one character vector.
gen_chr <- function() {
  picked <- sample(x = CHRS, size = 1)
  picked
} #Col1
# Col2: draw a random start coordinate.
# Returns one integer sampled uniformly from 1 to 10^6 (inclusive).
gen_start <- function() {
  upper_bound <- 10^6
  sample(upper_bound, 1)
} #Col2
# Col3: derive a stop coordinate from a start coordinate.
#   col2_val: the start coordinate (numeric).
# Returns col2_val plus a random offset drawn uniformly from 1 to 10^3, so the
# stop is always strictly greater than the start.
gen_stop <- function(col2_val) {
  # Evaluate the argument before drawing, mirroring left-to-right evaluation.
  start_pos <- col2_val
  feature_len <- sample(1e3, 1)
  start_pos + feature_len
} #Col3
# Col4: build a random three-letter codon.
# Each position is drawn independently from BASES, so letters may repeat.
# Returns a single string such as "ATC".
gen_codon <- function() {
  first_base <- sample(BASES, 1, replace = TRUE)
  second_base <- sample(BASES, 1, replace = TRUE)
  third_base <- sample(BASES, 1, replace = TRUE)
  paste0(first_base, second_base, third_base)
} #Col4
# Col5: pick one of the strings in BOOLES at random.
# Returns a length-one character vector ("TRUE" or "FALSE"), not a logical.
gen_bool <- function() {
  flag <- sample(x = BOOLES, size = 1, replace = TRUE)
  flag
} #Col5
# Assemble one comma-separated mock record:
#   chromosome, start, stop, codon, boolean flag.
# The stop value is derived from the start value, so start must be drawn first.
# Returns the record as a single string.
gen_mock_data <- function() {
  chrom <- paste0(gen_chr())
  start_pos <- gen_start()
  stop_pos <- gen_stop(start_pos)
  codon <- gen_codon()
  flag <- gen_bool()
  # further columns can be generated here and added to the paste() call below
  paste(chrom, start_pos, stop_pos, codon, flag, sep = ",")
}
# Example call: builds one mock record (auto-printed when run at top level).
# The generators draw at random and no seed is set, so the sample output shown
# below will differ from run to run.
gen_mock_data()
## [1] "chr1,322148,322752,ATC,TRUE"
# Value pools for the header-line generators.
# Database identifier tags used as the first header field.
DB_IDS <- c(
  "gb", "emb", "dbj", "pir", "prf", "sp",
  "pdb", "pat", "bbs", "gnl", "ref", "lcl"
)
# Candidate accession numbers for the second header field.
ACCESSION <- c(100, 200, 300, 400)
# Bracketed phrases; gen_header_line() alternates between them by file index.
LATIN <- c("[Lehr und Kunst]", "[Illegitimi non carborundum]")
# Header field 1: pick one database identifier tag at random from DB_IDS.
# Returns a length-one character vector.
gen_db_id <- function() {
  tag <- sample(x = DB_IDS, size = 1)
  tag
} #1
# Header field 2: pick one accession number at random from ACCESSION.
# Returns a length-one numeric vector.
gen_accession <- function() {
  acc_no <- sample(x = ACCESSION, size = 1)
  acc_no
} #2
# Build one header line of the form "<db_id|accession|[phrase]".
#   i: numeric index; even values select LATIN[1], odd values select LATIN[2].
# Returns the header as a single string.
# NOTE(review): FASTA headers conventionally begin with ">" rather than "<" --
# confirm the leading "<" is intentional before changing it.
gen_header_line <- function(i) {
  db_tag <- gen_db_id()
  acc_no <- gen_accession()
  # i %% 2 is 0 or 1; shift by one because R vectors are 1-indexed.
  phrase <- LATIN[(i %% 2) + 1]
  # further fields can be appended to the paste0() call below
  paste0("<", db_tag, "|", acc_no, "|", phrase)
} #3
# Example call: index 0 is even, so the header ends with the first LATIN entry.
# The db id and accession are drawn at random, so they will differ from the
# sample output shown below.
gen_header_line(0)
## [1] "<gnl|200|[Lehr und Kunst]"
#dir.create(path = "~/Dropbox/cli-test-generator/test_files/", showWarnings = TRUE)
#setwd("~/Dropbox/cli-test-generator/test_files")
# Write mock data files into the current working directory.
#
# Each file is named "gene_name_<i>.csv" and holds two lines: a header line
# from gen_header_line(i) followed by one record from gen_mock_data().
#
#   n_files: number of files to create (default 5000, the original fixed
#            count). Zero creates nothing.
#
# Returns NULL invisibly; the function is called for its side effect.
gen_5000_files <- function(n_files = 5000L) {
  stopifnot(
    is.numeric(n_files),
    length(n_files) == 1L,
    !is.na(n_files),
    n_files >= 0
  )
  # TODO: insert random number generator for number of files to a student
  for (i in seq_len(n_files)) {
    connection <- file(paste0("gene_name_", i, ".csv"), "w")
    # Close the connection even if generating or writing a line fails, so an
    # error part-way through the loop does not leak an open file handle.
    tryCatch(
      writeLines(c(gen_header_line(i), gen_mock_data()), con = connection),
      finally = close(connection)
    )
  }
  invisible(NULL)
} # Gen. mock data
# Generate the files and time the run.
# gen_5000_files() opens its files by relative name, so they are created in
# whatever directory is current; switch to the output directory first.
# The directory must already exist (the dir.create() call above is commented
# out); setwd() fails otherwise.
setwd("~/Dropbox/cli-test-generator/test_files")
# Start the clock!
ptm <- proc.time()
gen_5000_files()
# Stop the clock
# The difference of two proc.time() snapshots is the user/system/elapsed time
# spent in between; sample timings from one run are shown below.
proc.time() - ptm
## user system elapsed
## 7.374 0.378 8.270
# Compress the 5000 files using 'bzip2', then delete '*.csv'
# Bundle every generated .csv file into a bzip2-compressed tar archive, then
# delete the originals, timing the whole step.
# Relies on the external `tar` and `find` commands being available to the
# shell that system() invokes.
setwd("~/Dropbox/cli-test-generator/test_files")
# Start the clock!
ptm <- proc.time()
a <- system("tar -cjf archive.tar.bz *.csv", intern = TRUE)
a
## character(0)
# With intern = TRUE a non-zero exit code is reported only as a warning plus a
# "status" attribute on the result. Check it explicitly so the .csv files are
# never deleted unless the archive was actually written.
tar_status <- attr(a, "status")
if (!is.null(tar_status) && tar_status != 0) {
  stop(
    "tar exited with status ", tar_status,
    "; leaving the .csv files in place",
    call. = FALSE
  )
}
# NOTE: find descends into subdirectories, so nested .csv files go as well.
b <- system("find . -name '*.csv' -type f -delete", intern = TRUE)
b
## character(0)
# Stop the clock
proc.time() - ptm
## user system elapsed
## 0.229 0.109 0.312
# For N students
# # Initialize the bzip2 compressed file.
# for (k in 1:N) {
#
# }
# fileConn<-file("output.txt")
# writeLines(c("Hello","World"), fileConn)
# close(fileConn)
# EOF