Title: Produce % AA compositions

Libraries

# Packages this script attaches; any that are missing are installed first.
Libraries <- c("readr", "beepr", "stringr", "dplyr", "knitr")

# Install if not present, then attach.
# requireNamespace() only checks availability (it does not attach), so each
# package is attached exactly once by library(), which errors loudly if the
# install did not succeed.
for (p in Libraries) {
    if (!requireNamespace(p, quietly = TRUE)) {
        install.packages(p, dependencies = TRUE)
    }
    library(p, character.only = TRUE)
}

Import Data

# Work from the project folder: the input file read below and the output CSV
# written later are both given as relative paths.
setwd("~/Dropbox/Oxy-RF/7_mean_aa_graphic")

# The input file is read without a header row (col_names = FALSE), so the
# first line is treated as data.
seven_classes <- read_csv(
    file = "seven_class_1372_test_harness_2019-06-11_10-44-46-PM.txt",
    col_names = FALSE
)

Initialization info:

- Column 1: Protein class = ("Ctrl", "Ery", "Hcy", "Hhe", "Hgb", "Lgb", "Mgb")
- Column 2: Total AA per protein
- Columns 3:22: aa = ("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# Labels of the seven protein classes.
protein_class <- c("Ctrl", "Ery", "Hcy", "Hhe", "Hgb", "Lgb", "Mgb")

# Number of proteins per class (sums to 1372); presumably listed in the same
# order as protein_class -- confirm against the data.
protein_count <- c(700, 20, 31, 77, 486, 13, 45)

# One-letter codes of the 20 amino acids, in output-column order.
aa <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
        "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

Function - Prep empty file

# Output CSV path, relative to the working directory. save_aa_info() also
# reads this variable as a global when appending rows.
file_name <- "test_harness_1372_TEST_Class_aa.csv"

# Create (or overwrite) a CSV file that contains only the header row.
#
# The header has 22 fields: "Class", "TotalAA", then the one-letter codes of
# the 20 amino acids. write.table() is called without append, so an existing
# file at the same path is replaced.
#
# Args:
#   file_name: Path of the CSV file to write.
#
# Returns: NULL, invisibly (the value of write.table()).
save_empty_file <- function(file_name) {
    aa_letters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                    "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
    # A 1 x 22 character matrix is written out as a single comma-separated line
    header_row <- matrix(c("Class", "TotalAA", aa_letters), nrow = 1)
    write.table(header_row,
                file = file_name,
                sep = ",",
                eol = "\n",
                row.names = FALSE,
                col.names = FALSE)
}

# Start the output file fresh: save_empty_file() writes without append, so
# any existing file is replaced by a single header row.
save_empty_file(file_name)

Produce % AA compositions & save to file

# Compute the amino-acid composition of one or more protein sequences and
# append it to a CSV file, one row per sequence.
#
# Each row has 22 fields: the class label, the sequence length, and the
# fraction (count / length, i.e. 0-1 rather than 0-100) of each of the 20
# amino acids in the order A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T,
# V, W, Y. Because the label is a string, every field is written as a quoted
# string, exactly as the previous implementation did.
#
# Args:
#   protein_seq:   Sequence(s) as a character vector, or a one-row data frame /
#                  tibble slice holding the sequence. It is flattened with
#                  unlist() before counting.
#   protein_class: Class label written to the first field of every row.
#   out_file:      Path of the CSV to append to. Defaults to the global
#                  `file_name`, which earlier versions always wrote to.
#
# Returns: NULL, invisibly. Called for its side effect of appending to
#   `out_file`.
#
# Notes:
#   - An empty sequence has length 0, so its fractions are NaN (0 / 0).
#   - nchar(..., keepNA = FALSE) reports 2 for a missing sequence; its
#     fractions are NA.
save_aa_info <- function(protein_seq, protein_class, out_file = file_name) {
    amino_acids <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                     "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

    # Callers pass a tibble row slice; flatten it to a plain character vector
    seqs <- as.character(unlist(protein_seq, use.names = FALSE))
    if (length(seqs) == 0) {
        # Nothing to write
        return(invisible(NULL))
    }

    total_aa <- nchar(seqs, keepNA = FALSE)

    # One column per amino acid, one row per sequence. vapply() returns a
    # plain vector when there is a single sequence, so force a matrix shape.
    fractions <- vapply(
        amino_acids,
        function(residue) str_count(seqs, residue) / total_aa,
        numeric(length(seqs))
    )
    fractions <- matrix(fractions, nrow = length(seqs))

    # cbind() with the character label coerces the whole row to character
    aa_rows <- cbind(protein_class, total_aa, fractions)

    # Write/append the composition rows
    write.table(aa_rows,
                file = out_file,
                append = TRUE,
                col.names = FALSE,
                row.names = FALSE,
                sep = ",",
                eol = "\n")
    invisible(NULL)
}

Put all together

start_time <- Sys.time() # Start timer

# Append one composition row per input row. The row count is taken from the
# data rather than hard-coded, so a shorter file does not produce rows of NA
# and a longer file is not silently truncated.
for (i in seq_len(nrow(seven_classes))) {
    protein_seq <- seven_classes[i, ]
    save_aa_info(protein_seq, "test")
}

Sys.time() - start_time  # Display time difference
## Time difference of 2.294043 secs
beep(sound = 1, expr = NULL)  # Audible signal that the run has finished

EOF