Libraries
# Packages this script attaches.
Libraries <- c("readr", "beepr", "stringr", "dplyr", "knitr")
# Install any package that is missing, then attach it.
for (p in Libraries) {
  # requireNamespace() only tests for presence; the attach happens once,
  # in the library() call below. TRUE is spelled out because T can be
  # reassigned.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p, dependencies = TRUE)
  }
  library(p, character.only = TRUE)
}
Import Data
# Relative file names below are resolved against this project folder.
setwd("~/Dropbox/Oxy-RF/7_mean_aa_graphic")
# getwd()
# Read the input file; col_names = FALSE makes the first line a data row.
seven_classes <- readr::read_csv(
  file = "seven_class_1372_test_harness_2019-06-11_10-44-46-PM.txt",
  col_names = FALSE
)
Initialization info:
- Column 1: Protein Class = ("Ctrl", "Ery", "Hcy", "Hhe", "Hgb", "Lgb", "Mgb")
- Column 2: Total AA per protein
- Columns 3:22: aa = ("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
# Protein class labels.
protein_class <- c("Ctrl", "Ery", "Hcy", "Hhe", "Hgb", "Lgb", "Mgb")
# Number of proteins per class
protein_count <- c(700, 20, 31, 77, 486, 13, 45)
# One-letter amino-acid codes.
aa <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
        "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
Function - Prep empty file
# Output CSV path. This global is passed to save_empty_file() and is also
# read directly (not via an argument) inside save_aa_info().
file_name <- "test_harness_1372_TEST_Class_aa.csv"
# Write a CSV that contains only the header line.
#
# file_name: path of the file to write. write.table() is called without
#            append, so an existing file at that path is replaced.
save_empty_file <- function(file_name) {
  header <- c(
    "Class", "TotalAA",
    "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
    "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
  )
  # A 1 x 22 matrix, so the names come out as one comma-separated line.
  header_row <- matrix(header, nrow = 1)
  write.table(header_row,
              file = file_name,
              sep = ",",
              eol = "\n",
              row.names = FALSE,
              col.names = FALSE)
}
# Create the output file with just the header line; rows are appended later.
save_empty_file(file_name)
Produce % AA compositions & save to file
# Append one CSV row per protein sequence: class label, sequence length,
# and the fraction (count / length) of each of the 20 amino acids.
#
# protein_seq:   a sequence string, a character vector of sequences, or a
#                data-frame row/column holding them (it is flattened with
#                unlist()).
# protein_class: label written in the first column (recycled per sequence).
# out_file:      CSV file to append to; defaults to the global `file_name`
#                so existing two-argument calls behave as before.
#
# Returns the appended rows invisibly.
#
# The class label and the numbers are kept in separate data-frame columns,
# so the numeric fields are written unquoted instead of being coerced to
# quoted strings.
save_aa_info <- function(protein_seq, protein_class, out_file = file_name) {
  amino_acids <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                   "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

  sequences <- as.character(unlist(protein_seq, use.names = FALSE))
  if (length(sequences) == 0L) {
    return(invisible(NULL))
  }

  # 2nd column: total residue count (keepNA = FALSE as in the original call).
  total_aa <- nchar(sequences, keepNA = FALSE)

  # One pass per sequence: tabulate() counts each residue's position in
  # `amino_acids`; characters outside that alphabet are ignored.
  counts <- vapply(
    strsplit(sequences, "", fixed = TRUE),
    function(residues) {
      tabulate(match(residues, amino_acids), nbins = length(amino_acids))
    },
    integer(length(amino_acids))
  )

  # counts is 20 x n; transpose to n x 20 so dividing by total_aa recycles
  # down the rows (one total per sequence).
  fractions <- t(counts) / total_aa
  # A missing sequence has no meaningful composition.
  fractions[is.na(sequences), ] <- NA_real_

  rows <- data.frame(
    class = rep(as.character(protein_class), length.out = length(sequences)),
    total = total_aa,
    fractions,
    stringsAsFactors = FALSE
  )

  # Write/append the rows of AA values
  write.table(rows,
              file = out_file,
              append = TRUE,
              col.names = FALSE,
              row.names = FALSE,
              sep = ",",
              eol = "\n")
  invisible(rows)
}
Put all together
start_time <- Sys.time() # Start timer
# Iterate over the rows actually read rather than a hard-coded 1372, so the
# loop stays in step with the input file (and is a no-op if it is empty).
# NOTE(review): every row is labelled "test" here - confirm that is intended.
for (i in seq_len(nrow(seven_classes))) {
  protein_seq <- seven_classes[i, ]
  save_aa_info(protein_seq, "test")
}
Sys.time() - start_time # Display time difference
## Time difference of 2.294043 secs
# Audible signal that the run has finished.
beep(sound = 1, expr = NULL)
EOF