This product offers a workflow to take a few thousand unidentified
sequences and provide a better understanding of what genes are present.
This will be accomplished using Blast and protein sequences from
UniProt/Swiss-Prot.
A few weeks ago I perfected software installation, so I will not
demonstrate that here. Please see this notebook for more.
Database Creation
Obtain Fasta
(UniProt/Swiss-Prot)
This is downloaded from UniProt: the reviewed (Swiss-Prot) sequences. I named
the file based on the identity of the database release given.
## current date and time is April 09, 2023 13:15:42
# Download the reviewed UniProt/Swiss-Prot protein FASTA and unpack it.
# Stop immediately if the data directory is missing so nothing is written
# into whatever directory the chunk happens to start in.
cd ../data || exit 1
# --fail: exit non-zero on an HTTP error instead of saving the error page as
#         if it were the .gz archive.
# -o:     write straight to the release-stamped file name (replaces the
#         separate mv step).
# NOTE(review): the URL points at current_release, so the r2023_01 stamp is
# only accurate while that release is current - confirm before re-running.
curl --fail \
  -o uniprot_sprot_r2023_01.fasta.gz \
  https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz \
  || exit 1
# -k keeps the compressed archive next to the extracted FASTA.
gunzip -k uniprot_sprot_r2023_01.fasta.gz
Making the
database
# Build a protein BLAST database from the Swiss-Prot FASTA.
# -p: do not fail when ../blastdb already exists (e.g. when re-running).
mkdir -p ../blastdb
# NOTE(review): this uses the 2.11.0+ install while blastx further down is
# run from a 2.13.0+ install - confirm the two versions are meant to differ.
/home/shared/ncbi-blast-2.11.0+/bin/makeblastdb \
  -in ../data/uniprot_sprot_r2023_01.fasta \
  -dbtype prot \
  -out ../blastdb/uniprot_sprot_r2023_01
Getting the query fasta
file
# Fetch the query contigs (nucleotide FASTA) into ../data.
# --fail: on an HTTP error, exit non-zero and do not write the error page
#         into the .fa file (a plain "> file" redirect would).
# -k:     skips TLS certificate verification.
# NOTE(review): presumably needed because of this server's certificate -
# confirm, and drop -k if the certificate validates.
curl --fail -k \
  -o ../data/Ab_4denovo_CLC6_a.fa \
  https://eagle.fish.washington.edu/cnidarian/Ab_4denovo_CLC6_a.fa
Exploring what the query fasta file looks like
# Show the first three lines of the query FASTA (header plus two sequence lines)
head -n 3 ../data/Ab_4denovo_CLC6_a.fa
## >solid0078_20110412_FRAG_BC_WHITE_WHITE_F3_QV_SE_trimmed_contig_1
## ACACCCCACCCCAACGCACCCTCACCCCCACCCCAACAATCCATGATTGAATACTTCATC
## TATCCAAGACAAACTCCTCCTACAATCCATGATAGAATTCCTCCAAAAATAATTTCACAC
# Count FASTA records: one header line per sequence.
echo "How many sequences are there?"
# Anchor on the start of the line so only header lines (those beginning
# with ">") are counted.
grep -c "^>" ../data/Ab_4denovo_CLC6_a.fa
## How many sequences are there?
## 5490
# Load the query contigs from disk
fasta_file <- "../data/Ab_4denovo_CLC6_a.fa" # Replace with the name of your FASTA file
sequences <- readDNAStringSet(filepath = fasta_file)
# Width (number of letters) of every sequence in the set
sequence_lengths <- width(sequences)
# ggplot2 works on data frames: one row per sequence
sequence_lengths_df <- data.frame(Length = sequence_lengths)
# Histogram of sequence lengths, one bin per unit of length
ggplot(data = sequence_lengths_df, mapping = aes(x = Length)) +
  geom_histogram(binwidth = 1, fill = "blue", colour = "grey", alpha = 0.75) +
  ggtitle("Histogram of Sequence Lengths") +
  xlab("Sequence Length") +
  ylab("Frequency") +
  theme_minimal()

# Read the query contigs from the FASTA file
fasta_file <- "../data/Ab_4denovo_CLC6_a.fa"
sequences <- readDNAStringSet(fasta_file)
# Per-sequence letter counts: one row per sequence, with columns for
# A, C, G, T and "other" (baseOnly = TRUE collapses everything else)
base_composition <- alphabetFrequency(sequences, baseOnly = TRUE)
# Sum over all sequences so each base is drawn as exactly one bar.
# Plotting the per-sequence rows with stat = "identity" draws one bar per
# sequence on top of each other, so the chart does not show the totals.
base_totals <- colSums(base_composition)
# Long-format table for ggplot2; the factor keeps the A, C, G, T, other order
base_composition_melted <- data.frame(
  Base = factor(names(base_totals), levels = names(base_totals)),
  Count = as.numeric(base_totals)
)
# Bar chart of total base counts; "other" gets an explicit neutral colour
# because every level on a manual fill scale needs a value
ggplot(base_composition_melted, aes(x = Base, y = Count, fill = Base)) +
  geom_bar(stat = "identity", color = "black") +
  labs(
    title = "Base Composition",
    x = "Base",
    y = "Count"
  ) +
  theme_minimal() +
  scale_fill_manual(
    values = c(
      "A" = "green", "C" = "blue", "G" = "yellow", "T" = "red",
      "other" = "grey50"
    )
  )

# Load the query contigs again so this chunk can run on its own
fasta_file <- "../data/Ab_4denovo_CLC6_a.fa"
sequences <- readDNAStringSet(filepath = fasta_file)
# Count non-overlapping occurrences of the literal motif "CG" in one sequence.
#
# sequence: a single sequence (a character string, or an object that
#           gregexpr() can coerce with as.character()).
# Returns an integer count; 0 when the motif does not occur.
count_cg_motifs <- function(sequence) {
  cg_motif <- "CG"
  match_starts <- gregexpr(cg_motif, sequence, fixed = TRUE)[[1]]
  # gregexpr() reports "no match" as a single -1, so taking length() of the
  # result would count 1 for a sequence without any CG. Count only real
  # (positive) match positions instead.
  sum(match_starts > 0)
}
# Apply the CG counter to every sequence in the set
cg_motifs_counts <- sapply(X = sequences, FUN = count_cg_motifs)
# One row per sequence, holding its CG count
cg_motifs_counts_df <- data.frame(CG_Count = cg_motifs_counts)
# Histogram of CG counts, one bin per integer count
ggplot(data = cg_motifs_counts_df, mapping = aes(x = CG_Count)) +
  geom_histogram(binwidth = 1, fill = "blue", colour = "black", alpha = 0.75) +
  ggtitle("Distribution of CG Motifs") +
  xlab("Number of CG Motifs") +
  ylab("Frequency") +
  theme_minimal()

Running Blastx
# Translate the query contigs in all six frames and search them against the
# Swiss-Prot protein database, writing tabular (outfmt 6) results.
# Make sure the results directory exists; blastx does not create it.
mkdir -p ../output
# -evalue 1E-20       keep only hits at or below this e-value
# -max_target_seqs 1  keep one subject per query
# NOTE(review): -num_threads 20 is hard-coded - confirm the host has that
# many cores available.
~/applications/ncbi-blast-2.13.0+/bin/blastx \
  -query ../data/Ab_4denovo_CLC6_a.fa \
  -db ../blastdb/uniprot_sprot_r2023_01 \
  -out ../output/Ab_4-uniprot_blastx.tab \
  -evalue 1E-20 \
  -num_threads 20 \
  -max_target_seqs 1 \
  -outfmt 6
# Peek at the first two hits in the tabular BLAST output
head -n 2 ../output/Ab_4-uniprot_blastx.tab
## solid0078_20110412_FRAG_BC_WHITE_WHITE_F3_QV_SE_trimmed_contig_3 sp|O42248|GBLP_DANRE 82.456 171 30 0 1 513 35 205 2.81e-103 301
## solid0078_20110412_FRAG_BC_WHITE_WHITE_F3_QV_SE_trimmed_contig_5 sp|Q08013|SSRG_RAT 75.385 65 16 0 3 197 121 185 1.40e-28 104
# Report how many hit lines the BLAST run produced
blast_tab=../output/Ab_4-uniprot_blastx.tab
echo "Number of lines in output"
wc -l "$blast_tab"
## Number of lines in output
## 727 ../output/Ab_4-uniprot_blastx.tab
Joining Blast table
with annotations.
Prepping Blast table
for easy join
# Turn every "|" into a tab so the sp|ACCESSION|NAME subject field becomes
# three separate columns, making the accession directly joinable.
blast_tab=../output/Ab_4-uniprot_blastx.tab
sep_tab=../output/Ab_4-uniprot_blastx_sep.tab
tr '|' '\t' < "$blast_tab" > "$sep_tab"
# Show the first reformatted record
head -n 1 "$sep_tab"
## solid0078_20110412_FRAG_BC_WHITE_WHITE_F3_QV_SE_trimmed_contig_3 sp O42248 GBLP_DANRE 82.456 171 30 0 1 513 35 205 2.81e-103 301
Could do some cool
stuff in R here reading in table
# BLAST hits (tab-separated, no header row, so columns are named V1, V2, ...)
bltabl <- read.delim(
  "../output/Ab_4-uniprot_blastx_sep.tab",
  header = FALSE
)
# UniProt annotation table (tab-separated, first row holds the column names)
spgo <- read.delim(
  "https://gannet.fish.washington.edu/seashell/snaps/uniprot_table_r2023_01.tab",
  header = TRUE
)
# Preview the first rows of the BLAST table in a scrollable, unpaged widget
head(bltabl) %>%
  datatable(
    options = list(
      scrollX = TRUE,
      scrollY = "400px",
      scrollCollapse = TRUE,
      paging = FALSE
    )
  )
# Same preview for the UniProt annotation table
head(spgo) %>%
  datatable(
    options = list(
      scrollX = TRUE,
      scrollY = "400px",
      scrollCollapse = TRUE,
      paging = FALSE
    )
  )
# Attach UniProt annotations to every BLAST hit. The join is computed once
# and reused for both the stored table and the interactive view, instead of
# running the same pipeline twice.
# V1 = query id, V3 = UniProt accession (third field after splitting the
# subject id on "|"), V13 = e-value column of the split BLAST table.
annot_tab <-
  left_join(bltabl, spgo, by = c("V3" = "Entry")) %>%
  select(
    V1, V3, V13, Protein.names, Organism,
    Gene.Ontology..biological.process., Gene.Ontology.IDs
  ) %>%
  # Shorten the long sequencing-run prefix of the contig names to "Ab"
  mutate(
    V1 = str_replace_all(
      V1,
      pattern = "solid0078_20110412_FRAG_BC_WHITE_WHITE_F3_QV_SE_trimmed",
      replacement = "Ab"
    )
  )
# NOTE(review): the next step reads ../output/blast_annot_go.tab, but nothing
# visible here writes annot_tab to that path - confirm where that file is
# produced.
# Interactive view of the annotated hits
datatable(annot_tab)
# Load the annotated BLAST table (tab-separated, with header)
dataset <- read.delim("../output/blast_annot_go.tab") # Replace with the path to your dataset
# Column whose values are tallied
column_name <- "Organism" # Replace with the name of the column of interest
column_data <- dataset[[column_name]]
# Frequency of each distinct value in that column
string_counts <- table(column_data)
# Two-column data frame (value, count), sorted by descending count
string_counts_df <- setNames(
  as.data.frame(string_counts),
  c("String", "Count")
)
count_rank <- order(string_counts_df$Count, decreasing = TRUE)
string_counts_df <- string_counts_df[count_rank, ]
# Keep the ten most frequent values
top_10_strings <- head(string_counts_df, n = 10)
# Horizontal bar chart of the ten most frequent values, legend hidden
ggplot(
  data = top_10_strings,
  mapping = aes(x = reorder(String, -Count), y = Count, fill = String)
) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  ggtitle("Top 10 Species hits") +
  xlab(column_name) +
  ylab("Count") +
  theme_minimal() +
  theme(legend.position = "none") +
  coord_flip()

LS0tCnRpdGxlOiAiT2ggV2hhdCBhIEJsYXN0ISIKYXV0aG9yOiBTdGV2ZW4gUm9iZXJ0cwpkYXRlOiAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknKWAiICAKb3V0cHV0OiAKICBodG1sX2RvY3VtZW50OgogICAgdGhlbWU6IHJlYWRhYmxlCiAgICBoaWdobGlnaHQ6IHplbmJ1cm4KICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIGNvZGVfZm9sZGluZzogc2hvdwogICAgY29kZV9kb3dubG9hZDogdHJ1ZQotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQpsaWJyYXJ5KGtuaXRyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShrYWJsZUV4dHJhKQpsaWJyYXJ5KERUKQpsaWJyYXJ5KEJpb3N0cmluZ3MpCmtuaXRyOjpvcHRzX2NodW5rJHNldCgKICBlY2hvID0gVFJVRSwgICAgICAgICAjIERpc3BsYXkgY29kZSBjaHVua3MKICBldmFsID0gRkFMU0UsICAgICAgICAgIyBFdmFsdWF0ZSBjb2RlIGNodW5rcwogIHdhcm5pbmcgPSBGQUxTRSwgICAgICMgSGlkZSB3YXJuaW5ncwogIG1lc3NhZ2UgPSBGQUxTRSwgICAgICMgSGlkZSBtZXNzYWdlcwogIGZpZy53aWR0aCA9IDYsICAgICAgICMgU2V0IHBsb3Qgd2lkdGggaW4gaW5jaGVzCiAgZmlnLmhlaWdodCA9IDQsICAgICAgIyBTZXQgcGxvdCBoZWlnaHQgaW4gaW5jaGVzCiAgZmlnLmFsaWduID0gImNlbnRlciIgIyBBbGlnbiBwbG90cyB0byB0aGUgY2VudGVyCikKYGBgCgpgYGB7ciBjbGlwYm9hcmQtanMsIGV2YWw9VFJVRSwgaW5jbHVkZT1GQUxTRX0KY2F0KCcKPCEtLSBMb2FkIGNsaXBib2FyZC5qcyBsaWJyYXJ5IC0tPgo8c2NyaXB0IHNyYz0iaHR0cHM6Ly9jZG5qcy5jbG91ZGZsYXJlLmNvbS9hamF4L2xpYnMvY2xpcGJvYXJkLmpzLzIuMC44L2NsaXBib2FyZC5taW4uanMiPjwvc2NyaXB0PgoKPCEtLSBKYXZhU2NyaXB0IGNvZGUgdG8gZW5hYmxlIHRoZSBjb3B5LXRvLWNsaXBib2FyZCBmdW5jdGlvbmFsaXR5IC0tPgo8c2NyaXB0Pgpkb2N1bWVudC5hZGRFdmVudExpc3RlbmVyKCJET01Db250ZW50TG9hZGVkIiwgZnVuY3Rpb24oKSB7CiAgdmFyIGNvZGVCbG9ja3MgPSBkb2N1bWVudC5xdWVyeVNlbGVjdG9yQWxsKCJwcmUgY29kZSIpOwogIGNvZGVCbG9ja3MuZm9yRWFjaChmdW5jdGlvbihjb2RlQmxvY2ssIGkpIHsKICAgIHZhciBjb3B5SWQgPSAiY29weS0iICsgaTsKICAgIHZhciBjb3B5QnV0dG9uID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgiYnV0dG9uIik7CiAgICBjb3B5QnV0dG9uLmlubmVySFRNTCA9ICJDb3B5IjsKICAgIGNvcHlCdXR0b24uaWQgPSBjb3B5SWQ7CiAgICBjb3B5QnV0dG9uLnN0eWxlLnBvc2l0aW9uID0gImFic29sdXRlIjsKICAgIGNvcHlCdXR0b24uc3R5bGUudG9wID0gIjAiOwogICAgY29weUJ1dHRvbi5zdHlsZS5yaWdodCA9ICIwIjsKICAgIGNvZGVCbG9jay5wYXJlbnROb2RlLmluc2VydEJlZm9yZShjb3B5QnV0dG9uLCBjb2RlQmxvY2spOwogICAg
bmV3IENsaXBib2FyZEpTKCIjIiArIGNvcHlJZCwgewogICAgICB0YXJnZXQ6IGZ1bmN0aW9uKCkgewogICAgICAgIHJldHVybiBjb2RlQmxvY2s7CiAgICAgIH0KICAgIH0pOwogIH0pOwp9KTsKPC9zY3JpcHQ+CicsIHNlcCA9ICdcbicpCmBgYAoKClRoaXMgaXMgcHJvZHVjdCBvZmZlcnMgYSB3b3JrZmxvdyB0byB0YWtlIGEgZmV3IHRob3VzYW5kIHVuaWRlbnRpZmllZCBzZXF1ZW5jZXMgYW5kIHByb3ZpZGUgYSBiZXR0ZXIgdW5kZXJzdGFuZGluZyBvZiB3aGF0IGdlbmVzIGFyZSBwcmVzZW50LiBUaGlzIHdpbGwgYmUgYWNjb21wbGlzaGVkIHRocm91Z2ggdXNpbmcgQmxhc3QgYW5kIHByb3RlaW4gc2VxdWVuZXMgZnJvbSBVbmlQcm90L1N3aXNzLXByb3QuCgoKCi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQoKQSBmZXcgd2Vla3MgYWdvIEkgcGVyZmVjdGVkIHNvZnR3YXJlIGluc3RhbGxhdGlvbiwgc28gSSB3aWxsIG5vdCBkZW1vbnN0cmF0ZSB0aGF0IGhlcmUuIFBsZWFzZSBzZWUgdGhpcyBub3RlYm9vayBmb3IgbW9yZS4KCiMgRGF0YWJhc2UgQ3JlYXRpb24KCiMjIE9idGFpbiBGYXN0YSAoVW5pUHJvdC9Td2lzcy1Qcm90KQoKVGhpcyBpcyBmcm9tIGhlcmUgcGljdXIgcmV2aWV3ZSBzZXF1ZW5jZXMgSSBuYW1lZCBiYXNlZCBvbiB0aGUgaWRlbnRpZnkgb2YgdGhlIGRhdGFiYXNlIGdpdmVuCgpgYGB7ciB0aW1lLCBldmFsPVRSVUUsIGVjaG89RkFMU0V9CmN1cnJlbnRfdGltZSA8LSBmb3JtYXQoU3lzLnRpbWUoKSwgIiVCICVkLCAlWSAlSDolTTolUyIpCmNhdCgiY3VycmVudCBkYXRlIGFuZCB0aW1lIGlzICIsIGN1cnJlbnRfdGltZSkKYGBgCgpgYGB7ciBkb3dubG9hZC1kYXRhLCBlbmdpbmU9J2Jhc2gnfQpjZCAuLi9kYXRhCmN1cmwgLU8gaHR0cHM6Ly9mdHAudW5pcHJvdC5vcmcvcHViL2RhdGFiYXNlcy91bmlwcm90L2N1cnJlbnRfcmVsZWFzZS9rbm93bGVkZ2ViYXNlL2NvbXBsZXRlL3VuaXByb3Rfc3Byb3QuZmFzdGEuZ3oKbXYgdW5pcHJvdF9zcHJvdC5mYXN0YS5neiB1bmlwcm90X3Nwcm90X3IyMDIzXzAxLmZhc3RhLmd6Cmd1bnppcCAtayB1bmlwcm90X3Nwcm90X3IyMDIzXzAxLmZhc3RhLmd6CmBgYAoKIyMgTWFraW5nIHRoZSBkYXRhYmFzZQoKYGBge3IgbWFrZS1ibGFzdGRiLCBlbmdpbmU9J2Jhc2gnfQpta2RpciAuLi9ibGFzdGRiCi9ob21lL3NoYXJlZC9uY2JpLWJsYXN0LTIuMTEuMCsvYmluL21ha2VibGFzdGRiIFwKLWluIC4uL2RhdGEvdW5pcHJvdF9zcHJvdF9yMjAyM18wMS5mYXN0YSBcCi1kYnR5cGUgcHJvdCBcCi1vdXQgLi4vYmxhc3RkYi91bmlwcm90X3Nwcm90X3IyMDIzXzAxCmBgYAoKIyBHZXR0aW5nIHRoZSBxdWVyeSBmYXN0YSBmaWxlCgpgYGB7ciBkb3dubG9hZC1xdWVyeSwgZW5naW5lPSdiYXNoJ30KY3VybCBodHRwczovL2VhZ2xlLmZpc2gud2FzaGluZ3Rvbi5lZHUvY25pZGFyaWFuL0FiXzRkZW5vdm9f
Q0xDNl9hLmZhIFwKLWsgXAo+IC4uL2RhdGEvQWJfNGRlbm92b19DTEM2X2EuZmEKYGBgCgpFeHBsb3Jpbmcgd2hhdCBmYXN0YSBmaWxlCgpgYGB7ciB2aWV3LXF1ZXJ5LCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CmhlYWQgLTMgLi4vZGF0YS9BYl80ZGVub3ZvX0NMQzZfYS5mYQpgYGAKCmBgYHtyIHZpZXcyLXF1ZXJ5LCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CmVjaG8gIkhvdyBtYW55IHNlcXVlbmNlcyBhcmUgdGhlcmU/IgpncmVwIC1jICI+IiAuLi9kYXRhL0FiXzRkZW5vdm9fQ0xDNl9hLmZhCmBgYAoKYGBge3IgaGlzdG9ncmFtLCBldmFsPVRSVUV9CiMgUmVhZCBGQVNUQSBmaWxlCmZhc3RhX2ZpbGUgPC0gIi4uL2RhdGEvQWJfNGRlbm92b19DTEM2X2EuZmEiICAjIFJlcGxhY2Ugd2l0aCB0aGUgbmFtZSBvZiB5b3VyIEZBU1RBIGZpbGUKc2VxdWVuY2VzIDwtIHJlYWRETkFTdHJpbmdTZXQoZmFzdGFfZmlsZSkKCiMgQ2FsY3VsYXRlIHNlcXVlbmNlIGxlbmd0aHMKc2VxdWVuY2VfbGVuZ3RocyA8LSB3aWR0aChzZXF1ZW5jZXMpCgojIENyZWF0ZSBhIGRhdGEgZnJhbWUKc2VxdWVuY2VfbGVuZ3Roc19kZiA8LSBkYXRhLmZyYW1lKExlbmd0aCA9IHNlcXVlbmNlX2xlbmd0aHMpCgojIFBsb3QgaGlzdG9ncmFtIHVzaW5nIGdncGxvdDIKZ2dwbG90KHNlcXVlbmNlX2xlbmd0aHNfZGYsIGFlcyh4ID0gTGVuZ3RoKSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMSwgY29sb3IgPSAiZ3JleSIsIGZpbGwgPSAiYmx1ZSIsIGFscGhhID0gMC43NSkgKwogIGxhYnModGl0bGUgPSAiSGlzdG9ncmFtIG9mIFNlcXVlbmNlIExlbmd0aHMiLAogICAgICAgeCA9ICJTZXF1ZW5jZSBMZW5ndGgiLAogICAgICAgeSA9ICJGcmVxdWVuY3kiKSArCiAgdGhlbWVfbWluaW1hbCgpCmBgYAoKYGBge3IgQUNHVCwgZXZhbD1UUlVFfQoKIyBSZWFkIEZBU1RBIGZpbGUKZmFzdGFfZmlsZSA8LSAiLi4vZGF0YS9BYl80ZGVub3ZvX0NMQzZfYS5mYSIKc2VxdWVuY2VzIDwtIHJlYWRETkFTdHJpbmdTZXQoZmFzdGFfZmlsZSkKCiMgQ2FsY3VsYXRlIGJhc2UgY29tcG9zaXRpb24KYmFzZV9jb21wb3NpdGlvbiA8LSBhbHBoYWJldEZyZXF1ZW5jeShzZXF1ZW5jZXMsIGJhc2VPbmx5ID0gVFJVRSkKCiMgQ29udmVydCB0byBkYXRhIGZyYW1lIGFuZCByZXNoYXBlIGZvciBnZ3Bsb3QyCmJhc2VfY29tcG9zaXRpb25fZGYgPC0gYXMuZGF0YS5mcmFtZShiYXNlX2NvbXBvc2l0aW9uKQpiYXNlX2NvbXBvc2l0aW9uX2RmJElEIDwtIHJvd25hbWVzKGJhc2VfY29tcG9zaXRpb25fZGYpCmJhc2VfY29tcG9zaXRpb25fbWVsdGVkIDwtIHJlc2hhcGUyOjptZWx0KGJhc2VfY29tcG9zaXRpb25fZGYsIGlkLnZhcnMgPSAiSUQiLCB2YXJpYWJsZS5uYW1lID0gIkJhc2UiLCB2YWx1ZS5uYW1lID0gIkNvdW50IikKCiMgUGxvdCBiYXNlIGNvbXBvc2l0aW9uIGJhciBjaGFydCB1c2luZyBnZ3Bsb3QyCmdncGxvdChiYXNlX2NvbXBvc2l0aW9uX21lbHRlZCwgYWVz
KHggPSBCYXNlLCB5ID0gQ291bnQsIGZpbGwgPSBCYXNlKSkgKwogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBwb3NpdGlvbiA9ICJkb2RnZSIsIGNvbG9yID0gImJsYWNrIikgKwogIGxhYnModGl0bGUgPSAiQmFzZSBDb21wb3NpdGlvbiIsCiAgICAgICB4ID0gIkJhc2UiLAogICAgICAgeSA9ICJDb3VudCIpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIHNjYWxlX2ZpbGxfbWFudWFsKHZhbHVlcyA9IGMoIkEiID0gImdyZWVuIiwgIkMiID0gImJsdWUiLCAiRyIgPSAieWVsbG93IiwgIlQiID0gInJlZCIpKQpgYGAKCgpgYGB7ciBjZywgZXZhbD1UUlVFfQojIFJlYWQgRkFTVEEgZmlsZQpmYXN0YV9maWxlIDwtICIuLi9kYXRhL0FiXzRkZW5vdm9fQ0xDNl9hLmZhIgpzZXF1ZW5jZXMgPC0gcmVhZEROQVN0cmluZ1NldChmYXN0YV9maWxlKQoKIyBDb3VudCBDRyBtb3RpZnMgaW4gZWFjaCBzZXF1ZW5jZQpjb3VudF9jZ19tb3RpZnMgPC0gZnVuY3Rpb24oc2VxdWVuY2UpIHsKICBjZ19tb3RpZiA8LSAiQ0ciCiAgcmV0dXJuKGxlbmd0aChncmVnZXhwcihjZ19tb3RpZiwgc2VxdWVuY2UsIGZpeGVkID0gVFJVRSlbWzFdXSkpCn0KCmNnX21vdGlmc19jb3VudHMgPC0gc2FwcGx5KHNlcXVlbmNlcywgY291bnRfY2dfbW90aWZzKQoKIyBDcmVhdGUgYSBkYXRhIGZyYW1lCmNnX21vdGlmc19jb3VudHNfZGYgPC0gZGF0YS5mcmFtZShDR19Db3VudCA9IGNnX21vdGlmc19jb3VudHMpCgojIFBsb3QgQ0cgbW90aWZzIGRpc3RyaWJ1dGlvbiB1c2luZyBnZ3Bsb3QyCmdncGxvdChjZ19tb3RpZnNfY291bnRzX2RmLCBhZXMoeCA9IENHX0NvdW50KSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMSwgY29sb3IgPSAiYmxhY2siLCBmaWxsID0gImJsdWUiLCBhbHBoYSA9IDAuNzUpICsKICBsYWJzKHRpdGxlID0gIkRpc3RyaWJ1dGlvbiBvZiBDRyBNb3RpZnMiLAogICAgICAgeCA9ICJOdW1iZXIgb2YgQ0cgTW90aWZzIiwKICAgICAgIHkgPSAiRnJlcXVlbmN5IikgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCgojIFJ1bm5pbmcgQmxhc3R4CgpgYGB7ciBibGFzdHgsIGVuZ2luZT0nYmFzaCd9Cn4vYXBwbGljYXRpb25zL25jYmktYmxhc3QtMi4xMy4wKy9iaW4vYmxhc3R4IFwKLXF1ZXJ5IC4uL2RhdGEvQWJfNGRlbm92b19DTEM2X2EuZmEgXAotZGIgLi4vYmxhc3RkYi91bmlwcm90X3Nwcm90X3IyMDIzXzAxIFwKLW91dCAuLi9vdXRwdXQvQWJfNC11bmlwcm90X2JsYXN0eC50YWIgXAotZXZhbHVlIDFFLTIwIFwKLW51bV90aHJlYWRzIDIwIFwKLW1heF90YXJnZXRfc2VxcyAxIFwKLW91dGZtdCA2CmBgYAoKYGBge3IgYmxhc3QtbG9vaywgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQpoZWFkIC0yIC4uL291dHB1dC9BYl80LXVuaXByb3RfYmxhc3R4LnRhYgpgYGAKCmBgYHtyIGJsYXN0LWxvb2syLCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CmVjaG8gIk51bWJlciBvZiBsaW5lcyBpbiBvdXRwdXQiCndjIC1sIC4uL291dHB1dC9B
Yl80LXVuaXByb3RfYmxhc3R4LnRhYgpgYGAKCgoKCiMgSm9pbmluZyBCbGFzdCB0YWJsZSB3aXRoIGFubm9hdGlvbnMuCgojIyBQcmVwcGluZyBCbGFzdCB0YWJsZSBmb3IgZWFzeSBqb2luCgpgYGB7ciBzZXBhcmF0ZSwgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQp0ciAnfCcgJ1x0JyA8IC4uL291dHB1dC9BYl80LXVuaXByb3RfYmxhc3R4LnRhYiBcCj4gLi4vb3V0cHV0L0FiXzQtdW5pcHJvdF9ibGFzdHhfc2VwLnRhYgoKaGVhZCAtMSAuLi9vdXRwdXQvQWJfNC11bmlwcm90X2JsYXN0eF9zZXAudGFiCgpgYGAKCiMjIENvdWxkIGRvIHNvbWUgY29vbCBzdHVmZiBpbiBSIGhlcmUgcmVhZGluZyBpbiB0YWJsZQoKYGBge3IgcmVhZC1kYXRhLCBldmFsPVRSVUUsIGNhY2hlPVRSVUV9CmJsdGFibCA8LSByZWFkLmNzdigiLi4vb3V0cHV0L0FiXzQtdW5pcHJvdF9ibGFzdHhfc2VwLnRhYiIsIHNlcCA9ICdcdCcsIGhlYWRlciA9IEZBTFNFKQoKc3BnbyA8LSByZWFkLmNzdigiaHR0cHM6Ly9nYW5uZXQuZmlzaC53YXNoaW5ndG9uLmVkdS9zZWFzaGVsbC9zbmFwcy91bmlwcm90X3RhYmxlX3IyMDIzXzAxLnRhYiIsIHNlcCA9ICdcdCcsIGhlYWRlciA9IFRSVUUpCmBgYAoKYGBge3IsIGV2YWw9VFJVRX0KZGF0YXRhYmxlKGhlYWQoYmx0YWJsKSwgb3B0aW9ucyA9IGxpc3Qoc2Nyb2xsWCA9IFRSVUUsIHNjcm9sbFkgPSAiNDAwcHgiLCBzY3JvbGxDb2xsYXBzZSA9IFRSVUUsIHBhZ2luZyA9IEZBTFNFKSkKYGBgCgpgYGB7ciBzcGdvLXRhYmxlLCBldmFsPVRSVUV9CmRhdGF0YWJsZShoZWFkKHNwZ28pLCBvcHRpb25zID0gbGlzdChzY3JvbGxYID0gVFJVRSwgc2Nyb2xsWSA9ICI0MDBweCIsIHNjcm9sbENvbGxhcHNlID0gVFJVRSwgcGFnaW5nID0gRkFMU0UpKQpgYGAKCmBgYHtyIHNlZSwgZXZhbD1UUlVFfQpkYXRhdGFibGUoCiAgbGVmdF9qb2luKGJsdGFibCwgc3BnbywgIGJ5ID0gYygiVjMiID0gIkVudHJ5IikpICU+JQogIHNlbGVjdChWMSwgVjMsIFYxMywgUHJvdGVpbi5uYW1lcywgT3JnYW5pc20sIEdlbmUuT250b2xvZ3kuLmJpb2xvZ2ljYWwucHJvY2Vzcy4sIEdlbmUuT250b2xvZ3kuSURzKSAlPiUgbXV0YXRlKFYxID0gc3RyX3JlcGxhY2VfYWxsKFYxLCAKICAgICAgICAgICAgcGF0dGVybiA9ICJzb2xpZDAwNzhfMjAxMTA0MTJfRlJBR19CQ19XSElURV9XSElURV9GM19RVl9TRV90cmltbWVkIiwgcmVwbGFjZW1lbnQgPSAiQWIiKSkKKQpgYGAKCmBgYHtyIGpvaW59CmFubm90X3RhYiA8LQogIGxlZnRfam9pbihibHRhYmwsIHNwZ28sICBieSA9IGMoIlYzIiA9ICJFbnRyeSIpKSAlPiUKICBzZWxlY3QoVjEsIFYzLCBWMTMsIFByb3RlaW4ubmFtZXMsIE9yZ2FuaXNtLCBHZW5lLk9udG9sb2d5Li5iaW9sb2dpY2FsLnByb2Nlc3MuLCBHZW5lLk9udG9sb2d5LklEcykgJT4lIG11dGF0ZShWMSA9IHN0cl9yZXBsYWNlX2FsbChWMSwgCiAgICAgICAgICAgIHBhdHRlcm4gPSAic29saWQwMDc4XzIwMTEwNDEyX0ZSQUdfQkNfV0hJ
VEVfV0hJVEVfRjNfUVZfU0VfdHJpbW1lZCIsIHJlcGxhY2VtZW50ID0gIkFiIikpCmBgYAoKCmBgYHtyLCBldmFsPVRSVUV9CiMgUmVhZCBkYXRhc2V0CmRhdGFzZXQgPC0gcmVhZC5jc3YoIi4uL291dHB1dC9ibGFzdF9hbm5vdF9nby50YWIiLCBzZXAgPSAnXHQnKSAgIyBSZXBsYWNlIHdpdGggdGhlIHBhdGggdG8geW91ciBkYXRhc2V0CgojIFNlbGVjdCB0aGUgY29sdW1uIG9mIGludGVyZXN0CmNvbHVtbl9uYW1lIDwtICJPcmdhbmlzbSIgICMgUmVwbGFjZSB3aXRoIHRoZSBuYW1lIG9mIHRoZSBjb2x1bW4gb2YgaW50ZXJlc3QKY29sdW1uX2RhdGEgPC0gZGF0YXNldFtbY29sdW1uX25hbWVdXQoKIyBDb3VudCB0aGUgb2NjdXJyZW5jZXMgb2YgdGhlIHN0cmluZ3MgaW4gdGhlIGNvbHVtbgpzdHJpbmdfY291bnRzIDwtIHRhYmxlKGNvbHVtbl9kYXRhKQoKIyBDb252ZXJ0IHRvIGEgZGF0YSBmcmFtZSwgc29ydCBieSBjb3VudCwgYW5kIHNlbGVjdCB0aGUgdG9wIDEwCnN0cmluZ19jb3VudHNfZGYgPC0gYXMuZGF0YS5mcmFtZShzdHJpbmdfY291bnRzKQpjb2xuYW1lcyhzdHJpbmdfY291bnRzX2RmKSA8LSBjKCJTdHJpbmciLCAiQ291bnQiKQpzdHJpbmdfY291bnRzX2RmIDwtIHN0cmluZ19jb3VudHNfZGZbb3JkZXIoc3RyaW5nX2NvdW50c19kZiRDb3VudCwgZGVjcmVhc2luZyA9IFRSVUUpLCBdCnRvcF8xMF9zdHJpbmdzIDwtIGhlYWQoc3RyaW5nX2NvdW50c19kZiwgbiA9IDEwKQoKIyBQbG90IHRoZSB0b3AgMTAgbW9zdCBjb21tb24gc3RyaW5ncyB1c2luZyBnZ3Bsb3QyCmdncGxvdCh0b3BfMTBfc3RyaW5ncywgYWVzKHggPSByZW9yZGVyKFN0cmluZywgLUNvdW50KSwgeSA9IENvdW50LCBmaWxsID0gU3RyaW5nKSkgKwogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBwb3NpdGlvbiA9ICJkb2RnZSIsIGNvbG9yID0gImJsYWNrIikgKwogIGxhYnModGl0bGUgPSAiVG9wIDEwIFNwZWNpZXMgaGl0cyIsCiAgICAgICB4ID0gY29sdW1uX25hbWUsCiAgICAgICB5ID0gIkNvdW50IikgKwogIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKSArCiAgY29vcmRfZmxpcCgpCmBgYAoKCgoKCiMgRm9vdG5vdGUKCldoYXQgeW91ICBzZWUgdmVyc3VzIHdoYXQgaXMgaW4gUm1kCgpXaGF0IHRoZSBSbWQgbG9va3MgbGlrZS4uLgoKYGBge3IgcmVhbCwgZWNobyA9IEZBTFNFLCBldmFsPVRSVUUsIGZpZy5hbGlnbiA9ICJsZWZ0In0Ka25pdHI6OmluY2x1ZGVfZ3JhcGhpY3MoImltZy9jb2RlMi5wbmciKQpgYGAKCgoKaXQgcG9zdCBrbml0Li4uCgpgYGB7ciB2aWV3LXF1ZXJ5LiwgZW5naW5lPSdiYXNoJywgZXZhbD1UUlVFfQpoZWFkIC0zIC4uL2RhdGEvQWJfNGRlbm92b19DTEM2X2EuZmEKYGBg