/Applications/bioinfo/ncbi-blast-2.13.0+/bin/blastx -hcd ../output
curl -O https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz
mv uniprot_sprot.fasta.gz uniprot_sprot_r2023_01.fasta.gz
gunzip -k uniprot_sprot_r2023_01.fasta.gz
ls ../output/Applications/bioinfo/ncbi-blast-2.13.0+/bin/makeblastdb \
-in ../output/uniprot_sprot_r2023_01.fasta \
-dbtype prot \
-out ../blastdb/uniprot_sprot_r2023_01curl https://eagle.fish.washington.edu/cnidarian/Ab_4denovo_CLC6_a.fa \
-k \
> ../output/Ab_4denovo_CLC6_a.fahead ../output/Ab_4denovo_CLC6_a.fa
echo "How many sequences are there?"
grep -c ">" ../output/Ab_4denovo_CLC6_a.fa/Applications/bioinfo/ncbi-blast-2.13.0+/bin/blastx \
-query ../output/Ab_4denovo_CLC6_a.fa \
-db ../blastdb/uniprot_sprot_r2023_01 \
-out ../output/Ab_4-uniprot_blastx.tab \
-evalue 1E-20 \
-num_threads 20 \
-max_target_seqs 1 \
-outfmt 6head -2 ../output/Ab_4-uniprot_blastx.tab
wc -l ../output/Ab_4-uniprot_blastx.tabcurl -O "Accept: text/plain; format=tsv" "https://rest.uniprot.org/uniprotkb/search?query=reviewed:true+AND+organism_id:9606"curl -O -H "Accept: text/plain; format=tsv" "https://rest.uniprot.org/uniprotkb/stream?compressed=true&format=fasta&query=%28%2A%29%20AND%20%28reviewed%3Atrue%29"head -2 ../output/Ab_4-uniprot_blastx.tab
wc -l ../output/Ab_4-uniprot_blastx.tabtr '|' '\t' < ../output/Ab_4-uniprot_blastx.tab | head -2tr '|' '\t' < ../output/Ab_4-uniprot_blastx.tab \
> ../output/Ab_4-uniprot_blastx_sep.tabhead -2 ../output/uniprot_table_r2023_01.tab
wc -l ../output/uniprot_table_r2023_01.tabbltabl <- read.csv("../output/Ab_4-uniprot_blastx_sep.tab", sep = '\t', header = FALSE)
spgo <- read.csv("https://gannet.fish.washington.edu/seashell/snaps/uniprot_table_r2023_01.tab", sep = '\t', header = TRUE)
str(spgo)left_join(bltabl, spgo, by = c("V3" = "Entry")) %>%
select(V1, V3, V13, Protein.names, Organism, Gene.Ontology..biological.process., Gene.Ontology.IDs) %>%
mutate(V1 = str_replace_all(V1, pattern = "solid0078_20110412_FRAG_BC_WHITE_WHITE_F3_QV_SE_trimmed", replacement = "Ab")) %>%
kbl() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))left_join(bltabl, spgo, by = c("V3" = "Entry")) %>%
select(V1, V3, V13, Protein.names, Organism, Gene.Ontology..biological.process., Gene.Ontology.IDs) %>% mutate(V1 = str_replace_all(V1,
pattern = "solid0078_20110412_FRAG_BC_WHITE_WHITE_F3_QV_SE_trimmed", replacement = "Ab")) %>%
write_delim("../output/blast_annot_go.tab", delim = '\t')#Final link to annotation table
annot_tab <- read.csv("../output/blast_annot_go.tab", sep = '\t', header = TRUE)
head(annot_tab)