By: Nathan L. Brouwer
Computational phylogenetics applies computational algorithms, methods, and programs to assemble a phylogenetic tree about the evolutionary ancestry of a set of genes, species, or other taxa. While traditionally reliant on morphological data, the more recent field of molecular phylogenetics uses nucleotide sequences for classification. Sequence alignments are used to construct phylogenetic trees which help classify evolutionary relationships between homologous genes in genomes of divergent species.
Gene Genus Genome Morphology Monophyletic Homonid Clade Dendrogram Taxonomy
knitr::opts_chunk$set rentrez::entrez_fetch devtools::install_github ggmsa::ggmsa BiocManager::install Biostrings::AAStringSet Biostrings::pid
install.packages(“devtools”) library(devtools)
github packages devtools::install_github(“brouwern/compbio4all”) library(compbio4all) devtools::install_github(“YuLab-SMU/ggmsa”) library(ggmsa)
CRAN packages install.packages(“rentrez”) library(rentrez) install.packages(“seqinr”) library(seqinr) install.packages(“ape”) library(ape)
Bioconductor packages install.packages(“BiocManager”) library(BiocManager) BiocManager::install(“Biostrings”) BiocManager::install(“msa”) BiocManager::install(“score”)
#install.packages("devtools")
library(devtools)
# github packages
#devtools::install_github("brouwern/compbio4all")
library(compbio4all)
#devtools::install_github("YuLab-SMU/ggmsa")
library(ggmsa)
# CRAN packages
#install.packages("rentrez")
library(rentrez)
#install.packages("seqinr")
library(seqinr)
#install.packages("ape")
library(ape)
# Bioconductor packages
#install.packages("BiocManager")
library(BiocManager)
#BiocManager::install("Biostrings")
#BiocManager::install("msa")
#BiocManager::install("score")
Here, we access rentrez to access and download sequence data while assigning it to a variable.
library(rentrez)
# Human shroom 3 (H. sapiens)
hShroom3 <- rentrez::entrez_fetch(db = "protein",
id = "NP_065910",
rettype = "fasta")
Cat() is a way to print out the sequence the way it would show on a web browser, with respect to slash n notation
cat(hShroom3)
## >NP_065910.3 protein Shroom3 [Homo sapiens]
## MMRTTEDFHKPSATLNSNTATKGRYIYLEAFLEGGAPWGFTLKGGLEHGEPLIISKVEEGGKADTLSSKL
## QAGDEVVHINEVTLSSSRKEAVSLVKGSYKTLRLVVRRDVCTDPGHADTGASNFVSPEHLTSGPQHRKAA
## WSGGVKLRLKHRRSEPAGRPHSWHTTKSGEKQPDASMMQISQGMIGPPWHQSYHSSSSTSDLSNYDHAYL
## RRSPDQCSSQGSMESLEPSGAYPPCHLSPAKSTGSIDQLSHFHNKRDSAYSSFSTSSSILEYPHPGISGR
## ERSGSMDNTSARGGLLEGMRQADIRYVKTVYDTRRGVSAEYEVNSSALLLQGREARASANGQGYDKWSNI
## PRGKGVPPPSWSQQCPSSLETATDNLPPKVGAPLPPARSDSYAAFRHRERPSSWSSLDQKRLCRPQANSL
## GSLKSPFIEEQLHTVLEKSPENSPPVKPKHNYTQKAQPGQPLLPTSIYPVPSLEPHFAQVPQPSVSSNGM
## LYPALAKESGYIAPQGACNKMATIDENGNQNGSGRPGFAFCQPLEHDLLSPVEKKPEATAKYVPSKVHFC
## SVPENEEDASLKRHLTPPQGNSPHSNERKSTHSNKPSSHPHSLKCPQAQAWQAGEDKRSSRLSEPWEGDF
## QEDHNANLWRRLEREGLGQSLSGNFGKTKSAFSSLQNIPESLRRHSSLELGRGTQEGYPGGRPTCAVNTK
## AEDPGRKAAPDLGSHLDRQVSYPRPEGRTGASASFNSTDPSPEEPPAPSHPHTSSLGRRGPGPGSASALQ
## GFQYGKPHCSVLEKVSKFEQREQGSQRPSVGGSGFGHNYRPHRTVSTSSTSGNDFEETKAHIRFSESAEP
## LGNGEQHFKNGELKLEEASRQPCGQQLSGGASDSGRGPQRPDARLLRSQSTFQLSSEPEREPEWRDRPGS
## PESPLLDAPFSRAYRNSIKDAQSRVLGATSFRRRDLELGAPVASRSWRPRPSSAHVGLRSPEASASASPH
## TPRERHSVTPAEGDLARPVPPAARRGARRRLTPEQKKRSYSEPEKMNEVGIVEEAEPAPLGPQRNGMRFP
## ESSVADRRRLFERDGKACSTLSLSGPELKQFQQSALADYIQRKTGKRPTSAAGCSLQEPGPLRERAQSAY
## LQPGPAALEGSGLASASSLSSLREPSLQPRREATLLPATVAETQQAPRDRSSSFAGGRRLGERRRGDLLS
## GANGGTRGTQRGDETPREPSSWGARAGKSMSAEDLLERSDVLAGPVHVRSRSSPATADKRQDVLLGQDSG
## FGLVKDPCYLAGPGSRSLSCSERGQEEMLPLFHHLTPRWGGSGCKAIGDSSVPSECPGTLDHQRQASRTP
## CPRPPLAGTQGLVTDTRAAPLTPIGTPLPSAIPSGYCSQDGQTGRQPLPPYTPAMMHRSNGHTLTQPPGP
## RGCEGDGPEHGVEEGTRKRVSLPQWPPPSRAKWAHAAREDSLPEESSAPDFANLKHYQKQQSLPSLCSTS
## DPDTPLGAPSTPGRISLRISESVLRDSPPPHEDYEDEVFVRDPHPKATSSPTFEPLPPPPPPPPSQETPV
## YSMDDFPPPPPHTVCEAQLDSEDPEGPRPSFNKLSKVTIARERHMPGAAHVVGSQTLASRLQTSIKGSEA
## ESTPPSFMSVHAQLAGSLGGQPAPIQTQSLSHDPVSGTQGLEKKVSPDPQKSSEDIRTEALAKEIVHQDK
## SLADILDPDSRLKTTMDLMEGLFPRDVNLLKENSVKRKAIQRTVSSSGCEGKRNEDKEAVSMLVNCPAYY
## SVSAPKAELLNKIKEMPAEVNEEEEQADVNEKKAELIGSLTHKLETLQEAKGSLLTDIKLNNALGEEVEA
## LISELCKPNEFDKYRMFIGDLDKVVNLLLSLSGRLARVENVLSGLGEDASNEERSSLYEKRKILAGQHED
## ARELKENLDRRERVVLGILANYLSEEQLQDYQHFVKMKSTLLIEQRKLDDKIKLGQEQVKCLLESLPSDF
## IPKAGALALPPNLTSEPIPAGGCTFSGIFPTLTSPL
This code chunk pings the server in Maryland and downloads the sequences for the mouse shroom, human, shroom, and sea urchin shroom.
# Mouse shroom 3a (M. musculus)
mShroom3a <- rentrez::entrez_fetch(db = "protein",
id = "AAF13269",
rettype = "fasta")
# Human shroom 2 (H. sapiens)
hShroom2 <- rentrez::entrez_fetch(db = "protein",
id = "CAA58534",
rettype = "fasta")
# Sea-urchin shroom
sShroom <- rentrez::entrez_fetch(db = "protein",
id = "XP_783573",
rettype = "fasta")
Here, we look at the number of characters (amino acids) in the sequence to check the size of the gene length.
nchar(hShroom3)
## [1] 2070
nchar(mShroom3a)
## [1] 2083
nchar(sShroom)
## [1] 1758
nchar(hShroom2)
## [1] 1673
Fasta_cleaner is a function in compbio4all, written by D. Brouwer. This function calls it from his github repository.
fasta_cleaner
## function (fasta_object, parse = TRUE)
## {
## fasta_object <- sub("^(>)(.*?)(\\n)(.*)(\\n\\n)", "\\4",
## fasta_object)
## fasta_object <- gsub("\n", "", fasta_object)
## if (parse == TRUE) {
## fasta_object <- stringr::str_split(fasta_object, pattern = "",
## simplify = FALSE)
## }
## return(fasta_object[[1]])
## }
## <bytecode: 0x7fc150634bb0>
## <environment: namespace:compbio4all>
If compbio4all is not downloaded, we can look at the fasta_cleaner R file and copy it into a code chunk manually.
fasta_cleaner <- function(fasta_object, parse = TRUE){
fasta_object <- sub("^(>)(.*?)(\\n)(.*)(\\n\\n)","\\4",fasta_object)
fasta_object <- gsub("\n", "", fasta_object)
if(parse == TRUE){
fasta_object <- stringr::str_split(fasta_object,
pattern = "",
simplify = FALSE)
}
return(fasta_object[[1]])
}
Fasta_cleaner is being called on each of the shroom types to parse through notation in the sequence and make them alignable.
hShroom3 <- fasta_cleaner(hShroom3, parse = F)
mShroom3a <- fasta_cleaner(mShroom3a, parse = F)
hShroom2 <- fasta_cleaner(hShroom2, parse = F)
sShroom <- fasta_cleaner(sShroom, parse = F)
hShroom3
## [1] "MMRTTEDFHKPSATLNSNTATKGRYIYLEAFLEGGAPWGFTLKGGLEHGEPLIISKVEEGGKADTLSSKLQAGDEVVHINEVTLSSSRKEAVSLVKGSYKTLRLVVRRDVCTDPGHADTGASNFVSPEHLTSGPQHRKAAWSGGVKLRLKHRRSEPAGRPHSWHTTKSGEKQPDASMMQISQGMIGPPWHQSYHSSSSTSDLSNYDHAYLRRSPDQCSSQGSMESLEPSGAYPPCHLSPAKSTGSIDQLSHFHNKRDSAYSSFSTSSSILEYPHPGISGRERSGSMDNTSARGGLLEGMRQADIRYVKTVYDTRRGVSAEYEVNSSALLLQGREARASANGQGYDKWSNIPRGKGVPPPSWSQQCPSSLETATDNLPPKVGAPLPPARSDSYAAFRHRERPSSWSSLDQKRLCRPQANSLGSLKSPFIEEQLHTVLEKSPENSPPVKPKHNYTQKAQPGQPLLPTSIYPVPSLEPHFAQVPQPSVSSNGMLYPALAKESGYIAPQGACNKMATIDENGNQNGSGRPGFAFCQPLEHDLLSPVEKKPEATAKYVPSKVHFCSVPENEEDASLKRHLTPPQGNSPHSNERKSTHSNKPSSHPHSLKCPQAQAWQAGEDKRSSRLSEPWEGDFQEDHNANLWRRLEREGLGQSLSGNFGKTKSAFSSLQNIPESLRRHSSLELGRGTQEGYPGGRPTCAVNTKAEDPGRKAAPDLGSHLDRQVSYPRPEGRTGASASFNSTDPSPEEPPAPSHPHTSSLGRRGPGPGSASALQGFQYGKPHCSVLEKVSKFEQREQGSQRPSVGGSGFGHNYRPHRTVSTSSTSGNDFEETKAHIRFSESAEPLGNGEQHFKNGELKLEEASRQPCGQQLSGGASDSGRGPQRPDARLLRSQSTFQLSSEPEREPEWRDRPGSPESPLLDAPFSRAYRNSIKDAQSRVLGATSFRRRDLELGAPVASRSWRPRPSSAHVGLRSPEASASASPHTPRERHSVTPAEGDLARPVPPAARRGARRRLTPEQKKRSYSEPEKMNEVGIVEEAEPAPLGPQRNGMRFPESSVADRRRLFERDGKACSTLSLSGPELKQFQQSALADYIQRKTGKRPTSAAGCSLQEPGPLRERAQSAYLQPGPAALEGSGLASASSLSSLREPSLQPRREATLLPATVAETQQAPRDRSSSFAGGRRLGERRRGDLLSGANGGTRGTQRGDETPREPSSWGARAGKSMSAEDLLERSDVLAGPVHVRSRSSPATADKRQDVLLGQDSGFGLVKDPCYLAGPGSRSLSCSERGQEEMLPLFHHLTPRWGGSGCKAIGDSSVPSECPGTLDHQRQASRTPCPRPPLAGTQGLVTDTRAAPLTPIGTPLPSAIPSGYCSQDGQTGRQPLPPYTPAMMHRSNGHTLTQPPGPRGCEGDGPEHGVEEGTRKRVSLPQWPPPSRAKWAHAAREDSLPEESSAPDFANLKHYQKQQSLPSLCSTSDPDTPLGAPSTPGRISLRISESVLRDSPPPHEDYEDEVFVRDPHPKATSSPTFEPLPPPPPPPPSQETPVYSMDDFPPPPPHTVCEAQLDSEDPEGPRPSFNKLSKVTIARERHMPGAAHVVGSQTLASRLQTSIKGSEAESTPPSFMSVHAQLAGSLGGQPAPIQTQSLSHDPVSGTQGLEKKVSPDPQKSSEDIRTEALAKEIVHQDKSLADILDPDSRLKTTMDLMEGLFPRDVNLLKENSVKRKAIQRTVSSSGCEGKRNEDKEAVSMLVNCPAYYSVSAPKAELLNKIKEMPAEVNEEEEQADVNEKKAELIGSLTHKLETLQEAKGSLLTDIKLNNALGEEVEALISELCKPNEFDKYRMFIGDLDKVVNLLLSLSGRLARVENVLSGLGEDASNEERSSLYEKRKILAGQHEDARELKENLDRRERVVLGILANYLSEEQLQDYQHFVKMKSTLLIEQRKLDDKIKLGQEQVKCLLESLPSDFIPKAGALALPPNLTSEPIPAGGCTFSGIFPTLTSPL"
nchar(hShroom3)
## [1] 1996
Pairwise alignment is the little sibling of msa. This code chunk lines up the sequences of amino acids so we can look at the similarities and differences between different sequences. In this case, it compares the human shroom 3 versus mouse 3a.
# add necessary function
align.h3.vs.m3a <- Biostrings::pairwiseAlignment(
hShroom3,
mShroom3a)
This object is an algorithm for comparing two sequences and figuring out how they pair up. Like a blast search, it gives a score to rank the match along with the pattern versus subject. The dashes are indels, or gaps; these indicate that either an insertion or deletion is present.
align.h3.vs.m3a
## Global PairwiseAlignmentsSingleSubject (1 of 1)
## pattern: MMRTTEDFHKPSATLN-SNTATKGRYIYLEAFLE...KAGALALPPNLTSEPIPAGGCTFSGIFPTLTSPL
## subject: MK-TPENLEEPSATPNPSRTPTE-RFVYLEALLE...KAGAISLPPALTGHATPGGTSVFGGVFPTLTSPL
## score: 2189.934
Percent identity shows a rough percentage similarity between the two sequences. Indels are skipped.
# add necessary function
Biostrings::pid(align.h3.vs.m3a)
## [1] 70.56511
In comparison, this alignment method compares two shrooms and assigns the value to a variable without displaying it.
align.h3.vs.h2 <- Biostrings::pairwiseAlignment(
hShroom3,
hShroom2)
Here, we calculate the score. In contrast to the previous chunk, this displays the result immediately.
Biostrings::score(align.h3.vs.h2)
## [1] -5673.853
Pid() stands for the percent identity; it estimates the similarity between two sequences and drops indels. Score() is the mathematical summary of the quality for the alignment; this is used as a relative comparison.
Biostrings::pid(align.h3.vs.h2)
## [1] 33.83277
Here, we take the accession number, name origin, and Shroom name and lay it out in a vector so we can make it into a table.
shroom_table <- c("CAA78718" , "X. laevis Apx" , "xShroom1",
"NP_597713" , "H. sapiens APXL2" , "hShroom1",
"CAA58534" , "H. sapiens APXL", "hShroom2",
"ABD19518" , "M. musculus Apxl" , "mShroom2",
"AAF13269" , "M. musculus ShroomL" , "mShroom3a",
"AAF13270" , "M. musculus ShroomS" , "mShroom3b",
"NP_065910", "H. sapiens Shroom" , "hShroom3",
"ABD59319" , "X. laevis Shroom-like", "xShroom3",
"NP_065768", "H. sapiens KIAA1202" , "hShroom4a",
"AAK95579" , "H. sapiens SHAP-A" , "hShroom4b",
#"DQ435686" , "M. musculus KIAA1202" , "mShroom4",
"ABA81834" , "D. melanogaster Shroom", "dmShroom",
"EAA12598" , "A. gambiae Shroom", "agShroom",
"XP_392427" , "A. mellifera Shroom" , "amShroom",
"XP_783573" , "S. purpuratus Shroom" , "spShroom") #sea urchin
This code chunk creates a neat table out of the raw data above to perform further analysis.
# convert to XXXXXXXXXC
shroom_table_matrix <- matrix(shroom_table,
byrow = T,
nrow = 14)
# convert to XXXXXXXXXC
shroom_table <- data.frame(shroom_table_matrix,
stringsAsFactors = F)
# XXXXXXXXXC columns
names(shroom_table) <- c("accession", "name.orig","name.new")
# Create simplified species names
shroom_table$spp <- "Homo"
shroom_table$spp[grep("laevis",shroom_table$name.orig)] <- "Xenopus"
shroom_table$spp[grep("musculus",shroom_table$name.orig)] <- "Mus"
shroom_table$spp[grep("melanogaster",shroom_table$name.orig)] <- "Drosophila"
shroom_table$spp[grep("gambiae",shroom_table$name.orig)] <- "mosquito"
shroom_table$spp[grep("mellifera",shroom_table$name.orig)] <- "bee"
shroom_table$spp[grep("purpuratus",shroom_table$name.orig)] <- "sea urchin"
Calling the object displays the table.
shroom_table
## accession name.orig name.new spp
## 1 CAA78718 X. laevis Apx xShroom1 Xenopus
## 2 NP_597713 H. sapiens APXL2 hShroom1 Homo
## 3 CAA58534 H. sapiens APXL hShroom2 Homo
## 4 ABD19518 M. musculus Apxl mShroom2 Mus
## 5 AAF13269 M. musculus ShroomL mShroom3a Mus
## 6 AAF13270 M. musculus ShroomS mShroom3b Mus
## 7 NP_065910 H. sapiens Shroom hShroom3 Homo
## 8 ABD59319 X. laevis Shroom-like xShroom3 Xenopus
## 9 NP_065768 H. sapiens KIAA1202 hShroom4a Homo
## 10 AAK95579 H. sapiens SHAP-A hShroom4b Homo
## 11 ABA81834 D. melanogaster Shroom dmShroom Drosophila
## 12 EAA12598 A. gambiae Shroom agShroom mosquito
## 13 XP_392427 A. mellifera Shroom amShroom bee
## 14 XP_783573 S. purpuratus Shroom spShroom sea urchin
The $ allows us to extract accession numbers from the tables.
shroom_table$accession
## [1] "CAA78718" "NP_597713" "CAA58534" "ABD19518" "AAF13269" "AAF13270"
## [7] "NP_065910" "ABD59319" "NP_065768" "AAK95579" "ABA81834" "EAA12598"
## [13] "XP_392427" "XP_783573"
This function calls fasta sequences from the NCBI database and stores it in the variable shrooms.
# add necessary function
shrooms <- rentrez::entrez_fetch(db = "protein",
id = shroom_table$accession,
rettype = "fasta")
Cat() prints out the sequences with respect to slash n notation.
cat(shrooms)
From the protein database, we pull all the accession numbers from the shroom table and return it as a fasta file. The entrez_fetch_list is a wrapper function to entrez_fetch that relies on dependencies.
shrooms_list <- compbio4all::entrez_fetch_list(db = "protein",
id = shroom_table$accession,
rettype = "fasta")
is(shrooms_list)
## [1] "list" "vector" "list_OR_List" "vector_OR_Vector"
## [5] "vector_OR_factor"
length(shrooms_list)
## [1] 14
nchar(shrooms_list)
## CAA78718 NP_597713 CAA58534 ABD19518 AAF13269 AAF13270 NP_065910 ABD59319
## 1486 915 1673 1543 2083 1895 2070 1864
## NP_065768 AAK95579 ABA81834 EAA12598 XP_392427 XP_783573
## 1560 778 1647 750 2230 1758
By checking the length, we can see the number of entries in the data structure. To see the individual sequence in the list, use nchar() for vectorization.
length(shrooms_list)
## [1] 14
We repeat cleaning with fasta_cleaner for a set amount of times determined by a for loop.
for(i in 1:length(shrooms_list)){
shrooms_list[[i]] <- fasta_cleaner(shrooms_list[[i]], parse = F)
}
TODO: summarize what is going on in this code chunk, then annotate each line of code with what its doing With 14 sequences, we
# XXXXXXXXCX
shrooms_vector <- rep(NA, length(shrooms_list))
# XXXXXXXXCX
for(i in 1:length(shrooms_vector)){
shrooms_vector[i] <- shrooms_list[[i]]
}
# XXXXXXXXCX
names(shrooms_vector) <- names(shrooms_list)
We convert our cleaned vector into a string set with the Amino Acid String Set.
# add necessary function
shrooms_vector_ss <- Biostrings::AAStringSet(shrooms_vector)
TODO: briefly summarize what this section of the document will do.
Readings will be assigned to explain what MSAs are.
With the string set, we use the reimplementation msa to align our shrooms.
# add necessary function
library(msa)
## Loading required package: Biostrings
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: XVector
## Loading required package: GenomeInfoDb
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:ape':
##
## complement
## The following object is masked from 'package:seqinr':
##
## translate
## The following object is masked from 'package:base':
##
## strsplit
##
## Attaching package: 'msa'
## The following object is masked from 'package:BiocManager':
##
## version
shrooms_align <- msa(shrooms_vector_ss,
method = "ClustalW")
## use default substitution matrix
TODO: briefly summarize what this section will do.
This outputs the raw unprocessed data from R that includes the sequences and the accession numbers.
shrooms_align
## CLUSTAL 2.1
##
## Call:
## msa(shrooms_vector_ss, method = "ClustalW")
##
## MsaAAMultipleAlignment with 14 rows and 2252 columns
## aln names
## [1] -------------------------...------------------------- NP_065768
## [2] -------------------------...------------------------- AAK95579
## [3] -------------------------...SVFGGVFPTLTSPL----------- AAF13269
## [4] -------------------------...SVFGGVFPTLTSPL----------- AAF13270
## [5] -------------------------...CTFSGIFPTLTSPL----------- NP_065910
## [6] -------------------------...NKS--LPPPLTSSL----------- ABD59319
## [7] -------------------------...------------------------- CAA58534
## [8] -------------------------...------------------------- ABD19518
## [9] -------------------------...LT----------------------- NP_597713
## [10] -------------------------...------------------------- CAA78718
## [11] -------------------------...------------------------- EAA12598
## [12] -------------------------...------------------------- ABA81834
## [13] MTELQPSPPGYRVQDEAPGPPSCPP...------------------------- XP_392427
## [14] -------------------------...AATSSSSNGIGGPEQLNSNATSSYC XP_783573
## Con -------------------------...------------------------- Consensus
TODO: briefly explain what is being done in this chunk. This is tricky (and annoying) so do your best This is data preparation for further printing.
# WHAT IS THE LINE BELOW DOING? (its tricky - do your best)
class(shrooms_align) <- "AAMultipleAlignment"
# WHAT IS THE LINE BELOW DOING? This is simpler
shrooms_align_seqinr <- msaConvert(shrooms_align, type = "seqinr::alignment")
After all the preparation, we can print the cleaner version of letters and dashes to the console.
print_msa(alignment = shrooms_align_seqinr,
chunksize = 60)
The function ggmsa prints the output in a neater fashion and determines where to start and end the alignment rather than the entirety of it as the previous commands have done.
## add necessary function
library(ggmsa)
ggmsa::ggmsa(shrooms_align, # shrooms_align, NOT shrooms_align_seqinr
start = 2000,
end = 2100)
TODO: explain what this command is doing. Add the package the function is coming from using :: notation This may not work for everyone. If its not working you can comment it out. Instead of printing the output to R, we can save it to a PDF. The variable y sets the limits of the residues for the saving.
msaPrettyPrint(shrooms_align, # alignment
file = "shroom_msa.pdf", # file name
y=c(2000, 2100), # range
askForOverwrite=FALSE)
This command tells us the file path of the current working directory. We can use this to find where the file has been saved onto our drives. List.files gives all the files in the working directory.
getwd()
## [1] "/Users/dragon/Downloads"