1 Load libaries

library(Biostrings)
library(ape)
library(phangorn)
library(msa)
library(seqinr)
library(ggtree)
library(ggplot2)
library(gridExtra)
library(treeio)

 

1.1 Read the FASTA file back into R

seq_data <- readDNAStringSet("AlignedWithGap_CutRem.fas")

 

1.3 Perform MSA using ClustalW

msa_result <- msa(seq_data, method = "ClustalW")
## use default substitution matrix

 

1.4 Convert the MSA result to a format suitable for tree construction

alignment <- msaConvert(msa_result, type = "phangorn::phyDat")

 

1.5 Neighbor-Joining Tree

1.5.1 Create a distance matrix

dna_dist_matrix <- dist.ml(alignment)

 

1.5.2 Neighbor-Joining Tree

nj_tree <- nj(dna_dist_matrix)
plot(nj_tree, main = "Neighbor-Joining Tree",cex=0.5)

 

1.6 Maximum Likelihood Tree

# Create an initial tree using NJ
initial_tree <- nj(dna_dist_matrix)
# Optimize the tree using Maximum Likelihood
ml_tree <- pml(initial_tree, data = alignment)
optimized_ml_tree <- optim.pml(ml_tree, model = "GTR", optGamma = TRUE)

1.7 Plot the ML Tree

plot(optimized_ml_tree$tree, main = "Maximum Likelihood Tree", cex=0.5)

 

1.7.1 Bootstrap

bootstraps <- bootstrap.pml(optimized_ml_tree, bs = 100)

 

1.7.2 Add the bootstrap values

plotBS(optimized_ml_tree$tree, bootstraps, cex=0.5)

 

1.8 Root the tree using an outgroup, here: “C0801_Rhizomys_pruinosus”

## Root the tree using an 'outgroup_taxon'
outgroup_taxon <- "Calicivirus_Mink_USA_(AF338404.1)"
rooted_tree <- root(nj_tree, outgroup = outgroup_taxon, resolve.root = TRUE)

 

1.9 Plot the rooted tree

plot(rooted_tree, main = "Rooted Phylogenetic Tree", cex=0.5)

 

1.10 Root the tree using the midpoint

midpoint_rooted_tree <- midpoint(nj_tree)

 

1.11 Plot the midpoint-rooted tree

plot(midpoint_rooted_tree, main = "Midpoint-Rooted Phylogenetic Tree", cex=0.5)

 

1.12 Optimize the unrooted ML tree

rooted_ml_tree <- root(optimized_ml_tree$tree, outgroup = outgroup_taxon,
                       resolve.root = TRUE)

 

1.13 Plot the rooted ML tree

plot(rooted_ml_tree, main = "Rooted Maximum Likelihood Tree", cex=0.5)

 

1.13.1 Add a scale bar

plot(rooted_tree, main = "Rooted Phylogenetic Tree", cex=0.5)
add.scale.bar()

 

1.14 save tree using the package “ape”

write.tree(rooted_ml_tree, file='CalicivirusTree.txt')

 

2 Using ggtree to layout the tree

2.1 read the tree using the package “ape”

tree <- read.tree("CalicivirusTree.txt")

 

2.2 View the tree

ggtree(tree) + 
    geom_tiplab(size=2, align=FALSE, linesize=.1,color="red") + 
    theme_tree2()

 

2.3 Subset tree around “CAP1_37_2_Bat_Thailand_Nan”

t_subset <-  tidytree::tree_subset(tree, "CAP1_37_2_Bat_Thailand_Nan", levels_back = 4)

2.3.1 Show subset tree around “CAP1_37_2_Bat_Thailand_Nan”

 t_subset |>
  ggtree(aes(color = group)) + 
  geom_tiplab() + 
  theme_tree2() + 
  scale_color_manual(values = c(`1` = "red", `0` = "black"))

<

 


3 List of some tutorials