library(seqinr)
library(ape)
##
## Attaching package: 'ape'
## The following objects are masked from 'package:seqinr':
##
## as.alignment, consensus
library(ggtree)
## Registered S3 method overwritten by 'ggtree':
## method from
## identify.gg ggfun
## ggtree v3.2.1 For help: https://yulab-smu.top/treedata-book/
##
## If you use ggtree in published research, please cite the most appropriate paper(s):
##
## 1. Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
## 2. Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution. 2018, 35(12):3041-3043. doi:10.1093/molbev/msy194
## 3. Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
##
## Attaching package: 'ggtree'
## The following object is masked from 'package:ape':
##
## rotate
library(DECIPHER)
## Loading required package: Biostrings
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:ggtree':
##
## expand
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:ggtree':
##
## collapse
## Loading required package: XVector
## Loading required package: GenomeInfoDb
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:ape':
##
## complement
## The following object is masked from 'package:seqinr':
##
## translate
## The following object is masked from 'package:base':
##
## strsplit
## Loading required package: RSQLite
## Loading required package: parallel
library(viridis)
## Loading required package: viridisLite
library(ggplot2)
library(ade4)
##
## Attaching package: 'ade4'
## The following object is masked from 'package:Biostrings':
##
## score
## The following object is masked from 'package:BiocGenerics':
##
## score
seqs <- readDNAStringSet("Litho_ITS.txt", format = "fasta")
seqs
## DNAStringSet object of length 15:
## width seq names
## [1] 575 TCGAAACCTGCCCCGCAGAACGA...GCCCCGGCGGCGCTCGCAACGC AF389105.1 Lithoc...
## [2] 581 TCGAAACCTGCCCCGCAGAACGA...GCCCCGGCGGCGCTCGCAACGC AY040419.1 Lithoc...
## [3] 580 TCGAAACCTACACAGCAGAACGA...GCCTCGGCGGCGCTCACAACGC AY040407.1 Lithoc...
## [4] 411 AGGATCATTGTCGAACCTGCACA...CCCCCAAAACCCCCGGGTTCGG MT274517.1 Lithoc...
## [5] 579 TCGAAACCTACACAGCAGAACGA...GCCTCGGCGGCGCTCACAACGC AY040406.1 Lithoc...
## ... ... ...
## [11] 396 AAACCGAACCCCGGTGCAGAACA...GTGGTTTTTCGACCCTCGTTCC KJ685201.1 Lithoc...
## [12] 396 AAACAGAACCCCAGTGCGGAATG...GTGGTTTTTCAACCTCCGTTCC KJ685198.1 Lithoc...
## [13] 396 AAACCGAACGCTGCACGGAACAC...GTGATTTTTCGACCTTCGTTCC KJ685169.1 Lithoc...
## [14] 590 TCGAAACCTGCACAGCAGAACGA...GCCTCGGCGGCGCTCGCAACGC AF389107.1 Lithoc...
## [15] 594 TCGAAACCTGCCCCGCAGAACGA...GCCCCGGCGGCGCTCGCAACGC AF389106.1 Lithoc...
seqs <- OrientNucleotides(seqs)
## ========================================================================================================================================================================================================
##
## Time difference of 0.02 secs
aligned <- AlignSeqs(seqs)
## Determining distance matrix based on shared 9-mers:
## ================================================================================
##
## Time difference of 0 secs
##
## Clustering into groups by similarity:
## ================================================================================
##
## Time difference of 0 secs
##
## Aligning Sequences:
## ================================================================================
##
## Time difference of 0.06 secs
##
## Iteration 1 of 2:
##
## Determining distance matrix based on alignment:
## ================================================================================
##
## Time difference of 0 secs
##
## Reclustering into groups by similarity:
## ================================================================================
##
## Time difference of 0 secs
##
## Realigning Sequences:
## ================================================================================
##
## Time difference of 0.03 secs
##
## Iteration 2 of 2:
##
## Determining distance matrix based on alignment:
## ================================================================================
##
## Time difference of 0 secs
##
## Reclustering into groups by similarity:
## ================================================================================
##
## Time difference of 0 secs
##
## Realigning Sequences:
## ================================================================================
##
## Time difference of 0 secs
##
## Refining the alignment:
## ================================================================================
##
## Time difference of 0.01 secs
BrowseSeqs(aligned, highlight=0)
writeXStringSet(aligned, file=“Litho_aligned.fasta”)
dna <- read.alignment("Litho_aligned.fasta", format = "fasta")
D <- dist.alignment(dna, matrix = "similarity")
temp <- as.data.frame(as.matrix(D))
table.paint(temp, cleg=0, clabel.row=.5, clabel.col=.5)+scale_color_viridis()
## NULL
tre <- nj(D)
class(tre) # tất cả cây được tạo ra sử dụng packages ape
## [1] "phylo"
tre <- ladderize(tre)
plot(tre, cex = 0.6)
plot(tre, cex = 0.6)
title("similarity in Lithocarpus (ITS)")
h_cluster <- hclust(D, method = "average", members = NULL)
plot(h_cluster, cex = 0.6)
ggtree(tre, yscale = "NA")+
geom_tiplab(hjust = -0.3, size=4, align = TRUE)+
xlim(0,0.5)
## Warning: The tree contained negative edge length. If you want to ignore the edge, you
## can set 'options(ignore.negative.edge=TRUE)', then re-run ggtree.
## Warning in scaleY(as.phylo(model), res, yscale, layout, ...): yscale is not available...
ggtree(tre) +
geom_tiplab(hjust = -0.3, size=4, align = TRUE)+
xlim(0,0.5)
## Warning: The tree contained negative edge length. If you want to ignore the edge, you
## can set 'options(ignore.negative.edge=TRUE)', then re-run ggtree.
ggtree(tre) +
geom_tiplab(hjust = -0.3, size=4, align = TRUE) +
geom_hilight(node=19, fill="purple", alpha = 0.2) +
geom_hilight(node=17, fill="dark green", alpha = 0.2) +
geom_hilight(node=20, fill="gold", alpha = 0.2) +
xlim(0,0.5)
## Warning: The tree contained negative edge length. If you want to ignore the edge, you
## can set 'options(ignore.negative.edge=TRUE)', then re-run ggtree.
ggtree(tre) +
geom_tiplab(hjust = -0.3, size=4, align = TRUE) +
geom_hilight(node=19, fill="purple", alpha = 0.2) +
geom_hilight(node=17, fill="dark green", alpha = 0.2) +
geom_hilight(node=20, fill="gold", alpha = 0.2) +
geom_cladelabel(node=19, label=" Cluster 1",
color="purple", offset=.1, barsize = 2,
fontsize = 5, align=TRUE, alpha = 0.5) +
geom_cladelabel(node=17, label=" Cluster 2",
color="dark green", offset=.1, barsize = 2,
fontsize = 5, align=TRUE, alpha = 0.5) +
geom_cladelabel(node=20, label=" Cluster 3",
color="gold", offset=.1, barsize = 2,
fontsize = 5, align=TRUE, alpha = 0.5) +
xlim(0,0.5)
## Warning: The tree contained negative edge length. If you want to ignore the edge, you
## can set 'options(ignore.negative.edge=TRUE)', then re-run ggtree.
tre.new <- tre
# change tip labels to full alignment names
tre.new$tip.label <- aligned@ranges@NAMES
# plot the alignment
msaplot(p=ggtree(tre.new), fasta="Litho_aligned.fasta",
window=c(150, 175))+
scale_fill_viridis_d(alpha = 0.8)
## Warning: The tree contained negative edge length. If you want to ignore the edge, you
## can set 'options(ignore.negative.edge=TRUE)', then re-run ggtree.
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.