PON2 encodes a ubiquitous membrane-bound protein called paraoxonase 2 and is part of the paraoxonase gene family, located on human chromosome 7. It likely has an immune function, as it may act to reduce oxidative stress and can also break down acyl-homoserine lactones, which are used by Gram-negative bacteria in quorum sensing. This quorum-quenching function may help to prevent virulence factors from being expressed.
References: https://www.sciencedirect.com/topics/biochemistry-genetics-and-molecular-biology/pon2 https://www.uniprot.org/uniprot/Q15165 https://www.genecards.org/cgi-bin/carddisp.pl?gene=PON2 https://www.ncbi.nlm.nih.gov/homologene/385 https://www.ncbi.nlm.nih.gov/nuccore/209447066
Loading relevant packages:
library(rentrez)
#used to access Entrez
library(compbio4all)
#used to help clean FASTA records
library(msa)
## Loading required package: Biostrings
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: XVector
## Loading required package: GenomeInfoDb
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
#used to build MSA's
library(ggplot2)
#used for better plotting
library(pander)
#used for displaying data frames better
library(ape)
##
## Attaching package: 'ape'
## The following object is masked from 'package:Biostrings':
##
## complement
#used for phylogenies
library(drawProteins)
#used to draw protein domains
library(HGNChelper)
## Warning: package 'HGNChelper' was built under R version 4.1.2
#used for gene symbols
library(seqinr)
##
## Attaching package: 'seqinr'
## The following objects are masked from 'package:ape':
##
## as.alignment, consensus
## The following object is masked from 'package:Biostrings':
##
## translate
#used for distance alignments
Creating a data frame of 10 PON2 proteins found in different species:
# Accession table for the ten PON2 proteins analysed below. The vectors are
# parallel: element i of each one describes the same species.
# RefSeq protein accessions (used later to download the FASTA records).
Ref_Accessions <- c(
  "NP_000296.2", "NP_899131", "XP_519213", "XP_003809757", "XP_018886552",
  "NP_001080649", "NP_001003205", "NP_001013606", "XP_013835161.1",
  "NP_997899.1"
)
# UniProt accessions for the same proteins.
Uni_Accessions <- c(
  "Q15165", "Q62086", "H2QUY6", "A0A2R9A6X3", "G3RXU5",
  "Q6IRR7", "P54832", "Q58DS7", "F1SFA2", "Q6NXA5"
)
# No PDB accession is recorded for any of the proteins.
PDB_Accessions <- rep(NA, length(Ref_Accessions))
# Scientific names. The dog entry is spelled "Canis lupus familiaris"
# (previously misspelled "lupis"); these names label the MSA and the tree.
Name_Sci <- c(
  "Homo sapiens", "Mus musculus", "Pan troglodytes", "Pan paniscus",
  "Gorilla gorilla", "Xenopus laevis", "Canis lupus familiaris",
  "Bos taurus", "Sus scrofa", "Danio rerio"
)
Name_Common <- c(
  "Human", "House Mouse", "Chimpanzee", "Bonobo", "Gorilla",
  "African Clawed Frog", "Dog", "Cow", "Pig", "Zebra Fish"
)
PON2_Accession <- data.frame(
  Ref_Accessions, Uni_Accessions, PDB_Accessions, Name_Sci, Name_Common
)
Displaying Data:
# Render the accession data frame as a formatted table.
pander(PON2_Accession)
| Ref_Accessions | Uni_Accessions | PDB_Accessions | Name_Sci |
|---|---|---|---|
| NP_000296.2 | Q15165 | NA | Homo sapiens |
| NP_899131 | Q62086 | NA | Mus musculus |
| XP_519213 | H2QUY6 | NA | Pan troglodytes |
| XP_003809757 | A0A2R9A6X3 | NA | Pan paniscus |
| XP_018886552 | G3RXU5 | NA | Gorilla gorilla |
| NP_001080649 | Q6IRR7 | NA | Xenopus laevis |
| NP_001003205 | P54832 | NA | Canis lupis familiaris |
| NP_001013606 | Q58DS7 | NA | Bos taurus |
| XP_013835161.1 | F1SFA2 | NA | Sus scrofa |
| NP_997899.1 | Q6NXA5 | NA | Danio rerio |
| Name_Common |
|---|
| Human |
| House Mouse |
| Chimpanzee |
| Bonobo |
| Gorilla |
| African Clawed Frog |
| Dog |
| Cow |
| Pig |
| Zebra Fish |
Downloading all FASTA files using RefSeq Accession Numbers:
# Download one FASTA record per RefSeq accession into a one-column matrix
# (row i holds the raw FASTA text for Ref_Accessions[i]).
PON2_Sequences <- matrix(nrow = length(Ref_Accessions), ncol = 1)
# seq_len() rather than 1:nrow() so an empty accession vector runs zero
# iterations instead of iterating over c(1, 0).
for (i in seq_len(nrow(PON2_Sequences))) {
  PON2_Sequences[i, ] <- rentrez::entrez_fetch(
    db = "protein",
    id = Ref_Accessions[i],
    rettype = "fasta"
  )
}
Cleaning the FASTA file sequences we downloaded:
# Clean every downloaded FASTA record in place. parse = FALSE is passed to
# fasta_cleaner() so each row stays a single string (it is stored in one
# matrix cell and later handed to pairwiseAlignment()/AAStringSet()).
Cleaned_Sequences <- PON2_Sequences
# seq_len() rather than 1:nrow() so a zero-row matrix is handled safely.
for (i in seq_len(nrow(PON2_Sequences))) {
  Cleaned_Sequences[i, ] <- fasta_cleaner(Cleaned_Sequences[i, ], parse = FALSE)
}
Creating a data frame of features:
# Download the feature annotations for UniProt entry Q15165 (human PON2).
PON2_features <- get_features("Q15165")
## [1] "Download has worked"
# Flatten the downloaded features into the data frame used by the draw_*()
# calls that follow.
PON2_feature_df<-feature_to_dataframe(PON2_features)
## Warning in drawProteins::extract_feat_acc(features_in_lists_of_six[[i]]): NAs
## introduced by coercion
Visualizing those features:
# Build the protein schematic layer by layer. Every draw_*() call receives the
# canvas built so far plus the feature data frame, and its result becomes the
# canvas for the next layer.
my_canvas <- draw_chains(
  draw_canvas(PON2_feature_df),
  PON2_feature_df,
  label_size = 2.5
)
feature_layers <- list(
  draw_regions, draw_motif, draw_phospho,
  draw_repeat, draw_recept_dom, draw_folding
)
for (add_layer in feature_layers) {
  my_canvas <- add_layer(my_canvas, PON2_feature_df)
}
# Print the finished canvas.
my_canvas
The lack of any detail shows UniProt did not have much information on PON2’s structure.
Plotting various dot plots at different parameters vs self using the human PON2 protein sequence:
# Self-versus-self dot plots of the human PON2 sequence on a 2 x 2 grid, one
# panel per window-size / match-threshold combination.
human_PON2 <- fasta_cleaner(PON2_Sequences[1])
dot_settings <- list(
  list(wsize = 1, nmatch = 1, title = "PON2: Default"),
  list(wsize = 10, nmatch = 1, title = "PON2: wsize = 10, nmatch = 1"),
  list(wsize = 10, nmatch = 5, title = "PON2: wsize = 10, nmatch = 5"),
  list(wsize = 20, nmatch = 5, title = "PON2: wsize = 20, nmatch = 5")
)
# Tight margins so the four panels fit; only the top margin keeps room for
# the title.
par(mfrow = c(2,2), mar = c(0,0,2,1))
for (setting in dot_settings) {
  dotPlot(
    human_PON2, human_PON2,
    wsize = setting$wsize,
    nmatch = setting$nmatch,
    main = setting$title
  )
}
# Back to a single-panel layout for the plots that follow.
par(mfrow = c(1,1), mar = c(4,4,4,4))
Enlarging Best Dot Plot:
# Single large plot with the best version: same wsize/nmatch settings as the
# last panel of the 2 x 2 grid, drawn on its own.
dotPlot(human_PON2, human_PON2, wsize = 20, nmatch = 5, main = "PON2: wsize = 20, nmatch = 5")
Creating Table of PON2 features:
# Two-row summary of PON2 properties: row 1 holds the value, row 2 the URL
# the value was taken from (NA where nothing is recorded).
# The Pfam domain name is "Arylesterase" (previously misspelled).
features <- c("Arylesterase start:167 end:252", "http://pfam.xfam.org/protein/Q15165")
DisProt <- c(NA, NA)
RepeatsDB <- c(NA, NA)
Subcell_loc <- c("Membrane Protein", "https://www.uniprot.org/uniprot/Q15165")
Sec_class <- c("alpha + beta", "https://alphafold.ebi.ac.uk/entry/Q15165")
Pro_properties <- data.frame(features, DisProt, RepeatsDB, Subcell_loc, Sec_class)
# Replace the variable names with reader-friendly column headers.
colnames(Pro_properties) <- c(
  "Features", "Disorganized", "Repeats", "Subcellular", "Structure Class"
)
pander(Pro_properties)
| Features | Disorganized | Repeats |
|---|---|---|
| Arulesterase start:167 end:252 | NA | NA |
| http://pfam.xfam.org/protein/Q15165 | NA | NA |
| Subcellular |
|---|
| Membrane Protein |
| https://www.uniprot.org/uniprot/Q15165 |
| Structure Class |
|---|
| alpha + beta |
| https://alphafold.ebi.ac.uk/entry/Q15165 |
These three helper functions are necessary for the code below:
# Flatten a one-dimensional table into a plain named vector.
#
# table_x: a table (or any object carrying a "dimnames" attribute).
# Returns the table's values as a vector whose names are the first set of
# dimnames, with all other attributes dropped.
table_to_vector <- function(table_x) {
  category_labels <- dimnames(table_x)[[1]]
  flat_values <- as.vector(table_x)
  setNames(flat_values, category_labels)
}
# Similarity score between two numeric vectors: sum(x * y) divided by
# sqrt(sum(x^2) * sum(y^2)). This is an uncentred measure (no means are
# subtracted), so it is algebraically the cosine of the angle between x and y.
#
# x, y: numeric vectors of equal length.
# Returns a single number.
chou_cor <- function(x, y) {
  cross_term <- sum(x * y)
  scale_term <- sqrt(sum(x^2) * sum(y^2))
  cross_term / scale_term
}
# Cosine similarity: the dot product of the two vectors divided by the
# product of their Euclidean norms.
#
# z.1, z.2: numeric vectors of equal length.
# Returns a single number.
chou_cosine <- function(z.1, z.2) {
  dot_product <- sum(z.1 * z.2)
  norm_1 <- sqrt(sum(z.1^2))
  norm_2 <- sqrt(sum(z.2^2))
  dot_product / (norm_1 * norm_2)
}
Compiling Chou’s (1995) data for protein prediction:
# Amino-acid counts for the four fold classes; each vector lists the 20
# residues in the order A R N D C Q E G H I L K M F P S T W Y V.
alpha <- c(
  285, 53, 97, 163, 22, 67, 134, 197, 111, 91,
  221, 249, 48, 123, 82, 122, 119, 33, 63, 167
)
beta <- c(
  203, 67, 139, 121, 75, 122, 86, 297, 49, 120,
  177, 115, 16, 85, 127, 341, 253, 44, 110, 229
)
a.plus.b <- c(
  175, 78, 120, 111, 74, 74, 86, 171, 33, 93,
  110, 112, 25, 52, 71, 126, 117, 30, 108, 123
)
a.div.b <- c(
  361, 146, 183, 244, 63, 114, 257, 377, 107, 239,
  339, 321, 91, 158, 188, 327, 238, 72, 130, 378
)

# Turn counts into proportions (each vector divided by its own total).
# a.div.b is overwritten with its proportions rather than stored under a
# ".prop" name, so the data frame column is called "a.div.b".
alpha.prop <- prop.table(alpha)
beta.prop <- prop.table(beta)
a.plus.b.prop <- prop.table(a.plus.b)
a.div.b <- prop.table(a.div.b)

aa.prop <- data.frame(
  alpha.prop = alpha.prop,
  beta.prop = beta.prop,
  a.plus.b.prop = a.plus.b.prop,
  a.div.b = a.div.b
)
# One row per residue, labelled with its one-letter code.
rownames(aa.prop) <- c(
  "A", "R", "N", "D", "C", "Q", "E", "G", "H", "I",
  "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"
)
Getting PON2 amino acid frequencies:
# Relative frequency of each residue in the human PON2 sequence.
PON2_freq_table <- table(human_PON2) / length(human_PON2)
PON2_freq <- table_to_vector(PON2_freq_table)
# table() orders residues alphabetically (A, C, D, E, ...), whereas the rows
# of aa.prop follow Chou's order (A, R, N, D, ...). Assigning the vector
# positionally would attach each frequency to the wrong amino acid, so look
# the values up by residue letter instead.
PON2_freq_aligned <- PON2_freq[row.names(aa.prop)]
# A residue that never occurs in the sequence has no table entry; its
# frequency is 0.
PON2_freq_aligned[is.na(PON2_freq_aligned)] <- 0
aa.prop$PON2_freq <- unname(PON2_freq_aligned)
pander(aa.prop)
| alpha.prop | beta.prop | a.plus.b.prop | a.div.b | PON2_freq | |
|---|---|---|---|---|---|
| A | 0.1165 | 0.07313 | 0.09264 | 0.08331 | 0.06215 |
| R | 0.02166 | 0.02414 | 0.04129 | 0.03369 | 0.008475 |
| N | 0.03964 | 0.05007 | 0.06353 | 0.04223 | 0.06215 |
| D | 0.06661 | 0.04359 | 0.05876 | 0.05631 | 0.06497 |
| C | 0.008991 | 0.02702 | 0.03917 | 0.01454 | 0.0452 |
| Q | 0.02738 | 0.04395 | 0.03917 | 0.02631 | 0.06497 |
| E | 0.05476 | 0.03098 | 0.04553 | 0.05931 | 0.04237 |
| G | 0.08051 | 0.107 | 0.09052 | 0.08701 | 0.05932 |
| H | 0.04536 | 0.01765 | 0.01747 | 0.02469 | 0.05085 |
| I | 0.03719 | 0.04323 | 0.04923 | 0.05516 | 0.1328 |
| L | 0.09031 | 0.06376 | 0.05823 | 0.07824 | 0.0113 |
| K | 0.1018 | 0.04143 | 0.05929 | 0.07408 | 0.06497 |
| M | 0.01962 | 0.005764 | 0.01323 | 0.021 | 0.05367 |
| F | 0.05027 | 0.03062 | 0.02753 | 0.03646 | 0.0113 |
| P | 0.03351 | 0.04575 | 0.03759 | 0.04339 | 0.03107 |
| S | 0.04986 | 0.1228 | 0.0667 | 0.07547 | 0.0678 |
| T | 0.04863 | 0.09114 | 0.06194 | 0.05493 | 0.03955 |
| W | 0.01349 | 0.01585 | 0.01588 | 0.01662 | 0.08192 |
| Y | 0.02575 | 0.03963 | 0.05717 | 0.03 | 0.00565 |
| V | 0.06825 | 0.08249 | 0.06511 | 0.08724 | 0.03955 |
Calculating Correlation, Similarity, and Distance
# Compare the PON2 composition (column 5 of aa.prop) against each of the four
# reference fold-class compositions (columns 1-4).
pon2_composition <- aa.prop[[5]]

# Correlation
corr.alpha <- chou_cor(pon2_composition, aa.prop[[1]])
corr.beta <- chou_cor(pon2_composition, aa.prop[[2]])
corr.apb <- chou_cor(pon2_composition, aa.prop[[3]])
corr.adb <- chou_cor(pon2_composition, aa.prop[[4]])

# Cosine similarity
cos.alpha <- chou_cosine(pon2_composition, aa.prop[[1]])
cos.beta <- chou_cosine(pon2_composition, aa.prop[[2]])
cos.apb <- chou_cosine(pon2_composition, aa.prop[[3]])
cos.adb <- chou_cosine(pon2_composition, aa.prop[[4]])

# Euclidean distance: dist() works on rows, so transpose to get one row per
# composition and measure each reference row against the PON2 row (row 5).
# "euclidean" is dist()'s default method.
aa.prop.flipped <- t(aa.prop)
dist.alpha <- dist(aa.prop.flipped[c(1, 5), ])
dist.beta <- dist(aa.prop.flipped[c(2, 5), ])
dist.apb <- dist(aa.prop.flipped[c(3, 5), ])
dist.adb <- dist(aa.prop.flipped[c(4, 5), ])
Compile all the data together and display:
# Summary table: one row per fold class with its similarity and distance to
# the PON2 composition, rounded to 5 decimal places.
fold.type <- c("alpha", "beta", "alpha plus beta", "alpha/beta")
corr.sim <- round(c(corr.alpha, corr.beta, corr.apb, corr.adb), 5)
cosine.sim <- round(c(cos.alpha, cos.beta, cos.apb, cos.adb), 5)
Euclidean.dist <- round(c(dist.alpha, dist.beta, dist.apb, dist.adb), 5)
# Derive the "best match" flags from the scores instead of hard-coding a row,
# so the flags stay correct if the underlying numbers change.
sim.sum <- rep("", length(fold.type))
sim.sum[which.max(cosine.sim)] <- "most.sim"
dist.sum <- rep("", length(fold.type))
dist.sum[which.min(Euclidean.dist)] <- "min.dist"
df <- data.frame(fold.type, corr.sim, cosine.sim, Euclidean.dist, sim.sum, dist.sum)
pander(df)
| fold.type | corr.sim | cosine.sim | Euclidean.dist | sim.sum | dist.sum |
|---|---|---|---|---|---|
| alpha | 0.7545 | 0.7545 | 0.1809 | ||
| beta | 0.7594 | 0.7594 | 0.1803 | ||
| alpha plus beta | 0.8092 | 0.8092 | 0.1555 | most.sim | min.dist |
| alpha/beta | 0.8007 | 0.8007 | 0.16 |
Calculate PID for Humans, Mice, Chimpanzees, and Bonobos:
# Pairwise percent identity among the first four sequences. Every ordered
# pair (including each sequence against itself) is aligned and scored with
# pid()'s default settings.
n_compared <- 4
pid_matrix <- matrix(nrow = n_compared, ncol = n_compared)
for (row_idx in seq_len(n_compared)) {
  pid_matrix[row_idx, ] <- vapply(
    seq_len(n_compared),
    function(col_idx) {
      pair_alignment <- pairwiseAlignment(
        Cleaned_Sequences[row_idx],
        Cleaned_Sequences[col_idx]
      )
      pid(pair_alignment)
    },
    numeric(1)
  )
}
Display the PID matrix:
# Label rows and columns with the scientific names of the four compared
# species, then print the matrix.
pid_names <- Name_Sci[1:4]
dimnames(pid_matrix) <- list(pid_names, pid_names)
pid_matrix
## Homo sapiens Mus musculus Pan troglodytes Pan paniscus
## Homo sapiens 100.00000 88.13559 94.40000 94.40000
## Mus musculus 88.13559 100.00000 88.13559 88.13559
## Pan troglodytes 94.40000 88.13559 100.00000 100.00000
## Pan paniscus 94.40000 88.13559 100.00000 100.00000
Calculating PID using different methods, demonstrated through comparing human and chimp PON2 proteins:
# Score one human-vs-chimpanzee alignment with each of the four PID
# definitions and tabulate the results next to the denominator each uses.
chimp.human.align <- pairwiseAlignment(Cleaned_Sequences[1,], Cleaned_Sequences[3,])
methods <- c("PID1","PID2","PID3","PID4")
# USE.NAMES = FALSE keeps the result unnamed so the data frame does not pick
# up row names from it.
chimpPID <- vapply(
  methods,
  function(pid_type) pid(chimp.human.align, type = pid_type),
  numeric(1),
  USE.NAMES = FALSE
)
denominator <- c("aligned positions + internal gap positions","aligned positions","length shorter sequence","average length of the two sequences")
pid_data1 <- data.frame(methods, chimpPID, denominator)
pander(pid_data1)
| methods | chimpPID | denominator |
|---|---|---|
| PID1 | 94.4 | aligned positions + internal gap positions |
| PID2 | 100 | aligned positions |
| PID3 | 100 | length shorter sequence |
| PID4 | 97.12 | average length of the two sequences |
Calculating PID using different methods, demonstrated through comparing human and Zebrafish PON2 proteins:
# Same comparison for human vs zebrafish (sequence 10), reusing the `methods`
# and `denominator` vectors defined for the chimpanzee comparison.
fish.human.align <- pairwiseAlignment(Cleaned_Sequences[1,], Cleaned_Sequences[10,])
# USE.NAMES = FALSE keeps the result unnamed so the data frame does not pick
# up row names from it.
fishPID <- vapply(
  methods,
  function(pid_type) pid(fish.human.align, type = pid_type),
  numeric(1),
  USE.NAMES = FALSE
)
pid_data2 <- data.frame(methods, fishPID, denominator)
pander(pid_data2)
| methods | fishPID | denominator |
|---|---|---|
| PID1 | 53.93 | aligned positions + internal gap positions |
| PID2 | 54.55 | aligned positions |
| PID3 | 54.24 | length shorter sequence |
| PID4 | 54.16 | average length of the two sequences |
Build the MSA using all 10 PON2 protein sequences:
# Wrap the cleaned sequences in an AAStringSet and label each one with its
# species' scientific name (Name_Sci is in the same order as the sequences).
PON2_ss <- AAStringSet(Cleaned_Sequences)
names(PON2_ss) <- Name_Sci
# Align all sequences with ClustalW.
PON2_msa <- msa(PON2_ss, method = "ClustalW")
## use default substitution matrix
Display the MSA:
# Overwrite the class attribute of the msa() result before converting it.
# NOTE(review): presumably a workaround so msaConvert() accepts the object in
# this setup — confirm it is still needed with the installed msa version.
class(PON2_msa) <- "AAMultipleAlignment"
# Convert to a seqinr alignment object (also used below for the distance
# matrix), then print it in chunks of 50 alignment columns.
PON2_align_seqinr <- msaConvert(PON2_msa, type = "seqinr::alignment")
print_msa(alignment = PON2_align_seqinr,
chunksize = 50)
## [1] "MAPPTELLARPERSSAPGSRAMGRLVAVGLLGIALA-LLGERLLALRNRL 0"
## [1] "MAPPTELLARPERSSAPGSRAMGRLVAVGLLGIALA-LLGERLLALRNRL 0"
## [1] "---------------------MGRLVAVGLLGIALA-LLGERLLALRNRL 0"
## [1] "MAPPTELLARPERGSARGSRAMGRLVAVGLLGIALA-LLGERLLALRNRL 0"
## [1] "---------------------MGRLLALSLLGIALA-LLGERLLALRNRL 0"
## [1] "---------------------MGRLLALSLLGIALA-LLGERLLALRNRL 0"
## [1] "---------------------MGRLLAVGLLGLALA-LLGERLLALRNRL 0"
## [1] "---------------------MGRMVALSLLGIGLA-LLGERFLALRSRL 0"
## [1] "---------------------MGKLLKVTLIGILLA-FIGERIVQFCHRA 0"
## [1] "---------------------MGTLAFLSLAVVAFAVLIGERLISLRHVA 0"
## [1] " "
## [1] "KASREVESVD-LPHCHLIKGIEAGSEDIDILPNGLAFFSVGLKFPGLHSF 0"
## [1] "KASREVESVD-LPHCHLIKGIEAGSEDIDILPNGLAFFSVGLKFPGLHSF 0"
## [1] "KASREVESVD-LPHCHLIKGIEAGSEDIDILPNGLAFFSVGLKFPGLHSF 0"
## [1] "KASREVESVD-LPHCHLIKGIEAGSEDIDILPNGLAFFSVGLKFPGLHSF 0"
## [1] "KASREVESVD-LPNCHLIKGIEAGAEDIDILPNGLAFFSVGLKCPGLHSF 0"
## [1] "KASREVESVD-LPNCHLIKGIEAGSEDIDILPSGLAFFSVGLKCPGLHSF 0"
## [1] "KASREVESVD-LPNCHLIKGIEAGADDIDILPNGLAFFSVGLKCPGLHSF 0"
## [1] "KASREVESVD-LPNCHLIKGIETGAEDIDILPNGLAFFSVGLKFPGLHSF 0"
## [1] "NAFRKVDPVDLLPNCQLLKGIEFGSEDIEILPNGLAFISSGLKYPGVMNF 0"
## [1] "LSYRELTQNY-LPNCNFIEGIDFGAEDITIL-DGLAFLSTGLKYPGVPSY 0"
## [1] " "
## [1] "APDKPGGILMMDLKEEKPRARELRISRGFDLASFNPHGISTFID-NDDTV 0"
## [1] "APDKPGGILMMDLKEEKPRARELRISRGFDLASFNPHGISTFID-NDDTV 0"
## [1] "APDKPGGILMMDLKEEKPRARELRISRGFDLASFNPHGISTFID-NDDTV 0"
## [1] "APDKPGGILMMDLKEEKPRARELRISRGFDLASFNPHGISTFID-NDDTV 0"
## [1] "APDKPGGILMMDLNEENPRALELRVSRGFNLASFNPHGISTFID-SDDTV 0"
## [1] "APDKPGGILMMDLKEENPRALELRISRGFNLASFNPHGISTFID-SDDTV 0"
## [1] "SPDKPGGILLMDLKKENPRALELRISRGFNLASFNPHGISTFID-SDDTV 0"
## [1] "APDKPGGILMMDLKDERPRALELRVSWGFDLASFNPHGISTFID-DDDTV 0"
## [1] "QPDKPGEIFLLDLNDEKLRPVPLRLSRGFDFSTFNPHGMSTYIDPKDDTV 0"
## [1] "SED-PGKIYTLNLLDSEQKIKVLHIRGDFDKDSFNPHGISVYTDDKDGAI 0"
## [1] " "
## [1] "YLFVVNHPEFKNTVEIFKFEEAENSLLHLKTVKHELLPSVNDITAVGPAH 0"
## [1] "YLFVVNHPEFKNTVEIFKFEEAENSLLHLKTVKHELLPSVNDITAVGPAH 0"
## [1] "YLFVVNHPEFKNTVEIFKFEEAENSLLHLKTVKHELLPSVNDITAVGPAH 0"
## [1] "YLFVVNHPEFKNTVEIFKFEEAENSLLHLKTVKHELLPSVNDITAVGPAH 0"
## [1] "YLFVVNHPEFKNTVEIFKFEEEENSLLHLKTIKHELLPSVNDIIAVGPEH 0"
## [1] "YLFVVNHPEFKNTVEIFKFEEEENSLLHLKTIKHELLP------------ 0"
## [1] "YLFVVNHPEFKNTVEIFKFEEEENSLLHLKTIKHELLPSVNDIIAVGPAH 0"
## [1] "YLFVVNHPQFKSTVEIFKFQEEENSLLHLKTIKHELLPSVNDIIAVGPTH 0"
## [1] "YLFVVNHPLYKTTIELFKFEEEENVLLHLKTIKHDLMWSANDIVAVGPES 0"
## [1] "YLFVVNHPQGKSQVEIFRFLENENALEYLKTIRHELLHNVNDIVAVGTES 0"
## [1] " "
## [1] "FYATNDHYFSDPFLKYLETYLNLHWANVVYYSPNEVKVVAEGFDSANGIN 0"
## [1] "FYATNDHYFSDPFLKYLETYLNLHWANVVYYSPNEVKVVAEGFDSANGIN 0"
## [1] "FYATNDHYFSDPFLKYLETYLNLHWANVVYYSPNEVKVVAEGFDSANGIN 0"
## [1] "FYATNDHYFSDPFLKYLETYLNLHWANVVYYSPNEVKVVAEGFDSANGIN 0"
## [1] "FYATNDHYFSDPFLKYLETYLNLHWTNVVYYSPNEVKVVAEGFDSANGIN 0"
## [1] "-------------------------------------------------- 0"
## [1] "FYATNDHYFSDPFLKYLETYLNLHWANVVYYSPDEVKVVAEGFDAANGIN 0"
## [1] "FYATNDHYFSDPFLKYLETYLNLHWANVVYYSPEEVKLVAEGFDSANGIN 0"
## [1] "FYTTNDLYFTDFTMRQLEIFLGIAWSNVIYYSPTEVKQVSSGYYYANGIA 0"
## [1] "FYATNDHYFTNDILKIVEPFLSLPWCDVVYYSPETVQVVAGGFLSANGIN 0"
## [1] " "
## [1] "ISPDDKYIYVADILAHEIHVLEKHTNMNLTQLKVLELDTLVDNLSIDPSS 0"
## [1] "ISPDDKYIYVADILAHEIHVLEKHTNMNLTQLKVLELDTLVDNLSIDPSS 0"
## [1] "ISPDDKYIYVADILAHEIHVLEKHTNMNLTQLKVLELDTLVDNLSIDPSS 0"
## [1] "ISPDDKYIYVADILAHEIHVLEKHTNMNLTQLKVLELDTLVDNLSIDPSS 0"
## [1] "ISPDKKYIYVADILAHEIHVLEKHPNMNLTQLKVLKLDTLVDNLSIDPSS 0"
## [1] "-----RYIYVADILAHEIHVLEKQPNMNLTQLKVLELDTLVDNISIDPSS 0"
## [1] "ISPDKKYIYVADILAHEIHVLEKHPNMNLTQLKVLKLDTLVDNLSIDPSS 0"
## [1] "ISPDKKYVYVADILAHEIHVLEKQPNMNLTQLKVLQLGTLVDNLSIDPSS 0"
## [1] "MSTDNKYIYVADIMGHTIDILEKQADWSLTPVKVLKLDTLLDNLFVDPNT 0"
## [1] "ISPDKRHLYVSHILKHTIAVLEIQKNTVLSHVKEIDVGSLCDNIEVDRET 0"
## [1] " "
## [1] "GDIWVGCHPNGQKLFVYDPNNPPSSEVLRIQNILSEKPTVTTVYANNGSV 0"
## [1] "GDIWVGCHPNGQKLFVYDPNNPPSSEVLRIQNILSEKPTVTTVYANNGSV 0"
## [1] "GDIWVGCHPNGQKLFVYDPNNPPSSEVLRIQNILSEKPTVTTVYANNGSV 0"
## [1] "GDIWVGCHPNGQKLFVYDPNNPPSSEVLRIQNILSEKPTVTTVYANNGSV 0"
## [1] "GDVLVGCHPNGQKLFVYDPKNPPSSEVLRIQNILSEKPTVTTVYANNGSV 0"
## [1] "GDILVGCHPNGQKLFVYDPNNPPSSEVLRIQNILSEKPTVTTVYANNGSI 0"
## [1] "GDILVGCHPNGQKLFIYDPNNPPSSEVLRIQNILSEKPTVTTVYANNGSV 0"
## [1] "GDIWVGCHPNGQRLFVYHPNHPPASEVLRIQNILSEKPSVTTVYINNGSV 0"
## [1] "GDIWTGAHPNGWKLFSYNSDDLPGSEVIRVQNIHSDNPIVTQVYVNNGSV 0"
## [1] "GDLWIGCHPNGLKCVFHDPNDPPGSEVIRIENILSEKPQVTQVYSDDGSV 0"
## [1] " "
## [1] "LQGSSVASVYDGKLLIGTLYHRALYCEL- 21"
## [1] "LQGSSVASVYDGKLLIGTLYHRALYCEL- 21"
## [1] "LQGSSVASVYDGKLLIGTLYHRALYCEL- 21"
## [1] "LQGSSVASVYDGKLLIGTLYHRALYCEL- 21"
## [1] "LQGSSVASVYDKKLLIGTLYHRALYCEL- 21"
## [1] "LQGSSVASLYDRKLLIGTLYHRALYCEL- 21"
## [1] "LQGSSVASVYDRKLLIGTLYHRALYCEL- 21"
## [1] "LQGSSVATIYDRKLLVGTLYQKALYCEL- 21"
## [1] "IQASSSAAVYEGKLLIGTVFHKALCCELS 21"
## [1] "IIASSVAAPYREKLLIGTVYQKALICDLK 21"
## [1] " "
#ggmsa(PON2_msa,start=160,end=260)
Building a Distance Matrix with all sequences for use in constructing a phylogenetic tree:
# Pairwise distances between all aligned sequences, based on identity.
# NOTE(review): per the seqinr documentation dist.alignment() returns the
# square root of the pairwise distance — confirm before reading these values
# as a plain fraction of differing sites.
PON2_dist <- dist.alignment(PON2_align_seqinr, matrix = "identity")
# Rounded copy (3 decimal places) for display; it is also what nj() receives
# when the tree is built.
PON2_dist_round <- round(PON2_dist, 3)
PON2_dist_round
## Pan troglodytes Pan paniscus Homo sapiens
## Pan paniscus 0.000
## Homo sapiens 0.000 0.000
## Gorilla gorilla 0.073 0.073 0.000
## Bos taurus 0.260 0.260 0.260
## Sus scrofa 0.264 0.264 0.264
## Canis lupis familiaris 0.260 0.260 0.260
## Mus musculus 0.344 0.344 0.344
## Xenopus laevis 0.618 0.618 0.618
## Danio rerio 0.674 0.674 0.674
## Gorilla gorilla Bos taurus Sus scrofa
## Pan paniscus
## Homo sapiens
## Gorilla gorilla
## Bos taurus 0.260
## Sus scrofa 0.264 0.213
## Canis lupis familiaris 0.260 0.219 0.236
## Mus musculus 0.344 0.332 0.354
## Xenopus laevis 0.618 0.618 0.619
## Danio rerio 0.674 0.668 0.683
## Canis lupis familiaris Mus musculus Xenopus laevis
## Pan paniscus
## Homo sapiens
## Gorilla gorilla
## Bos taurus
## Sus scrofa
## Canis lupis familiaris
## Mus musculus 0.349
## Xenopus laevis 0.620 0.633
## Danio rerio 0.672 0.661 0.701
Making a neighbor-joining phylogenetic tree using all PON2 sequences (nj() returns an unrooted tree, although plot.phylo draws it in a rooted-looking layout):
# Build a neighbor-joining tree from the rounded distance matrix.
PON2_tree <- nj(PON2_dist_round)
# Draw the topology only (branch lengths ignored). FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
plot.phylo(PON2_tree, use.edge.length = FALSE, main = "PON2 Protein Family Tree")