Introduction
A disintegrin and metalloproteinase domain-containing protein is a protein involved in numerous biological processes such as cell to cell and cell matrix interaction, including fertilization, muscle development, and neurogenesis. The purpose of the project is to give an in depth analysis of ADAM 17 in Homo-sapiens and other organsims.
Preparation
Load following packages:
# github packages
library(compbio4all)
library(ggmsa)
# CRAN packages
library(rentrez)
library(seqinr)
library(ape)
library(pander)
library(ggplot2)
# Bioconductor packages
library(BiocManager)
library(drawProteins)
library(msa)
# Biostrings
library(Biostrings)
library(HGNChelper)
Accession Numbers
Refseq <- c("NP_003174", "XP_515293","XP_002799185.1","XP_005630166.1","XP_002691532.1", "NP_033745.4", "NP_064702.1", "NP_001008682.1", "NP_001182159.1", "NP_733334.1")
Uniprot <- c( "P78536", "H2R900", "I0FUK6", "A0A5F4CDW3", "E1B867", "Q9Z0F8", "Q9Z1K9", "Q5QHR9", "E2IFV1" ,"Q9VAC5")
PDB <- c("2M2F", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A", "N/A")
Scien_Name <- c("Homo Sapiens", "Pan Troglodytes", "Macaca Mulatta", "Canis Lupus", "Bos Taurus", "Mus Musculus", "Rattus Norvegicus", "Gallus Gallus", "Xenopus Tropicalis", "Drosophilia Melanogaster")
Common_Name <- c( "Human", "Chimpanzee", "Rhesus Monkey", "Dog", "Cattle", "House Mouse", "Norway Rat","Chicken", "Tropical Frog", "Fruit Fly" )
Gene <- c("ADAM17", "ADAM17", "ADAM17", "ADAM17", "ADAM17", "ADAM17", "ADAM17", "ADAM17", "ADAM17", "TACE" )
Adam17.df <- data.frame( NCBI.Protein.Accession = Refseq,
Uniprot.ID = Uniprot,
PDB = PDB,
Species = Scien_Name,
Common.Name = Common_Name,
Gene = Gene)
pander::pander(Adam17.df)
Table continues below
| NP_003174 |
P78536 |
2M2F |
Homo Sapiens |
| XP_515293 |
H2R900 |
N/A |
Pan Troglodytes |
| XP_002799185.1 |
I0FUK6 |
N/A |
Macaca Mulatta |
| XP_005630166.1 |
A0A5F4CDW3 |
N/A |
Canis Lupus |
| XP_002691532.1 |
E1B867 |
N/A |
Bos Taurus |
| NP_033745.4 |
Q9Z0F8 |
N/A |
Mus Musculus |
| NP_064702.1 |
Q9Z1K9 |
N/A |
Rattus Norvegicus |
| NP_001008682.1 |
Q5QHR9 |
N/A |
Gallus Gallus |
| NP_001182159.1 |
E2IFV1 |
N/A |
Xenopus Tropicalis |
| NP_733334.1 |
Q9VAC5 |
N/A |
Drosophilia Melanogaster |
| Human |
ADAM17 |
| Chimpanzee |
ADAM17 |
| Rhesus Monkey |
ADAM17 |
| Dog |
ADAM17 |
| Cattle |
ADAM17 |
| House Mouse |
ADAM17 |
| Norway Rat |
ADAM17 |
| Chicken |
ADAM17 |
| Tropical Frog |
ADAM17 |
| Fruit Fly |
TACE |
Data Preparation
HomoSapien_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_003174",
rettype = "fasta")
Chimp_fasta <- rentrez::entrez_fetch(db = "protein",
id = "XP_515293 ",
rettype = "fasta")
Rmonkey_fasta <- rentrez::entrez_fetch(db = "protein",
id = " XP_002799185.1",
rettype = "fasta")
Dog_fasta <- rentrez::entrez_fetch(db = "protein",
id = " XP_005630166.1",
rettype = "fasta")
Cattle_fasta <- rentrez::entrez_fetch(db = "protein",
id = "XP_002691532.1",
rettype = "fasta")
Mouse_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_033745.4",
rettype = "fasta")
Rat_fasta <- rentrez::entrez_fetch(db = "protein",
id = " NP_064702.1",
rettype = "fasta")
Chicken_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_001008682.1",
rettype = "fasta")
Tropfrog_fasta <- rentrez::entrez_fetch(db = "protein",
id = " NP_001182159.1",
rettype = "fasta")
Drosophilia_fasta <- rentrez::entrez_fetch(db = "protein",
id = " NP_733334.1",
rettype = "fasta")
Adam17_list <- list(HomoSapien_fasta,Chimp_fasta, Rmonkey_fasta,Dog_fasta, Cattle_fasta, Mouse_fasta, Rat_fasta, Chicken_fasta, Tropfrog_fasta, Drosophilia_fasta)
for(i in 1:length(Adam17_list)){
Adam17_list[[i]] <- fasta_cleaner(Adam17_list[[i]], parse = F)
}
Adam17_vector <- rep(NA, length(Adam17_list))
names(Adam17_vector) <- names(Adam17_list)
for(i in 1:length(Adam17_vector)){
Adam17_vector[i] <- Adam17_list[[i]]
}
Protein Diagram
Homosap_json <- drawProteins::get_features("P78536")
## [1] "Download has worked"
is(Homosap_json)
## [1] "list" "vector" "list_OR_List" "vector_OR_Vector"
## [5] "vector_OR_factor"
my_prot_df <- drawProteins::feature_to_dataframe(Homosap_json)
is(my_prot_df)
## [1] "data.frame" "list" "oldClass" "vector"
## [5] "list_OR_List" "vector_OR_Vector" "vector_OR_factor"
my_canvas <- draw_canvas(my_prot_df)
my_canvas <- draw_chains(my_canvas, my_prot_df,
label_size = 2.5)
my_canvas <- draw_domains(my_canvas, my_prot_df)
my_canvas
## Dotplot
par(mfrow = c(2,2),
mar = c(0,0,2,1))
HomoSapien_vector <- fasta_cleaner(HomoSapien_fasta)
dotPlot(HomoSapien_vector, HomoSapien_vector,
wsize = 13.5,
nmatch = 3,
main = "Human")
par(mfrow = c(1,1),
mar = c(4,4,4,4))

Protein Properties Compiled From Databases
Descript <- c("ADAM17 is a single pass type I membrane protein","AlphaFold predicts with high confidence that Adam17 contains a mix of alpha helices and beta sheets" )
Source <- c("Uniprot", "AlphaFold")
Links <- c("https://www.uniprot.org/uniprot/P78536","https://alphafold.ebi.ac.uk/entry/O77636")
Property.df <- data.frame(Source = Source,
ProteinProperty = Descript,
Links = Links)
pander::pander(Property.df)
Table continues below
| Uniprot |
ADAM17 is a single pass type I membrane protein |
| AlphaFold |
AlphaFold predicts with high confidence that Adam17 contains a mix of alpha helices and beta sheets |
Prediction of Protein Fold
aa.1 <- c("A","R","N","D","C","Q","E","G","H","I",
"L","K","M","F","P","S","T","W","Y","V")
#From Chou Data
alpha <- c(285, 53, 97, 163, 22, 67, 134, 197, 111, 91,
221, 249, 48, 123, 82, 122, 119, 33, 63, 167)
beta <- c(203, 67, 139, 121, 75, 122, 86, 297, 49, 120,
177, 115, 16, 85, 127, 341, 253, 44, 110, 229)
a.plus.b <- c(175, 78, 120, 111, 74, 74, 86, 171, 33, 93,
110, 112, 25, 52, 71, 126, 117, 30, 108, 123)
a.div.b <- c(361, 146, 183, 244, 63, 114, 257, 377, 107, 239,
339, 321, 91, 158, 188, 327, 238, 72, 130, 378)
alpha.prop <- alpha/sum(alpha)
beta.prop <- beta/sum(beta)
a.plus.b.prop <- a.plus.b/sum(a.plus.b)
a.div.b <- a.div.b/sum(a.div.b)
aa.prop <- data.frame(
alpha.prop,
beta.prop,
a.plus.b.prop,
a.div.b)
row.names(aa.prop) <- aa.1
pander::pander(aa.prop)
| A |
0.1165 |
0.07313 |
0.09264 |
0.08331 |
| R |
0.02166 |
0.02414 |
0.04129 |
0.03369 |
| N |
0.03964 |
0.05007 |
0.06353 |
0.04223 |
| D |
0.06661 |
0.04359 |
0.05876 |
0.05631 |
| C |
0.008991 |
0.02702 |
0.03917 |
0.01454 |
| Q |
0.02738 |
0.04395 |
0.03917 |
0.02631 |
| E |
0.05476 |
0.03098 |
0.04553 |
0.05931 |
| G |
0.08051 |
0.107 |
0.09052 |
0.08701 |
| H |
0.04536 |
0.01765 |
0.01747 |
0.02469 |
| I |
0.03719 |
0.04323 |
0.04923 |
0.05516 |
| L |
0.09031 |
0.06376 |
0.05823 |
0.07824 |
| K |
0.1018 |
0.04143 |
0.05929 |
0.07408 |
| M |
0.01962 |
0.005764 |
0.01323 |
0.021 |
| F |
0.05027 |
0.03062 |
0.02753 |
0.03646 |
| P |
0.03351 |
0.04575 |
0.03759 |
0.04339 |
| S |
0.04986 |
0.1228 |
0.0667 |
0.07547 |
| T |
0.04863 |
0.09114 |
0.06194 |
0.05493 |
| W |
0.01349 |
0.01585 |
0.01588 |
0.01662 |
| Y |
0.02575 |
0.03963 |
0.05717 |
0.03 |
| V |
0.06825 |
0.08249 |
0.06511 |
0.08724 |
HomoSapien_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_003174",
rettype = "fasta")
HomoSapien_vector <- compbio4all:: fasta_cleaner(HomoSapien_fasta, parse = TRUE)
homosapien.freq.table <- table(HomoSapien_vector)/length(HomoSapien_vector)
table_to_vector <- function(table_x){
table_names <- attr(table_x, "dimnames")[[1]]
table_vect <- as.vector(table_x)
names(table_vect) <- table_names
return(table_vect)
}
Adam17.human.aa.freq <- table_to_vector(homosapien.freq.table)
aa.prop$Adam17.human.aa.freq <- Adam17.human.aa.freq
Adam17.human.aa.freq
## A C D E F G
## 0.046116505 0.042475728 0.078883495 0.070388350 0.043689320 0.054611650
## H I K L M N
## 0.024271845 0.042475728 0.075242718 0.076456311 0.018203883 0.050970874
## P Q R S T V
## 0.055825243 0.040048544 0.047330097 0.077669903 0.049757282 0.066747573
## W Y
## 0.007281553 0.031553398
aa.names <- names(Adam17.human.aa.freq)
i.U <- which(aa.names == "U")
aa.names[i.U]
## character(0)
Adam17.human.aa.freq[i.U]
## named numeric(0)
aa.prop$Adam.human.aa.freq <- Adam17.human.aa.freq
pander::pander(aa.prop)
Table continues below
| A |
0.1165 |
0.07313 |
0.09264 |
0.08331 |
| R |
0.02166 |
0.02414 |
0.04129 |
0.03369 |
| N |
0.03964 |
0.05007 |
0.06353 |
0.04223 |
| D |
0.06661 |
0.04359 |
0.05876 |
0.05631 |
| C |
0.008991 |
0.02702 |
0.03917 |
0.01454 |
| Q |
0.02738 |
0.04395 |
0.03917 |
0.02631 |
| E |
0.05476 |
0.03098 |
0.04553 |
0.05931 |
| G |
0.08051 |
0.107 |
0.09052 |
0.08701 |
| H |
0.04536 |
0.01765 |
0.01747 |
0.02469 |
| I |
0.03719 |
0.04323 |
0.04923 |
0.05516 |
| L |
0.09031 |
0.06376 |
0.05823 |
0.07824 |
| K |
0.1018 |
0.04143 |
0.05929 |
0.07408 |
| M |
0.01962 |
0.005764 |
0.01323 |
0.021 |
| F |
0.05027 |
0.03062 |
0.02753 |
0.03646 |
| P |
0.03351 |
0.04575 |
0.03759 |
0.04339 |
| S |
0.04986 |
0.1228 |
0.0667 |
0.07547 |
| T |
0.04863 |
0.09114 |
0.06194 |
0.05493 |
| W |
0.01349 |
0.01585 |
0.01588 |
0.01662 |
| Y |
0.02575 |
0.03963 |
0.05717 |
0.03 |
| V |
0.06825 |
0.08249 |
0.06511 |
0.08724 |
| A |
0.04612 |
0.04612 |
| R |
0.04248 |
0.04248 |
| N |
0.07888 |
0.07888 |
| D |
0.07039 |
0.07039 |
| C |
0.04369 |
0.04369 |
| Q |
0.05461 |
0.05461 |
| E |
0.02427 |
0.02427 |
| G |
0.04248 |
0.04248 |
| H |
0.07524 |
0.07524 |
| I |
0.07646 |
0.07646 |
| L |
0.0182 |
0.0182 |
| K |
0.05097 |
0.05097 |
| M |
0.05583 |
0.05583 |
| F |
0.04005 |
0.04005 |
| P |
0.04733 |
0.04733 |
| S |
0.07767 |
0.07767 |
| T |
0.04976 |
0.04976 |
| W |
0.06675 |
0.06675 |
| Y |
0.007282 |
0.007282 |
| V |
0.03155 |
0.03155 |
Calculating Similarities
chou_cor <- function(x,y){
numerator <- sum(x*y)
denominator <- sqrt((sum(x^2))*(sum(y^2)))
result <- numerator/denominator
return(result)
}
chou_cosine <- function(z.1, z.2){
z.1.abs <- sqrt(sum(z.1^2))
z.2.abs <- sqrt(sum(z.2^2))
my.cosine <- sum(z.1*z.2)/(z.1.abs*z.2.abs)
return(my.cosine)
}
par(mfrow = c(2,2), mar = c(1,4,1,1))
plot(alpha.prop ~ Adam17.human.aa.freq, data = aa.prop)
plot(beta.prop ~ Adam17.human.aa.freq, data = aa.prop)
plot(a.plus.b.prop ~ Adam17.human.aa.freq, data = aa.prop)
plot(a.div.b ~ Adam17.human.aa.freq, data = aa.prop)

corr.alpha <- chou_cor(aa.prop[,5], aa.prop[,1])
corr.beta <- chou_cor(aa.prop[,5], aa.prop[,2])
corr.apb <- chou_cor(aa.prop[,5], aa.prop[,3])
corr.adb <- chou_cor(aa.prop[,5], aa.prop[,4])
cos.alpha <- chou_cosine(aa.prop[,5], aa.prop[,1])
cos.beta <- chou_cosine(aa.prop[,5], aa.prop[,2])
cos.apb <- chou_cosine(aa.prop[,5], aa.prop[,3])
cos.adb <- chou_cosine(aa.prop[,5], aa.prop[,4])
aa.prop.flipped <- t(aa.prop)
round(aa.prop.flipped,2)
## A R N D C Q E G H I L
## alpha.prop 0.12 0.02 0.04 0.07 0.01 0.03 0.05 0.08 0.05 0.04 0.09
## beta.prop 0.07 0.02 0.05 0.04 0.03 0.04 0.03 0.11 0.02 0.04 0.06
## a.plus.b.prop 0.09 0.04 0.06 0.06 0.04 0.04 0.05 0.09 0.02 0.05 0.06
## a.div.b 0.08 0.03 0.04 0.06 0.01 0.03 0.06 0.09 0.02 0.06 0.08
## Adam17.human.aa.freq 0.05 0.04 0.08 0.07 0.04 0.05 0.02 0.04 0.08 0.08 0.02
## Adam.human.aa.freq 0.05 0.04 0.08 0.07 0.04 0.05 0.02 0.04 0.08 0.08 0.02
## K M F P S T W Y V
## alpha.prop 0.10 0.02 0.05 0.03 0.05 0.05 0.01 0.03 0.07
## beta.prop 0.04 0.01 0.03 0.05 0.12 0.09 0.02 0.04 0.08
## a.plus.b.prop 0.06 0.01 0.03 0.04 0.07 0.06 0.02 0.06 0.07
## a.div.b 0.07 0.02 0.04 0.04 0.08 0.05 0.02 0.03 0.09
## Adam17.human.aa.freq 0.05 0.06 0.04 0.05 0.08 0.05 0.07 0.01 0.03
## Adam.human.aa.freq 0.05 0.06 0.04 0.05 0.08 0.05 0.07 0.01 0.03
dist(aa.prop.flipped, method = "euclidean")
## alpha.prop beta.prop a.plus.b.prop a.div.b
## beta.prop 0.13342098
## a.plus.b.prop 0.09281824 0.08289406
## a.div.b 0.06699039 0.08659174 0.06175113
## Adam17.human.aa.freq 0.16845806 0.16242520 0.14086756 0.14971043
## Adam.human.aa.freq 0.16845806 0.16242520 0.14086756 0.14971043
## Adam17.human.aa.freq
## beta.prop
## a.plus.b.prop
## a.div.b
## Adam17.human.aa.freq
## Adam.human.aa.freq 0.00000000
dist.alpha <- dist((aa.prop.flipped[c(1,5),]), method = "euclidean")
dist.beta <- dist((aa.prop.flipped[c(2,5),]), method = "euclidean")
dist.apb <- dist((aa.prop.flipped[c(3,5),]), method = "euclidean")
dist.adb <- dist((aa.prop.flipped[c(4,5),]), method = "euclidean")
fold.type <- c("alpha","beta","alpha plus beta", "alpha/beta")
# data
corr.sim <- round(c(corr.alpha,corr.beta,corr.apb,corr.adb),5)
cosine.sim <- round(c(cos.alpha,cos.beta,cos.apb,cos.adb),5)
Euclidean.dist <- round(c(dist.alpha,dist.beta,dist.apb,dist.adb),5)
# summary
sim.sum <- c("","","most.sim","")
dist.sum <- c("","","min.dist","")
df <- data.frame(fold.type,
corr.sim ,
cosine.sim ,
Euclidean.dist ,
sim.sum ,
dist.sum )
pander::pander(df)
| alpha |
0.774 |
0.774 |
0.1685 |
|
|
| beta |
0.7939 |
0.7939 |
0.1624 |
|
|
| alpha plus beta |
0.8304 |
0.8304 |
0.1409 |
most.sim |
min.dist |
| alpha/beta |
0.8124 |
0.8124 |
0.1497 |
|
|
Percent Identity Comparisons (PID)
HomoSapien_vector <- fasta_cleaner(HomoSapien_fasta, parse =F)
Chimp_vector <- fasta_cleaner(Chimp_fasta, parse = F)
Rmonkey_vector <- fasta_cleaner(Rmonkey_fasta, parse = F)
Dog_vector <- fasta_cleaner(Dog_fasta, parse = F)
align1.2 <- Biostrings::pairwiseAlignment(HomoSapien_vector, Chimp_vector)
align1.3 <- Biostrings::pairwiseAlignment(HomoSapien_vector, Rmonkey_vector)
align1.4 <- Biostrings::pairwiseAlignment(HomoSapien_vector, Dog_vector)
align2.3 <- Biostrings::pairwiseAlignment(Chimp_vector, Rmonkey_vector)
align2.4 <- Biostrings::pairwiseAlignment(Chimp_vector, Dog_vector)
align3.4 <- Biostrings::pairwiseAlignment(Rmonkey_vector, Dog_vector)
Biostrings::pid(align1.2)
## [1] 100
Biostrings::pid(align1.3)
## [1] 85.80097
Biostrings::pid(align1.4)
## [1] 93.95405
pids <- c(1, NA, NA, NA,
pid(align1.2), 1, NA, NA,
pid(align1.3), pid(align2.3), 1, NA,
pid(align1.4), pid(align2.4), pid(align3.4), 1)
mat <- matrix(pids, nrow = 4, byrow = T)
row.names(mat) <- c("Homo","Chimpanzee","Monkey","Dog")
colnames(mat) <- c("Homo","Chimpanzee","Monkey","Dog")
pander::pander(mat)
| Homo |
1 |
NA |
NA |
NA |
| Chimpanzee |
100 |
1 |
NA |
NA |
| Monkey |
85.8 |
85.8 |
1 |
NA |
| Dog |
93.95 |
93.95 |
81.02 |
1 |
PID Methods Comparison
Biostrings::pid(align1.2, type = "PID1")
## [1] 100
Biostrings::pid(align1.2, type = "PID2")
## [1] 100
Biostrings::pid(align1.2, type = "PID3")
## [1] 100
Biostrings::pid(align1.2, type = "PID4")
## [1] 100
Method <- c("PID1", "PID2", "PID3", "PID4")
PID <- c("100", "100", "100", "100")
Denominator <- c( "alligned position + internal gap positions", "aligned postions", "length shorter sequence", "average length of the two sequences")
diff_pid.df <- data.frame(Method = Method,
PID = PID,
Denominator = Denominator)
pander::pander(diff_pid.df)
| PID1 |
100 |
alligned position + internal gap positions |
| PID2 |
100 |
aligned postions |
| PID3 |
100 |
length shorter sequence |
| PID4 |
100 |
average length of the two sequences |
MSA Data Preperation
for(i in 1:length(Adam17_list)){
Adam17_list[[i]] <- fasta_cleaner(Adam17_list[[i]], parse = F)
}
# make a vector to hold each sequence
Adam_vector <- rep(NA, length(Adam17_list))
# name the vector (this makes ggmsa happy)
names(Adam17_vector) <- names(Adam17_list)
# extract the sequences from list and put into vector
for(i in 1:length(Adam17_vector)){
Adam17_vector[i] <- Adam17_list[[i]]
}
Adam17_vector_ss <- Biostrings::AAStringSet(Adam17_vector)
Adam17_align <- msa(Adam17_vector_ss,
method = "ClustalW")
## use default substitution matrix
class(Adam17_align)
## [1] "MsaAAMultipleAlignment"
## attr(,"package")
## [1] "msa"
is(Adam17_align)
## [1] "MsaAAMultipleAlignment" "AAMultipleAlignment" "MsaMetaData"
## [4] "MultipleAlignment"
class(Adam17_align) <- "AAMultipleAlignment"
Adam17_align_seqinr <- msaConvert(Adam17_align,
type = "seqinr::alignment")
compbio4all::print_msa(Adam17_align_seqinr)
## [1] "-----------MRQSLLFLTSVVPFVLAPRPPDDPGFGPHQRLEKLDSLLSDYDILSLSN 0"
## [1] "-----------MRQSLLFLTSVVPFVLAPRPPDDPGFGPHQRLEKLDSLLSDYDILSLSN 0"
## [1] "-----------MRQSLLFLTSVVPFVLAPRPPDDPSFGPHQRLVP--------------- 0"
## [1] "-----------MRRCVLFWTSVVPLVLAPRPPEQ----PPQRLENLDSLLSDYDILSLSN 0"
## [1] "-----------MRHRVLLLTSLVHLVLAPRPPDER--ESPRRFEKLDSLLSDYDILSLSN 0"
## [1] "-----------MRRRLLILTTLVPFVLAPRPPEEAGSGSHPRLEKLDSLLSDYDILSLAN 0"
## [1] "-----------MRQRLLFLTTLVPFVLAPRPPEEPGSGSHLRLEKLDSLLSDYDILSLSN 0"
## [1] "-----------MRLRLWLVSALALLWAPGGLQALGRLPREQRYDAVESMLSNYDILSQSS 0"
## [1] "MKVLRLMGQLSLVEVIFALLLCLVVAEADHIPFGSQKEMPAKHQGLDSLLSEYDVLSLSS 0"
## [1] "-----------------MFTKCISCCGLAIISVFFACLFVENCAALQKTLRHYEIFHKDD 0"
## [1] " "
## [1] "IQQHSVRKRDLQTSTHVETL--LTFSALKRHFKLYLTSSTERFSQNFKVVVVDGK-NESE 0"
## [1] "IQQHSVRKRDLQTSTHVETL--LTFSALKRHFKLYLTSSTERFSQNFKVVVVDGK-NESE 0"
## [1] "----------------------------ERHFKLYLTSSTERFSQNFKVVVVDGK-NESE 0"
## [1] "IQQHSVRKRDLQASTHIETL--LTFSALKRHFKLYLTSSTERFSQNFKVVVVDGK-DESE 0"
## [1] "IQQHSVRKRELQASTHLETL--LTFSALKRHFKLYLTSSTERFSKNFKVVVVDGK-DERE 0"
## [1] "IQQHSIRKRDLQSATHLETL--LTFSALKRHFKLYLTSSTERFSQNLRVVVVDGK-EESE 0"
## [1] "IQQHSIRKRDLQSATHLETL--LTFSALKRHFKLYLTSSTERFSQNLRVVVVDGK-EESE 0"
## [1] "IQQHSLKKRDLQPETHVERL--LSFSALQRHFKLYLTATAEHFSERFQALIVDGEGKEKE 0"
## [1] "IQQHSLRKRDLQSQSQLERM--LSFTALQRHFKLYLTSSTELVSENLEALVLDGNGKEKK 0"
## [1] "VVHRVVKRGAKHSTNPFNTIKEVEFTTLGKNFRLILHPHRDVLHSKFRAYAVDADGNETV 0"
## [1] " "
## [1] "YTVKWQDFFTGHVVGEPDSRVLAHIRDDDVIIRINTDGAEYNIEPLWRFVNDTKDKRMLV 0"
## [1] "YTVKWQDFFTGHVVGEPDSRVLAHIRDDDVIIRINTDGAEYNIEPLWRFVNDTKDKRMLV 0"
## [1] "YTVKWQDFFTGHVVGEPDSRVLAHIRDDDVIIRINTDGAEYNIEPLWRFVNDTKDKRMLV 0"
## [1] "YTVKWQDFFSGHVVGEPDSRVLAHIGDDDMTIRINTDGAEYNIEPLWRLINDTKDKRMLV 0"
## [1] "YPVKWQDFFSGHVVGEPDSRVLAHIGDDDITIRINTDGAEYNIEPLWRLINDTKDKRMLV 0"
## [1] "YSVKWQNFFSGHVVGEPDSRVLAHIGDDDVTVRINTDGAEYNVEPLWRFVNDTKDKRMLV 0"
## [1] "YSVKWQDFFSGHVVGEPDSRVLAHIGDDDVTVRINTDGAEYNIEPLWRFVNDTKDKRMLV 0"
## [1] "YRVQWQDFFTGHVVGEHNSKVVAHIGDEDFTVRINTDGEEYNIEPLWRFIDNVQDERLLV 0"
## [1] "YQVKWSDFFTGHVVGEHNSKVLAHIGDGDFTARITTDGEEYNIEPLWRFVENTSGDNMLV 0"
## [1] "VHMDHDSFYSGRVFGELESSVRAHIEDGTMTMSIHLPEETYHIEPSWRHLPEAKKDTMVA 0"
## [1] " "
## [1] "YKSEDIKNVSRLQ--SPKVCGYLKVDNEELLP-KGLVDREPPEELVHRVKRR---ADPDP 0"
## [1] "YKSEDIKNVSRLQ--SPKVCGYLKVDNEELLP-KGLVDREPPEELVHRVKRR---ADPDP 0"
## [1] "YKSEDIKNVSRLQ--SPKVCGYLKVDNEELLP-KGLVDRGPPEELVHRVKRR---ADPDP 0"
## [1] "YKSEDIKNVSRLQ--SPKVCGYIKADNEELLP-KGLVDREPPDEFVHRVKRR---ADPNP 0"
## [1] "YKSEDIKNVSRLQ--SPKVCGYIKANNEELLP-KGLVDSEPPDELVHRVKRR---ADPNP 0"
## [1] "YKSEDIKDFSRLQ--SPKVCGYLNADSEELLP-KGLIDREPSEEFVRRVKRR---AEPNP 0"
## [1] "YKSEDIKDFSRLQ--SPKVCGYLNADSEELLP-KGLIDREPSEEFVRRVKRR---AEPNP 0"
## [1] "YRSEDIKDFSRLQ--SPKVCGYLKLNEDELLP-KGLEESKQNEASIHREKR----AIPES 0"
## [1] "YRSGDIKDFSRLK--SSKVCGYIKPKEDLFKNEQDTAESAEGDDYLHREKRQR--AGPDS 0"
## [1] "YKASDVKVHKNEAGATPKTCGYIKEG-LELED-KEHGDTLDN-ELHTREKRQSDQYEYTP 0"
## [1] " "
## [1] "MKNTCKLLVVADHRFYRYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "MKNTCKLLVVADHRFYRYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "MKNTCKLLVVADHRFYRYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "LKNTCKLLVVADHRFYRYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "LKNTCKLLVVADHRFYKYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "LKNTCKLLVVADHRFYKYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "LKNTCKLLVVADHRFYKYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "SKDTCKMLVVADHRFFKYMGRGEESTTINYLIELIDRVDDIYRNTPWDN----EAFKGYG 0"
## [1] "LRNTCKMLVVADHRFFKYMGRGEESTTINYLIELIDRVDDIYRNTSWDN----NQWKGYG 0"
## [1] "TKTRCPLLLVADYRFFQEMGGGNTKTTINYLISLIDRVHKIYNDTVWQDRSDQEGFKGMG 0"
## [1] " "
## [1] "IQIEQIRILKSPQEVKPGEKHYNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "IQIEQIRILKSPQEVKPGEKHYNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "IQIEQIRILKSPQEVKPGEKHYNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "IQIEQIRILKSPQEVKPGERHYNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "IQIEQIRILKSPQVVKPGERHFNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "VQIEQIRILKSPQEVKPGERHFNMAKSFPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "VQIEQIRILKSPQEVKPGERHFNMAKSFPNEEKDAWDVKMLLEQFSLDIAEEASKVCLAH 0"
## [1] "IQIEQIIIHSEPERVAPGGKHYNMAKSYPDDKKDAWDVKMLLEQFSFDIAEKAAHVCLAH 0"
## [1] "VQIEQIIVHKEPENVTQGQKHYNMARAFPNPDKDAWDVKQLLEQFSYDISEKAAQVCLAH 0"
## [1] "FVIKKIVVHSEPTRLRGGEAHYNMIR-------EKWDVRNLLEVFSREYSHKD--FCLAH 0"
## [1] " "
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPVGKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPVGKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPVGKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPIGKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPIGKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYNPTVKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYNPGVKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRPNSHGGICPKAYHSPIVKKDIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSHKPNTHGGICPKAYENDLSKKAVYLNTGLTSTKN-YGKTIL 0"
## [1] "LFTDLKFEGGILGLAYVGSPRRNSVGGICTPEYFKNG--YTLYLNSGLSSSRNHYGQRVI 0"
## [1] " "
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDSLPECAPTEDQGGKYVMYPIAVSGDHENNKMFSSCSK 0"
## [1] "TKEADLVTTHELGHNFGSEHDPDSMEACAPSEDHGGKFVMYPIAVSGDHENNKMFSSCSR 0"
## [1] "TREADLVTAHEFGHNWGSEHDPD-IPECSPSASQGGSFLMYTYSVSGYDVNNKKFSPCSL 0"
## [1] " "
## [1] "QSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLK--EGV 0"
## [1] "QSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLK--EGV 0"
## [1] "QSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLK--EGV 0"
## [1] "QSIYKTIESKSQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCSSDCMLR--AGV 0"
## [1] "QSIYKTIESKSQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLR--PGV 0"
## [1] "QSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLK--PGV 0"
## [1] "QSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLK--PGV 0"
## [1] "KSIHRTIEVKAQECFKERNNKVCGNSRVDEGEECDPGLLYQR-VDPCCSADCKLK--DGA 0"
## [1] "ESILRTLMAKSPICFKERNNKVCGNSRVDEGEECDPGLLHQH-NDPCCTSDCKFQ--PGV 0"
## [1] "RSIRKVLQAKSGRCFSEPEESFCGNLRVEGDEQCDAGLLGTEDNDSCCDKNCKLRRNQGA 0"
## [1] " "
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAEDDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAEDDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAEDDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAADDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAADDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGDAEDDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGDAEDDTVCLDLGK 0"
## [1] "KCSDRNSPCCKGCQFESAQKKCQEAINATCKGESFCTGNSSECPPPGNAPDDTICVDMGK 0"
## [1] "HCSDRNSPCCRGCQFESAQKKCQEAINATCKGESYCTGSSSECPTPGNAENDTVCVDLGK 0"
## [1] "MCSDKNSPCCQNCQFMASGMKCREAQYATCEQEARCTGAHAECPKSPAMADGTTCQERGQ 0"
## [1] " "
## [1] "CKDGKCIPFCEREQQLESCACNETDNSCKVCCRD-LSGRCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKDGKCIPFCEREQQLESCACNETDNSCKVCCRD-LSGRCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKDGKCIPFCEREQQLESCACNETDNSCKVCCRD-LSGRCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKDGKCVPFCEREQHLESCACNETDNSCKVCCRD-PSGRCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKDGKCVPFCEREQRLESCACNETDNSCKVCCRD-PSGRCVPYVNAEQKNLFLRKGKPCT 0"
## [1] "CKAGKCIPFCKREQELESCACVDTDNSCKVCCRN-LSGPCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKAGKCIPFCKREQELESCACADTDNSCKVCCRN-LSGPCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKDGECIPFCEREKNLRSCACNETDNSCKVCCRD-EQDRCTPYVDANNQFLFLRKGKPCT 0"
## [1] "CVNGECRPFCEIERNLKSCACNDTENSCKVCCRD-ENGVCSPQKDQHDHFLYLRKGKPCT 0"
## [1] "CRNGKCVPYCETQG-LQSCMCDIIADACKRCCRMSINETCFPVEPPD----VLPDGTPCI 0"
## [1] " "
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDTNGKCEKRVQDVIERFWEFIDKLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDSNGKCEKQVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLLFWIPLSILV 0"
## [1] "VGFCDANGKCEKQVQDVIERIWDFIDKLSINMFGKFLADNIVGSVVVFSLLFWIPLSILV 0"
## [1] "TGFCN-KGVCEKTIQDVVERFWDIIEEINVAKTLRFLKDNIVMAVVLVTAVFWIPISCVI 0"
## [1] " "
## [1] "HCVDKKLDKQYES--LSLFHPSNVEMLSSMDSASVRIIKPFPAPQTPGRLQPAPVI---P 0"
## [1] "HCVDKKLDKQYES--LSLFHPSNVEMLSSMDSASVRIIKPFPAPQTPGRLQPAPVI---P 0"
## [1] "HCVDKKLDKQYES--LSLFHP--------------------------------------- 0"
## [1] "HCVDKKLDKQYES--LSLFHPSNVEMLSSMDSASVRIIKPFPAPQTPGRPQPLQAAPVPP 0"
## [1] "HCVDKKLDKQFES--LSLFHPSNVEMLSSMDSASVRIIKPFPAPQTPGRLQTTPVMPPAP 0"
## [1] "HCVDKKLDKQYES--LSLFHHSNIEMLSSMDSASVRIIKPFPAPQTPGRLQALQPAAMMP 0"
## [1] "HCVDKKLDKQYES--LSLFHHSNIEMLSSMDSASVRIIKPFPAPQTPGRLQALQPAAMMP 0"
## [1] "HCVDKKLDKQYEENNKSLFCASNAEVPSSLDSASVRIIKPFPAAQPTSRHQPLQPIVAAP 0"
## [1] "HCVDKKLDKRYEETTKSLFTPSNVEMLSSMDSAPVRIVKPSPSSQATNRFQALQTVPIVP 0"
## [1] "SYFDRKKLRHEMK----------------------------------------------- 0"
## [1] " "
## [1] "SAP-AAPKLDHQRMDTIQEDPSTDSHMDEDGFEKDPFPNSSTAAKSFEDLTDHPVTRSEK 0"
## [1] "SAP-AAPKLDHQRMDTIQEDPSTDSHMDEDGFEKDPFPNSSTAAKSFEDLTDHPVTRSEK 0"
## [1] "-------------------------------------QCNSTAAKSFEDLTDHPVTRSEK 0"
## [1] "PMP-AAPKLEHQRMDTIQEDPSTDSHVDEDGFEKDPFPNSSTAAKSFEDLTGRPVTRSEK 0"
## [1] "SAP-LAPKLDHQRMDTIQEDPSTDSHADEDGFEKDPFPNSSAAAKSFEDLTDHPVTRSEK 0"
## [1] "PVP-AAPKLDHQRMDTIQEDPSTDSHADDDGFEKDPFPNSSTAAKSFEDLTDHPVTRSEK 0"
## [1] "PVS-AAPKLDHQRMDTIQEDPSTDSHVDDDGFEKDPFPNSSAAAKSFEDLTDHPVTRSEK 0"
## [1] "AAA-TAPKQDHQRMDTIQEDPSTDSHIDEDGFEKDPFPNSSSAAKSFEDLTEHPVTRSEK 0"
## [1] "TSAGAATKADHQRMDTIQEDPSVDSHLD-DQFEEDHFPNSGSPAKSFEDLTDHPVVRSDK 0"
## [1] "-----------------------------------------LIEWSQKLDLIHPSDERRR 0"
## [1] " "
## [1] "AASFKLQRQNRVDSKETEC 41"
## [1] "AASFKLQRQNRVDSKETEC 41"
## [1] "AASFKLQRQNRVDSKETEC 41"
## [1] "AASFKLQRQNRVGSKETEC 41"
## [1] "ASSFKLQRQNRVDSKETEC 41"
## [1] "AASFKLQRQSRVDSKETEC 41"
## [1] "AASFKLQRQSRVDSKETEC 41"
## [1] "ASSFKLQRQNRVDSKETEC 41"
## [1] "ASSFRLQRQGRVDSKETEC 41"
## [1] "VIHIRVPRQKISVARACN- 41"
## [1] " "
Distance Matrix
Adam17_subset_dist <- seqinr::dist.alignment(Adam17_align_seqinr,
matrix = "identity")
is(Adam17_subset_dist)
## [1] "dist" "oldClass"
class(Adam17_subset_dist)
## [1] "dist"
Adam17_align_seqinr_rnd <- round(Adam17_subset_dist, 3)
Adam17_align_seqinr_rnd
## 1 2 3 4 5 6 7 8 9
## 2 0.000
## 3 0.106 0.106
## 4 0.239 0.239 0.244
## 5 0.266 0.266 0.284 0.249
## 6 0.285 0.285 0.290 0.304 0.317
## 7 0.279 0.279 0.283 0.302 0.315 0.125
## 8 0.484 0.484 0.485 0.492 0.493 0.504 0.502
## 9 0.559 0.559 0.560 0.559 0.556 0.559 0.556 0.528
## 10 0.775 0.775 0.767 0.769 0.770 0.777 0.775 0.771 0.774
Phylogenetic Tree
tree_subset <- nj(Adam17_subset_dist)
plot.phylo(tree_subset, main="Phylogenetic Tree",
use.edge.length = F)
mtext(text = "Adam17 family gene tree - rooted, no branch lenths")
