Introduction

A disintegrin and metalloproteinase domain-containing protein is a protein involved in numerous biological processes such as cell to cell and cell matrix interaction, including fertilization, muscle development, and neurogenesis. The purpose of the project is to give an in depth analysis of ADAM 17 in Homo-sapiens and other organsims.

Resources / References

https://www.uniprot.org/uniprot/P78536 https://www.genecards.org/cgi-bin/carddisp.pl?gene=ADAM17(info from intro)

Preparation

Load following packages:

  # github packages
  
  library(compbio4all)
  library(ggmsa)
  
  # CRAN packages
  
  library(rentrez)
  library(seqinr)
  library(ape)
  library(pander)
  library(ggplot2)
  
  # Bioconductor packages
  library(BiocManager)
  library(drawProteins)
  library(msa)
  
  # Biostrings
  library(Biostrings)
  library(HGNChelper)

Accession Numbers

Refseq <- c("NP_003174", "XP_515293","XP_002799185.1","XP_005630166.1","XP_002691532.1", "NP_033745.4", "NP_064702.1",  "NP_001008682.1", "NP_001182159.1",  "NP_733334.1")

Uniprot <- c( "P78536", "H2R900", "I0FUK6", "A0A5F4CDW3", "E1B867", "Q9Z0F8", "Q9Z1K9", "Q5QHR9", "E2IFV1" ,"Q9VAC5")

PDB <- c("2M2F", "N/A",  "N/A",  "N/A",  "N/A",  "N/A",  "N/A",  "N/A",  "N/A",  "N/A")

Scien_Name <- c("Homo Sapiens", "Pan Troglodytes", "Macaca Mulatta", "Canis Lupus", "Bos Taurus", "Mus Musculus", "Rattus Norvegicus", "Gallus Gallus", "Xenopus Tropicalis", "Drosophilia Melanogaster")

Common_Name <- c( "Human", "Chimpanzee", "Rhesus Monkey", "Dog", "Cattle", "House Mouse", "Norway Rat","Chicken", "Tropical Frog", "Fruit Fly" )

Gene <- c("ADAM17", "ADAM17", "ADAM17", "ADAM17", "ADAM17", "ADAM17", "ADAM17", "ADAM17", "ADAM17", "TACE" )

Adam17.df <- data.frame( NCBI.Protein.Accession = Refseq,
                         Uniprot.ID = Uniprot,
                         PDB = PDB,
                         Species = Scien_Name,
                         Common.Name = Common_Name,
                         Gene = Gene)
pander::pander(Adam17.df)
Table continues below
NCBI.Protein.Accession Uniprot.ID PDB Species
NP_003174 P78536 2M2F Homo Sapiens
XP_515293 H2R900 N/A Pan Troglodytes
XP_002799185.1 I0FUK6 N/A Macaca Mulatta
XP_005630166.1 A0A5F4CDW3 N/A Canis Lupus
XP_002691532.1 E1B867 N/A Bos Taurus
NP_033745.4 Q9Z0F8 N/A Mus Musculus
NP_064702.1 Q9Z1K9 N/A Rattus Norvegicus
NP_001008682.1 Q5QHR9 N/A Gallus Gallus
NP_001182159.1 E2IFV1 N/A Xenopus Tropicalis
NP_733334.1 Q9VAC5 N/A Drosophilia Melanogaster
Common.Name Gene
Human ADAM17
Chimpanzee ADAM17
Rhesus Monkey ADAM17
Dog ADAM17
Cattle ADAM17
House Mouse ADAM17
Norway Rat ADAM17
Chicken ADAM17
Tropical Frog ADAM17
Fruit Fly TACE

Data Preparation

HomoSapien_fasta <- rentrez::entrez_fetch(db = "protein", 
                          id = "NP_003174", 
                          rettype = "fasta")
Chimp_fasta <- rentrez::entrez_fetch(db = "protein", 
                          id = "XP_515293  ", 
                          rettype = "fasta")
Rmonkey_fasta <- rentrez::entrez_fetch(db = "protein", 
                          id = "  XP_002799185.1", 
                          rettype = "fasta")
Dog_fasta <- rentrez::entrez_fetch(db = "protein", 
                          id = " XP_005630166.1", 
                          rettype = "fasta")
Cattle_fasta <- rentrez::entrez_fetch(db = "protein", 
                          id = "XP_002691532.1", 
                          rettype = "fasta")
Mouse_fasta <- rentrez::entrez_fetch(db = "protein", 
                          id = "NP_033745.4", 
                          rettype = "fasta")
Rat_fasta <- rentrez::entrez_fetch(db = "protein",
                          id = " NP_064702.1", 
                          rettype = "fasta")
Chicken_fasta <- rentrez::entrez_fetch(db = "protein", 
                          id = "NP_001008682.1", 
                          rettype = "fasta")
Tropfrog_fasta <- rentrez::entrez_fetch(db = "protein", 
                          id = " NP_001182159.1", 
                          rettype = "fasta")
Drosophilia_fasta <- rentrez::entrez_fetch(db = "protein", 
                          id = " NP_733334.1", 
                          rettype = "fasta")





Adam17_list <- list(HomoSapien_fasta,Chimp_fasta, Rmonkey_fasta,Dog_fasta, Cattle_fasta, Mouse_fasta, Rat_fasta, Chicken_fasta, Tropfrog_fasta, Drosophilia_fasta)
  
for(i in 1:length(Adam17_list)){
  Adam17_list[[i]] <- fasta_cleaner(Adam17_list[[i]], parse = F)
}

Adam17_vector <- rep(NA, length(Adam17_list))

names(Adam17_vector) <- names(Adam17_list)

for(i in 1:length(Adam17_vector)){
  Adam17_vector[i] <- Adam17_list[[i]]
}

Protein Diagram

Homosap_json  <- drawProteins::get_features("P78536")
## [1] "Download has worked"
is(Homosap_json)
## [1] "list"             "vector"           "list_OR_List"     "vector_OR_Vector"
## [5] "vector_OR_factor"
my_prot_df <- drawProteins::feature_to_dataframe(Homosap_json)
is(my_prot_df)
## [1] "data.frame"       "list"             "oldClass"         "vector"          
## [5] "list_OR_List"     "vector_OR_Vector" "vector_OR_factor"
my_canvas <- draw_canvas(my_prot_df)  
my_canvas <- draw_chains(my_canvas, my_prot_df, 
                         label_size = 2.5)
my_canvas <- draw_domains(my_canvas, my_prot_df)
my_canvas

## Dotplot

par(mfrow = c(2,2), 
    mar = c(0,0,2,1))

HomoSapien_vector <- fasta_cleaner(HomoSapien_fasta)

dotPlot(HomoSapien_vector, HomoSapien_vector, 
        wsize = 13.5, 
        nmatch = 3, 
        main = "Human")

par(mfrow = c(1,1), 
    mar = c(4,4,4,4))

Protein Properties Compiled From Databases

Descript <- c("ADAM17 is a single pass type I membrane protein","AlphaFold predicts with high confidence that Adam17 contains a mix of alpha helices and beta sheets" )

Source <- c("Uniprot", "AlphaFold")

Links <- c("https://www.uniprot.org/uniprot/P78536","https://alphafold.ebi.ac.uk/entry/O77636")

Property.df <- data.frame(Source = Source,
                ProteinProperty = Descript,
                Links = Links)
                

pander::pander(Property.df)
Table continues below
Source ProteinProperty
Uniprot ADAM17 is a single pass type I membrane protein
AlphaFold AlphaFold predicts with high confidence that Adam17 contains a mix of alpha helices and beta sheets
Links
https://www.uniprot.org/uniprot/P78536
https://alphafold.ebi.ac.uk/entry/O77636

Prediction of Protein Fold

aa.1 <- c("A","R","N","D","C","Q","E","G","H","I",
            "L","K","M","F","P","S","T","W","Y","V")
#From Chou Data
alpha <- c(285, 53, 97, 163, 22, 67, 134, 197, 111, 91, 
           221, 249, 48, 123, 82, 122, 119, 33, 63, 167)

beta <- c(203, 67, 139, 121, 75, 122, 86, 297, 49, 120, 
          177, 115, 16, 85, 127, 341, 253, 44, 110, 229)

a.plus.b <- c(175, 78, 120, 111, 74, 74, 86, 171, 33, 93,
              110, 112, 25, 52, 71, 126, 117, 30, 108, 123)

a.div.b <- c(361, 146, 183, 244, 63, 114, 257, 377, 107, 239, 
             339, 321, 91, 158, 188, 327, 238, 72, 130, 378)

alpha.prop <- alpha/sum(alpha)
beta.prop <- beta/sum(beta)
a.plus.b.prop <- a.plus.b/sum(a.plus.b)
a.div.b <- a.div.b/sum(a.div.b)

aa.prop <- data.frame(
                      alpha.prop,
                      beta.prop,
                      a.plus.b.prop,
                      a.div.b)
row.names(aa.prop) <- aa.1

pander::pander(aa.prop)
  alpha.prop beta.prop a.plus.b.prop a.div.b
A 0.1165 0.07313 0.09264 0.08331
R 0.02166 0.02414 0.04129 0.03369
N 0.03964 0.05007 0.06353 0.04223
D 0.06661 0.04359 0.05876 0.05631
C 0.008991 0.02702 0.03917 0.01454
Q 0.02738 0.04395 0.03917 0.02631
E 0.05476 0.03098 0.04553 0.05931
G 0.08051 0.107 0.09052 0.08701
H 0.04536 0.01765 0.01747 0.02469
I 0.03719 0.04323 0.04923 0.05516
L 0.09031 0.06376 0.05823 0.07824
K 0.1018 0.04143 0.05929 0.07408
M 0.01962 0.005764 0.01323 0.021
F 0.05027 0.03062 0.02753 0.03646
P 0.03351 0.04575 0.03759 0.04339
S 0.04986 0.1228 0.0667 0.07547
T 0.04863 0.09114 0.06194 0.05493
W 0.01349 0.01585 0.01588 0.01662
Y 0.02575 0.03963 0.05717 0.03
V 0.06825 0.08249 0.06511 0.08724
HomoSapien_fasta <- rentrez::entrez_fetch(db = "protein", 
                          id = "NP_003174", 
                          rettype = "fasta")

HomoSapien_vector <- compbio4all:: fasta_cleaner(HomoSapien_fasta, parse = TRUE) 

homosapien.freq.table <- table(HomoSapien_vector)/length(HomoSapien_vector)
                                                
table_to_vector <- function(table_x){
  table_names <- attr(table_x, "dimnames")[[1]]
  table_vect <- as.vector(table_x)
  names(table_vect) <- table_names
  return(table_vect)
}


Adam17.human.aa.freq <- table_to_vector(homosapien.freq.table)

aa.prop$Adam17.human.aa.freq <- Adam17.human.aa.freq

Adam17.human.aa.freq
##           A           C           D           E           F           G 
## 0.046116505 0.042475728 0.078883495 0.070388350 0.043689320 0.054611650 
##           H           I           K           L           M           N 
## 0.024271845 0.042475728 0.075242718 0.076456311 0.018203883 0.050970874 
##           P           Q           R           S           T           V 
## 0.055825243 0.040048544 0.047330097 0.077669903 0.049757282 0.066747573 
##           W           Y 
## 0.007281553 0.031553398
aa.names <- names(Adam17.human.aa.freq)
i.U <- which(aa.names == "U")
aa.names[i.U]
## character(0)
Adam17.human.aa.freq[i.U]
## named numeric(0)
aa.prop$Adam.human.aa.freq <- Adam17.human.aa.freq

pander::pander(aa.prop)
Table continues below
  alpha.prop beta.prop a.plus.b.prop a.div.b
A 0.1165 0.07313 0.09264 0.08331
R 0.02166 0.02414 0.04129 0.03369
N 0.03964 0.05007 0.06353 0.04223
D 0.06661 0.04359 0.05876 0.05631
C 0.008991 0.02702 0.03917 0.01454
Q 0.02738 0.04395 0.03917 0.02631
E 0.05476 0.03098 0.04553 0.05931
G 0.08051 0.107 0.09052 0.08701
H 0.04536 0.01765 0.01747 0.02469
I 0.03719 0.04323 0.04923 0.05516
L 0.09031 0.06376 0.05823 0.07824
K 0.1018 0.04143 0.05929 0.07408
M 0.01962 0.005764 0.01323 0.021
F 0.05027 0.03062 0.02753 0.03646
P 0.03351 0.04575 0.03759 0.04339
S 0.04986 0.1228 0.0667 0.07547
T 0.04863 0.09114 0.06194 0.05493
W 0.01349 0.01585 0.01588 0.01662
Y 0.02575 0.03963 0.05717 0.03
V 0.06825 0.08249 0.06511 0.08724
  Adam17.human.aa.freq Adam.human.aa.freq
A 0.04612 0.04612
R 0.04248 0.04248
N 0.07888 0.07888
D 0.07039 0.07039
C 0.04369 0.04369
Q 0.05461 0.05461
E 0.02427 0.02427
G 0.04248 0.04248
H 0.07524 0.07524
I 0.07646 0.07646
L 0.0182 0.0182
K 0.05097 0.05097
M 0.05583 0.05583
F 0.04005 0.04005
P 0.04733 0.04733
S 0.07767 0.07767
T 0.04976 0.04976
W 0.06675 0.06675
Y 0.007282 0.007282
V 0.03155 0.03155

Calculating Similarities

chou_cor <- function(x,y){
  numerator <- sum(x*y)
denominator <- sqrt((sum(x^2))*(sum(y^2)))
result <- numerator/denominator
return(result)
}

chou_cosine <- function(z.1, z.2){
  z.1.abs <- sqrt(sum(z.1^2))
  z.2.abs <- sqrt(sum(z.2^2))
  my.cosine <- sum(z.1*z.2)/(z.1.abs*z.2.abs)
  return(my.cosine)
}

par(mfrow = c(2,2), mar = c(1,4,1,1))
plot(alpha.prop ~ Adam17.human.aa.freq, data = aa.prop)
plot(beta.prop ~ Adam17.human.aa.freq, data = aa.prop)
plot(a.plus.b.prop  ~ Adam17.human.aa.freq, data = aa.prop)
plot(a.div.b ~ Adam17.human.aa.freq, data = aa.prop)

corr.alpha <- chou_cor(aa.prop[,5], aa.prop[,1])
corr.beta  <- chou_cor(aa.prop[,5], aa.prop[,2])
corr.apb   <- chou_cor(aa.prop[,5], aa.prop[,3])
corr.adb   <- chou_cor(aa.prop[,5], aa.prop[,4])

cos.alpha <- chou_cosine(aa.prop[,5], aa.prop[,1])
cos.beta  <- chou_cosine(aa.prop[,5], aa.prop[,2])
cos.apb   <- chou_cosine(aa.prop[,5], aa.prop[,3])
cos.adb   <- chou_cosine(aa.prop[,5], aa.prop[,4])

aa.prop.flipped <- t(aa.prop)
round(aa.prop.flipped,2)
##                         A    R    N    D    C    Q    E    G    H    I    L
## alpha.prop           0.12 0.02 0.04 0.07 0.01 0.03 0.05 0.08 0.05 0.04 0.09
## beta.prop            0.07 0.02 0.05 0.04 0.03 0.04 0.03 0.11 0.02 0.04 0.06
## a.plus.b.prop        0.09 0.04 0.06 0.06 0.04 0.04 0.05 0.09 0.02 0.05 0.06
## a.div.b              0.08 0.03 0.04 0.06 0.01 0.03 0.06 0.09 0.02 0.06 0.08
## Adam17.human.aa.freq 0.05 0.04 0.08 0.07 0.04 0.05 0.02 0.04 0.08 0.08 0.02
## Adam.human.aa.freq   0.05 0.04 0.08 0.07 0.04 0.05 0.02 0.04 0.08 0.08 0.02
##                         K    M    F    P    S    T    W    Y    V
## alpha.prop           0.10 0.02 0.05 0.03 0.05 0.05 0.01 0.03 0.07
## beta.prop            0.04 0.01 0.03 0.05 0.12 0.09 0.02 0.04 0.08
## a.plus.b.prop        0.06 0.01 0.03 0.04 0.07 0.06 0.02 0.06 0.07
## a.div.b              0.07 0.02 0.04 0.04 0.08 0.05 0.02 0.03 0.09
## Adam17.human.aa.freq 0.05 0.06 0.04 0.05 0.08 0.05 0.07 0.01 0.03
## Adam.human.aa.freq   0.05 0.06 0.04 0.05 0.08 0.05 0.07 0.01 0.03
dist(aa.prop.flipped, method = "euclidean")
##                      alpha.prop  beta.prop a.plus.b.prop    a.div.b
## beta.prop            0.13342098                                    
## a.plus.b.prop        0.09281824 0.08289406                         
## a.div.b              0.06699039 0.08659174    0.06175113           
## Adam17.human.aa.freq 0.16845806 0.16242520    0.14086756 0.14971043
## Adam.human.aa.freq   0.16845806 0.16242520    0.14086756 0.14971043
##                      Adam17.human.aa.freq
## beta.prop                                
## a.plus.b.prop                            
## a.div.b                                  
## Adam17.human.aa.freq                     
## Adam.human.aa.freq             0.00000000
dist.alpha <- dist((aa.prop.flipped[c(1,5),]),  method = "euclidean")
dist.beta  <- dist((aa.prop.flipped[c(2,5),]),  method = "euclidean")
dist.apb   <- dist((aa.prop.flipped[c(3,5),]),  method = "euclidean")
dist.adb  <- dist((aa.prop.flipped[c(4,5),]), method = "euclidean")


fold.type <- c("alpha","beta","alpha plus beta", "alpha/beta")

# data
corr.sim <- round(c(corr.alpha,corr.beta,corr.apb,corr.adb),5)
cosine.sim <- round(c(cos.alpha,cos.beta,cos.apb,cos.adb),5)
Euclidean.dist <- round(c(dist.alpha,dist.beta,dist.apb,dist.adb),5)

# summary
sim.sum <- c("","","most.sim","")
dist.sum <- c("","","min.dist","")

df <- data.frame(fold.type,
           corr.sim ,
           cosine.sim ,
           Euclidean.dist ,
           sim.sum ,
           dist.sum )

pander::pander(df)
fold.type corr.sim cosine.sim Euclidean.dist sim.sum dist.sum
alpha 0.774 0.774 0.1685
beta 0.7939 0.7939 0.1624
alpha plus beta 0.8304 0.8304 0.1409 most.sim min.dist
alpha/beta 0.8124 0.8124 0.1497

Percent Identity Comparisons (PID)

HomoSapien_vector <- fasta_cleaner(HomoSapien_fasta, parse =F)

Chimp_vector <- fasta_cleaner(Chimp_fasta, parse = F)

Rmonkey_vector <- fasta_cleaner(Rmonkey_fasta, parse = F)

Dog_vector <- fasta_cleaner(Dog_fasta, parse = F)


align1.2 <- Biostrings::pairwiseAlignment(HomoSapien_vector, Chimp_vector)

align1.3 <- Biostrings::pairwiseAlignment(HomoSapien_vector, Rmonkey_vector)

align1.4 <- Biostrings::pairwiseAlignment(HomoSapien_vector, Dog_vector)

align2.3 <- Biostrings::pairwiseAlignment(Chimp_vector, Rmonkey_vector)

align2.4 <- Biostrings::pairwiseAlignment(Chimp_vector, Dog_vector)

align3.4 <- Biostrings::pairwiseAlignment(Rmonkey_vector, Dog_vector)

Biostrings::pid(align1.2)
## [1] 100
Biostrings::pid(align1.3)
## [1] 85.80097
Biostrings::pid(align1.4)
## [1] 93.95405
pids <- c(1,                  NA,     NA,     NA,
          pid(align1.2),          1,     NA,     NA,
          pid(align1.3), pid(align2.3),      1,     NA,
          pid(align1.4), pid(align2.4), pid(align3.4), 1)

mat <- matrix(pids, nrow = 4, byrow = T)
row.names(mat) <- c("Homo","Chimpanzee","Monkey","Dog")   
colnames(mat) <- c("Homo","Chimpanzee","Monkey","Dog")   
pander::pander(mat)  
  Homo Chimpanzee Monkey Dog
Homo 1 NA NA NA
Chimpanzee 100 1 NA NA
Monkey 85.8 85.8 1 NA
Dog 93.95 93.95 81.02 1

PID Methods Comparison

Biostrings::pid(align1.2, type = "PID1")
## [1] 100
Biostrings::pid(align1.2, type = "PID2")
## [1] 100
Biostrings::pid(align1.2, type = "PID3")
## [1] 100
Biostrings::pid(align1.2, type = "PID4")
## [1] 100
Method <- c("PID1", "PID2", "PID3", "PID4")

PID <- c("100", "100", "100", "100")

Denominator <- c( "alligned position + internal gap positions", "aligned postions", "length shorter sequence", "average length of the two sequences")


diff_pid.df <- data.frame(Method = Method,
                PID = PID,
                Denominator = Denominator)

pander::pander(diff_pid.df)
Method PID Denominator
PID1 100 alligned position + internal gap positions
PID2 100 aligned postions
PID3 100 length shorter sequence
PID4 100 average length of the two sequences

MSA Data Preperation

for(i in 1:length(Adam17_list)){
  Adam17_list[[i]] <- fasta_cleaner(Adam17_list[[i]], parse = F)
}

# make a vector to hold each sequence
Adam_vector <- rep(NA, length(Adam17_list))

# name the vector (this makes ggmsa happy)
names(Adam17_vector) <- names(Adam17_list)

# extract the sequences from list and put into vector
for(i in 1:length(Adam17_vector)){
  Adam17_vector[i] <- Adam17_list[[i]]
}

Adam17_vector_ss <- Biostrings::AAStringSet(Adam17_vector)

Adam17_align <- msa(Adam17_vector_ss,
                     method = "ClustalW")
## use default substitution matrix
class(Adam17_align)
## [1] "MsaAAMultipleAlignment"
## attr(,"package")
## [1] "msa"
is(Adam17_align)
## [1] "MsaAAMultipleAlignment" "AAMultipleAlignment"    "MsaMetaData"           
## [4] "MultipleAlignment"
class(Adam17_align) <- "AAMultipleAlignment"

Adam17_align_seqinr <- msaConvert(Adam17_align, 
                                   type = "seqinr::alignment")

compbio4all::print_msa(Adam17_align_seqinr)
## [1] "-----------MRQSLLFLTSVVPFVLAPRPPDDPGFGPHQRLEKLDSLLSDYDILSLSN 0"
## [1] "-----------MRQSLLFLTSVVPFVLAPRPPDDPGFGPHQRLEKLDSLLSDYDILSLSN 0"
## [1] "-----------MRQSLLFLTSVVPFVLAPRPPDDPSFGPHQRLVP--------------- 0"
## [1] "-----------MRRCVLFWTSVVPLVLAPRPPEQ----PPQRLENLDSLLSDYDILSLSN 0"
## [1] "-----------MRHRVLLLTSLVHLVLAPRPPDER--ESPRRFEKLDSLLSDYDILSLSN 0"
## [1] "-----------MRRRLLILTTLVPFVLAPRPPEEAGSGSHPRLEKLDSLLSDYDILSLAN 0"
## [1] "-----------MRQRLLFLTTLVPFVLAPRPPEEPGSGSHLRLEKLDSLLSDYDILSLSN 0"
## [1] "-----------MRLRLWLVSALALLWAPGGLQALGRLPREQRYDAVESMLSNYDILSQSS 0"
## [1] "MKVLRLMGQLSLVEVIFALLLCLVVAEADHIPFGSQKEMPAKHQGLDSLLSEYDVLSLSS 0"
## [1] "-----------------MFTKCISCCGLAIISVFFACLFVENCAALQKTLRHYEIFHKDD 0"
## [1] " "
## [1] "IQQHSVRKRDLQTSTHVETL--LTFSALKRHFKLYLTSSTERFSQNFKVVVVDGK-NESE 0"
## [1] "IQQHSVRKRDLQTSTHVETL--LTFSALKRHFKLYLTSSTERFSQNFKVVVVDGK-NESE 0"
## [1] "----------------------------ERHFKLYLTSSTERFSQNFKVVVVDGK-NESE 0"
## [1] "IQQHSVRKRDLQASTHIETL--LTFSALKRHFKLYLTSSTERFSQNFKVVVVDGK-DESE 0"
## [1] "IQQHSVRKRELQASTHLETL--LTFSALKRHFKLYLTSSTERFSKNFKVVVVDGK-DERE 0"
## [1] "IQQHSIRKRDLQSATHLETL--LTFSALKRHFKLYLTSSTERFSQNLRVVVVDGK-EESE 0"
## [1] "IQQHSIRKRDLQSATHLETL--LTFSALKRHFKLYLTSSTERFSQNLRVVVVDGK-EESE 0"
## [1] "IQQHSLKKRDLQPETHVERL--LSFSALQRHFKLYLTATAEHFSERFQALIVDGEGKEKE 0"
## [1] "IQQHSLRKRDLQSQSQLERM--LSFTALQRHFKLYLTSSTELVSENLEALVLDGNGKEKK 0"
## [1] "VVHRVVKRGAKHSTNPFNTIKEVEFTTLGKNFRLILHPHRDVLHSKFRAYAVDADGNETV 0"
## [1] " "
## [1] "YTVKWQDFFTGHVVGEPDSRVLAHIRDDDVIIRINTDGAEYNIEPLWRFVNDTKDKRMLV 0"
## [1] "YTVKWQDFFTGHVVGEPDSRVLAHIRDDDVIIRINTDGAEYNIEPLWRFVNDTKDKRMLV 0"
## [1] "YTVKWQDFFTGHVVGEPDSRVLAHIRDDDVIIRINTDGAEYNIEPLWRFVNDTKDKRMLV 0"
## [1] "YTVKWQDFFSGHVVGEPDSRVLAHIGDDDMTIRINTDGAEYNIEPLWRLINDTKDKRMLV 0"
## [1] "YPVKWQDFFSGHVVGEPDSRVLAHIGDDDITIRINTDGAEYNIEPLWRLINDTKDKRMLV 0"
## [1] "YSVKWQNFFSGHVVGEPDSRVLAHIGDDDVTVRINTDGAEYNVEPLWRFVNDTKDKRMLV 0"
## [1] "YSVKWQDFFSGHVVGEPDSRVLAHIGDDDVTVRINTDGAEYNIEPLWRFVNDTKDKRMLV 0"
## [1] "YRVQWQDFFTGHVVGEHNSKVVAHIGDEDFTVRINTDGEEYNIEPLWRFIDNVQDERLLV 0"
## [1] "YQVKWSDFFTGHVVGEHNSKVLAHIGDGDFTARITTDGEEYNIEPLWRFVENTSGDNMLV 0"
## [1] "VHMDHDSFYSGRVFGELESSVRAHIEDGTMTMSIHLPEETYHIEPSWRHLPEAKKDTMVA 0"
## [1] " "
## [1] "YKSEDIKNVSRLQ--SPKVCGYLKVDNEELLP-KGLVDREPPEELVHRVKRR---ADPDP 0"
## [1] "YKSEDIKNVSRLQ--SPKVCGYLKVDNEELLP-KGLVDREPPEELVHRVKRR---ADPDP 0"
## [1] "YKSEDIKNVSRLQ--SPKVCGYLKVDNEELLP-KGLVDRGPPEELVHRVKRR---ADPDP 0"
## [1] "YKSEDIKNVSRLQ--SPKVCGYIKADNEELLP-KGLVDREPPDEFVHRVKRR---ADPNP 0"
## [1] "YKSEDIKNVSRLQ--SPKVCGYIKANNEELLP-KGLVDSEPPDELVHRVKRR---ADPNP 0"
## [1] "YKSEDIKDFSRLQ--SPKVCGYLNADSEELLP-KGLIDREPSEEFVRRVKRR---AEPNP 0"
## [1] "YKSEDIKDFSRLQ--SPKVCGYLNADSEELLP-KGLIDREPSEEFVRRVKRR---AEPNP 0"
## [1] "YRSEDIKDFSRLQ--SPKVCGYLKLNEDELLP-KGLEESKQNEASIHREKR----AIPES 0"
## [1] "YRSGDIKDFSRLK--SSKVCGYIKPKEDLFKNEQDTAESAEGDDYLHREKRQR--AGPDS 0"
## [1] "YKASDVKVHKNEAGATPKTCGYIKEG-LELED-KEHGDTLDN-ELHTREKRQSDQYEYTP 0"
## [1] " "
## [1] "MKNTCKLLVVADHRFYRYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "MKNTCKLLVVADHRFYRYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "MKNTCKLLVVADHRFYRYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "LKNTCKLLVVADHRFYRYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "LKNTCKLLVVADHRFYKYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "LKNTCKLLVVADHRFYKYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "LKNTCKLLVVADHRFYKYMGRGEESTTTNYLIELIDRVDDIYRNTSWDN----AGFKGYG 0"
## [1] "SKDTCKMLVVADHRFFKYMGRGEESTTINYLIELIDRVDDIYRNTPWDN----EAFKGYG 0"
## [1] "LRNTCKMLVVADHRFFKYMGRGEESTTINYLIELIDRVDDIYRNTSWDN----NQWKGYG 0"
## [1] "TKTRCPLLLVADYRFFQEMGGGNTKTTINYLISLIDRVHKIYNDTVWQDRSDQEGFKGMG 0"
## [1] " "
## [1] "IQIEQIRILKSPQEVKPGEKHYNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "IQIEQIRILKSPQEVKPGEKHYNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "IQIEQIRILKSPQEVKPGEKHYNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "IQIEQIRILKSPQEVKPGERHYNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "IQIEQIRILKSPQVVKPGERHFNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "VQIEQIRILKSPQEVKPGERHFNMAKSFPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAH 0"
## [1] "VQIEQIRILKSPQEVKPGERHFNMAKSFPNEEKDAWDVKMLLEQFSLDIAEEASKVCLAH 0"
## [1] "IQIEQIIIHSEPERVAPGGKHYNMAKSYPDDKKDAWDVKMLLEQFSFDIAEKAAHVCLAH 0"
## [1] "VQIEQIIVHKEPENVTQGQKHYNMARAFPNPDKDAWDVKQLLEQFSYDISEKAAQVCLAH 0"
## [1] "FVIKKIVVHSEPTRLRGGEAHYNMIR-------EKWDVRNLLEVFSREYSHKD--FCLAH 0"
## [1] " "
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPVGKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPVGKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPVGKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPIGKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPIGKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYNPTVKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYNPGVKKNIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSPRPNSHGGICPKAYHSPIVKKDIYLNSGLTSTKN-YGKTIL 0"
## [1] "LFTYQDFDMGTLGLAYVGSHKPNTHGGICPKAYENDLSKKAVYLNTGLTSTKN-YGKTIL 0"
## [1] "LFTDLKFEGGILGLAYVGSPRRNSVGGICTPEYFKNG--YTLYLNSGLSSSRNHYGQRVI 0"
## [1] " "
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSK 0"
## [1] "TKEADLVTTHELGHNFGAEHDPDSLPECAPTEDQGGKYVMYPIAVSGDHENNKMFSSCSK 0"
## [1] "TKEADLVTTHELGHNFGSEHDPDSMEACAPSEDHGGKFVMYPIAVSGDHENNKMFSSCSR 0"
## [1] "TREADLVTAHEFGHNWGSEHDPD-IPECSPSASQGGSFLMYTYSVSGYDVNNKKFSPCSL 0"
## [1] " "
## [1] "QSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLK--EGV 0"
## [1] "QSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLK--EGV 0"
## [1] "QSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLK--EGV 0"
## [1] "QSIYKTIESKSQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCSSDCMLR--AGV 0"
## [1] "QSIYKTIESKSQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLR--PGV 0"
## [1] "QSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLK--PGV 0"
## [1] "QSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLN-NDTCCNSDCTLK--PGV 0"
## [1] "KSIHRTIEVKAQECFKERNNKVCGNSRVDEGEECDPGLLYQR-VDPCCSADCKLK--DGA 0"
## [1] "ESILRTLMAKSPICFKERNNKVCGNSRVDEGEECDPGLLHQH-NDPCCTSDCKFQ--PGV 0"
## [1] "RSIRKVLQAKSGRCFSEPEESFCGNLRVEGDEQCDAGLLGTEDNDSCCDKNCKLRRNQGA 0"
## [1] " "
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAEDDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAEDDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAEDDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAADDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAADDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGDAEDDTVCLDLGK 0"
## [1] "QCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGDAEDDTVCLDLGK 0"
## [1] "KCSDRNSPCCKGCQFESAQKKCQEAINATCKGESFCTGNSSECPPPGNAPDDTICVDMGK 0"
## [1] "HCSDRNSPCCRGCQFESAQKKCQEAINATCKGESYCTGSSSECPTPGNAENDTVCVDLGK 0"
## [1] "MCSDKNSPCCQNCQFMASGMKCREAQYATCEQEARCTGAHAECPKSPAMADGTTCQERGQ 0"
## [1] " "
## [1] "CKDGKCIPFCEREQQLESCACNETDNSCKVCCRD-LSGRCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKDGKCIPFCEREQQLESCACNETDNSCKVCCRD-LSGRCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKDGKCIPFCEREQQLESCACNETDNSCKVCCRD-LSGRCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKDGKCVPFCEREQHLESCACNETDNSCKVCCRD-PSGRCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKDGKCVPFCEREQRLESCACNETDNSCKVCCRD-PSGRCVPYVNAEQKNLFLRKGKPCT 0"
## [1] "CKAGKCIPFCKREQELESCACVDTDNSCKVCCRN-LSGPCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKAGKCIPFCKREQELESCACADTDNSCKVCCRN-LSGPCVPYVDAEQKNLFLRKGKPCT 0"
## [1] "CKDGECIPFCEREKNLRSCACNETDNSCKVCCRD-EQDRCTPYVDANNQFLFLRKGKPCT 0"
## [1] "CVNGECRPFCEIERNLKSCACNDTENSCKVCCRD-ENGVCSPQKDQHDHFLYLRKGKPCT 0"
## [1] "CRNGKCVPYCETQG-LQSCMCDIIADACKRCCRMSINETCFPVEPPD----VLPDGTPCI 0"
## [1] " "
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDTNGKCEKRVQDVIERFWEFIDKLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILV 0"
## [1] "VGFCDSNGKCEKQVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLLFWIPLSILV 0"
## [1] "VGFCDANGKCEKQVQDVIERIWDFIDKLSINMFGKFLADNIVGSVVVFSLLFWIPLSILV 0"
## [1] "TGFCN-KGVCEKTIQDVVERFWDIIEEINVAKTLRFLKDNIVMAVVLVTAVFWIPISCVI 0"
## [1] " "
## [1] "HCVDKKLDKQYES--LSLFHPSNVEMLSSMDSASVRIIKPFPAPQTPGRLQPAPVI---P 0"
## [1] "HCVDKKLDKQYES--LSLFHPSNVEMLSSMDSASVRIIKPFPAPQTPGRLQPAPVI---P 0"
## [1] "HCVDKKLDKQYES--LSLFHP--------------------------------------- 0"
## [1] "HCVDKKLDKQYES--LSLFHPSNVEMLSSMDSASVRIIKPFPAPQTPGRPQPLQAAPVPP 0"
## [1] "HCVDKKLDKQFES--LSLFHPSNVEMLSSMDSASVRIIKPFPAPQTPGRLQTTPVMPPAP 0"
## [1] "HCVDKKLDKQYES--LSLFHHSNIEMLSSMDSASVRIIKPFPAPQTPGRLQALQPAAMMP 0"
## [1] "HCVDKKLDKQYES--LSLFHHSNIEMLSSMDSASVRIIKPFPAPQTPGRLQALQPAAMMP 0"
## [1] "HCVDKKLDKQYEENNKSLFCASNAEVPSSLDSASVRIIKPFPAAQPTSRHQPLQPIVAAP 0"
## [1] "HCVDKKLDKRYEETTKSLFTPSNVEMLSSMDSAPVRIVKPSPSSQATNRFQALQTVPIVP 0"
## [1] "SYFDRKKLRHEMK----------------------------------------------- 0"
## [1] " "
## [1] "SAP-AAPKLDHQRMDTIQEDPSTDSHMDEDGFEKDPFPNSSTAAKSFEDLTDHPVTRSEK 0"
## [1] "SAP-AAPKLDHQRMDTIQEDPSTDSHMDEDGFEKDPFPNSSTAAKSFEDLTDHPVTRSEK 0"
## [1] "-------------------------------------QCNSTAAKSFEDLTDHPVTRSEK 0"
## [1] "PMP-AAPKLEHQRMDTIQEDPSTDSHVDEDGFEKDPFPNSSTAAKSFEDLTGRPVTRSEK 0"
## [1] "SAP-LAPKLDHQRMDTIQEDPSTDSHADEDGFEKDPFPNSSAAAKSFEDLTDHPVTRSEK 0"
## [1] "PVP-AAPKLDHQRMDTIQEDPSTDSHADDDGFEKDPFPNSSTAAKSFEDLTDHPVTRSEK 0"
## [1] "PVS-AAPKLDHQRMDTIQEDPSTDSHVDDDGFEKDPFPNSSAAAKSFEDLTDHPVTRSEK 0"
## [1] "AAA-TAPKQDHQRMDTIQEDPSTDSHIDEDGFEKDPFPNSSSAAKSFEDLTEHPVTRSEK 0"
## [1] "TSAGAATKADHQRMDTIQEDPSVDSHLD-DQFEEDHFPNSGSPAKSFEDLTDHPVVRSDK 0"
## [1] "-----------------------------------------LIEWSQKLDLIHPSDERRR 0"
## [1] " "
## [1] "AASFKLQRQNRVDSKETEC 41"
## [1] "AASFKLQRQNRVDSKETEC 41"
## [1] "AASFKLQRQNRVDSKETEC 41"
## [1] "AASFKLQRQNRVGSKETEC 41"
## [1] "ASSFKLQRQNRVDSKETEC 41"
## [1] "AASFKLQRQSRVDSKETEC 41"
## [1] "AASFKLQRQSRVDSKETEC 41"
## [1] "ASSFKLQRQNRVDSKETEC 41"
## [1] "ASSFRLQRQGRVDSKETEC 41"
## [1] "VIHIRVPRQKISVARACN- 41"
## [1] " "

Distance Matrix

Adam17_subset_dist <- seqinr::dist.alignment(Adam17_align_seqinr, 
                                       matrix = "identity")

is(Adam17_subset_dist)
## [1] "dist"     "oldClass"
class(Adam17_subset_dist)
## [1] "dist"
Adam17_align_seqinr_rnd <- round(Adam17_subset_dist, 3)

Adam17_align_seqinr_rnd 
##        1     2     3     4     5     6     7     8     9
## 2  0.000                                                
## 3  0.106 0.106                                          
## 4  0.239 0.239 0.244                                    
## 5  0.266 0.266 0.284 0.249                              
## 6  0.285 0.285 0.290 0.304 0.317                        
## 7  0.279 0.279 0.283 0.302 0.315 0.125                  
## 8  0.484 0.484 0.485 0.492 0.493 0.504 0.502            
## 9  0.559 0.559 0.560 0.559 0.556 0.559 0.556 0.528      
## 10 0.775 0.775 0.767 0.769 0.770 0.777 0.775 0.771 0.774

Phylogenetic Tree

tree_subset <- nj(Adam17_subset_dist)


plot.phylo(tree_subset, main="Phylogenetic Tree", 
            use.edge.length = F)


mtext(text = "Adam17 family gene tree - rooted, no branch lenths")