#Introduction MC1R is a gene with no introns that encodes the receptor protein for melanocyte-stimulating hormone, or MSH. The protein that it codes for functions in melanogenesis and plays a major factor in determining sun sensitivity.

#Links RefSeq Gene: https://www.ncbi.nlm.nih.gov/gene/4157

RefSeq Homologene: https://www.ncbi.nlm.nih.gov/homologene/?term=Homo+sapiens+MC1R

# github packages
library(compbio4all)
#useful for functions in computational biology
library(ggmsa)
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
#used for annotating and visualizing MSAs

# CRAN packages
library(rentrez)
#uses NCBI's databases to download genetic and biographic data
library(seqinr)
#data analysis and visualizing protein and DNA data
library(ape)
## 
## Attaching package: 'ape'
## The following objects are masked from 'package:seqinr':
## 
##     as.alignment, consensus
#used for reading and writing data for phylogenetic trees
library(pander)
#renders tables in R Markdown
library(ggplot2)
#creates complex plots from data in a data frame

## Biostrings
library(Biostrings)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: XVector
## Loading required package: GenomeInfoDb
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:ape':
## 
##     complement
## The following object is masked from 'package:seqinr':
## 
##     translate
## The following object is masked from 'package:base':
## 
##     strsplit
#efficient string containers for manipuling biological sequences
library(HGNChelper)
## Warning: package 'HGNChelper' was built under R version 4.1.2
#identifies known aliases and outdated gene symbols

# Bioconductor packages
library(msa)
#interface for many multiple sequence alignment algorithms
#ACCESSION NUMBER TABLE
#table_names <- c("RefSeq", "Uniprot", "PDB", "Scientific Name", "Common ")

refseq_table <- c("NP_001914", "NP_032585", "XP_006255857", "NP_001009324", "NP_001009152", "XP_012817790", "NP_001108006", "NP_001008690", "NP_001026633",    
"NP_851301")

uniprot_table <- c("Q01726", "Q01727", "B6ZGR5", "Q7YSE7", "Q9TUK4", "A0A803JW76", "P79166", "Q1ELV2", "Q802G0", "Q7ZTA3")

PDB_table <- c("NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA")
  
sciname_table <- c("Homo sapiens", "Mus musculus", "Rattus norvegicus", "Felis catus", "Pan troglodytes", "Xenopus tropicalus", "Equus caballus", "Sus scrofa", "Gallus gallus", "Danio rerio")

name_table <- c("Human", "Mouse", "Rat", "Cat", "Chimpanzee", "Western Clawed Frog",  "Horse", "Pig",  "Chicken", "Zebrafish")
  
genename_table <- c( "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R")

MC1r.df <- data.frame(RefSeq = refseq_table, Uniprot = uniprot_table, PDB = PDB_table, Scientfic_name = sciname_table, Common_name = name_table, Gene_name = genename_table)
pander::pander(MC1r.df)
Table continues below
RefSeq Uniprot PDB Scientfic_name Common_name
NP_001914 Q01726 NA Homo sapiens Human
NP_032585 Q01727 NA Mus musculus Mouse
XP_006255857 B6ZGR5 NA Rattus norvegicus Rat
NP_001009324 Q7YSE7 NA Felis catus Cat
NP_001009152 Q9TUK4 NA Pan troglodytes Chimpanzee
XP_012817790 A0A803JW76 NA Xenopus tropicalus Western Clawed Frog
NP_001108006 P79166 NA Equus caballus Horse
NP_001008690 Q1ELV2 NA Sus scrofa Pig
NP_001026633 Q802G0 NA Gallus gallus Chicken
NP_851301 Q7ZTA3 NA Danio rerio Zebrafish
Gene_name
MC1R
MC1R
MC1R
MC1R
MC1R
MC1R
MC1R
MC1R
MC1R
MC1R

DATA PREPARATION

#DOWNLOADING SEQUENCES

#homo sap MC1R
homosap_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001914.3",
                         rettype = "fasta")

#mus MC1R
mous_fasta <- rentrez::entrez_fetch(db = "protein",
                         id = "NP_032585.2",
                         rettype = "fasta")

#rattus norvegicus(rat) MC1R
rat_fasta <- rentrez::entrez_fetch(db = "protein", 
                        id = "XP_006255857.1", 
                        rettype = "fasta")

#felis catus(cat) MC1R
fel_fasta <- rentrez::entrez_fetch(db = "protein", 
                        id = "NP_001009324.1",
                        rettype = "fasta")

#pan trog(chimp) MC1R
pan_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001009152.1",
                        rettype = "fasta")

#xenopus trop(frog) MC1R
xeno_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "XP_012817790.1",
                        rettype = "fasta")

#equus call(horse) MC1R
equus_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001108006.1",
                        rettype = "fasta")


#sus scrofa(pig) MC1R
sus_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001008690.1",
                        rettype = "fasta")

#gallus gallus(chicken) MC1R
gallus_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001026633.1",
                        rettype = "fasta")

#danio rerio(zebrafish) MC1R
danio_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_851301.1",
                        rettype = "fasta")

mc1r_list <- list("NP_001914" = homosap_fasta, "NP_032585" = mous_fasta, "XP_006255857" =  rat_fasta,"NP_001009324" = fel_fasta, "NP_001009152" = pan_fasta, "XP_012817790" = xeno_fasta, "NP_001108006" = equus_fasta, "NP_001008690" = sus_fasta, "NP_001026633" =  gallus_fasta, "NP_851301" = danio_fasta)
fasta_cleaner <- function(fasta_object, parse = TRUE){

  fasta_object <- sub("^(>)(.*?)(\\n)(.*)(\\n\\n)","\\4",fasta_object)
  fasta_object <- gsub("\n", "", fasta_object)

  if(parse == TRUE){
    fasta_object <- stringr::str_split(fasta_object,
                                       pattern = "",
                                       simplify = FALSE)
  }

  return(fasta_object[[1]])
}
#Cleaning

homo_clean <- fasta_cleaner(homosap_fasta, parse = F)

mus_clean <- fasta_cleaner(mous_fasta, parse = F)

rat_clean <- fasta_cleaner(rat_fasta, parse = F)

fel_clean <- fasta_cleaner(fel_fasta, parse = F)

pan_clean <- fasta_cleaner(pan_fasta, parse = F)

xeno_clean <- fasta_cleaner(xeno_fasta, parse = F)

equus_clean <- fasta_cleaner(equus_fasta, parse = F)

sus_clean <- fasta_cleaner(sus_fasta, parse = F)

gallus_clean <- fasta_cleaner(gallus_fasta, parse = F)

danio_clean <- fasta_cleaner(danio_fasta, parse = F)

for(i in 1:length(mc1r_list)){
  mc1r_list[[i]] <- compbio4all::fasta_cleaner(mc1r_list[[i]], parse = F)
}

SKIPPED PROTEIN DIAGRAM BECAUSE OF RTOOLS ISSUE*

#LARGE DOTPLOT

homo_vector <- fasta_cleaner(homosap_fasta)
 
  
seqinr::dotPlot(homo_vector[350:500], homo_vector[350:500],
        wsize = 1, 
        nmatch = 1,
        main = "mc1r large dotplot w/ defaults")

#2x2 PANEL OF DOTPLOTS
par(mfrow = c(2,2), 
    mar = c(0,0,2,1))

# plot 1
dotPlot(homo_vector[350:500], 
        homo_vector[350:500], 
        wsize = 1, 
        nmatch = 1, 
        main = "MC1R Defaults")

# plot 2 Homo sapiens MC1R - size = 10, nmatch = 1
dotPlot(homo_vector[350:500], homo_vector[350:500], 
        wsize = 10, 
        nmatch = 1, 
        main = "homosap - size = 10, nmatch = 1")

# plot 3: Homo sapiens MC1R - size = 10, nmatch = 5
dotPlot(homo_vector[350:500], homo_vector[350:500], 
        wsize = 10, 
        nmatch = 5, 
        main = "homosap - size = 10, nmatch = 5")

# plot 4: size = 20, nmatch = 5
dotPlot(homo_vector[350:500], homo_vector[350:500], 
        wsize = 20,
        nmatch =5,
        main = "homosap - size = 20, nmatch = 5")

PROTEIN PROPERTIES COMPILED FROM DATABASES

Pfam <- c("7tm_1: From 55 to 298", "7tm_1: From 53 to 296", "7tm_1: From 55 to 298", "7tm_1: From 55 to 298", "7tm_1: From 55 to 298", "7tm_1: From 46 to 293", "7tm_1: From 55 to 298", "7tm_1: From 58 to 301", "7tm_1: From 53 to 295", "7tm_1: From 60 to 300")

DisProt <- c("No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available")

RepeatDB <- c("No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available")

Uniprot <- c("Plasma membrane", "Plasma membrane", "Plasma membrane",  "Plasma membrane", "Plasma membrane", "Plasma membrane", "Plasma membrane", "Plasma membrane",  "Plasma membrane", "Plasma membrane")

PDB <- c("No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available")

prot_prop.df <- data.frame(Pfam = Pfam, DisProt = DisProt, RepeatsDB = RepeatDB, UniProt = Uniprot, PDB = PDB)

pander::pander(prot_prop.df)
Table continues below
Pfam DisProt RepeatsDB
7tm_1: From 55 to 298 No information available No information available
7tm_1: From 53 to 296 No information available No information available
7tm_1: From 55 to 298 No information available No information available
7tm_1: From 55 to 298 No information available No information available
7tm_1: From 55 to 298 No information available No information available
7tm_1: From 46 to 293 No information available No information available
7tm_1: From 55 to 298 No information available No information available
7tm_1: From 58 to 301 No information available No information available
7tm_1: From 53 to 295 No information available No information available
7tm_1: From 60 to 300 No information available No information available
UniProt PDB
Plasma membrane No information available
Plasma membrane No information available
Plasma membrane No information available
Plasma membrane No information available
Plasma membrane No information available
Plasma membrane No information available
Plasma membrane No information available
Plasma membrane No information available
Plasma membrane No information available
Plasma membrane No information available

PROTEIN FEATURE PREDICTION

#SECONDARY STRUCTURE PREDICTION
aa.1.1 <- c("A","R","N","D","C","Q","E","G","H","I",
            "L","K","M","F","P","S","T","W","Y","V")

# enter again
aa.1.2 <- c("A","R","N","D","C","Q","E","G","H","I",
            "L","K","M","F","P","S","T","W","Y","V")

# are they the same length?
length(aa.1.1) == length(aa.1.2)
## [1] TRUE
aa.1.1 == aa.1.2
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE
unique(aa.1.1)
##  [1] "A" "R" "N" "D" "C" "Q" "E" "G" "H" "I" "L" "K" "M" "F" "P" "S" "T" "W" "Y"
## [20] "V"
length(unique(aa.1.1)) == length(unique(aa.1.2))
## [1] TRUE
any(c(aa.1.1 == aa.1.2) == FALSE)
## [1] FALSE
#alpha
alpha <- c(285, 53, 97, 163, 22, 67, 134, 197, 111, 91, 
           221, 249, 48, 123, 82, 122, 119, 33, 63, 167)

sum(alpha) == 2447
## [1] TRUE
#beta
beta <- c(203, 67, 139, 121, 75, 122, 86, 297, 49, 120, 
          177, 115, 16, 85, 127, 341, 253, 44, 110, 229)

sum(beta) == 2776
## [1] TRUE
# alpha + beta
a.plus.b <- c(175, 78, 120, 111, 74, 74, 86, 171, 33, 93,
              110, 112, 25, 52, 71, 126, 117, 30, 108, 123)
sum(a.plus.b) == 1889
## [1] TRUE
# alpha/beta
a.div.b <- c(361, 146, 183, 244, 63, 114, 257, 377, 107, 239, 
             339, 321, 91, 158, 188, 327, 238, 72, 130, 378)
sum(a.div.b) == 4333
## [1] TRUE
data.frame(aa.1.1, alpha, beta, a.plus.b, a.div.b)
##    aa.1.1 alpha beta a.plus.b a.div.b
## 1       A   285  203      175     361
## 2       R    53   67       78     146
## 3       N    97  139      120     183
## 4       D   163  121      111     244
## 5       C    22   75       74      63
## 6       Q    67  122       74     114
## 7       E   134   86       86     257
## 8       G   197  297      171     377
## 9       H   111   49       33     107
## 10      I    91  120       93     239
## 11      L   221  177      110     339
## 12      K   249  115      112     321
## 13      M    48   16       25      91
## 14      F   123   85       52     158
## 15      P    82  127       71     188
## 16      S   122  341      126     327
## 17      T   119  253      117     238
## 18      W    33   44       30      72
## 19      Y    63  110      108     130
## 20      V   167  229      123     378
alpha.prop <- alpha/sum(alpha)
beta.prop <- beta/sum(beta)
a.plus.b.prop <- a.plus.b/sum(a.plus.b)
a.div.b <- a.div.b/sum(a.div.b)

aa.prop <- data.frame(alpha.prop,
                      beta.prop,
                      a.plus.b.prop,
                      a.div.b)
#row labels
row.names(aa.prop) <- aa.1.1

plot(aa.prop,panel = panel.smooth)

cor(aa.prop)
##               alpha.prop beta.prop a.plus.b.prop   a.div.b
## alpha.prop     1.0000000 0.4941143     0.6969508 0.8555289
## beta.prop      0.4941143 1.0000000     0.7977771 0.7706654
## a.plus.b.prop  0.6969508 0.7977771     1.0000000 0.8198043
## a.div.b        0.8555289 0.7706654     0.8198043 1.0000000
round(cor(aa.prop), 3)
##               alpha.prop beta.prop a.plus.b.prop a.div.b
## alpha.prop         1.000     0.494         0.697   0.856
## beta.prop          0.494     1.000         0.798   0.771
## a.plus.b.prop      0.697     0.798         1.000   0.820
## a.div.b            0.856     0.771         0.820   1.000
par(mfrow = c(1,3), mar = c(4,4,1,0))
plot(alpha.prop ~ beta.prop, data = aa.prop)
plot(alpha.prop ~ a.plus.b.prop, data = aa.prop)
plot(alpha.prop ~ a.div.b, data = aa.prop)

par(mfrow = c(1,1), mar = c(4,4,4,4))
#MC1R

NP_001914 <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001914.3",
                         rettype = "fasta")

NP_001914 <- compbio4all::fasta_cleaner(NP_001914, parse = TRUE)

NP_001914.freq.table <- table(NP_001914)/length(NP_001914)
#Convert table to a vector

table_to_vector <- function(table_x){
  table_names <- attr(table_x, "dimnames")[[1]]
  table_vect <- as.vector(table_x)
  names(table_vect) <- table_names
  return(table_vect)
}

MC1R.human.aa.freq <- table_to_vector(NP_001914.freq.table)
#check for presence of U

aa.names <- names(MC1R.human.aa.freq)
any(aa.names == "U")
## [1] FALSE
#i.U <- which(aa.names == "U")

#aa.names[i.U]

#MC1R.human.aa.freq[i.U]

#which(aa.names %in% c("U"))

#MC1R.human.aa.freq <- MC1R.human.aa.freq[-i.U]

sum(MC1R.human.aa.freq)
## [1] 1
aa.prop$MC1R.human.aa.freq <- MC1R.human.aa.freq

pander::pander(aa.prop)
  alpha.prop beta.prop a.plus.b.prop a.div.b MC1R.human.aa.freq
A 0.1165 0.07313 0.09264 0.08331 0.05175
R 0.02166 0.02414 0.04129 0.03369 0.0193
N 0.03964 0.05007 0.06353 0.04223 0.04825
D 0.06661 0.04359 0.05876 0.05631 0.07982
C 0.008991 0.02702 0.03917 0.01454 0.04035
Q 0.02738 0.04395 0.03917 0.02631 0.07193
E 0.05476 0.03098 0.04553 0.05931 0.02456
G 0.08051 0.107 0.09052 0.08701 0.06316
H 0.04536 0.01765 0.01747 0.02469 0.04737
I 0.03719 0.04323 0.04923 0.05516 0.1096
L 0.09031 0.06376 0.05823 0.07824 0.02368
K 0.1018 0.04143 0.05929 0.07408 0.04737
M 0.01962 0.005764 0.01323 0.021 0.03596
F 0.05027 0.03062 0.02753 0.03646 0.04123
P 0.03351 0.04575 0.03759 0.04339 0.04211
S 0.04986 0.1228 0.0667 0.07547 0.07456
T 0.04863 0.09114 0.06194 0.05493 0.0614
W 0.01349 0.01585 0.01588 0.01662 0.07982
Y 0.02575 0.03963 0.05717 0.03 0.007018
V 0.06825 0.08249 0.06511 0.08724 0.0307
#Correlation 

chou_cor <- function(x,y){
  numerator <- sum(x*y)
denominator <- sqrt((sum(x^2))*(sum(y^2)))
result <- numerator/denominator
return(result)
}

chou_cosine <- function(z.1, z.2){
  z.1.abs <- sqrt(sum(z.1^2))
  z.2.abs <- sqrt(sum(z.2^2))
  my.cosine <- sum(z.1*z.2)/(z.1.abs*z.2.abs)
  return(my.cosine)
}

corr.alpha <- chou_cor(aa.prop[,5], aa.prop[,1])
corr.beta  <- chou_cor(aa.prop[,5], aa.prop[,2])
corr.apb   <- chou_cor(aa.prop[,5], aa.prop[,3])
corr.adb   <- chou_cor(aa.prop[,5], aa.prop[,4])

cos.alpha <- chou_cosine(aa.prop[,5], aa.prop[,1])
cos.beta  <- chou_cosine(aa.prop[,5], aa.prop[,2])
cos.apb   <- chou_cosine(aa.prop[,5], aa.prop[,3])
cos.adb   <- chou_cosine(aa.prop[,5], aa.prop[,4])
#Distance

aa.prop.flipped <- t(aa.prop)
round(aa.prop.flipped,2)
##                       A    R    N    D    C    Q    E    G    H    I    L    K
## alpha.prop         0.12 0.02 0.04 0.07 0.01 0.03 0.05 0.08 0.05 0.04 0.09 0.10
## beta.prop          0.07 0.02 0.05 0.04 0.03 0.04 0.03 0.11 0.02 0.04 0.06 0.04
## a.plus.b.prop      0.09 0.04 0.06 0.06 0.04 0.04 0.05 0.09 0.02 0.05 0.06 0.06
## a.div.b            0.08 0.03 0.04 0.06 0.01 0.03 0.06 0.09 0.02 0.06 0.08 0.07
## MC1R.human.aa.freq 0.05 0.02 0.05 0.08 0.04 0.07 0.02 0.06 0.05 0.11 0.02 0.05
##                       M    F    P    S    T    W    Y    V
## alpha.prop         0.02 0.05 0.03 0.05 0.05 0.01 0.03 0.07
## beta.prop          0.01 0.03 0.05 0.12 0.09 0.02 0.04 0.08
## a.plus.b.prop      0.01 0.03 0.04 0.07 0.06 0.02 0.06 0.07
## a.div.b            0.02 0.04 0.04 0.08 0.05 0.02 0.03 0.09
## MC1R.human.aa.freq 0.04 0.04 0.04 0.07 0.06 0.08 0.01 0.03
dist(aa.prop.flipped, method = "euclidean")
##                    alpha.prop  beta.prop a.plus.b.prop    a.div.b
## beta.prop          0.13342098                                    
## a.plus.b.prop      0.09281824 0.08289406                         
## a.div.b            0.06699039 0.08659174    0.06175113           
## MC1R.human.aa.freq 0.16921327 0.15410105    0.13982324 0.14662849
dist.alpha <- dist((aa.prop.flipped[c(1,5),]),  method = "euclidean")
dist.beta  <- dist((aa.prop.flipped[c(2,5),]),  method = "euclidean")
dist.apb   <- dist((aa.prop.flipped[c(3,5),]),  method = "euclidean")
dist.adb  <- dist((aa.prop.flipped[c(4,5),]), method = "euclidean")

fold.type <- c("alpha","beta","alpha plus beta", "alpha/beta")

# data
corr.sim <- round(c(corr.alpha,corr.beta,corr.apb,corr.adb),5)
cosine.sim <- round(c(cos.alpha,cos.beta,cos.apb,cos.adb),5)
Euclidean.dist <- round(c(dist.alpha,dist.beta,dist.apb,dist.adb),5)

# summary
sim.sum <- c("","","most.sim","")
dist.sum <- c("","","min.dist","")

df <- data.frame(fold.type,
           corr.sim ,
           cosine.sim ,
           Euclidean.dist ,
           sim.sum ,
           dist.sum )

pander::pander(df)
fold.type corr.sim cosine.sim Euclidean.dist sim.sum dist.sum
alpha 0.7775 0.7775 0.1692
beta 0.8186 0.8186 0.1541
alpha plus beta 0.8384 0.8384 0.1398 most.sim min.dist
alpha/beta 0.8254 0.8254 0.1466

PID

#Data prep

names(mc1r_list)
##  [1] "NP_001914"    "NP_032585"    "XP_006255857" "NP_001009324" "NP_001009152"
##  [6] "XP_012817790" "NP_001108006" "NP_001008690" "NP_001026633" "NP_851301"
length(mc1r_list)
## [1] 10
mc1r_list[1]
## $NP_001914
## [1] "MSYNYVVTAQKPTAVNGCVTGHFTSAEDLNLLIAKNTRLEIYVVTAEGLRPVKEVGMYGKIAVMELFRPKGESKDLLFILTAKYNACILEYKQSGESIDIITRAHGNVQDRIGRPSETGIIGIIDPECRMIGLRLYDGLFKVIPLDRDNKELKAFNIRLEELHVIDVKFLYGCQAPTICFVYQDPQGRHVKTYEVSLREKEFNKGPWKQENVEAEASMVIAVPEPFGGAIIIGQESITYHNGDKYLAIAPPIIKQSTIVCHNRVDPNGSRYLLGDMEGRLFMLLLEKEEQMDGTVTLKDLRVELLGETSIAECLTYLDNGVVFVGSRLGDSQLVKLNVDSNEQGSYVVAMETFTNLGPIVDMCVVDLERQGQGQLVTCSGAFKEGSLRIIRNGIGIHEHASIDLPGIKGLWPLRSDPNRETDDTLVLSFVGQTRVLMLNGEEVEETELMGFVDDQQTFFCGNVAHQQLIQITSASVRLVSQEPKALVSEWKEPQAKNISVASCNSSQVVVAVGRALYYLQIHPQELRQISHTEMEHEVACLDITPLGDSNGLSPLCAIGLWTDISARILKLPSFELLHKEMLGGEIIPRSILMTTFESSHYLLCALGDGALFYFGLNIETGLLSDRKKVTLGTQPTVLRTFRSLSTTNVFACSDRPTVIYSSNHKLVFSNVNLKEVNYMCPLNSDGYPDSLALANNSTLTIGTIDEIQKLHIRTVPLYESPRKICYQEVSQCFGVLSSRIEVQDTSGGTTALRPSASTQALSSSVSSSKLFSSSTAPHETSFGEEVEVHNLLIIDQHTFEVLHAHQFLQNEYALSLVSCKLGKDPNTYFIVGTAMVYPEEAEPKQGRIVVFQYSDGKLQTVAEKEVKGAVYSMVEFNGKLLASINSTVRLYEWTTEKELRTECNHYNNIMALYLKTKGDFILVGDLMRSVLLLAYKPMEGNFEEIARDFNPNWMSAVEILDDDNFLGAENAFNLFVCQKDSAATTDEERQHLQEVGLFHLGEFVNVFCHGSLVMQNLGETSTPTQGSVLFGTVNGMIGLVTSLSESWYNLLLDMQNRLNKVIKSVGKIEHSFWRSFHTERKTEPATGFIDGDLIESFLDISRPKMQEVVANLQYDDGSGMKREATADDLIKVVEELTRIH"
homo_vector <- unlist(mc1r_list[1])

mus_vector <- unlist(mc1r_list[2])

rat_vector <- unlist(mc1r_list[3])

fel_vector <- unlist(mc1r_list[4])

pan_vector <- unlist(mc1r_list[5])

xeno_vector <- unlist(mc1r_list[6])

equus_vector <- unlist(mc1r_list[7])

sus_vector <- unlist(mc1r_list[8])

gallus_vector <- unlist(mc1r_list[9])

danio_vector <- unlist(mc1r_list[10])

mc1r_vector <- unlist(mc1r_list)

names(mc1r_vector) <- names(mc1r_list)
#PID score 

#human aligns
align.homo.mus <- Biostrings::pairwiseAlignment(
                  homo_vector,
                  mus_vector)

align.homo.pan <- Biostrings::pairwiseAlignment(
                  homo_vector,
                  pan_vector)

align.homo.xeno <- Biostrings::pairwiseAlignment(
                  homo_vector,
                  xeno_vector)

#pan aligns
align.pan.mus <- Biostrings::pairwiseAlignment(
                  pan_vector,
                  mus_vector)

align.pan.xeno <- Biostrings::pairwiseAlignment(
                  pan_vector,
                  xeno_vector)

#xeno aligns

align.xeno.mus <- Biostrings::pairwiseAlignment(
                  xeno_vector,
                  mus_vector)

pids <- c(1,                  NA,     NA,     NA,
          pid(align.homo.pan),          1,     NA,     NA,
          pid(align.homo.mus), pid(align.pan.mus),      1,     NA,
          pid(align.homo.xeno), pid(align.pan.xeno), pid(align.xeno.mus), 1)

mat <- matrix(pids, nrow = 4, byrow = T)
row.names(mat) <- c("Homo","Pan","Xeno","Mus")   
colnames(mat) <- c("Homo","Pan","Xeno","Mus")   
pander::pander(mat)  
  Homo Pan Xeno Mus
Homo 1 NA NA NA
Pan 15.53 1 NA NA
Xeno 14.86 75.08 1 NA
Mus 14.79 51.4 55.11 1
#Biostrings::pid(align.homo.mus)
#PID comparison table- humans and chimps
method <- c("PID1", "PID2", "PID3", "PID4")
PID1 <- pid(align.homo.pan, type = "PID1")
PID2 <- pid(align.homo.pan, type = "PID2")
PID3 <- pid(align.homo.pan, type = "PID3")
PID4 <- pid(align.homo.pan, type = "PID4")

PID <- c(PID1, PID2, PID3, PID4)

denominator <- c("(aligned positions + internal gap positions)", "(aligned positions)", "(length shorter sequence)", "(average length of the two sequences)")

PID.df <- data.frame(method = method, PID = PID, denominator = denominator)

pander::pander(PID.df)
method PID denominator
PID1 15.53 (aligned positions + internal gap positions)
PID2 55.84 (aligned positions)
PID3 55.84 (length shorter sequence)
PID4 24.3 (average length of the two sequences)

MSA ALIGNMENT

#Alignment
mc1r_vector_ss <- Biostrings::AAStringSet(mc1r_vector)
mc1r_align <- msa::msa(mc1r_vector_ss,
                     method = "ClustalW")
## use default substitution matrix
class(mc1r_align) <- "AAMultipleAlignment"
mc1r_align_seqinr <- msaConvert(mc1r_align, type = "seqinr::alignment")

compbio4all::print_msa(mc1r_align_seqinr)
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "MSYNYVVTAQKPTAVNGCVTGHFTSAEDLNLLIAKNTRLEIYVVTAEGLRPVKEVGMYGK 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "IAVMELFRPKGESKDLLFILTAKYNACILEYKQSGESIDIITRAHGNVQDRIGRPSETGI 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "IGIIDPECRMIGLRLYDGLFKVIPLDRDNKELKAFNIRLEELHVIDVKFLYGCQAPTICF 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "VYQDPQGRHVKTYEVSLREKEFNKGPWKQENVEAEASMVIAVPEPFGGAIIIGQESITYH 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "NGDKYLAIAPPIIKQSTIVCHNRVDPNGSRYLLGDMEGRLFMLLLEKEEQMDGTVTLKDL 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "RVELLGETSIAECLTYLDNGVVFVGSRLGDSQLVKLNVDSNEQGSYVVAMETFTNLGPIV 0"
## [1] " "
## [1] "---------------------------------MSVQGPQRRLLGSLNSTSPAAPRLGLA 0"
## [1] "---------------------------------MPVLGPERRLLASLSSAPPAAPRLGLA 0"
## [1] "---------------------------------MPLQGPQRRLLGSLNSTLPATPYLGLT 0"
## [1] "---------------------------------MAVQGSQRRLLGSLNSTPTAIPQLGLA 0"
## [1] "---------------------------------MSTQEPQKSLLGSLNSN--ATSHLGLA 0"
## [1] "---------------------------------MPTQGPPKRLLGSLNSTSITTSHLGLA 0"
## [1] "---------------------------------MS----MLAPLRLLREPWNASEGNQSN 0"
## [1] "---------------------------------MNDSSRHHFSMKHMDYMYNADNNITLN 0"
## [1] "------------------------------------------MLHSTVNSTNATINVGTE 0"
## [1] "DMCVVDLERQGQGQLVTCSGAFKEGSLRIIRNGIGIHEHASIDLPGIKGLWPLRSDPNRE 0"
## [1] " "
## [1] "ANQT-------GPRCLELS------------VPDGLFLGLGLVSVVENVLVVAAIAKNRN 0"
## [1] "ANQTNQT----GPQCLEVS------------IPDGLFLSLGLVSLVENVLVVAAIAKNRN 0"
## [1] "TNQT-------EPPCLEVS------------IPDGLFLSLGLVSLVENVLVVTAIAKNRN 0"
## [1] "ANQT-------GARCLEVS------------IPDGLFLSLGLVSLVENMLVVATIAKNRN 0"
## [1] "TNQS-------EPWCLYVS------------IPDGLFLSLGLVSLVENVLVVIAITKNRN 0"
## [1] "TNQT-------GSWCLHVS------------IPDGLFLSLGLVSLVENVLVVVAITKNRN 0"
## [1] "ATAGAG-----GAWCQGLD------------IPNELFLTLGLVSLVENLLVVAAILKNRN 0"
## [1] "SNSTAS-----DINVTGIAQI---------MIPQELFLMLGLISLVENILVVVAIIKNRN 0"
## [1] "LKPT-------NTSDTVMD------------VPEELFLFLCVFSLLENILVVIAIFRNHN 0"
## [1] "TDDTLVLSFVGQTRVLMLNGEEVEETELMGFVDDQQTFFCGNVAHQQLIQITSASVRLVS 0"
## [1] " "
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFVCC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYCFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPTYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "QEPKALVSEWKEPQAKNISVASCNSSQVVVAVGRALYYLQIHPQELRQISHTEMEHEVAC 0"
## [1] " "
## [1] "LAVS------------------DLLVSVSSVLETAVMLLLEAGALAGRAAVVQRLDDIID 0"
## [1] "LAVS------------------DLLVSVSNVLETAVLLLLEAGALAAQAAVVQQLDNVMD 0"
## [1] "LAVS------------------DLLVSMSNVLEMAILLLLEAGVLATQASVLQQLDNIID 0"
## [1] "LALS------------------DLLVSGSNVLETAVILLLEAGALVARAAVLQQVDNVID 0"
## [1] "LALS------------------DLMVSVSIVLETTIILLLEAGILVARVALVQQLDNLID 0"
## [1] "LALS------------------DLMVSVSIVLETTIILLLEAGILVARAALVQQLDNVID 0"
## [1] "LAVS------------------DMLVSVSNLAKTLFMLLMEHGVLVIRASIVRHMDNVID 0"
## [1] "LAVA------------------DMLVSVSNVVETLFMLLTEHGLLLVTAKMLQHLDNVID 0"
## [1] "LAAS------------------DMLVSSSNLGETLIIFMLKQGIIKSEPLLVKKMDYIFD 0"
## [1] "LDITPLGDSNGLSPLCAIGLWTDISARILKLPSFELLHKEMLGGEIIPRSILMTTFESSH 0"
## [1] " "
## [1] "VLVCGAMVSSLCFLGAIAVDRYISIFYALRYHSIVTLPRAWRAISAIWVASVLSSTLFIA 0"
## [1] "VLICGSMVSSLCFLGAIAVDRYVSIFYALRYHSIVTLPRAGRAIAAIWAGSVLSSTLFIA 0"
## [1] "VLICGSMVSSLCFLGSIAVDRYISIFYALRYHSIMMLPRVWRAIVAIWVVSVLSSTLFIA 0"
## [1] "VITCSSMLSSLCFLGAIAVDRYISIFYALRYHSIVTLPRARRAIAAIWVASVLFSTLFIA 0"
## [1] "VLICGSMVSSLCFLGIIAIDRYISIFYALRYHSIVTLPRARRAVVGIWMVSIVSSTLFIT 0"
## [1] "VLICGSMVSSLCFLGVIAIDRYISIFYALRYHSIVTLSRARRAVVGIWVVSIVSSTLFIT 0"
## [1] "MLICSSVVSSLSFLGVIAVDRYITIFYALRYHSIMTLQRAVVTMASVWLASTVSSTVLIT 0"
## [1] "IMICSSVVSSLSFLCTIAADRYITIFYALRYHSIMTTQRAVGIILVVWLASITSSSLFIV 0"
## [1] "TMICCSLVTSLSFLGAIAIDRYITIFYALRYHSIMTLRRVVIAIGVIWSVSLVCAAIFIV 0"
## [1] "YLLCALGDGALFYFGLNIETGLLSDRKKVTLGTQPTVLRTFRSLSTTNVFACSDRPTVIY 0"
## [1] " "
## [1] "YYDHTAVLLCL-VSFFVAMLVLMAVLYVHMLARACQHARGIARLHKRQR----------- 0"
## [1] "YYHHTAVLLGL-VSFFVAMLALMAVLYVHMLARACQHGRHIARLHKTQH----------- 0"
## [1] "YYNHTAVLLCL-VTFFVAMLVLMAVLYVHMLARACQHARGIARLHKRQH----------- 0"
## [1] "YCDHTAVLLCL-VVFFLAVLVLMAVLYVHMLARACQHAQGIARLHKRQR----------- 0"
## [1] "YYKHTAVLLCL-VTFFLAMLALMAILYAHMFTRACQHAQGIAQLHKRRR----------- 0"
## [1] "YYKHTAVLLCL-VTFFLAMLALMAILYVHMLSRACQHAQGIARLHKRRH----------- 0"
## [1] "YYRNNAILLCL-IGFFLFMLVLMLVLYIHMFALARHHVRSISSQQKQP------------ 0"
## [1] "YHTDNAVIACL-VTFFGVTLVFTAVLYLHMFILAHVHSRRITALHK-------------- 0"
## [1] "YHESRAVILCL-IVFFLFMLALMVALYIHMFALARQHARSISALQKGKSRRI-------- 0"
## [1] "SSNHKLVFSNVNLKEVNYMCPLNSDGYPDSLALANNSTLTIGTIDEIQKLHIRTVPLYES 0"
## [1] " "
## [1] "---------------------------------PVHQGLGLKGAATLTILLGIFFLCWGP 0"
## [1] "---------------------------------PTRQGCGLKGAATLTILLGVFLLCWAP 0"
## [1] "---------------------------------PIHQGFGLKGAATLTILLGVFFLCWGP 0"
## [1] "---------------------------------PVHQGFGLKGAVTLTILLGIFFLCWGP 0"
## [1] "---------------------------------SIRQGFCLKGAATLTILLGIFFLCWGP 0"
## [1] "---------------------------------SIRQGFCLKGAATLTILLGIFFLCWAP 0"
## [1] "---------------------------------TIYRTSSLKGAVTLTILLGVFFICWGP 0"
## [1] "---------------------------------SRRQTTSMKGAITLTILLGVFILCWGP 0"
## [1] "--------------------------------TPHQARANMKGAITLTLLLGVFFLCWGP 0"
## [1] "PRKICYQEVSQCFGVLSSRIEVQDTSGGTTALRPSASTQALSSSVSSSKLFSSSTAPHET 0"
## [1] " "
## [1] "FF------LHLSLMVLCPRHPICGCVFKNFNLFLTLIICNSIVDP--------------- 0"
## [1] "FF------LHLSLVVLCPQHPTCGCVFKNVNLFLALVICNSIVDP--------------- 0"
## [1] "FF------LHLSLLILCPQHPTCGCVFKNFKLFLTLILCSAIVDP--------------- 0"
## [1] "FF------LHLTLIXLCPEHPTCGCIFKNFNLFLALIICNAIIDP--------------- 0"
## [1] "FF------LHLLLIVLCPQHPTCSCIFKNFNLFLLLIVLSSTVDP--------------- 0"
## [1] "FF------LHLLLIVLCPQHPTCSCIFKNFNLFLILIILSSIVDP--------------- 0"
## [1] "FF------FHLILIVTCPTNPFCTCFFSYFNLFLILIICNSVVDP--------------- 0"
## [1] "FF------LHLILILTCPTNPYCKCYFSHFNLFLILIICNSLIDP--------------- 0"
## [1] "LF------LHLTLFVSCPGHHICNSYFYYFNIYLLLVICNSVIDP--------------- 0"
## [1] "SFGEEVEVHNLLIIDQHTFEVLHAHQFLQNEYALSLVSCKLGKDPNTYFIVGTAMVYPEE 0"
## [1] " "
## [1] "--------LIYAFRSQELRKTLQEVLLCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRKTLQEVLQCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRKTLQEVLLCSW------------------------------ 0"
## [1] "--------LIYAFHSQELRRTLKEVLTCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRMTLKEVLLCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRMTLKEVLLCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRRTLREVVLCSW------------------------------ 0"
## [1] "--------LIYAYRSQELRKTLKELIFCSWCFAV-------------------------- 0"
## [1] "--------LIYAFRSQELRKTLKEIVWCSW------------------------------ 0"
## [1] "AEPKQGRIVVFQYSDGKLQTVAEKEVKGAVYSMVEFNGKLLASINSTVRLYEWTTEKELR 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "TECNHYNNIMALYLKTKGDFILVGDLMRSVLLLAYKPMEGNFEEIARDFNPNWMSAVEIL 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "DDDNFLGAENAFNLFVCQKDSAATTDEERQHLQEVGLFHLGEFVNVFCHGSLVMQNLGET 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "STPTQGSVLFGTVNGMIGLVTSLSESWYNLLLDMQNRLNKVIKSVGKIEHSFWRSFHTER 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "KTEPATGFIDGDLIESFLDISRPKMQEVVANLQYDDGSGMKREATADDLIKVVEELTRIH 0"
## [1] " "
#library(ggmsa)
ggmsa::ggmsa(mc1r_align,   # shrooms_align, NOT shrooms_align_seqinr
      start = 780, 
      end = 830)

DISTANCE MATRIX

names(mc1r_vector_ss)
##  [1] "NP_001914"    "NP_032585"    "XP_006255857" "NP_001009324" "NP_001009152"
##  [6] "XP_012817790" "NP_001108006" "NP_001008690" "NP_001026633" "NP_851301"
names.focal <- c("NP_001914", "NP_032585", "XP_006255857", "NP_001009324", "NP_001009152", "XP_012817790", "NP_001108006", "NP_001008690", "NP_001026633",     
"NP_851301")
mc1r_vector_ss_subset <- mc1r_vector_ss[names.focal]

mc1r_align_subset <- msa(mc1r_vector_ss_subset,
                     method = "ClustalW")
## use default substitution matrix
class(mc1r_align_subset) <- "AAMultipleAlignment"
mc1r_align_subset_seqinr <- msaConvert(mc1r_align_subset, type = "seqinr::alignment")

ggmsa::ggmsa(mc1r_align_subset,   # shrooms_align, NOT shrooms_align_seqinr
      start = 780, 
      end = 830) 

mc1r_subset_dist <- seqinr::dist.alignment(mc1r_align_subset_seqinr, 
                                       matrix = "identity")

is(mc1r_subset_dist)
## [1] "dist"     "oldClass"
class(mc1r_subset_dist)
## [1] "dist"
mc1r_subset_dist_alt <- matrix(data = mc1r_subset_dist,
                              nrow = 10, 
                              ncol = 10)
## Warning in matrix(data = mc1r_subset_dist, nrow = 10, ncol = 10): data length
## [45] is not a sub-multiple or multiple of the number of rows [10]
#mc1r_subset_dist_alt[lower.tri(mc1r_subset_dist_alt)] <- distances

seqnames <- c("NP_001914", "NP_032585", "XP_006255857", "NP_001009324", "NP_001009152", "XP_012817790", "NP_001108006", "NP_001008690", "NP_001026633",    
"NP_851301")
colnames(mc1r_subset_dist_alt) <- seqnames
row.names(mc1r_subset_dist_alt)  <- seqnames
mc1r_subset_dist_alt <- as.dist(mc1r_subset_dist_alt)
mc1r_subset_dist <- mc1r_subset_dist_alt

mc1r_subset_dist_rounded <- round(mc1r_subset_dist,
                              digits = 3)

mc1r_subset_dist_rounded
##              NP_001914 NP_032585 XP_006255857 NP_001009324 NP_001009152
## NP_032585        0.389                                                 
## XP_006255857     0.406     0.480                                       
## NP_001009324     0.488     0.648        0.918                          
## NP_001009152     0.473     0.666        0.492        0.929             
## XP_012817790     0.630     0.691        0.471        0.635        0.389
## NP_001108006     0.661     0.922        0.642        0.658        0.389
## NP_001008690     0.684     0.447        0.662        0.684        0.406
## NP_001026633     0.918     0.471        0.692        0.926        0.488
## NP_851301        0.417     0.456        0.924        0.610        0.473
##              XP_012817790 NP_001108006 NP_001008690 NP_001026633
## NP_032585                                                       
## XP_006255857                                                    
## NP_001009324                                                    
## NP_001009152                                                    
## XP_012817790                                                    
## NP_001108006        0.510                                       
## NP_001008690        0.480        0.686                          
## NP_001026633        0.648        0.918        0.674             
## NP_851301           0.666        0.492        0.929        0.927

PHYLOGENETIC TREE

tree_subset <- nj(mc1r_subset_dist)

plot.phylo(tree_subset, main="Phylogenetic Tree", 
            use.edge.length = F)

# add label
mtext(text = "MC1R family gene tree - rooted, no branch lengths")