CB Final Project

#Introduction MC1R is a gene with no introns that encodes the receptor protein for melanocyte-stimulating hormone, or MSH. The protein that it codes for functions in melanogenesis and plays a major factor in determining sun sensitivity.

#Links RefSeq Gene: https://www.ncbi.nlm.nih.gov/gene/4157

RefSeq Homologene: https://www.ncbi.nlm.nih.gov/homologene/?term=Homo+sapiens+MC1R

# github packages
library(compbio4all)
#useful for functions in computational biology
library(ggmsa)

## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2

#used for annotating and visualizing MSAs

# CRAN packages
library(rentrez)
#uses NCBI's databases to download genetic and biographic data
library(seqinr)
#data analysis and visualizing protein and DNA data
library(ape)

## 
## Attaching package: 'ape'

## The following objects are masked from 'package:seqinr':
## 
##     as.alignment, consensus

#used for reading and writing data for phylogenetic trees
library(pander)
#renders tables in R Markdown
library(ggplot2)
#creates complex plots from data in a data frame

## Biostrings
library(Biostrings)

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min

## Loading required package: S4Vectors

## Loading required package: stats4

## 
## Attaching package: 'S4Vectors'

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

## Loading required package: IRanges

## 
## Attaching package: 'IRanges'

## The following object is masked from 'package:grDevices':
## 
##     windows

## Loading required package: XVector

## Loading required package: GenomeInfoDb

## 
## Attaching package: 'Biostrings'

## The following object is masked from 'package:ape':
## 
##     complement

## The following object is masked from 'package:seqinr':
## 
##     translate

## The following object is masked from 'package:base':
## 
##     strsplit

#efficient string containers for manipuling biological sequences
library(HGNChelper)

## Warning: package 'HGNChelper' was built under R version 4.1.2

#identifies known aliases and outdated gene symbols

# Bioconductor packages
library(msa)
#interface for many multiple sequence alignment algorithms

#ACCESSION NUMBER TABLE
#table_names <- c("RefSeq", "Uniprot", "PDB", "Scientific Name", "Common ")

refseq_table <- c("NP_001914", "NP_032585", "XP_006255857", "NP_001009324", "NP_001009152", "XP_012817790", "NP_001108006", "NP_001008690", "NP_001026633",    
"NP_851301")

uniprot_table <- c("Q01726", "Q01727", "B6ZGR5", "Q7YSE7", "Q9TUK4", "A0A803JW76", "P79166", "Q1ELV2", "Q802G0", "Q7ZTA3")

PDB_table <- c("NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA")
  
sciname_table <- c("Homo sapiens", "Mus musculus", "Rattus norvegicus", "Felis catus", "Pan troglodytes", "Xenopus tropicalus", "Equus caballus", "Sus scrofa", "Gallus gallus", "Danio rerio")

name_table <- c("Human", "Mouse", "Rat", "Cat", "Chimpanzee", "Western Clawed Frog",  "Horse", "Pig",  "Chicken", "Zebrafish")
  
genename_table <- c( "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R")

MC1r.df <- data.frame(RefSeq = refseq_table, Uniprot = uniprot_table, PDB = PDB_table, Scientfic_name = sciname_table, Common_name = name_table, Gene_name = genename_table)

pander::pander(MC1r.df)

Table continues below
RefSeq	Uniprot	PDB	Scientfic_name	Common_name
NP_001914	Q01726	NA	Homo sapiens	Human
NP_032585	Q01727	NA	Mus musculus	Mouse
XP_006255857	B6ZGR5	NA	Rattus norvegicus	Rat
NP_001009324	Q7YSE7	NA	Felis catus	Cat
NP_001009152	Q9TUK4	NA	Pan troglodytes	Chimpanzee
XP_012817790	A0A803JW76	NA	Xenopus tropicalus	Western Clawed Frog
NP_001108006	P79166	NA	Equus caballus	Horse
NP_001008690	Q1ELV2	NA	Sus scrofa	Pig
NP_001026633	Q802G0	NA	Gallus gallus	Chicken
NP_851301	Q7ZTA3	NA	Danio rerio	Zebrafish

Gene_name
MC1R
MC1R
MC1R
MC1R
MC1R
MC1R
MC1R
MC1R
MC1R
MC1R

DATA PREPARATION

#DOWNLOADING SEQUENCES

#homo sap MC1R
homosap_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001914.3",
                         rettype = "fasta")

#mus MC1R
mous_fasta <- rentrez::entrez_fetch(db = "protein",
                         id = "NP_032585.2",
                         rettype = "fasta")

#rattus norvegicus(rat) MC1R
rat_fasta <- rentrez::entrez_fetch(db = "protein", 
                        id = "XP_006255857.1", 
                        rettype = "fasta")

#felis catus(cat) MC1R
fel_fasta <- rentrez::entrez_fetch(db = "protein", 
                        id = "NP_001009324.1",
                        rettype = "fasta")

#pan trog(chimp) MC1R
pan_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001009152.1",
                        rettype = "fasta")

#xenopus trop(frog) MC1R
xeno_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "XP_012817790.1",
                        rettype = "fasta")

#equus call(horse) MC1R
equus_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001108006.1",
                        rettype = "fasta")


#sus scrofa(pig) MC1R
sus_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001008690.1",
                        rettype = "fasta")

#gallus gallus(chicken) MC1R
gallus_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001026633.1",
                        rettype = "fasta")

#danio rerio(zebrafish) MC1R
danio_fasta <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_851301.1",
                        rettype = "fasta")

mc1r_list <- list("NP_001914" = homosap_fasta, "NP_032585" = mous_fasta, "XP_006255857" =  rat_fasta,"NP_001009324" = fel_fasta, "NP_001009152" = pan_fasta, "XP_012817790" = xeno_fasta, "NP_001108006" = equus_fasta, "NP_001008690" = sus_fasta, "NP_001026633" =  gallus_fasta, "NP_851301" = danio_fasta)

fasta_cleaner <- function(fasta_object, parse = TRUE){

  fasta_object <- sub("^(>)(.*?)(\\n)(.*)(\\n\\n)","\\4",fasta_object)
  fasta_object <- gsub("\n", "", fasta_object)

  if(parse == TRUE){
    fasta_object <- stringr::str_split(fasta_object,
                                       pattern = "",
                                       simplify = FALSE)
  }

  return(fasta_object[[1]])
}

#Cleaning

homo_clean <- fasta_cleaner(homosap_fasta, parse = F)

mus_clean <- fasta_cleaner(mous_fasta, parse = F)

rat_clean <- fasta_cleaner(rat_fasta, parse = F)

fel_clean <- fasta_cleaner(fel_fasta, parse = F)

pan_clean <- fasta_cleaner(pan_fasta, parse = F)

xeno_clean <- fasta_cleaner(xeno_fasta, parse = F)

equus_clean <- fasta_cleaner(equus_fasta, parse = F)

sus_clean <- fasta_cleaner(sus_fasta, parse = F)

gallus_clean <- fasta_cleaner(gallus_fasta, parse = F)

danio_clean <- fasta_cleaner(danio_fasta, parse = F)

for(i in 1:length(mc1r_list)){
  mc1r_list[[i]] <- compbio4all::fasta_cleaner(mc1r_list[[i]], parse = F)
}

SKIPPED PROTEIN DIAGRAM BECAUSE OF RTOOLS ISSUE*

#LARGE DOTPLOT

homo_vector <- fasta_cleaner(homosap_fasta)
 
  
seqinr::dotPlot(homo_vector[350:500], homo_vector[350:500],
        wsize = 1, 
        nmatch = 1,
        main = "mc1r large dotplot w/ defaults")

#2x2 PANEL OF DOTPLOTS
par(mfrow = c(2,2), 
    mar = c(0,0,2,1))

# plot 1
dotPlot(homo_vector[350:500], 
        homo_vector[350:500], 
        wsize = 1, 
        nmatch = 1, 
        main = "MC1R Defaults")

# plot 2 Homo sapiens MC1R - size = 10, nmatch = 1
dotPlot(homo_vector[350:500], homo_vector[350:500], 
        wsize = 10, 
        nmatch = 1, 
        main = "homosap - size = 10, nmatch = 1")

# plot 3: Homo sapiens MC1R - size = 10, nmatch = 5
dotPlot(homo_vector[350:500], homo_vector[350:500], 
        wsize = 10, 
        nmatch = 5, 
        main = "homosap - size = 10, nmatch = 5")

# plot 4: size = 20, nmatch = 5
dotPlot(homo_vector[350:500], homo_vector[350:500], 
        wsize = 20,
        nmatch =5,
        main = "homosap - size = 20, nmatch = 5")

PROTEIN PROPERTIES COMPILED FROM DATABASES

Pfam <- c("7tm_1: From 55 to 298", "7tm_1: From 53 to 296", "7tm_1: From 55 to 298", "7tm_1: From 55 to 298", "7tm_1: From 55 to 298", "7tm_1: From 46 to 293", "7tm_1: From 55 to 298", "7tm_1: From 58 to 301", "7tm_1: From 53 to 295", "7tm_1: From 60 to 300")

DisProt <- c("No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available")

RepeatDB <- c("No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available")

Uniprot <- c("Plasma membrane", "Plasma membrane", "Plasma membrane",  "Plasma membrane", "Plasma membrane", "Plasma membrane", "Plasma membrane", "Plasma membrane",  "Plasma membrane", "Plasma membrane")

PDB <- c("No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available")

prot_prop.df <- data.frame(Pfam = Pfam, DisProt = DisProt, RepeatsDB = RepeatDB, UniProt = Uniprot, PDB = PDB)

pander::pander(prot_prop.df)

Table continues below
Pfam	DisProt	RepeatsDB
7tm_1: From 55 to 298	No information available	No information available
7tm_1: From 53 to 296	No information available	No information available
7tm_1: From 55 to 298	No information available	No information available
7tm_1: From 55 to 298	No information available	No information available
7tm_1: From 55 to 298	No information available	No information available
7tm_1: From 46 to 293	No information available	No information available
7tm_1: From 55 to 298	No information available	No information available
7tm_1: From 58 to 301	No information available	No information available
7tm_1: From 53 to 295	No information available	No information available
7tm_1: From 60 to 300	No information available	No information available

UniProt	PDB
Plasma membrane	No information available
Plasma membrane	No information available
Plasma membrane	No information available
Plasma membrane	No information available
Plasma membrane	No information available
Plasma membrane	No information available
Plasma membrane	No information available
Plasma membrane	No information available
Plasma membrane	No information available
Plasma membrane	No information available

PROTEIN FEATURE PREDICTION

#SECONDARY STRUCTURE PREDICTION
aa.1.1 <- c("A","R","N","D","C","Q","E","G","H","I",
            "L","K","M","F","P","S","T","W","Y","V")

# enter again
aa.1.2 <- c("A","R","N","D","C","Q","E","G","H","I",
            "L","K","M","F","P","S","T","W","Y","V")

# are they the same length?
length(aa.1.1) == length(aa.1.2)

## [1] TRUE

aa.1.1 == aa.1.2

##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE

unique(aa.1.1)

##  [1] "A" "R" "N" "D" "C" "Q" "E" "G" "H" "I" "L" "K" "M" "F" "P" "S" "T" "W" "Y"
## [20] "V"

length(unique(aa.1.1)) == length(unique(aa.1.2))

## [1] TRUE

any(c(aa.1.1 == aa.1.2) == FALSE)

## [1] FALSE

#alpha
alpha <- c(285, 53, 97, 163, 22, 67, 134, 197, 111, 91, 
           221, 249, 48, 123, 82, 122, 119, 33, 63, 167)

sum(alpha) == 2447

## [1] TRUE

#beta
beta <- c(203, 67, 139, 121, 75, 122, 86, 297, 49, 120, 
          177, 115, 16, 85, 127, 341, 253, 44, 110, 229)

sum(beta) == 2776

## [1] TRUE

# alpha + beta
a.plus.b <- c(175, 78, 120, 111, 74, 74, 86, 171, 33, 93,
              110, 112, 25, 52, 71, 126, 117, 30, 108, 123)
sum(a.plus.b) == 1889

## [1] TRUE

# alpha/beta
a.div.b <- c(361, 146, 183, 244, 63, 114, 257, 377, 107, 239, 
             339, 321, 91, 158, 188, 327, 238, 72, 130, 378)
sum(a.div.b) == 4333

## [1] TRUE

data.frame(aa.1.1, alpha, beta, a.plus.b, a.div.b)

##    aa.1.1 alpha beta a.plus.b a.div.b
## 1       A   285  203      175     361
## 2       R    53   67       78     146
## 3       N    97  139      120     183
## 4       D   163  121      111     244
## 5       C    22   75       74      63
## 6       Q    67  122       74     114
## 7       E   134   86       86     257
## 8       G   197  297      171     377
## 9       H   111   49       33     107
## 10      I    91  120       93     239
## 11      L   221  177      110     339
## 12      K   249  115      112     321
## 13      M    48   16       25      91
## 14      F   123   85       52     158
## 15      P    82  127       71     188
## 16      S   122  341      126     327
## 17      T   119  253      117     238
## 18      W    33   44       30      72
## 19      Y    63  110      108     130
## 20      V   167  229      123     378

alpha.prop <- alpha/sum(alpha)
beta.prop <- beta/sum(beta)
a.plus.b.prop <- a.plus.b/sum(a.plus.b)
a.div.b <- a.div.b/sum(a.div.b)

aa.prop <- data.frame(alpha.prop,
                      beta.prop,
                      a.plus.b.prop,
                      a.div.b)
#row labels
row.names(aa.prop) <- aa.1.1

plot(aa.prop,panel = panel.smooth)

cor(aa.prop)

##               alpha.prop beta.prop a.plus.b.prop   a.div.b
## alpha.prop     1.0000000 0.4941143     0.6969508 0.8555289
## beta.prop      0.4941143 1.0000000     0.7977771 0.7706654
## a.plus.b.prop  0.6969508 0.7977771     1.0000000 0.8198043
## a.div.b        0.8555289 0.7706654     0.8198043 1.0000000

round(cor(aa.prop), 3)

##               alpha.prop beta.prop a.plus.b.prop a.div.b
## alpha.prop         1.000     0.494         0.697   0.856
## beta.prop          0.494     1.000         0.798   0.771
## a.plus.b.prop      0.697     0.798         1.000   0.820
## a.div.b            0.856     0.771         0.820   1.000

par(mfrow = c(1,3), mar = c(4,4,1,0))
plot(alpha.prop ~ beta.prop, data = aa.prop)
plot(alpha.prop ~ a.plus.b.prop, data = aa.prop)
plot(alpha.prop ~ a.div.b, data = aa.prop)

par(mfrow = c(1,1), mar = c(4,4,4,4))

#MC1R

NP_001914 <- rentrez::entrez_fetch(db = "protein",
                        id = "NP_001914.3",
                         rettype = "fasta")

NP_001914 <- compbio4all::fasta_cleaner(NP_001914, parse = TRUE)

NP_001914.freq.table <- table(NP_001914)/length(NP_001914)

#Convert table to a vector

table_to_vector <- function(table_x){
  table_names <- attr(table_x, "dimnames")[[1]]
  table_vect <- as.vector(table_x)
  names(table_vect) <- table_names
  return(table_vect)
}

MC1R.human.aa.freq <- table_to_vector(NP_001914.freq.table)

#check for presence of U

aa.names <- names(MC1R.human.aa.freq)
any(aa.names == "U")

## [1] FALSE

#i.U <- which(aa.names == "U")

#aa.names[i.U]

#MC1R.human.aa.freq[i.U]

#which(aa.names %in% c("U"))

#MC1R.human.aa.freq <- MC1R.human.aa.freq[-i.U]

sum(MC1R.human.aa.freq)

## [1] 1

aa.prop$MC1R.human.aa.freq <- MC1R.human.aa.freq

pander::pander(aa.prop)

	alpha.prop	beta.prop	a.plus.b.prop	a.div.b	MC1R.human.aa.freq
A	0.1165	0.07313	0.09264	0.08331	0.05175
R	0.02166	0.02414	0.04129	0.03369	0.0193
N	0.03964	0.05007	0.06353	0.04223	0.04825
D	0.06661	0.04359	0.05876	0.05631	0.07982
C	0.008991	0.02702	0.03917	0.01454	0.04035
Q	0.02738	0.04395	0.03917	0.02631	0.07193
E	0.05476	0.03098	0.04553	0.05931	0.02456
G	0.08051	0.107	0.09052	0.08701	0.06316
H	0.04536	0.01765	0.01747	0.02469	0.04737
I	0.03719	0.04323	0.04923	0.05516	0.1096
L	0.09031	0.06376	0.05823	0.07824	0.02368
K	0.1018	0.04143	0.05929	0.07408	0.04737
M	0.01962	0.005764	0.01323	0.021	0.03596
F	0.05027	0.03062	0.02753	0.03646	0.04123
P	0.03351	0.04575	0.03759	0.04339	0.04211
S	0.04986	0.1228	0.0667	0.07547	0.07456
T	0.04863	0.09114	0.06194	0.05493	0.0614
W	0.01349	0.01585	0.01588	0.01662	0.07982
Y	0.02575	0.03963	0.05717	0.03	0.007018
V	0.06825	0.08249	0.06511	0.08724	0.0307

#Correlation 

chou_cor <- function(x,y){
  numerator <- sum(x*y)
denominator <- sqrt((sum(x^2))*(sum(y^2)))
result <- numerator/denominator
return(result)
}

chou_cosine <- function(z.1, z.2){
  z.1.abs <- sqrt(sum(z.1^2))
  z.2.abs <- sqrt(sum(z.2^2))
  my.cosine <- sum(z.1*z.2)/(z.1.abs*z.2.abs)
  return(my.cosine)
}

corr.alpha <- chou_cor(aa.prop[,5], aa.prop[,1])
corr.beta  <- chou_cor(aa.prop[,5], aa.prop[,2])
corr.apb   <- chou_cor(aa.prop[,5], aa.prop[,3])
corr.adb   <- chou_cor(aa.prop[,5], aa.prop[,4])

cos.alpha <- chou_cosine(aa.prop[,5], aa.prop[,1])
cos.beta  <- chou_cosine(aa.prop[,5], aa.prop[,2])
cos.apb   <- chou_cosine(aa.prop[,5], aa.prop[,3])
cos.adb   <- chou_cosine(aa.prop[,5], aa.prop[,4])

#Distance

aa.prop.flipped <- t(aa.prop)
round(aa.prop.flipped,2)

##                       A    R    N    D    C    Q    E    G    H    I    L    K
## alpha.prop         0.12 0.02 0.04 0.07 0.01 0.03 0.05 0.08 0.05 0.04 0.09 0.10
## beta.prop          0.07 0.02 0.05 0.04 0.03 0.04 0.03 0.11 0.02 0.04 0.06 0.04
## a.plus.b.prop      0.09 0.04 0.06 0.06 0.04 0.04 0.05 0.09 0.02 0.05 0.06 0.06
## a.div.b            0.08 0.03 0.04 0.06 0.01 0.03 0.06 0.09 0.02 0.06 0.08 0.07
## MC1R.human.aa.freq 0.05 0.02 0.05 0.08 0.04 0.07 0.02 0.06 0.05 0.11 0.02 0.05
##                       M    F    P    S    T    W    Y    V
## alpha.prop         0.02 0.05 0.03 0.05 0.05 0.01 0.03 0.07
## beta.prop          0.01 0.03 0.05 0.12 0.09 0.02 0.04 0.08
## a.plus.b.prop      0.01 0.03 0.04 0.07 0.06 0.02 0.06 0.07
## a.div.b            0.02 0.04 0.04 0.08 0.05 0.02 0.03 0.09
## MC1R.human.aa.freq 0.04 0.04 0.04 0.07 0.06 0.08 0.01 0.03

dist(aa.prop.flipped, method = "euclidean")

##                    alpha.prop  beta.prop a.plus.b.prop    a.div.b
## beta.prop          0.13342098                                    
## a.plus.b.prop      0.09281824 0.08289406                         
## a.div.b            0.06699039 0.08659174    0.06175113           
## MC1R.human.aa.freq 0.16921327 0.15410105    0.13982324 0.14662849

dist.alpha <- dist((aa.prop.flipped[c(1,5),]),  method = "euclidean")
dist.beta  <- dist((aa.prop.flipped[c(2,5),]),  method = "euclidean")
dist.apb   <- dist((aa.prop.flipped[c(3,5),]),  method = "euclidean")
dist.adb  <- dist((aa.prop.flipped[c(4,5),]), method = "euclidean")

fold.type <- c("alpha","beta","alpha plus beta", "alpha/beta")

# data
corr.sim <- round(c(corr.alpha,corr.beta,corr.apb,corr.adb),5)
cosine.sim <- round(c(cos.alpha,cos.beta,cos.apb,cos.adb),5)
Euclidean.dist <- round(c(dist.alpha,dist.beta,dist.apb,dist.adb),5)

# summary
sim.sum <- c("","","most.sim","")
dist.sum <- c("","","min.dist","")

df <- data.frame(fold.type,
           corr.sim ,
           cosine.sim ,
           Euclidean.dist ,
           sim.sum ,
           dist.sum )

pander::pander(df)

fold.type	corr.sim	cosine.sim	Euclidean.dist	sim.sum	dist.sum
alpha	0.7775	0.7775	0.1692
beta	0.8186	0.8186	0.1541
alpha plus beta	0.8384	0.8384	0.1398	most.sim	min.dist
alpha/beta	0.8254	0.8254	0.1466

PID

#Data prep

names(mc1r_list)

##  [1] "NP_001914"    "NP_032585"    "XP_006255857" "NP_001009324" "NP_001009152"
##  [6] "XP_012817790" "NP_001108006" "NP_001008690" "NP_001026633" "NP_851301"

length(mc1r_list)

## [1] 10

mc1r_list[1]

## $NP_001914
## [1] "MSYNYVVTAQKPTAVNGCVTGHFTSAEDLNLLIAKNTRLEIYVVTAEGLRPVKEVGMYGKIAVMELFRPKGESKDLLFILTAKYNACILEYKQSGESIDIITRAHGNVQDRIGRPSETGIIGIIDPECRMIGLRLYDGLFKVIPLDRDNKELKAFNIRLEELHVIDVKFLYGCQAPTICFVYQDPQGRHVKTYEVSLREKEFNKGPWKQENVEAEASMVIAVPEPFGGAIIIGQESITYHNGDKYLAIAPPIIKQSTIVCHNRVDPNGSRYLLGDMEGRLFMLLLEKEEQMDGTVTLKDLRVELLGETSIAECLTYLDNGVVFVGSRLGDSQLVKLNVDSNEQGSYVVAMETFTNLGPIVDMCVVDLERQGQGQLVTCSGAFKEGSLRIIRNGIGIHEHASIDLPGIKGLWPLRSDPNRETDDTLVLSFVGQTRVLMLNGEEVEETELMGFVDDQQTFFCGNVAHQQLIQITSASVRLVSQEPKALVSEWKEPQAKNISVASCNSSQVVVAVGRALYYLQIHPQELRQISHTEMEHEVACLDITPLGDSNGLSPLCAIGLWTDISARILKLPSFELLHKEMLGGEIIPRSILMTTFESSHYLLCALGDGALFYFGLNIETGLLSDRKKVTLGTQPTVLRTFRSLSTTNVFACSDRPTVIYSSNHKLVFSNVNLKEVNYMCPLNSDGYPDSLALANNSTLTIGTIDEIQKLHIRTVPLYESPRKICYQEVSQCFGVLSSRIEVQDTSGGTTALRPSASTQALSSSVSSSKLFSSSTAPHETSFGEEVEVHNLLIIDQHTFEVLHAHQFLQNEYALSLVSCKLGKDPNTYFIVGTAMVYPEEAEPKQGRIVVFQYSDGKLQTVAEKEVKGAVYSMVEFNGKLLASINSTVRLYEWTTEKELRTECNHYNNIMALYLKTKGDFILVGDLMRSVLLLAYKPMEGNFEEIARDFNPNWMSAVEILDDDNFLGAENAFNLFVCQKDSAATTDEERQHLQEVGLFHLGEFVNVFCHGSLVMQNLGETSTPTQGSVLFGTVNGMIGLVTSLSESWYNLLLDMQNRLNKVIKSVGKIEHSFWRSFHTERKTEPATGFIDGDLIESFLDISRPKMQEVVANLQYDDGSGMKREATADDLIKVVEELTRIH"

homo_vector <- unlist(mc1r_list[1])

mus_vector <- unlist(mc1r_list[2])

rat_vector <- unlist(mc1r_list[3])

fel_vector <- unlist(mc1r_list[4])

pan_vector <- unlist(mc1r_list[5])

xeno_vector <- unlist(mc1r_list[6])

equus_vector <- unlist(mc1r_list[7])

sus_vector <- unlist(mc1r_list[8])

gallus_vector <- unlist(mc1r_list[9])

danio_vector <- unlist(mc1r_list[10])

mc1r_vector <- unlist(mc1r_list)

names(mc1r_vector) <- names(mc1r_list)

#PID score 

#human aligns
align.homo.mus <- Biostrings::pairwiseAlignment(
                  homo_vector,
                  mus_vector)

align.homo.pan <- Biostrings::pairwiseAlignment(
                  homo_vector,
                  pan_vector)

align.homo.xeno <- Biostrings::pairwiseAlignment(
                  homo_vector,
                  xeno_vector)

#pan aligns
align.pan.mus <- Biostrings::pairwiseAlignment(
                  pan_vector,
                  mus_vector)

align.pan.xeno <- Biostrings::pairwiseAlignment(
                  pan_vector,
                  xeno_vector)

#xeno aligns

align.xeno.mus <- Biostrings::pairwiseAlignment(
                  xeno_vector,
                  mus_vector)

pids <- c(1,                  NA,     NA,     NA,
          pid(align.homo.pan),          1,     NA,     NA,
          pid(align.homo.mus), pid(align.pan.mus),      1,     NA,
          pid(align.homo.xeno), pid(align.pan.xeno), pid(align.xeno.mus), 1)

mat <- matrix(pids, nrow = 4, byrow = T)
row.names(mat) <- c("Homo","Pan","Xeno","Mus")   
colnames(mat) <- c("Homo","Pan","Xeno","Mus")   
pander::pander(mat)

	Homo	Pan	Xeno	Mus
Homo	1	NA	NA	NA
Pan	15.53	1	NA	NA
Xeno	14.86	75.08	1	NA
Mus	14.79	51.4	55.11	1

#Biostrings::pid(align.homo.mus)

#PID comparison table- humans and chimps
method <- c("PID1", "PID2", "PID3", "PID4")
PID1 <- pid(align.homo.pan, type = "PID1")
PID2 <- pid(align.homo.pan, type = "PID2")
PID3 <- pid(align.homo.pan, type = "PID3")
PID4 <- pid(align.homo.pan, type = "PID4")

PID <- c(PID1, PID2, PID3, PID4)

denominator <- c("(aligned positions + internal gap positions)", "(aligned positions)", "(length shorter sequence)", "(average length of the two sequences)")

PID.df <- data.frame(method = method, PID = PID, denominator = denominator)

pander::pander(PID.df)

method	PID	denominator
PID1	15.53	(aligned positions + internal gap positions)
PID2	55.84	(aligned positions)
PID3	55.84	(length shorter sequence)
PID4	24.3	(average length of the two sequences)

MSA ALIGNMENT

#Alignment
mc1r_vector_ss <- Biostrings::AAStringSet(mc1r_vector)
mc1r_align <- msa::msa(mc1r_vector_ss,
                     method = "ClustalW")

## use default substitution matrix

class(mc1r_align) <- "AAMultipleAlignment"
mc1r_align_seqinr <- msaConvert(mc1r_align, type = "seqinr::alignment")

compbio4all::print_msa(mc1r_align_seqinr)

## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "MSYNYVVTAQKPTAVNGCVTGHFTSAEDLNLLIAKNTRLEIYVVTAEGLRPVKEVGMYGK 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "IAVMELFRPKGESKDLLFILTAKYNACILEYKQSGESIDIITRAHGNVQDRIGRPSETGI 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "IGIIDPECRMIGLRLYDGLFKVIPLDRDNKELKAFNIRLEELHVIDVKFLYGCQAPTICF 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "VYQDPQGRHVKTYEVSLREKEFNKGPWKQENVEAEASMVIAVPEPFGGAIIIGQESITYH 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "NGDKYLAIAPPIIKQSTIVCHNRVDPNGSRYLLGDMEGRLFMLLLEKEEQMDGTVTLKDL 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "RVELLGETSIAECLTYLDNGVVFVGSRLGDSQLVKLNVDSNEQGSYVVAMETFTNLGPIV 0"
## [1] " "
## [1] "---------------------------------MSVQGPQRRLLGSLNSTSPAAPRLGLA 0"
## [1] "---------------------------------MPVLGPERRLLASLSSAPPAAPRLGLA 0"
## [1] "---------------------------------MPLQGPQRRLLGSLNSTLPATPYLGLT 0"
## [1] "---------------------------------MAVQGSQRRLLGSLNSTPTAIPQLGLA 0"
## [1] "---------------------------------MSTQEPQKSLLGSLNSN--ATSHLGLA 0"
## [1] "---------------------------------MPTQGPPKRLLGSLNSTSITTSHLGLA 0"
## [1] "---------------------------------MS----MLAPLRLLREPWNASEGNQSN 0"
## [1] "---------------------------------MNDSSRHHFSMKHMDYMYNADNNITLN 0"
## [1] "------------------------------------------MLHSTVNSTNATINVGTE 0"
## [1] "DMCVVDLERQGQGQLVTCSGAFKEGSLRIIRNGIGIHEHASIDLPGIKGLWPLRSDPNRE 0"
## [1] " "
## [1] "ANQT-------GPRCLELS------------VPDGLFLGLGLVSVVENVLVVAAIAKNRN 0"
## [1] "ANQTNQT----GPQCLEVS------------IPDGLFLSLGLVSLVENVLVVAAIAKNRN 0"
## [1] "TNQT-------EPPCLEVS------------IPDGLFLSLGLVSLVENVLVVTAIAKNRN 0"
## [1] "ANQT-------GARCLEVS------------IPDGLFLSLGLVSLVENMLVVATIAKNRN 0"
## [1] "TNQS-------EPWCLYVS------------IPDGLFLSLGLVSLVENVLVVIAITKNRN 0"
## [1] "TNQT-------GSWCLHVS------------IPDGLFLSLGLVSLVENVLVVVAITKNRN 0"
## [1] "ATAGAG-----GAWCQGLD------------IPNELFLTLGLVSLVENLLVVAAILKNRN 0"
## [1] "SNSTAS-----DINVTGIAQI---------MIPQELFLMLGLISLVENILVVVAIIKNRN 0"
## [1] "LKPT-------NTSDTVMD------------VPEELFLFLCVFSLLENILVVIAIFRNHN 0"
## [1] "TDDTLVLSFVGQTRVLMLNGEEVEETELMGFVDDQQTFFCGNVAHQQLIQITSASVRLVS 0"
## [1] " "
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFVCC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYCFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPTYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "QEPKALVSEWKEPQAKNISVASCNSSQVVVAVGRALYYLQIHPQELRQISHTEMEHEVAC 0"
## [1] " "
## [1] "LAVS------------------DLLVSVSSVLETAVMLLLEAGALAGRAAVVQRLDDIID 0"
## [1] "LAVS------------------DLLVSVSNVLETAVLLLLEAGALAAQAAVVQQLDNVMD 0"
## [1] "LAVS------------------DLLVSMSNVLEMAILLLLEAGVLATQASVLQQLDNIID 0"
## [1] "LALS------------------DLLVSGSNVLETAVILLLEAGALVARAAVLQQVDNVID 0"
## [1] "LALS------------------DLMVSVSIVLETTIILLLEAGILVARVALVQQLDNLID 0"
## [1] "LALS------------------DLMVSVSIVLETTIILLLEAGILVARAALVQQLDNVID 0"
## [1] "LAVS------------------DMLVSVSNLAKTLFMLLMEHGVLVIRASIVRHMDNVID 0"
## [1] "LAVA------------------DMLVSVSNVVETLFMLLTEHGLLLVTAKMLQHLDNVID 0"
## [1] "LAAS------------------DMLVSSSNLGETLIIFMLKQGIIKSEPLLVKKMDYIFD 0"
## [1] "LDITPLGDSNGLSPLCAIGLWTDISARILKLPSFELLHKEMLGGEIIPRSILMTTFESSH 0"
## [1] " "
## [1] "VLVCGAMVSSLCFLGAIAVDRYISIFYALRYHSIVTLPRAWRAISAIWVASVLSSTLFIA 0"
## [1] "VLICGSMVSSLCFLGAIAVDRYVSIFYALRYHSIVTLPRAGRAIAAIWAGSVLSSTLFIA 0"
## [1] "VLICGSMVSSLCFLGSIAVDRYISIFYALRYHSIMMLPRVWRAIVAIWVVSVLSSTLFIA 0"
## [1] "VITCSSMLSSLCFLGAIAVDRYISIFYALRYHSIVTLPRARRAIAAIWVASVLFSTLFIA 0"
## [1] "VLICGSMVSSLCFLGIIAIDRYISIFYALRYHSIVTLPRARRAVVGIWMVSIVSSTLFIT 0"
## [1] "VLICGSMVSSLCFLGVIAIDRYISIFYALRYHSIVTLSRARRAVVGIWVVSIVSSTLFIT 0"
## [1] "MLICSSVVSSLSFLGVIAVDRYITIFYALRYHSIMTLQRAVVTMASVWLASTVSSTVLIT 0"
## [1] "IMICSSVVSSLSFLCTIAADRYITIFYALRYHSIMTTQRAVGIILVVWLASITSSSLFIV 0"
## [1] "TMICCSLVTSLSFLGAIAIDRYITIFYALRYHSIMTLRRVVIAIGVIWSVSLVCAAIFIV 0"
## [1] "YLLCALGDGALFYFGLNIETGLLSDRKKVTLGTQPTVLRTFRSLSTTNVFACSDRPTVIY 0"
## [1] " "
## [1] "YYDHTAVLLCL-VSFFVAMLVLMAVLYVHMLARACQHARGIARLHKRQR----------- 0"
## [1] "YYHHTAVLLGL-VSFFVAMLALMAVLYVHMLARACQHGRHIARLHKTQH----------- 0"
## [1] "YYNHTAVLLCL-VTFFVAMLVLMAVLYVHMLARACQHARGIARLHKRQH----------- 0"
## [1] "YCDHTAVLLCL-VVFFLAVLVLMAVLYVHMLARACQHAQGIARLHKRQR----------- 0"
## [1] "YYKHTAVLLCL-VTFFLAMLALMAILYAHMFTRACQHAQGIAQLHKRRR----------- 0"
## [1] "YYKHTAVLLCL-VTFFLAMLALMAILYVHMLSRACQHAQGIARLHKRRH----------- 0"
## [1] "YYRNNAILLCL-IGFFLFMLVLMLVLYIHMFALARHHVRSISSQQKQP------------ 0"
## [1] "YHTDNAVIACL-VTFFGVTLVFTAVLYLHMFILAHVHSRRITALHK-------------- 0"
## [1] "YHESRAVILCL-IVFFLFMLALMVALYIHMFALARQHARSISALQKGKSRRI-------- 0"
## [1] "SSNHKLVFSNVNLKEVNYMCPLNSDGYPDSLALANNSTLTIGTIDEIQKLHIRTVPLYES 0"
## [1] " "
## [1] "---------------------------------PVHQGLGLKGAATLTILLGIFFLCWGP 0"
## [1] "---------------------------------PTRQGCGLKGAATLTILLGVFLLCWAP 0"
## [1] "---------------------------------PIHQGFGLKGAATLTILLGVFFLCWGP 0"
## [1] "---------------------------------PVHQGFGLKGAVTLTILLGIFFLCWGP 0"
## [1] "---------------------------------SIRQGFCLKGAATLTILLGIFFLCWGP 0"
## [1] "---------------------------------SIRQGFCLKGAATLTILLGIFFLCWAP 0"
## [1] "---------------------------------TIYRTSSLKGAVTLTILLGVFFICWGP 0"
## [1] "---------------------------------SRRQTTSMKGAITLTILLGVFILCWGP 0"
## [1] "--------------------------------TPHQARANMKGAITLTLLLGVFFLCWGP 0"
## [1] "PRKICYQEVSQCFGVLSSRIEVQDTSGGTTALRPSASTQALSSSVSSSKLFSSSTAPHET 0"
## [1] " "
## [1] "FF------LHLSLMVLCPRHPICGCVFKNFNLFLTLIICNSIVDP--------------- 0"
## [1] "FF------LHLSLVVLCPQHPTCGCVFKNVNLFLALVICNSIVDP--------------- 0"
## [1] "FF------LHLSLLILCPQHPTCGCVFKNFKLFLTLILCSAIVDP--------------- 0"
## [1] "FF------LHLTLIXLCPEHPTCGCIFKNFNLFLALIICNAIIDP--------------- 0"
## [1] "FF------LHLLLIVLCPQHPTCSCIFKNFNLFLLLIVLSSTVDP--------------- 0"
## [1] "FF------LHLLLIVLCPQHPTCSCIFKNFNLFLILIILSSIVDP--------------- 0"
## [1] "FF------FHLILIVTCPTNPFCTCFFSYFNLFLILIICNSVVDP--------------- 0"
## [1] "FF------LHLILILTCPTNPYCKCYFSHFNLFLILIICNSLIDP--------------- 0"
## [1] "LF------LHLTLFVSCPGHHICNSYFYYFNIYLLLVICNSVIDP--------------- 0"
## [1] "SFGEEVEVHNLLIIDQHTFEVLHAHQFLQNEYALSLVSCKLGKDPNTYFIVGTAMVYPEE 0"
## [1] " "
## [1] "--------LIYAFRSQELRKTLQEVLLCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRKTLQEVLQCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRKTLQEVLLCSW------------------------------ 0"
## [1] "--------LIYAFHSQELRRTLKEVLTCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRMTLKEVLLCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRMTLKEVLLCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRRTLREVVLCSW------------------------------ 0"
## [1] "--------LIYAYRSQELRKTLKELIFCSWCFAV-------------------------- 0"
## [1] "--------LIYAFRSQELRKTLKEIVWCSW------------------------------ 0"
## [1] "AEPKQGRIVVFQYSDGKLQTVAEKEVKGAVYSMVEFNGKLLASINSTVRLYEWTTEKELR 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "TECNHYNNIMALYLKTKGDFILVGDLMRSVLLLAYKPMEGNFEEIARDFNPNWMSAVEIL 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "DDDNFLGAENAFNLFVCQKDSAATTDEERQHLQEVGLFHLGEFVNVFCHGSLVMQNLGET 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "STPTQGSVLFGTVNGMIGLVTSLSESWYNLLLDMQNRLNKVIKSVGKIEHSFWRSFHTER 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "KTEPATGFIDGDLIESFLDISRPKMQEVVANLQYDDGSGMKREATADDLIKVVEELTRIH 0"
## [1] " "

#library(ggmsa)
ggmsa::ggmsa(mc1r_align,   # shrooms_align, NOT shrooms_align_seqinr
      start = 780, 
      end = 830)

DISTANCE MATRIX

names(mc1r_vector_ss)

##  [1] "NP_001914"    "NP_032585"    "XP_006255857" "NP_001009324" "NP_001009152"
##  [6] "XP_012817790" "NP_001108006" "NP_001008690" "NP_001026633" "NP_851301"

names.focal <- c("NP_001914", "NP_032585", "XP_006255857", "NP_001009324", "NP_001009152", "XP_012817790", "NP_001108006", "NP_001008690", "NP_001026633",     
"NP_851301")
mc1r_vector_ss_subset <- mc1r_vector_ss[names.focal]

mc1r_align_subset <- msa(mc1r_vector_ss_subset,
                     method = "ClustalW")

## use default substitution matrix

class(mc1r_align_subset) <- "AAMultipleAlignment"
mc1r_align_subset_seqinr <- msaConvert(mc1r_align_subset, type = "seqinr::alignment")

ggmsa::ggmsa(mc1r_align_subset,   # shrooms_align, NOT shrooms_align_seqinr
      start = 780, 
      end = 830)

mc1r_subset_dist <- seqinr::dist.alignment(mc1r_align_subset_seqinr, 
                                       matrix = "identity")

is(mc1r_subset_dist)

## [1] "dist"     "oldClass"

class(mc1r_subset_dist)

## [1] "dist"

mc1r_subset_dist_alt <- matrix(data = mc1r_subset_dist,
                              nrow = 10, 
                              ncol = 10)

## Warning in matrix(data = mc1r_subset_dist, nrow = 10, ncol = 10): data length
## [45] is not a sub-multiple or multiple of the number of rows [10]

#mc1r_subset_dist_alt[lower.tri(mc1r_subset_dist_alt)] <- distances

seqnames <- c("NP_001914", "NP_032585", "XP_006255857", "NP_001009324", "NP_001009152", "XP_012817790", "NP_001108006", "NP_001008690", "NP_001026633",    
"NP_851301")
colnames(mc1r_subset_dist_alt) <- seqnames
row.names(mc1r_subset_dist_alt)  <- seqnames
mc1r_subset_dist_alt <- as.dist(mc1r_subset_dist_alt)
mc1r_subset_dist <- mc1r_subset_dist_alt

mc1r_subset_dist_rounded <- round(mc1r_subset_dist,
                              digits = 3)

mc1r_subset_dist_rounded

##              NP_001914 NP_032585 XP_006255857 NP_001009324 NP_001009152
## NP_032585        0.389                                                 
## XP_006255857     0.406     0.480                                       
## NP_001009324     0.488     0.648        0.918                          
## NP_001009152     0.473     0.666        0.492        0.929             
## XP_012817790     0.630     0.691        0.471        0.635        0.389
## NP_001108006     0.661     0.922        0.642        0.658        0.389
## NP_001008690     0.684     0.447        0.662        0.684        0.406
## NP_001026633     0.918     0.471        0.692        0.926        0.488
## NP_851301        0.417     0.456        0.924        0.610        0.473
##              XP_012817790 NP_001108006 NP_001008690 NP_001026633
## NP_032585                                                       
## XP_006255857                                                    
## NP_001009324                                                    
## NP_001009152                                                    
## XP_012817790                                                    
## NP_001108006        0.510                                       
## NP_001008690        0.480        0.686                          
## NP_001026633        0.648        0.918        0.674             
## NP_851301           0.666        0.492        0.929        0.927

PHYLOGENETIC TREE

tree_subset <- nj(mc1r_subset_dist)

plot.phylo(tree_subset, main="Phylogenetic Tree", 
            use.edge.length = F)

# add label
mtext(text = "MC1R family gene tree - rooted, no branch lengths")

CB Final Project

Thulasi Varatharajan

12/2/21