#Introduction MC1R is a gene with no introns that encodes the receptor protein for melanocyte-stimulating hormone, or MSH. The protein that it codes for functions in melanogenesis and plays a major factor in determining sun sensitivity.
#Links RefSeq Gene: https://www.ncbi.nlm.nih.gov/gene/4157
RefSeq Homologene: https://www.ncbi.nlm.nih.gov/homologene/?term=Homo+sapiens+MC1R
# github packages
library(compbio4all)
#useful for functions in computational biology
library(ggmsa)
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
#used for annotating and visualizing MSAs
# CRAN packages
library(rentrez)
#uses NCBI's databases to download genetic and biographic data
library(seqinr)
#data analysis and visualizing protein and DNA data
library(ape)
##
## Attaching package: 'ape'
## The following objects are masked from 'package:seqinr':
##
## as.alignment, consensus
#used for reading and writing data for phylogenetic trees
library(pander)
#renders tables in R Markdown
library(ggplot2)
#creates complex plots from data in a data frame
## Biostrings
library(Biostrings)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: XVector
## Loading required package: GenomeInfoDb
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:ape':
##
## complement
## The following object is masked from 'package:seqinr':
##
## translate
## The following object is masked from 'package:base':
##
## strsplit
#efficient string containers for manipuling biological sequences
library(HGNChelper)
## Warning: package 'HGNChelper' was built under R version 4.1.2
#identifies known aliases and outdated gene symbols
# Bioconductor packages
library(msa)
#interface for many multiple sequence alignment algorithms
#ACCESSION NUMBER TABLE
#table_names <- c("RefSeq", "Uniprot", "PDB", "Scientific Name", "Common ")
refseq_table <- c("NP_001914", "NP_032585", "XP_006255857", "NP_001009324", "NP_001009152", "XP_012817790", "NP_001108006", "NP_001008690", "NP_001026633",
"NP_851301")
uniprot_table <- c("Q01726", "Q01727", "B6ZGR5", "Q7YSE7", "Q9TUK4", "A0A803JW76", "P79166", "Q1ELV2", "Q802G0", "Q7ZTA3")
PDB_table <- c("NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA")
sciname_table <- c("Homo sapiens", "Mus musculus", "Rattus norvegicus", "Felis catus", "Pan troglodytes", "Xenopus tropicalus", "Equus caballus", "Sus scrofa", "Gallus gallus", "Danio rerio")
name_table <- c("Human", "Mouse", "Rat", "Cat", "Chimpanzee", "Western Clawed Frog", "Horse", "Pig", "Chicken", "Zebrafish")
genename_table <- c( "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R", "MC1R")
MC1r.df <- data.frame(RefSeq = refseq_table, Uniprot = uniprot_table, PDB = PDB_table, Scientfic_name = sciname_table, Common_name = name_table, Gene_name = genename_table)
pander::pander(MC1r.df)
| RefSeq | Uniprot | PDB | Scientfic_name | Common_name |
|---|---|---|---|---|
| NP_001914 | Q01726 | NA | Homo sapiens | Human |
| NP_032585 | Q01727 | NA | Mus musculus | Mouse |
| XP_006255857 | B6ZGR5 | NA | Rattus norvegicus | Rat |
| NP_001009324 | Q7YSE7 | NA | Felis catus | Cat |
| NP_001009152 | Q9TUK4 | NA | Pan troglodytes | Chimpanzee |
| XP_012817790 | A0A803JW76 | NA | Xenopus tropicalus | Western Clawed Frog |
| NP_001108006 | P79166 | NA | Equus caballus | Horse |
| NP_001008690 | Q1ELV2 | NA | Sus scrofa | Pig |
| NP_001026633 | Q802G0 | NA | Gallus gallus | Chicken |
| NP_851301 | Q7ZTA3 | NA | Danio rerio | Zebrafish |
| Gene_name |
|---|
| MC1R |
| MC1R |
| MC1R |
| MC1R |
| MC1R |
| MC1R |
| MC1R |
| MC1R |
| MC1R |
| MC1R |
DATA PREPARATION
#DOWNLOADING SEQUENCES
#homo sap MC1R
homosap_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_001914.3",
rettype = "fasta")
#mus MC1R
mous_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_032585.2",
rettype = "fasta")
#rattus norvegicus(rat) MC1R
rat_fasta <- rentrez::entrez_fetch(db = "protein",
id = "XP_006255857.1",
rettype = "fasta")
#felis catus(cat) MC1R
fel_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_001009324.1",
rettype = "fasta")
#pan trog(chimp) MC1R
pan_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_001009152.1",
rettype = "fasta")
#xenopus trop(frog) MC1R
xeno_fasta <- rentrez::entrez_fetch(db = "protein",
id = "XP_012817790.1",
rettype = "fasta")
#equus call(horse) MC1R
equus_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_001108006.1",
rettype = "fasta")
#sus scrofa(pig) MC1R
sus_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_001008690.1",
rettype = "fasta")
#gallus gallus(chicken) MC1R
gallus_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_001026633.1",
rettype = "fasta")
#danio rerio(zebrafish) MC1R
danio_fasta <- rentrez::entrez_fetch(db = "protein",
id = "NP_851301.1",
rettype = "fasta")
mc1r_list <- list("NP_001914" = homosap_fasta, "NP_032585" = mous_fasta, "XP_006255857" = rat_fasta,"NP_001009324" = fel_fasta, "NP_001009152" = pan_fasta, "XP_012817790" = xeno_fasta, "NP_001108006" = equus_fasta, "NP_001008690" = sus_fasta, "NP_001026633" = gallus_fasta, "NP_851301" = danio_fasta)
fasta_cleaner <- function(fasta_object, parse = TRUE){
fasta_object <- sub("^(>)(.*?)(\\n)(.*)(\\n\\n)","\\4",fasta_object)
fasta_object <- gsub("\n", "", fasta_object)
if(parse == TRUE){
fasta_object <- stringr::str_split(fasta_object,
pattern = "",
simplify = FALSE)
}
return(fasta_object[[1]])
}
#Cleaning
homo_clean <- fasta_cleaner(homosap_fasta, parse = F)
mus_clean <- fasta_cleaner(mous_fasta, parse = F)
rat_clean <- fasta_cleaner(rat_fasta, parse = F)
fel_clean <- fasta_cleaner(fel_fasta, parse = F)
pan_clean <- fasta_cleaner(pan_fasta, parse = F)
xeno_clean <- fasta_cleaner(xeno_fasta, parse = F)
equus_clean <- fasta_cleaner(equus_fasta, parse = F)
sus_clean <- fasta_cleaner(sus_fasta, parse = F)
gallus_clean <- fasta_cleaner(gallus_fasta, parse = F)
danio_clean <- fasta_cleaner(danio_fasta, parse = F)
for(i in 1:length(mc1r_list)){
mc1r_list[[i]] <- compbio4all::fasta_cleaner(mc1r_list[[i]], parse = F)
}
SKIPPED PROTEIN DIAGRAM BECAUSE OF RTOOLS ISSUE*
#LARGE DOTPLOT
homo_vector <- fasta_cleaner(homosap_fasta)
seqinr::dotPlot(homo_vector[350:500], homo_vector[350:500],
wsize = 1,
nmatch = 1,
main = "mc1r large dotplot w/ defaults")
#2x2 PANEL OF DOTPLOTS
par(mfrow = c(2,2),
mar = c(0,0,2,1))
# plot 1
dotPlot(homo_vector[350:500],
homo_vector[350:500],
wsize = 1,
nmatch = 1,
main = "MC1R Defaults")
# plot 2 Homo sapiens MC1R - size = 10, nmatch = 1
dotPlot(homo_vector[350:500], homo_vector[350:500],
wsize = 10,
nmatch = 1,
main = "homosap - size = 10, nmatch = 1")
# plot 3: Homo sapiens MC1R - size = 10, nmatch = 5
dotPlot(homo_vector[350:500], homo_vector[350:500],
wsize = 10,
nmatch = 5,
main = "homosap - size = 10, nmatch = 5")
# plot 4: size = 20, nmatch = 5
dotPlot(homo_vector[350:500], homo_vector[350:500],
wsize = 20,
nmatch =5,
main = "homosap - size = 20, nmatch = 5")
PROTEIN PROPERTIES COMPILED FROM DATABASES
Pfam <- c("7tm_1: From 55 to 298", "7tm_1: From 53 to 296", "7tm_1: From 55 to 298", "7tm_1: From 55 to 298", "7tm_1: From 55 to 298", "7tm_1: From 46 to 293", "7tm_1: From 55 to 298", "7tm_1: From 58 to 301", "7tm_1: From 53 to 295", "7tm_1: From 60 to 300")
DisProt <- c("No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available")
RepeatDB <- c("No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available")
Uniprot <- c("Plasma membrane", "Plasma membrane", "Plasma membrane", "Plasma membrane", "Plasma membrane", "Plasma membrane", "Plasma membrane", "Plasma membrane", "Plasma membrane", "Plasma membrane")
PDB <- c("No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available", "No information available")
prot_prop.df <- data.frame(Pfam = Pfam, DisProt = DisProt, RepeatsDB = RepeatDB, UniProt = Uniprot, PDB = PDB)
pander::pander(prot_prop.df)
| Pfam | DisProt | RepeatsDB |
|---|---|---|
| 7tm_1: From 55 to 298 | No information available | No information available |
| 7tm_1: From 53 to 296 | No information available | No information available |
| 7tm_1: From 55 to 298 | No information available | No information available |
| 7tm_1: From 55 to 298 | No information available | No information available |
| 7tm_1: From 55 to 298 | No information available | No information available |
| 7tm_1: From 46 to 293 | No information available | No information available |
| 7tm_1: From 55 to 298 | No information available | No information available |
| 7tm_1: From 58 to 301 | No information available | No information available |
| 7tm_1: From 53 to 295 | No information available | No information available |
| 7tm_1: From 60 to 300 | No information available | No information available |
| UniProt | PDB |
|---|---|
| Plasma membrane | No information available |
| Plasma membrane | No information available |
| Plasma membrane | No information available |
| Plasma membrane | No information available |
| Plasma membrane | No information available |
| Plasma membrane | No information available |
| Plasma membrane | No information available |
| Plasma membrane | No information available |
| Plasma membrane | No information available |
| Plasma membrane | No information available |
PROTEIN FEATURE PREDICTION
#SECONDARY STRUCTURE PREDICTION
aa.1.1 <- c("A","R","N","D","C","Q","E","G","H","I",
"L","K","M","F","P","S","T","W","Y","V")
# enter again
aa.1.2 <- c("A","R","N","D","C","Q","E","G","H","I",
"L","K","M","F","P","S","T","W","Y","V")
# are they the same length?
length(aa.1.1) == length(aa.1.2)
## [1] TRUE
aa.1.1 == aa.1.2
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE
unique(aa.1.1)
## [1] "A" "R" "N" "D" "C" "Q" "E" "G" "H" "I" "L" "K" "M" "F" "P" "S" "T" "W" "Y"
## [20] "V"
length(unique(aa.1.1)) == length(unique(aa.1.2))
## [1] TRUE
any(c(aa.1.1 == aa.1.2) == FALSE)
## [1] FALSE
#alpha
alpha <- c(285, 53, 97, 163, 22, 67, 134, 197, 111, 91,
221, 249, 48, 123, 82, 122, 119, 33, 63, 167)
sum(alpha) == 2447
## [1] TRUE
#beta
beta <- c(203, 67, 139, 121, 75, 122, 86, 297, 49, 120,
177, 115, 16, 85, 127, 341, 253, 44, 110, 229)
sum(beta) == 2776
## [1] TRUE
# alpha + beta
a.plus.b <- c(175, 78, 120, 111, 74, 74, 86, 171, 33, 93,
110, 112, 25, 52, 71, 126, 117, 30, 108, 123)
sum(a.plus.b) == 1889
## [1] TRUE
# alpha/beta
a.div.b <- c(361, 146, 183, 244, 63, 114, 257, 377, 107, 239,
339, 321, 91, 158, 188, 327, 238, 72, 130, 378)
sum(a.div.b) == 4333
## [1] TRUE
data.frame(aa.1.1, alpha, beta, a.plus.b, a.div.b)
## aa.1.1 alpha beta a.plus.b a.div.b
## 1 A 285 203 175 361
## 2 R 53 67 78 146
## 3 N 97 139 120 183
## 4 D 163 121 111 244
## 5 C 22 75 74 63
## 6 Q 67 122 74 114
## 7 E 134 86 86 257
## 8 G 197 297 171 377
## 9 H 111 49 33 107
## 10 I 91 120 93 239
## 11 L 221 177 110 339
## 12 K 249 115 112 321
## 13 M 48 16 25 91
## 14 F 123 85 52 158
## 15 P 82 127 71 188
## 16 S 122 341 126 327
## 17 T 119 253 117 238
## 18 W 33 44 30 72
## 19 Y 63 110 108 130
## 20 V 167 229 123 378
alpha.prop <- alpha/sum(alpha)
beta.prop <- beta/sum(beta)
a.plus.b.prop <- a.plus.b/sum(a.plus.b)
a.div.b <- a.div.b/sum(a.div.b)
aa.prop <- data.frame(alpha.prop,
beta.prop,
a.plus.b.prop,
a.div.b)
#row labels
row.names(aa.prop) <- aa.1.1
plot(aa.prop,panel = panel.smooth)
cor(aa.prop)
## alpha.prop beta.prop a.plus.b.prop a.div.b
## alpha.prop 1.0000000 0.4941143 0.6969508 0.8555289
## beta.prop 0.4941143 1.0000000 0.7977771 0.7706654
## a.plus.b.prop 0.6969508 0.7977771 1.0000000 0.8198043
## a.div.b 0.8555289 0.7706654 0.8198043 1.0000000
round(cor(aa.prop), 3)
## alpha.prop beta.prop a.plus.b.prop a.div.b
## alpha.prop 1.000 0.494 0.697 0.856
## beta.prop 0.494 1.000 0.798 0.771
## a.plus.b.prop 0.697 0.798 1.000 0.820
## a.div.b 0.856 0.771 0.820 1.000
par(mfrow = c(1,3), mar = c(4,4,1,0))
plot(alpha.prop ~ beta.prop, data = aa.prop)
plot(alpha.prop ~ a.plus.b.prop, data = aa.prop)
plot(alpha.prop ~ a.div.b, data = aa.prop)
par(mfrow = c(1,1), mar = c(4,4,4,4))
#MC1R
NP_001914 <- rentrez::entrez_fetch(db = "protein",
id = "NP_001914.3",
rettype = "fasta")
NP_001914 <- compbio4all::fasta_cleaner(NP_001914, parse = TRUE)
NP_001914.freq.table <- table(NP_001914)/length(NP_001914)
#Convert table to a vector
table_to_vector <- function(table_x){
table_names <- attr(table_x, "dimnames")[[1]]
table_vect <- as.vector(table_x)
names(table_vect) <- table_names
return(table_vect)
}
MC1R.human.aa.freq <- table_to_vector(NP_001914.freq.table)
#check for presence of U
aa.names <- names(MC1R.human.aa.freq)
any(aa.names == "U")
## [1] FALSE
#i.U <- which(aa.names == "U")
#aa.names[i.U]
#MC1R.human.aa.freq[i.U]
#which(aa.names %in% c("U"))
#MC1R.human.aa.freq <- MC1R.human.aa.freq[-i.U]
sum(MC1R.human.aa.freq)
## [1] 1
aa.prop$MC1R.human.aa.freq <- MC1R.human.aa.freq
pander::pander(aa.prop)
| Â | alpha.prop | beta.prop | a.plus.b.prop | a.div.b | MC1R.human.aa.freq |
|---|---|---|---|---|---|
| A | 0.1165 | 0.07313 | 0.09264 | 0.08331 | 0.05175 |
| R | 0.02166 | 0.02414 | 0.04129 | 0.03369 | 0.0193 |
| N | 0.03964 | 0.05007 | 0.06353 | 0.04223 | 0.04825 |
| D | 0.06661 | 0.04359 | 0.05876 | 0.05631 | 0.07982 |
| C | 0.008991 | 0.02702 | 0.03917 | 0.01454 | 0.04035 |
| Q | 0.02738 | 0.04395 | 0.03917 | 0.02631 | 0.07193 |
| E | 0.05476 | 0.03098 | 0.04553 | 0.05931 | 0.02456 |
| G | 0.08051 | 0.107 | 0.09052 | 0.08701 | 0.06316 |
| H | 0.04536 | 0.01765 | 0.01747 | 0.02469 | 0.04737 |
| I | 0.03719 | 0.04323 | 0.04923 | 0.05516 | 0.1096 |
| L | 0.09031 | 0.06376 | 0.05823 | 0.07824 | 0.02368 |
| K | 0.1018 | 0.04143 | 0.05929 | 0.07408 | 0.04737 |
| M | 0.01962 | 0.005764 | 0.01323 | 0.021 | 0.03596 |
| F | 0.05027 | 0.03062 | 0.02753 | 0.03646 | 0.04123 |
| P | 0.03351 | 0.04575 | 0.03759 | 0.04339 | 0.04211 |
| S | 0.04986 | 0.1228 | 0.0667 | 0.07547 | 0.07456 |
| T | 0.04863 | 0.09114 | 0.06194 | 0.05493 | 0.0614 |
| W | 0.01349 | 0.01585 | 0.01588 | 0.01662 | 0.07982 |
| Y | 0.02575 | 0.03963 | 0.05717 | 0.03 | 0.007018 |
| V | 0.06825 | 0.08249 | 0.06511 | 0.08724 | 0.0307 |
#Correlation
chou_cor <- function(x,y){
numerator <- sum(x*y)
denominator <- sqrt((sum(x^2))*(sum(y^2)))
result <- numerator/denominator
return(result)
}
chou_cosine <- function(z.1, z.2){
z.1.abs <- sqrt(sum(z.1^2))
z.2.abs <- sqrt(sum(z.2^2))
my.cosine <- sum(z.1*z.2)/(z.1.abs*z.2.abs)
return(my.cosine)
}
corr.alpha <- chou_cor(aa.prop[,5], aa.prop[,1])
corr.beta <- chou_cor(aa.prop[,5], aa.prop[,2])
corr.apb <- chou_cor(aa.prop[,5], aa.prop[,3])
corr.adb <- chou_cor(aa.prop[,5], aa.prop[,4])
cos.alpha <- chou_cosine(aa.prop[,5], aa.prop[,1])
cos.beta <- chou_cosine(aa.prop[,5], aa.prop[,2])
cos.apb <- chou_cosine(aa.prop[,5], aa.prop[,3])
cos.adb <- chou_cosine(aa.prop[,5], aa.prop[,4])
#Distance
aa.prop.flipped <- t(aa.prop)
round(aa.prop.flipped,2)
## A R N D C Q E G H I L K
## alpha.prop 0.12 0.02 0.04 0.07 0.01 0.03 0.05 0.08 0.05 0.04 0.09 0.10
## beta.prop 0.07 0.02 0.05 0.04 0.03 0.04 0.03 0.11 0.02 0.04 0.06 0.04
## a.plus.b.prop 0.09 0.04 0.06 0.06 0.04 0.04 0.05 0.09 0.02 0.05 0.06 0.06
## a.div.b 0.08 0.03 0.04 0.06 0.01 0.03 0.06 0.09 0.02 0.06 0.08 0.07
## MC1R.human.aa.freq 0.05 0.02 0.05 0.08 0.04 0.07 0.02 0.06 0.05 0.11 0.02 0.05
## M F P S T W Y V
## alpha.prop 0.02 0.05 0.03 0.05 0.05 0.01 0.03 0.07
## beta.prop 0.01 0.03 0.05 0.12 0.09 0.02 0.04 0.08
## a.plus.b.prop 0.01 0.03 0.04 0.07 0.06 0.02 0.06 0.07
## a.div.b 0.02 0.04 0.04 0.08 0.05 0.02 0.03 0.09
## MC1R.human.aa.freq 0.04 0.04 0.04 0.07 0.06 0.08 0.01 0.03
dist(aa.prop.flipped, method = "euclidean")
## alpha.prop beta.prop a.plus.b.prop a.div.b
## beta.prop 0.13342098
## a.plus.b.prop 0.09281824 0.08289406
## a.div.b 0.06699039 0.08659174 0.06175113
## MC1R.human.aa.freq 0.16921327 0.15410105 0.13982324 0.14662849
dist.alpha <- dist((aa.prop.flipped[c(1,5),]), method = "euclidean")
dist.beta <- dist((aa.prop.flipped[c(2,5),]), method = "euclidean")
dist.apb <- dist((aa.prop.flipped[c(3,5),]), method = "euclidean")
dist.adb <- dist((aa.prop.flipped[c(4,5),]), method = "euclidean")
fold.type <- c("alpha","beta","alpha plus beta", "alpha/beta")
# data
corr.sim <- round(c(corr.alpha,corr.beta,corr.apb,corr.adb),5)
cosine.sim <- round(c(cos.alpha,cos.beta,cos.apb,cos.adb),5)
Euclidean.dist <- round(c(dist.alpha,dist.beta,dist.apb,dist.adb),5)
# summary
sim.sum <- c("","","most.sim","")
dist.sum <- c("","","min.dist","")
df <- data.frame(fold.type,
corr.sim ,
cosine.sim ,
Euclidean.dist ,
sim.sum ,
dist.sum )
pander::pander(df)
| fold.type | corr.sim | cosine.sim | Euclidean.dist | sim.sum | dist.sum |
|---|---|---|---|---|---|
| alpha | 0.7775 | 0.7775 | 0.1692 | ||
| beta | 0.8186 | 0.8186 | 0.1541 | ||
| alpha plus beta | 0.8384 | 0.8384 | 0.1398 | most.sim | min.dist |
| alpha/beta | 0.8254 | 0.8254 | 0.1466 |
PID
#Data prep
names(mc1r_list)
## [1] "NP_001914" "NP_032585" "XP_006255857" "NP_001009324" "NP_001009152"
## [6] "XP_012817790" "NP_001108006" "NP_001008690" "NP_001026633" "NP_851301"
length(mc1r_list)
## [1] 10
mc1r_list[1]
## $NP_001914
## [1] "MSYNYVVTAQKPTAVNGCVTGHFTSAEDLNLLIAKNTRLEIYVVTAEGLRPVKEVGMYGKIAVMELFRPKGESKDLLFILTAKYNACILEYKQSGESIDIITRAHGNVQDRIGRPSETGIIGIIDPECRMIGLRLYDGLFKVIPLDRDNKELKAFNIRLEELHVIDVKFLYGCQAPTICFVYQDPQGRHVKTYEVSLREKEFNKGPWKQENVEAEASMVIAVPEPFGGAIIIGQESITYHNGDKYLAIAPPIIKQSTIVCHNRVDPNGSRYLLGDMEGRLFMLLLEKEEQMDGTVTLKDLRVELLGETSIAECLTYLDNGVVFVGSRLGDSQLVKLNVDSNEQGSYVVAMETFTNLGPIVDMCVVDLERQGQGQLVTCSGAFKEGSLRIIRNGIGIHEHASIDLPGIKGLWPLRSDPNRETDDTLVLSFVGQTRVLMLNGEEVEETELMGFVDDQQTFFCGNVAHQQLIQITSASVRLVSQEPKALVSEWKEPQAKNISVASCNSSQVVVAVGRALYYLQIHPQELRQISHTEMEHEVACLDITPLGDSNGLSPLCAIGLWTDISARILKLPSFELLHKEMLGGEIIPRSILMTTFESSHYLLCALGDGALFYFGLNIETGLLSDRKKVTLGTQPTVLRTFRSLSTTNVFACSDRPTVIYSSNHKLVFSNVNLKEVNYMCPLNSDGYPDSLALANNSTLTIGTIDEIQKLHIRTVPLYESPRKICYQEVSQCFGVLSSRIEVQDTSGGTTALRPSASTQALSSSVSSSKLFSSSTAPHETSFGEEVEVHNLLIIDQHTFEVLHAHQFLQNEYALSLVSCKLGKDPNTYFIVGTAMVYPEEAEPKQGRIVVFQYSDGKLQTVAEKEVKGAVYSMVEFNGKLLASINSTVRLYEWTTEKELRTECNHYNNIMALYLKTKGDFILVGDLMRSVLLLAYKPMEGNFEEIARDFNPNWMSAVEILDDDNFLGAENAFNLFVCQKDSAATTDEERQHLQEVGLFHLGEFVNVFCHGSLVMQNLGETSTPTQGSVLFGTVNGMIGLVTSLSESWYNLLLDMQNRLNKVIKSVGKIEHSFWRSFHTERKTEPATGFIDGDLIESFLDISRPKMQEVVANLQYDDGSGMKREATADDLIKVVEELTRIH"
homo_vector <- unlist(mc1r_list[1])
mus_vector <- unlist(mc1r_list[2])
rat_vector <- unlist(mc1r_list[3])
fel_vector <- unlist(mc1r_list[4])
pan_vector <- unlist(mc1r_list[5])
xeno_vector <- unlist(mc1r_list[6])
equus_vector <- unlist(mc1r_list[7])
sus_vector <- unlist(mc1r_list[8])
gallus_vector <- unlist(mc1r_list[9])
danio_vector <- unlist(mc1r_list[10])
mc1r_vector <- unlist(mc1r_list)
names(mc1r_vector) <- names(mc1r_list)
#PID score
#human aligns
align.homo.mus <- Biostrings::pairwiseAlignment(
homo_vector,
mus_vector)
align.homo.pan <- Biostrings::pairwiseAlignment(
homo_vector,
pan_vector)
align.homo.xeno <- Biostrings::pairwiseAlignment(
homo_vector,
xeno_vector)
#pan aligns
align.pan.mus <- Biostrings::pairwiseAlignment(
pan_vector,
mus_vector)
align.pan.xeno <- Biostrings::pairwiseAlignment(
pan_vector,
xeno_vector)
#xeno aligns
align.xeno.mus <- Biostrings::pairwiseAlignment(
xeno_vector,
mus_vector)
pids <- c(1, NA, NA, NA,
pid(align.homo.pan), 1, NA, NA,
pid(align.homo.mus), pid(align.pan.mus), 1, NA,
pid(align.homo.xeno), pid(align.pan.xeno), pid(align.xeno.mus), 1)
mat <- matrix(pids, nrow = 4, byrow = T)
row.names(mat) <- c("Homo","Pan","Xeno","Mus")
colnames(mat) <- c("Homo","Pan","Xeno","Mus")
pander::pander(mat)
| Â | Homo | Pan | Xeno | Mus |
|---|---|---|---|---|
| Homo | 1 | NA | NA | NA |
| Pan | 15.53 | 1 | NA | NA |
| Xeno | 14.86 | 75.08 | 1 | NA |
| Mus | 14.79 | 51.4 | 55.11 | 1 |
#Biostrings::pid(align.homo.mus)
#PID comparison table- humans and chimps
method <- c("PID1", "PID2", "PID3", "PID4")
PID1 <- pid(align.homo.pan, type = "PID1")
PID2 <- pid(align.homo.pan, type = "PID2")
PID3 <- pid(align.homo.pan, type = "PID3")
PID4 <- pid(align.homo.pan, type = "PID4")
PID <- c(PID1, PID2, PID3, PID4)
denominator <- c("(aligned positions + internal gap positions)", "(aligned positions)", "(length shorter sequence)", "(average length of the two sequences)")
PID.df <- data.frame(method = method, PID = PID, denominator = denominator)
pander::pander(PID.df)
| method | PID | denominator |
|---|---|---|
| PID1 | 15.53 | (aligned positions + internal gap positions) |
| PID2 | 55.84 | (aligned positions) |
| PID3 | 55.84 | (length shorter sequence) |
| PID4 | 24.3 | (average length of the two sequences) |
MSA ALIGNMENT
#Alignment
mc1r_vector_ss <- Biostrings::AAStringSet(mc1r_vector)
mc1r_align <- msa::msa(mc1r_vector_ss,
method = "ClustalW")
## use default substitution matrix
class(mc1r_align) <- "AAMultipleAlignment"
mc1r_align_seqinr <- msaConvert(mc1r_align, type = "seqinr::alignment")
compbio4all::print_msa(mc1r_align_seqinr)
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "MSYNYVVTAQKPTAVNGCVTGHFTSAEDLNLLIAKNTRLEIYVVTAEGLRPVKEVGMYGK 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "IAVMELFRPKGESKDLLFILTAKYNACILEYKQSGESIDIITRAHGNVQDRIGRPSETGI 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "IGIIDPECRMIGLRLYDGLFKVIPLDRDNKELKAFNIRLEELHVIDVKFLYGCQAPTICF 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "VYQDPQGRHVKTYEVSLREKEFNKGPWKQENVEAEASMVIAVPEPFGGAIIIGQESITYH 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "NGDKYLAIAPPIIKQSTIVCHNRVDPNGSRYLLGDMEGRLFMLLLEKEEQMDGTVTLKDL 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "RVELLGETSIAECLTYLDNGVVFVGSRLGDSQLVKLNVDSNEQGSYVVAMETFTNLGPIV 0"
## [1] " "
## [1] "---------------------------------MSVQGPQRRLLGSLNSTSPAAPRLGLA 0"
## [1] "---------------------------------MPVLGPERRLLASLSSAPPAAPRLGLA 0"
## [1] "---------------------------------MPLQGPQRRLLGSLNSTLPATPYLGLT 0"
## [1] "---------------------------------MAVQGSQRRLLGSLNSTPTAIPQLGLA 0"
## [1] "---------------------------------MSTQEPQKSLLGSLNSN--ATSHLGLA 0"
## [1] "---------------------------------MPTQGPPKRLLGSLNSTSITTSHLGLA 0"
## [1] "---------------------------------MS----MLAPLRLLREPWNASEGNQSN 0"
## [1] "---------------------------------MNDSSRHHFSMKHMDYMYNADNNITLN 0"
## [1] "------------------------------------------MLHSTVNSTNATINVGTE 0"
## [1] "DMCVVDLERQGQGQLVTCSGAFKEGSLRIIRNGIGIHEHASIDLPGIKGLWPLRSDPNRE 0"
## [1] " "
## [1] "ANQT-------GPRCLELS------------VPDGLFLGLGLVSVVENVLVVAAIAKNRN 0"
## [1] "ANQTNQT----GPQCLEVS------------IPDGLFLSLGLVSLVENVLVVAAIAKNRN 0"
## [1] "TNQT-------EPPCLEVS------------IPDGLFLSLGLVSLVENVLVVTAIAKNRN 0"
## [1] "ANQT-------GARCLEVS------------IPDGLFLSLGLVSLVENMLVVATIAKNRN 0"
## [1] "TNQS-------EPWCLYVS------------IPDGLFLSLGLVSLVENVLVVIAITKNRN 0"
## [1] "TNQT-------GSWCLHVS------------IPDGLFLSLGLVSLVENVLVVVAITKNRN 0"
## [1] "ATAGAG-----GAWCQGLD------------IPNELFLTLGLVSLVENLLVVAAILKNRN 0"
## [1] "SNSTAS-----DINVTGIAQI---------MIPQELFLMLGLISLVENILVVVAIIKNRN 0"
## [1] "LKPT-------NTSDTVMD------------VPEELFLFLCVFSLLENILVVIAIFRNHN 0"
## [1] "TDDTLVLSFVGQTRVLMLNGEEVEETELMGFVDDQQTFFCGNVAHQQLIQITSASVRLVS 0"
## [1] " "
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFVCC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYCFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPTYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "-------------------------------------------------LHSPMYYFICC 0"
## [1] "QEPKALVSEWKEPQAKNISVASCNSSQVVVAVGRALYYLQIHPQELRQISHTEMEHEVAC 0"
## [1] " "
## [1] "LAVS------------------DLLVSVSSVLETAVMLLLEAGALAGRAAVVQRLDDIID 0"
## [1] "LAVS------------------DLLVSVSNVLETAVLLLLEAGALAAQAAVVQQLDNVMD 0"
## [1] "LAVS------------------DLLVSMSNVLEMAILLLLEAGVLATQASVLQQLDNIID 0"
## [1] "LALS------------------DLLVSGSNVLETAVILLLEAGALVARAAVLQQVDNVID 0"
## [1] "LALS------------------DLMVSVSIVLETTIILLLEAGILVARVALVQQLDNLID 0"
## [1] "LALS------------------DLMVSVSIVLETTIILLLEAGILVARAALVQQLDNVID 0"
## [1] "LAVS------------------DMLVSVSNLAKTLFMLLMEHGVLVIRASIVRHMDNVID 0"
## [1] "LAVA------------------DMLVSVSNVVETLFMLLTEHGLLLVTAKMLQHLDNVID 0"
## [1] "LAAS------------------DMLVSSSNLGETLIIFMLKQGIIKSEPLLVKKMDYIFD 0"
## [1] "LDITPLGDSNGLSPLCAIGLWTDISARILKLPSFELLHKEMLGGEIIPRSILMTTFESSH 0"
## [1] " "
## [1] "VLVCGAMVSSLCFLGAIAVDRYISIFYALRYHSIVTLPRAWRAISAIWVASVLSSTLFIA 0"
## [1] "VLICGSMVSSLCFLGAIAVDRYVSIFYALRYHSIVTLPRAGRAIAAIWAGSVLSSTLFIA 0"
## [1] "VLICGSMVSSLCFLGSIAVDRYISIFYALRYHSIMMLPRVWRAIVAIWVVSVLSSTLFIA 0"
## [1] "VITCSSMLSSLCFLGAIAVDRYISIFYALRYHSIVTLPRARRAIAAIWVASVLFSTLFIA 0"
## [1] "VLICGSMVSSLCFLGIIAIDRYISIFYALRYHSIVTLPRARRAVVGIWMVSIVSSTLFIT 0"
## [1] "VLICGSMVSSLCFLGVIAIDRYISIFYALRYHSIVTLSRARRAVVGIWVVSIVSSTLFIT 0"
## [1] "MLICSSVVSSLSFLGVIAVDRYITIFYALRYHSIMTLQRAVVTMASVWLASTVSSTVLIT 0"
## [1] "IMICSSVVSSLSFLCTIAADRYITIFYALRYHSIMTTQRAVGIILVVWLASITSSSLFIV 0"
## [1] "TMICCSLVTSLSFLGAIAIDRYITIFYALRYHSIMTLRRVVIAIGVIWSVSLVCAAIFIV 0"
## [1] "YLLCALGDGALFYFGLNIETGLLSDRKKVTLGTQPTVLRTFRSLSTTNVFACSDRPTVIY 0"
## [1] " "
## [1] "YYDHTAVLLCL-VSFFVAMLVLMAVLYVHMLARACQHARGIARLHKRQR----------- 0"
## [1] "YYHHTAVLLGL-VSFFVAMLALMAVLYVHMLARACQHGRHIARLHKTQH----------- 0"
## [1] "YYNHTAVLLCL-VTFFVAMLVLMAVLYVHMLARACQHARGIARLHKRQH----------- 0"
## [1] "YCDHTAVLLCL-VVFFLAVLVLMAVLYVHMLARACQHAQGIARLHKRQR----------- 0"
## [1] "YYKHTAVLLCL-VTFFLAMLALMAILYAHMFTRACQHAQGIAQLHKRRR----------- 0"
## [1] "YYKHTAVLLCL-VTFFLAMLALMAILYVHMLSRACQHAQGIARLHKRRH----------- 0"
## [1] "YYRNNAILLCL-IGFFLFMLVLMLVLYIHMFALARHHVRSISSQQKQP------------ 0"
## [1] "YHTDNAVIACL-VTFFGVTLVFTAVLYLHMFILAHVHSRRITALHK-------------- 0"
## [1] "YHESRAVILCL-IVFFLFMLALMVALYIHMFALARQHARSISALQKGKSRRI-------- 0"
## [1] "SSNHKLVFSNVNLKEVNYMCPLNSDGYPDSLALANNSTLTIGTIDEIQKLHIRTVPLYES 0"
## [1] " "
## [1] "---------------------------------PVHQGLGLKGAATLTILLGIFFLCWGP 0"
## [1] "---------------------------------PTRQGCGLKGAATLTILLGVFLLCWAP 0"
## [1] "---------------------------------PIHQGFGLKGAATLTILLGVFFLCWGP 0"
## [1] "---------------------------------PVHQGFGLKGAVTLTILLGIFFLCWGP 0"
## [1] "---------------------------------SIRQGFCLKGAATLTILLGIFFLCWGP 0"
## [1] "---------------------------------SIRQGFCLKGAATLTILLGIFFLCWAP 0"
## [1] "---------------------------------TIYRTSSLKGAVTLTILLGVFFICWGP 0"
## [1] "---------------------------------SRRQTTSMKGAITLTILLGVFILCWGP 0"
## [1] "--------------------------------TPHQARANMKGAITLTLLLGVFFLCWGP 0"
## [1] "PRKICYQEVSQCFGVLSSRIEVQDTSGGTTALRPSASTQALSSSVSSSKLFSSSTAPHET 0"
## [1] " "
## [1] "FF------LHLSLMVLCPRHPICGCVFKNFNLFLTLIICNSIVDP--------------- 0"
## [1] "FF------LHLSLVVLCPQHPTCGCVFKNVNLFLALVICNSIVDP--------------- 0"
## [1] "FF------LHLSLLILCPQHPTCGCVFKNFKLFLTLILCSAIVDP--------------- 0"
## [1] "FF------LHLTLIXLCPEHPTCGCIFKNFNLFLALIICNAIIDP--------------- 0"
## [1] "FF------LHLLLIVLCPQHPTCSCIFKNFNLFLLLIVLSSTVDP--------------- 0"
## [1] "FF------LHLLLIVLCPQHPTCSCIFKNFNLFLILIILSSIVDP--------------- 0"
## [1] "FF------FHLILIVTCPTNPFCTCFFSYFNLFLILIICNSVVDP--------------- 0"
## [1] "FF------LHLILILTCPTNPYCKCYFSHFNLFLILIICNSLIDP--------------- 0"
## [1] "LF------LHLTLFVSCPGHHICNSYFYYFNIYLLLVICNSVIDP--------------- 0"
## [1] "SFGEEVEVHNLLIIDQHTFEVLHAHQFLQNEYALSLVSCKLGKDPNTYFIVGTAMVYPEE 0"
## [1] " "
## [1] "--------LIYAFRSQELRKTLQEVLLCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRKTLQEVLQCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRKTLQEVLLCSW------------------------------ 0"
## [1] "--------LIYAFHSQELRRTLKEVLTCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRMTLKEVLLCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRMTLKEVLLCSW------------------------------ 0"
## [1] "--------LIYAFRSQELRRTLREVVLCSW------------------------------ 0"
## [1] "--------LIYAYRSQELRKTLKELIFCSWCFAV-------------------------- 0"
## [1] "--------LIYAFRSQELRKTLKEIVWCSW------------------------------ 0"
## [1] "AEPKQGRIVVFQYSDGKLQTVAEKEVKGAVYSMVEFNGKLLASINSTVRLYEWTTEKELR 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "TECNHYNNIMALYLKTKGDFILVGDLMRSVLLLAYKPMEGNFEEIARDFNPNWMSAVEIL 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "DDDNFLGAENAFNLFVCQKDSAATTDEERQHLQEVGLFHLGEFVNVFCHGSLVMQNLGET 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "STPTQGSVLFGTVNGMIGLVTSLSESWYNLLLDMQNRLNKVIKSVGKIEHSFWRSFHTER 0"
## [1] " "
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "------------------------------------------------------------ 0"
## [1] "KTEPATGFIDGDLIESFLDISRPKMQEVVANLQYDDGSGMKREATADDLIKVVEELTRIH 0"
## [1] " "
#library(ggmsa)
ggmsa::ggmsa(mc1r_align, # shrooms_align, NOT shrooms_align_seqinr
start = 780,
end = 830)
DISTANCE MATRIX
names(mc1r_vector_ss)
## [1] "NP_001914" "NP_032585" "XP_006255857" "NP_001009324" "NP_001009152"
## [6] "XP_012817790" "NP_001108006" "NP_001008690" "NP_001026633" "NP_851301"
names.focal <- c("NP_001914", "NP_032585", "XP_006255857", "NP_001009324", "NP_001009152", "XP_012817790", "NP_001108006", "NP_001008690", "NP_001026633",
"NP_851301")
mc1r_vector_ss_subset <- mc1r_vector_ss[names.focal]
mc1r_align_subset <- msa(mc1r_vector_ss_subset,
method = "ClustalW")
## use default substitution matrix
class(mc1r_align_subset) <- "AAMultipleAlignment"
mc1r_align_subset_seqinr <- msaConvert(mc1r_align_subset, type = "seqinr::alignment")
ggmsa::ggmsa(mc1r_align_subset, # shrooms_align, NOT shrooms_align_seqinr
start = 780,
end = 830)
mc1r_subset_dist <- seqinr::dist.alignment(mc1r_align_subset_seqinr,
matrix = "identity")
is(mc1r_subset_dist)
## [1] "dist" "oldClass"
class(mc1r_subset_dist)
## [1] "dist"
mc1r_subset_dist_alt <- matrix(data = mc1r_subset_dist,
nrow = 10,
ncol = 10)
## Warning in matrix(data = mc1r_subset_dist, nrow = 10, ncol = 10): data length
## [45] is not a sub-multiple or multiple of the number of rows [10]
#mc1r_subset_dist_alt[lower.tri(mc1r_subset_dist_alt)] <- distances
seqnames <- c("NP_001914", "NP_032585", "XP_006255857", "NP_001009324", "NP_001009152", "XP_012817790", "NP_001108006", "NP_001008690", "NP_001026633",
"NP_851301")
colnames(mc1r_subset_dist_alt) <- seqnames
row.names(mc1r_subset_dist_alt) <- seqnames
mc1r_subset_dist_alt <- as.dist(mc1r_subset_dist_alt)
mc1r_subset_dist <- mc1r_subset_dist_alt
mc1r_subset_dist_rounded <- round(mc1r_subset_dist,
digits = 3)
mc1r_subset_dist_rounded
## NP_001914 NP_032585 XP_006255857 NP_001009324 NP_001009152
## NP_032585 0.389
## XP_006255857 0.406 0.480
## NP_001009324 0.488 0.648 0.918
## NP_001009152 0.473 0.666 0.492 0.929
## XP_012817790 0.630 0.691 0.471 0.635 0.389
## NP_001108006 0.661 0.922 0.642 0.658 0.389
## NP_001008690 0.684 0.447 0.662 0.684 0.406
## NP_001026633 0.918 0.471 0.692 0.926 0.488
## NP_851301 0.417 0.456 0.924 0.610 0.473
## XP_012817790 NP_001108006 NP_001008690 NP_001026633
## NP_032585
## XP_006255857
## NP_001009324
## NP_001009152
## XP_012817790
## NP_001108006 0.510
## NP_001008690 0.480 0.686
## NP_001026633 0.648 0.918 0.674
## NP_851301 0.666 0.492 0.929 0.927
PHYLOGENETIC TREE
tree_subset <- nj(mc1r_subset_dist)
plot.phylo(tree_subset, main="Phylogenetic Tree",
use.edge.length = F)
# add label
mtext(text = "MC1R family gene tree - rooted, no branch lengths")