Human PPARA is a nuclear receptor that belongs to the Peroxisome Proliferator-Activated Receptor (PPAR) family. PPARA is a transcription factor that helps regulate lipid metabolism: it is activated when a lack of energy is sensed and ketogenesis must be switched on.
Link to RefSeq: https://www.ncbi.nlm.nih.gov/nuccore/1434886074 Link to Homologene: https://www.ncbi.nlm.nih.gov/gene/5465 Link to Uniprot: https://www.uniprot.org/uniprot/Q07869 Link to PDB: shorturl.at/vLRX1
## CRAN PACKAGES
# install.packages("rentrez",dependencies = TRUE)
# install.packages("devtools")
# install.packages("seqinr")
# install.packages('rentrez')
### BiocManager
# install.packages("BiocManager")
## BioConductor packages
# BiocManager::install("msa")
# BiocManager::install("Biostrings")
# BiocManager::install("drawProteins")
## Other
#devtools::install_github("YuLab-SMU/ggmsa")
# github
library(compbio4all)
library(ggmsa)
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
## ggmsa v1.1.3 Document: http://yulab-smu.top/ggmsa/
##
## If you use ggmsa in published research, please cite: DOI: 10.18129/B9.bioc.ggmsa
# CRAN
library(rentrez)
library(seqinr)
library(ape)
##
## Attaching package: 'ape'
## The following objects are masked from 'package:seqinr':
##
## as.alignment, consensus
library(pander)
# other
library(Biostrings)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: XVector
## Loading required package: GenomeInfoDb
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:ape':
##
## complement
## The following object is masked from 'package:seqinr':
##
## translate
## The following object is masked from 'package:base':
##
## strsplit
library(BiocManager)
## Bioconductor version '3.13' is out-of-date; the current release version '3.14'
## is available with R version '4.1'; see https://bioconductor.org/install
library(drawProteins)
library(HGNChelper)
## Warning: package 'HGNChelper' was built under R version 4.1.2
library(msa)
##
## Attaching package: 'msa'
## The following object is masked from 'package:BiocManager':
##
## version
# Accession table for the PPARA homologs analysed in this document.
# The vectors are positional: element i of each vector describes the same
# species.

# RefSeq protein accessions. They are also used as sequence names further
# down, so they must not carry stray whitespace (the chimp entry previously
# had a leading space).
A_acc_num <- c("NP_005027", "NP_035274.2", "NP_001029208.1", "XP_038534677.1",
               "NP_001028201.1", "XP_001136470.1", "XP_020641438.1",
               "NP_001096037.1", "XP_040201712.1", "XP_007425989.2")
# UniProt accessions; a real NA (not the string "NA") marks a missing entry.
# NOTE(review): the mouse entry previously repeated the human accession
# (Q07869); P23204 is believed to be the mouse PPARA entry - confirm.
B_Uniprot <- c("Q07869", "P23204", "Q5EA13", "Q95N78", "G7N476", "H2QLW3",
               NA, "O42546", NA, NA)
C_comm_name <- c("Human", "Mouse", "Cow", "Dog", "Rhesus Monkey", "Chimp",
                 "Bearded Dragon", "Zebrafish", "Frog", "Burmese Python")
# PDB identifiers; only the human protein has one recorded here.
C_PDB <- c("3ET1", NA, NA, NA, NA, NA, NA, NA, NA, NA)
D_species <- c("H. sapiens", "M. musculus", "B. taurus", "C. lupus",
               "M. mulatta", "P. troglodytes", "P. vitticeps", "D. rerio",
               "R. temporaria", "P. bivittatus")
PPARAdf <- data.frame(A_acc_num, B_Uniprot, C_comm_name, C_PDB, D_species)
PPARAdf
## A_acc_num B_Uniprot C_comm_name C_PDB D_species
## 1 NP_005027 Q07869 Human 3ET1 H. sapiens
## 2 NP_035274.2 P23204 Mouse <NA> M. musculus
## 3 NP_001029208.1 Q5EA13 Cow <NA> B. taurus
## 4 XP_038534677.1 Q95N78 Dog <NA> C. lupus
## 5 NP_001028201.1 G7N476 Rhesus Monkey <NA> M. mulatta
## 6 XP_001136470.1 H2QLW3 Chimp <NA> P. troglodytes
## 7 XP_020641438.1 <NA> Bearded Dragon <NA> P. vitticeps
## 8 NP_001096037.1 O42546 Zebrafish <NA> D. rerio
## 9 XP_040201712.1 <NA> Frog <NA> R. temporaria
## 10 XP_007425989.2 <NA> Burmese Python <NA> P. bivittatus
#' Fetch several NCBI Entrez records, one request per identifier.
#'
#' @param db Entrez database name, passed to `rentrez::entrez_fetch()`.
#' @param id Vector of record identifiers; one request is made per element.
#' @param rettype Return format, passed to `rentrez::entrez_fetch()`.
#' @param ... Further arguments forwarded to `rentrez::entrez_fetch()`
#'   (previously accepted but silently dropped).
#' @return A list with one element per `id`, named by `id`.
entrez_fetch_list <- function(db, id, rettype, ...) {
  # lapply() copes with a zero-length `id` without issuing a request; a
  # `1:length(id)` loop would iterate over c(1, 0) and fetch id[1] (NA).
  list.output <- lapply(id, function(one_id) {
    rentrez::entrez_fetch(db = db, id = one_id, rettype = rettype, ...)
  })
  names(list.output) <- id
  list.output
}
# Download the FASTA record for every accession (one Entrez request each).
seq_list <- entrez_fetch_list(db = "protein", id = A_acc_num, rettype = "fasta")

# Clean every record with compbio4all's fasta_cleaner(). parse = FALSE is
# used because each cleaned sequence is stored below as one element of a
# character vector. lapply() keeps the accession names of the list and,
# unlike a `1:length()` loop, is safe for an empty list.
seq_list <- lapply(seq_list, fasta_cleaner, parse = FALSE)

# Collapse the list into a character vector; vapply() enforces that every
# cleaned record really is a single string.
PPARA_vec <- vapply(seq_list, function(one_seq) one_seq, character(1))
# name the vector
names(PPARA_vec) <- names(seq_list)
# Download the UniProt feature annotations for human PPARA (Q07869).
PPARA_human_draw <- drawProteins::get_features("Q07869")
## [1] "Download has worked"
# Convert the downloaded feature list into the data frame used by the
# drawProteins drawing functions.
draw_df <- drawProteins::feature_to_dataframe(PPARA_human_draw)

# Figure 1: protein chain with its domains.
my_canvas <- drawProteins::draw_domains(
  drawProteins::draw_chains(
    drawProteins::draw_canvas(draw_df),
    draw_df,
    label_size = 2.5
  ),
  draw_df
)
my_canvas

# Figure 2: protein chain with its regions.
my_canvas <- drawProteins::draw_regions(
  drawProteins::draw_chains(
    drawProteins::draw_canvas(draw_df),
    draw_df,
    label_size = 2.5
  ),
  draw_df
)
my_canvas

# Figure 3: protein chain with the features drawn by draw_folding().
my_canvas <- drawProteins::draw_folding(
  drawProteins::draw_chains(
    drawProteins::draw_canvas(draw_df),
    draw_df,
    label_size = 2.5
  ),
  draw_df
)
my_canvas
# Download the human PPARA protein record and clean it; with the default
# arguments fasta_cleaner() returns one residue per element (see str() output).
PPARA1 <- rentrez::entrez_fetch(id = "NP_005027",
                                db = "protein",
                                rettype = "fasta")
PPARA2 <- fasta_cleaner(PPARA1)
str(PPARA2)
## chr [1:468] "M" "V" "D" "T" "E" "S" "P" "L" "C" "P" "L" "S" "P" "L" "E" ...

# Self-vs-self dot plots on a 2 x 2 grid, varying the window size (wsize) and
# the number of matches required per window (nmatch). par() returns the
# previous settings, which are kept so they can be restored afterwards.
old_par <- par(mfrow = c(2, 2),
               mar = c(0, 0, 2, 1))
# plot 1: - Defaults
dotPlot(PPARA2, PPARA2,
        wsize = 1,
        nmatch = 1,
        main = "PPARA Defaults")
# plot 2- size = 10, nmatch = 1
dotPlot(PPARA2, PPARA2,
        wsize = 10,
        nmatch = 1,
        main = " PPARA- size = 10, nmatch = 1")
# plot 3: - size = 10, nmatch = 5
dotPlot(PPARA2, PPARA2,
        wsize = 10,
        nmatch = 5,
        main = " PPARA- size = 10, nmatch = 5")
# plot 4: size = 20, nmatch = 5
dotPlot(PPARA2, PPARA2,
        wsize = 20,
        nmatch = 5,
        main = " PPARA- size = 20, nmatch = 5")

# Restore the previous layout and margins so the standalone plot below gets
# a full-size panel instead of one quarter of a new 2 x 2 page.
par(old_par)
dotPlot(PPARA2,
        PPARA2,
        wsize = 20,
        nmatch = 5,
        main = "self v. self dotplot")
PFAM: Domain: zf-C4 (nuclear receptor) Domain: Hormone_recep (nuclear receptor) http://pfam.xfam.org/protein/Q07869
DISPROT: not available
REPEATSDB: not available
# Reference table: one value per amino acid for four classes. All five
# vectors are positional (element i of each refers to aa.1.1[i]).
# NOTE(review): presumably these are amino acid counts per protein fold
# class (alpha, beta, alpha+beta, alpha/beta); the source of the numbers is
# not shown in this file - confirm and cite.

# One-letter codes of the 20 amino acids; this order defines the row order
# of every table built from these vectors.
aa.1.1 <- c("A","R","N","D","C","Q","E","G","H","I",
"L","K","M","F","P","S","T","W","Y","V")
# Values for the "alpha" class.
alpha <- c(285, 53, 97, 163, 22, 67, 134, 197, 111, 91,
221, 249, 48, 123, 82, 122, 119, 33, 63, 167)
# Values for the "beta" class.
beta <- c(203, 67, 139, 121, 75, 122, 86, 297, 49, 120,
177, 115, 16, 85, 127, 341, 253, 44, 110, 229)
# Values for the "a.plus.b" class.
a.plus.b <- c(175, 78, 120, 111, 74, 74, 86, 171, 33, 93,
110, 112, 25, 52, 71, 126, 117, 30, 108, 123)
# Values for the "a.div.b" class.
a.div.b <- c(361, 146, 183, 244, 63, 114, 257, 377, 107, 239,
339, 321, 91, 158, 188, 327, 238, 72, 130, 378)
# Display the raw values side by side (the data frame is printed, not stored).
data.frame(aa.1.1, alpha, beta, a.plus.b, a.div.b)
## aa.1.1 alpha beta a.plus.b a.div.b
## 1 A 285 203 175 361
## 2 R 53 67 78 146
## 3 N 97 139 120 183
## 4 D 163 121 111 244
## 5 C 22 75 74 63
## 6 Q 67 122 74 114
## 7 E 134 86 86 257
## 8 G 197 297 171 377
## 9 H 111 49 33 107
## 10 I 91 120 93 239
## 11 L 221 177 110 339
## 12 K 249 115 112 321
## 13 M 48 16 25 91
## 14 F 123 85 52 158
## 15 P 82 127 71 188
## 16 S 122 341 126 327
## 17 T 119 253 117 238
## 18 W 33 44 30 72
## 19 Y 63 110 108 130
## 20 V 167 229 123 378
# Turn each class's raw values into proportions that sum to 1.
alpha.prop <- prop.table(alpha)
beta.prop <- prop.table(beta)
a.plus.b.prop <- prop.table(a.plus.b)
# Note: this one overwrites the raw vector instead of getting a ".prop" name;
# the column name a.div.b is relied on by the plots further down.
a.div.b <- prop.table(a.div.b)

# One row per amino acid (labelled by its one-letter code), one column per
# class.
aa.prop <- data.frame(alpha.prop,
                      beta.prop,
                      a.plus.b.prop,
                      a.div.b,
                      row.names = aa.1.1)
aa.prop
## alpha.prop beta.prop a.plus.b.prop a.div.b
## A 0.116469146 0.073126801 0.09264161 0.08331410
## R 0.021659174 0.024135447 0.04129169 0.03369490
## N 0.039640376 0.050072046 0.06352567 0.04223402
## D 0.066612178 0.043587896 0.05876125 0.05631202
## C 0.008990601 0.027017291 0.03917417 0.01453958
## Q 0.027380466 0.043948127 0.03917417 0.02630972
## E 0.054760932 0.030979827 0.04552673 0.05931225
## G 0.080506743 0.106988473 0.09052409 0.08700669
## H 0.045361667 0.017651297 0.01746956 0.02469421
## I 0.037188394 0.043227666 0.04923240 0.05515809
## L 0.090314671 0.063760807 0.05823187 0.07823679
## K 0.101757254 0.041426513 0.05929063 0.07408262
## M 0.019615856 0.005763689 0.01323452 0.02100162
## F 0.050265631 0.030619597 0.02752779 0.03646434
## P 0.033510421 0.045749280 0.03758602 0.04338795
## S 0.049856968 0.122838617 0.06670196 0.07546734
## T 0.048630977 0.091138329 0.06193753 0.05492730
## W 0.013485901 0.015850144 0.01588142 0.01661666
## Y 0.025745811 0.039625360 0.05717311 0.03000231
## V 0.068246833 0.082492795 0.06511382 0.08723748
# Scatterplot matrix of the class profiles, with a smoother in each panel.
plot(aa.prop, panel = panel.smooth)

# Pairwise correlations between the columns of aa.prop, computed once and
# shown at full precision and rounded to three decimals.
aa_cor <- cor(aa.prop)
aa_cor
## alpha.prop beta.prop a.plus.b.prop a.div.b
## alpha.prop 1.0000000 0.4941143 0.6969508 0.8555289
## beta.prop 0.4941143 1.0000000 0.7977771 0.7706654
## a.plus.b.prop 0.6969508 0.7977771 1.0000000 0.8198043
## a.div.b 0.8555289 0.7706654 0.8198043 1.0000000
round(aa_cor, 3)
## alpha.prop beta.prop a.plus.b.prop a.div.b
## alpha.prop 1.000 0.494 0.697 0.856
## beta.prop 0.494 1.000 0.798 0.771
## a.plus.b.prop 0.697 0.798 1.000 0.820
## a.div.b 0.856 0.771 0.820 1.000

# alpha.prop against each of the other three profiles, side by side.
par(mfrow = c(1, 3), mar = c(4, 4, 1, 0))
plot(alpha.prop ~ beta.prop, data = aa.prop)
plot(alpha.prop ~ a.plus.b.prop, data = aa.prop)
plot(alpha.prop ~ a.div.b, data = aa.prop)
# Back to a single panel with margins of 4 lines on every side.
par(mfrow = c(1, 1), mar = c(4, 4, 4, 4))
#functions
#' Flatten a table into a plain named vector.
#'
#' @param table_x A table, e.g. the result of `table()` on a single vector.
#' @return The table's values as a vector, named with the labels of the
#'   table's first dimension.
table_to_vector <- function(table_x) {
  stats::setNames(as.vector(table_x), dimnames(table_x)[[1]])
}
#' Uncentred correlation of two numeric vectors.
#'
#' Computes sum(x * y) / sqrt(sum(x^2) * sum(y^2)). Algebraically this is the
#' same quantity as `chou_cosine()`.
#'
#' @param x,y Numeric vectors of equal length.
#' @return A single number.
chou_cor <- function(x, y) {
  cross_sum <- sum(x * y)
  norm_product <- sqrt(sum(x^2) * sum(y^2))
  cross_sum / norm_product
}
#' Cosine of the angle between two numeric vectors.
#'
#' Divides the dot product of the two vectors by the product of their
#' Euclidean lengths.
#'
#' @param z.1,z.2 Numeric vectors of equal length.
#' @return A single number.
chou_cosine <- function(z.1, z.2) {
  vec_length <- function(v) sqrt(sum(v^2))
  sum(z.1 * z.2) / (vec_length(z.1) * vec_length(z.2))
}
# Compare the amino acid composition of human PPARA (NP_005027) with the
# class profiles stored in aa.prop.
#
# Author's original note: `aa.prop$human.aa.freq <- human.aa.freq` threw
# "replacement has 19 rows, data has 20".
# Cause: table() only reports residues that actually occur in the sequence,
# so one of the 20 amino acids was missing from the frequency table; table()
# also sorts its labels alphabetically, which does not match the row order of
# aa.prop. Counting over factor(levels = aa.1.1) gives all 20 entries (zero
# for an absent residue) in the same order as aa.prop's rows. The assignment
# is also moved below the point where human.aa.freq is created.

# Download and clean the sequence; parse = TRUE is used because the
# frequency table below needs one residue per element.
NP_005027 <- rentrez::entrez_fetch(id = "NP_005027",
                                   db = "protein",
                                   rettype = "fasta")
NP_005027 <- compbio4all::fasta_cleaner(NP_005027, parse = TRUE)

# Relative frequency of each of the 20 amino acids, in aa.1.1 order.
NP_005027.freq.table <- table(factor(NP_005027, levels = aa.1.1)) /
  length(NP_005027)
human.aa.freq <- table_to_vector(NP_005027.freq.table)

aa.names <- names(human.aa.freq)
# Check the raw sequence for the non-standard residue "U"; the frequency
# table can no longer show it because its labels are fixed to aa.1.1.
any(NP_005027 == "U") # returns FALSE

# Add the human profile as a column, matched to aa.prop's rows by name.
aa.prop$human.aa.freq <- unname(human.aa.freq[row.names(aa.prop)])

# Each class profile against the human profile.
par(mfrow = c(2, 2), mar = c(1, 4, 1, 1))
plot(alpha.prop ~ human.aa.freq, data = aa.prop)
plot(beta.prop ~ human.aa.freq, data = aa.prop)
plot(a.plus.b.prop ~ human.aa.freq, data = aa.prop)
plot(a.div.b ~ human.aa.freq, data = aa.prop)
par(mfrow = c(1, 1), mar = c(4, 4, 4, 4))

PPARA1 <- rentrez::entrez_fetch(id = "NP_005027",
                                db = "protein",
                                rettype = "fasta")
# str() only prints and returns NULL, so keep the cleaned sequences and
# inspect them separately (the original stored the NULL).
A.PPARA <- fasta_cleaner(PPARA1)
B.PPARA <- fasta_cleaner(PPARA1)
str(A.PPARA)
str(B.PPARA)

# Similarity of the human profile to each class profile. Columns are picked
# by name so the result does not depend on column order.
human.col <- aa.prop[["human.aa.freq"]]
corr.alpha <- chou_cor(human.col, aa.prop[["alpha.prop"]])
corr.beta <- chou_cor(human.col, aa.prop[["beta.prop"]])
corr.apb <- chou_cor(human.col, aa.prop[["a.plus.b.prop"]])
corr.adb <- chou_cor(human.col, aa.prop[["a.div.b"]])

cos.alpha <- chou_cosine(human.col, aa.prop[["alpha.prop"]])
cos.beta <- chou_cosine(human.col, aa.prop[["beta.prop"]])
cos.apb <- chou_cosine(human.col, aa.prop[["a.plus.b.prop"]])
cos.adb <- chou_cosine(human.col, aa.prop[["a.div.b"]])

# Transpose so each profile is a row, as dist() measures distances between
# rows.
aa.prop.flipped <- t(aa.prop)
round(aa.prop.flipped, 2)

dist(aa.prop.flipped, method = "euclidean")

dist.alpha <- dist(aa.prop.flipped[c("alpha.prop", "human.aa.freq"), ],
                   method = "euclidean")
dist.beta <- dist(aa.prop.flipped[c("beta.prop", "human.aa.freq"), ],
                  method = "euclidean")
dist.apb <- dist(aa.prop.flipped[c("a.plus.b.prop", "human.aa.freq"), ],
                 method = "euclidean")
dist.adb <- dist(aa.prop.flipped[c("a.div.b", "human.aa.freq"), ],
                 method = "euclidean")

# Summary table.
fold.type <- c("alpha", "beta", "alpha plus beta", "alpha/beta")

corr.sim <- round(c(corr.alpha, corr.beta, corr.apb, corr.adb), 5)
cosine.sim <- round(c(cos.alpha, cos.beta, cos.apb, cos.adb), 5)
Euclidean.dist <- round(c(dist.alpha, dist.beta, dist.apb, dist.adb), 5)

# Flag the best-matching class from the data. The original hard-coded the
# flag on the third row ("alpha plus beta") regardless of the numbers.
sim.sum <- rep("", length(fold.type))
sim.sum[which.max(cosine.sim)] <- "most.sim"
dist.sum <- rep("", length(fold.type))
dist.sum[which.min(Euclidean.dist)] <- "min.dist"

df <- data.frame(fold.type,
                 corr.sim,
                 cosine.sim,
                 Euclidean.dist,
                 sim.sum,
                 dist.sum)

pander::pander(df)
####
# Pick the four sequences to compare; the positions follow the order of
# A_acc_num (1 = human, 2 = mouse, 6 = chimp, 10 = python).
human_PPARA <- PPARA_vec[1]
chimp_PPARA <- PPARA_vec[6]
mouse_PPARA <- PPARA_vec[2]
python_PPARA <- PPARA_vec[10]

# Load the BLOSUM50 substitution matrix used to score every alignment.
data("BLOSUM50")

# Small wrapper so each pair is aligned with exactly the same settings.
align_blosum50 <- function(seq_a, seq_b) {
  Biostrings::pairwiseAlignment(seq_a,
                                seq_b,
                                substitutionMatrix = BLOSUM50)
}

# All six pairs (H = human, C = chimp, M = mouse, P = python).
AlignHC <- align_blosum50(human_PPARA, chimp_PPARA)
AlignHM <- align_blosum50(human_PPARA, mouse_PPARA)
AlignHP <- align_blosum50(human_PPARA, python_PPARA)
AlignCM <- align_blosum50(chimp_PPARA, mouse_PPARA)
AlignCP <- align_blosum50(chimp_PPARA, python_PPARA)
AlignMP <- align_blosum50(mouse_PPARA, python_PPARA)
# Percent identity for every pair, laid out as a lower-triangular matrix.
# pid() reports percentages (0-100), so a sequence compared with itself is
# 100, not 1. A real NA (instead of the string "NA") keeps the matrix numeric;
# mixing in strings turned every value into text.
pids <- c(100, NA, NA, NA,
          pid(AlignHC), 100, NA, NA,
          pid(AlignHM), pid(AlignCM), 100, NA,
          pid(AlignHP), pid(AlignCP), pid(AlignMP), 100)
pid_matrix <- matrix(pids, nrow = 4, byrow = TRUE)
row.names(pid_matrix) <- c("human", "chimp", "mouse", "python")
colnames(pid_matrix) <- c("human", "chimp", "mouse", "python")
pid_matrix
## human chimp mouse python
## human 100.00000 NA NA NA
## chimp 99.78632 100.00000 NA NA
## mouse 92.30769 92.09402 100.0000 NA
## python 80.98291 80.76923 77.5641 100
# Human-vs-chimp percent identity under each of the four pid() definitions,
# next to a description of the denominator each definition uses.
method <- paste0("PID", 1:4)
# USE.NAMES = FALSE keeps PID unnamed so the data frame gets default row names.
PID <- vapply(method,
              function(pid_type) pid(AlignHC, type = pid_type),
              numeric(1),
              USE.NAMES = FALSE)
denominator <- c('aligned positions and internal gap positions',
                 'aligned positions',
                 'length shorter sequence',
                 'average length of the two sequences')
pid_table <- data.frame(method, PID, denominator)
pid_table
## method PID denominator
## 1 PID1 99.78632 aligned positions and internal gap positions
## 2 PID2 100.00000 aligned positions
## 3 PID3 100.00000 length shorter sequence
## 4 PID4 99.89305 average length of the two sequences
# Wrap the named sequence vector in a Biostrings AAStringSet, which is the
# object handed to msa() below.
PPARA_vector_ss <- Biostrings::AAStringSet(PPARA_vec)
# Multiple sequence alignment of all sequences with the ClustalW method.
PPARA_align <- msa(PPARA_vector_ss,
method = "ClustalW")
## use default substitution matrix
# Print a summary of the alignment.
PPARA_align
## CLUSTAL 2.1
##
## Call:
## msa(PPARA_vector_ss, method = "ClustalW")
##
## MsaAAMultipleAlignment with 10 rows and 488 columns
## aln names
## [1] -----MLTDIPHLTKMVDTESQLCL...VQIIKKTESDAHLHPLLQEIYKDMY XP_020641438.1
## [2] -------------------------...VQIIKKTESDAHLHPLLQEIYRDMY XP_007425989.2
## [3] ---------------MVDTESPLCP...VQIIKKTESDAALHPLLQEIYRDMY NP_005027
## [4] ---------------MVDTESPLCP...VQIIKK-ESDAALHPLLQEIYRDMY XP_001136470.1
## [5] ---------------MVDTESPLCP...VQIIKT-ESDAALHPLLQEIYRDMY NP_001028201.1
## [6] -------------MEMVDTESPIGP...VQVIKKTESDAALHPLLQEIYRDMY NP_001029208.1
## [7] MVVGAQLHFQNHPVKMVDTESPICP...VQVIKKTESDAALHPLLQEIYRDMY XP_038534677.1
## [8] ---------------MVDTESPICP...VQVIKKTESDAALHPLLQEIYRDMY NP_035274.2
## [9] -----------MSTIKVDPDVEFCS...VQTIKKTETDAALHPLLQEIYRDMY XP_040201712.1
## [10] ---------------MVDMENRYRP...IQEIKKTEDTS-LHPLLQEIYRDMY NP_001096037.1
## Con ---------------MVDTESP?CP...VQIIKKTESDAALHPLLQEIYRDMY Consensus
# Relabel the msa result with the class name "AAMultipleAlignment".
# NOTE(review): presumably needed so that ggmsa (used further down) accepts
# the object - confirm.
class(PPARA_align) <- "AAMultipleAlignment"
# Convert the alignment to seqinr's alignment format for printing.
PPARA_align_seqinr <- msaConvert(PPARA_align,
type = "seqinr::alignment")
# Print the whole alignment in chunks of 60 columns.
compbio4all::print_msa(alignment = PPARA_align_seqinr,
chunksize = 60)
## [1] "-----MLTDIPHLTKMVDTESQLCLLVPLKEEDFGSPLSEDFLQDMESIKELSQSISGDS 0"
## [1] "---------------------------------------------MESMTELSPSISAES 0"
## [1] "---------------MVDTESPLCPLSPLEAGDLESPLSEEFLQEMGNIQEISQSIGEDS 0"
## [1] "---------------MVDTESPLCPLSPLEAGDLESPLSEEFLQEMGNIQEISQSIGEDS 0"
## [1] "---------------MVDTESPLCPLSPLEAGDLESPLSEEFLQEMGNIQEISQSIGEDS 0"
## [1] "-------------MEMVDTESPIGPLSPLEADDLESPLSADFLQEMGTIQEISQSIGEDS 0"
## [1] "MVVGAQLHFQNHPVKMVDTESPICPLSPLEADDLESPLSEEFLQEMGNIQEISQSIGEDS 0"
## [1] "---------------MVDTESPICPLSPLEADDLESPLSEEFLQEMGNIQEISQSIGEES 0"
## [1] "-----------MSTIKVDPDVEFCSIAPLEDDDLGSPLPKEFFTDLGDIQDISHNIGDDG 0"
## [1] "---------------MVDMENRYRPPSPLDDSVLDSAL---FVRGMEELRDISQSMDEDA 0"
## [1] " "
## [1] "SGALSIADFQSLGNGPGSDGSIIT---DSLSPASSPSSVNLAIAPSSKDELPNATLNIEC 0"
## [1] "SGVL--ADFQSLANG--SDGSVHT---DSLSPASSPSSVNLAALPNSKDELPSVVLNIEC 0"
## [1] "SGSFGFTEYQYLGSCPGSDGSVIT---DTLSPASSPSSVTYPVVPGSVDESPSGALNIEC 0"
## [1] "SGSFGFTEYQYLGSCPGSDGSVIT---DTLSPASSPSSVTYPVVPGSVDESPSGALNIEC 0"
## [1] "SGSFSFTEYQYLGSCPGSDGSVIT---DTLSPASSPSSVTYPVVPGSVDESPSGALNIEC 0"
## [1] "SGSFSFTEYQYLGSGPGSDGSVIT---DTLSPASSPSSVSYPAVPGSAEESSSIALNIEC 0"
## [1] "SGSFSFTEYQYLGSGPGSDGSVIT---DTLSPAPSPSSVTHPAAPGGAEEPSSVALNIEC 0"
## [1] "SGSFGFADYQYLGSCPGSEGSVIT---DTLSPASSPSSVSCPVIPASTDESPGSALNIEC 0"
## [1] "SNSFGVAEYQYLGNSPGSNGSISTDQTDTLSPASSPSSITFPAALSGTED-PSKSLNIEC 0"
## [1] "LSSFEMTENQS-GLGSGSESSTEL---DALTPASSPSSGVYG-CPVGQDEFTSTSLNLEC 0"
## [1] " "
## [1] "RICGDKASGYHYGVHACEGCKGFFRRTIRLKLDYDRCDRNCKIQKKNRNKCQYCRFQKCL 0"
## [1] "RICGDKASGYHYGVHACEGCKGFFRRTIRLKLVYDRCDRNCKIQKKNRNKCQYCRFQKCL 0"
## [1] "RICGDKASGYHYGVHACEGCKGFFRRTIRLKLVYDKCDRSCKIQKKNRNKCQYCRFHKCL 0"
## [1] "RICGDKASGYHYGVHACEGCKGFFRRTIRLKLVYDKCDRSCKIQKKNRNKCQYCRFHKCL 0"
## [1] "RICGDKASGYHYGVHACEGCKGFFRRTIRLKLVYDKCDRSCKIQKKNRNKCQYCRFHKCL 0"
## [1] "RICGDKASGYHYGVHACEGCKGFFRRTIRLKLVYDKCDRSCKIQKKNRNKCQYCRFHKCL 0"
## [1] "RICGDRASGYHYGVHACEGCKGFFRRTIRLKLAYDKCDRSCKIQKKNRNKCQYCRFHKCL 0"
## [1] "RICGDKASGYHYGVHACEGCKGFFRRTIRLKLVYDKCDRSCKIQKKNRNKCQYCRFHKCL 0"
## [1] "RVCGDKASGFHYGVHACEGCKGFFRRTIRLKLVYDRCERLCKIQKKNRNKCQYCRFEKCL 0"
## [1] "RVCSDRASGYHYGVHACEGCKGFFRRTIRLKLEYDKCERRCKIQKKNRNKCQYCRFQKCL 0"
## [1] " "
## [1] "SVGMSHNAIRFGRMPRSEKAKLKAEILISEHCIENSEIADLKSLAKRIYEAYLKNFNMNK 0"
## [1] "SVGMSHNAIRFGRMPRSEKAKLKAEIQTGELCIENSEVADLKSLAKRIYEAYLKNFNMTK 0"
## [1] "SVGMSHNAIRFGRMPRSEKAKLKAEILTCEHDIEDSETADLKSLAKRIYEAYLKNFNMNK 0"
## [1] "SVGMSHNAIRFGRMPRSEKAKLKAEILTCEHDIEDSETADLKSLAKRIYEAYLKNFNMNK 0"
## [1] "SVGMSHNAIRFGRMPRSEKAKLKAEILTCEHDIEDSETADLKSLAKRIYEAYLKNFNMNK 0"
## [1] "SVGMSHNAIRFGRMPRSEKAKLKAEILTCEHDLEDSETADLKSLAKRIYEAYLKNFNMNK 0"
## [1] "SVGMSHNAIRFGRMPRSEKAKLKAEILTCEQDPEDAETADLKSLAKRIYEAYLKNFNMNK 0"
## [1] "SVGMSHNAIRFGRMPRSEKAKLKAEILTCEHDLKDSETADLKSLGKRIHEAYLKNFNMNK 0"
## [1] "SVGMSHNAIRFGRMPRSEKAKLTAEILTSEQDIKDSQMADLLSLAKLLYDAYQKNFNMNK 0"
## [1] "SVGMSHNAIRFGRMPQSEKLRLKAEILTGERDVED----DQKTLAKQIYEAYVKNFNMNK 0"
## [1] " "
## [1] "VKARLILAGKTNNNPPFVIHDMDTLCMAEKTLVAKLVAN--GIQNKEAEVRIFHCCQCTS 0"
## [1] "LKARLILVGKASNNPPFVIHDMDTLCMAEKTLVAKLVGN--GIQNKEAEVRIFHCCQCTS 0"
## [1] "VKARVILSGKASNNPPFVIHDMETLCMAEKTLVAKLVAN--GIQNKEAEVRIFHCCQCTS 0"
## [1] "VKARVILSGKASNNPPFVIHDMETLCMAEKTLVAKLVAN--GIQNKEAEVRIFHCCQCTS 0"
## [1] "VKARVILSGKASNNPPFVIHDMETLCMAEKTLVAKLVAN--GIQNKEAEVRIFHCCQCTS 0"
## [1] "IKARVILAGKTNNNPPFVIHDMETLCMAEKTLVAKLVAN--GIQNKEAEVRIFHCCQCTS 0"
## [1] "VKARVILAGKASNNPPFVIHDMETLCMAEKTLVAKLVAN--GIQNKEAEVRIFHCCQCTS 0"
## [1] "VKARVILAGKTSNNPPFVIHDMETLCMAEKTLVAKMVAN--GVEDKEAEVRFFHCCQCMS 0"
## [1] "LKARGILTGKGSN-PPFVIHDMETLCMAEKTLVAKLVAN--GIQNKEAEVRIFHCCQCTS 0"
## [1] "SKARTILTGKTST-PPFVIHDMETLQLAEQTFVAKMMGSCGGLLNKDPEVRIFHCCQCTS 0"
## [1] " "
## [1] "VEAVTELTEFAKSIPGFSNLDLNDQVTLLKYGVYEAMFAMLASVMNKDGMLVAYGNGFIT 0"
## [1] "VEAVTELTEFAKSIPGFCSLDLNDQVTLLKYGVYEAIFAMLASMMNKDGMLVAYGNGFIT 0"
## [1] "VETVTELTEFAKAIPGFANLDLNDQVTLLKYGVYEAIFAMLSSVMNKDGMLVAYGNGFIT 0"
## [1] "VETVTELTEFAKAIPGFANLDLNDQVTLLKYGVYEAIFAMLSSVMNKDGMLVAYGNGFIT 0"
## [1] "VETVTELTEFAKAIPGFANLDLNDQVTLLKYGVYEAIFAMLSSVMNKDGMLVAYGNGFIT 0"
## [1] "VETVTELTEFAKSIPGFANLDLNDQVTLLKYGVYEAIFAMLSSVMNKDGMLVAYGNGFIT 0"
## [1] "VETVTELTEFAKSIPGFANLDLNDQVTLLKYGVYEAIFAMLSSVMNKDGMLVAYGNGFIT 0"
## [1] "VETVTELTEFAKAIPGFANLDLNDQVTLLKYGVYEAIFTMLSSLMNKDGMLIAYGNGFIT 0"
## [1] "VETVTELTEFAKSIPGFTELDLNDQVTLLKYGVYEAMFAMLASVMNKDGMLVAYGNGFIT 0"
## [1] "VETVTELTEFAKSVPGFSNLDLNDQVTLLKYGVHEALFAMLASCMNKDGLLVAYGSGFIT 0"
## [1] " "
## [1] "REFLKSLRKPFCDIMEPKFDFAMKFNSLELDDSDISLFVAAIICCGDRPGLVNIGHIEKM 0"
## [1] "REFLKSLRKPFCDIMEPKFDFAMKFNALELDDSDISLFVAAIICCGDRPGLVNIGHIEKM 0"
## [1] "REFLKSLRKPFCDIMEPKFDFAMKFNALELDDSDISLFVAAIICCGDRPGLLNVGHIEKM 0"
## [1] "REFLKSLRKPFCDIMEPKFDFAMKFNALELDDSDISLFVAAIICCGDRPGLLNVGHIEKM 0"
## [1] "REFLKSLRKPFCDIMEPKFDFAMKFNALELDDSDISLFVAAIICCGDRPGLLNVGHIEKM 0"
## [1] "REFLKSLRKPFCDIMEPKFDFAMKFNALELDDSDISLFVAAIICCGDRPGLLNVGHIEKM 0"
## [1] "REFLKSLRKPFCDIMEPKFDFAMKFNALELDDSDISLFVAAIICCGDRPGLLNVGHIEKM 0"
## [1] "REFLKNLRKPFCDIMEPKFDFAMKFNALELDDSDISLFVAAIICCGDRPGLLNIGYIEKL 0"
## [1] "REFLKSLRKPVGDMMEPKFEFAMKFNALELDDSDIALFVAALICCGDRPGLLNVPSIERM 0"
## [1] "REFLKSLRRPFSDMMEPKFQFAMKFNSLELDDSDLALFVAAIICCGDRPGLVNVPHIERM 0"
## [1] " "
## [1] "QESIVHVLKLHLQSNHPDDIFLFPKLLQKMADLRQLVTEHAQLVQIIKKTESDAHLHPLL 0"
## [1] "QESIVHVLKLHLESNHPDDIFLFPKLLQKLADLRQLVTEHAQLVQIIKKTESDAHLHPLL 0"
## [1] "QEGIVHVLRLHLQSNHPDDIFLFPKLLQKMADLRQLVTEHAQLVQIIKKTESDAALHPLL 0"
## [1] "QEGIVHVLRLHLQSNHPDDIFLFPKLLQKMADLRQLVTEHAQLVQIIKK-ESDAALHPLL 0"
## [1] "QEGIVHVLRLHLQSNHPDDIFLFPKLLQKMADLRQLVTEHAQLVQIIKT-ESDAALHPLL 0"
## [1] "QEGIVHVLKLHLQNNHPDDVFLFPKLLQKMADLRQLVTEHAQLVQVIKKTESDAALHPLL 0"
## [1] "QEGIVHVLKLHLQTNHPDNIFLFPKLLQKMADLRQLVTEHAQLVQVIKKTESDAALHPLL 0"
## [1] "QEGIVHVLKLHLQSNHPDDTFLFPKLLQKMVDLRQLVTEHAQLVQVIKKTESDAALHPLL 0"
## [1] "QENIVHVLKLHLQSNHPDDGFLFPKLLQKMADLRQLVTEHALLVQTIKKTETDAALHPLL 0"
## [1] "QESIVNVLHLHLKSNHPDHGFLFPKLLQKLVDLRQLVTEHAQLIQEIKKTEDTS-LHPLL 0"
## [1] " "
## [1] "QEIYKDMY 52"
## [1] "QEIYRDMY 52"
## [1] "QEIYRDMY 52"
## [1] "QEIYRDMY 52"
## [1] "QEIYRDMY 52"
## [1] "QEIYRDMY 52"
## [1] "QEIYRDMY 52"
## [1] "QEIYRDMY 52"
## [1] "QEIYRDMY 52"
## [1] "QEIYRDMY 52"
## [1] " "
# Draw alignment columns 350 to 450 with ggmsa.
ggmsa::ggmsa(PPARA_align,
start = 350,
end = 450)
# Show the unaligned sequences, selected by name in the order of A_acc_num.
PPARA_vector_ss[A_acc_num]
## AAStringSet object of length 10:
## width seq names
## [1] 468 MVDTESPLCPLSPLEAGDLESPL...IKKTESDAALHPLLQEIYRDMY NP_005027
## [2] 468 MVDTESPICPLSPLEADDLESPL...IKKTESDAALHPLLQEIYRDMY NP_035274.2
## [3] 470 MEMVDTESPIGPLSPLEADDLES...IKKTESDAALHPLLQEIYRDMY NP_001029208.1
## [4] 483 MVVGAQLHFQNHPVKMVDTESPI...IKKTESDAALHPLLQEIYRDMY XP_038534677.1
## [5] 467 MVDTESPLCPLSPLEAGDLESPL...IIKTESDAALHPLLQEIYRDMY NP_001028201.1
## [6] 467 MVDTESPLCPLSPLEAGDLESPL...IIKKESDAALHPLLQEIYRDMY XP_001136470.1
## [7] 478 MLTDIPHLTKMVDTESQLCLLVP...IKKTESDAHLHPLLQEIYKDMY XP_020641438.1
## [8] 459 MVDMENRYRPPSPLDDSVLDSAL...EIKKTEDTSLHPLLQEIYRDMY NP_001096037.1
## [9] 473 MSTIKVDPDVEFCSIAPLEDDDL...IKKTETDAALHPLLQEIYRDMY XP_040201712.1
## [10] 434 MESMTELSPSISAESSGVLADFQ...IKKTESDAHLHPLLQEIYRDMY XP_007425989.2
# Align the sequences again for the distance calculation.
PPARA_align_dm <- msa(PPARA_vector_ss,
                      method = "ClustalW")
## use default substitution matrix
class(PPARA_align_dm) <- "AAMultipleAlignment"
# Convert the alignment computed just above. The original converted
# PPARA_align here, leaving PPARA_align_dm unused.
PPARA_align_dm_seqinr <- msaConvert(PPARA_align_dm,
                                    type = "seqinr::alignment")
# Pairwise distances based on sequence identity.
# NOTE(review): seqinr documents this as sqrt(1 - fraction identical), i.e. a
# distance rather than a similarity, so closely related sequences give values
# near 0 - confirm against the seqinr documentation.
PPARA_dist <- seqinr::dist.alignment(PPARA_align_dm_seqinr,
                                     matrix = "identity")
PPARA_dist
## XP_020641438.1 XP_007425989.2 NP_005027 XP_001136470.1
## XP_007425989.2 0.2839809
## NP_005027 0.3726780 0.3655688
## XP_001136470.1 0.3730768 0.3659907 0.0000000
## NP_001028201.1 0.3730768 0.3691323 0.0654420 0.0654420
## NP_001029208.1 0.3831560 0.3779645 0.2264554 0.2266977
## XP_038534677.1 0.4039553 0.3840123 0.2311251 0.2313724
## NP_035274.2 0.4160251 0.4129248 0.2773501 0.2776469
## XP_040201712.1 0.4704005 0.4487637 0.4270871 0.4275461
## NP_001096037.1 0.5250821 0.5011723 0.5059817 0.5065362
## NP_001028201.1 NP_001029208.1 XP_038534677.1 NP_035274.2
## XP_007425989.2
## NP_005027
## XP_001136470.1
## NP_001028201.1
## NP_001029208.1 0.2266977
## XP_038534677.1 0.2313724 0.2259731
## NP_035274.2 0.2852551 0.2959845 0.3135133
## XP_040201712.1 0.4325468 0.4385290 0.4375950 0.4632411
## NP_001096037.1 0.5086963 0.5059817 0.5059817 0.5333516
## XP_040201712.1
## XP_007425989.2
## NP_005027
## XP_001136470.1
## NP_001028201.1
## NP_001029208.1
## XP_038534677.1
## NP_035274.2
## XP_040201712.1
## NP_001096037.1 0.5277393
# I re-ran this code and tried to change the row and col names and now it is giving me very low numbers, previously they were in the .70-.99 range and now they are significantly lower. I played with it but cannot figure out where it got messed up
# Note - not using rounded values
# Build a neighbour-joining tree from the pairwise distance matrix.
tree <- nj(PPARA_dist)
# Draw the tree with branch lengths taken from the distances. TRUE is
# spelled out because T is an ordinary variable that can be reassigned.
plot.phylo(tree, main = "Phylogenetic Tree",
           use.edge.length = TRUE)
# NOTE(review): the label says "rooted"; ape's nj() is documented as
# returning an unrooted tree - confirm the wording.
mtext(text = "PPARA Protein tree - rooted, with branch lengths")