LAG3 (Lymphocyte activation gene 3 protein) functions as an inhibitory receptor on antigen activated T-cells. It delivers inhibitory signals when bound to ligands.
Human protein: https://www.ncbi.nlm.nih.gov/search/all/?term=NP_002277.4 Homologene: https://www.ncbi.nlm.nih.gov/homologene/?term=lag3 Uniprot: https://www.uniprot.org/uniprot/P18627 PDB: https://www.rcsb.org
#install.packages("rentrez")
#install.packages("ape")
#install.packages("seqinr")
#install.packages("BiocManager")
##BiocManager::install("msa")
#BiocManager::install("Biostrings")
#BiocManager::install("drawProteins")
#BiocManager::install("ggplot2")
#library(devtools)
#devtools::install_github("brouwern/combio4all")
#devtools::install_github("YuLab-SMU/ggmsa")
library(compbio4all)
library(ggmsa)
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
library(rentrez)
library(seqinr)
library(ape)
##
## Attaching package: 'ape'
## The following objects are masked from 'package:seqinr':
##
## as.alignment, consensus
library(pander)
library(ggplot2)
library(msa)
## Loading required package: Biostrings
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: XVector
## Loading required package: GenomeInfoDb
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:ape':
##
## complement
## The following object is masked from 'package:seqinr':
##
## translate
## The following object is masked from 'package:base':
##
## strsplit
library(drawProteins)
library(Biostrings)
common_name <- c('human', 'chimp', 'bonobo', 'gorilla', 'oranutan', 'mouse', 'tree frog', 'chicken', 'mink', 'pigeon')
scientific_name <- c('Homo sapien', 'Pan troglodytes', 'Pan paniscus', 'Gorilla gorilla', 'Pongo abelii', 'Mus musculus', 'Xenopus tropicalis', 'Gallus gallus', 'Neogale vison', 'Columba livia')
refseq <- c('NP_002277.4', 'XP_508966.3', 'XP_003820331.2', 'XP_004052634.1', 'XP_002822880.2', 'NP_032505.1', 'XP_004915830.2', 'XP_024998797', 'XP_044082922.1', 'XP_005506340.2')
uniprot <- c('P18627', 'A0A2J8JEL7', 'A0A2R8ZS97', 'G3SDW3.1', 'A0A2J8TBE9', 'Q61790', 'UPI00064CE453', 'UPI001AE9E5F3', NA, 'A0A2I0ML95' )
PDB <- c(NA, NA, NA, NA, NA, '6WKM', NA, NA, NA, NA)
gene <- c("LAG3", "LAG3", "LAG3", "LAG3", "LAG3", "LAG3", "LAG3", "LAG3", "LAG3", "LAG3")
LAG3_table <- data.frame(common_name, scientific_name, refseq, uniprot, PDB, gene)
is(LAG3_table)
## [1] "data.frame" "list" "oldClass" "vector"
## [5] "list_OR_List" "vector_OR_Vector" "vector_OR_factor"
pander::pander(LAG3_table)
| common_name | scientific_name | refseq | uniprot | PDB | gene |
|---|---|---|---|---|---|
| human | Homo sapien | NP_002277.4 | P18627 | NA | LAG3 |
| chimp | Pan troglodytes | XP_508966.3 | A0A2J8JEL7 | NA | LAG3 |
| bonobo | Pan paniscus | XP_003820331.2 | A0A2R8ZS97 | NA | LAG3 |
| gorilla | Gorilla gorilla | XP_004052634.1 | G3SDW3.1 | NA | LAG3 |
| oranutan | Pongo abelii | XP_002822880.2 | A0A2J8TBE9 | NA | LAG3 |
| mouse | Mus musculus | NP_032505.1 | Q61790 | 6WKM | LAG3 |
| tree frog | Xenopus tropicalis | XP_004915830.2 | UPI00064CE453 | NA | LAG3 |
| chicken | Gallus gallus | XP_024998797 | UPI001AE9E5F3 | NA | LAG3 |
| mink | Neogale vison | XP_044082922.1 | NA | NA | LAG3 |
| pigeon | Columba livia | XP_005506340.2 | A0A2I0ML95 | NA | LAG3 |
LAG3_list <- entrez_fetch_list(db = "protein",
id = LAG3_table$refseq,
rettype = "fasta")
length(LAG3_list)
## [1] 10
for(i in 1:length(LAG3_list)){
LAG3_list[[i]] <- fasta_cleaner(LAG3_list[[i]], parse = FALSE)
}
LAG3_vector <- rep(NA, length(LAG3_list))
for(i in 1:length(LAG3_vector)){
LAG3_vector[i] <- LAG3_list[[i]]
}
names(LAG3_vector) <- names(LAG3_list)
LAG3_vector_ss <- Biostrings::AAStringSet(LAG3_vector)
human_protein <- drawProteins::get_features(LAG3_table$uniprot[1])
## [1] "Download has worked"
hum_protein_df <- drawProteins::feature_to_dataframe(human_protein)
## Warning in drawProteins::extract_feat_acc(features_in_lists_of_six[[i]]): NAs
## introduced by coercion
is(human_protein)
## [1] "list" "vector" "list_OR_List" "vector_OR_Vector"
## [5] "vector_OR_factor"
is(hum_protein_df)
## [1] "data.frame" "list" "oldClass" "vector"
## [5] "list_OR_List" "vector_OR_Vector" "vector_OR_factor"
hum_protein_df[,-2]
## type begin end length accession entryName taxid order
## featuresTemp SIGNAL 1 22 21 P18627 LAG3_HUMAN 9606 1
## featuresTemp.1 CHAIN 23 525 502 P18627 LAG3_HUMAN 9606 1
## featuresTemp.2 CHAIN 23 NA NA P18627 LAG3_HUMAN 9606 1
## featuresTemp.3 TOPO_DOM 23 450 427 P18627 LAG3_HUMAN 9606 1
## featuresTemp.4 TRANSMEM 451 471 20 P18627 LAG3_HUMAN 9606 1
## featuresTemp.5 TOPO_DOM 472 525 53 P18627 LAG3_HUMAN 9606 1
## featuresTemp.6 DOMAIN 37 167 130 P18627 LAG3_HUMAN 9606 1
## featuresTemp.7 DOMAIN 168 252 84 P18627 LAG3_HUMAN 9606 1
## featuresTemp.8 DOMAIN 265 343 78 P18627 LAG3_HUMAN 9606 1
## featuresTemp.9 DOMAIN 348 419 71 P18627 LAG3_HUMAN 9606 1
## featuresTemp.10 REGION 37 252 215 P18627 LAG3_HUMAN 9606 1
## featuresTemp.11 REGION 62 97 35 P18627 LAG3_HUMAN 9606 1
## featuresTemp.12 REGION 429 450 21 P18627 LAG3_HUMAN 9606 1
## featuresTemp.13 REGION 487 525 38 P18627 LAG3_HUMAN 9606 1
## featuresTemp.14 REGION 501 524 23 P18627 LAG3_HUMAN 9606 1
## featuresTemp.15 MOTIF 498 503 5 P18627 LAG3_HUMAN 9606 1
## featuresTemp.16 COMPBIAS 72 91 19 P18627 LAG3_HUMAN 9606 1
## featuresTemp.17 COMPBIAS 504 525 21 P18627 LAG3_HUMAN 9606 1
## featuresTemp.18 CARBOHYD 188 188 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.19 CARBOHYD 250 250 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.20 CARBOHYD 256 256 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.21 CARBOHYD 343 343 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.22 DISULFID 44 160 116 P18627 LAG3_HUMAN 9606 1
## featuresTemp.23 DISULFID 189 241 52 P18627 LAG3_HUMAN 9606 1
## featuresTemp.24 DISULFID 282 333 51 P18627 LAG3_HUMAN 9606 1
## featuresTemp.25 DISULFID 369 412 43 P18627 LAG3_HUMAN 9606 1
## featuresTemp.26 VAR_SEQ 353 360 7 P18627 LAG3_HUMAN 9606 1
## featuresTemp.27 VAR_SEQ 361 525 164 P18627 LAG3_HUMAN 9606 1
## featuresTemp.28 VARIANT 455 455 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.29 MUTAGEN 35 35 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.30 MUTAGEN 52 52 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.31 MUTAGEN 78 78 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.32 MUTAGEN 78 78 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.33 MUTAGEN 85 85 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.34 MUTAGEN 95 95 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.35 MUTAGEN 97 97 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.36 MUTAGEN 98 98 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.37 MUTAGEN 99 99 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.38 MUTAGEN 110 110 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.39 MUTAGEN 125 125 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.40 MUTAGEN 129 129 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.41 MUTAGEN 131 131 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.42 MUTAGEN 137 137 0 P18627 LAG3_HUMAN 9606 1
## featuresTemp.43 MUTAGEN 155 156 1 P18627 LAG3_HUMAN 9606 1
## featuresTemp.44 MUTAGEN 247 247 0 P18627 LAG3_HUMAN 9606 1
canvas <- draw_canvas(hum_protein_df)
canvas <- draw_chains(canvas, hum_protein_df,
label_size = 2.5)
canvas <- draw_domains(canvas, hum_protein_df, label_size = 1.5)
canvas
## Warning: Removed 1 rows containing missing values (geom_rect).
LAG3_chr <- entrez_fetch_list(db = "protein",
id = LAG3_table$refseq,
rettype = "fasta")
length(LAG3_chr)
## [1] 10
LAG3_chr[1]
## $NP_002277.4
## [1] ">NP_002277.4 lymphocyte activation gene 3 protein precursor [Homo sapiens]\nMWEAQFLGLLFLQPLWVAPVKPLQPGAEVPVVWAQEGAPAQLPCSPTIPLQDLSLLRRAGVTWQHQPDSG\nPPAAAPGHPLAPGPHPAAPSSWGPRPRRYTVLSVGPGGLRSGRLPLQPRVQLDERGRQRGDFSLWLRPAR\nRADAGEYRAAVHLRDRALSCRLRLRLGQASMTASPPGSLRASDWVILNCSFSRPDRPASVHWFRNRGQGR\nVPVRESPHHHLAESFLFLPQVSPMDSGPWGCILTYRDGFNVSIMYNLTVLGLEPPTPLTVYAGAGSRVGL\nPCRLPAGVGTRSFLTAKWTPPGGGPDLLVTGDNGDFTLRLEDVSQAQAGTYTCHIHLQEQQLNATVTLAI\nITVTPKSFGSPGSLGKLLCEVTPVSGQERFVWSSLDTPSQRSFSGPWLEAQEAQLLSQPWQCQLYQGERL\nLGAAVYFTELSSPGAQRSGRAPGALPAGHLLLFLILGVLSLLLLVTGAFGFHLWRRQWRPRRFSALEQGI\nHPPQAQSKIEELEQEPEPEPEPEPEPEPEPEPEQL\n\n"
for(i in 1:length(LAG3_chr)){
LAG3_chr[[i]] <- fasta_cleaner(LAG3_chr[[i]], parse = TRUE)
}
LAG3_chr[1]
## $NP_002277.4
## [1] "M" "W" "E" "A" "Q" "F" "L" "G" "L" "L" "F" "L" "Q" "P" "L" "W" "V" "A"
## [19] "P" "V" "K" "P" "L" "Q" "P" "G" "A" "E" "V" "P" "V" "V" "W" "A" "Q" "E"
## [37] "G" "A" "P" "A" "Q" "L" "P" "C" "S" "P" "T" "I" "P" "L" "Q" "D" "L" "S"
## [55] "L" "L" "R" "R" "A" "G" "V" "T" "W" "Q" "H" "Q" "P" "D" "S" "G" "P" "P"
## [73] "A" "A" "A" "P" "G" "H" "P" "L" "A" "P" "G" "P" "H" "P" "A" "A" "P" "S"
## [91] "S" "W" "G" "P" "R" "P" "R" "R" "Y" "T" "V" "L" "S" "V" "G" "P" "G" "G"
## [109] "L" "R" "S" "G" "R" "L" "P" "L" "Q" "P" "R" "V" "Q" "L" "D" "E" "R" "G"
## [127] "R" "Q" "R" "G" "D" "F" "S" "L" "W" "L" "R" "P" "A" "R" "R" "A" "D" "A"
## [145] "G" "E" "Y" "R" "A" "A" "V" "H" "L" "R" "D" "R" "A" "L" "S" "C" "R" "L"
## [163] "R" "L" "R" "L" "G" "Q" "A" "S" "M" "T" "A" "S" "P" "P" "G" "S" "L" "R"
## [181] "A" "S" "D" "W" "V" "I" "L" "N" "C" "S" "F" "S" "R" "P" "D" "R" "P" "A"
## [199] "S" "V" "H" "W" "F" "R" "N" "R" "G" "Q" "G" "R" "V" "P" "V" "R" "E" "S"
## [217] "P" "H" "H" "H" "L" "A" "E" "S" "F" "L" "F" "L" "P" "Q" "V" "S" "P" "M"
## [235] "D" "S" "G" "P" "W" "G" "C" "I" "L" "T" "Y" "R" "D" "G" "F" "N" "V" "S"
## [253] "I" "M" "Y" "N" "L" "T" "V" "L" "G" "L" "E" "P" "P" "T" "P" "L" "T" "V"
## [271] "Y" "A" "G" "A" "G" "S" "R" "V" "G" "L" "P" "C" "R" "L" "P" "A" "G" "V"
## [289] "G" "T" "R" "S" "F" "L" "T" "A" "K" "W" "T" "P" "P" "G" "G" "G" "P" "D"
## [307] "L" "L" "V" "T" "G" "D" "N" "G" "D" "F" "T" "L" "R" "L" "E" "D" "V" "S"
## [325] "Q" "A" "Q" "A" "G" "T" "Y" "T" "C" "H" "I" "H" "L" "Q" "E" "Q" "Q" "L"
## [343] "N" "A" "T" "V" "T" "L" "A" "I" "I" "T" "V" "T" "P" "K" "S" "F" "G" "S"
## [361] "P" "G" "S" "L" "G" "K" "L" "L" "C" "E" "V" "T" "P" "V" "S" "G" "Q" "E"
## [379] "R" "F" "V" "W" "S" "S" "L" "D" "T" "P" "S" "Q" "R" "S" "F" "S" "G" "P"
## [397] "W" "L" "E" "A" "Q" "E" "A" "Q" "L" "L" "S" "Q" "P" "W" "Q" "C" "Q" "L"
## [415] "Y" "Q" "G" "E" "R" "L" "L" "G" "A" "A" "V" "Y" "F" "T" "E" "L" "S" "S"
## [433] "P" "G" "A" "Q" "R" "S" "G" "R" "A" "P" "G" "A" "L" "P" "A" "G" "H" "L"
## [451] "L" "L" "F" "L" "I" "L" "G" "V" "L" "S" "L" "L" "L" "L" "V" "T" "G" "A"
## [469] "F" "G" "F" "H" "L" "W" "R" "R" "Q" "W" "R" "P" "R" "R" "F" "S" "A" "L"
## [487] "E" "Q" "G" "I" "H" "P" "P" "Q" "A" "Q" "S" "K" "I" "E" "E" "L" "E" "Q"
## [505] "E" "P" "E" "P" "E" "P" "E" "P" "E" "P" "E" "P" "E" "P" "E" "P" "E" "P"
## [523] "E" "Q" "L"
LAG3_human_chr <- unlist(LAG3_chr[1])
# set up 2 x 2 grid, make margins thinner
par(mfrow = c(2,2),
mar = c(0,0,2,1))
# plot 1: Shroom - Defaults
dotPlot(LAG3_human_chr,
LAG3_human_chr,
wsize = 1,
nmatch = 1,
main = "Shroom Defaults")
# plot 2 Shroom - size = 10, nmatch = 1
dotPlot(LAG3_human_chr,
LAG3_human_chr,
wsize = 10,
nmatch = 1,
main = "Shroom - size = 10, nmatch = 1")
# plot 3: Shroom - size = 10, nmatch = 5
dotPlot(LAG3_human_chr,
LAG3_human_chr,
wsize = 10,
nmatch = 5,
main = "Shroom - size = 10, nmatch = 5")
# plot 4: size = 20, nmatch = 5
dotPlot(LAG3_human_chr,
LAG3_human_chr,
wsize = 20,
nmatch =7,
main = "Shroom - size = 20, nmatch = 7")
# reset par() - run this or other plots will be small!
par(mfrow = c(1,1),
mar = c(4,4,4,4))
dotPlot(LAG3_human_chr,
LAG3_human_chr,
wsize = 20,
nmatch =7,
main = "Shroom - size = 20, nmatch = 7")
Human LAG3 protein information
Pfam: sig_p domain from 1-17, and transmembrane domain from 450-474. There are other regions of low complexity throughout protein. http://pfam.xfam.org/protein/P18627 DisProt: No Information available RepeatDB: No Information available UniProt sub-cellular locations: Plasma Membrane PDB secondary structural location: No Information available Alphafold: https://alphafold.ebi.ac.uk/entry/P18627
ALphafold’s predicted structure for human LAG3 has a mixture of beta-pleated sheets and alpha helices, but PDB’s structure for mouse LAG3 has only beta-pleated sheets.
aa.1 <- c("A","R","N","D","C","Q","E","G","H","I",
"L","K","M","F","P","S","T","W","Y","V")
alpha <- c(285, 53, 97, 163, 22, 67, 134, 197, 111, 91,
221, 249, 48, 123, 82, 122, 119, 33, 63, 167)
beta <- c(203, 67, 139, 121, 75, 122, 86, 297, 49, 120,
177, 115, 16, 85, 127, 341, 253, 44, 110, 229)
a.plus.b <- c(175, 78, 120, 111, 74, 74, 86, 171, 33, 93,
110, 112, 25, 52, 71, 126, 117, 30, 108, 123)
a.div.b <- c(361, 146, 183, 244, 63, 114, 257, 377, 107, 239,
339, 321, 91, 158, 188, 327, 238, 72, 130, 378)
alpha.prop <- alpha/sum(alpha)
beta.prop <- beta/sum(beta)
a.plus.b.prop <- a.plus.b/sum(a.plus.b)
a.div.b <- a.div.b/sum(a.div.b)
aa.prop <- data.frame(alpha.prop,
beta.prop,
a.plus.b.prop,
a.div.b)
#row labels
row.names(aa.prop) <- aa.1
pander(aa.prop)
| Â | alpha.prop | beta.prop | a.plus.b.prop | a.div.b |
|---|---|---|---|---|
| A | 0.1165 | 0.07313 | 0.09264 | 0.08331 |
| R | 0.02166 | 0.02414 | 0.04129 | 0.03369 |
| N | 0.03964 | 0.05007 | 0.06353 | 0.04223 |
| D | 0.06661 | 0.04359 | 0.05876 | 0.05631 |
| C | 0.008991 | 0.02702 | 0.03917 | 0.01454 |
| Q | 0.02738 | 0.04395 | 0.03917 | 0.02631 |
| E | 0.05476 | 0.03098 | 0.04553 | 0.05931 |
| G | 0.08051 | 0.107 | 0.09052 | 0.08701 |
| H | 0.04536 | 0.01765 | 0.01747 | 0.02469 |
| I | 0.03719 | 0.04323 | 0.04923 | 0.05516 |
| L | 0.09031 | 0.06376 | 0.05823 | 0.07824 |
| K | 0.1018 | 0.04143 | 0.05929 | 0.07408 |
| M | 0.01962 | 0.005764 | 0.01323 | 0.021 |
| F | 0.05027 | 0.03062 | 0.02753 | 0.03646 |
| P | 0.03351 | 0.04575 | 0.03759 | 0.04339 |
| S | 0.04986 | 0.1228 | 0.0667 | 0.07547 |
| T | 0.04863 | 0.09114 | 0.06194 | 0.05493 |
| W | 0.01349 | 0.01585 | 0.01588 | 0.01662 |
| Y | 0.02575 | 0.03963 | 0.05717 | 0.03 |
| V | 0.06825 | 0.08249 | 0.06511 | 0.08724 |
human.freq.table <- table(LAG3_human_chr)/length(LAG3_human_chr)
table_to_vector <- function(table_x){
table_names <- attr(table_x, "dimnames")[[1]]
table_vect <- as.vector(table_x)
names(table_vect) <- table_names
return(table_vect)
}
human.aa.freq <- table_to_vector(human.freq.table)
aa.names <- names(human.aa.freq)
any(aa.names == "U")
## [1] FALSE
aa.prop$human.aa.freq <- human.aa.freq
chou_cor <- function(x,y){
numerator <- sum(x*y)
denominator <- sqrt((sum(x^2))*(sum(y^2)))
result <- numerator/denominator
return(result)
}
chou_cosine <- function(z.1, z.2){
z.1.abs <- sqrt(sum(z.1^2))
z.2.abs <- sqrt(sum(z.2^2))
my.cosine <- sum(z.1*z.2)/(z.1.abs*z.2.abs)
return(my.cosine)
}
par(mfrow = c(2,2), mar = c(1,4,1,1))
plot(alpha.prop ~ human.aa.freq, data = aa.prop)
plot(beta.prop ~ human.aa.freq, data = aa.prop)
plot(a.plus.b.prop ~ human.aa.freq, data = aa.prop)
plot(a.div.b ~ human.aa.freq, data = aa.prop)
par(mfrow = c(1,1), mar = c(1,1,1,1))
corr.alpha <- chou_cor(aa.prop[,5], aa.prop[,1])
corr.beta <- chou_cor(aa.prop[,5], aa.prop[,2])
corr.apb <- chou_cor(aa.prop[,5], aa.prop[,3])
corr.adb <- chou_cor(aa.prop[,5], aa.prop[,4])
cos.alpha <- chou_cosine(aa.prop[,5], aa.prop[,1])
cos.beta <- chou_cosine(aa.prop[,5], aa.prop[,2])
cos.apb <- chou_cosine(aa.prop[,5], aa.prop[,3])
cos.adb <- chou_cosine(aa.prop[,5], aa.prop[,4])
aa.prop.flipped <- t(aa.prop)
round(aa.prop.flipped,2)
## A R N D C Q E G H I L K M
## alpha.prop 0.12 0.02 0.04 0.07 0.01 0.03 0.05 0.08 0.05 0.04 0.09 0.10 0.02
## beta.prop 0.07 0.02 0.05 0.04 0.03 0.04 0.03 0.11 0.02 0.04 0.06 0.04 0.01
## a.plus.b.prop 0.09 0.04 0.06 0.06 0.04 0.04 0.05 0.09 0.02 0.05 0.06 0.06 0.01
## a.div.b 0.08 0.03 0.04 0.06 0.01 0.03 0.06 0.09 0.02 0.06 0.08 0.07 0.02
## human.aa.freq 0.08 0.02 0.03 0.06 0.03 0.09 0.02 0.02 0.01 0.13 0.01 0.01 0.12
## F P S T W Y V
## alpha.prop 0.05 0.03 0.05 0.05 0.01 0.03 0.07
## beta.prop 0.03 0.05 0.12 0.09 0.02 0.04 0.08
## a.plus.b.prop 0.03 0.04 0.07 0.06 0.02 0.06 0.07
## a.div.b 0.04 0.04 0.08 0.05 0.02 0.03 0.09
## human.aa.freq 0.06 0.08 0.08 0.04 0.06 0.03 0.02
dist.alpha <- dist((aa.prop.flipped[c(1,5),]), method = "euclidean")
dist.beta <- dist((aa.prop.flipped[c(2,5),]), method = "euclidean")
dist.apb <- dist((aa.prop.flipped[c(3,5),]), method = "euclidean")
dist.adb <- dist((aa.prop.flipped[c(4,5),]), method = "euclidean")
fold.type <- c("alpha","beta","alpha plus beta", "alpha/beta")
corr.sim <- round(c(corr.alpha,corr.beta,corr.apb,corr.adb),5)
cosine.sim <- round(c(cos.alpha,cos.beta,cos.apb,cos.adb),5)
Euclidean.dist <- round(c(dist.alpha,dist.beta,dist.apb,dist.adb),5)
# summary
sim.sum <- c("","","most.sim","")
dist.sum <- c("","","min.dist","")
df <- data.frame(fold.type,
corr.sim ,
cosine.sim ,
Euclidean.dist ,
sim.sum ,
dist.sum )
pander(df)
| fold.type | corr.sim | cosine.sim | Euclidean.dist | sim.sum | dist.sum |
|---|---|---|---|---|---|
| alpha | 0.6233 | 0.6233 | 0.2327 | ||
| beta | 0.6629 | 0.6629 | 0.2214 | ||
| alpha plus beta | 0.6953 | 0.6953 | 0.2052 | most.sim | min.dist |
| alpha/beta | 0.6779 | 0.6779 | 0.2121 |
This prediction agrees with Alphafold in predicting a structure with both alpha helices and beta-pleated sheets.
align.1.2 <- Biostrings::pairwiseAlignment(LAG3_vector[1], LAG3_vector[2])
align.1.3 <- Biostrings::pairwiseAlignment(LAG3_vector[1], LAG3_vector[3])
align.1.4 <- Biostrings::pairwiseAlignment(LAG3_vector[1], LAG3_vector[4])
align.2.3 <- Biostrings::pairwiseAlignment(LAG3_vector[2], LAG3_vector[3])
align.2.4 <- Biostrings::pairwiseAlignment(LAG3_vector[2], LAG3_vector[4])
align.3.4 <- Biostrings::pairwiseAlignment(LAG3_vector[3], LAG3_vector[4])
pids <- c(1, NA, NA, NA,
pid(align.1.2), 1, NA, NA,
pid(align.1.3), pid(align.2.3), 1, NA,
pid(align.1.4), pid(align.2.4), pid(align.3.4), 1)
mat <- matrix(pids, nrow = 4, byrow = T)
row.names(mat) <- c("Human","Chimp","Bonobo","Gorilla")
colnames(mat) <- c("Human","Chimp","Bonobo","Gorilla")
pander::pander(mat)
| Â | Human | Chimp | Bonobo | Gorilla |
|---|---|---|---|---|
| Human | 1 | NA | NA | NA |
| Chimp | 98.1 | 1 | NA | NA |
| Bonobo | 98.11 | 99.43 | 1 | NA |
| Gorilla | 97.36 | 98.12 | 98.68 | 1 |
PID_types <- c( "PID1", pid(align.1.2, type = "PID1"), "(aligned positions + internal gap positions)",
"PID2", pid(align.1.2, type = "PID2"), "(aligned positions)",
"PID3", pid(align.1.2, type = "PID3"), "(length shorter sequence)",
"PID4", pid(align.1.2, type = "PID4"), "(average length of the two sequences)")
PID_types_mat <- matrix(PID_types, nrow = 4, byrow = T)
colnames(PID_types_mat) <- c("Method","PID","denominator")
pander::pander(PID_types_mat)
| Method | PID | denominator |
|---|---|---|
| PID1 | 98.1024667931689 | (aligned positions + internal gap positions) |
| PID2 | 98.4761904761905 | (aligned positions) |
| PID3 | 98.4761904761905 | (length shorter sequence) |
| PID4 | 98.2889733840304 | (average length of the two sequences) |
names.focal <- c('NP_002277.4', 'XP_508966.3', 'XP_003820331.2', 'XP_004052634.1')
LAG3_vector_ss_subset <- LAG3_vector_ss[names.focal]
LAG3_align_subset <-msa(LAG3_vector_ss_subset,
method = "ClustalW")
## use default substitution matrix
LAG3_align_subset
## CLUSTAL 2.1
##
## Call:
## msa(LAG3_vector_ss_subset, method = "ClustalW")
##
## MsaAAMultipleAlignment with 4 rows and 531 columns
## aln names
## [1] MWEAQFLGLLFLQPLWVAPVKPLQPG...ELEPEPEPE--LGPEPEPEPE--QL XP_508966.3
## [2] MWEAQFLGLLFLQPLWVAPVKPLQPG...ELEPEPEPE--LGPEPEPEPEPEQL XP_003820331.2
## [3] MWEAQFLGLLFLQPLWVAPVKPLQPG...EPEPEPEPEPELGPEPEPEPEPEQL XP_004052634.1
## [4] MWEAQFLGLLFLQPLWVAPVKPLQPG...EPEPEPEPE----PEPEPEPE--QL NP_002277.4
## Con MWEAQFLGLLFLQPLWVAPVKPLQPG...E?EPEPEPE--LGPEPEPEPE??QL Consensus
class(LAG3_align_subset) <- "AAMultipleAlignment"
LAG3_align_seqinr_subset <- msaConvert(LAG3_align_subset, type = "seqinr::alignment")
ggmsa::ggmsa(LAG3_align_subset,
start = 250,
end = 350)
LAG3_align <-msa(LAG3_vector_ss,
method = "ClustalW")
## use default substitution matrix
class(LAG3_align) <- "AAMultipleAlignment"
LAG3_align_seqinr <- msaConvert(LAG3_align, type = "seqinr::alignment")
LAG3_dist <- seqinr::dist.alignment(LAG3_align_seqinr,
matrix = "identity")
LAG3_dist_rounded <- round(LAG3_dist,
digits = 3)
LAG3_dist_rounded
## XP_508966.3 XP_003820331.2 XP_004052634.1 NP_002277.4
## XP_003820331.2 0.044
## XP_004052634.1 0.107 0.097
## NP_002277.4 0.123 0.115 0.123
## XP_002822880.2 0.180 0.174 0.189 0.180
## XP_044082922.1 0.451 0.452 0.456 0.450
## NP_032505.1 0.548 0.546 0.546 0.544
## XP_024998797 0.834 0.832 0.834 0.834
## XP_005506340.2 0.836 0.835 0.836 0.836
## XP_004915830.2 0.868 0.870 0.868 0.870
## XP_002822880.2 XP_044082922.1 NP_032505.1 XP_024998797
## XP_003820331.2
## XP_004052634.1
## NP_002277.4
## XP_002822880.2
## XP_044082922.1 0.445
## NP_032505.1 0.546 0.572
## XP_024998797 0.831 0.832 0.836
## XP_005506340.2 0.835 0.843 0.840 0.493
## XP_004915830.2 0.868 0.871 0.871 0.845
## XP_005506340.2
## XP_003820331.2
## XP_004052634.1
## NP_002277.4
## XP_002822880.2
## XP_044082922.1
## NP_032505.1
## XP_024998797
## XP_005506340.2
## XP_004915830.2 0.840
tree <- nj(LAG3_dist)
plot.phylo(tree, main="Phylogenetic Tree",
use.edge.length = T)
# add label
mtext(text = "LAG3 gene homolog tree - rooted, with branch lenths")
```