# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("msa")
library(msa)
## Loading required package: Biostrings
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: XVector
## Loading required package: GenomeInfoDb
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
# Four short example DNA sequences; the element names become the sequence
# names shown in the alignment output.
seqs <- c(
  seq1 = "GTATGCAGCT",
  seq2 = "GTACTAGCAGCT",
  seq3 = "CAGCTACGCGTATACGAGT",
  seq4 = "GTATGTCAG"
)
# Wrap the character vector in a Biostrings container
# (for amino acid sequences, use AAStringSet instead).
seqsstring <- DNAStringSet(seqs)
seqsstring
## DNAStringSet object of length 4:
## width seq names
## [1] 10 GTATGCAGCT seq1
## [2] 12 GTACTAGCAGCT seq2
## [3] 19 CAGCTACGCGTATACGAGT seq3
## [4] 9 GTATGTCAG seq4
# Multiple sequence alignment with msa()'s default settings.
aln <- msa(seqsstring)
## use default substitution matrix
aln
## CLUSTAL 2.1
##
## Call:
## msa(seqsstring)
##
## MsaDNAMultipleAlignment with 4 rows and 21 columns
## aln names
## [1] CAGCTACGCGTA-TACGAGT- seq3
## [2] ---------GTA-TGTCAG-- seq4
## [3] ---------GTA-TGCAGCT- seq1
## [4] ---------GTACTAGCAGCT seq2
## Con ---------GTA-T?CCAGT- Consensus
# how to interpret a sequence alignment? https://www.labxchange.org/library/items/lb:LabXchange:5b84cc84:html:1
#load packages
library(DESeq2)
## Loading required package: GenomicRanges
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(ggplot2)
# Directory holding the week-5 input files. Defined once so the script can be
# pointed at another location by editing a single line.
data_dir <- "C:/Users/hed2/Downloads/statistics in biomedical/week5"

#get count data
# First column is the gene id (ensgene); the remaining columns are samples.
countData <- read.csv(file.path(data_dir, "airway_scaledcounts.csv"))
head(countData)
## ensgene SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516
## 1 ENSG00000000003 723 486 904 445 1170
## 2 ENSG00000000005 0 0 0 0 0
## 3 ENSG00000000419 467 523 616 371 582
## 4 ENSG00000000457 347 258 364 237 318
## 5 ENSG00000000460 96 81 73 66 118
## 6 ENSG00000000938 0 0 1 0 2
## SRR1039517 SRR1039520 SRR1039521
## 1 1097 806 604
## 2 0 0 0
## 3 781 417 509
## 4 447 330 324
## 5 94 102 74
## 6 0 0 0
dim(countData)
## [1] 38694 9
#get metadata
# One row per sample; `dex` is the treatment group used in the design formula.
metaData <- read.csv(file.path(data_dir, "airway_metadata.csv"))
head(metaData)
## id dex celltype geo_id
## 1 SRR1039508 control N61311 GSM1275862
## 2 SRR1039509 treated N61311 GSM1275863
## 3 SRR1039512 control N052611 GSM1275866
## 4 SRR1039513 treated N052611 GSM1275867
## 5 SRR1039516 control N080611 GSM1275870
## 6 SRR1039517 treated N080611 GSM1275871
# Make the design variable an explicit factor with "control" as the reference
# level, so the comparison direction (treated vs control) does not depend on
# an implicit conversion. This also avoids DESeq2's "some variables in design
# formula are characters, converting to factors" warning.
metaData$dex <- relevel(factor(metaData$dex), ref = "control")
#create DESeq object
# tidy = TRUE: the first column of countData holds the gene ids.
dds <- DESeqDataSetFromMatrix(
  countData = countData,
  colData = metaData,
  design = ~dex,
  tidy = TRUE
)
## converting counts to integer mode
# Fit the differential expression model; the steps it runs are listed in the
# recorded messages below.
dds <- DESeq(object = dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Extract the results table and print an overview of it.
res <- results(object = dds)
# head(results(dds, tidy=TRUE))
summary(res)
##
## out of 25258 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 1563, 6.2%
## LFC < 0 (down) : 1188, 4.7%
## outliers [1] : 142, 0.56%
## low counts [2] : 9971, 39%
## (mean count < 10)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# First rows of the table ordered by adjusted p-value (padj), smallest first.
head(res[order(res$padj), ])
## log2 fold change (MLE): dex treated vs control
## Wald test p-value: dex treated vs control
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000152583 954.771 4.36836 0.2371268 18.4220 8.74490e-76
## ENSG00000179094 743.253 2.86389 0.1755693 16.3120 8.10784e-60
## ENSG00000116584 2277.913 -1.03470 0.0650984 -15.8944 6.92855e-57
## ENSG00000189221 2383.754 3.34154 0.2124058 15.7319 9.14433e-56
## ENSG00000120129 3440.704 2.96521 0.2036951 14.5571 5.26424e-48
## ENSG00000148175 13493.920 1.42717 0.1003890 14.2164 7.25128e-46
## padj
## <numeric>
## ENSG00000152583 1.32441e-71
## ENSG00000179094 6.13966e-56
## ENSG00000116584 3.49776e-53
## ENSG00000189221 3.46227e-52
## ENSG00000120129 1.59454e-44
## ENSG00000148175 1.83034e-42
# Counts of a single gene (the top hit in the sorted results), split by
# treatment group.
plotCounts(dds, gene = "ENSG00000152583", intgroup = "dex")

# Volcano plot: effect size (log2 fold change) against significance.
# Base layer: every gene, x-axis limited to [-3, 3].
with(
  res,
  plot(log2FoldChange, -log10(pvalue),
       pch = 20, main = "Volcano plot", xlim = c(-3, 3))
)
# Blue overlay: genes with padj < 0.01. which() drops rows whose padj is NA.
with(
  res[which(res$padj < 0.01), ],
  points(log2FoldChange, -log10(pvalue), pch = 20, col = "blue")
)
# Red overlay: padj < 0.01 and an absolute log2 fold change above 2.
with(
  res[which(res$padj < 0.01 & abs(res$log2FoldChange) > 2), ],
  points(log2FoldChange, -log10(pvalue), pch = 20, col = "red")
)
# PCA ----
# The heading above used to be a bare markdown bullet ("- PCA"), which R
# evaluates as the expression -PCA and fails with "object 'PCA' not found".
# Principal component analysis: do the samples separate by treatment (dex)?
# Counts are variance-stabilised first; blind = FALSE is passed so the
# transformation is not blind to the design.
vsdata <- vst(dds, blind = FALSE)
plotPCA(vsdata, intgroup = "dex")
# phylogenetic trees: load the packages used to read alignments and build trees
library(phangorn)
## Loading required package: ape
##
## Attaching package: 'ape'
## The following object is masked from 'package:Biostrings':
##
## complement
library(phytools)
## Loading required package: maps
library(ape)
#given known tree topology:
#RERconverge tree-building: https://github.com/nclark-lab/RERconverge/blob/master/R/estimateTreeFuncs.R

# Directory holding the week-5 input files. Defined once so the script can be
# pointed at another location by editing a single line.
data_dir <- "C:/Users/hed2/Downloads/statistics in biomedical/week5"

# read data: two master trees (UCSC and Hiller) and the TNF / IL6 protein
# alignments that go with each of them. Each file is read exactly once.
genetree_u <- read.tree(file = file.path(data_dir, "UCSCmastertree.txt"))
genetree_h <- read.tree(file = file.path(data_dir, "Hillermastertree.tree"))
tnf_u <- read.phyDat(file.path(data_dir, "TNF.phy"), type = "AA", format = "phylip")
il6_u <- read.phyDat(file.path(data_dir, "IL6.phy"), type = "AA", format = "phylip")
tnf_h <- read.phyDat(file.path(data_dir, "TNF.fasta"), type = "AA", format = "fasta")
il6_h <- read.phyDat(file.path(data_dir, "IL6.fasta"), type = "AA", format = "fasta")

# Working copies for this section: UCSC master tree + UCSC IL6 alignment.
genetree <- genetree_u
alnPhyDat <- il6_u

#eliminate species in the alignment but not the tree and vice versa; convenient for comparison
# inboth = species present in all four alignments and in both master trees.
inboth <- Reduce(
  intersect,
  list(
    names(tnf_u), names(il6_u), names(tnf_h), names(il6_h),
    genetree_u$tip.label, genetree_h$tip.label
  )
)
# Tips of the working tree that are not shared by every data set.
todropg <- genetree$tip.label[!(genetree$tip.label %in% inboth)]
if (length(todropg) > 0) {
  genetree <- drop.tip(genetree, todropg)
}
# Restrict the alignment to the shared species when it contains extras.
if (length(inboth) < length(names(alnPhyDat))) {
  alnPhyDat <- subset(alnPhyDat, subset = inboth)
}
#unroot the tree
genetree <- unroot(genetree)
#just in case, set all branches to 1 first (pml abhors a vacuum... or a zero)
# Sized from the edge matrix rather than from the existing edge.length, so a
# tree that arrives without branch lengths still gets one value per edge.
genetree$edge.length <- rep(1, nrow(genetree$edge))
#Run distance estimation using submodel
#generate an initial pml tree
lgptree <- pml(genetree, alnPhyDat, model = "LG", k = 4, rearrangement = "none") #model = "GTR" for DNA
#generate a tree
#use capture.output to suppress optimization output?
# Topology is kept fixed (rearrangement = "none"); edge lengths, the proportion
# of invariant sites and the gamma shape parameter are optimised.
lgopttree <- optim.pml(
  lgptree,
  optInv = TRUE,
  optGamma = TRUE,
  optEdge = TRUE,
  rearrangement = "none",
  model = "LG"
) #model = "GTR" for DNA
## optimize edge weights: -10426.61 --> -9206.413
## optimize invariant sites: -9206.413 --> -9206.107
## optimize shape parameter: -9206.107 --> -9187.222
## optimize edge weights: -9187.222 --> -9180.641
## optimize invariant sites: -9180.641 --> -9176.535
## optimize shape parameter: -9176.535 --> -9175.154
## optimize edge weights: -9175.154 --> -9175.134
## optimize invariant sites: -9175.134 --> -9175.095
## optimize shape parameter: -9175.095 --> -9175.094
## optimize edge weights: -9175.094 --> -9175.091
## optimize invariant sites: -9175.091 --> -9175.091
## optimize shape parameter: -9175.091 --> -9175.091
## optimize edge weights: -9175.091 --> -9175.091
## optimize invariant sites: -9175.091 --> -9175.091
## optimize shape parameter: -9175.091 --> -9175.091
## optimize edge weights: -9175.091 --> -9175.091
# Keep the optimised fixed-topology tree (UCSC master tree, IL6 alignment)
# for the tree comparisons further down, then print and plot it.
tree_u6 <- lgopttree$tree
tree_u6
##
## Phylogenetic tree with 40 tips and 38 internal nodes.
##
## Tip labels:
## ponAbe2, nomLeu3, macFas5, calJac3, saiBol1, otoGar3, ...
##
## Unrooted; includes branch lengths.
plot(tree_u6)
# BUILD tree topology: estimate the topology from the alignment itself
# instead of taking it from the master tree.
dat <- alnPhyDat
# read.phyDat("C:\\Users\\hed2\\Downloads\\statistics in biomedical\\week5/IL6.phy", type="AA", format="phylip")
# Pairwise distances -> balanced FastME tree, used as the starting tree.
dm <- dist.ml(dat)
tree <- fastme.bal(dm)
# Initial likelihood object: 4 rate categories, invariant-site start value 0.2.
fit <- pml(tree, dat, k = 4, inv = 0.2)
# Optimise under LG, allowing NNI topology moves plus gamma shape and
# invariant-site proportion.
fit <- optim.pml(
  fit,
  optNni = TRUE,
  optGamma = TRUE,
  optInv = TRUE,
  model = "LG"
)
## optimize edge weights: -9302.217 --> -9244.775
## optimize invariant sites: -9244.775 --> -9218.686
## optimize shape parameter: -9218.686 --> -9205.59
## optimize edge weights: -9205.59 --> -9200.783
## optimize topology: -9200.783 --> -9163.188 NNI moves: 12
## optimize invariant sites: -9163.188 --> -9157.058
## optimize shape parameter: -9157.058 --> -9152.342
## optimize edge weights: -9152.342 --> -9152.114
## optimize topology: -9152.114 --> -9152.114 NNI moves: 0
## optimize invariant sites: -9152.114 --> -9151.963
## optimize shape parameter: -9151.963 --> -9151.958
## optimize edge weights: -9151.958 --> -9151.949
## optimize invariant sites: -9151.949 --> -9151.948
## optimize shape parameter: -9151.948 --> -9151.948
## optimize edge weights: -9151.948 --> -9151.948
## optimize invariant sites: -9151.948 --> -9151.948
## optimize shape parameter: -9151.948 --> -9151.948
## optimize edge weights: -9151.948 --> -9151.948
# Keep the topology estimated from the UCSC IL6 alignment for the tree
# comparisons further down, then print and plot it.
top_u6 <- fit$tree
top_u6
##
## Phylogenetic tree with 40 tips and 38 internal nodes.
##
## Tip labels:
## ailMel1, conCri1, sorAra2, orcOrc1, vicPac2, panHod1, ...
##
## Unrooted; includes branch lengths.
# The tree is plotted as estimated (unrooted); no re-rooting is done here.
plot(top_u6)
# Given known tree topology, second data set: Hiller master tree with the
# Hiller IL6 alignment.
#RERconverge tree-building: https://github.com/nclark-lab/RERconverge/blob/master/R/estimateTreeFuncs.R
genetree <- read.tree(
  file = "C:\\Users\\hed2\\Downloads\\statistics in biomedical\\week5/Hillermastertree.tree"
)
alnPhyDat <- read.phyDat(
  "C:\\Users\\hed2\\Downloads\\statistics in biomedical\\week5\\IL6.fasta",
  type = "AA",
  format = "fasta"
)
# Eliminate species in the alignment but not the tree and vice versa.
# `inboth` is deliberately NOT recomputed here: the value built earlier in the
# script is reused. The per-data-set alternative is kept for reference:
# inboth = intersect(names(alnPhyDat),genetree$tip.label)
todropg <- genetree$tip.label[!(genetree$tip.label %in% inboth)]
if (length(todropg) > 0) {
  genetree <- drop.tip(genetree, todropg)
}
if (length(names(alnPhyDat)) > length(inboth)) {
  alnPhyDat <- subset(alnPhyDat, subset = inboth)
}
#unroot the tree
genetree <- unroot(genetree)
#just in case, set all branches to 1 first (pml abhors a vacuum... or a zero)
# Sized from the edge matrix rather than from the existing edge.length, so a
# tree that arrives without branch lengths still gets one value per edge.
genetree$edge.length <- rep(1, nrow(genetree$edge))
#Run distance estimation using submodel
#generate an initial pml tree
lgptree <- pml(genetree, alnPhyDat, model = "LG", k = 4, rearrangement = "none") #model = "GTR" for DNA
#generate a tree
#use capture.output to suppress optimization output?
# Topology is kept fixed (rearrangement = "none"); edge lengths, the proportion
# of invariant sites and the gamma shape parameter are optimised.
lgopttree <- optim.pml(
  lgptree,
  optInv = TRUE,
  optGamma = TRUE,
  optEdge = TRUE,
  rearrangement = "none",
  model = "LG"
) #model = "GTR" for DNA
## optimize edge weights: -10590.38 --> -9369.392
## optimize invariant sites: -9369.392 --> -9368.807
## optimize shape parameter: -9368.807 --> -9351.93
## optimize edge weights: -9351.93 --> -9347.347
## optimize invariant sites: -9347.347 --> -9343.656
## optimize shape parameter: -9343.656 --> -9342.779
## optimize edge weights: -9342.779 --> -9342.731
## optimize invariant sites: -9342.731 --> -9342.71
## optimize shape parameter: -9342.71 --> -9342.708
## optimize edge weights: -9342.708 --> -9342.706
## optimize invariant sites: -9342.706 --> -9342.706
## optimize shape parameter: -9342.706 --> -9342.706
## optimize edge weights: -9342.706 --> -9342.706
## optimize invariant sites: -9342.706 --> -9342.706
## optimize shape parameter: -9342.706 --> -9342.706
## optimize edge weights: -9342.706 --> -9342.706
# Keep the optimised fixed-topology tree (Hiller master tree, IL6 alignment)
# for the tree comparisons further down, then print and plot it.
tree_h6 <- lgopttree$tree
tree_h6
##
## Phylogenetic tree with 40 tips and 38 internal nodes.
##
## Tip labels:
## monDom5, loxAfr3, triMan1, chrAsi1, echTel2, eleEdw1, ...
##
## Unrooted; includes branch lengths.
plot(tree_h6)
# BUILD tree topology: estimate the topology from the Hiller IL6 alignment
# instead of taking it from the master tree.
dat <- alnPhyDat
# Pairwise distances -> balanced FastME tree, used as the starting tree.
dm <- dist.ml(dat)
tree <- fastme.bal(dm)
# Initial likelihood object: 4 rate categories, invariant-site start value 0.2.
fit <- pml(tree, dat, k = 4, inv = 0.2)
# LG model, allowing NNI topology moves plus gamma shape and invariant-site
# proportion.
fit <- optim.pml(
  fit,
  optNni = TRUE,
  optGamma = TRUE,
  optInv = TRUE,
  model = "LG"
)
## optimize edge weights: -9463.005 --> -9401.676
## optimize invariant sites: -9401.676 --> -9373.608
## optimize shape parameter: -9373.608 --> -9358.556
## optimize edge weights: -9358.556 --> -9356.956
## optimize topology: -9356.956 --> -9338.505 NNI moves: 11
## optimize invariant sites: -9338.505 --> -9330.277
## optimize shape parameter: -9330.277 --> -9326.17
## optimize edge weights: -9326.17 --> -9325.905
## optimize topology: -9325.905 --> -9325.905 NNI moves: 0
## optimize invariant sites: -9325.905 --> -9325.836
## optimize shape parameter: -9325.836 --> -9325.835
## optimize edge weights: -9325.835 --> -9325.832
## optimize invariant sites: -9325.832 --> -9325.831
## optimize shape parameter: -9325.831 --> -9325.831
## optimize edge weights: -9325.831 --> -9325.831
## optimize invariant sites: -9325.831 --> -9325.831
## optimize shape parameter: -9325.831 --> -9325.831
## optimize edge weights: -9325.831 --> -9325.831
# Keep the topology estimated from the Hiller IL6 alignment for the tree
# comparisons further down, then print and plot it.
top_h6 <- fit$tree
top_h6
##
## Phylogenetic tree with 40 tips and 38 internal nodes.
##
## Tip labels:
## ailMel1, orcOrc1, panHod1, vicPac2, eptFus1, myoDav1, ...
##
## Unrooted; includes branch lengths.
# Optional re-rooting (left disabled); the tree is plotted unrooted:
# fitrooted=root.phylo(fit$tree, outgroup="ornAna1", resolve.root = T)
plot(top_h6)
#how different are two phylogenetic trees? Robinson-Foulds distance
# tree_* = master-tree topology with branch lengths fitted to IL6;
# top_*  = topology estimated from the IL6 alignment itself;
# suffix u = UCSC data set, h = Hiller data set.

# 1) fixed master topologies, UCSC vs Hiller
#ignoring branch lengths:
RF.dist(tree_u6, tree_h6)
## [1] 0
#using branch lengths:
wRF.dist(tree_u6, tree_h6)
## [1] 1.711332

# 2) estimated topologies, UCSC vs Hiller
#ignoring branch lengths:
RF.dist(top_u6, top_h6)
## [1] 30
#using branch lengths:
wRF.dist(top_u6, top_h6)
## [1] 2.723068

# 3) UCSC: master topology vs estimated topology
#ignoring branch lengths:
RF.dist(tree_u6, top_u6)
## [1] 30
#using branch lengths:
wRF.dist(tree_u6, top_u6)
## [1] 1.83267

# 4) Hiller: master topology vs estimated topology
# This pair previously repeated comparison 3 (tree_u6 vs top_u6) verbatim.
# No output is recorded for it yet -- re-run to obtain the values.
#ignoring branch lengths:
RF.dist(tree_h6, top_h6)
#using branch lengths:
wRF.dist(tree_h6, top_h6)
#bootstrapping:
# Bootstrap the most recent `fit`, allowing NNI moves in each replicate.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
bs <- bootstrap.pml(fit, optNni = TRUE, bs = 10) #normally set bs=100
## optimize edge weights: -9509.777 --> -9248.377
## optimize edge weights: -9248.377 --> -9248.377
## optimize topology: -9248.377 --> -9203.103 NNI moves: 11
## optimize edge weights: -9203.103 --> -9203.103
## optimize topology: -9203.103 --> -9200.12 NNI moves: 2
## optimize edge weights: -9200.12 --> -9200.12
## optimize topology: -9200.12 --> -9200.12 NNI moves: 0
## optimize edge weights: -9374.787 --> -9124.709
## optimize edge weights: -9124.709 --> -9124.708
## optimize topology: -9124.708 --> -9095.208 NNI moves: 7
## optimize edge weights: -9095.208 --> -9095.208
## optimize topology: -9095.208 --> -9095.208 NNI moves: 0
## optimize edge weights: -9964.103 --> -9714.785
## optimize edge weights: -9714.785 --> -9714.785
## optimize topology: -9714.785 --> -9697.322 NNI moves: 6
## optimize edge weights: -9697.322 --> -9697.322
## optimize topology: -9697.322 --> -9697.322 NNI moves: 0
## optimize edge weights: -9717.4 --> -9437.075
## optimize edge weights: -9437.075 --> -9437.074
## optimize topology: -9437.074 --> -9409.66 NNI moves: 10
## optimize edge weights: -9409.66 --> -9409.656
## optimize topology: -9409.656 --> -9402.791 NNI moves: 2
## optimize edge weights: -9402.791 --> -9402.791
## optimize topology: -9402.791 --> -9402.791 NNI moves: 0
## optimize edge weights: -9754.917 --> -9517.784
## optimize edge weights: -9517.784 --> -9517.784
## optimize topology: -9517.784 --> -9504.578 NNI moves: 4
## optimize edge weights: -9504.578 --> -9504.578
## optimize topology: -9504.578 --> -9504.578 NNI moves: 0
## optimize edge weights: -9054.493 --> -8802.907
## optimize edge weights: -8802.907 --> -8802.907
## optimize topology: -8802.907 --> -8779.345 NNI moves: 11
## optimize edge weights: -8779.345 --> -8779.345
## optimize topology: -8779.345 --> -8779.345 NNI moves: 0
## optimize edge weights: -8779.345 --> -8779.345
## optimize edge weights: -10135.65 --> -9890.388
## optimize edge weights: -9890.388 --> -9890.387
## optimize topology: -9890.387 --> -9865.023 NNI moves: 5
## optimize edge weights: -9865.023 --> -9865.023
## optimize topology: -9865.023 --> -9865.023 NNI moves: 0
## optimize edge weights: -9420.479 --> -9142.614
## optimize edge weights: -9142.614 --> -9142.614
## optimize topology: -9142.614 --> -9089.098 NNI moves: 9
## optimize edge weights: -9089.098 --> -9089.097
## optimize topology: -9089.097 --> -9089.097 NNI moves: 0
## optimize edge weights: -9089.097 --> -9089.097
## optimize edge weights: -9600.162 --> -9324.265
## optimize edge weights: -9324.265 --> -9324.265
## optimize topology: -9324.265 --> -9307.328 NNI moves: 6
## optimize edge weights: -9307.328 --> -9307.325
## optimize topology: -9307.325 --> -9302.351 NNI moves: 1
## optimize edge weights: -9302.351 --> -9302.351
## optimize topology: -9302.351 --> -9302.351 NNI moves: 0
## optimize edge weights: -9967.612 --> -9685.16
## optimize edge weights: -9685.16 --> -9685.16
## optimize topology: -9685.16 --> -9654.868 NNI moves: 7
## optimize edge weights: -9654.868 --> -9654.868
## optimize topology: -9654.868 --> -9654.868 NNI moves: 0
# Draw the fitted tree with the bootstrap trees in four layouts. Every call
# overwrites treeBS, so only the result of the last (unrooted) call is kept.
treeBS <- plotBS(fit$tree, bs, type = "cladogram")
treeBS <- plotBS(fit$tree, bs, type = "phylogram")
treeBS <- plotBS(fit$tree, bs, type = "fan")
treeBS <- plotBS(fit$tree, bs, type = "unrooted", cex = 0.75)
# Ancestral sequence reconstruction ----
# The heading above used to be a bare markdown bullet, which is not valid R
# and stopped the script from parsing.
#get ancestral sequences:
anc <- ancestral.pml(fit, "ml", return = "phyDat")
#plotting tree for a given location with confidence
anctab <- ancestral.pml(fit, "ml")
# Plot on fit$tree, the tree the reconstruction was computed on. `tree` is the
# FastME starter tree from before the NNI rearrangements, so it is not the
# tree that anctab belongs to.
plotAnc(fit$tree, anctab, 25, cex.pie = 0.5)
#sequences
ancseq <- phyDat2alignment(anc)
ancseq$seq
## [1] "------------------------------------MNSLSTSAFSPVAFSLG----LLLVMATALP--------TPGHLGRDSKDE-----------ATSNRP-PLISADKM-EII---KYILGRISAL-KKEMCDKYNKCEDSKEALAENNLHLPKLAEKDGCFQSGFNQDTCLTRIATGLLEFQVHLKYLQANYEGDKENANS--VYFSTKVLLQMLMEKVKNQDEVTT-PDPTTDTGLQAILKSQD--KWLKQTTIHLILRNLEDFLQFSLRAVRVM--------"
## [2] "------------------------------------MNSLSTIAFS-----LG----LLLVTATA--------FPTPGPLGEDFKDD-----------TTSDRL-LLTSPDKTEALI---KYILGKISAM-RKEMCEKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEYQIYLDYLQNEYEGDKGSIEA--VQISSKALAQILRQKVKNPDEVTT-PDPTTNASLMNNLQSQND-DWMKNTKIILILRSLENFLQFSLRAIRIK--------"
## [3] "------------------------------------MNSLFTSAFSPLAVSLG----LLLVMTSA--------FPTPGPLGEDFKND-----------TTPSRL-LLTTPDKTEALI---KHIVDKISAL-RKEICEKNDECENSKETLAENKLKLPKMEEKDGCFQSGFNQAVCLIKTTAGLLEYQIYLDFLQNEFEGNQETVKE--LQSSIRTLIQILKQ---KAALITT-P--ATNTDMLEKMQSSN--EWVKNAKVIIILRSLENFLQFSLRAIRMK--------"
## [4] "------------------------------------MNSLSTSAFSPVAFSLG----LLLVMATA--------FPTPVPLGEDFKDG-----------TTSNR--PFTSPDKTEELI---KYI-----------MCEKYDKCENSKEALSENNLNLPKMTEKDGCFQSGFNQETCLMRITIGLLEFQIYLDYLQNYYEGDKGNTEA--VQISTKALIQLLRQKVKQPEEVST-PNPITGSSLLNKLQTEN--QWMKNTKMILILRSLEDFLQFSLRAVRIM--------"
## [5] "------------------------------------MNSLSTNTFSPVAFSLG----LLLVMATAFP--------TPVPLEEDSKDD-----------TTSNRP-PLTSSEQIEKLI---KSILLEISDM-KNKMCDNHESCKNSKEALTENNLNLPKLARKDGCFHSGFNQETCLIRLTTGLLEFQVYLEYLQNTYEE---HAKA--MQMRTKALVKILRQKIKNPIEETT-PDPTTNTGLLEKMHAQNE--WLKTTTIHLILRSLEDFLQFTQRAIRM---------"
## [6] "------------------------------------MNSLSTNTFSPVAFSLG----LLLVMTTAFP--------PPVPRGEDSKDD-----------TTSNRP-PLTSSEQIENLI---KSILLEISDV-KNKMCDNHESCKNSKEVLTENNLNLPKLARKDGCFHSGFNQETCLIRITTGLLEFQVYLEYLQNTFEG---HAQA--MKIGTKALVNILRQKMKNPIEETI-PDPTTNTGLLEKMHAQKN--WLKTTTIHLILRSLEDFLQFTQRAIRM---------"
## [7] "------------------------------------MNSLSTNTFSSVAFSLG----LLLVMTTAFP--------TPVPRGEDSKDD-----------TTSNRP-LLTSSEQIENLI---KSILLEISDV-KNKMCDNHESCKNSKEVLTENNLNLPKLARKDGCFHSGFNQETCLIRITTGLLEFQVYLEYIQNTFEG---HAQA--MKIGTKALVNILRQKMKNPVEETI-PDPTTNTGLLEKMHAQKN--WLKTTTIHLILRSLEDFLQFTQRAIRM---------"
## [8] "------------------------------------MNSFCTSAFRPVAFSLG----LLLVMATALP--------IPVPSGEDSKDD-----------TNSNRP-QLTSPNQTENLI---KSIFLEISEV-RNKMCGNDDSCKNSKEVLTENNLNLPKMAEKDGCFQSGFNQETCLMKITTGLLEFQIYLDYLQNKFEE---NAKA--MQMRTKALVQVLKQKVKNPNEITT-PDPTTNSSLLAKLQSQSE--WLQTTTIHLILRSLEDFLQFTQRAVRIM--------"
## [9] "------------------------------------MNSLSTSAFRPVAFSLG----LLLVMPAAFP--------APVPLGEDSKEV-----------AAPNRQ-LLTSTERIDKHI---RYILDGISAL-RKEICNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLLKITTGLLEFEVYLEYLQNRFESSKEQAGA--VQMSTKGLIQSLQKK--NLSAIAT-PDP----SLL--------------------LR------------------------"
## [10] "------------------------------------MNSLSTSAFRPVAFSLG----LLLVMPAAFP--------APVTLGEDSKEV-----------AAPNRQ-LLTSTERIDKHI---WYILDGISAL-RKEICNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLLKITTGLLEFEVYLEYLQNRFESSKEQAGA--VQMSTKGLIQSLQRKAKNLSAIAT-PDPATNASLLTKLQAQDQ--WLQGVTTHLILRSFKEFLQCSLRALRQM--------"
## [11] "------------------------------------MNSVSTSAFGPVAFSLG----LLLVLPAALP--------APVPPGEDSKDV-----------AAPHRQ-PLTSSERIDKQI---RYILDGISAL-RKETCNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLVKIITGLLEFEVYLEYLQNRFESSEEQARA--VQMSTKVLIQFLQKKAKNLDAITT-PDPTTNASLLTKLQAQNQ--WLQDMTTHLILRSFKEFLQSSLRALRQM--------"
## [12] "------------------------------------MNSVSTSAFGPVAFSLG----LLLVLPAAFP--------APVPPGEDSKDV-----------AAPHRQ-PLTSSERIDKQI---RYILDGISAL-RKETCNRSNMCESSKEALVENNLNLPKMAAKDGCFQSGFNEETCLVKIITGLLEFEVYLEYLQNRLESSEEQARA--VQMSTKVLIQFLQKKAKNLDAITT-PDPTTNASLLTKLQAQNQ--WLQDMTTHLILRSFKEFLQYSLRALRQM--------"
## [13] "------------------------------------MNSVSTSAFGPVAFSLG----LLLVLPAAFP--------APVLPGEDSKDV-----------AAPHSQ-PLTSSERIDKHI---RYILDGISAL-RKETCNRSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEDTCLVKIITGLLEFEVYLEYLQNRFESSEEQARA--VQMSTKVLIQFLQKKAKNLDAITT-PEPTTNASLLTKLQAQNQ--WLQDMTTHLILRSFKEFLQSSLRALRQM--------"
## [14] "------------------------------------MTSLSTSTFSPVAFSLG----LLLVMATALP--------TPVLLGEDSQDG-----------AILNIP-EPTSTDKTEDLA---KYILEEINVL-KQEACDSIYKC---RVALAKNNLNLPKMAEKDGCFHNGFNKDTCLMRIITGLLEFQVYIEFLKNNVN-EKSSARA--VQIGTKALMLMLKQKETYPSIVPT-PDPTSNASLMVKMQSQEE--WLKKVTVRLILRSLEDFLQYTVRASRLM--------"
## [15] "------------------------------------MNSLSTSAFSPVAFSLG----LLLVVATAFP--------TPLSLGEDSKDD-----------TTSNRP-LLTTADKTGHHI---KYILDKISAL-KKEMCNNFSKCENSKEILAENNLNLPKMAEKDGCFQSGFNQENCLKKITTGLSEFQIYLKYLQNQFKSENENAKT--IQISTNALVKMLKQKIKNPDEVTS-PDPTENTSLLEKLQSQN--EWLKNTTIHLILRSLEDFLQFSLRAVRIMQP------"
## [16] "------------------------------------MSFLSTSVFSPIAFSLG----LLLVMATAFP--------TPTPLGEDSKDV-----------TLN-RL-PFTSSNKTEELI---KYILIKISAL-KNEMCKKYDKCDNNKEALAENNLNLPKMTAKDGCFQSGFNKETCLIRITTGLLDFQIYLEYLQYKFEGDKENAEA--VLDSTKALSQILRQKVKNPDALTN-PNPTANARLLDELKSQNE--WLKNTTIHLTLQSLEDFLQFGLRAIRIM--------"
## [17] "------------------------------------MSSLSP----P----LG----LLLVLA--------------------------------------------------------------------K--LCGKYGKCENVKEALAENSLNLPKMANGVECFPSRFDQEPCLIRITSGLLEFQIYLEYLQKVFDGDKKNAMD--VHDNTKNLVQLLKQNVKNPDEVTT-PDPSHSANVLSWLQSQSQKNWLQSTTFHMMLQSLEDFLQFSLRAVRIM--------"
## [18] "------------------------------------MKFLSTSTFRPLAFLG-----LLLVSVTAFP--------TAQVQH-DFTADTTDEMTTAEMTTTMPNK-PTTSASQVFQMF---MRVYQAVKEL-KNEMNKH---------AILNN-LDLPKLKLEDGCFFNGYNWETCQLKITPGLFKFQTYLQSMQNKLQNESENKKAANIYAGIKSLSLFMKSKINNTEQMEF-LSPTPDATLLEKLETQSQ--TQMLLIAEIVLQRLEEFLQDSLRAIRKADWEGRN--"
## [19] "------------------------------------MKFLSTSTFLPLAFLG-----LLLVTATAFP--------TSQVPQ-DFTADR----------ITMPSK-LTTSASQVFGMF---IQVHKDVKAL-KSE-SKHKV-----ETAVLNN-LDLPKLRTEDGCFYRGYNWETCQLKIITGLLKFQTYLQYVQNKLKSDSEDRKPERIYTGVKSLSLLMKAKVNSTEEIVS-PSPTANASLLKKLESQNE--TQMLLSIEIILQSLEEFLQESMRAIRKAEPLDKEI-"
## [20] "------------------------------------MKFLSTSTFRPLALWG-----LLLVTVTAAP--------TSQVLK-DFRADT------------TSSK-PTTSNSQAFRLF---TLVLHDVQEL-KSETCKHNVNCLEEEKAMLNN-LNLPKIKIEDGCFYGGYNWETCHLKIITGLLKFQIYLQYMQNKLQSDSENEKAEKIYTSVKSLSLFMKAKVSNTEQTVF-PSPTANATLLEELESQNE--TQKLLIVQIVLCSLEEFLQNSLRPIRKAGSDLDLDI"
## [21] "------------------------------------MKFLSTSFFRPLAFLG-----LLLVTATAFP--------TAHVQL-DFTAEP-----------TTSPI-KLTTASLAFQKF---SEVYKDVKEL-KDEMSEHNV-----ETVTLDE-LTLPTINEEDGCHYLAYNWETCQSKIITGLLEFQPYVQFIQNKSQDASENEKTEKIYTGFQLLSQLVKPEANSSEETVL-PSPTANANVLEHLKSQNE--DEARLTVKLVLQGLELFLQESLRAIRNAESNGEI--"
## [22] "------------------------------------MKFLSARDFHPLAFLG-----LMLAVATALP--------TSQVRRGDFTEDT-----------TPNRP-VYTTSQQVGGLV---THVLREIFEL-RKELCNNNPDCMNYDDALLENNLELPVIQRNDGCYQTGYNWEICLLKITSGLLDYQIYLEFVTNNVQD-NKKDKARVIQSTTKTLSQIFKQEVKDPDKIVM-PSPTSKAILIEKLESQKQ--WPRTKTIELILKALEEFLKVTMRSTRQN--------"
## [23] "------------------------------------MKFLSARDFHPLVFLG-----LLLVMATALP--------TSQVRRGDFTEDT-----------TPNRP-VYTTSQQVGGLV---TYVLREIYEL-RKELCNNNPGCMDNDYVLLENNLELPVIQINDGCLQTGYNWEICLLKITSGLLDYQIYLEFVTNNVQD-NKKDKARVIQSTIKTLSQIFKQEVKGPDKIVT-PSPTSKAILMEKLESQKE--WPRTKTIKLILKALEEFLEVTMRSTRQN--------"
## [24] "------------------------------------MKFLSARDFHPLAVLG-----LMLAMATALP--------TSQVRRGDHTEDT-----------TPNKP-VHTTAQQLGGLI---SYILREVFEM-RKELCDNSPDCMANDDALSENNLELPAIQTNDRCLQTEYNQKLCLLKITSGLLDYQIYLEFVTNNVQD-NKKDKARVIQSATKTLNQILKQEVKDLSRTVT-PSPTAKALLLEKLESQKE--WSRTKTIQLILKALEGFLKNTMRATRQN--------"
## [25] "------------------------------------MKFLSARDFHPVAFLG-----LMLVTTTAFP--------TSQVRRGDFTEDT-----------TPNRP-VYTTS-QVGGLI---THVLWEIVEM-RKELCNGNSDCMNNDDALAENNLKLPEIQRNDGCYQTGYNQEICLLKISSGLLEYHSYLEYMKNNLKD-NKKDKARVLQRDTETLIHIFNQEVKDLHKIVL-PTPISNALLTDKLESQKE--WLRTKTIQFILKSLEEFLKVTLRSTRQT--------"
## [26] "------------------------------------MKFLCTKALHPLAFLG-----LLLVTASAFP--------NPKVQRGEGTGDT-----------TANKP-TYTSAQITENLM---TFILRRILDL-RTELCDNDEDCLENEEALSENNLNLPTMLEKDGCFQAGYNRHSCLLKTTSGLLEFQIYLEYIQNHLSD-DQKDIARDIQSNSKSLVEILKQEIKNPNEIVF-PSPTANASLMKKLESQHG--WQKTMTMQLILRSLQDFLQYALRAFRN---------"
## [27] "------------------------------------MNFLSTSAFSAVAFSLG----LLLATATAFP--------TSGPLEVLEKD------------ATPAKPLSLSTPEQTEGLI---THIIMEINDL-NGKMCSKGIKCEGDSHVMENNKLHLPRLEDDDGCFETGFNKEECLTRITYGLSGYEKYLAYIEGKFEGDINEAVA--LDLGTKHLIDVLKQKLSNPTQVTA-N-PTTDSEVIAELDSQED--WQQYTAIHIILVNLKEYLHKTLRALRHIGI------"
## [28] "------------------------------------MSSLST--------------------------------------------------------------------------------------------ICENHSMCRNGMVALEDNNLNFPQITEDGGCLPSGFNKDTCLNTITTSLSEYQPYLNYLQENYNLNERTAID--IQTYFKVLIRILKQMENN----TT-----YDT-------SQNN--WQMNTTFYLTLQSLERFLQYTVRAIRMM--------"
## [29] "------------------------------------MNPLLQITGSLRPVALTL---MLLMATAAFP--------TP--VPGGKDLQ-----------GMSSQ--KLPSSIPDLDSIVNHAKYLEKTASDLKEEICRIHNLCDNSNEALAENNLLLPNITERDGCLPSSFNEETCLIKIISGLQDFDIFLNYMETEMED--NRFQT--LKLSTTQLANTLKTVIKKTDLVPT-TNPTTSSILLSELQSLTA--WSRKVGFRLILWHYTRFIQGTVRAVRYLKTRSLDA-"
## [30] "------------------------------------MKSLST---S---------LGLLLMMASAFP--------------GDSKG-------------SSNKT-LEL-----------LMFILSQVEEL-RKE--------------------------------------ETCLRRIIAGLSQFHIYMKFVGNTLE--EENRKLSGVLKSIKALIQLLEENVKHPNEIAT-LDSTTNATVLPTWQLNTE--WLKNTMINLILQSLEKFIQFSVRAVRLM--------"
## [31] "------------------------------------MNSVFA---ALRPAPVGFALGLLLVVATAFPTAP------SVSMKEEPQGG-----------ATSDKP-FTP--VKIEST---ISYILMKISDI-RKK--------------L----L----LTER----PSAPNQEICLMRITVGLLEFEIYLKHLQNKFKSDEENNNMDIVLQNSQTLVKTLRPKVKTTEEAPT-LEPATLTSLKENMQLKEQ--WRRTQTIHYILCGLKDFLEFTLRAVRLM--------"
## [32] "------------------------------------MNSFTS---ALRPGPLGCSLALLLVVATAFPTS--------APVREDSNTK-----------ASPDKT-LTPPGRTIES----IRSILETIKEL-RKEMCDHDVNCMNRKEALAEVNLHLPRLIEEDGCFPPAVNNETCLLRITSGLMEFRMYLEHLQAKFRSDEENTRVSMVLKNIQHLIKTLRPKVKNLNEEAT-LKPAVAVSLMENLQQKNQ--WLKTTTIHFILRGLTNFLEFTLRAVDLM--------"
## [33] "------------------------------------MNTFCTSAFSPVAFSLG----LLLVMASAFP--------TPTPLGGDSKDD-----------TTSNRP-QLTSPNKTEELVNLIRFILSQVVEL-KNEMCDKYDKCENT-EVLAGNNLNLPKMTKNDGCFEKEFDKESCLVEIITGLLEFQIYLEYVQNKFEGEKGKVIA--VQNSAKALVRLLKQKLKNPDEVTT-PNPIANASLLSKLQSQTE--WLRNTTINLILQSLRDFMQVTLRAVRIM--------"
## [34] "------------------------------------MNSLSTSAFGPVAFSLG----LLLVMASAFP--------CRTPAGEDSKDD-----------ATSNTP-PVTISDNTIELM---KFIIEQISAL-KKEVCEKFDKCESISEALAGNNMNLPKIRTNDGCFSSECNWETCLTRVVTGLLEFQIYLDYVEDNFEGDKEKVRV--VQRSIKALVLILKQ-VKNP--VTT-PNPTTNASLLSKLQPQSE--WLRNTRINLILQNLDIFMQFSLRAIRNMKSGDSSSL"
## [35] "------------------------------------MNSLSTSAFSPVAFSLG----LLLVMASAFP--------NPKPLEGDSKDD-----------AASNRP-SLTSPDKTEELI---RFILAEISVL-RKKMCDKYDKCENSREALAGNNLKLPQMTEEDGCFHSGFNKETCLMKIITGLSEFQIYLDYLQNKFEGSKANVIV--VQNSTKALVQILKQKIKNPEDVTT-PDPTANASLLSKLQLQTE--WLKNTTINLILRSLDDFMQFSLRAVRIM--------"
## [36] "------------------------------------MNFVSTSTFSLVAFCLG----LLLVMASAFP--------TPPLLEGDSKDD-----------ATSNRP-PLTSPDKTEELI---NFILAKVSVL-RKEMCDKYDKCDNSREALAGNNLNLPKMTEKDRCFQSGFNKETCLMRIVTGLLEFQIYLDYLQNKFEGSKGNVLV--VQNSIKALVQILKQKVKNPEEVTT-PDATADASLLSKLQPQSE--WLKNTTINLILRSLEEFMQFSLRAVRFK--------"
## [37] "------------------------------------MNSLSTSTFNPVAFCLG----LLLAMASAFP--------TGTSLEGDSKDE-----------GT--------DADKTVKFM---TTIRFQVTEL-RKEMCDKYNKCENTTVALARNNLNLPEMTDKDRCFHSGFNQETCLMKIITGLLEFQIYLDYVQNKFEGEKGNIIA--VQNTIKSLVQNLKQKVKNSEAVTT-PDPSTNAGLLSKLHLQSG--WLKNTTINLILQSLDVFMQYSLRATRMLP-GHLKSL"
## [38] "------------------------------------MNSLSTS-----AFSLG----LLLVMATAFP--------TPGPLAGDSKDD-----------ATSNSL-PLTSANKVEELI---KYILGKISAL-RKEMCDKFNKCEDSKEALAENNLHLPKLEGKDGCFQSGFNQETCLTRITTGLVEFQLHLNILQNNYEGDKENVKS--VHMSTKILVQMLKSKVKNQDEVTT-PDPTTDASLQAILQSQD--EWLKHTTIHLILQSLEDFLQFSLRAVRIM--------"
## [39] "------------------------------------MNSLSTSAFSPVAFSLG----LLLVMATAFP--------TPGPLGGDSKDD-----------ATSNRP-PLTSADKMEDFI---RFILGKISAL-KKEMCEKYNKCEDSKEALAENNLNLPKLAEEDKCFQSQFNQETCLTRITTGLQEFQIHLKYLEANYEGNKNNAHS--VYISTKHLLQKLRP--MNRVEVTT-PDPTTDSSLQALFKSQD--KWLKHVTIHLILRSLEDFLQFSLRAIRIM--------"
## [40] "------------------------------------MNSLSASAFSPVAFSLG----LLLVMATAFP--------TPGPVGGESQAD-----------ATSNRP-PLTSPDKMEEFI---KYILGKISAL-RKEMCDKYNKCEDSKEALAENNLHLPKLAEKDGCFQSGFNQETCLTRITTGLLEFQIHLKYIQANYEGNKENANS--VYISTKLLLQMLMRKVKSQDEVTT-PDPTTDTSLQAILKAQD--EWLKHTTIHLILRSLEDFLQFSLRAVRIM--------"
## [41] "KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPMDLFPTPGPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSADKMEEFINLIKYILGKISALLKKEMCDKYNKCEDSKEALAENNLHLPKLAEKDGCFQSGFNQETCLTRITTGLLEFQIHLKYLQANYEGDKENANSSGVYISTKVLLQMLMQKVKNQDEVTTKPDPTTDTSLQAILKSQDEKKWLKHTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [42] "VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPWHGFPTPGPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSADKMEEFINLIKYILGKISALLRKEMCDKYNKCEDSKEALAENNLHLPKLAEKDGCFQSGFNQETCLTRITTGLLEFQIHLKYLQANYEGDKENANSSGVYISTKVLLQMLMQKVKNQDEVTTVPDPTTDTSLQAILKSQDEKEWLKHTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [43] "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPCKHFPTPGPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSADKMEELINLIKYILGKISALLRKEMCDKYNKCEDSKEALAENNLHLPKLAEKDGCFQSGFNQETCLTRITTGLLEFQIHLKYLQNNYEGDKENAKSSGVHMSTKVLVQMLKQKVKNQDEVTTMPDPTTDASLQAILQSQDEKEWLKHTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [44] "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPFLTFPTPVPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILDKISALLRKEMCDKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQMSTKALVQMLKQKVKNPDEVTTNPDPTTNASLLSKLQSQNEKEWLKNTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [45] "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPMSEFPTPVPLGEDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILDKISALLRKEMCDKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQMSTKALVQMLKQKVKNPDEVTTFPDPTTNASLLSKLQSQNEKEWLKNTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [46] "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPLWDFPTPVPLGEDFKDDTTDEMTTAEMTTTSNRPLPLTSPDKTEELINLIKYILDKISALLRKEMCEKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLDYLQNKYEGDKGNTEASGVQISTKALVQILRQKVKNPDEVTTEPDPTTNASLLNKLQSQNEKEWMKNTKIILILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [47] "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPVKKFPTPGPLGEDFKDDTTDEMTTAEMTTTSNRLLLLTSPDKTEALINLIKYILDKISAMLRKEMCEKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEYQIYLDYLQNEYEGDKGSIEASGVQISTKALVQILRQKVKNPDEVTTRPDPTTNASLMNKLQSQNDKEWMKNTKIILILRSLENFLQFSLRAIRIKQPGHLKSL"
## [48] "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPAHGFPTPVPLGEDSKDDTTDEMTTAEMTTTSNRPLPLTSPDKTEELINLIKYILDKISALLRKEMCDKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQMSTKALVQMLKQKVKNPDEVTTFPDPTTNASLLSKLQSQNEKEWLKNTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [49] "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPKFYFPTPVPLGEDSKDDTTDEMTTAEMTTTSNRPLPLTSPDKTEHLINLIKYILDKISALLRKEMCDNYDKCENSKEVLAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQMSTKALVQMLKQKVKNPDEVTTDPDPTTNTSLLSKLQSQNEKEWLKNTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [50] "WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPQCRFPTPVPLGEDSKDDTTDEMTTAEMTTTSNRPLPLTSPDQTENLINLIKSILLEISEVLRNKMCDNHDSCKNSKEVLTENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEEDKENAKASGMQMRTKALVQILKQKVKNPNEVTTWPDPTTNTSLLEKLQSQNEKEWLKTTTIHLILRSLEDFLQFTQRAVRIMQPGHLKSL"
## [51] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMNSLSTNTFSPVAFSLGCSLGLLLVMATAFPTSPMMGFPTPVPLGEDSKDDTTDEMTTAEMTTTSNRPLPLTSSEQIENLINLIKSILLEISDVLKNKMCDNHESCKNSKEVLTENNLNLPKLARKDGCFHSGFNQETCLIRITTGLLEFQVYLEYLQNTFEEDKEHAKASGMQMRTKALVKILRQKIKNPIEETTGPDPTTNTGLLEKMHAQNEKEWLKTTTIHLILRSLEDFLQFTQRAIRMMQPGHLKSL"
## [52] "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNSLSTNTFSPVAFSLGCSLGLLLVMTTAFPTSPEMTFPTPVPRGEDSKDDTTDEMTTAEMTTTSNRPLPLTSSEQIENLINLIKSILLEISDVLKNKMCDNHESCKNSKEVLTENNLNLPKLARKDGCFHSGFNQETCLIRITTGLLEFQVYLEYLQNTFEGDKEHAQASGMKIGTKALVNILRQKMKNPIEETIMPDPTTNTGLLEKMHAQKNKEWLKTTTIHLILRSLEDFLQFTQRAIRMMQPGDLKSL"
## [53] "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPMVFFPTPVPLGEDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILDKISALLRKEMCDKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQMSTKALVQMLKQKVKNPDEVTTIPDPTTNASLLSKLQSQNEKEWLKNTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [54] "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDMNSLSTSAFGPVAFSLGCSLGLLLVMPAAFPTSPCYSFPAPVPLGEDSKDVTTDEMTTAEMTAAPNRQLPLTSSERIDKHINLIRYILDGISALLRKEICNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLLKITTGLLEFEVYLEYLQNRFESSKEQARASGVQMSTKALIQFLQKKAKNLDAITTDPDPTTNASLLTKLQAQNQKEWLQDMTTHLILRSFKEFLQCSLRALRQMQPGHLKSL"
## [55] "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTMNSLSTSAFRPVAFSLGCSLGLLLVMPAAFPTSPWWKFPAPVPLGEDSKEVTTDEMTTAEMTAAPNRQLLLTSTERIDKHINLIRYILDGISALLRKEICNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLLKITTGLLEFEVYLEYLQNRFESSKEQAGASGVQMSTKGLIQSLQKKAKNLSAIATTPDPATNASLLTKLQAQDQKEWLQGVTTHLILRSFKEFLQCSLRALRQMQPGDLKSL"
## [56] "WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWMNSVSTSAFGPVAFSLGCSLGLLLVLPAAFPTSPRHSFPAPVPPGEDSKDVTTDEMTTAEMTAAPHRQLPLTSSERIDKQINLIRYILDGISALLRKETCNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLVKIITGLLEFEVYLEYLQNRFESSEEQARASGVQMSTKVLIQFLQKKAKNLDAITTWPDPTTNASLLTKLQAQNQKEWLQDMTTHLILRSFKEFLQSSLRALRQMQPGDLKSL"
## [57] "VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVMNSVSTSAFGPVAFSLGCSLGLLLVLPAAFPTSPMASFPAPVPPGEDSKDVTTDEMTTAEMTAAPHRQLPLTSSERIDKQINLIRYILDGISALLRKETCNRSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLVKIITGLLEFEVYLEYLQNRFESSEEQARASGVQMSTKVLIQFLQKKAKNLDAITTVPDPTTNASLLTKLQAQNQKEWLQDMTTHLILRSFKEFLQSSLRALRQMQPGDLKSL"
## [58] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPWYDFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILAKISALLRKEMCDKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQNSTKALVQMLKQKVKNPDEVTTGPDPTTNASLLSKLQSQNEKEWLKNTTIHLILQSLEDFLQFSLRAVRIMQPGHLSSL"
## [59] "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPYQNFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILAKISALLRKEMCDKYDKCENSKEALAENNLNLPKMTEKDGCFQSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQNSTKALVQILKQKVKNPDEVTTMPDPTTNASLLSKLQSQNEKEWLKNTTIHLILQSLEDFLQFSLRAVRIMQSGHLSSL"
## [60] "KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPESKFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILAKISALLRKEMCDKYDKCENSKEALAENNLNLPKMTEKDGCFQSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQNSTKALVQILKQKVKNPDEVTTKPDPTTNASLLSKLQSQNEKEWLKNTTIHLILQSLEDFLQFSLRAVRIMQSGHLSSL"
## [61] "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPMNQFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILAKISELLRKEMCDKHDKCENSKEALAENNLNLPKMTEKDGCFQSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQNSTKALVQILKQKVKNPDEVTTFPDPTTNASLLSKLQSQNEKEWLKNTTIHLILQSLEDFLQFSLRAVRIMQSGHLSSL"
## [62] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMNSLSTSAFSLRPFSLGCSLGLLLVMATAFPTSPTEYFPTPTPLRGDSKDDTTDEMTTAEMTATSNRPLPLPSPDKTEELINLIKYILAKISELLRKEMCDKHDKCENSKEALAENNLNLPKMTEKDGCFPSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDEENAKASGVQNSTKALVQILKQKVKNPDEVTTGPDPTTNASLLSKLQSQNEKEWLKNTTIHLILQSLEDFLQFTLRAVRIMQSGHLKSL"
## [63] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMNFLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPVFAFPTPTPLRGDSKDDTTDEMTTAEMTATSNRPLPLTSPDQTEELINLITYILAKISELLRKEMCDKHDKCENNKEALAENNLNLPKMTEKDGCFQSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKARGVQNSTKALVQILKQKVKNPDEVTTGPDPTTNASLLSKLQSQNEKEWQKNTTIHLILQSLEDFLQFSLRAVRIMQSGHLKSL"
## [64] "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMKFLSTSAFHPLAFLGGCSLGLLLVTATAFPTSPMWTFPTPQVQRGDSTDDTTDEMTTAEMTATSNRPLPYTSAQQTENLINLITYILRKILELLRKEMCDNNEDCMNNEEALAENNLNLPKMQEKDGCFQSGYNRETCLLKITSGLLEFQIYLEYMQNKLQDDNEKDKARDIQSSTKSLVQILKQEVKNPDEIVFMPSPTANASLMEKLESQNEKEWQKTMTIQLILRSLEDFLQYTLRAVRQMQSGGETSL"
## [65] "WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWMKFLSTSTFRPLAFLGGFSLGLLLVTATAFPTSPPAMFPTSQVQQGDFTADTTDEMTTAEMTTTTSSKLPTTSASQAFQMFNLITQVLHDVQELLKSEMCKHNVNCLEEETAMLNNNLNLPKIKTEDGCFYNGYNWETCQLKIITGLLKFQIYLQYMQNKLQSDSENEKAEKIYTSVKSLSLFMKAKVNNTEQTVFWPSPTANATLLEKLESQNEKETQKLLTVQIVLQSLEEFLQNSLRAIRKAESDGDLDI"
## [66] "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDMKFLSTSTFRPLAFLGGFSLGLLLVTATAFPTSPDIKFPTSQVQQGDFTADTTDEMTTAEMTTTTSSKLPTTSASQAFQMFNLITQVYKDVKELLKSEMSKHNVNCLEEETAMLNNNLNLPKIKTEDGCFYNGYNWETCQLKIITGLLKFQTYLQYMQNKLQSDSENEKAEKIYTGVKSLSLFMKAKVNNTEQTVFDPSPTANATLLEKLESQNEKETQMLLTVQIVLQSLEEFLQDSLRAIRKAESDGEMDI"
## [67] "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCMKFLSTSTFRPLAFLGGFALGLLLVTATAFPTAPSNIFPTSQVQQGDFTADTTDEMTTAEMTTTMPSKLPTTSASQVFQMFNLIMQVYKDVKELLKSEMSKHNVNCLEEETAILNNNLDLPKLKTEDGCFYNGYNWETCQLKIITGLLKFQTYLQYMQNKLQNDSENKKAEKIYTGVKSLSLFMKAKVNNTEQMVFCPSPTANATLLEKLESQNEKETQMLLTVEIVLQSLEEFLQDSLRAIRKAESEGKNII"
## [68] "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMKFLSTSAFHPLAFLGGCSLGLLLVTATAFPTSPRQLFPTSQVQRGDFTEDTTDEMTTAEMTATSNRPLPYTSSQQTENLINLITHVLREILELLRKEMCDNNEDCMNNEEALAENNLNLPKMQEKDGCFQSGYNWETCLLKITSGLLEFQIYLEYMQNKLQDDNEKDKARTIQSSTKSLVQILKQEVKNPDEIVFAPSPTANATLMEKLESQNEKEWQKTMTIQLILRSLEDFLQYTLRAVRQTESGGEMSI"
## [69] "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQMKFLSARDFHPLAFLGGCSLGLMLVTATAFPTSPWGIFPTSQVRRGDFTEDTTDEMTTAEMTATPNRPLVYTTSQQVGGLINLITHVLREIFELLRKELCNNNPDCMNNDDALAENNLKLPEIQRNDGCYQTGYNWEICLLKITSGLLEYQIYLEYMKNNLQDDNKKDKARVIQSTTKTLIQIFKQEVKDPDKIVMQPSPTSNAILMEKLESQKEKEWPRTKTIQLILKSLEEFLKVTMRSTRQTESGGETSI"
## [70] "YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYMKFLSARDFHPLAFLGGCSLGLMLAMATALPTSPVISFPTSQVRRGDFTEDTTDEMTTAEMTATPNRPLVYTTSQQVGGLVNLITHVLREIFELLRKELCNNNPDCMNNDDALLENNLELPVIQRNDGCYQTGYNWEICLLKITSGLLDYQIYLEFVTNNVQDDNKKDKARVIQSTTKTLSQIFKQEVKDPDKIVMYPSPTSKAILMEKLESQKEKEWPRTKTIQLILKALEEFLKVTMRSTRQNESGGETSI"
## [71] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMKFLSARDFHPLAFLGGFSLGLMLAMATALPTSPRETFPTSQVRRGDFTEDTTDEMTTAEMTATPNRPLVYTTSQQVGGLVNLITYVLREIFELLRKELCNNNPDCMNNDDALLENNLELPVIQRNDGCLQTGYNWEICLLKITSGLLDYQIYLEFVTNNVQDDNKKDKARVIQSTTKTLSQIFKQEVKDPDKIVTGPSPTSKAILMEKLESQKEKEWPRTKTIQLILKALEEFLKVTMRSTRQNESGGETSI"
## [72] "KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKMNSLSTSAFSLRPFSLGCSLGLLLVMATAFPTSPGGFFPTPTPLRGDSKDDTTDEMTTAEMTATSNRPLPLPSPDKTEELINLIKYILAKISELLRKEMCDNHDMCENSKEALAENNLNLPKMTEKDGCFPSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDEENAKASGVQNSTKALVQILKQMVKNPDEVTTKPDPTTNASLLSKLQSQNEKEWQKNTTFHLILQSLEDFLQFTVRAVRMMQSGHLKSL"
## [73] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMNSLSTSAFSLRPFSLGCSLGLLLVMATAFPTSPWMHFPTPTPLRGDSKGDTTDEMTTAEMTATSNKPLLTPSPDKTEELINLIKYILAKISELLRKEMCDKHDKCENSKEALAENNLNLPKMTEKDGCFPSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDEENAKMSGVLKSTKALVQILKQKVKNPDEVTTGLDPTTNASLLSKLQLQNEKEWLKNTTIHLILQSLEDFLQFTLRAVRLMQSGHLKSL"
## [74] "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQMNSFSSSAFALRPAPLGCSLGLLLVVATAFPTSPGTDFPTPAPMREDSQGGTTDEMTTAEMTATSDKPLLTPPGRKIESTINLIRYILEKISELLRKEMCDHDVNCMNRKEALAENNLHLPRLTEKDGCFPSAVNQETCLMRITTGLMEFQIYLEHLQNKFKSDEENTKMSMVLKNTQTLVKTLRPKVKNTDEAATQLEPATATSLMENLQQKNQKEWLKTTTIHFILRGLKDFLEFTLRAVRLMQPGDLKSL"
## [75] "WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWMNSLSTSAFSPVAFSLGCSLGLLLVMASAFPTSPEQHFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKFILAQISALLRKEMCDKYDKCENSREALAGNNLNLPKMTEKDGCFQSGFNKETCLMRIITGLLEFQIYLDYLQNKFEGDKGNVIASGVQNSTKALVQILKQKVKNPDEVTTWPDPTANASLLSKLQSQSEKEWLKNTTINLILQSLDDFMQFSLRAVRIMQSGHLSSL"
## [76] "SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSMNSLSTSAFSPVAFSLGCSLGLLLVMASAFPTSPTSSFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKFILAQISALLKKEMCDKYDKCENTREALAGNNLNLPKMTENDGCFQSEFNKETCLMRIITGLLEFQIYLDYVQNKFEGDKGKVIASGVQNSTKALVQILKQKVKNPDEVTTSPNPTANASLLSKLQSQSEKEWLRNTTINLILQSLDDFMQFSLRAVRIMQSGDSSSL"
## [77] "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFMNSLSTSAFSPVAFSLGCSLGLLLVMASAFPTSPAMEFPTPTPLEGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKFILAQISVLLRKEMCDKYDKCENSREALAGNNLNLPKMTEKDGCFQSGFNKETCLMRIITGLLEFQIYLDYLQNKFEGSKGNVIVSGVQNSTKALVQILKQKVKNPEEVTTFPDPTANASLLSKLQLQSEKEWLKNTTINLILRSLDDFMQFSLRAVRIMQSGHLKSL"
## [78] "VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVMNSLSTSTFSPVAFCLGCSLGLLLVMASAFPTSPEGSFPTPTPLEGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLINFILAQVSVLLRKEMCDKYDKCENSREALAGNNLNLPKMTEKDRCFQSGFNKETCLMRIITGLLEFQIYLDYLQNKFEGSKGNVIVSGVQNSIKALVQILKQKVKNPEEVTTVPDPTANASLLSKLQLQSEKEWLKNTTINLILRSLDDFMQFSLRAVRIMQSGHLKSL"
# Names belonging to the sequences printed from ancseq (the `nam` element)
ancseq[["nam"]]
## [1] "ailMel1" "orcOrc1" "panHod1" "vicPac2" "eptFus1" "myoDav1" "myoLuc2"
## [8] "pteAle1" "calJac3" "saiBol1" "ponAbe2" "nomLeu3" "macFas5" "tupChi1"
## [15] "cerSim1" "conCri1" "sorAra2" "cavPor3" "chiLan1" "hetGla2" "octDeg1"
## [22] "criGri1" "mesAur1" "micOch1" "mm10" "jacJac1" "otoGar3" "eriEur2"
## [29] "monDom5" "echTel2" "ochPri3" "oryCun2" "chrAsi1" "eleEdw1" "loxAfr3"
## [36] "triMan1" "oryAfe1" "canFam3" "musFur1" "lepWed1" "41" "42"
## [43] "43" "44" "45" "46" "47" "48" "49"
## [50] "50" "51" "52" "53" "54" "55" "56"
## [57] "57" "58" "59" "60" "61" "62" "63"
## [64] "64" "65" "66" "67" "68" "69" "70"
## [71] "71" "72" "73" "74" "75" "76" "77"
## [78] "78"
# One-sided Fisher's exact test on a 2x2 table of counts whose columns are
# (2, 1) and (1, 4). The table is built inline so that the "data:" line of
# the printed result shows the literal expression.
fisher.test(
  data.frame(c(2, 1), c(1, 4)),
  alternative = "greater"
)
##
## Fisher's Exact Test for Count Data
##
## data: data.frame(c(2, 1), c(1, 4))
## p-value = 0.2857
## alternative hypothesis: true odds ratio is greater than 1
## 95 percent confidence interval:
## 0.2446756 Inf
## sample estimates:
## odds ratio
## 5.784265
# One-sided Wilcoxon rank-sum test: is the first sample (3 values) shifted
# towards larger values than the second sample (5 values)?
# The vectors are passed inline so the "data:" line of the printout shows them.
w <- wilcox.test(
  c(45.22, 40.64, 14.34),
  c(56.34, 33.96, 27.3, 20.14, 5.21),
  alternative = "greater"
)
w
##
## Wilcoxon rank sum exact test
##
## data: c(45.22, 40.64, 14.34) and c(56.34, 33.96, 27.3, 20.14, 5.21)
## W = 9, p-value = 0.3929
## alternative hypothesis: true location shift is greater than 0
# Scale the W statistic by the product of the two sample sizes (3 and 5)
w[["statistic"]] / (3 * 5)
## W
## 0.6
# Approximation of AUROC
# (statistic / product of input lengths)