Sequence Analysis

Sequence alignment

# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# 
# BiocManager::install("msa")

library(msa)
## Loading required package: Biostrings
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: XVector
## Loading required package: GenomeInfoDb
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
# Four toy DNA sequences of unequal length, each keyed by its sequence ID.
seqs <- c(
  seq1 = "GTATGCAGCT",
  seq2 = "GTACTAGCAGCT",
  seq3 = "CAGCTACGCGTATACGAGT",
  seq4 = "GTATGTCAG"
)

# Wrap the named character vector in a DNAStringSet, the container msa() is
# given below (for amino acid sequences, use AAStringSet instead).
seqsstring <- DNAStringSet(seqs)
# print the set to check widths and names
seqsstring
## DNAStringSet object of length 4:
##     width seq                                               names               
## [1]    10 GTATGCAGCT                                        seq1
## [2]    12 GTACTAGCAGCT                                      seq2
## [3]    19 CAGCTACGCGTATACGAGT                               seq3
## [4]     9 GTATGTCAG                                         seq4
# Multiple sequence alignment with msa()'s default settings.
aln <- msa(seqsstring)
## use default substitution matrix
aln
## CLUSTAL 2.1  
## 
## Call:
##    msa(seqsstring)
## 
## MsaDNAMultipleAlignment with 4 rows and 21 columns
##     aln                   names
## [1] CAGCTACGCGTA-TACGAGT- seq3
## [2] ---------GTA-TGTCAG-- seq4
## [3] ---------GTA-TGCAGCT- seq1
## [4] ---------GTACTAGCAGCT seq2
## Con ---------GTA-T?CCAGT- Consensus
# how to interpret a sequence alignment? https://www.labxchange.org/library/items/lb:LabXchange:5b84cc84:html:1

RNA sequence analysis

  • RNA-seq analysis is used to identify differentially expressed and co-regulated genes
#load packages
library(DESeq2)
## Loading required package: GenomicRanges
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(ggplot2)

# Read the raw count table: one row per gene, an ID column (ensgene) followed
# by one count column per sample.
countData <- read.csv(
  "C:\\Users\\hed2\\Downloads\\statistics in biomedical\\week5/airway_scaledcounts.csv",
  header = TRUE,
  sep = ","
)
# first rows, to check that the file was parsed as expected
head(countData)
##           ensgene SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516
## 1 ENSG00000000003        723        486        904        445       1170
## 2 ENSG00000000005          0          0          0          0          0
## 3 ENSG00000000419        467        523        616        371        582
## 4 ENSG00000000457        347        258        364        237        318
## 5 ENSG00000000460         96         81         73         66        118
## 6 ENSG00000000938          0          0          1          0          2
##   SRR1039517 SRR1039520 SRR1039521
## 1       1097        806        604
## 2          0          0          0
## 3        781        417        509
## 4        447        330        324
## 5         94        102         74
## 6          0          0          0
# table size: rows (genes) x columns (gene ID + samples)
dim(countData)
## [1] 38694     9
# Read the sample annotation: sample id, dex treatment, cell type and GEO id.
metaData <- read.csv(
  "C:\\Users\\hed2\\Downloads\\statistics in biomedical\\week5/airway_metadata.csv",
  header = TRUE,
  sep = ","
)
head(metaData)
##           id     dex celltype     geo_id
## 1 SRR1039508 control   N61311 GSM1275862
## 2 SRR1039509 treated   N61311 GSM1275863
## 3 SRR1039512 control  N052611 GSM1275866
## 4 SRR1039513 treated  N052611 GSM1275867
## 5 SRR1039516 control  N080611 GSM1275870
## 6 SRR1039517 treated  N080611 GSM1275871
# Build the DESeq2 data set from the count table and the sample annotation.
# - design = ~ dex: model expression as a function of the dex treatment
# - tidy = TRUE: countData carries the gene IDs in its first column (ensgene)
#   rather than in its row names
# NOTE(review): dex is read in as character, so DESeq2 converts it to a factor
# itself; presumably the levels are alphabetical, which would make "control"
# the reference level - confirm if the direction of the contrast matters.
dds <- DESeqDataSetFromMatrix(
  countData = countData,
  colData = metaData,
  design = ~ dex,
  tidy = TRUE
)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
#run differential expression analysis
# DESeq() replaces dds with the fitted object; the steps it performs are the
# ones it logs when run (size factors, dispersion estimates, model fitting
# and testing).
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
#look at results
# extract the per-gene results table from the fitted object
res <- results(dds)
# (alternative: tidy=TRUE returns the table as a plain data frame)
# head(results(dds, tidy=TRUE))

# overview: number of genes up/down, flagged as outliers, or filtered for low counts
summary(res)
## 
## out of 25258 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1563, 6.2%
## LFC < 0 (down)     : 1188, 4.7%
## outliers [1]       : 142, 0.56%
## low counts [2]     : 9971, 39%
## (mean count < 10)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
head(res[order(res$padj),] ) #top genes, sorted by adjusted p-value (padj), smallest first
## log2 fold change (MLE): dex treated vs control 
## Wald test p-value: dex treated vs control 
## DataFrame with 6 rows and 6 columns
##                  baseMean log2FoldChange     lfcSE      stat      pvalue
##                 <numeric>      <numeric> <numeric> <numeric>   <numeric>
## ENSG00000152583   954.771        4.36836 0.2371268   18.4220 8.74490e-76
## ENSG00000179094   743.253        2.86389 0.1755693   16.3120 8.10784e-60
## ENSG00000116584  2277.913       -1.03470 0.0650984  -15.8944 6.92855e-57
## ENSG00000189221  2383.754        3.34154 0.2124058   15.7319 9.14433e-56
## ENSG00000120129  3440.704        2.96521 0.2036951   14.5571 5.26424e-48
## ENSG00000148175 13493.920        1.42717 0.1003890   14.2164 7.25128e-46
##                        padj
##                   <numeric>
## ENSG00000152583 1.32441e-71
## ENSG00000179094 6.13966e-56
## ENSG00000116584 3.49776e-53
## ENSG00000189221 3.46227e-52
## ENSG00000120129 1.59454e-44
## ENSG00000148175 1.83034e-42
  • Visualization of differential expression
# counts of a single gene across samples, grouped by dex treatment
plotCounts(dds, gene = "ENSG00000152583", intgroup = "dex")

# volcano plot: effect size (log2 fold change) against significance (-log10 p)
# layer 1 - every gene, default colour; x axis restricted to [-3, 3]
with(
  res,
  plot(log2FoldChange, -log10(pvalue),
       pch = 20, main = "Volcano plot", xlim = c(-3, 3))
)
# layer 2 - blue: adjusted p-value below 0.01
with(
  subset(res, padj < 0.01),
  points(log2FoldChange, -log10(pvalue), pch = 20, col = "blue")
)
# layer 3 - red: adjusted p-value below 0.01 AND |log2 fold change| above 2
with(
  subset(res, padj < 0.01 & abs(log2FoldChange) > 2),
  points(log2FoldChange, -log10(pvalue), pch = 20, col = "red")
)

- PCA

#principal component analysis of the samples: do they group by dex treatment?
# vst() transforms the counts first; plotPCA() then plots the samples,
# labelled by the dex column (intgroup)
vsdata <- vst(dds, blind=FALSE)
plotPCA(vsdata, intgroup="dex")

Existing gene tree and topology

  • using UCSC alignment, existing gene tree
#read alignments
library(phangorn)
## Loading required package: ape
## 
## Attaching package: 'ape'
## The following object is masked from 'package:Biostrings':
## 
##     complement
library(phytools)
## Loading required package: maps
library(ape)
 
#given known tree topology:
#RERconverge tree-building: https://github.com/nclark-lab/RERconverge/blob/master/R/estimateTreeFuncs.R

# read data
# All inputs live in one folder: name it once instead of repeating the absolute
# path (with mixed "\\" and "/" separators) in every call.
data_dir <- "C:\\Users\\hed2\\Downloads\\statistics in biomedical\\week5"

# master trees, one per alignment source ("_u" = UCSC, "_h" = Hiller)
genetree_u <- read.tree(file = file.path(data_dir, "UCSCmastertree.txt"))
genetree_h <- read.tree(file = file.path(data_dir, "Hillermastertree.tree"))

# amino-acid alignments of TNF and IL6 from both sources
tnf_u <- read.phyDat(file.path(data_dir, "TNF.phy"),
                     type = "AA", format = "phylip")
il6_u <- read.phyDat(file.path(data_dir, "IL6.phy"),
                     type = "AA", format = "phylip")
tnf_h <- read.phyDat(file.path(data_dir, "TNF.fasta"),
                     type = "AA", format = "fasta")
il6_h <- read.phyDat(file.path(data_dir, "IL6.fasta"),
                     type = "AA", format = "fasta")

# This section works on IL6 with the UCSC tree. Reuse the objects read above
# instead of parsing the same two files a second time.
genetree <- genetree_u
alnPhyDat <- il6_u

#eliminate species in the alignment but not the tree and vice versa; convenient for comparison
# keep only the species present in all four alignments AND both master trees,
# so that every tree built later has the same set of tips
inboth <- Reduce(
  intersect,
  list(names(tnf_u), names(il6_u), names(tnf_h), names(il6_h),
       genetree_u$tip.label, genetree_h$tip.label)
)
# tips of the working tree that are not shared by all inputs
todropg <- genetree$tip.label[!(genetree$tip.label %in% inboth)]

if (length(todropg) > 0) {
  genetree <- drop.tip(genetree, todropg)
}

# restrict the alignment only when it holds more species than the shared set
if (length(inboth) < length(names(alnPhyDat))) {
  alnPhyDat <- subset(alnPhyDat, subset = inboth)
}

#unroot the tree
genetree <- unroot(genetree)
#just in case, set all branches to 1 first (pml abhors a vacuum... or a zero)
# One length per row of the edge matrix, so this also works when the tree file
# carries no branch lengths at all (edge.length would then be NULL).
genetree$edge.length <- rep(1, nrow(genetree$edge))

#Run distance estimation using submodel
#generate an initial pml tree
# k = 4 rate categories, LG amino-acid model (model = "GTR" for DNA)
# NOTE(review): rearrangement is an optim.pml() option; it is not one of pml()'s
# documented arguments, so it presumably has no effect here - confirm and drop.
lgptree <- pml(genetree, alnPhyDat, model = "LG", k = 4, rearrangement = "none")

#generate a tree
# The topology stays fixed (rearrangement = "none"); only edge lengths, the
# proportion of invariant sites and the gamma shape are optimised.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# To silence the optimisation log, pass control = pml.control(trace = 0).
lgopttree <- optim.pml(
  lgptree,
  model = "LG", #model = "GTR" for DNA
  optInv = TRUE,
  optGamma = TRUE,
  optEdge = TRUE,
  rearrangement = "none"
)
## optimize edge weights:  -10426.61 --> -9206.413 
## optimize invariant sites:  -9206.413 --> -9206.107 
## optimize shape parameter:  -9206.107 --> -9187.222 
## optimize edge weights:  -9187.222 --> -9180.641 
## optimize invariant sites:  -9180.641 --> -9176.535 
## optimize shape parameter:  -9176.535 --> -9175.154 
## optimize edge weights:  -9175.154 --> -9175.134 
## optimize invariant sites:  -9175.134 --> -9175.095 
## optimize shape parameter:  -9175.095 --> -9175.094 
## optimize edge weights:  -9175.094 --> -9175.091 
## optimize invariant sites:  -9175.091 --> -9175.091 
## optimize shape parameter:  -9175.091 --> -9175.091 
## optimize edge weights:  -9175.091 --> -9175.091 
## optimize invariant sites:  -9175.091 --> -9175.091 
## optimize shape parameter:  -9175.091 --> -9175.091 
## optimize edge weights:  -9175.091 --> -9175.091
# summary of the optimised fixed-topology tree
lgopttree$tree
## 
## Phylogenetic tree with 40 tips and 38 internal nodes.
## 
## Tip labels:
##   ponAbe2, nomLeu3, macFas5, calJac3, saiBol1, otoGar3, ...
## 
## Unrooted; includes branch lengths.
# keep it under its own name (UCSC alignment, IL6) for the tree comparisons
tree_u6 <- lgopttree$tree
plot(tree_u6)

  • using UCSC alignment, topology
#BUILD tree topology:
# here the topology is inferred from the alignment instead of being fixed
dat <- alnPhyDat
  # read.phyDat("C:\\Users\\hed2\\Downloads\\statistics in biomedical\\week5/IL6.phy", type="AA", format="phylip")
# pairwise distances between the aligned sequences
dm <- dist.ml(dat)
tree <- fastme.bal(dm) #build a distance-based "starter tree"

# initial likelihood object on the starter tree (k = 4, inv = 0.2)
fit <- pml(tree, dat, k = 4, inv = 0.2)
# optimise under LG, this time also rearranging the topology by NNI
fit <- optim.pml(
  fit,
  model = "LG",
  optNni = TRUE,
  optGamma = TRUE,
  optInv = TRUE
)
## optimize edge weights:  -9302.217 --> -9244.775 
## optimize invariant sites:  -9244.775 --> -9218.686 
## optimize shape parameter:  -9218.686 --> -9205.59 
## optimize edge weights:  -9205.59 --> -9200.783 
## optimize topology:  -9200.783 --> -9163.188  NNI moves:  12 
## optimize invariant sites:  -9163.188 --> -9157.058 
## optimize shape parameter:  -9157.058 --> -9152.342 
## optimize edge weights:  -9152.342 --> -9152.114 
## optimize topology:  -9152.114 --> -9152.114  NNI moves:  0 
## optimize invariant sites:  -9152.114 --> -9151.963 
## optimize shape parameter:  -9151.963 --> -9151.958 
## optimize edge weights:  -9151.958 --> -9151.949 
## optimize invariant sites:  -9151.949 --> -9151.948 
## optimize shape parameter:  -9151.948 --> -9151.948 
## optimize edge weights:  -9151.948 --> -9151.948 
## optimize invariant sites:  -9151.948 --> -9151.948 
## optimize shape parameter:  -9151.948 --> -9151.948 
## optimize edge weights:  -9151.948 --> -9151.948
# summary of the tree after the topology search
fit$tree
## 
## Phylogenetic tree with 40 tips and 38 internal nodes.
## 
## Tip labels:
##   ailMel1, conCri1, sorAra2, orcOrc1, vicPac2, panHod1, ...
## 
## Unrooted; includes branch lengths.
# keep the inferred topology (UCSC alignment, IL6) for the tree comparisons
top_u6 <- fit$tree
#re-root a tree:

plot(top_u6)

  • using Hiller alignment, existing gene tree
#given known tree topology:
#RERconverge tree-building: https://github.com/nclark-lab/RERconverge/blob/master/R/estimateTreeFuncs.R
# The Hiller master tree and the IL6 fasta alignment were already read into
# genetree_h and il6_h; reuse them instead of parsing the same two files again
# from a repeated hard-coded path.
genetree <- genetree_h
alnPhyDat <- il6_h

#eliminate species in the alignment but not the tree and vice versa
# inboth is deliberately NOT recomputed for this pair: the shared species set
# built from all alignments and both master trees is reused, so the Hiller
# trees keep the same tips as the UCSC ones and can be compared with them.
# inboth = intersect(names(alnPhyDat),genetree$tip.label)
todropg <- genetree$tip.label[!(genetree$tip.label %in% inboth)]

if (length(todropg) > 0) {
  genetree <- drop.tip(genetree, todropg)
}

# restrict the alignment only when it holds more species than the shared set
if (length(inboth) < length(names(alnPhyDat))) {
  alnPhyDat <- subset(alnPhyDat, subset = inboth)
}

#unroot the tree
genetree <- unroot(genetree)
#just in case, set all branches to 1 first (pml abhors a vacuum... or a zero)
# One length per row of the edge matrix, so this also works when the tree file
# carries no branch lengths at all (edge.length would then be NULL).
genetree$edge.length <- rep(1, nrow(genetree$edge))

#Run distance estimation using submodel
#generate an initial pml tree
# k = 4 rate categories, LG amino-acid model (model = "GTR" for DNA)
# NOTE(review): rearrangement is an optim.pml() option; it is not one of pml()'s
# documented arguments, so it presumably has no effect here - confirm and drop.
lgptree <- pml(genetree, alnPhyDat, model = "LG", k = 4, rearrangement = "none")

#generate a tree
# The topology stays fixed (rearrangement = "none"); only edge lengths, the
# proportion of invariant sites and the gamma shape are optimised.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# To silence the optimisation log, pass control = pml.control(trace = 0).
lgopttree <- optim.pml(
  lgptree,
  model = "LG", #model = "GTR" for DNA
  optInv = TRUE,
  optGamma = TRUE,
  optEdge = TRUE,
  rearrangement = "none"
)
## optimize edge weights:  -10590.38 --> -9369.392 
## optimize invariant sites:  -9369.392 --> -9368.807 
## optimize shape parameter:  -9368.807 --> -9351.93 
## optimize edge weights:  -9351.93 --> -9347.347 
## optimize invariant sites:  -9347.347 --> -9343.656 
## optimize shape parameter:  -9343.656 --> -9342.779 
## optimize edge weights:  -9342.779 --> -9342.731 
## optimize invariant sites:  -9342.731 --> -9342.71 
## optimize shape parameter:  -9342.71 --> -9342.708 
## optimize edge weights:  -9342.708 --> -9342.706 
## optimize invariant sites:  -9342.706 --> -9342.706 
## optimize shape parameter:  -9342.706 --> -9342.706 
## optimize edge weights:  -9342.706 --> -9342.706 
## optimize invariant sites:  -9342.706 --> -9342.706 
## optimize shape parameter:  -9342.706 --> -9342.706 
## optimize edge weights:  -9342.706 --> -9342.706
# keep the optimised fixed-topology tree (Hiller alignment, IL6) for the
# tree comparisons
tree_h6 <- lgopttree$tree
# summary of that tree
lgopttree$tree
## 
## Phylogenetic tree with 40 tips and 38 internal nodes.
## 
## Tip labels:
##   monDom5, loxAfr3, triMan1, chrAsi1, echTel2, eleEdw1, ...
## 
## Unrooted; includes branch lengths.
plot(tree_h6)

  • using Hiller alignment, topology
#BUILD tree topology:
# here the topology is inferred from the alignment instead of being fixed
dat <- alnPhyDat
# pairwise distances between the aligned sequences
dm <- dist.ml(dat)
tree <- fastme.bal(dm) #build a distance-based "starter tree"

# initial likelihood object on the starter tree (k = 4, inv = 0.2)
fit <- pml(tree, dat, k = 4, inv = 0.2)
# LG model
# optimise under LG, this time also rearranging the topology by NNI
fit <- optim.pml(
  fit,
  model = "LG",
  optNni = TRUE,
  optGamma = TRUE,
  optInv = TRUE
)
## optimize edge weights:  -9463.005 --> -9401.676 
## optimize invariant sites:  -9401.676 --> -9373.608 
## optimize shape parameter:  -9373.608 --> -9358.556 
## optimize edge weights:  -9358.556 --> -9356.956 
## optimize topology:  -9356.956 --> -9338.505  NNI moves:  11 
## optimize invariant sites:  -9338.505 --> -9330.277 
## optimize shape parameter:  -9330.277 --> -9326.17 
## optimize edge weights:  -9326.17 --> -9325.905 
## optimize topology:  -9325.905 --> -9325.905  NNI moves:  0 
## optimize invariant sites:  -9325.905 --> -9325.836 
## optimize shape parameter:  -9325.836 --> -9325.835 
## optimize edge weights:  -9325.835 --> -9325.832 
## optimize invariant sites:  -9325.832 --> -9325.831 
## optimize shape parameter:  -9325.831 --> -9325.831 
## optimize edge weights:  -9325.831 --> -9325.831 
## optimize invariant sites:  -9325.831 --> -9325.831 
## optimize shape parameter:  -9325.831 --> -9325.831 
## optimize edge weights:  -9325.831 --> -9325.831
# summary of the tree after the topology search
fit$tree
## 
## Phylogenetic tree with 40 tips and 38 internal nodes.
## 
## Tip labels:
##   ailMel1, orcOrc1, panHod1, vicPac2, eptFus1, myoDav1, ...
## 
## Unrooted; includes branch lengths.
# keep the inferred topology (Hiller alignment, IL6) for the tree comparisons
top_h6 <- fit$tree
#re-root a tree:
# fitrooted=root.phylo(fit$tree, outgroup="ornAna1", resolve.root = T)
plot(top_h6)

  • compare existing gene tree / topology between the two datasets
#how different are two phylogenetic trees?  Robinson-Foulds distance
# 1) fixed-topology trees: UCSC (tree_u6) vs Hiller (tree_h6)
#ignoring branch lengths:
RF.dist(tree_u6, tree_h6)
## [1] 0
#using branch lengths:
wRF.dist(tree_u6, tree_h6)
## [1] 1.711332
# 2) inferred topologies: UCSC (top_u6) vs Hiller (top_h6)
#ignoring branch lengths:
RF.dist(top_u6, top_h6)
## [1] 30
#using branch lengths:
wRF.dist(top_u6, top_h6)
## [1] 2.723068
  • compare existing gene tree with inferred topology, within each dataset
# UCSC alignment: fixed-topology tree (tree_u6) vs inferred topology (top_u6)
#ignoring branch lengths:
RF.dist(tree_u6, top_u6)
## [1] 30
#using branch lengths:
wRF.dist(tree_u6, top_u6)
## [1] 1.83267
# Hiller alignment: fixed-topology tree (tree_h6) vs inferred topology (top_h6)
# (this pair previously repeated the UCSC objects, so the Hiller comparison
# was never made and the printed values merely duplicated the ones above)
#ignoring branch lengths:
RF.dist(tree_h6, top_h6)
## (re-run to regenerate this value)
#using branch lengths:
wRF.dist(tree_h6, top_h6)
## (re-run to regenerate this value)
  • using Hiller alignment, topology by bootstrap method
#bootstrapping:
# bs = number of bootstrap replicates; 10 keeps the run short
# (normally set bs=100). optNni = TRUE repeats the NNI topology search for
# every replicate. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
bs <- bootstrap.pml(fit, optNni = TRUE, bs = 10)
## optimize edge weights:  -9509.777 --> -9248.377 
## optimize edge weights:  -9248.377 --> -9248.377 
## optimize topology:  -9248.377 --> -9203.103  NNI moves:  11 
## optimize edge weights:  -9203.103 --> -9203.103 
## optimize topology:  -9203.103 --> -9200.12  NNI moves:  2 
## optimize edge weights:  -9200.12 --> -9200.12 
## optimize topology:  -9200.12 --> -9200.12  NNI moves:  0 
## optimize edge weights:  -9374.787 --> -9124.709 
## optimize edge weights:  -9124.709 --> -9124.708 
## optimize topology:  -9124.708 --> -9095.208  NNI moves:  7 
## optimize edge weights:  -9095.208 --> -9095.208 
## optimize topology:  -9095.208 --> -9095.208  NNI moves:  0 
## optimize edge weights:  -9964.103 --> -9714.785 
## optimize edge weights:  -9714.785 --> -9714.785 
## optimize topology:  -9714.785 --> -9697.322  NNI moves:  6 
## optimize edge weights:  -9697.322 --> -9697.322 
## optimize topology:  -9697.322 --> -9697.322  NNI moves:  0 
## optimize edge weights:  -9717.4 --> -9437.075 
## optimize edge weights:  -9437.075 --> -9437.074 
## optimize topology:  -9437.074 --> -9409.66  NNI moves:  10 
## optimize edge weights:  -9409.66 --> -9409.656 
## optimize topology:  -9409.656 --> -9402.791  NNI moves:  2 
## optimize edge weights:  -9402.791 --> -9402.791 
## optimize topology:  -9402.791 --> -9402.791  NNI moves:  0 
## optimize edge weights:  -9754.917 --> -9517.784 
## optimize edge weights:  -9517.784 --> -9517.784 
## optimize topology:  -9517.784 --> -9504.578  NNI moves:  4 
## optimize edge weights:  -9504.578 --> -9504.578 
## optimize topology:  -9504.578 --> -9504.578  NNI moves:  0 
## optimize edge weights:  -9054.493 --> -8802.907 
## optimize edge weights:  -8802.907 --> -8802.907 
## optimize topology:  -8802.907 --> -8779.345  NNI moves:  11 
## optimize edge weights:  -8779.345 --> -8779.345 
## optimize topology:  -8779.345 --> -8779.345  NNI moves:  0 
## optimize edge weights:  -8779.345 --> -8779.345 
## optimize edge weights:  -10135.65 --> -9890.388 
## optimize edge weights:  -9890.388 --> -9890.387 
## optimize topology:  -9890.387 --> -9865.023  NNI moves:  5 
## optimize edge weights:  -9865.023 --> -9865.023 
## optimize topology:  -9865.023 --> -9865.023  NNI moves:  0 
## optimize edge weights:  -9420.479 --> -9142.614 
## optimize edge weights:  -9142.614 --> -9142.614 
## optimize topology:  -9142.614 --> -9089.098  NNI moves:  9 
## optimize edge weights:  -9089.098 --> -9089.097 
## optimize topology:  -9089.097 --> -9089.097  NNI moves:  0 
## optimize edge weights:  -9089.097 --> -9089.097 
## optimize edge weights:  -9600.162 --> -9324.265 
## optimize edge weights:  -9324.265 --> -9324.265 
## optimize topology:  -9324.265 --> -9307.328  NNI moves:  6 
## optimize edge weights:  -9307.328 --> -9307.325 
## optimize topology:  -9307.325 --> -9302.351  NNI moves:  1 
## optimize edge weights:  -9302.351 --> -9302.351 
## optimize topology:  -9302.351 --> -9302.351  NNI moves:  0 
## optimize edge weights:  -9967.612 --> -9685.16 
## optimize edge weights:  -9685.16 --> -9685.16 
## optimize topology:  -9685.16 --> -9654.868  NNI moves:  7 
## optimize edge weights:  -9654.868 --> -9654.868 
## optimize topology:  -9654.868 --> -9654.868  NNI moves:  0
# Draw the optimised tree with the bootstrap replicates in four layouts.
# Each call overwrites treeBS, so the value from the last call is the one kept.
treeBS <- plotBS(fit$tree, bs, type = "cladogram")

treeBS <- plotBS(fit$tree, bs, type = "phylogram")

treeBS <- plotBS(fit$tree, bs, type = "fan")

# smaller labels (cex) for the unrooted layout
treeBS <- plotBS(fit$tree, bs, type = "unrooted", cex = 0.75)

- plotting tree for a given location with confidence

#get ancestral sequences:
# ML ancestral reconstruction on the tree held in fit, returned as phyDat
anc <- ancestral.pml(fit, "ml", return = "phyDat")

#plotting tree for a given location with confidence
# same reconstruction in its default return form, which is what plotAnc() is given
anctab <- ancestral.pml(fit, "ml")
# The reconstruction belongs to fit$tree, i.e. the tree AFTER the NNI topology
# search, so it has to be drawn on that tree. `tree` is only the FastME starter
# tree; its topology (and hence its internal nodes) differs from fit$tree, and
# plotting on it would attach the pies to the wrong nodes.
# 25 is the alignment position shown (NOTE(review): phangorn may index site
# patterns rather than raw columns here - confirm for the installed version).
plotAnc(fit$tree, anctab, 25, cex.pie = 0.5)

  • output sequences
# Convert the reconstructed phyDat sequences to an alignment object and
# print the aligned sequence strings.
ancseq <- phyDat2alignment(anc)
ancseq[["seq"]]
##  [1] "------------------------------------MNSLSTSAFSPVAFSLG----LLLVMATALP--------TPGHLGRDSKDE-----------ATSNRP-PLISADKM-EII---KYILGRISAL-KKEMCDKYNKCEDSKEALAENNLHLPKLAEKDGCFQSGFNQDTCLTRIATGLLEFQVHLKYLQANYEGDKENANS--VYFSTKVLLQMLMEKVKNQDEVTT-PDPTTDTGLQAILKSQD--KWLKQTTIHLILRNLEDFLQFSLRAVRVM--------"
##  [2] "------------------------------------MNSLSTIAFS-----LG----LLLVTATA--------FPTPGPLGEDFKDD-----------TTSDRL-LLTSPDKTEALI---KYILGKISAM-RKEMCEKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEYQIYLDYLQNEYEGDKGSIEA--VQISSKALAQILRQKVKNPDEVTT-PDPTTNASLMNNLQSQND-DWMKNTKIILILRSLENFLQFSLRAIRIK--------"
##  [3] "------------------------------------MNSLFTSAFSPLAVSLG----LLLVMTSA--------FPTPGPLGEDFKND-----------TTPSRL-LLTTPDKTEALI---KHIVDKISAL-RKEICEKNDECENSKETLAENKLKLPKMEEKDGCFQSGFNQAVCLIKTTAGLLEYQIYLDFLQNEFEGNQETVKE--LQSSIRTLIQILKQ---KAALITT-P--ATNTDMLEKMQSSN--EWVKNAKVIIILRSLENFLQFSLRAIRMK--------"
##  [4] "------------------------------------MNSLSTSAFSPVAFSLG----LLLVMATA--------FPTPVPLGEDFKDG-----------TTSNR--PFTSPDKTEELI---KYI-----------MCEKYDKCENSKEALSENNLNLPKMTEKDGCFQSGFNQETCLMRITIGLLEFQIYLDYLQNYYEGDKGNTEA--VQISTKALIQLLRQKVKQPEEVST-PNPITGSSLLNKLQTEN--QWMKNTKMILILRSLEDFLQFSLRAVRIM--------"
##  [5] "------------------------------------MNSLSTNTFSPVAFSLG----LLLVMATAFP--------TPVPLEEDSKDD-----------TTSNRP-PLTSSEQIEKLI---KSILLEISDM-KNKMCDNHESCKNSKEALTENNLNLPKLARKDGCFHSGFNQETCLIRLTTGLLEFQVYLEYLQNTYEE---HAKA--MQMRTKALVKILRQKIKNPIEETT-PDPTTNTGLLEKMHAQNE--WLKTTTIHLILRSLEDFLQFTQRAIRM---------"
##  [6] "------------------------------------MNSLSTNTFSPVAFSLG----LLLVMTTAFP--------PPVPRGEDSKDD-----------TTSNRP-PLTSSEQIENLI---KSILLEISDV-KNKMCDNHESCKNSKEVLTENNLNLPKLARKDGCFHSGFNQETCLIRITTGLLEFQVYLEYLQNTFEG---HAQA--MKIGTKALVNILRQKMKNPIEETI-PDPTTNTGLLEKMHAQKN--WLKTTTIHLILRSLEDFLQFTQRAIRM---------"
##  [7] "------------------------------------MNSLSTNTFSSVAFSLG----LLLVMTTAFP--------TPVPRGEDSKDD-----------TTSNRP-LLTSSEQIENLI---KSILLEISDV-KNKMCDNHESCKNSKEVLTENNLNLPKLARKDGCFHSGFNQETCLIRITTGLLEFQVYLEYIQNTFEG---HAQA--MKIGTKALVNILRQKMKNPVEETI-PDPTTNTGLLEKMHAQKN--WLKTTTIHLILRSLEDFLQFTQRAIRM---------"
##  [8] "------------------------------------MNSFCTSAFRPVAFSLG----LLLVMATALP--------IPVPSGEDSKDD-----------TNSNRP-QLTSPNQTENLI---KSIFLEISEV-RNKMCGNDDSCKNSKEVLTENNLNLPKMAEKDGCFQSGFNQETCLMKITTGLLEFQIYLDYLQNKFEE---NAKA--MQMRTKALVQVLKQKVKNPNEITT-PDPTTNSSLLAKLQSQSE--WLQTTTIHLILRSLEDFLQFTQRAVRIM--------"
##  [9] "------------------------------------MNSLSTSAFRPVAFSLG----LLLVMPAAFP--------APVPLGEDSKEV-----------AAPNRQ-LLTSTERIDKHI---RYILDGISAL-RKEICNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLLKITTGLLEFEVYLEYLQNRFESSKEQAGA--VQMSTKGLIQSLQKK--NLSAIAT-PDP----SLL--------------------LR------------------------"
## [10] "------------------------------------MNSLSTSAFRPVAFSLG----LLLVMPAAFP--------APVTLGEDSKEV-----------AAPNRQ-LLTSTERIDKHI---WYILDGISAL-RKEICNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLLKITTGLLEFEVYLEYLQNRFESSKEQAGA--VQMSTKGLIQSLQRKAKNLSAIAT-PDPATNASLLTKLQAQDQ--WLQGVTTHLILRSFKEFLQCSLRALRQM--------"
## [11] "------------------------------------MNSVSTSAFGPVAFSLG----LLLVLPAALP--------APVPPGEDSKDV-----------AAPHRQ-PLTSSERIDKQI---RYILDGISAL-RKETCNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLVKIITGLLEFEVYLEYLQNRFESSEEQARA--VQMSTKVLIQFLQKKAKNLDAITT-PDPTTNASLLTKLQAQNQ--WLQDMTTHLILRSFKEFLQSSLRALRQM--------"
## [12] "------------------------------------MNSVSTSAFGPVAFSLG----LLLVLPAAFP--------APVPPGEDSKDV-----------AAPHRQ-PLTSSERIDKQI---RYILDGISAL-RKETCNRSNMCESSKEALVENNLNLPKMAAKDGCFQSGFNEETCLVKIITGLLEFEVYLEYLQNRLESSEEQARA--VQMSTKVLIQFLQKKAKNLDAITT-PDPTTNASLLTKLQAQNQ--WLQDMTTHLILRSFKEFLQYSLRALRQM--------"
## [13] "------------------------------------MNSVSTSAFGPVAFSLG----LLLVLPAAFP--------APVLPGEDSKDV-----------AAPHSQ-PLTSSERIDKHI---RYILDGISAL-RKETCNRSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEDTCLVKIITGLLEFEVYLEYLQNRFESSEEQARA--VQMSTKVLIQFLQKKAKNLDAITT-PEPTTNASLLTKLQAQNQ--WLQDMTTHLILRSFKEFLQSSLRALRQM--------"
## [14] "------------------------------------MTSLSTSTFSPVAFSLG----LLLVMATALP--------TPVLLGEDSQDG-----------AILNIP-EPTSTDKTEDLA---KYILEEINVL-KQEACDSIYKC---RVALAKNNLNLPKMAEKDGCFHNGFNKDTCLMRIITGLLEFQVYIEFLKNNVN-EKSSARA--VQIGTKALMLMLKQKETYPSIVPT-PDPTSNASLMVKMQSQEE--WLKKVTVRLILRSLEDFLQYTVRASRLM--------"
## [15] "------------------------------------MNSLSTSAFSPVAFSLG----LLLVVATAFP--------TPLSLGEDSKDD-----------TTSNRP-LLTTADKTGHHI---KYILDKISAL-KKEMCNNFSKCENSKEILAENNLNLPKMAEKDGCFQSGFNQENCLKKITTGLSEFQIYLKYLQNQFKSENENAKT--IQISTNALVKMLKQKIKNPDEVTS-PDPTENTSLLEKLQSQN--EWLKNTTIHLILRSLEDFLQFSLRAVRIMQP------"
## [16] "------------------------------------MSFLSTSVFSPIAFSLG----LLLVMATAFP--------TPTPLGEDSKDV-----------TLN-RL-PFTSSNKTEELI---KYILIKISAL-KNEMCKKYDKCDNNKEALAENNLNLPKMTAKDGCFQSGFNKETCLIRITTGLLDFQIYLEYLQYKFEGDKENAEA--VLDSTKALSQILRQKVKNPDALTN-PNPTANARLLDELKSQNE--WLKNTTIHLTLQSLEDFLQFGLRAIRIM--------"
## [17] "------------------------------------MSSLSP----P----LG----LLLVLA--------------------------------------------------------------------K--LCGKYGKCENVKEALAENSLNLPKMANGVECFPSRFDQEPCLIRITSGLLEFQIYLEYLQKVFDGDKKNAMD--VHDNTKNLVQLLKQNVKNPDEVTT-PDPSHSANVLSWLQSQSQKNWLQSTTFHMMLQSLEDFLQFSLRAVRIM--------"
## [18] "------------------------------------MKFLSTSTFRPLAFLG-----LLLVSVTAFP--------TAQVQH-DFTADTTDEMTTAEMTTTMPNK-PTTSASQVFQMF---MRVYQAVKEL-KNEMNKH---------AILNN-LDLPKLKLEDGCFFNGYNWETCQLKITPGLFKFQTYLQSMQNKLQNESENKKAANIYAGIKSLSLFMKSKINNTEQMEF-LSPTPDATLLEKLETQSQ--TQMLLIAEIVLQRLEEFLQDSLRAIRKADWEGRN--"
## [19] "------------------------------------MKFLSTSTFLPLAFLG-----LLLVTATAFP--------TSQVPQ-DFTADR----------ITMPSK-LTTSASQVFGMF---IQVHKDVKAL-KSE-SKHKV-----ETAVLNN-LDLPKLRTEDGCFYRGYNWETCQLKIITGLLKFQTYLQYVQNKLKSDSEDRKPERIYTGVKSLSLLMKAKVNSTEEIVS-PSPTANASLLKKLESQNE--TQMLLSIEIILQSLEEFLQESMRAIRKAEPLDKEI-"
## [20] "------------------------------------MKFLSTSTFRPLALWG-----LLLVTVTAAP--------TSQVLK-DFRADT------------TSSK-PTTSNSQAFRLF---TLVLHDVQEL-KSETCKHNVNCLEEEKAMLNN-LNLPKIKIEDGCFYGGYNWETCHLKIITGLLKFQIYLQYMQNKLQSDSENEKAEKIYTSVKSLSLFMKAKVSNTEQTVF-PSPTANATLLEELESQNE--TQKLLIVQIVLCSLEEFLQNSLRPIRKAGSDLDLDI"
## [21] "------------------------------------MKFLSTSFFRPLAFLG-----LLLVTATAFP--------TAHVQL-DFTAEP-----------TTSPI-KLTTASLAFQKF---SEVYKDVKEL-KDEMSEHNV-----ETVTLDE-LTLPTINEEDGCHYLAYNWETCQSKIITGLLEFQPYVQFIQNKSQDASENEKTEKIYTGFQLLSQLVKPEANSSEETVL-PSPTANANVLEHLKSQNE--DEARLTVKLVLQGLELFLQESLRAIRNAESNGEI--"
## [22] "------------------------------------MKFLSARDFHPLAFLG-----LMLAVATALP--------TSQVRRGDFTEDT-----------TPNRP-VYTTSQQVGGLV---THVLREIFEL-RKELCNNNPDCMNYDDALLENNLELPVIQRNDGCYQTGYNWEICLLKITSGLLDYQIYLEFVTNNVQD-NKKDKARVIQSTTKTLSQIFKQEVKDPDKIVM-PSPTSKAILIEKLESQKQ--WPRTKTIELILKALEEFLKVTMRSTRQN--------"
## [23] "------------------------------------MKFLSARDFHPLVFLG-----LLLVMATALP--------TSQVRRGDFTEDT-----------TPNRP-VYTTSQQVGGLV---TYVLREIYEL-RKELCNNNPGCMDNDYVLLENNLELPVIQINDGCLQTGYNWEICLLKITSGLLDYQIYLEFVTNNVQD-NKKDKARVIQSTIKTLSQIFKQEVKGPDKIVT-PSPTSKAILMEKLESQKE--WPRTKTIKLILKALEEFLEVTMRSTRQN--------"
## [24] "------------------------------------MKFLSARDFHPLAVLG-----LMLAMATALP--------TSQVRRGDHTEDT-----------TPNKP-VHTTAQQLGGLI---SYILREVFEM-RKELCDNSPDCMANDDALSENNLELPAIQTNDRCLQTEYNQKLCLLKITSGLLDYQIYLEFVTNNVQD-NKKDKARVIQSATKTLNQILKQEVKDLSRTVT-PSPTAKALLLEKLESQKE--WSRTKTIQLILKALEGFLKNTMRATRQN--------"
## [25] "------------------------------------MKFLSARDFHPVAFLG-----LMLVTTTAFP--------TSQVRRGDFTEDT-----------TPNRP-VYTTS-QVGGLI---THVLWEIVEM-RKELCNGNSDCMNNDDALAENNLKLPEIQRNDGCYQTGYNQEICLLKISSGLLEYHSYLEYMKNNLKD-NKKDKARVLQRDTETLIHIFNQEVKDLHKIVL-PTPISNALLTDKLESQKE--WLRTKTIQFILKSLEEFLKVTLRSTRQT--------"
## [26] "------------------------------------MKFLCTKALHPLAFLG-----LLLVTASAFP--------NPKVQRGEGTGDT-----------TANKP-TYTSAQITENLM---TFILRRILDL-RTELCDNDEDCLENEEALSENNLNLPTMLEKDGCFQAGYNRHSCLLKTTSGLLEFQIYLEYIQNHLSD-DQKDIARDIQSNSKSLVEILKQEIKNPNEIVF-PSPTANASLMKKLESQHG--WQKTMTMQLILRSLQDFLQYALRAFRN---------"
## [27] "------------------------------------MNFLSTSAFSAVAFSLG----LLLATATAFP--------TSGPLEVLEKD------------ATPAKPLSLSTPEQTEGLI---THIIMEINDL-NGKMCSKGIKCEGDSHVMENNKLHLPRLEDDDGCFETGFNKEECLTRITYGLSGYEKYLAYIEGKFEGDINEAVA--LDLGTKHLIDVLKQKLSNPTQVTA-N-PTTDSEVIAELDSQED--WQQYTAIHIILVNLKEYLHKTLRALRHIGI------"
## [28] "------------------------------------MSSLST--------------------------------------------------------------------------------------------ICENHSMCRNGMVALEDNNLNFPQITEDGGCLPSGFNKDTCLNTITTSLSEYQPYLNYLQENYNLNERTAID--IQTYFKVLIRILKQMENN----TT-----YDT-------SQNN--WQMNTTFYLTLQSLERFLQYTVRAIRMM--------"
## [29] "------------------------------------MNPLLQITGSLRPVALTL---MLLMATAAFP--------TP--VPGGKDLQ-----------GMSSQ--KLPSSIPDLDSIVNHAKYLEKTASDLKEEICRIHNLCDNSNEALAENNLLLPNITERDGCLPSSFNEETCLIKIISGLQDFDIFLNYMETEMED--NRFQT--LKLSTTQLANTLKTVIKKTDLVPT-TNPTTSSILLSELQSLTA--WSRKVGFRLILWHYTRFIQGTVRAVRYLKTRSLDA-"
## [30] "------------------------------------MKSLST---S---------LGLLLMMASAFP--------------GDSKG-------------SSNKT-LEL-----------LMFILSQVEEL-RKE--------------------------------------ETCLRRIIAGLSQFHIYMKFVGNTLE--EENRKLSGVLKSIKALIQLLEENVKHPNEIAT-LDSTTNATVLPTWQLNTE--WLKNTMINLILQSLEKFIQFSVRAVRLM--------"
## [31] "------------------------------------MNSVFA---ALRPAPVGFALGLLLVVATAFPTAP------SVSMKEEPQGG-----------ATSDKP-FTP--VKIEST---ISYILMKISDI-RKK--------------L----L----LTER----PSAPNQEICLMRITVGLLEFEIYLKHLQNKFKSDEENNNMDIVLQNSQTLVKTLRPKVKTTEEAPT-LEPATLTSLKENMQLKEQ--WRRTQTIHYILCGLKDFLEFTLRAVRLM--------"
## [32] "------------------------------------MNSFTS---ALRPGPLGCSLALLLVVATAFPTS--------APVREDSNTK-----------ASPDKT-LTPPGRTIES----IRSILETIKEL-RKEMCDHDVNCMNRKEALAEVNLHLPRLIEEDGCFPPAVNNETCLLRITSGLMEFRMYLEHLQAKFRSDEENTRVSMVLKNIQHLIKTLRPKVKNLNEEAT-LKPAVAVSLMENLQQKNQ--WLKTTTIHFILRGLTNFLEFTLRAVDLM--------"
## [33] "------------------------------------MNTFCTSAFSPVAFSLG----LLLVMASAFP--------TPTPLGGDSKDD-----------TTSNRP-QLTSPNKTEELVNLIRFILSQVVEL-KNEMCDKYDKCENT-EVLAGNNLNLPKMTKNDGCFEKEFDKESCLVEIITGLLEFQIYLEYVQNKFEGEKGKVIA--VQNSAKALVRLLKQKLKNPDEVTT-PNPIANASLLSKLQSQTE--WLRNTTINLILQSLRDFMQVTLRAVRIM--------"
## [34] "------------------------------------MNSLSTSAFGPVAFSLG----LLLVMASAFP--------CRTPAGEDSKDD-----------ATSNTP-PVTISDNTIELM---KFIIEQISAL-KKEVCEKFDKCESISEALAGNNMNLPKIRTNDGCFSSECNWETCLTRVVTGLLEFQIYLDYVEDNFEGDKEKVRV--VQRSIKALVLILKQ-VKNP--VTT-PNPTTNASLLSKLQPQSE--WLRNTRINLILQNLDIFMQFSLRAIRNMKSGDSSSL"
## [35] "------------------------------------MNSLSTSAFSPVAFSLG----LLLVMASAFP--------NPKPLEGDSKDD-----------AASNRP-SLTSPDKTEELI---RFILAEISVL-RKKMCDKYDKCENSREALAGNNLKLPQMTEEDGCFHSGFNKETCLMKIITGLSEFQIYLDYLQNKFEGSKANVIV--VQNSTKALVQILKQKIKNPEDVTT-PDPTANASLLSKLQLQTE--WLKNTTINLILRSLDDFMQFSLRAVRIM--------"
## [36] "------------------------------------MNFVSTSTFSLVAFCLG----LLLVMASAFP--------TPPLLEGDSKDD-----------ATSNRP-PLTSPDKTEELI---NFILAKVSVL-RKEMCDKYDKCDNSREALAGNNLNLPKMTEKDRCFQSGFNKETCLMRIVTGLLEFQIYLDYLQNKFEGSKGNVLV--VQNSIKALVQILKQKVKNPEEVTT-PDATADASLLSKLQPQSE--WLKNTTINLILRSLEEFMQFSLRAVRFK--------"
## [37] "------------------------------------MNSLSTSTFNPVAFCLG----LLLAMASAFP--------TGTSLEGDSKDE-----------GT--------DADKTVKFM---TTIRFQVTEL-RKEMCDKYNKCENTTVALARNNLNLPEMTDKDRCFHSGFNQETCLMKIITGLLEFQIYLDYVQNKFEGEKGNIIA--VQNTIKSLVQNLKQKVKNSEAVTT-PDPSTNAGLLSKLHLQSG--WLKNTTINLILQSLDVFMQYSLRATRMLP-GHLKSL"
## [38] "------------------------------------MNSLSTS-----AFSLG----LLLVMATAFP--------TPGPLAGDSKDD-----------ATSNSL-PLTSANKVEELI---KYILGKISAL-RKEMCDKFNKCEDSKEALAENNLHLPKLEGKDGCFQSGFNQETCLTRITTGLVEFQLHLNILQNNYEGDKENVKS--VHMSTKILVQMLKSKVKNQDEVTT-PDPTTDASLQAILQSQD--EWLKHTTIHLILQSLEDFLQFSLRAVRIM--------"
## [39] "------------------------------------MNSLSTSAFSPVAFSLG----LLLVMATAFP--------TPGPLGGDSKDD-----------ATSNRP-PLTSADKMEDFI---RFILGKISAL-KKEMCEKYNKCEDSKEALAENNLNLPKLAEEDKCFQSQFNQETCLTRITTGLQEFQIHLKYLEANYEGNKNNAHS--VYISTKHLLQKLRP--MNRVEVTT-PDPTTDSSLQALFKSQD--KWLKHVTIHLILRSLEDFLQFSLRAIRIM--------"
## [40] "------------------------------------MNSLSASAFSPVAFSLG----LLLVMATAFP--------TPGPVGGESQAD-----------ATSNRP-PLTSPDKMEEFI---KYILGKISAL-RKEMCDKYNKCEDSKEALAENNLHLPKLAEKDGCFQSGFNQETCLTRITTGLLEFQIHLKYIQANYEGNKENANS--VYISTKLLLQMLMRKVKSQDEVTT-PDPTTDTSLQAILKAQD--EWLKHTTIHLILRSLEDFLQFSLRAVRIM--------"
## [41] "KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPMDLFPTPGPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSADKMEEFINLIKYILGKISALLKKEMCDKYNKCEDSKEALAENNLHLPKLAEKDGCFQSGFNQETCLTRITTGLLEFQIHLKYLQANYEGDKENANSSGVYISTKVLLQMLMQKVKNQDEVTTKPDPTTDTSLQAILKSQDEKKWLKHTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [42] "VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPWHGFPTPGPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSADKMEEFINLIKYILGKISALLRKEMCDKYNKCEDSKEALAENNLHLPKLAEKDGCFQSGFNQETCLTRITTGLLEFQIHLKYLQANYEGDKENANSSGVYISTKVLLQMLMQKVKNQDEVTTVPDPTTDTSLQAILKSQDEKEWLKHTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [43] "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPCKHFPTPGPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSADKMEELINLIKYILGKISALLRKEMCDKYNKCEDSKEALAENNLHLPKLAEKDGCFQSGFNQETCLTRITTGLLEFQIHLKYLQNNYEGDKENAKSSGVHMSTKVLVQMLKQKVKNQDEVTTMPDPTTDASLQAILQSQDEKEWLKHTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [44] "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPFLTFPTPVPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILDKISALLRKEMCDKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQMSTKALVQMLKQKVKNPDEVTTNPDPTTNASLLSKLQSQNEKEWLKNTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [45] "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPMSEFPTPVPLGEDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILDKISALLRKEMCDKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQMSTKALVQMLKQKVKNPDEVTTFPDPTTNASLLSKLQSQNEKEWLKNTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [46] "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPLWDFPTPVPLGEDFKDDTTDEMTTAEMTTTSNRPLPLTSPDKTEELINLIKYILDKISALLRKEMCEKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLDYLQNKYEGDKGNTEASGVQISTKALVQILRQKVKNPDEVTTEPDPTTNASLLNKLQSQNEKEWMKNTKIILILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [47] "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPVKKFPTPGPLGEDFKDDTTDEMTTAEMTTTSNRLLLLTSPDKTEALINLIKYILDKISAMLRKEMCEKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEYQIYLDYLQNEYEGDKGSIEASGVQISTKALVQILRQKVKNPDEVTTRPDPTTNASLMNKLQSQNDKEWMKNTKIILILRSLENFLQFSLRAIRIKQPGHLKSL"
## [48] "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPAHGFPTPVPLGEDSKDDTTDEMTTAEMTTTSNRPLPLTSPDKTEELINLIKYILDKISALLRKEMCDKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQMSTKALVQMLKQKVKNPDEVTTFPDPTTNASLLSKLQSQNEKEWLKNTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [49] "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPKFYFPTPVPLGEDSKDDTTDEMTTAEMTTTSNRPLPLTSPDKTEHLINLIKYILDKISALLRKEMCDNYDKCENSKEVLAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQMSTKALVQMLKQKVKNPDEVTTDPDPTTNTSLLSKLQSQNEKEWLKNTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [50] "WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPQCRFPTPVPLGEDSKDDTTDEMTTAEMTTTSNRPLPLTSPDQTENLINLIKSILLEISEVLRNKMCDNHDSCKNSKEVLTENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEEDKENAKASGMQMRTKALVQILKQKVKNPNEVTTWPDPTTNTSLLEKLQSQNEKEWLKTTTIHLILRSLEDFLQFTQRAVRIMQPGHLKSL"
## [51] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMNSLSTNTFSPVAFSLGCSLGLLLVMATAFPTSPMMGFPTPVPLGEDSKDDTTDEMTTAEMTTTSNRPLPLTSSEQIENLINLIKSILLEISDVLKNKMCDNHESCKNSKEVLTENNLNLPKLARKDGCFHSGFNQETCLIRITTGLLEFQVYLEYLQNTFEEDKEHAKASGMQMRTKALVKILRQKIKNPIEETTGPDPTTNTGLLEKMHAQNEKEWLKTTTIHLILRSLEDFLQFTQRAIRMMQPGHLKSL"
## [52] "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNSLSTNTFSPVAFSLGCSLGLLLVMTTAFPTSPEMTFPTPVPRGEDSKDDTTDEMTTAEMTTTSNRPLPLTSSEQIENLINLIKSILLEISDVLKNKMCDNHESCKNSKEVLTENNLNLPKLARKDGCFHSGFNQETCLIRITTGLLEFQVYLEYLQNTFEGDKEHAQASGMKIGTKALVNILRQKMKNPIEETIMPDPTTNTGLLEKMHAQKNKEWLKTTTIHLILRSLEDFLQFTQRAIRMMQPGDLKSL"
## [53] "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPMVFFPTPVPLGEDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILDKISALLRKEMCDKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQMSTKALVQMLKQKVKNPDEVTTIPDPTTNASLLSKLQSQNEKEWLKNTTIHLILRSLEDFLQFSLRAVRIMQPGHLKSL"
## [54] "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDMNSLSTSAFGPVAFSLGCSLGLLLVMPAAFPTSPCYSFPAPVPLGEDSKDVTTDEMTTAEMTAAPNRQLPLTSSERIDKHINLIRYILDGISALLRKEICNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLLKITTGLLEFEVYLEYLQNRFESSKEQARASGVQMSTKALIQFLQKKAKNLDAITTDPDPTTNASLLTKLQAQNQKEWLQDMTTHLILRSFKEFLQCSLRALRQMQPGHLKSL"
## [55] "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTMNSLSTSAFRPVAFSLGCSLGLLLVMPAAFPTSPWWKFPAPVPLGEDSKEVTTDEMTTAEMTAAPNRQLLLTSTERIDKHINLIRYILDGISALLRKEICNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLLKITTGLLEFEVYLEYLQNRFESSKEQAGASGVQMSTKGLIQSLQKKAKNLSAIATTPDPATNASLLTKLQAQDQKEWLQGVTTHLILRSFKEFLQCSLRALRQMQPGDLKSL"
## [56] "WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWMNSVSTSAFGPVAFSLGCSLGLLLVLPAAFPTSPRHSFPAPVPPGEDSKDVTTDEMTTAEMTAAPHRQLPLTSSERIDKQINLIRYILDGISALLRKETCNKSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLVKIITGLLEFEVYLEYLQNRFESSEEQARASGVQMSTKVLIQFLQKKAKNLDAITTWPDPTTNASLLTKLQAQNQKEWLQDMTTHLILRSFKEFLQSSLRALRQMQPGDLKSL"
## [57] "VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVMNSVSTSAFGPVAFSLGCSLGLLLVLPAAFPTSPMASFPAPVPPGEDSKDVTTDEMTTAEMTAAPHRQLPLTSSERIDKQINLIRYILDGISALLRKETCNRSNMCESSKEALAENNLNLPKMAEKDGCFQSGFNEETCLVKIITGLLEFEVYLEYLQNRFESSEEQARASGVQMSTKVLIQFLQKKAKNLDAITTVPDPTTNASLLTKLQAQNQKEWLQDMTTHLILRSFKEFLQSSLRALRQMQPGDLKSL"
## [58] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPWYDFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILAKISALLRKEMCDKYDKCENSKEALAENNLNLPKMAEKDGCFQSGFNQETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQNSTKALVQMLKQKVKNPDEVTTGPDPTTNASLLSKLQSQNEKEWLKNTTIHLILQSLEDFLQFSLRAVRIMQPGHLSSL"
## [59] "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPYQNFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILAKISALLRKEMCDKYDKCENSKEALAENNLNLPKMTEKDGCFQSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQNSTKALVQILKQKVKNPDEVTTMPDPTTNASLLSKLQSQNEKEWLKNTTIHLILQSLEDFLQFSLRAVRIMQSGHLSSL"
## [60] "KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPESKFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILAKISALLRKEMCDKYDKCENSKEALAENNLNLPKMTEKDGCFQSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQNSTKALVQILKQKVKNPDEVTTKPDPTTNASLLSKLQSQNEKEWLKNTTIHLILQSLEDFLQFSLRAVRIMQSGHLSSL"
## [61] "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFMNSLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPMNQFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKYILAKISELLRKEMCDKHDKCENSKEALAENNLNLPKMTEKDGCFQSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKASGVQNSTKALVQILKQKVKNPDEVTTFPDPTTNASLLSKLQSQNEKEWLKNTTIHLILQSLEDFLQFSLRAVRIMQSGHLSSL"
## [62] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMNSLSTSAFSLRPFSLGCSLGLLLVMATAFPTSPTEYFPTPTPLRGDSKDDTTDEMTTAEMTATSNRPLPLPSPDKTEELINLIKYILAKISELLRKEMCDKHDKCENSKEALAENNLNLPKMTEKDGCFPSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDEENAKASGVQNSTKALVQILKQKVKNPDEVTTGPDPTTNASLLSKLQSQNEKEWLKNTTIHLILQSLEDFLQFTLRAVRIMQSGHLKSL"
## [63] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMNFLSTSAFSPVAFSLGCSLGLLLVMATAFPTSPVFAFPTPTPLRGDSKDDTTDEMTTAEMTATSNRPLPLTSPDQTEELINLITYILAKISELLRKEMCDKHDKCENNKEALAENNLNLPKMTEKDGCFQSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDKENAKARGVQNSTKALVQILKQKVKNPDEVTTGPDPTTNASLLSKLQSQNEKEWQKNTTIHLILQSLEDFLQFSLRAVRIMQSGHLKSL"
## [64] "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMKFLSTSAFHPLAFLGGCSLGLLLVTATAFPTSPMWTFPTPQVQRGDSTDDTTDEMTTAEMTATSNRPLPYTSAQQTENLINLITYILRKILELLRKEMCDNNEDCMNNEEALAENNLNLPKMQEKDGCFQSGYNRETCLLKITSGLLEFQIYLEYMQNKLQDDNEKDKARDIQSSTKSLVQILKQEVKNPDEIVFMPSPTANASLMEKLESQNEKEWQKTMTIQLILRSLEDFLQYTLRAVRQMQSGGETSL"
## [65] "WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWMKFLSTSTFRPLAFLGGFSLGLLLVTATAFPTSPPAMFPTSQVQQGDFTADTTDEMTTAEMTTTTSSKLPTTSASQAFQMFNLITQVLHDVQELLKSEMCKHNVNCLEEETAMLNNNLNLPKIKTEDGCFYNGYNWETCQLKIITGLLKFQIYLQYMQNKLQSDSENEKAEKIYTSVKSLSLFMKAKVNNTEQTVFWPSPTANATLLEKLESQNEKETQKLLTVQIVLQSLEEFLQNSLRAIRKAESDGDLDI"
## [66] "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDMKFLSTSTFRPLAFLGGFSLGLLLVTATAFPTSPDIKFPTSQVQQGDFTADTTDEMTTAEMTTTTSSKLPTTSASQAFQMFNLITQVYKDVKELLKSEMSKHNVNCLEEETAMLNNNLNLPKIKTEDGCFYNGYNWETCQLKIITGLLKFQTYLQYMQNKLQSDSENEKAEKIYTGVKSLSLFMKAKVNNTEQTVFDPSPTANATLLEKLESQNEKETQMLLTVQIVLQSLEEFLQDSLRAIRKAESDGEMDI"
## [67] "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCMKFLSTSTFRPLAFLGGFALGLLLVTATAFPTAPSNIFPTSQVQQGDFTADTTDEMTTAEMTTTMPSKLPTTSASQVFQMFNLIMQVYKDVKELLKSEMSKHNVNCLEEETAILNNNLDLPKLKTEDGCFYNGYNWETCQLKIITGLLKFQTYLQYMQNKLQNDSENKKAEKIYTGVKSLSLFMKAKVNNTEQMVFCPSPTANATLLEKLESQNEKETQMLLTVEIVLQSLEEFLQDSLRAIRKAESEGKNII"
## [68] "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMKFLSTSAFHPLAFLGGCSLGLLLVTATAFPTSPRQLFPTSQVQRGDFTEDTTDEMTTAEMTATSNRPLPYTSSQQTENLINLITHVLREILELLRKEMCDNNEDCMNNEEALAENNLNLPKMQEKDGCFQSGYNWETCLLKITSGLLEFQIYLEYMQNKLQDDNEKDKARTIQSSTKSLVQILKQEVKNPDEIVFAPSPTANATLMEKLESQNEKEWQKTMTIQLILRSLEDFLQYTLRAVRQTESGGEMSI"
## [69] "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQMKFLSARDFHPLAFLGGCSLGLMLVTATAFPTSPWGIFPTSQVRRGDFTEDTTDEMTTAEMTATPNRPLVYTTSQQVGGLINLITHVLREIFELLRKELCNNNPDCMNNDDALAENNLKLPEIQRNDGCYQTGYNWEICLLKITSGLLEYQIYLEYMKNNLQDDNKKDKARVIQSTTKTLIQIFKQEVKDPDKIVMQPSPTSNAILMEKLESQKEKEWPRTKTIQLILKSLEEFLKVTMRSTRQTESGGETSI"
## [70] "YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYMKFLSARDFHPLAFLGGCSLGLMLAMATALPTSPVISFPTSQVRRGDFTEDTTDEMTTAEMTATPNRPLVYTTSQQVGGLVNLITHVLREIFELLRKELCNNNPDCMNNDDALLENNLELPVIQRNDGCYQTGYNWEICLLKITSGLLDYQIYLEFVTNNVQDDNKKDKARVIQSTTKTLSQIFKQEVKDPDKIVMYPSPTSKAILMEKLESQKEKEWPRTKTIQLILKALEEFLKVTMRSTRQNESGGETSI"
## [71] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMKFLSARDFHPLAFLGGFSLGLMLAMATALPTSPRETFPTSQVRRGDFTEDTTDEMTTAEMTATPNRPLVYTTSQQVGGLVNLITYVLREIFELLRKELCNNNPDCMNNDDALLENNLELPVIQRNDGCLQTGYNWEICLLKITSGLLDYQIYLEFVTNNVQDDNKKDKARVIQSTTKTLSQIFKQEVKDPDKIVTGPSPTSKAILMEKLESQKEKEWPRTKTIQLILKALEEFLKVTMRSTRQNESGGETSI"
## [72] "KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKMNSLSTSAFSLRPFSLGCSLGLLLVMATAFPTSPGGFFPTPTPLRGDSKDDTTDEMTTAEMTATSNRPLPLPSPDKTEELINLIKYILAKISELLRKEMCDNHDMCENSKEALAENNLNLPKMTEKDGCFPSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDEENAKASGVQNSTKALVQILKQMVKNPDEVTTKPDPTTNASLLSKLQSQNEKEWQKNTTFHLILQSLEDFLQFTVRAVRMMQSGHLKSL"
## [73] "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGMNSLSTSAFSLRPFSLGCSLGLLLVMATAFPTSPWMHFPTPTPLRGDSKGDTTDEMTTAEMTATSNKPLLTPSPDKTEELINLIKYILAKISELLRKEMCDKHDKCENSKEALAENNLNLPKMTEKDGCFPSGFNKETCLMRITTGLLEFQIYLEYLQNKFEGDEENAKMSGVLKSTKALVQILKQKVKNPDEVTTGLDPTTNASLLSKLQLQNEKEWLKNTTIHLILQSLEDFLQFTLRAVRLMQSGHLKSL"
## [74] "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQMNSFSSSAFALRPAPLGCSLGLLLVVATAFPTSPGTDFPTPAPMREDSQGGTTDEMTTAEMTATSDKPLLTPPGRKIESTINLIRYILEKISELLRKEMCDHDVNCMNRKEALAENNLHLPRLTEKDGCFPSAVNQETCLMRITTGLMEFQIYLEHLQNKFKSDEENTKMSMVLKNTQTLVKTLRPKVKNTDEAATQLEPATATSLMENLQQKNQKEWLKTTTIHFILRGLKDFLEFTLRAVRLMQPGDLKSL"
## [75] "WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWMNSLSTSAFSPVAFSLGCSLGLLLVMASAFPTSPEQHFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKFILAQISALLRKEMCDKYDKCENSREALAGNNLNLPKMTEKDGCFQSGFNKETCLMRIITGLLEFQIYLDYLQNKFEGDKGNVIASGVQNSTKALVQILKQKVKNPDEVTTWPDPTANASLLSKLQSQSEKEWLKNTTINLILQSLDDFMQFSLRAVRIMQSGHLSSL"
## [76] "SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSMNSLSTSAFSPVAFSLGCSLGLLLVMASAFPTSPTSSFPTPTPLGGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKFILAQISALLKKEMCDKYDKCENTREALAGNNLNLPKMTENDGCFQSEFNKETCLMRIITGLLEFQIYLDYVQNKFEGDKGKVIASGVQNSTKALVQILKQKVKNPDEVTTSPNPTANASLLSKLQSQSEKEWLRNTTINLILQSLDDFMQFSLRAVRIMQSGDSSSL"
## [77] "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFMNSLSTSAFSPVAFSLGCSLGLLLVMASAFPTSPAMEFPTPTPLEGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLIKFILAQISVLLRKEMCDKYDKCENSREALAGNNLNLPKMTEKDGCFQSGFNKETCLMRIITGLLEFQIYLDYLQNKFEGSKGNVIVSGVQNSTKALVQILKQKVKNPEEVTTFPDPTANASLLSKLQLQSEKEWLKNTTINLILRSLDDFMQFSLRAVRIMQSGHLKSL"
## [78] "VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVMNSLSTSTFSPVAFCLGCSLGLLLVMASAFPTSPEGSFPTPTPLEGDSKDDTTDEMTTAEMTATSNRPLPLTSPDKTEELINLINFILAQVSVLLRKEMCDKYDKCENSREALAGNNLNLPKMTEKDRCFQSGFNKETCLMRIITGLLEFQIYLDYLQNKFEGSKGNVIVSGVQNSIKALVQILKQKVKNPEEVTTVPDPTANASLLSKLQLQSEKEWLKNTTINLILRSLDDFMQFSLRAVRIMQSGHLKSL"
# Names stored alongside the sequences in the alignment object.
ancseq[["nam"]]
##  [1] "ailMel1" "orcOrc1" "panHod1" "vicPac2" "eptFus1" "myoDav1" "myoLuc2"
##  [8] "pteAle1" "calJac3" "saiBol1" "ponAbe2" "nomLeu3" "macFas5" "tupChi1"
## [15] "cerSim1" "conCri1" "sorAra2" "cavPor3" "chiLan1" "hetGla2" "octDeg1"
## [22] "criGri1" "mesAur1" "micOch1" "mm10"    "jacJac1" "otoGar3" "eriEur2"
## [29] "monDom5" "echTel2" "ochPri3" "oryCun2" "chrAsi1" "eleEdw1" "loxAfr3"
## [36] "triMan1" "oryAfe1" "canFam3" "musFur1" "lepWed1" "41"      "42"     
## [43] "43"      "44"      "45"      "46"      "47"      "48"      "49"     
## [50] "50"      "51"      "52"      "53"      "54"      "55"      "56"     
## [57] "57"      "58"      "59"      "60"      "61"      "62"      "63"     
## [64] "64"      "65"      "66"      "67"      "68"      "69"      "70"     
## [71] "71"      "72"      "73"      "74"      "75"      "76"      "77"     
## [78] "78"

Pathway enrichment

  • Do my known genes appear frequently at the top of my ranked set?
  • Is the proportion of genes in my pathway above the line (you pick a cutoff) greater than the proportion below the line?
# One-sided Fisher's exact test on a 2 x 2 table of counts whose columns
# are (2, 1) and (1, 4); alternative = "greater" tests for an odds ratio
# above 1.
fisher.test(
  data.frame(c(2, 1), c(1, 4)),
  alternative = "greater"
)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  data.frame(c(2, 1), c(1, 4))
## p-value = 0.2857
## alternative hypothesis: true odds ratio is greater than 1
## 95 percent confidence interval:
##  0.2446756       Inf
## sample estimates:
## odds ratio 
##   5.784265
# One-sided Wilcoxon rank-sum test: is the first sample shifted towards
# larger values than the second? The result is kept in `w` because the
# W statistic is reused below.
w <- wilcox.test(
  x = c(45.22, 40.64, 14.34),
  y = c(56.34, 33.96, 27.3, 20.14, 5.21),
  alternative = "greater"
)
w
## 
##  Wilcoxon rank sum exact test
## 
## data:  c(45.22, 40.64, 14.34) and c(56.34, 33.96, 27.3, 20.14, 5.21)
## W = 9, p-value = 0.3929
## alternative hypothesis: true location shift is greater than 0
# Divide W by the number of (x, y) pairs: 3 * 5, the two sample sizes.
w[["statistic"]] / (3 * 5)
##   W 
## 0.6
# Approximation of AUROC
# (statistic / product of input lengths)