presentación3

library(ape)
library(adegenet)

## Loading required package: ade4
## 
##    /// adegenet 2.0.0 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature resquests: adegenetIssues()

#pin <- read.dna('ND1-EasternPacific.fasta', format = 'f')
pac <- read.dna('completo.fas', format = 'f')
pac.genind <- DNAbin2genind(pac)
#indNames(pac.genind)[193] <- strsplit(indNames(pac.genind)[193], ' ')[[1]][1]

AMOVA

library(poppr)

## This is poppr version 2.0.2. To get started, type package?poppr
## OMP parallel support: available

library(Hmisc)

## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:adegenet':
## 
##     strata
## 
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## 
## The following object is masked from 'package:ape':
## 
##     zoom
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units

#hier(pac.genind, )
load('strata.rda')

pob <- data.frame(nom=indNames(pac.genind), pob=1:nrow(tab(pac.genind)))

pob[1:192, 'pob'] <- capitalize(strata$pob)

pop(pac.genind) <- pob$pob[1:192]
strata(pac.genind) <- data.frame(pop=pob$pob[1:192])

#save(pac.genind, file='pac.genind.rda')
#pac.genpop <- genind2genpop(pac.genind)
#save(pac.genpop, file='pac_genpop.gen')
pac.gencl <- as.genclone(pac.genind)

amova.pac <- poppr.amova(pac.gencl, hier = ~pop)

## 
##  No missing values detected.

amovasig.pac <- randtest(amova.pac, nrepet=999)
amova.pac

## $call
## ade4::amova(samples = xtab, distances = xdist, structures = xstruct)
## 
## $results
##                  Df     Sum Sq  Mean Sq
## Between samples   4   5.451607 1.362902
## Within samples  187 279.308810 1.493630
## Total           191 284.760417 1.490892
## 
## $componentsofcovariance
##                                    Sigma           %
## Variations  Between samples -0.003480044  -0.2335365
## Variations  Within samples   1.493629997 100.2335365
## Total variations             1.490149954 100.0000000
## 
## $statphi
##                            Phi
## Phi-samples-total -0.002335365

amovasig.pac

## Monte-Carlo test
## Call: as.randtest(sim = res, obs = sigma[1])
## 
## Observation: -0.003480044 
## 
## Based on 999 replicates
## Simulated p-value: 0.599 
## Alternative hypothesis: greater 
## 
##       Std.Obs   Expectation      Variance 
## -0.3541440114 -0.0002246648  0.0000844974

Medidas resumen

library(poppr);library(ggplot2)
res.pac <- poppr(pac.genind)

## | Ancash-lib 
## | Sechura 
## | Vegueta 
## | Ica 
## | Tacna 
## | Total

res.pac

##          Pop   N MLG eMLG    SE    H    G lambda   E.5   Hexp    Ia  rbarD
## 1 Ancash-lib  56  28 14.9 1.787 2.77 8.76  0.886 0.518 0.0808 1.096 0.0401
## 2    Sechura  25  16 16.0 0.000 2.51 9.06  0.890 0.712 0.1012 1.705 0.0614
## 3    Vegueta  28  20 18.1 0.791 2.69 9.12  0.890 0.592 0.1004 0.856 0.0338
## 4        Ica  38  17 12.4 1.396 2.26 5.60  0.821 0.536 0.0513 0.713 0.0401
## 5      Tacna  45  23 14.8 1.596 2.68 8.77  0.886 0.574 0.0646 0.648 0.0239
## 6      Total 192  52 14.7 2.095 3.10 9.15  0.891 0.382 0.0765 1.117 0.0329
##         File
## 1 pac.genind
## 2 pac.genind
## 3 pac.genind
## 4 pac.genind
## 5 pac.genind
## 6 pac.genind

h <- data.frame(Hs=Hs(pac.genind), Hexp=res.pac$Hexp[1:5])
h

##                    Hs       Hexp
## Ancash-lib 0.07932692 0.08076923
## Sechura    0.09714872 0.10119658
## Vegueta    0.09680795 0.10039343
## Ica        0.04993252 0.05128205
## Tacna      0.06320988 0.06464646

ggplot(h, aes(Hs, Hexp))+
  geom_point(alpha=0.5)+
  geom_text(aes(label=levels(pop(pac.genind))), hjust=2, size=3)+
  xlab('Heterocgocidad observada')+
  ylab('Heterocigocidad esperada')+
  geom_line(dat=data.frame(x=c(0,1), y=c(0,1)), aes(x, y))+
  coord_cartesian(xlim = c(0, 0.15), ylim = c(0,0.15))

PCA

pca.pac <- dudi.pca(pac.genind, scannf = F, nf = 3)
ggplot(pca.pac$li, aes(Axis1, Axis2, color=pob$pob[1:192]))+
  geom_point(size=4, alpha=0.5)+
  xlab('Componente 1')+
  ylab('Componente 2')+
  scale_color_discrete(guide = guide_legend(title= 'Puntos'))+
  geom_vline(xintercept=0, linetype='dotted')+
  geom_hline(yintercept=0, linetype='dotted')

pca.pac$eig[1]/sum(pca.pac$eig)*100

## [1] 12.98261

pca.pac$eig[2]/sum(pca.pac$eig)*100

## [1] 7.993238

(pca.pac$eig[1]+pca.pac$eig[2])/sum(pca.pac$eig)*100

## [1] 20.97585

loadings <- pca.pac$c1
aload <- abs(loadings)
#sweep(aload, 2, colSums(aload), '/')[order(aload)]
aload[order(aload$CS1, decreasing = T)[1:5], ]

##              CS1          CS2       CS3
## X251.t 0.2879322 0.0005785273 0.0929206
## X251.c 0.2879322 0.0005785273 0.0929206
## X470.t 0.2812348 0.0386049192 0.1459957
## X470.c 0.2812348 0.0386049192 0.1459957
## X623.g 0.2743272 0.0063027088 0.0897944

aload[order(aload$CS2, decreasing = T)[1:5], ]

##               CS1       CS2        CS3
## X350.t 0.04229584 0.3509372 0.03502338
## X350.c 0.04229584 0.3509372 0.03502338
## X188.g 0.04622626 0.3500111 0.03439804
## X188.a 0.04622626 0.3500111 0.03439804
## X104.g 0.04747940 0.2859141 0.02404198

aload[order(aload$CS3, decreasing = T)[1:5], ]

##               CS1        CS2       CS3
## X374.g 0.10282770 0.04410839 0.3197177
## X374.a 0.10282770 0.04410839 0.3197177
## X215.t 0.10503223 0.04559234 0.3113974
## X215.c 0.10503223 0.04559234 0.3113974
## X746.a 0.06442677 0.03503461 0.2469785

Arbol de peru

library(magrittr)
load('chi_dna.rda')
d.prov3 <- provesti.dist(DNAbin2genind(chi.dna))
arbol.per <- d.prov3 %>%
  nj() %>%
  ladderize()

load('strata.rda')

colores <- as.character(strata$pob)
colores[colores %in% c('ancash-lib')] <- '#1b9e77'
colores[colores %in% c('sechura')] <- '#d95f02'
colores[colores %in% c('vegueta')] <- '#7570b3'
colores[colores %in% c('ica')] <- '#e7298a'
colores[colores %in% c('tacna')] <- '#66a61e'


plot(arbol.per, tip.color=colores, cex=0.7)
legend(x='topright', 
       legend = c('Ancash-Lib', 'Sechura', 'Vegueta', 'Ica', 'Tacna'),
       fill= c('#1b9e77', '#d95f02', '#7570b3', '#e7298a', '#66a61e', '#e6ab02'),
       border=c('white', 'white', 'white', 'white', 'white'), cex=0.7)

Network de haplotipos solo Peru

library(pegas)

## 
## Attaching package: 'pegas'
## 
## The following object is masked from 'package:ade4':
## 
##     amova
## 
## The following object is masked from 'package:ape':
## 
##     mst

per <- read.dna('completo.fas', format='f')
h <- pegas::haplotype(per)
h <- sort(h, what='labels')
net <- haploNet(h)

ind.hap<-with(stack(setNames(attr(h, "index"), rownames(h))), table(hap=ind, pop=pob[1:192, 'pob']))
plot(net, size = attr(net, "freq"), pie=ind.hap, fast = TRUE, legend=F)
legend('topleft', colnames(ind.hap), col=rainbow(ncol(ind.hap)), pch=20, cex=0.7)

Indices de diversidad nuc y haplo + tabla de frecuencias de haplotypos por pob

library('PopGenome'); library(parallel); library(SnowballC); library(haplotypes)
#makeCluster(4)

genome <- readData('peru/', parallized = F)

## |            :            |            :            | 100 %
## |====================================================

popul <- split(pob$nom, factor(pob$pob[1:192], levels=unique(pob$pob[1:192])))
#popul <- lapply(popul, function(x) gsub(' ', '_', x))
genome <- set.populations(genome, new.populations = popul)

## |            :            |            :            | 100 %
## |

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## ====================================================

pac <- read.dna('completo.fas', format = 'f')
pob$seq <- seq_along(pob$pob[1:192])
nd <- sapply(split(pob$seq, factor(pob$pob, levels=unique(pob$pob))), function(x) nuc.div(pac[x, ]))
nd <- append(nd, nuc.div(pac))

paci <- haplotypes::read.fas('pacifico/pacifico.fasta')
h <- haplotypes::haplotype(paci)
pops <- pob$pob[1:192]
g <- haplotypes::grouping(h, pops)
sz<-apply(g$hapmat,2,sum)
# hdiv gives the total haplotype diversity #
hdiv<-(sum(sz)/(sum(sz)-1))*(1-sum((sz/sum(sz))^2))

hdiv

## [1] 0.7867038

genome@region.data@populations2[[1]][[1]] <- popul[[1]]
genome@region.data@populations2[[1]][[2]] <- popul[[2]]
genome@region.data@populations2[[1]][[3]] <- popul[[3]]
genome@region.data@populations2[[1]][[4]] <- popul[[4]]
genome@region.data@populations2[[1]][[5]] <- popul[[5]]

genome <- F_ST.stats(genome, mode='haplotype',  only.haplotype.counts = T, new.populations = popul)

## |            :            |            :            | 100 %
## |

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## ====================================================

genome <- diversity.stats(genome, new.populations = popul)

## |            :            |            :            | 100 %
## |

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors

## ====================================================

h <- pegas::haplotype(pac)
net <- haploNet(h)
freq <- haploFreq(pac, factor(pob$pob[1:192], levels = unique(pob$pob)), haplo = h)

res <- data.frame(n.hap=c(apply(freq, 2, function(x) sum(x!=0)),nrow(freq)), nind=c(as.vector(table(factor(pob$pob, levels=unique(pob$pob)))),nrow(pob)), hap.div= c(as.vector(genome@hap.diversity.within), hdiv), nuc.div= as.vector(nd))
res

##            n.hap nind   hap.div     nuc.div
## Ancash-lib    38   56 0.9610390 0.004866667
## Sechura       17   25 0.9433333 0.005688889
## Vegueta       20   28 0.9021164 0.005601411
## Ica           21   38 0.8961593 0.003087719
## Tacna         27   45 0.9424242 0.003776431
##               75  192 0.7867038 0.004461824

rownames(freq) <- attr(net, 'labels')
freq

##         Ancash-lib Sechura Vegueta Ica Tacna
## I                2       0       0   1     1
## II               8       4       0   6     6
## III              9       5       8  10    10
## IV               1       0       0   0     0
## V                1       0       0   0     0
## VI               1       0       0   0     0
## VII              1       0       0   0     0
## VIII             1       1       0   0     1
## IX               2       0       1   0     0
## X                1       0       0   0     0
## XI               1       0       0   0     1
## XII              1       0       0   0     0
## XIII             1       2       0   1     1
## XIV              1       0       0   0     0
## XV               1       1       2   0     1
## XVI              1       0       0   0     0
## XVII             1       0       1   0     0
## XVIII            1       0       0   0     0
## XIX              1       0       0   0     0
## XX               1       0       1   1     1
## XXI              1       0       1   0     0
## XXII             2       1       1   0     0
## XXIII            1       0       0   0     0
## XXIV             1       0       0   0     0
## XXV              1       1       0   0     0
## XXVI             1       0       0   0     0
## XXVII            1       0       0   0     0
## XXVIII           1       0       0   0     0
## XXIX             1       0       0   0     0
## XXX              1       0       0   0     0
## XXXI             1       0       1   0     1
## XXXII            1       0       0   0     1
## XXXIII           1       1       0   0     0
## XXXIV            1       0       1   3     0
## XXXV             1       0       0   0     0
## XXXVI            1       1       0   0     1
## XXXVII           1       0       0   0     0
## XXXVIII          1       0       0   0     2
## XXXIX            0       1       0   2     1
## XL               0       1       0   0     0
## XLI              0       1       0   0     0
## XLII             0       1       0   0     0
## XLIII            0       1       0   0     0
## XLIV             0       1       0   0     0
## XLV              0       1       0   0     0
## XLVI             0       1       0   1     0
## XLVII            0       0       1   1     0
## XLVIII           0       0       1   0     0
## XLIX             0       0       1   0     0
## L                0       0       1   1     1
## LI               0       0       1   0     1
## LII              0       0       1   0     0
## LIII             0       0       1   1     0
## LIV              0       0       1   1     0
## LV               0       0       1   0     0
## LVI              0       0       1   0     0
## LVII             0       0       1   0     0
## LVIII            0       0       0   1     0
## LIX              0       0       0   1     0
## LX               0       0       0   1     0
## LXI              0       0       0   1     1
## LXII             0       0       0   1     0
## LXIII            0       0       0   1     1
## LXIV             0       0       0   1     0
## LXV              0       0       0   1     0
## LXVI             0       0       0   1     1
## LXVII            0       0       0   0     1
## LXVIII           0       0       0   0     1
## LXIX             0       0       0   0     3
## LXX              0       0       0   0     1
## LXXI             0       0       0   0     2
## LXXII            0       0       0   0     1
## LXXIII           0       0       0   0     1
## LXXIV            0       0       0   0     1
## LXXV             0       0       0   0     1

MST

d <- dist.dna(pac, model = 'N')
r <- mst(d)
#plot(r)

pac.coa <- dudi.coa(pac.genind, scannf = F, nf = 3)
pac.mst <- mstree(dist.dudi(pac.coa), 1)
s.label(pac.coa$li, clab=0, cpoi=2, neig = pac.mst, cneig = 1)

#xy = data.frame(x = runif(20), y = runif(20))
#par(mfrow = c(2,2))
#for (k in 1:4) {
#    neig = mstree (provesti.dist(pac.genind), k)
#    s.label(, xlim = c(0,1), ylim = c(0,1), addax = FALSE, neig = neig)
# }

#plot(ms.tree)

#provesti.dist(pac.genind)

#data(HorseFluRaw)
#attach(HorseFluRaw)
#x <- new("obkData", individuals=individuals, dna=dna, records=clinics)
#plotggMST(x,individualID=42)
#plot huge minimum spanning tree
#plotggMST(x)
#detach(HorseFluRaw)

FST

library(hierfstat)

## Loading required package: gtools
## 
## Attaching package: 'gtools'
## 
## The following object is masked from 'package:adegenet':
## 
##     chr
## 
## 
## Attaching package: 'hierfstat'
## 
## The following object is masked from 'package:poppr':
## 
##     nei.dist
## 
## The following object is masked from 'package:adegenet':
## 
##     read.fstat
## 
## The following objects are masked from 'package:ape':
## 
##     pcoa, varcomp

tab.pac <- as.data.frame(pac.genind@tab)
tab.pac$pop <- as.character(pop(pac.genind))

mat.fst <- pp.fst(tab.pac[, c(69, 1:68)])
mat.fst

##      [,1]       [,2]
## [1,]  NaN 0.03835539
## [2,]  NaN        NaN

unique(pob$pob)

## [1] "Ancash-lib" "Sechura"    "Vegueta"    "Ica"        "Tacna"

library(StAMPP)

geno <- as.data.frame(tab(pac.genind, freq=T))
geno$id <- row.names(geno)
geno$pop <- as.character(pop(pac.genind))
geno$ploidy <- 1
geno$format <- 'freq'
geno <- geno[, c(79, 80, 81, 82, 1:78)]

#stamppConvert(geno, 'r')

boot.fst <- stamppFst(stamppConvert(geno, 'r'), nboots = 1000)

boot.fst$Fsts

##             Ancash-lib     Sechura      Vegueta         Ica Tacna
## Ancash-lib          NA          NA           NA          NA    NA
## Sechura    -0.05015092          NA           NA          NA    NA
## Vegueta    -0.03844282 -0.06879505           NA          NA    NA
## Ica        -0.01106672 -0.02599804 -0.007145136          NA    NA
## Tacna      -0.02572480 -0.04252345 -0.029897205 -0.02431715    NA

boot.fst$Pvalues

##            Ancash-lib Sechura Vegueta Ica Tacna
## Ancash-lib         NA      NA      NA  NA    NA
## Sechura         1.000      NA      NA  NA    NA
## Vegueta         1.000       1      NA  NA    NA
## Ica             0.991       1   0.851  NA    NA
## Tacna           1.000       1   1.000   1    NA

presentación3

fmenendez

October 22, 2015

AMOVA

Medidas resumen

PCA

Arbol de peru

Network de haplotipos solo Peru

Indices de diversidad nuc y haplo + tabla de frecuencias de haplotypos por pob

MST

FST