library(ape)
library(adegenet)
## Loading required package: ade4
##
## /// adegenet 2.0.0 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature resquests: adegenetIssues()
#pin <- read.dna('ND1-EasternPacific.fasta', format = 'f')
pac <- read.dna('completo.fas', format = 'f')
pac.genind <- DNAbin2genind(pac)
#indNames(pac.genind)[193] <- strsplit(indNames(pac.genind)[193], ' ')[[1]][1]
AMOVA
library(poppr)
## This is poppr version 2.0.2. To get started, type package?poppr
## OMP parallel support: available
library(Hmisc)
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
##
## Attaching package: 'survival'
##
## The following object is masked from 'package:adegenet':
##
## strata
##
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
##
## The following object is masked from 'package:ape':
##
## zoom
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
#hier(pac.genind, )
load('strata.rda')
pob <- data.frame(nom=indNames(pac.genind), pob=1:nrow(tab(pac.genind)))
pob[1:192, 'pob'] <- capitalize(strata$pob)
pop(pac.genind) <- pob$pob[1:192]
strata(pac.genind) <- data.frame(pop=pob$pob[1:192])
#save(pac.genind, file='pac.genind.rda')
#pac.genpop <- genind2genpop(pac.genind)
#save(pac.genpop, file='pac_genpop.gen')
pac.gencl <- as.genclone(pac.genind)
amova.pac <- poppr.amova(pac.gencl, hier = ~pop)
##
## No missing values detected.
amovasig.pac <- randtest(amova.pac, nrepet=999)
amova.pac
## $call
## ade4::amova(samples = xtab, distances = xdist, structures = xstruct)
##
## $results
## Df Sum Sq Mean Sq
## Between samples 4 5.451607 1.362902
## Within samples 187 279.308810 1.493630
## Total 191 284.760417 1.490892
##
## $componentsofcovariance
## Sigma %
## Variations Between samples -0.003480044 -0.2335365
## Variations Within samples 1.493629997 100.2335365
## Total variations 1.490149954 100.0000000
##
## $statphi
## Phi
## Phi-samples-total -0.002335365
amovasig.pac
## Monte-Carlo test
## Call: as.randtest(sim = res, obs = sigma[1])
##
## Observation: -0.003480044
##
## Based on 999 replicates
## Simulated p-value: 0.599
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## -0.3541440114 -0.0002246648 0.0000844974
Medidas resumen
library(poppr);library(ggplot2)
res.pac <- poppr(pac.genind)
## | Ancash-lib
## | Sechura
## | Vegueta
## | Ica
## | Tacna
## | Total
res.pac
## Pop N MLG eMLG SE H G lambda E.5 Hexp Ia rbarD
## 1 Ancash-lib 56 28 14.9 1.787 2.77 8.76 0.886 0.518 0.0808 1.096 0.0401
## 2 Sechura 25 16 16.0 0.000 2.51 9.06 0.890 0.712 0.1012 1.705 0.0614
## 3 Vegueta 28 20 18.1 0.791 2.69 9.12 0.890 0.592 0.1004 0.856 0.0338
## 4 Ica 38 17 12.4 1.396 2.26 5.60 0.821 0.536 0.0513 0.713 0.0401
## 5 Tacna 45 23 14.8 1.596 2.68 8.77 0.886 0.574 0.0646 0.648 0.0239
## 6 Total 192 52 14.7 2.095 3.10 9.15 0.891 0.382 0.0765 1.117 0.0329
## File
## 1 pac.genind
## 2 pac.genind
## 3 pac.genind
## 4 pac.genind
## 5 pac.genind
## 6 pac.genind
h <- data.frame(Hs=Hs(pac.genind), Hexp=res.pac$Hexp[1:5])
h
## Hs Hexp
## Ancash-lib 0.07932692 0.08076923
## Sechura 0.09714872 0.10119658
## Vegueta 0.09680795 0.10039343
## Ica 0.04993252 0.05128205
## Tacna 0.06320988 0.06464646
ggplot(h, aes(Hs, Hexp))+
geom_point(alpha=0.5)+
geom_text(aes(label=levels(pop(pac.genind))), hjust=2, size=3)+
xlab('Heterocgocidad observada')+
ylab('Heterocigocidad esperada')+
geom_line(dat=data.frame(x=c(0,1), y=c(0,1)), aes(x, y))+
coord_cartesian(xlim = c(0, 0.15), ylim = c(0,0.15))

PCA
pca.pac <- dudi.pca(pac.genind, scannf = F, nf = 3)
ggplot(pca.pac$li, aes(Axis1, Axis2, color=pob$pob[1:192]))+
geom_point(size=4, alpha=0.5)+
xlab('Componente 1')+
ylab('Componente 2')+
scale_color_discrete(guide = guide_legend(title= 'Puntos'))+
geom_vline(xintercept=0, linetype='dotted')+
geom_hline(yintercept=0, linetype='dotted')

pca.pac$eig[1]/sum(pca.pac$eig)*100
## [1] 12.98261
pca.pac$eig[2]/sum(pca.pac$eig)*100
## [1] 7.993238
(pca.pac$eig[1]+pca.pac$eig[2])/sum(pca.pac$eig)*100
## [1] 20.97585
loadings <- pca.pac$c1
aload <- abs(loadings)
#sweep(aload, 2, colSums(aload), '/')[order(aload)]
aload[order(aload$CS1, decreasing = T)[1:5], ]
## CS1 CS2 CS3
## X251.t 0.2879322 0.0005785273 0.0929206
## X251.c 0.2879322 0.0005785273 0.0929206
## X470.t 0.2812348 0.0386049192 0.1459957
## X470.c 0.2812348 0.0386049192 0.1459957
## X623.g 0.2743272 0.0063027088 0.0897944
aload[order(aload$CS2, decreasing = T)[1:5], ]
## CS1 CS2 CS3
## X350.t 0.04229584 0.3509372 0.03502338
## X350.c 0.04229584 0.3509372 0.03502338
## X188.g 0.04622626 0.3500111 0.03439804
## X188.a 0.04622626 0.3500111 0.03439804
## X104.g 0.04747940 0.2859141 0.02404198
aload[order(aload$CS3, decreasing = T)[1:5], ]
## CS1 CS2 CS3
## X374.g 0.10282770 0.04410839 0.3197177
## X374.a 0.10282770 0.04410839 0.3197177
## X215.t 0.10503223 0.04559234 0.3113974
## X215.c 0.10503223 0.04559234 0.3113974
## X746.a 0.06442677 0.03503461 0.2469785
Arbol de peru
library(magrittr)
load('chi_dna.rda')
d.prov3 <- provesti.dist(DNAbin2genind(chi.dna))
arbol.per <- d.prov3 %>%
nj() %>%
ladderize()
load('strata.rda')
colores <- as.character(strata$pob)
colores[colores %in% c('ancash-lib')] <- '#1b9e77'
colores[colores %in% c('sechura')] <- '#d95f02'
colores[colores %in% c('vegueta')] <- '#7570b3'
colores[colores %in% c('ica')] <- '#e7298a'
colores[colores %in% c('tacna')] <- '#66a61e'
plot(arbol.per, tip.color=colores, cex=0.7)
legend(x='topright',
legend = c('Ancash-Lib', 'Sechura', 'Vegueta', 'Ica', 'Tacna'),
fill= c('#1b9e77', '#d95f02', '#7570b3', '#e7298a', '#66a61e', '#e6ab02'),
border=c('white', 'white', 'white', 'white', 'white'), cex=0.7)

Network de haplotipos solo Peru
library(pegas)
##
## Attaching package: 'pegas'
##
## The following object is masked from 'package:ade4':
##
## amova
##
## The following object is masked from 'package:ape':
##
## mst
per <- read.dna('completo.fas', format='f')
h <- pegas::haplotype(per)
h <- sort(h, what='labels')
net <- haploNet(h)
ind.hap<-with(stack(setNames(attr(h, "index"), rownames(h))), table(hap=ind, pop=pob[1:192, 'pob']))
plot(net, size = attr(net, "freq"), pie=ind.hap, fast = TRUE, legend=F)
legend('topleft', colnames(ind.hap), col=rainbow(ncol(ind.hap)), pch=20, cex=0.7)

Indices de diversidad nuc y haplo + tabla de frecuencias de haplotypos por pob
library('PopGenome'); library(parallel); library(SnowballC); library(haplotypes)
#makeCluster(4)
genome <- readData('peru/', parallized = F)
## | : | : | 100 %
## |====================================================
popul <- split(pob$nom, factor(pob$pob[1:192], levels=unique(pob$pob[1:192])))
#popul <- lapply(popul, function(x) gsub(' ', '_', x))
genome <- set.populations(genome, new.populations = popul)
## | : | : | 100 %
## |
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## ====================================================
pac <- read.dna('completo.fas', format = 'f')
pob$seq <- seq_along(pob$pob[1:192])
nd <- sapply(split(pob$seq, factor(pob$pob, levels=unique(pob$pob))), function(x) nuc.div(pac[x, ]))
nd <- append(nd, nuc.div(pac))
paci <- haplotypes::read.fas('pacifico/pacifico.fasta')
h <- haplotypes::haplotype(paci)
pops <- pob$pob[1:192]
g <- haplotypes::grouping(h, pops)
sz<-apply(g$hapmat,2,sum)
# hdiv gives the total haplotype diversity #
hdiv<-(sum(sz)/(sum(sz)-1))*(1-sum((sz/sum(sz))^2))
hdiv
## [1] 0.7867038
genome@region.data@populations2[[1]][[1]] <- popul[[1]]
genome@region.data@populations2[[1]][[2]] <- popul[[2]]
genome@region.data@populations2[[1]][[3]] <- popul[[3]]
genome@region.data@populations2[[1]][[4]] <- popul[[4]]
genome@region.data@populations2[[1]][[5]] <- popul[[5]]
genome <- F_ST.stats(genome, mode='haplotype', only.haplotype.counts = T, new.populations = popul)
## | : | : | 100 %
## |
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## ====================================================
genome <- diversity.stats(genome, new.populations = popul)
## | : | : | 100 %
## |
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## Warning in Ops.factor(populations[[yy]], dim(bial)[1]): '>' not meaningful
## for factors
## ====================================================
h <- pegas::haplotype(pac)
net <- haploNet(h)
freq <- haploFreq(pac, factor(pob$pob[1:192], levels = unique(pob$pob)), haplo = h)
res <- data.frame(n.hap=c(apply(freq, 2, function(x) sum(x!=0)),nrow(freq)), nind=c(as.vector(table(factor(pob$pob, levels=unique(pob$pob)))),nrow(pob)), hap.div= c(as.vector(genome@hap.diversity.within), hdiv), nuc.div= as.vector(nd))
res
## n.hap nind hap.div nuc.div
## Ancash-lib 38 56 0.9610390 0.004866667
## Sechura 17 25 0.9433333 0.005688889
## Vegueta 20 28 0.9021164 0.005601411
## Ica 21 38 0.8961593 0.003087719
## Tacna 27 45 0.9424242 0.003776431
## 75 192 0.7867038 0.004461824
rownames(freq) <- attr(net, 'labels')
freq
## Ancash-lib Sechura Vegueta Ica Tacna
## I 2 0 0 1 1
## II 8 4 0 6 6
## III 9 5 8 10 10
## IV 1 0 0 0 0
## V 1 0 0 0 0
## VI 1 0 0 0 0
## VII 1 0 0 0 0
## VIII 1 1 0 0 1
## IX 2 0 1 0 0
## X 1 0 0 0 0
## XI 1 0 0 0 1
## XII 1 0 0 0 0
## XIII 1 2 0 1 1
## XIV 1 0 0 0 0
## XV 1 1 2 0 1
## XVI 1 0 0 0 0
## XVII 1 0 1 0 0
## XVIII 1 0 0 0 0
## XIX 1 0 0 0 0
## XX 1 0 1 1 1
## XXI 1 0 1 0 0
## XXII 2 1 1 0 0
## XXIII 1 0 0 0 0
## XXIV 1 0 0 0 0
## XXV 1 1 0 0 0
## XXVI 1 0 0 0 0
## XXVII 1 0 0 0 0
## XXVIII 1 0 0 0 0
## XXIX 1 0 0 0 0
## XXX 1 0 0 0 0
## XXXI 1 0 1 0 1
## XXXII 1 0 0 0 1
## XXXIII 1 1 0 0 0
## XXXIV 1 0 1 3 0
## XXXV 1 0 0 0 0
## XXXVI 1 1 0 0 1
## XXXVII 1 0 0 0 0
## XXXVIII 1 0 0 0 2
## XXXIX 0 1 0 2 1
## XL 0 1 0 0 0
## XLI 0 1 0 0 0
## XLII 0 1 0 0 0
## XLIII 0 1 0 0 0
## XLIV 0 1 0 0 0
## XLV 0 1 0 0 0
## XLVI 0 1 0 1 0
## XLVII 0 0 1 1 0
## XLVIII 0 0 1 0 0
## XLIX 0 0 1 0 0
## L 0 0 1 1 1
## LI 0 0 1 0 1
## LII 0 0 1 0 0
## LIII 0 0 1 1 0
## LIV 0 0 1 1 0
## LV 0 0 1 0 0
## LVI 0 0 1 0 0
## LVII 0 0 1 0 0
## LVIII 0 0 0 1 0
## LIX 0 0 0 1 0
## LX 0 0 0 1 0
## LXI 0 0 0 1 1
## LXII 0 0 0 1 0
## LXIII 0 0 0 1 1
## LXIV 0 0 0 1 0
## LXV 0 0 0 1 0
## LXVI 0 0 0 1 1
## LXVII 0 0 0 0 1
## LXVIII 0 0 0 0 1
## LXIX 0 0 0 0 3
## LXX 0 0 0 0 1
## LXXI 0 0 0 0 2
## LXXII 0 0 0 0 1
## LXXIII 0 0 0 0 1
## LXXIV 0 0 0 0 1
## LXXV 0 0 0 0 1
MST
d <- dist.dna(pac, model = 'N')
r <- mst(d)
#plot(r)
pac.coa <- dudi.coa(pac.genind, scannf = F, nf = 3)
pac.mst <- mstree(dist.dudi(pac.coa), 1)
s.label(pac.coa$li, clab=0, cpoi=2, neig = pac.mst, cneig = 1)

#xy = data.frame(x = runif(20), y = runif(20))
#par(mfrow = c(2,2))
#for (k in 1:4) {
# neig = mstree (provesti.dist(pac.genind), k)
# s.label(, xlim = c(0,1), ylim = c(0,1), addax = FALSE, neig = neig)
# }
#plot(ms.tree)
#provesti.dist(pac.genind)
#data(HorseFluRaw)
#attach(HorseFluRaw)
#x <- new("obkData", individuals=individuals, dna=dna, records=clinics)
#plotggMST(x,individualID=42)
#plot huge minimum spanning tree
#plotggMST(x)
#detach(HorseFluRaw)
FST
library(hierfstat)
## Loading required package: gtools
##
## Attaching package: 'gtools'
##
## The following object is masked from 'package:adegenet':
##
## chr
##
##
## Attaching package: 'hierfstat'
##
## The following object is masked from 'package:poppr':
##
## nei.dist
##
## The following object is masked from 'package:adegenet':
##
## read.fstat
##
## The following objects are masked from 'package:ape':
##
## pcoa, varcomp
tab.pac <- as.data.frame(pac.genind@tab)
tab.pac$pop <- as.character(pop(pac.genind))
mat.fst <- pp.fst(tab.pac[, c(69, 1:68)])
mat.fst
## [,1] [,2]
## [1,] NaN 0.03835539
## [2,] NaN NaN
unique(pob$pob)
## [1] "Ancash-lib" "Sechura" "Vegueta" "Ica" "Tacna"
library(StAMPP)
geno <- as.data.frame(tab(pac.genind, freq=T))
geno$id <- row.names(geno)
geno$pop <- as.character(pop(pac.genind))
geno$ploidy <- 1
geno$format <- 'freq'
geno <- geno[, c(79, 80, 81, 82, 1:78)]
#stamppConvert(geno, 'r')
boot.fst <- stamppFst(stamppConvert(geno, 'r'), nboots = 1000)
boot.fst$Fsts
## Ancash-lib Sechura Vegueta Ica Tacna
## Ancash-lib NA NA NA NA NA
## Sechura -0.05015092 NA NA NA NA
## Vegueta -0.03844282 -0.06879505 NA NA NA
## Ica -0.01106672 -0.02599804 -0.007145136 NA NA
## Tacna -0.02572480 -0.04252345 -0.029897205 -0.02431715 NA
boot.fst$Pvalues
## Ancash-lib Sechura Vegueta Ica Tacna
## Ancash-lib NA NA NA NA NA
## Sechura 1.000 NA NA NA NA
## Vegueta 1.000 1 NA NA NA
## Ica 0.991 1 0.851 NA NA
## Tacna 1.000 1 1.000 1 NA