Lovell — Jun 18, 2014, 2:58 PM
#Rarity final molecular and quantitative genetic script
######################################
#Outline:
#1:Read in the data
#2:Calculate genetic diversity and structure indices
#3:Tabulate so that each locus is a column, each pop*index combo is a row
#4:Run Principal component analyses on each index independently
#5:Test differentiation among species (or range size types) using ___
#6:Repeat for quantitative genetic data
#7:Plot data
######################################
#1: Read in all data and packages
rm(list=ls())
setwd("~/Desktop/R_directory")
library(gstudio)
Loading required package: ggplot2
Loading required package: raster
Loading required package: sp
library(plyr)
library(reshape)
Attaching package: 'reshape'
The following objects are masked from 'package:plyr':
rename, round_any
library(reshape2)
Attaching package: 'reshape2'
The following objects are masked from 'package:reshape':
colsplit, melt, recast
library(nlme)
Attaching package: 'nlme'
The following object is masked from 'package:raster':
getData
library(lme4)
Loading required package: Matrix
Attaching package: 'Matrix'
The following object is masked from 'package:reshape':
expand
Loading required package: Rcpp
Attaching package: 'lme4'
The following object is masked from 'package:nlme':
lmList
library(MASS)
Attaching package: 'MASS'
The following objects are masked from 'package:raster':
area, select
library(adegenet)
Loading required package: ade4
==========================
adegenet 1.4-2 is loaded
==========================
- to start, type '?adegenet'
- to browse adegenet website, type 'adegenetWeb()'
- to post questions/comments: adegenet-forum@lists.r-forge.r-project.org
Attaching package: 'adegenet'
The following objects are masked from 'package:gstudio':
alleles, ploidy
library(hierfstat)
Loading required package: gtools
Attaching package: 'hierfstat'
The following object is masked from 'package:adegenet':
read.fstat
library(fossil)
Loading required package: maps
Loading required package: shapefiles
Loading required package: foreign
Attaching package: 'shapefiles'
The following objects are masked from 'package:foreign':
read.dbf, write.dbf
###################
#gs is population genetic data (15 SSRs)
gs<-read_population(
"rl2013_allgenotypedata_pegas_noapo_nosmallpops_reanalysis_sppop.csv",
type="separated",sep=",",locus.columns=4:18,header=T)
head(gs)
species pop unique a1 bf11 bf18 b6 a3
1 crandallii 1 crandallii_1 239:239 81:81 118:118 299:299 999:999
2 crandallii 1 crandallii_1 239:239 81:81 118:118 299:299 999:999
3 crandallii 1 crandallii_1 239:239 81:81 118:118 299:299 999:999
4 crandallii 1 crandallii_1 239:239 81:81 118:118 299:299 999:999
5 crandallii 1 crandallii_1 239:239 81:81 118:118 299:299 999:999
6 crandallii 1 crandallii_1 239:239 81:81 118:118 299:299 999:999
bf20 c8 bf9 bf19 bdru266 ice3 ice14 e9 bf3
1 214:214 233:233 84:84 999:999 999:999 85:85 215:215 194:194 100:100
2 214:214 233:233 85:85 999:999 999:999 85:85 215:215 194:194 100:100
3 214:214 233:233 85:85 999:999 999:999 85:85 215:215 194:194 100:100
4 214:214 233:233 85:85 999:999 999:999 85:85 215:215 194:194 100:100
5 214:214 233:233 85:85 999:999 999:999 85:85 215:215 194:194 100:100
6 214:214 233:233 85:85 999:999 999:999 81:81 215:215 194:194 100:100
bf15
1 104:104
2 104:104
3 104:104
4 104:104
5 104:104
6 104:104
#phes is quantitative genetic data. scaled and centered. 8 traits
phes<-read.csv("rl2013_allphenotypes_reanalysis_scalecenter.csv",header=T)
head(phes)
species pop family unique_pop unique area_spring
1 Crandallii 1 12 Crandallii_1 Crandallii_1_12 -0.3590
2 Crandallii 1 12 Crandallii_1 Crandallii_1_12 0.5693
3 Crandallii 1 12 Crandallii_1 Crandallii_1_12 0.6472
4 Crandallii 1 12 Crandallii_1 Crandallii_1_12 0.6450
5 Crandallii 1 16 Crandallii_1 Crandallii_1_16 -2.4179
6 Crandallii 1 16 Crandallii_1 Crandallii_1_16 -1.1280
area_winter ht_spring ht_winter leaf.size lvs_spring lvs_winter
1 0.08905 -0.03756 -0.5445 -0.5496 0.2209 0.8664
2 0.26551 -0.53657 -0.1154 0.3866 0.2209 0.8664
3 0.54796 -0.53657 -0.1154 0.4651 0.2209 0.8664
4 0.39552 -0.03756 -0.1154 0.4629 0.2209 0.8664
5 -2.44404 -2.03359 -1.4028 -1.9100 -1.8332 -3.1624
6 -0.97719 -1.53458 -0.5445 -0.9492 -0.4638 -0.4765
node.dist
1 -0.9949
2 -0.6137
3 -0.6137
4 -0.6137
5 0.9112
6 -0.3087
######################################
######################################
########### Molecular genetic analysis
######################################
######################################
#2 Prepare SSR data from popgen diversity calculations
species<-unique(gs$species)
allsep_gen<-partition(gs,stratum="unique")
hist(unlist(lapply(allsep_gen, function(x) length(x[,1])))) #look at distn of sample sizes
sppop<-lapply(allsep_gen,function(x) return(x[1,1:2]))
sppop<-data.frame(matrix(unlist(sppop), nrow=length(sppop), byrow=T))
colnames(sppop)<-c("species","pop")
######################################
#3 Calculate genetic diversity
div_out<-list()
index_out<-list()
gendiv_indices<-c("He", "A95","Pe") #add index here. Fis doesn't work here.
for (i in gendiv_indices){
cat("\n",paste("------",i,"------","\n",sep=""))
for (j in 4:18){
temp<-lapply(allsep_gen, function(x) mean(rarefaction(x[,j],mode=i,size=5, nperm=99)))
temp<-data.frame(matrix(unlist(temp), nrow=length(temp)))
cat(colnames(gs)[j],"...")
div_out[[colnames(gs)[j]]]<-temp
}
div_df<-data.frame(matrix(unlist(div_out), nrow=43)) #process the output for each index
colnames(div_df)<-colnames(gs)[4:18]
rownames(div_df)<-names(allsep_gen)
div_df<-cbind(sppop,div_df)
is.na(div_df) <- sapply(div_df, is.infinite) #set infinite values to NA
index_out[[i]]<-div_df
}
------He------
a1 ...bf11 ...bf18 ...b6 ...a3 ...bf20 ...c8 ...bf9 ...bf19 ...bdru266 ...ice3 ...ice14 ...e9 ...bf3 ...bf15 ...
------A95------
a1 ...bf11 ...bf18 ...b6 ...a3 ...bf20 ...c8 ...bf9 ...bf19 ...bdru266 ...ice3 ...ice14 ...e9 ...bf3 ...bf15 ...
------Pe------
a1 ...bf11 ...bf18 ...b6 ...a3 ...bf20 ...c8 ...bf9 ...bf19 ...bdru266 ...ice3 ...ice14 ...e9 ...bf3 ...bf15 ...
######################################
#4 Calculate pairwise population structure among all populations
allgen<-read.csv("rl2013_allgenotypedata_pegas_noapo_nosmallpops_reanalysis.csv", header=T)
outall<-data.frame(species=character(),pop=character(),meanfst=numeric())
for (i in unique(allgen$species))
{
species_in<-allgen[allgen$species==i,]
popnames<-sppop[sppop$species==i,]
all_gi<-df2genind(as.matrix(species_in[,-c(1:4)]),sep="/",pop=matrix(species_in$pop))
fstats<-pairwise.fst(all_gi)
df_fst <- melt(as.matrix(fstats), varnames = c("row", "col"))
df_fst <- df_fst[df_fst$row!=df_fst$col,]
print(i)
pops<-data.frame(meanfst=numeric())
for (k in 1:length(unique(species_in$pop)))
{
meanfst<-c(unique(species_in$pop)[k],mean(df_fst[df_fst$row==k|df_fst$col==k,][,3]))
pops[k,]<-mean(df_fst[df_fst$row==k|df_fst$col==k,][,3])
}
outeachspecies<-cbind(popnames,pops)
outall<-rbind(outall,outeachspecies)
}
[1] "crandallii"
[1] "fernaldiana"
[1] "spatifolia"
[1] "stricta"
ggplot(outall,aes(y=meanfst,x=species))+
geom_point()
######################################
#5 Process estimates of diversity and FST so that there is a data frame
# containing 1 column/locus, stacked by statistic
# make the diversity estimates 1st
alldiv_df <- ldply(index_out, data.frame)
colnames(alldiv_df)[1]<-"index"
#average diversity across loci
all_out<-melt(alldiv_df, id.vars = c("index","species","pop"))
gendiv_means <- dcast(all_out, species + pop ~ index,function(x) mean(na.omit(x)))
#add FST
aphefst<-merge(gendiv_means,outall,by=c("species","pop"))
######################################
#6 Plot Population genetic diversity for each statistic averaged across loci
head(aphefst)
species pop A95 He Pe meanfst
1 crandallii 1 1.079 0.03017 0.03071 0.5648
2 crandallii 2 1.201 0.06945 0.07213 0.3370
3 crandallii 3 1.198 0.06112 0.05752 0.3869
4 crandallii 4 1.168 0.06681 0.06741 0.3135
5 crandallii 5 1.095 0.02835 0.02785 0.4074
6 crandallii 6 1.473 0.15849 0.15747 0.2414
# add spatial data
ll<- read.table("rl2013_popsummaryfinal.csv", sep= ",", header=T)
names(ll)[1:2]<-c("species","pop")
head(ll)
species pop unique lat long popnum
1 Crandallii 1 Crandallii_1 39.08 -106.3 1
2 Crandallii 2 Crandallii_2 38.51 -106.2 2
3 Crandallii 3 Crandallii_3 38.49 -106.0 3
4 Crandallii 4 Crandallii_4 38.66 -106.8 4
5 Crandallii 5 Crandallii_5 38.44 -106.9 5
6 Crandallii 6 Crandallii_6 38.49 -107.0 6
ll$species<-tolower(ll$species)
gen_spatial<-merge(ll,aphefst,by=c("species","pop"))
gen_spatial<-gen_spatial[,c(1:2,4:5,7:10)]
gen_spatial<-melt(gen_spatial,id.vars=c("species","pop","stat"))
Error: id variables not found in data: stat
gen_spatial<-melt(gen_spatial,id.vars=c("species","pop","lat","long"))
stats<-unique(gen_spatial$variable)
#plot by spatial distribution
for (i in stats){
print(ggplot(gen_spatial[gen_spatial$variable==i,], aes(x=long,y=lat))+
geom_point(aes(col=species,size=value))+
theme_bw()+
ggtitle(paste("map of distribution of",i, sep=" "))
)
}
#boxplots by genetic statistic
ggplot(gen_spatial,aes(x=species,y=value))+
geom_point(aes(col=species))+
geom_boxplot(aes(col=species))+
theme_bw()+
facet_grid(variable~., scales="free_y")
######################################
######################################
######## Quantitative genetic analysis
######################################
######################################
#7 Filter phenotype data
phes<-read.csv("rl2013_allphenotypes_reanalysis_scalecenter.csv",header=T)
head(phes)
species pop family unique_pop unique area_spring
1 Crandallii 1 12 Crandallii_1 Crandallii_1_12 -0.3590
2 Crandallii 1 12 Crandallii_1 Crandallii_1_12 0.5693
3 Crandallii 1 12 Crandallii_1 Crandallii_1_12 0.6472
4 Crandallii 1 12 Crandallii_1 Crandallii_1_12 0.6450
5 Crandallii 1 16 Crandallii_1 Crandallii_1_16 -2.4179
6 Crandallii 1 16 Crandallii_1 Crandallii_1_16 -1.1280
area_winter ht_spring ht_winter leaf.size lvs_spring lvs_winter
1 0.08905 -0.03756 -0.5445 -0.5496 0.2209 0.8664
2 0.26551 -0.53657 -0.1154 0.3866 0.2209 0.8664
3 0.54796 -0.53657 -0.1154 0.4651 0.2209 0.8664
4 0.39552 -0.03756 -0.1154 0.4629 0.2209 0.8664
5 -2.44404 -2.03359 -1.4028 -1.9100 -1.8332 -3.1624
6 -0.97719 -1.53458 -0.5445 -0.9492 -0.4638 -0.4765
node.dist
1 -0.9949
2 -0.6137
3 -0.6137
4 -0.6137
5 0.9112
6 -0.3087
levels(phes$unique_pop)
[1] "Crandallii_1" "Crandallii_2"
[3] "Crandallii_3" "Crandallii_4"
[5] "Crandallii_5" "Crandallii_6"
[7] "Crandallii_7" "Fernaldiana_1"
[9] "Fernaldiana_2" "Fernaldiana_3"
[11] "Fernaldiana_4" "Fernaldiana_5"
[13] "Spatifolia_103" "Spatifolia_105"
[15] "Spatifolia_377" "Spatifolia_Alvarado"
[17] "Spatifolia_Antero" "Spatifolia_Barr"
[19] "Spatifolia_Brookvale" "Spatifolia_Central"
[21] "Spatifolia_Chicago" "Spatifolia_Chiquito"
[23] "Spatifolia_Cotopaxi" "Spatifolia_Cripple"
[25] "Spatifolia_Crosier" "Spatifolia_Crystal"
[27] "Spatifolia_Delnorte" "Spatifolia_Gardner"
[29] "Spatifolia_Goose" "Spatifolia_Green"
[31] "Spatifolia_Hondo2" "Spatifolia_Pinegrove"
[33] "Spatifolia_Poso" "Spatifolia_Prince"
[35] "Spatifolia_Rosita" "Spatifolia_Round"
[37] "Spatifolia_Royal" "Spatifolia_Salida"
[39] "Spatifolia_Sanluis2" "Spatifolia_Tiesiding"
[41] "Spatifolia_Virginiadale" "Stricta_1"
[43] "Stricta_10" "Stricta_3"
[45] "Stricta_4" "Stricta_5"
[47] "Stricta_8" "Stricta_a4"
[49] "Stricta_f10" "Stricta_F8"
[51] "Stricta_g1" "Stricta_g2"
[53] "Stricta_g3" "Stricta_g5"
[55] "Stricta_o1" "Stricta_o5"
[57] "Stricta_wil0" "Stricta_wil3"
#filter dataset by any families with 1 or 2 reps
n_reps<-data.frame(tapply(phes$unique,phes$unique,function(x) length((x))))
colnames(n_reps)<-"n_reps"
phes_filtered<-phes[phes$unique %in% rownames(n_reps[n_reps$n_reps>=3,]),]
cat("removing families with 1 or 2 reps: ","\n",rownames(n_reps[n_reps$n_reps<3,]))
removing families with 1 or 2 reps:
Crandallii_1_19 Crandallii_5_2 Crandallii_5_3 Crandallii_5_8 Crandallii_7_14 Fernaldiana_1_3 Fernaldiana_1_5 Fernaldiana_1_9 Fernaldiana_2_14 Fernaldiana_2_5 Fernaldiana_3_13 Fernaldiana_3_2 Fernaldiana_3_6 Fernaldiana_4_11 Fernaldiana_5_11 Fernaldiana_5_2 Spatifolia_Brookvale_2 Spatifolia_Central_3 Spatifolia_Central_7 Spatifolia_Cotopaxi_1 Spatifolia_Cotopaxi_3 Spatifolia_Cotopaxi_5 Spatifolia_Cotopaxi_7 Spatifolia_Crosier_1 Spatifolia_Crystal_10 Spatifolia_Rosita_3 Spatifolia_Rosita_9 Spatifolia_Royal_2 Spatifolia_Salida_5 Spatifolia_Virginiadale_2 Spatifolia_Virginiadale_3 Stricta_3_20 Stricta_a4_14 Stricta_a4_16 Stricta_F8_11 Stricta_F8_7 Stricta_o5_5
#filter by number of plants and fams w/in pops
n_fams<-data.frame(tapply(phes_filtered$family,phes_filtered$unique_pop,function(x) length(unique(x))))
n_plants<-data.frame(tapply(phes_filtered$family,phes_filtered$unique_pop,function(x) length((x))))
tofilter_pop<-cbind(n_fams,n_plants)
colnames(tofilter_pop)<-c("n_fams","n_plants")
par(mfrow=c(2,1))
hist(tofilter_pop$n_fams,breaks=10)
hist(tofilter_pop$n_plants,breaks=20)
phes_filtered<-phes[phes$unique_pop %in% rownames(tofilter_pop[tofilter_pop$n_fams>=4,]),]
cat("removing pops w/ less than 4 families: ","\n",rownames(tofilter_pop[tofilter_pop$n_fams<4,]))
removing pops w/ less than 4 families:
Spatifolia_Cotopaxi Spatifolia_Rosita Spatifolia_Royal Spatifolia_Virginiadale Stricta_a4 Stricta_f10 Stricta_F8
phes_filtered<-phes_filtered[complete.cases(phes_filtered),]
phe_list_filtered<-split(phes_filtered,phes_filtered$unique_pop)
######################################
#8 calculate total and among family variance w/in pops
count=1
outlist=list()
for (i in 1:47){
dat<-as.data.frame(phe_list_filtered[i])
if(length(dat[,1])>2){
for (j in 6:13){
model<-lmer(dat[,j]~1+(1|dat[,3]),data=dat)
outlist[[count]]<-data.frame(strsplit(names(phe_list_filtered)[i],"_")[[1]][1],
names(phe_list_filtered[i]),names(phes[j]),
as.numeric(VarCorr(model)), attr(VarCorr(model), "sc")^2 )
count<-count+1
}
}
cat(names(phe_list_filtered[i]),"...",sep="")
}
Crandallii_1...Crandallii_2...Crandallii_3...Crandallii_4...Crandallii_5...Crandallii_6...Crandallii_7...Fernaldiana_1...Fernaldiana_2...Fernaldiana_3...Fernaldiana_4...Fernaldiana_5...Spatifolia_103...Spatifolia_105...Spatifolia_377...Spatifolia_Alvarado...Spatifolia_Antero...Spatifolia_Barr...Spatifolia_Brookvale...Spatifolia_Central...Spatifolia_Chicago...Spatifolia_Chiquito...Spatifolia_Cotopaxi...Spatifolia_Cripple...Spatifolia_Crosier...Spatifolia_Crystal...Spatifolia_Delnorte...Spatifolia_Gardner...Spatifolia_Goose...Spatifolia_Green...Spatifolia_Hondo2...Spatifolia_Pinegrove...Spatifolia_Poso...Spatifolia_Prince...Spatifolia_Rosita...Spatifolia_Round...Spatifolia_Royal...Spatifolia_Salida...Spatifolia_Sanluis2...Spatifolia_Tiesiding...Spatifolia_Virginiadale...Stricta_1...Stricta_10...Stricta_3...Stricta_4...Stricta_5...Stricta_8...
allwithinpopvc <- ldply(outlist, data.frame)
colnames(allwithinpopvc)<-c("species","pop","phenotype","amongfam_var","error_var")
allwithinpopvc$total_var<-allwithinpopvc$amongfam_var+allwithinpopvc$error_var
allwithinpopvc$prop_amongfams<-allwithinpopvc$amongfam_var/allwithinpopvc$total_var
allwithinpop_totalvar<-dcast(allwithinpopvc, species+pop~phenotype,value.var="total_var")
head(allwithinpop_totalvar)
species pop area_spring area_winter ht_spring ht_winter
1 Crandallii Crandallii_1 0.8272 0.8643 0.9541 0.5654
2 Crandallii Crandallii_2 1.2973 0.3964 0.5482 0.5845
3 Crandallii Crandallii_3 0.6425 0.7776 0.2298 0.3371
4 Crandallii Crandallii_4 0.6992 0.9139 0.7450 0.9592
5 Crandallii Crandallii_5 2.6934 3.6997 8.5011 5.9954
6 Crandallii Crandallii_6 0.9858 1.1012 1.1705 1.0203
leaf.size lvs_spring lvs_winter node.dist
1 0.6288 0.3590 1.1049 0.5434
2 1.3843 2.5847 0.5074 0.8160
3 0.7385 0.1172 0.3992 0.3751
4 0.6604 0.1350 0.6665 0.9004
5 3.6743 0.2772 1.5959 3.3326
6 0.9330 0.3838 1.3268 0.9451
allwithinpop_propfam<-dcast(allwithinpopvc, species+pop~phenotype,value.var="prop_amongfams")
head(allwithinpop_propfam)
species pop area_spring area_winter ht_spring ht_winter
1 Crandallii Crandallii_1 0.21923 0.35975 0.5366 0.15072
2 Crandallii Crandallii_2 0.04272 0.07114 0.5248 0.20672
3 Crandallii Crandallii_3 0.42829 0.24807 0.2087 0.07202
4 Crandallii Crandallii_4 0.18660 0.00000 0.1086 0.05333
5 Crandallii Crandallii_5 0.85892 0.89344 0.9316 0.91566
6 Crandallii Crandallii_6 0.39571 0.45344 0.4026 0.37999
leaf.size lvs_spring lvs_winter node.dist
1 0.08913 0.48854 0.60285 0.000e+00
2 0.00000 0.04650 0.02804 2.140e-02
3 0.29510 0.03031 0.00000 5.137e-02
4 0.23041 0.00000 0.00000 2.500e-15
5 0.87518 0.37870 0.16801 5.178e-01
6 0.32297 0.23914 0.30048 0.000e+00
######################################
#9 calculate pairwise variance among pops
out<-list()
count=1
species<-unique(phes_filtered$species)
for (i in 1:4){
onespecies<-phes_filtered[phes_filtered$species==species[i],]
pops<-unique(onespecies$unique_pop)
pwisecombs<-combn(pops,2)
cat("\n",as.character(species[i]),": n.combinations = ",length(pwisecombs[1,]),"\n")
for (j in 1:length(pwisecombs[1,])){
twopops<-phes_filtered[phes_filtered$unique_pop==pwisecombs[1,j]|phes_filtered$unique_pop==pwisecombs[2,j],]
pwisedist<-earth.dist(ll[ll$unique==as.character(pwisecombs[1,j]) | ll$unique==as.character(pwisecombs[2,j]) ,][,c(5,4)])
cat(j,",",sep="")
for(k in 6:13){
fm1 <- lmer(twopops[,k]~1+(1|unique_pop)+(1|unique),data=twopops)
out[[count]]<-data.frame(species[i],pwisecombs[1,j],pwisecombs[2,j],as.numeric(pwisedist),colnames(twopops)[k],
as.numeric(VarCorr(fm1))[1],as.numeric(VarCorr(fm1))[2], attr(VarCorr(fm1), "sc")^2)
count=count+1
}
}
}
Crandallii : n.combinations = 21
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
Fernaldiana : n.combinations = 10
1,2,3,4,5,6,7,8,9,10,
Spatifolia : n.combinations = 300
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,
Warning: Model failed to converge: degenerate Hessian with 1 negative
eigenvalues
254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,
Stricta : n.combinations = 91
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,
allpairwisevc <- ldply(out, data.frame)
names(allpairwisevc)<-c("species","pop1","pop2","geo_distance","phenotype","amongfam_var","btwpop_var","error_var")
allpairwisevc$total_var<-allpairwisevc$amongfam_var+allpairwisevc$error_var
allpairwisevc$prop_amongfams<-allpairwisevc$amongfam_var/allpairwisevc$total_var
allpairwisevc$qst<-allpairwisevc$btwpop_var/(allpairwisevc$btwpop_var+allpairwisevc$amongfam_var)
pairwiseqst<-dcast(allpairwisevc, species+pop1+pop2+geo_distance~phenotype,value.var="qst")
pairwiseqst_filtered<-pairwiseqst[pairwiseqst$geo_distance<133,]
summary(pairwiseqst_filtered)
species pop1 pop2
Crandallii : 21 Spatifolia_377 : 14 Spatifolia_Salida : 14
Fernaldiana: 10 Spatifolia_Alvarado: 14 Spatifolia_Round : 13
Spatifolia :142 Spatifolia_Antero : 12 Spatifolia_Pinegrove: 12
Stricta : 18 Spatifolia_103 : 11 Spatifolia_Prince : 12
Spatifolia_Barr : 11 Spatifolia_Goose : 10
Spatifolia_105 : 8 Spatifolia_Green : 9
(Other) :121 (Other) :121
geo_distance area_spring area_winter ht_spring
Min. : 1.11 Min. :0.000 Min. :0.000 Min. :0.000
1st Qu.: 43.36 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
Median : 75.15 Median :0.155 Median :0.239 Median :0.230
Mean : 74.08 Mean :0.313 Mean :0.347 Mean :0.335
3rd Qu.:108.17 3rd Qu.:0.546 3rd Qu.:0.681 3rd Qu.:0.645
Max. :132.28 Max. :1.000 Max. :1.000 Max. :1.000
NA's :6 NA's :2 NA's :3
ht_winter leaf.size lvs_spring lvs_winter
Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
Median :0.303 Median :0.218 Median :0.077 Median :0.237
Mean :0.366 Mean :0.366 Mean :0.270 Mean :0.333
3rd Qu.:0.664 3rd Qu.:0.681 3rd Qu.:0.441 3rd Qu.:0.593
Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
NA's :3 NA's :1 NA's :8 NA's :11
node.dist
Min. :0.000
1st Qu.:0.000
Median :0.424
Mean :0.438
3rd Qu.:0.832
Max. :1.000
NA's :8
count<-1
out<-list()
for (i in 1:length(unique(phes_filtered$unique_pop))){
pop<-pairwiseqst_filtered[pairwiseqst_filtered$pop1==unique(phes_filtered$unique_pop)[i]|pairwiseqst_filtered$pop2==unique(phes_filtered$unique_pop)[i],]
info<-data.frame(pop$species[1],as.character(unique(phes_filtered$unique_pop)[i]),mean(pop$geo_distance))
meanqst<-data.frame(apply(pop[,5:12],2,function(x) mean(na.omit(x))))
colnames(meanqst)<-i
out[[count]]<-data.frame(info,t(meanqst))
count<-count+1
}
allmeanqst<- ldply(out, data.frame)
colnames(allmeanqst)[1:3]<-c("species","pop","mean_geodist")
meanqst_filtered<-allmeanqst[allmeanqst$pop %in% allwithinpop_propfam$pop,]
meanqst_filtered<-meanqst_filtered[complete.cases(meanqst_filtered),]
head(meanqst_filtered)
species pop mean_geodist area_spring area_winter ht_spring
1 Crandallii Crandallii_1 84.70 0.11120 0.16258 4.510e-02
2 Crandallii Crandallii_2 65.78 0.10457 0.32762 9.043e-16
3 Crandallii Crandallii_3 77.45 0.05130 0.17851 5.938e-03
4 Crandallii Crandallii_4 52.69 0.16004 0.64928 2.456e-02
5 Crandallii Crandallii_5 54.26 0.06376 0.09023 1.515e-02
6 Crandallii Crandallii_6 55.61 0.30774 0.30902 5.513e-03
ht_winter leaf.size lvs_spring lvs_winter node.dist
1 0.24171 0.27065 0.1280 0.1910 0.8333
2 0.40208 0.23616 0.1631 0.5427 0.7442
3 0.19398 0.05997 0.3354 0.5201 0.4286
4 0.34788 0.05351 0.6048 0.7268 0.4358
5 0.05757 0.09183 0.2674 0.1523 0.3872
6 0.22106 0.13743 0.5816 0.4962 0.8295
######################################
# 10 Prepare allquantgen estimates
#stats: qst (pairwiseqst_filtered), among fam var (allwithinpop_propfam), total w/in pop var (allwithinpop_totalvar)
meanqst_filtered<-meanqst_filtered[,-3]
meanqst_filtered$stat<-"qst"
allwithinpop_propfam$stat<-"prop_amongfams"
allwithinpop_totalvar$stat<-"totalvar_withinpops"
par(mfrow=c(1,1))
pairs(meanqst_filtered[,3:10])
pairs(allwithinpop_propfam[,3:10])
pairs(allwithinpop_totalvar[,3:10]) #remove extreme outlier
allwithinpop_totalvar<-allwithinpop_totalvar[!allwithinpop_totalvar$area_spring==max(allwithinpop_totalvar$area_spring),]
pairs(allwithinpop_totalvar[,3:10])
pca1<-prcomp(meanqst_filtered[,3:10])
(pca1$sdev)^2 / sum(pca1$sdev^2) #%var explained
[1] 0.511854 0.201358 0.099012 0.077710 0.056821 0.026033 0.018331 0.008879
cumsum((pca1$sdev)^2) / sum(pca1$sdev^2) #cumulative %var explained
[1] 0.5119 0.7132 0.8122 0.8899 0.9468 0.9728 0.9911 1.0000
meanqst_filtered<-cbind(meanqst_filtered,pca1$x[,1:2])
pca1<-prcomp(allwithinpop_propfam[,3:10])
(pca1$sdev)^2 / sum(pca1$sdev^2) #%var explained
[1] 0.509787 0.207130 0.129482 0.067552 0.035709 0.023099 0.019862 0.007378
cumsum((pca1$sdev)^2) / sum(pca1$sdev^2) #cumulative %var explained
[1] 0.5098 0.7169 0.8464 0.9140 0.9497 0.9728 0.9926 1.0000
allwithinpop_propfam<-cbind(allwithinpop_propfam,pca1$x[,1:2])
pca1<-prcomp(allwithinpop_totalvar[,3:10])
(pca1$sdev)^2 / sum(pca1$sdev^2) #%var explained
[1] 0.418480 0.266689 0.137908 0.096813 0.040565 0.022673 0.011377 0.005495
cumsum((pca1$sdev)^2) / sum(pca1$sdev^2) #cumulative %var explained
[1] 0.4185 0.6852 0.8231 0.9199 0.9605 0.9831 0.9945 1.0000
allwithinpop_totalvar<-cbind(allwithinpop_totalvar,pca1$x[,1:2])
allqgenstats<-rbind(allwithinpop_propfam,allwithinpop_totalvar,meanqst_filtered)
######################################
#11 plot values for each phenotype
plot(allqgenstats$PC1,allqgenstats$PC2)
PC1 <- ddply(allqgenstats, c("species","stat"), summarise,
meanPC1 = mean(PC1),
sePC1 = sd(PC1) / sqrt(length(PC1)) )
PC2 <- ddply(allqgenstats, c("species","stat"), summarise,
meanPC2 = mean(PC2),
sePC2 = sd(PC2) / sqrt(length(PC2)) )
qgen_toplot<-as.data.frame(cbind(PC1,PC2[,3:4]))
ggplot(qgen_toplot,aes(x = meanPC1,y = meanPC2)) +
geom_point(aes(col=species)) +
geom_errorbar(aes(ymin = meanPC2-sePC2,ymax = meanPC2+sePC2,col=species)) +
geom_errorbarh(aes(xmin = meanPC1-sePC1,xmax = meanPC1+sePC1,col=species))+
facet_grid(stat~., scales="free_y")+
theme_bw()
allqgen_long<-melt(allqgenstats,id.vars=c("species","pop","stat"))
allqgen_toplot <- ddply(allqgen_long, c("species","stat","variable"), summarise,
meanValue = mean(value),
seValue = sd(value) / sqrt(length(value)) )
stats<-unique(allqgen_long$stat)
for (i in stats){
print(ggplot(allqgen_long[allqgen_long$stat==i,], aes(x=species,y=value))+
geom_boxplot(aes(col=species),ylim=c(0,1))+
geom_point(aes(col=species),ylim=c(0,1))+
facet_wrap(~variable, scales="free_y")+
theme_bw()+
ggtitle(paste("phenotype specific variation in",i, sep=" "))
)
}