rl2014_allqgen_ssr_analyses.R

Lovell — Jun 18, 2014, 2:58 PM

#Rarity final molecular and quantitative genetic script
######################################
#Outline:
#1:Read in the data
#2:Calculate genetic diversity and structure indices
#3:Tabulate so that each locus is a column, each pop*index combo is a row
#4:Run Principal component analyses on each index independently
#5:Test differentiation among species (or range size types) using ___
#6:Repeat for quantitative genetic data
#7:Plot data
######################################
#1: Read in all data and packages
rm(list=ls())
setwd("~/Desktop/R_directory")
library(gstudio)
Loading required package: ggplot2
Loading required package: raster
Loading required package: sp
library(plyr)
library(reshape)

Attaching package: 'reshape'

The following objects are masked from 'package:plyr':

    rename, round_any
library(reshape2)

Attaching package: 'reshape2'

The following objects are masked from 'package:reshape':

    colsplit, melt, recast
library(nlme)

Attaching package: 'nlme'

The following object is masked from 'package:raster':

    getData
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'

The following object is masked from 'package:reshape':

    expand

Loading required package: Rcpp

Attaching package: 'lme4'

The following object is masked from 'package:nlme':

    lmList
library(MASS)

Attaching package: 'MASS'

The following objects are masked from 'package:raster':

    area, select
library(adegenet)
Loading required package: ade4
   ==========================
    adegenet 1.4-2 is loaded
   ==========================

 - to start, type '?adegenet'
 - to browse adegenet website, type 'adegenetWeb()'
 - to post questions/comments: adegenet-forum@lists.r-forge.r-project.org



Attaching package: 'adegenet'

The following objects are masked from 'package:gstudio':

    alleles, ploidy
library(hierfstat)
Loading required package: gtools

Attaching package: 'hierfstat'

The following object is masked from 'package:adegenet':

    read.fstat
library(fossil)
Loading required package: maps
Loading required package: shapefiles
Loading required package: foreign

Attaching package: 'shapefiles'

The following objects are masked from 'package:foreign':

    read.dbf, write.dbf
###################
#gs is population genetic data (15 SSRs)
gs<-read_population(
  "rl2013_allgenotypedata_pegas_noapo_nosmallpops_reanalysis_sppop.csv",
  type="separated",sep=",",locus.columns=4:18,header=T)
head(gs)
     species pop       unique      a1  bf11    bf18      b6      a3
1 crandallii   1 crandallii_1 239:239 81:81 118:118 299:299 999:999
2 crandallii   1 crandallii_1 239:239 81:81 118:118 299:299 999:999
3 crandallii   1 crandallii_1 239:239 81:81 118:118 299:299 999:999
4 crandallii   1 crandallii_1 239:239 81:81 118:118 299:299 999:999
5 crandallii   1 crandallii_1 239:239 81:81 118:118 299:299 999:999
6 crandallii   1 crandallii_1 239:239 81:81 118:118 299:299 999:999
     bf20      c8   bf9    bf19 bdru266  ice3   ice14      e9     bf3
1 214:214 233:233 84:84 999:999 999:999 85:85 215:215 194:194 100:100
2 214:214 233:233 85:85 999:999 999:999 85:85 215:215 194:194 100:100
3 214:214 233:233 85:85 999:999 999:999 85:85 215:215 194:194 100:100
4 214:214 233:233 85:85 999:999 999:999 85:85 215:215 194:194 100:100
5 214:214 233:233 85:85 999:999 999:999 85:85 215:215 194:194 100:100
6 214:214 233:233 85:85 999:999 999:999 81:81 215:215 194:194 100:100
     bf15
1 104:104
2 104:104
3 104:104
4 104:104
5 104:104
6 104:104
#phes is quantitative genetic data. scaled and centered. 8 traits
phes<-read.csv("rl2013_allphenotypes_reanalysis_scalecenter.csv",header=T)
head(phes)
     species pop family   unique_pop          unique area_spring
1 Crandallii   1     12 Crandallii_1 Crandallii_1_12     -0.3590
2 Crandallii   1     12 Crandallii_1 Crandallii_1_12      0.5693
3 Crandallii   1     12 Crandallii_1 Crandallii_1_12      0.6472
4 Crandallii   1     12 Crandallii_1 Crandallii_1_12      0.6450
5 Crandallii   1     16 Crandallii_1 Crandallii_1_16     -2.4179
6 Crandallii   1     16 Crandallii_1 Crandallii_1_16     -1.1280
  area_winter ht_spring ht_winter leaf.size lvs_spring lvs_winter
1     0.08905  -0.03756   -0.5445   -0.5496     0.2209     0.8664
2     0.26551  -0.53657   -0.1154    0.3866     0.2209     0.8664
3     0.54796  -0.53657   -0.1154    0.4651     0.2209     0.8664
4     0.39552  -0.03756   -0.1154    0.4629     0.2209     0.8664
5    -2.44404  -2.03359   -1.4028   -1.9100    -1.8332    -3.1624
6    -0.97719  -1.53458   -0.5445   -0.9492    -0.4638    -0.4765
  node.dist
1   -0.9949
2   -0.6137
3   -0.6137
4   -0.6137
5    0.9112
6   -0.3087
######################################
######################################
########### Molecular genetic analysis
######################################
######################################
#2 Prepare SSR data from popgen diversity calculations
species<-unique(gs$species)
allsep_gen<-partition(gs,stratum="unique")
hist(unlist(lapply(allsep_gen, function(x) length(x[,1])))) #look at distn of sample sizes

plot of chunk unnamed-chunk-1

sppop<-lapply(allsep_gen,function(x) return(x[1,1:2]))
sppop<-data.frame(matrix(unlist(sppop), nrow=length(sppop), byrow=T))
colnames(sppop)<-c("species","pop")
######################################
#3 Calculate genetic diversity
div_out<-list()
index_out<-list()
gendiv_indices<-c("He", "A95","Pe") #add index here. Fis doesn't work here.
for (i in gendiv_indices){
  cat("\n",paste("------",i,"------","\n",sep=""))
  for (j in 4:18){
    temp<-lapply(allsep_gen, function(x) mean(rarefaction(x[,j],mode=i,size=5, nperm=99)))
    temp<-data.frame(matrix(unlist(temp), nrow=length(temp)))
    cat(colnames(gs)[j],"...")
    div_out[[colnames(gs)[j]]]<-temp
  }
  div_df<-data.frame(matrix(unlist(div_out), nrow=43)) #process the output for each index
  colnames(div_df)<-colnames(gs)[4:18]
  rownames(div_df)<-names(allsep_gen)
  div_df<-cbind(sppop,div_df)
  is.na(div_df) <- sapply(div_df, is.infinite) #set infinite values to NA
  index_out[[i]]<-div_df
}

 ------He------
a1 ...bf11 ...bf18 ...b6 ...a3 ...bf20 ...c8 ...bf9 ...bf19 ...bdru266 ...ice3 ...ice14 ...e9 ...bf3 ...bf15 ...
 ------A95------
a1 ...bf11 ...bf18 ...b6 ...a3 ...bf20 ...c8 ...bf9 ...bf19 ...bdru266 ...ice3 ...ice14 ...e9 ...bf3 ...bf15 ...
 ------Pe------
a1 ...bf11 ...bf18 ...b6 ...a3 ...bf20 ...c8 ...bf9 ...bf19 ...bdru266 ...ice3 ...ice14 ...e9 ...bf3 ...bf15 ...
######################################
#4 Calculate pairwise population structure among all populations
allgen<-read.csv("rl2013_allgenotypedata_pegas_noapo_nosmallpops_reanalysis.csv", header=T)
outall<-data.frame(species=character(),pop=character(),meanfst=numeric())
for (i in unique(allgen$species))
{
  species_in<-allgen[allgen$species==i,]
  popnames<-sppop[sppop$species==i,]
  all_gi<-df2genind(as.matrix(species_in[,-c(1:4)]),sep="/",pop=matrix(species_in$pop))
  fstats<-pairwise.fst(all_gi)
  df_fst <- melt(as.matrix(fstats), varnames = c("row", "col"))
  df_fst <- df_fst[df_fst$row!=df_fst$col,]
  print(i)
  pops<-data.frame(meanfst=numeric())
  for (k in 1:length(unique(species_in$pop)))
  {
    meanfst<-c(unique(species_in$pop)[k],mean(df_fst[df_fst$row==k|df_fst$col==k,][,3]))
    pops[k,]<-mean(df_fst[df_fst$row==k|df_fst$col==k,][,3])
  }
  outeachspecies<-cbind(popnames,pops)
  outall<-rbind(outall,outeachspecies)
}
[1] "crandallii"
[1] "fernaldiana"
[1] "spatifolia"
[1] "stricta"
ggplot(outall,aes(y=meanfst,x=species))+
  geom_point()

plot of chunk unnamed-chunk-1

######################################
#5 Process estimates of diversity and FST so that there is a data frame
# containing 1 column/locus, stacked by statistic
# make the diversity estimates 1st
alldiv_df <- ldply(index_out, data.frame)
colnames(alldiv_df)[1]<-"index"
#average diversity across loci
all_out<-melt(alldiv_df, id.vars = c("index","species","pop"))
gendiv_means <- dcast(all_out, species + pop ~ index,function(x) mean(na.omit(x)))
#add FST
aphefst<-merge(gendiv_means,outall,by=c("species","pop"))

######################################
#6 Plot Population genetic diversity for each statistic averaged across loci
head(aphefst)
     species pop   A95      He      Pe meanfst
1 crandallii   1 1.079 0.03017 0.03071  0.5648
2 crandallii   2 1.201 0.06945 0.07213  0.3370
3 crandallii   3 1.198 0.06112 0.05752  0.3869
4 crandallii   4 1.168 0.06681 0.06741  0.3135
5 crandallii   5 1.095 0.02835 0.02785  0.4074
6 crandallii   6 1.473 0.15849 0.15747  0.2414
# add spatial data
ll<- read.table("rl2013_popsummaryfinal.csv", sep= ",", header=T)
names(ll)[1:2]<-c("species","pop")
head(ll)
     species pop       unique   lat   long popnum
1 Crandallii   1 Crandallii_1 39.08 -106.3      1
2 Crandallii   2 Crandallii_2 38.51 -106.2      2
3 Crandallii   3 Crandallii_3 38.49 -106.0      3
4 Crandallii   4 Crandallii_4 38.66 -106.8      4
5 Crandallii   5 Crandallii_5 38.44 -106.9      5
6 Crandallii   6 Crandallii_6 38.49 -107.0      6
ll$species<-tolower(ll$species)
gen_spatial<-merge(ll,aphefst,by=c("species","pop"))
gen_spatial<-gen_spatial[,c(1:2,4:5,7:10)]
gen_spatial<-melt(gen_spatial,id.vars=c("species","pop","stat"))
Error: id variables not found in data: stat
gen_spatial<-melt(gen_spatial,id.vars=c("species","pop","lat","long"))
stats<-unique(gen_spatial$variable)
#plot by spatial distribution
for (i in stats){
  print(ggplot(gen_spatial[gen_spatial$variable==i,], aes(x=long,y=lat))+
          geom_point(aes(col=species,size=value))+
          theme_bw()+
          ggtitle(paste("map of distribution of",i, sep=" "))
  )
}

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

#boxplots by genetic statistic
ggplot(gen_spatial,aes(x=species,y=value))+
  geom_point(aes(col=species))+
  geom_boxplot(aes(col=species))+
  theme_bw()+
  facet_grid(variable~., scales="free_y")

plot of chunk unnamed-chunk-1

######################################
######################################
######## Quantitative genetic analysis
######################################
######################################
#7 Filter phenotype data
phes<-read.csv("rl2013_allphenotypes_reanalysis_scalecenter.csv",header=T)
head(phes)
     species pop family   unique_pop          unique area_spring
1 Crandallii   1     12 Crandallii_1 Crandallii_1_12     -0.3590
2 Crandallii   1     12 Crandallii_1 Crandallii_1_12      0.5693
3 Crandallii   1     12 Crandallii_1 Crandallii_1_12      0.6472
4 Crandallii   1     12 Crandallii_1 Crandallii_1_12      0.6450
5 Crandallii   1     16 Crandallii_1 Crandallii_1_16     -2.4179
6 Crandallii   1     16 Crandallii_1 Crandallii_1_16     -1.1280
  area_winter ht_spring ht_winter leaf.size lvs_spring lvs_winter
1     0.08905  -0.03756   -0.5445   -0.5496     0.2209     0.8664
2     0.26551  -0.53657   -0.1154    0.3866     0.2209     0.8664
3     0.54796  -0.53657   -0.1154    0.4651     0.2209     0.8664
4     0.39552  -0.03756   -0.1154    0.4629     0.2209     0.8664
5    -2.44404  -2.03359   -1.4028   -1.9100    -1.8332    -3.1624
6    -0.97719  -1.53458   -0.5445   -0.9492    -0.4638    -0.4765
  node.dist
1   -0.9949
2   -0.6137
3   -0.6137
4   -0.6137
5    0.9112
6   -0.3087
levels(phes$unique_pop)
 [1] "Crandallii_1"            "Crandallii_2"           
 [3] "Crandallii_3"            "Crandallii_4"           
 [5] "Crandallii_5"            "Crandallii_6"           
 [7] "Crandallii_7"            "Fernaldiana_1"          
 [9] "Fernaldiana_2"           "Fernaldiana_3"          
[11] "Fernaldiana_4"           "Fernaldiana_5"          
[13] "Spatifolia_103"          "Spatifolia_105"         
[15] "Spatifolia_377"          "Spatifolia_Alvarado"    
[17] "Spatifolia_Antero"       "Spatifolia_Barr"        
[19] "Spatifolia_Brookvale"    "Spatifolia_Central"     
[21] "Spatifolia_Chicago"      "Spatifolia_Chiquito"    
[23] "Spatifolia_Cotopaxi"     "Spatifolia_Cripple"     
[25] "Spatifolia_Crosier"      "Spatifolia_Crystal"     
[27] "Spatifolia_Delnorte"     "Spatifolia_Gardner"     
[29] "Spatifolia_Goose"        "Spatifolia_Green"       
[31] "Spatifolia_Hondo2"       "Spatifolia_Pinegrove"   
[33] "Spatifolia_Poso"         "Spatifolia_Prince"      
[35] "Spatifolia_Rosita"       "Spatifolia_Round"       
[37] "Spatifolia_Royal"        "Spatifolia_Salida"      
[39] "Spatifolia_Sanluis2"     "Spatifolia_Tiesiding"   
[41] "Spatifolia_Virginiadale" "Stricta_1"              
[43] "Stricta_10"              "Stricta_3"              
[45] "Stricta_4"               "Stricta_5"              
[47] "Stricta_8"               "Stricta_a4"             
[49] "Stricta_f10"             "Stricta_F8"             
[51] "Stricta_g1"              "Stricta_g2"             
[53] "Stricta_g3"              "Stricta_g5"             
[55] "Stricta_o1"              "Stricta_o5"             
[57] "Stricta_wil0"            "Stricta_wil3"           
#filter dataset by any families with 1 or 2 reps
n_reps<-data.frame(tapply(phes$unique,phes$unique,function(x) length((x))))
colnames(n_reps)<-"n_reps"
phes_filtered<-phes[phes$unique %in% rownames(n_reps[n_reps$n_reps>=3,]),]
cat("removing families with 1 or 2 reps:  ","\n",rownames(n_reps[n_reps$n_reps<3,]))
removing families with 1 or 2 reps:   
 Crandallii_1_19 Crandallii_5_2 Crandallii_5_3 Crandallii_5_8 Crandallii_7_14 Fernaldiana_1_3 Fernaldiana_1_5 Fernaldiana_1_9 Fernaldiana_2_14 Fernaldiana_2_5 Fernaldiana_3_13 Fernaldiana_3_2 Fernaldiana_3_6 Fernaldiana_4_11 Fernaldiana_5_11 Fernaldiana_5_2 Spatifolia_Brookvale_2 Spatifolia_Central_3 Spatifolia_Central_7 Spatifolia_Cotopaxi_1 Spatifolia_Cotopaxi_3 Spatifolia_Cotopaxi_5 Spatifolia_Cotopaxi_7 Spatifolia_Crosier_1 Spatifolia_Crystal_10 Spatifolia_Rosita_3 Spatifolia_Rosita_9 Spatifolia_Royal_2 Spatifolia_Salida_5 Spatifolia_Virginiadale_2 Spatifolia_Virginiadale_3 Stricta_3_20 Stricta_a4_14 Stricta_a4_16 Stricta_F8_11 Stricta_F8_7 Stricta_o5_5
#filter by number of plants and fams w/in pops 
n_fams<-data.frame(tapply(phes_filtered$family,phes_filtered$unique_pop,function(x) length(unique(x))))
n_plants<-data.frame(tapply(phes_filtered$family,phes_filtered$unique_pop,function(x) length((x))))
tofilter_pop<-cbind(n_fams,n_plants)
colnames(tofilter_pop)<-c("n_fams","n_plants")
par(mfrow=c(2,1)) 
hist(tofilter_pop$n_fams,breaks=10)
hist(tofilter_pop$n_plants,breaks=20)

plot of chunk unnamed-chunk-1

phes_filtered<-phes[phes$unique_pop %in% rownames(tofilter_pop[tofilter_pop$n_fams>=4,]),] 
cat("removing pops w/ less than 4 families:  ","\n",rownames(tofilter_pop[tofilter_pop$n_fams<4,]))
removing pops w/ less than 4 families:   
 Spatifolia_Cotopaxi Spatifolia_Rosita Spatifolia_Royal Spatifolia_Virginiadale Stricta_a4 Stricta_f10 Stricta_F8
phes_filtered<-phes_filtered[complete.cases(phes_filtered),]
phe_list_filtered<-split(phes_filtered,phes_filtered$unique_pop)

######################################
#8 calculate total and among family variance w/in pops
count=1
outlist=list()
for (i in 1:47){
  dat<-as.data.frame(phe_list_filtered[i])
  if(length(dat[,1])>2){
    for (j in 6:13){
      model<-lmer(dat[,j]~1+(1|dat[,3]),data=dat)
      outlist[[count]]<-data.frame(strsplit(names(phe_list_filtered)[i],"_")[[1]][1],
                                 names(phe_list_filtered[i]),names(phes[j]),
                                 as.numeric(VarCorr(model)), attr(VarCorr(model), "sc")^2 )
      count<-count+1
    }
  }
  cat(names(phe_list_filtered[i]),"...",sep="")
}
Crandallii_1...Crandallii_2...Crandallii_3...Crandallii_4...Crandallii_5...Crandallii_6...Crandallii_7...Fernaldiana_1...Fernaldiana_2...Fernaldiana_3...Fernaldiana_4...Fernaldiana_5...Spatifolia_103...Spatifolia_105...Spatifolia_377...Spatifolia_Alvarado...Spatifolia_Antero...Spatifolia_Barr...Spatifolia_Brookvale...Spatifolia_Central...Spatifolia_Chicago...Spatifolia_Chiquito...Spatifolia_Cotopaxi...Spatifolia_Cripple...Spatifolia_Crosier...Spatifolia_Crystal...Spatifolia_Delnorte...Spatifolia_Gardner...Spatifolia_Goose...Spatifolia_Green...Spatifolia_Hondo2...Spatifolia_Pinegrove...Spatifolia_Poso...Spatifolia_Prince...Spatifolia_Rosita...Spatifolia_Round...Spatifolia_Royal...Spatifolia_Salida...Spatifolia_Sanluis2...Spatifolia_Tiesiding...Spatifolia_Virginiadale...Stricta_1...Stricta_10...Stricta_3...Stricta_4...Stricta_5...Stricta_8...
allwithinpopvc <- ldply(outlist, data.frame)
colnames(allwithinpopvc)<-c("species","pop","phenotype","amongfam_var","error_var")
allwithinpopvc$total_var<-allwithinpopvc$amongfam_var+allwithinpopvc$error_var
allwithinpopvc$prop_amongfams<-allwithinpopvc$amongfam_var/allwithinpopvc$total_var
allwithinpop_totalvar<-dcast(allwithinpopvc, species+pop~phenotype,value.var="total_var")
head(allwithinpop_totalvar)
     species          pop area_spring area_winter ht_spring ht_winter
1 Crandallii Crandallii_1      0.8272      0.8643    0.9541    0.5654
2 Crandallii Crandallii_2      1.2973      0.3964    0.5482    0.5845
3 Crandallii Crandallii_3      0.6425      0.7776    0.2298    0.3371
4 Crandallii Crandallii_4      0.6992      0.9139    0.7450    0.9592
5 Crandallii Crandallii_5      2.6934      3.6997    8.5011    5.9954
6 Crandallii Crandallii_6      0.9858      1.1012    1.1705    1.0203
  leaf.size lvs_spring lvs_winter node.dist
1    0.6288     0.3590     1.1049    0.5434
2    1.3843     2.5847     0.5074    0.8160
3    0.7385     0.1172     0.3992    0.3751
4    0.6604     0.1350     0.6665    0.9004
5    3.6743     0.2772     1.5959    3.3326
6    0.9330     0.3838     1.3268    0.9451
allwithinpop_propfam<-dcast(allwithinpopvc, species+pop~phenotype,value.var="prop_amongfams")
head(allwithinpop_propfam)
     species          pop area_spring area_winter ht_spring ht_winter
1 Crandallii Crandallii_1     0.21923     0.35975    0.5366   0.15072
2 Crandallii Crandallii_2     0.04272     0.07114    0.5248   0.20672
3 Crandallii Crandallii_3     0.42829     0.24807    0.2087   0.07202
4 Crandallii Crandallii_4     0.18660     0.00000    0.1086   0.05333
5 Crandallii Crandallii_5     0.85892     0.89344    0.9316   0.91566
6 Crandallii Crandallii_6     0.39571     0.45344    0.4026   0.37999
  leaf.size lvs_spring lvs_winter node.dist
1   0.08913    0.48854    0.60285 0.000e+00
2   0.00000    0.04650    0.02804 2.140e-02
3   0.29510    0.03031    0.00000 5.137e-02
4   0.23041    0.00000    0.00000 2.500e-15
5   0.87518    0.37870    0.16801 5.178e-01
6   0.32297    0.23914    0.30048 0.000e+00
######################################
#9 calculate pairwise variance among pops
out<-list()
count=1
species<-unique(phes_filtered$species)
for (i in 1:4){
  onespecies<-phes_filtered[phes_filtered$species==species[i],]
  pops<-unique(onespecies$unique_pop)
  pwisecombs<-combn(pops,2)
  cat("\n",as.character(species[i]),": n.combinations = ",length(pwisecombs[1,]),"\n")
  for (j in 1:length(pwisecombs[1,])){
    twopops<-phes_filtered[phes_filtered$unique_pop==pwisecombs[1,j]|phes_filtered$unique_pop==pwisecombs[2,j],]
    pwisedist<-earth.dist(ll[ll$unique==as.character(pwisecombs[1,j]) | ll$unique==as.character(pwisecombs[2,j]) ,][,c(5,4)])
    cat(j,",",sep="")
    for(k in 6:13){
      fm1 <- lmer(twopops[,k]~1+(1|unique_pop)+(1|unique),data=twopops)
      out[[count]]<-data.frame(species[i],pwisecombs[1,j],pwisecombs[2,j],as.numeric(pwisedist),colnames(twopops)[k],
        as.numeric(VarCorr(fm1))[1],as.numeric(VarCorr(fm1))[2], attr(VarCorr(fm1), "sc")^2)
      count=count+1
    }
  }
}

 Crandallii : n.combinations =  21 
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
 Fernaldiana : n.combinations =  10 
1,2,3,4,5,6,7,8,9,10,
 Spatifolia : n.combinations =  300 
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,
Warning: Model failed to converge: degenerate Hessian with 1 negative
eigenvalues
254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,
 Stricta : n.combinations =  91 
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,
allpairwisevc <- ldply(out, data.frame)
names(allpairwisevc)<-c("species","pop1","pop2","geo_distance","phenotype","amongfam_var","btwpop_var","error_var")
allpairwisevc$total_var<-allpairwisevc$amongfam_var+allpairwisevc$error_var
allpairwisevc$prop_amongfams<-allpairwisevc$amongfam_var/allpairwisevc$total_var
allpairwisevc$qst<-allpairwisevc$btwpop_var/(allpairwisevc$btwpop_var+allpairwisevc$amongfam_var)
pairwiseqst<-dcast(allpairwisevc, species+pop1+pop2+geo_distance~phenotype,value.var="qst")
pairwiseqst_filtered<-pairwiseqst[pairwiseqst$geo_distance<133,]
summary(pairwiseqst_filtered)
        species                     pop1                       pop2    
 Crandallii : 21   Spatifolia_377     : 14   Spatifolia_Salida   : 14  
 Fernaldiana: 10   Spatifolia_Alvarado: 14   Spatifolia_Round    : 13  
 Spatifolia :142   Spatifolia_Antero  : 12   Spatifolia_Pinegrove: 12  
 Stricta    : 18   Spatifolia_103     : 11   Spatifolia_Prince   : 12  
                   Spatifolia_Barr    : 11   Spatifolia_Goose    : 10  
                   Spatifolia_105     :  8   Spatifolia_Green    :  9  
                   (Other)            :121   (Other)             :121  
  geo_distance     area_spring     area_winter      ht_spring    
 Min.   :  1.11   Min.   :0.000   Min.   :0.000   Min.   :0.000  
 1st Qu.: 43.36   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
 Median : 75.15   Median :0.155   Median :0.239   Median :0.230  
 Mean   : 74.08   Mean   :0.313   Mean   :0.347   Mean   :0.335  
 3rd Qu.:108.17   3rd Qu.:0.546   3rd Qu.:0.681   3rd Qu.:0.645  
 Max.   :132.28   Max.   :1.000   Max.   :1.000   Max.   :1.000  
                  NA's   :6       NA's   :2       NA's   :3      
   ht_winter       leaf.size       lvs_spring      lvs_winter   
 Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
 1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
 Median :0.303   Median :0.218   Median :0.077   Median :0.237  
 Mean   :0.366   Mean   :0.366   Mean   :0.270   Mean   :0.333  
 3rd Qu.:0.664   3rd Qu.:0.681   3rd Qu.:0.441   3rd Qu.:0.593  
 Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.000  
 NA's   :3       NA's   :1       NA's   :8       NA's   :11     
   node.dist    
 Min.   :0.000  
 1st Qu.:0.000  
 Median :0.424  
 Mean   :0.438  
 3rd Qu.:0.832  
 Max.   :1.000  
 NA's   :8      
count<-1
out<-list()
for (i in 1:length(unique(phes_filtered$unique_pop))){
  pop<-pairwiseqst_filtered[pairwiseqst_filtered$pop1==unique(phes_filtered$unique_pop)[i]|pairwiseqst_filtered$pop2==unique(phes_filtered$unique_pop)[i],]
  info<-data.frame(pop$species[1],as.character(unique(phes_filtered$unique_pop)[i]),mean(pop$geo_distance))
  meanqst<-data.frame(apply(pop[,5:12],2,function(x) mean(na.omit(x))))
  colnames(meanqst)<-i
  out[[count]]<-data.frame(info,t(meanqst))
  count<-count+1
}
allmeanqst<- ldply(out, data.frame)
colnames(allmeanqst)[1:3]<-c("species","pop","mean_geodist")
meanqst_filtered<-allmeanqst[allmeanqst$pop %in% allwithinpop_propfam$pop,]
meanqst_filtered<-meanqst_filtered[complete.cases(meanqst_filtered),]
head(meanqst_filtered)
     species          pop mean_geodist area_spring area_winter ht_spring
1 Crandallii Crandallii_1        84.70     0.11120     0.16258 4.510e-02
2 Crandallii Crandallii_2        65.78     0.10457     0.32762 9.043e-16
3 Crandallii Crandallii_3        77.45     0.05130     0.17851 5.938e-03
4 Crandallii Crandallii_4        52.69     0.16004     0.64928 2.456e-02
5 Crandallii Crandallii_5        54.26     0.06376     0.09023 1.515e-02
6 Crandallii Crandallii_6        55.61     0.30774     0.30902 5.513e-03
  ht_winter leaf.size lvs_spring lvs_winter node.dist
1   0.24171   0.27065     0.1280     0.1910    0.8333
2   0.40208   0.23616     0.1631     0.5427    0.7442
3   0.19398   0.05997     0.3354     0.5201    0.4286
4   0.34788   0.05351     0.6048     0.7268    0.4358
5   0.05757   0.09183     0.2674     0.1523    0.3872
6   0.22106   0.13743     0.5816     0.4962    0.8295
######################################
# 10 Prepare allquantgen estimates
#stats: qst (pairwiseqst_filtered), among fam var (allwithinpop_propfam), total w/in pop var (allwithinpop_totalvar)
meanqst_filtered<-meanqst_filtered[,-3]
meanqst_filtered$stat<-"qst"
allwithinpop_propfam$stat<-"prop_amongfams"
allwithinpop_totalvar$stat<-"totalvar_withinpops"
par(mfrow=c(1,1)) 
pairs(meanqst_filtered[,3:10])

plot of chunk unnamed-chunk-1

pairs(allwithinpop_propfam[,3:10])

plot of chunk unnamed-chunk-1

pairs(allwithinpop_totalvar[,3:10]) #remove extreme outlier

plot of chunk unnamed-chunk-1

allwithinpop_totalvar<-allwithinpop_totalvar[!allwithinpop_totalvar$area_spring==max(allwithinpop_totalvar$area_spring),]
pairs(allwithinpop_totalvar[,3:10])

plot of chunk unnamed-chunk-1

pca1<-prcomp(meanqst_filtered[,3:10])
(pca1$sdev)^2 / sum(pca1$sdev^2) #%var explained
[1] 0.511854 0.201358 0.099012 0.077710 0.056821 0.026033 0.018331 0.008879
cumsum((pca1$sdev)^2) / sum(pca1$sdev^2) #cumulative %var explained
[1] 0.5119 0.7132 0.8122 0.8899 0.9468 0.9728 0.9911 1.0000
meanqst_filtered<-cbind(meanqst_filtered,pca1$x[,1:2])

pca1<-prcomp(allwithinpop_propfam[,3:10])
(pca1$sdev)^2 / sum(pca1$sdev^2) #%var explained
[1] 0.509787 0.207130 0.129482 0.067552 0.035709 0.023099 0.019862 0.007378
cumsum((pca1$sdev)^2) / sum(pca1$sdev^2) #cumulative %var explained
[1] 0.5098 0.7169 0.8464 0.9140 0.9497 0.9728 0.9926 1.0000
allwithinpop_propfam<-cbind(allwithinpop_propfam,pca1$x[,1:2])

pca1<-prcomp(allwithinpop_totalvar[,3:10])
(pca1$sdev)^2 / sum(pca1$sdev^2) #%var explained
[1] 0.418480 0.266689 0.137908 0.096813 0.040565 0.022673 0.011377 0.005495
cumsum((pca1$sdev)^2) / sum(pca1$sdev^2) #cumulative %var explained
[1] 0.4185 0.6852 0.8231 0.9199 0.9605 0.9831 0.9945 1.0000
allwithinpop_totalvar<-cbind(allwithinpop_totalvar,pca1$x[,1:2])
allqgenstats<-rbind(allwithinpop_propfam,allwithinpop_totalvar,meanqst_filtered)


######################################
#11 plot values for each phenotype
plot(allqgenstats$PC1,allqgenstats$PC2)

plot of chunk unnamed-chunk-1

PC1 <- ddply(allqgenstats, c("species","stat"), summarise,
             meanPC1 = mean(PC1),
             sePC1   = sd(PC1) / sqrt(length(PC1)) )
PC2 <- ddply(allqgenstats, c("species","stat"), summarise,
               meanPC2 = mean(PC2),
               sePC2   = sd(PC2) / sqrt(length(PC2)) )
qgen_toplot<-as.data.frame(cbind(PC1,PC2[,3:4]))

ggplot(qgen_toplot,aes(x = meanPC1,y = meanPC2)) + 
  geom_point(aes(col=species)) + 
  geom_errorbar(aes(ymin = meanPC2-sePC2,ymax = meanPC2+sePC2,col=species)) + 
  geom_errorbarh(aes(xmin = meanPC1-sePC1,xmax = meanPC1+sePC1,col=species))+
  facet_grid(stat~., scales="free_y")+
  theme_bw()

plot of chunk unnamed-chunk-1

allqgen_long<-melt(allqgenstats,id.vars=c("species","pop","stat"))
allqgen_toplot <- ddply(allqgen_long, c("species","stat","variable"), summarise,
             meanValue = mean(value),
             seValue   = sd(value) / sqrt(length(value)) )
stats<-unique(allqgen_long$stat)
for (i in stats){
  print(ggplot(allqgen_long[allqgen_long$stat==i,], aes(x=species,y=value))+
          geom_boxplot(aes(col=species),ylim=c(0,1))+
          geom_point(aes(col=species),ylim=c(0,1))+
          facet_wrap(~variable, scales="free_y")+
          theme_bw()+
          ggtitle(paste("phenotype specific variation in",i, sep=" "))
  )
}

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1