rl2014_allnewanalyses.R

Lovell — Jun 5, 2014, 2:10 PM

rm(list=ls())
setwd("~/Desktop/R_directory")
library(gstudio)
Loading required package: ggplot2
Loading required package: raster
Loading required package: sp
library(plyr)
library(reshape)

Attaching package: 'reshape'

The following objects are masked from 'package:plyr':

    rename, round_any
###################
#Part I: SSR diversity for each population
#####
#2.1: Read in the data
gs<-read_population(
  "rl2013_allgenotypedata_pegas_noapo_nosmallpops_reanalysis_sppop.csv",
  type="separated",sep=",",locus.columns=4:18,header=T)
species<-unique(gs$species)
allsep_gen<-partition(gs,stratum="unique")
sppop<-lapply(allsep_gen,function(x) return(x[1,1:2]))
sppop<-data.frame(matrix(unlist(sppop), nrow=length(sppop), byrow=T))
colnames(sppop)<-c("species","pop")
hist(unlist(lapply(allsep_gen, function(x) length(x[,1])))) #look at distn of sample sizes

plot of chunk unnamed-chunk-1

#####
#1.2 estimate He, Ae and others for each population/allele combination after rarefaction
div_out<-list()
index_out<-list()
gendiv_indices<-c("He","Ae") #add index here. Fis doesn't work here.
for (i in gendiv_indices){
  cat("\n",paste("------",i,"------","\n",sep=""))
  for (j in 4:18){
    temp<-lapply(allsep_gen, function(x) mean(rarefaction(x[,j],mode=i,size=5, nperm=99)))
    temp<-data.frame(matrix(unlist(temp), nrow=length(temp)))
    cat(colnames(gs)[j],"...")
    div_out[[colnames(gs)[j]]]<-temp
  }
  div_df<-data.frame(matrix(unlist(div_out), nrow=43)) #process the output for each index
  colnames(div_df)<-colnames(gs)[4:18]
  rownames(div_df)<-names(allsep_gen)
  div_df<-cbind(sppop,div_df)
  is.na(div_df) <- sapply(div_df, is.infinite) #set infinite values to NA
  index_out[[i]]<-div_df
}

 ------He------
a1 ...bf11 ...bf18 ...b6 ...a3 ...bf20 ...c8 ...bf9 ...bf19 ...bdru266 ...ice3 ...ice14 ...e9 ...bf3 ...bf15 ...
 ------Ae------
a1 ...bf11 ...bf18 ...b6 ...a3 ...bf20 ...c8 ...bf9 ...bf19 ...bdru266 ...ice3 ...ice14 ...e9 ...bf3 ...bf15 ...
#####
# 1.3 Process the output
alldiv_df <- ldply(index_out, data.frame)
colnames(alldiv_df)[1]<-"index"
Ae_out<-alldiv_df[alldiv_df$index=="Ae",]
He_out<-alldiv_df[alldiv_df$index=="He",]
Ae.melted <- melt(Ae_out, id.vars = c("index","species","pop"))
He.melted <- melt(He_out, id.vars = c("index","species","pop"))
div_toplot<-cbind(Ae.melted[,2:5],He.melted[,5])
names(div_toplot)[3:5]<-c("locus","Ae","He")
#####
# 1.4: Plot the results
ggplot(div_toplot,aes(x=Ae,y=He))+
  facet_wrap(~species)+
  geom_point(alpha=0.8,aes(col=locus,fill=locus))+
  ggtitle("Diversity/species")+
  theme_bw()
Warning: Removed 4 rows containing missing values (geom_point).
Warning: Removed 6 rows containing missing values (geom_point).
Warning: Removed 2 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-1

ggplot(div_toplot,aes(x=Ae,group=species))+
  facet_wrap(~species)+
  geom_density(alpha=.2, fill="#FF6666")+
  ggtitle("Effective number of alleles for each population by species")+
  theme_bw()
Warning: Removed 4 rows containing non-finite values (stat_density).
Warning: Removed 6 rows containing non-finite values (stat_density).
Warning: Removed 2 rows containing non-finite values (stat_density).

plot of chunk unnamed-chunk-1

ggplot(div_toplot,aes(x=He,group=species))+
  facet_wrap(~species)+
  geom_density(alpha=.2, fill="#FF6666")+
  ggtitle("Expected Heterozygosity for each population by species")+
  theme_bw()

plot of chunk unnamed-chunk-1

###################
#Part II: Phenotypic Variance components
#2.1: Read in the data
library(nlme)

Attaching package: 'nlme'

The following object is masked from 'package:raster':

    getData
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'

The following object is masked from 'package:reshape':

    expand

Loading required package: Rcpp

Attaching package: 'lme4'

The following object is masked from 'package:nlme':

    lmList
library(ggplot2)
phes<-read.csv("rl2013_allphenotypes_reanalysis_scalecenter.csv",header=T)
head(phes)
          sp pop family          unique   unique_pop area_spring
1 Crandallii   1     12 Crandallii_1_12 Crandallii_1     -0.3590
2 Crandallii   1     12 Crandallii_1_12 Crandallii_1      0.5693
3 Crandallii   1     12 Crandallii_1_12 Crandallii_1      0.6472
4 Crandallii   1     12 Crandallii_1_12 Crandallii_1      0.6450
5 Crandallii   1     16 Crandallii_1_16 Crandallii_1     -2.4179
6 Crandallii   1     16 Crandallii_1_16 Crandallii_1     -1.1280
  area_winter ht_spring ht_winter leaf.size lvs_spring lvs_winter
1     0.08905  -0.03756   -0.5445   -0.5496     0.2209     0.8664
2     0.26551  -0.53657   -0.1154    0.3866     0.2209     0.8664
3     0.54796  -0.53657   -0.1154    0.4651     0.2209     0.8664
4     0.39552  -0.03756   -0.1154    0.4629     0.2209     0.8664
5    -2.44404  -2.03359   -1.4028   -1.9100    -1.8332    -3.1624
6    -0.97719  -1.53458   -0.5445   -0.9492    -0.4638    -0.4765
  node.dist
1   -0.9949
2   -0.6137
3   -0.6137
4   -0.6137
5    0.9112
6   -0.3087
levels(phes$unique_pop)
 [1] "Crandallii_1"            "Crandallii_2"           
 [3] "Crandallii_3"            "Crandallii_4"           
 [5] "Crandallii_5"            "Crandallii_6"           
 [7] "Crandallii_7"            "Fernaldiana_1"          
 [9] "Fernaldiana_2"           "Fernaldiana_3"          
[11] "Fernaldiana_4"           "Fernaldiana_5"          
[13] "Spatifolia_103"          "Spatifolia_105"         
[15] "Spatifolia_377"          "Spatifolia_Alvarado"    
[17] "Spatifolia_Antero"       "Spatifolia_Barr"        
[19] "Spatifolia_Brookvale"    "Spatifolia_Central"     
[21] "Spatifolia_Chicago"      "Spatifolia_Chiquito"    
[23] "Spatifolia_Cotopaxi"     "Spatifolia_Cripple"     
[25] "Spatifolia_Crosier"      "Spatifolia_Crystal"     
[27] "Spatifolia_Delnorte"     "Spatifolia_Gardner"     
[29] "Spatifolia_Goose"        "Spatifolia_Green"       
[31] "Spatifolia_Hondo2"       "Spatifolia_Pinegrove"   
[33] "Spatifolia_Poso"         "Spatifolia_Prince"      
[35] "Spatifolia_Rosita"       "Spatifolia_Round"       
[37] "Spatifolia_Royal"        "Spatifolia_Salida"      
[39] "Spatifolia_Sanluis2"     "Spatifolia_Tiesiding"   
[41] "Spatifolia_Virginiadale" "Stricta_1"              
[43] "Stricta_10"              "Stricta_3"              
[45] "Stricta_4"               "Stricta_5"              
[47] "Stricta_8"               "Stricta_a4"             
[49] "Stricta_f10"             "Stricta_F8"             
[51] "Stricta_g1"              "Stricta_g2"             
[53] "Stricta_g3"              "Stricta_G3"             
[55] "Stricta_g5"              "Stricta_o1"             
[57] "Stricta_o5"              "Stricta_wil0"           
[59] "Stricta_wil3"           
phe_list<-split(phes, phes$unique_pop) #split the df into a list to loop through
#####
#2.2: Filter the populations by number of families and number of plants
n_fams<-as.numeric(lapply(phe_list,function(x) length(unique(x[,4]))))
n_plants<-as.numeric(lapply(phe_list,function(x) length((x[,4]))))
phe_list_filtered<-phe_list[n_fams>4] #remove pops w/ less than 4 families
cat("removing:  ",names(phe_list)[n_fams<4])
removing:   Spatifolia_Royal Stricta_f10 Stricta_G3
phe_list_filtered<-phe_list_filtered[n_plants>15]
cat("removing:  ",names(phe_list)[n_plants<15])
removing:   Spatifolia_Rosita Spatifolia_Royal Spatifolia_Salida Spatifolia_Virginiadale Stricta_a4 Stricta_f10 Stricta_F8 Stricta_G3 Stricta_o5
#####
#2.3: extract variance components for each population
outlist<-list()
count=1
for (i in 1:47){
  dat<-as.data.frame(phe_list_filtered[i])
  for (j in 6:13){
    model<-lmer(dat[,j]~1+(1|dat[,3]),data=dat)
    outlist[[count]]<-data.frame(strsplit(names(phe_list_filtered)[i],"_")[[1]][1],
        names(phe_list_filtered[i]),names(phes[j]),
        as.numeric(VarCorr(model)), attr(VarCorr(model), "sc")^2 )
    count<-count+1
  }
  cat(names(phe_list_filtered[i]),"...",sep="")
}
Crandallii_1...Crandallii_2...Crandallii_3...Crandallii_4...Crandallii_5...Crandallii_6...Crandallii_7...Fernaldiana_1...Fernaldiana_2...Fernaldiana_3...Fernaldiana_4...Fernaldiana_5...Spatifolia_103...Spatifolia_105...Spatifolia_377...Spatifolia_Alvarado...Spatifolia_Antero...Spatifolia_Barr...Spatifolia_Brookvale...Spatifolia_Central...Spatifolia_Chicago...Spatifolia_Chiquito...Spatifolia_Cotopaxi...Spatifolia_Cripple...Spatifolia_Crosier...Spatifolia_Crystal...Spatifolia_Delnorte...Spatifolia_Gardner...Spatifolia_Goose...Spatifolia_Green...Spatifolia_Hondo2...Spatifolia_Pinegrove...Spatifolia_Poso...Spatifolia_Prince...Spatifolia_Round...Spatifolia_Tiesiding...Spatifolia_Virginiadale...Stricta_10...Stricta_3...Stricta_4...Stricta_5...Stricta_8...Stricta_F8...Stricta_g5...Stricta_o1...Stricta_o5...Stricta_wil3...
####
#2.4: Process the data for plotting
allvc <- ldply(outlist, data.frame)
names(allvc)<-c("species","population","phenotype","amongfam_var","error_var")
allvc$total_var<-allvc$amongfam_var+allvc$error_var
allvc$prop_amongfams<-allvc$amongfam_var/allvc$total_var
####
#2.5 plot the results
ggplot(allvc,aes(x=prop_amongfams,y=total_var,group=species))+
  geom_point(aes(colour=species))+
  stat_ellipse(aes(colour=species))+
  ggtitle("Among-family and total variance for all population")+
  theme_bw()

plot of chunk unnamed-chunk-1

#potentially need to exclude some points
ggplot(allvc,aes(x=prop_amongfams,group=species))+
  facet_wrap(~species)+
  geom_histogram(aes(colour=species),fill="white", binwidth=1/30)+
  geom_density(alpha=.2, fill="#FF6666")+
  ggtitle("Among-family variance distribution for all population")+
  theme_bw()

plot of chunk unnamed-chunk-1

ggplot(allvc,aes(x=prop_amongfams,group=species))+
  geom_density(alpha=.2, aes(col=species,fill=species))+
  ggtitle("Among-family variance distribution for all population")+
  theme_bw()

plot of chunk unnamed-chunk-1