Lovell — Jun 5, 2014, 2:10 PM
rm(list=ls())
setwd("~/Desktop/R_directory")
library(gstudio)
Loading required package: ggplot2
Loading required package: raster
Loading required package: sp
library(plyr)
library(reshape)
Attaching package: 'reshape'
The following objects are masked from 'package:plyr':
rename, round_any
###################
#Part I: SSR diversity for each population
#####
#2.1: Read in the data
gs<-read_population(
"rl2013_allgenotypedata_pegas_noapo_nosmallpops_reanalysis_sppop.csv",
type="separated",sep=",",locus.columns=4:18,header=T)
species<-unique(gs$species)
allsep_gen<-partition(gs,stratum="unique")
sppop<-lapply(allsep_gen,function(x) return(x[1,1:2]))
sppop<-data.frame(matrix(unlist(sppop), nrow=length(sppop), byrow=T))
colnames(sppop)<-c("species","pop")
hist(unlist(lapply(allsep_gen, function(x) length(x[,1])))) #look at distn of sample sizes
#####
#1.2 estimate He, Ae and others for each population/allele combination after rarefaction
div_out<-list()
index_out<-list()
gendiv_indices<-c("He","Ae") #add index here. Fis doesn't work here.
for (i in gendiv_indices){
cat("\n",paste("------",i,"------","\n",sep=""))
for (j in 4:18){
temp<-lapply(allsep_gen, function(x) mean(rarefaction(x[,j],mode=i,size=5, nperm=99)))
temp<-data.frame(matrix(unlist(temp), nrow=length(temp)))
cat(colnames(gs)[j],"...")
div_out[[colnames(gs)[j]]]<-temp
}
div_df<-data.frame(matrix(unlist(div_out), nrow=43)) #process the output for each index
colnames(div_df)<-colnames(gs)[4:18]
rownames(div_df)<-names(allsep_gen)
div_df<-cbind(sppop,div_df)
is.na(div_df) <- sapply(div_df, is.infinite) #set infinite values to NA
index_out[[i]]<-div_df
}
------He------
a1 ...bf11 ...bf18 ...b6 ...a3 ...bf20 ...c8 ...bf9 ...bf19 ...bdru266 ...ice3 ...ice14 ...e9 ...bf3 ...bf15 ...
------Ae------
a1 ...bf11 ...bf18 ...b6 ...a3 ...bf20 ...c8 ...bf9 ...bf19 ...bdru266 ...ice3 ...ice14 ...e9 ...bf3 ...bf15 ...
#####
# 1.3 Process the output
alldiv_df <- ldply(index_out, data.frame)
colnames(alldiv_df)[1]<-"index"
Ae_out<-alldiv_df[alldiv_df$index=="Ae",]
He_out<-alldiv_df[alldiv_df$index=="He",]
Ae.melted <- melt(Ae_out, id.vars = c("index","species","pop"))
He.melted <- melt(He_out, id.vars = c("index","species","pop"))
div_toplot<-cbind(Ae.melted[,2:5],He.melted[,5])
names(div_toplot)[3:5]<-c("locus","Ae","He")
#####
# 1.4: Plot the results
ggplot(div_toplot,aes(x=Ae,y=He))+
facet_wrap(~species)+
geom_point(alpha=0.8,aes(col=locus,fill=locus))+
ggtitle("Diversity/species")+
theme_bw()
Warning: Removed 4 rows containing missing values (geom_point).
Warning: Removed 6 rows containing missing values (geom_point).
Warning: Removed 2 rows containing missing values (geom_point).
ggplot(div_toplot,aes(x=Ae,group=species))+
facet_wrap(~species)+
geom_density(alpha=.2, fill="#FF6666")+
ggtitle("Effective number of alleles for each population by species")+
theme_bw()
Warning: Removed 4 rows containing non-finite values (stat_density).
Warning: Removed 6 rows containing non-finite values (stat_density).
Warning: Removed 2 rows containing non-finite values (stat_density).
ggplot(div_toplot,aes(x=He,group=species))+
facet_wrap(~species)+
geom_density(alpha=.2, fill="#FF6666")+
ggtitle("Expected Heterozygosity for each population by species")+
theme_bw()
###################
#Part II: Phenotypic Variance components
#2.1: Read in the data
library(nlme)
Attaching package: 'nlme'
The following object is masked from 'package:raster':
getData
library(lme4)
Loading required package: Matrix
Attaching package: 'Matrix'
The following object is masked from 'package:reshape':
expand
Loading required package: Rcpp
Attaching package: 'lme4'
The following object is masked from 'package:nlme':
lmList
library(ggplot2)
phes<-read.csv("rl2013_allphenotypes_reanalysis_scalecenter.csv",header=T)
head(phes)
sp pop family unique unique_pop area_spring
1 Crandallii 1 12 Crandallii_1_12 Crandallii_1 -0.3590
2 Crandallii 1 12 Crandallii_1_12 Crandallii_1 0.5693
3 Crandallii 1 12 Crandallii_1_12 Crandallii_1 0.6472
4 Crandallii 1 12 Crandallii_1_12 Crandallii_1 0.6450
5 Crandallii 1 16 Crandallii_1_16 Crandallii_1 -2.4179
6 Crandallii 1 16 Crandallii_1_16 Crandallii_1 -1.1280
area_winter ht_spring ht_winter leaf.size lvs_spring lvs_winter
1 0.08905 -0.03756 -0.5445 -0.5496 0.2209 0.8664
2 0.26551 -0.53657 -0.1154 0.3866 0.2209 0.8664
3 0.54796 -0.53657 -0.1154 0.4651 0.2209 0.8664
4 0.39552 -0.03756 -0.1154 0.4629 0.2209 0.8664
5 -2.44404 -2.03359 -1.4028 -1.9100 -1.8332 -3.1624
6 -0.97719 -1.53458 -0.5445 -0.9492 -0.4638 -0.4765
node.dist
1 -0.9949
2 -0.6137
3 -0.6137
4 -0.6137
5 0.9112
6 -0.3087
levels(phes$unique_pop)
[1] "Crandallii_1" "Crandallii_2"
[3] "Crandallii_3" "Crandallii_4"
[5] "Crandallii_5" "Crandallii_6"
[7] "Crandallii_7" "Fernaldiana_1"
[9] "Fernaldiana_2" "Fernaldiana_3"
[11] "Fernaldiana_4" "Fernaldiana_5"
[13] "Spatifolia_103" "Spatifolia_105"
[15] "Spatifolia_377" "Spatifolia_Alvarado"
[17] "Spatifolia_Antero" "Spatifolia_Barr"
[19] "Spatifolia_Brookvale" "Spatifolia_Central"
[21] "Spatifolia_Chicago" "Spatifolia_Chiquito"
[23] "Spatifolia_Cotopaxi" "Spatifolia_Cripple"
[25] "Spatifolia_Crosier" "Spatifolia_Crystal"
[27] "Spatifolia_Delnorte" "Spatifolia_Gardner"
[29] "Spatifolia_Goose" "Spatifolia_Green"
[31] "Spatifolia_Hondo2" "Spatifolia_Pinegrove"
[33] "Spatifolia_Poso" "Spatifolia_Prince"
[35] "Spatifolia_Rosita" "Spatifolia_Round"
[37] "Spatifolia_Royal" "Spatifolia_Salida"
[39] "Spatifolia_Sanluis2" "Spatifolia_Tiesiding"
[41] "Spatifolia_Virginiadale" "Stricta_1"
[43] "Stricta_10" "Stricta_3"
[45] "Stricta_4" "Stricta_5"
[47] "Stricta_8" "Stricta_a4"
[49] "Stricta_f10" "Stricta_F8"
[51] "Stricta_g1" "Stricta_g2"
[53] "Stricta_g3" "Stricta_G3"
[55] "Stricta_g5" "Stricta_o1"
[57] "Stricta_o5" "Stricta_wil0"
[59] "Stricta_wil3"
phe_list<-split(phes, phes$unique_pop) #split the df into a list to loop through
#####
#2.2: Filter the populations by number of families and number of plants
n_fams<-as.numeric(lapply(phe_list,function(x) length(unique(x[,4]))))
n_plants<-as.numeric(lapply(phe_list,function(x) length((x[,4]))))
phe_list_filtered<-phe_list[n_fams>4] #remove pops w/ less than 4 families
cat("removing: ",names(phe_list)[n_fams<4])
removing: Spatifolia_Royal Stricta_f10 Stricta_G3
phe_list_filtered<-phe_list_filtered[n_plants>15]
cat("removing: ",names(phe_list)[n_plants<15])
removing: Spatifolia_Rosita Spatifolia_Royal Spatifolia_Salida Spatifolia_Virginiadale Stricta_a4 Stricta_f10 Stricta_F8 Stricta_G3 Stricta_o5
#####
#2.3: extract variance components for each population
outlist<-list()
count=1
for (i in 1:47){
dat<-as.data.frame(phe_list_filtered[i])
for (j in 6:13){
model<-lmer(dat[,j]~1+(1|dat[,3]),data=dat)
outlist[[count]]<-data.frame(strsplit(names(phe_list_filtered)[i],"_")[[1]][1],
names(phe_list_filtered[i]),names(phes[j]),
as.numeric(VarCorr(model)), attr(VarCorr(model), "sc")^2 )
count<-count+1
}
cat(names(phe_list_filtered[i]),"...",sep="")
}
Crandallii_1...Crandallii_2...Crandallii_3...Crandallii_4...Crandallii_5...Crandallii_6...Crandallii_7...Fernaldiana_1...Fernaldiana_2...Fernaldiana_3...Fernaldiana_4...Fernaldiana_5...Spatifolia_103...Spatifolia_105...Spatifolia_377...Spatifolia_Alvarado...Spatifolia_Antero...Spatifolia_Barr...Spatifolia_Brookvale...Spatifolia_Central...Spatifolia_Chicago...Spatifolia_Chiquito...Spatifolia_Cotopaxi...Spatifolia_Cripple...Spatifolia_Crosier...Spatifolia_Crystal...Spatifolia_Delnorte...Spatifolia_Gardner...Spatifolia_Goose...Spatifolia_Green...Spatifolia_Hondo2...Spatifolia_Pinegrove...Spatifolia_Poso...Spatifolia_Prince...Spatifolia_Round...Spatifolia_Tiesiding...Spatifolia_Virginiadale...Stricta_10...Stricta_3...Stricta_4...Stricta_5...Stricta_8...Stricta_F8...Stricta_g5...Stricta_o1...Stricta_o5...Stricta_wil3...
####
#2.4: Process the data for plotting
allvc <- ldply(outlist, data.frame)
names(allvc)<-c("species","population","phenotype","amongfam_var","error_var")
allvc$total_var<-allvc$amongfam_var+allvc$error_var
allvc$prop_amongfams<-allvc$amongfam_var/allvc$total_var
####
#2.5 plot the results
ggplot(allvc,aes(x=prop_amongfams,y=total_var,group=species))+
geom_point(aes(colour=species))+
stat_ellipse(aes(colour=species))+
ggtitle("Among-family and total variance for all population")+
theme_bw()
#potentially need to exclude some points
ggplot(allvc,aes(x=prop_amongfams,group=species))+
facet_wrap(~species)+
geom_histogram(aes(colour=species),fill="white", binwidth=1/30)+
geom_density(alpha=.2, fill="#FF6666")+
ggtitle("Among-family variance distribution for all population")+
theme_bw()
ggplot(allvc,aes(x=prop_amongfams,group=species))+
geom_density(alpha=.2, aes(col=species,fill=species))+
ggtitle("Among-family variance distribution for all population")+
theme_bw()