This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

### load libraries
library(adegenet)

### read in data -- GenePop format, converted to genind format
popdata <- read.genepop("Cor_striata_ssrgenotyper_06.pop.gen", ncode = 3L)

 Converting data from a Genepop .gen file to a genind object... 


File description:  SSR markers for Cor_striata_ssrgenotyper_05 
Duplicate individual names detected. Coercing them to be unique.

...done.
popdata
/// GENIND OBJECT /////////

 // 83 individuals; 19 loci; 79 alleles; size: 53.4 Kb

 // Basic content
   @tab:  83 x 79 matrix of allele counts
   @loc.n.all: number of alleles per locus (range: 2-5)
   @loc.fac: locus factor for the 79 columns of @tab
   @all.names: list of allele names for each locus
   @ploidy: ploidy of each individual  (range: 2-2)
   @type:  codom
   @call: read.genepop(file = "Cor_striata_ssrgenotyper_06.pop.gen", 
    ncode = 3L)

 // Optional content
   @pop: population of each individual (group size range: 16-26)
# /// GENIND OBJECT /////////                                             #
#                                                                         #
#  // 83 individuals; 19 loci; 79 alleles; size: 53.4 Kb                  #
#                                                                         #
#  // Basic content                                                       #
#    @tab:  83 x 79 matrix of allele counts                               #
#    @loc.n.all: number of alleles per locus (range: 2-5)                 #
#    @loc.fac: locus factor for the 79 columns of @tab                    #
#    @all.names: list of allele names for each locus                      #
#    @ploidy: ploidy of each individual  (range: 2-2)                     #
#    @type:  codom                                                        #
#    @call: read.genepop(file = "Cor_striata_ssrgenotyper_06.pop.gen",    #
#     ncode = 3L)                                                         #
#                                                                         #
# // Optional content                                                      #
#   @pop: population of each individual (group size range: 16-26)          #


### Just checking...
is.genind(popdata)
[1] TRUE
### define "populations"
pop(popdata) <- rep(c("vreelandii","striata","Sierra_Nevada","Coast_Cascades"), c(22,26,16,19))
pop(popdata)
 [1] vreelandii     vreelandii     vreelandii     vreelandii     vreelandii    
 [6] vreelandii     vreelandii     vreelandii     vreelandii     vreelandii    
[11] vreelandii     vreelandii     vreelandii     vreelandii     vreelandii    
[16] vreelandii     vreelandii     vreelandii     vreelandii     vreelandii    
[21] vreelandii     vreelandii     striata        striata        striata       
[26] striata        striata        striata        striata        striata       
[31] striata        striata        striata        striata        striata       
[36] striata        striata        striata        striata        striata       
[41] striata        striata        striata        striata        striata       
[46] striata        striata        striata        Sierra_Nevada  Sierra_Nevada 
[51] Sierra_Nevada  Sierra_Nevada  Sierra_Nevada  Sierra_Nevada  Sierra_Nevada 
[56] Sierra_Nevada  Sierra_Nevada  Sierra_Nevada  Sierra_Nevada  Sierra_Nevada 
[61] Sierra_Nevada  Sierra_Nevada  Sierra_Nevada  Sierra_Nevada  Coast_Cascades
[66] Coast_Cascades Coast_Cascades Coast_Cascades Coast_Cascades Coast_Cascades
[71] Coast_Cascades Coast_Cascades Coast_Cascades Coast_Cascades Coast_Cascades
[76] Coast_Cascades Coast_Cascades Coast_Cascades Coast_Cascades Coast_Cascades
[81] Coast_Cascades Coast_Cascades Coast_Cascades
Levels: vreelandii striata Sierra_Nevada Coast_Cascades
### set color scheme
mycol <- c("green","red","blue","magenta")

### Find clusters
grp <- find.clusters(popdata, max.n.clust = 40)
Choose the number PCs to retain (>= 1): 
70

Choose the number of clusters (>=2): 
4


# Choose the number PCs to retain (>= 1): 
6
[1] 6
# Choose the number of clusters (>=2): 
4
[1] 4
# Also, you can use K-means clustering to find the K with minimum BIC

maxK <- 10
myMat <- matrix(nrow=10, ncol=maxK)
colnames(myMat) <- 1:ncol(myMat)
for(i in 1:nrow(myMat)){
  grp <- find.clusters(popdata, n.pca = 40, choose.n.clust = FALSE,  max.n.clust = maxK)
  myMat[i,] <- grp$Kstat
}

library(reshape2)
my_df <- melt(myMat)
colnames(my_df)[1:3] <- c("Group", "K", "BIC")
my_df$K <- as.factor(my_df$K)
head(my_df)

# The K vs. BIC plot
library(ggplot2)
p1 <- ggplot(my_df, aes(x = K, y = BIC))
p1 <- p1 + geom_boxplot()
p1 <- p1 + theme_bw()
p1 <- p1 + xlab("Number of groups (K)")
p1



### conduct DAPC
vcf_dapc <- dapc(popdata, n.pca = 40, n.da = 5)

### IMPORTANT: find the optimal # of PCs to retain using the 'a-score' criterion
temp <- optim.a.score(vcf_dapc)

#* n = 6 PCs = optimal!

# Cross validation method to find # of PCs
xval = xvalDapc(x, popdata$pop, n.pca.max=10, training.set=0.9,
result="groupMean", center=TRUE, scale=FALSE,
n.rep=100, n.pca=NULL, parallel="snow", ncpus=4)

## Number of PCs with best stats
xval$`Number of PCs Achieving Highest Mean Success`
[1] "6"
xval$`Number of PCs Achieving Lowest MSE`
[1] "6"
xval$`Root Mean Squared Error by Number of PCs of PCA` # lower score = better
        1         2         3         4         5         6         7         8         9 
0.4553692 0.3415904 0.2653287 0.2372806 0.2394293 0.2171069 0.2399725 0.2683540 0.2729405 
# n = 6 is optimal

vcf_dapc <- dapc(popdata, n.pca = 6, n.da = 3)


### Set up the plots

par(mfrow=c(2,2))


### DAPC1 vs 2 scatter
scatter(vcf_dapc, col = mycol, xax = 1, yax = 2, cex = 2, scree.da=FALSE, legend = FALSE, grp = pop(popdata))

### DAPC1 vs 3 scatter
scatter(vcf_dapc, col = mycol, xax = 1, yax = 3, cex = 2, scree.da=FALSE, legend = FALSE, grp = pop(popdata))

### Check the assignment plot
assignplot(vcf_dapc)

### plot membership probabilities
compoplot(vcf_dapc, lab="",ncol=1,col=mycol)



### Check the assignment probabilities
vcf_dapc$posterior
     vreelandii      striata Sierra_Nevada Coast_Cascades
01 9.974121e-01 2.562859e-03  2.229775e-05   2.790531e-06
02 9.836707e-01 1.631574e-02  1.151232e-05   2.066385e-06
03 9.951732e-01 7.168806e-04  2.923958e-03   1.185970e-03
04 9.991581e-01 8.279254e-04  1.152511e-05   2.465796e-06
05 9.960866e-01 3.722849e-03  1.586008e-04   3.193386e-05
06 9.925980e-01 6.508818e-03  4.571851e-04   4.360287e-04
07 9.974144e-01 1.374501e-03  1.058555e-03   1.525455e-04
08 9.812357e-01 1.210021e-02  6.108413e-03   5.556350e-04
09 9.935677e-01 6.287667e-03  1.257137e-04   1.891051e-05
10 9.975242e-01 1.922359e-03  3.595750e-04   1.938372e-04
11 9.968004e-01 1.506981e-03  9.494023e-04   7.432622e-04
12 9.745108e-01 1.593976e-03  2.119615e-02   2.699117e-03
13 9.959507e-01 2.693725e-03  1.806502e-05   1.337540e-03
14 9.466390e-01 4.791197e-02  4.275496e-03   1.173504e-03
15 9.999519e-01 4.709102e-05  8.947801e-07   1.455936e-07
16 9.999335e-01 5.961780e-06  5.846728e-05   2.043389e-06
17 9.995833e-01 4.127272e-04  3.161820e-06   8.287303e-07
18 9.989755e-01 7.226615e-04  2.718374e-04   3.001371e-05
19 9.996291e-01 2.042762e-04  1.653108e-05   1.501196e-04
20 9.944081e-01 1.529872e-04  6.063812e-04   4.832535e-03
21 9.998298e-01 9.238262e-05  5.810996e-05   1.973636e-05
22 9.980853e-01 1.664336e-03  2.284965e-04   2.182366e-05
23 8.583211e-04 9.988143e-01  2.780648e-04   4.930454e-05
24 5.505424e-06 9.868915e-01  1.149674e-02   1.606281e-03
25 1.115745e-03 9.896314e-01  7.959476e-03   1.293336e-03
26 3.130501e-06 9.994634e-01  4.447302e-04   8.870109e-05
27 6.581053e-06 9.999572e-01  1.321002e-05   2.296007e-05
28 3.621743e-03 9.834368e-01  1.056225e-02   2.379201e-03
29 8.716107e-02 9.126106e-01  3.756916e-05   1.907739e-04
30 3.786664e-02 3.423360e-01  5.207161e-01   9.908125e-02
31 5.681910e-02 9.431574e-01  1.815083e-05   5.315766e-06
32 8.504543e-06 9.777165e-01  6.496302e-03   1.577865e-02
33 5.959656e-08 9.884949e-01  2.701515e-03   8.803572e-03
34 9.488265e-04 9.989581e-01  4.796705e-05   4.508890e-05
35 1.933376e-05 9.999083e-01  3.403341e-05   3.835658e-05
36 2.501089e-03 9.963032e-01  5.936696e-04   6.020365e-04
37 1.582462e-03 9.983459e-01  5.776404e-05   1.386092e-05
38 3.780107e-05 9.272055e-01  3.765915e-02   3.509751e-02
39 3.089380e-06 9.985815e-01  1.000912e-04   1.315354e-03
40 1.526724e-03 9.895842e-01  4.502488e-03   4.386609e-03
41 5.538462e-02 3.271245e-01  3.815614e-01   2.359295e-01
42 7.130729e-03 9.682674e-01  2.256578e-02   2.036133e-03
43 4.922562e-03 8.648565e-01  5.030829e-02   7.991268e-02
44 2.915569e-04 1.475365e-01  3.565981e-01   4.955738e-01
45 1.211246e-02 9.863630e-01  1.046228e-03   4.782855e-04
46 2.171461e-01 2.881199e-01  4.601810e-01   3.455305e-02
47 2.797097e-04 9.996525e-01  5.984944e-05   7.923706e-06
48 6.678008e-02 9.331791e-01  3.609320e-05   4.700592e-06
49 1.474207e-03 1.841226e-03  8.693747e-01   1.273098e-01
50 2.255127e-03 9.079017e-05  6.761054e-01   3.215486e-01
51 6.329561e-05 6.657456e-02  7.515070e-01   1.818551e-01
52 7.129823e-03 1.277739e-02  4.166816e-01   5.634112e-01
53 1.283601e-09 3.056550e-04  8.523536e-01   1.473407e-01
54 7.501967e-03 6.689090e-04  6.754045e-01   3.164246e-01
55 3.446915e-07 1.896756e-05  7.984569e-01   2.015237e-01
56 1.232552e-02 4.702590e-02  6.337580e-01   3.068906e-01
57 4.581971e-04 6.701903e-04  8.000963e-01   1.987753e-01
58 1.492893e-01 1.665184e-03  7.501147e-01   9.893086e-02
59 1.186108e-03 2.337363e-03  8.625986e-01   1.338780e-01
60 1.388710e-03 1.836502e-01  7.109125e-01   1.040486e-01
61 1.218061e-05 3.285729e-03  8.802327e-01   1.164694e-01
62 1.035940e-05 4.519179e-03  9.240388e-01   7.143169e-02
63 2.712556e-05 1.005242e-03  1.950664e-01   8.039012e-01
64 1.974106e-05 9.524927e-01  4.156201e-02   5.925541e-03
65 5.688090e-05 2.576573e-02  1.134915e-02   9.628282e-01
66 1.504331e-06 2.205096e-06  7.906236e-03   9.920901e-01
67 5.570653e-04 2.067271e-02  5.827805e-01   3.959897e-01
68 3.120024e-07 3.937502e-03  1.669734e-01   8.290888e-01
69 8.312522e-03 1.383599e-04  7.682951e-03   9.838662e-01
70 3.437176e-05 4.050840e-05  2.300734e-01   7.698518e-01
71 4.788450e-06 2.185469e-04  4.911611e-02   9.506606e-01
72 3.247094e-05 1.349341e-01  2.785740e-01   5.864595e-01
73 1.926218e-08 3.176804e-06  6.626536e-01   3.373432e-01
74 2.795884e-04 1.320117e-05  3.516034e-02   9.645469e-01
75 5.311797e-04 1.627101e-03  4.143621e-02   9.564055e-01
76 1.785439e-03 6.032308e-02  3.457368e-01   5.921546e-01
77 1.485950e-03 1.881471e-02  8.743752e-01   1.053242e-01
78 9.584477e-05 3.312604e-04  8.518155e-01   1.477574e-01
79 2.481726e-05 1.522989e-03  7.749375e-01   2.235147e-01
80 4.456778e-07 2.590137e-03  6.016363e-03   9.913931e-01
81 2.052966e-08 2.696593e-05  1.308309e-02   9.868899e-01
82 3.597931e-02 6.470976e-01  1.630050e-01   1.539181e-01
83 1.539109e-03 2.120743e-02  4.160299e-01   5.612235e-01
#####################################
###    ENTIRE CODE in Markdown    ###
#####################################
# ---
# title: "2021_06_02_Cor_striata_DAPC_SSRgenotyper"
# output: 
#   html_notebook: 
#     theme: spacelab
# #editor_options: 
# #  chunk_output_type: console
# editor_options: 
#   chunk_output_type: inline
# ---
# 
# This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 
# 
# Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 
# 
# ```{r, include=TRUE}
# ### load libraries
# library(adegenet)
# 
# ### read in data -- GenePop format, converted to genind format
# popdata <- read.genepop("Cor_striata_ssrgenotyper_06.pop.gen", ncode = 3L)
# popdata
# 
# # /// GENIND OBJECT /////////                                             #
# #                                                                         #
# #  // 83 individuals; 19 loci; 79 alleles; size: 53.4 Kb                  #
# #                                                                         #
# #  // Basic content                                                       #
# #    @tab:  83 x 79 matrix of allele counts                               #
# #    @loc.n.all: number of alleles per locus (range: 2-5)                 #
# #    @loc.fac: locus factor for the 79 columns of @tab                    #
# #    @all.names: list of allele names for each locus                      #
# #    @ploidy: ploidy of each individual  (range: 2-2)                     #
# #    @type:  codom                                                        #
# #    @call: read.genepop(file = "Cor_striata_ssrgenotyper_06.pop.gen",    #
# #     ncode = 3L)                                                         #
# #                                                                       #
# # // Optional content                                                      #
# #   @pop: population of each individual (group size range: 16-26)          #
# 
# 
# ### Just checking...
# is.genind(popdata)
# 
# ### define "populations"
# pop(popdata) <- rep(c("vreelandii","striata","Sierra_Nevada","Coast_Cascades"), c(22,26,16,19))
# pop(popdata)
# 
# ### set color scheme
# mycol <- c("green","red","blue","magenta")
# 
# ### Find clusters
# grp <- find.clusters(popdata, max.n.clust = 40)
# 
# # Choose the number PCs to retain (>= 1): 
# 6
# # Choose the number of clusters (>=2): 
# 4
# 
# 
# # Also, you can use K-means clustering to find the K with minimum BIC
# 
# maxK <- 10
# myMat <- matrix(nrow=10, ncol=maxK)
# colnames(myMat) <- 1:ncol(myMat)
# for(i in 1:nrow(myMat)){
#   grp <- find.clusters(popdata, n.pca = 40, choose.n.clust = FALSE,  max.n.clust = maxK)
#   myMat[i,] <- grp$Kstat
# }
# 
# library(reshape2)
# my_df <- melt(myMat)
# colnames(my_df)[1:3] <- c("Group", "K", "BIC")
# my_df$K <- as.factor(my_df$K)
# head(my_df)
# 
# # The K vs. BIC plot
# library(ggplot2)
# p1 <- ggplot(my_df, aes(x = K, y = BIC))
# p1 <- p1 + geom_boxplot()
# p1 <- p1 + theme_bw()
# p1 <- p1 + xlab("Number of groups (K)")
# p1
# 
# 
# ### conduct DAPC
# vcf_dapc <- dapc(popdata, n.pca = 40, n.da = 5)
# 
# ### IMPORTANT: find the optimal # of PCs to retain using the 'a-score' criterion
# temp <- optim.a.score(vcf_dapc)
# #* n = 6 PCs = optimal!
# 
# # Cross validation method to find # of PCs
# xval = xvalDapc(x, popdata$pop, n.pca.max=10, training.set=0.9,
# result="groupMean", center=TRUE, scale=FALSE,
# n.rep=30, n.pca=NULL, parallel="snow", ncpus=2)
# ## Number of PCs with best stats
# xval$`Number of PCs Achieving Highest Mean Success`
# xval$`Number of PCs Achieving Lowest MSE`
# xval$`Root Mean Squared Error by Number of PCs of PCA` # lower score = better
# 
# vcf_dapc <- dapc(popdata, n.pca = 7, n.da = 5)
# 
# 
# ### Set up the plots
# 
# par(mfrow=c(2,2))
# 
# ### DAPC1 vs 2 scatter
# scatter(vcf_dapc, col = mycol, xax = 1, yax = 2, cex = 2, scree.da=FALSE, legend = FALSE, grp = pop(popdata))
# 
# ### DAPC1 vs 3 scatter
# scatter(vcf_dapc, col = mycol, xax = 1, yax = 3, cex = 2, scree.da=FALSE, legend = FALSE, grp = pop(popdata))
# 
# ### Check the assignment plot
# assignplot(vcf_dapc)
# 
# ### plot membership probabilities
# compoplot(vcf_dapc, lab="",ncol=1,col=mycol)
# 
# 
# ### Check the assignment probabilities
# vcf_dapc$posterior
# 
# ```

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

---
title: "2021_06_02_Cor_striata_DAPC_SSRgenotyper"
output: 
  html_notebook: 
    theme: spacelab
#editor_options: 
#  chunk_output_type: console
editor_options: 
  chunk_output_type: inline
---

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 

```{r, include=TRUE}
### load libraries
library(adegenet)

### read in data -- GenePop format, converted to genind format
popdata <- read.genepop("Cor_striata_ssrgenotyper_06.pop.gen", ncode = 3L)
popdata

# /// GENIND OBJECT /////////                                             #
#                                                                         #
#  // 83 individuals; 19 loci; 79 alleles; size: 53.4 Kb                  #
#                                                                         #
#  // Basic content                                                       #
#    @tab:  83 x 79 matrix of allele counts                               #
#    @loc.n.all: number of alleles per locus (range: 2-5)                 #
#    @loc.fac: locus factor for the 79 columns of @tab                    #
#    @all.names: list of allele names for each locus                      #
#    @ploidy: ploidy of each individual  (range: 2-2)                     #
#    @type:  codom                                                        #
#    @call: read.genepop(file = "Cor_striata_ssrgenotyper_06.pop.gen",    #
#     ncode = 3L)                                                         #
#																		  #
# // Optional content                                                      #
#   @pop: population of each individual (group size range: 16-26)          #


### Just checking...
is.genind(popdata)

### define "populations"
pop(popdata) <- rep(c("vreelandii","striata","Sierra_Nevada","Coast_Cascades"), c(22,26,16,19))
pop(popdata)

### set color scheme
mycol <- c("green","red","blue","magenta")

### Find clusters
grp <- find.clusters(popdata, max.n.clust = 40)

# Choose the number PCs to retain (>= 1): 
6
# Choose the number of clusters (>=2): 
4


# Also, you can use K-means clustering to find the K with minimum BIC

maxK <- 10
myMat <- matrix(nrow=10, ncol=maxK)
colnames(myMat) <- 1:ncol(myMat)
for(i in 1:nrow(myMat)){
  grp <- find.clusters(popdata, n.pca = 40, choose.n.clust = FALSE,  max.n.clust = maxK)
  myMat[i,] <- grp$Kstat
}

library(reshape2)
my_df <- melt(myMat)
colnames(my_df)[1:3] <- c("Group", "K", "BIC")
my_df$K <- as.factor(my_df$K)
head(my_df)

# The K vs. BIC plot
library(ggplot2)
p1 <- ggplot(my_df, aes(x = K, y = BIC))
p1 <- p1 + geom_boxplot()
p1 <- p1 + theme_bw()
p1 <- p1 + xlab("Number of groups (K)")
p1


### conduct DAPC
vcf_dapc <- dapc(popdata, n.pca = 40, n.da = 5)

### IMPORTANT: find the optimal # of PCs to retain using the 'a-score' criterion
temp <- optim.a.score(vcf_dapc)
#* n = 6 PCs = optimal!

# Cross validation method to find # of PCs
xval = xvalDapc(x, popdata$pop, n.pca.max=10, training.set=0.9,
result="groupMean", center=TRUE, scale=FALSE,
n.rep=100, n.pca=NULL, parallel="snow", ncpus=4)

## Number of PCs with best stats
xval$`Number of PCs Achieving Highest Mean Success`
xval$`Number of PCs Achieving Lowest MSE`
xval$`Root Mean Squared Error by Number of PCs of PCA` # lower score = better
# n = 6 is optimal

vcf_dapc <- dapc(popdata, n.pca = 6, n.da = 3)


### Set up the plots

par(mfrow=c(2,2))

### DAPC1 vs 2 scatter
scatter(vcf_dapc, col = mycol, xax = 1, yax = 2, cex = 2, scree.da=FALSE, legend = FALSE, grp = pop(popdata))

### DAPC1 vs 3 scatter
scatter(vcf_dapc, col = mycol, xax = 1, yax = 3, cex = 2, scree.da=FALSE, legend = FALSE, grp = pop(popdata))

### Check the assignment plot
assignplot(vcf_dapc)

### plot membership probabilities
compoplot(vcf_dapc, lab="",ncol=1,col=mycol)


### Check the assignment probabilities
vcf_dapc$posterior

```
```{r}
#####################################
###    ENTIRE CODE in Markdown    ###
#####################################
# ---
# title: "2021_06_02_Cor_striata_DAPC_SSRgenotyper"
# output: 
#   html_notebook: 
#     theme: spacelab
# #editor_options: 
# #  chunk_output_type: console
# editor_options: 
#   chunk_output_type: inline
# ---
# 
# This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 
# 
# Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 
# 
# ```{r, include=TRUE}
# ### load libraries
# library(adegenet)
# 
# ### read in data -- GenePop format, converted to genind format
# popdata <- read.genepop("Cor_striata_ssrgenotyper_06.pop.gen", ncode = 3L)
# popdata
# 
# # /// GENIND OBJECT /////////                                             #
# #                                                                         #
# #  // 83 individuals; 19 loci; 79 alleles; size: 53.4 Kb                  #
# #                                                                         #
# #  // Basic content                                                       #
# #    @tab:  83 x 79 matrix of allele counts                               #
# #    @loc.n.all: number of alleles per locus (range: 2-5)                 #
# #    @loc.fac: locus factor for the 79 columns of @tab                    #
# #    @all.names: list of allele names for each locus                      #
# #    @ploidy: ploidy of each individual  (range: 2-2)                     #
# #    @type:  codom                                                        #
# #    @call: read.genepop(file = "Cor_striata_ssrgenotyper_06.pop.gen",    #
# #     ncode = 3L)                                                         #
# #																		  #
# # // Optional content                                                      #
# #   @pop: population of each individual (group size range: 16-26)          #
# 
# 
# ### Just checking...
# is.genind(popdata)
# 
# ### define "populations"
# pop(popdata) <- rep(c("vreelandii","striata","Sierra_Nevada","Coast_Cascades"), c(22,26,16,19))
# pop(popdata)
# 
# ### set color scheme
# mycol <- c("green","red","blue","magenta")
# 
# ### Find clusters
# grp <- find.clusters(popdata, max.n.clust = 40)
# 
# # Choose the number PCs to retain (>= 1): 
# 6
# # Choose the number of clusters (>=2): 
# 4
# 
# 
# # Also, you can use K-means clustering to find the K with minimum BIC
# 
# maxK <- 10
# myMat <- matrix(nrow=10, ncol=maxK)
# colnames(myMat) <- 1:ncol(myMat)
# for(i in 1:nrow(myMat)){
#   grp <- find.clusters(popdata, n.pca = 40, choose.n.clust = FALSE,  max.n.clust = maxK)
#   myMat[i,] <- grp$Kstat
# }
# 
# library(reshape2)
# my_df <- melt(myMat)
# colnames(my_df)[1:3] <- c("Group", "K", "BIC")
# my_df$K <- as.factor(my_df$K)
# head(my_df)
# 
# # The K vs. BIC plot
# library(ggplot2)
# p1 <- ggplot(my_df, aes(x = K, y = BIC))
# p1 <- p1 + geom_boxplot()
# p1 <- p1 + theme_bw()
# p1 <- p1 + xlab("Number of groups (K)")
# p1
# 
# 
# ### conduct DAPC
# vcf_dapc <- dapc(popdata, n.pca = 40, n.da = 5)
# 
# ### IMPORTANT: find the optimal # of PCs to retain using the 'a-score' criterion
# temp <- optim.a.score(vcf_dapc)
# #* n = 6 PCs = optimal!
# 
# # Cross validation method to find # of PCs
# xval = xvalDapc(x, popdata$pop, n.pca.max=10, training.set=0.9,
# result="groupMean", center=TRUE, scale=FALSE,
# n.rep=30, n.pca=NULL, parallel="snow", ncpus=2)
# ## Number of PCs with best stats
# xval$`Number of PCs Achieving Highest Mean Success`
# xval$`Number of PCs Achieving Lowest MSE`
# xval$`Root Mean Squared Error by Number of PCs of PCA` # lower score = better
# 
# vcf_dapc <- dapc(popdata, n.pca = 7, n.da = 5)
# 
# 
# ### Set up the plots
# 
# par(mfrow=c(2,2))
# 
# ### DAPC1 vs 2 scatter
# scatter(vcf_dapc, col = mycol, xax = 1, yax = 2, cex = 2, scree.da=FALSE, legend = FALSE, grp = pop(popdata))
# 
# ### DAPC1 vs 3 scatter
# scatter(vcf_dapc, col = mycol, xax = 1, yax = 3, cex = 2, scree.da=FALSE, legend = FALSE, grp = pop(popdata))
# 
# ### Check the assignment plot
# assignplot(vcf_dapc)
# 
# ### plot membership probabilities
# compoplot(vcf_dapc, lab="",ncol=1,col=mycol)
# 
# 
# ### Check the assignment probabilities
# vcf_dapc$posterior
# 
# ```
```

Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Ctrl+Alt+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
