New Theoretical Approach To Ancestry Estimation

A quick note about this markdown file - I have presented my analysis in a step-by-step fashion, so if you had your R environment properly configured, you could copy and paste everything that follows verbatim. Code annotations are shown whenever there is a double hashtag. Most of this framework uses the same types of analyses, just at different scales. I have only annotated the code for the first instance that particular analysis was done. I have mostly adhered to a naming schema for my objects that you can follow the steps, if you are not as familiar with R.

r <- getOption("repos")
r["CRAN"] <- "http://cran.cnr.berkeley.edu/"
options(repos = r)
install.packages("devtools")

## Warning: unable to access index for repository http://cran.cnr.berkeley.edu/src/contrib:
##   cannot open URL 'http://cran.cnr.berkeley.edu/src/contrib/PACKAGES'

## Warning: package 'devtools' is not available (for R version 4.0.2)

## Warning: unable to access index for repository http://cran.cnr.berkeley.edu/bin/macosx/contrib/4.0:
##   cannot open URL 'http://cran.cnr.berkeley.edu/bin/macosx/contrib/4.0/PACKAGES'

devtools::install_github("geanes/bioanth") ## this installs a package with the Howells dataset

## Skipping install of 'bioanth' from a github remote, the SHA1 (b179b396) has not changed since last install.
##   Use `force = TRUE` to force installation

library(factoextra)

## Loading required package: ggplot2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(plyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(NbClust)
library(ggplot2)
library(treemap)
library(caret)

## Loading required package: lattice

library(ggRandomForests)

## Loading required package: randomForestSRC

## 
##  randomForestSRC 2.9.3 
##  
##  Type rfsrc.news() to see new features, changes, and bug fixes. 
##

## 
## Attaching package: 'ggRandomForests'

## The following object is masked from 'package:randomForestSRC':
## 
##     partial.rfsrc

library(randomForest)

## randomForest 4.6-14

## Type rfNews() to see new features/changes/bug fixes.

## 
## Attaching package: 'randomForest'

## The following object is masked from 'package:dplyr':
## 
##     combine

## The following object is masked from 'package:ggplot2':
## 
##     margin

library(randomForestSRC)
library(psych)

## 
## Attaching package: 'psych'

## The following object is masked from 'package:randomForest':
## 
##     outlier

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(usedist)
library(bioanth)
library(ggdendro)

suppressWarnings(library(ggplot2))
knitr::opts_chunk$set(dev="jpeg")

Data Summary

set.seed(32) ## setting a random number generator for consistent results.
head(howell) ## examining the first six indicies in the howell data frame.

## # A tibble: 6 x 86
##      ID Sex   PopNum Population   GOL   NOL   BNL   BBH   XCB   XFB   ZYB   AUB
##   <int> <fct> <fct>  <fct>      <int> <int> <int> <int> <int> <int> <int> <int>
## 1     1 M     1      NORSE        189   185   100   135   143   120   133   119
## 2     2 M     1      NORSE        182   178   102   139   145   120   137   125
## 3     3 M     1      NORSE        191   187   102   123   140   114   134   125
## 4     4 M     1      NORSE        191   188   100   127   141   123   135   127
## 5     5 M     1      NORSE        178   177    97   128   138   117   129   121
## 6     6 M     1      NORSE        194   191   106   132   139   118   136   128
## # … with 74 more variables: WCB <int>, ASB <int>, BPL <int>, NPH <int>,
## #   NLH <int>, JUB <int>, NLB <int>, MAB <int>, MDH <int>, MDB <int>,
## #   OBH <int>, OBB <int>, DKB <int>, NDS <int>, WNB <dbl>, SIS <dbl>,
## #   ZMB <int>, SSS <int>, FMB <int>, NAS <int>, EKB <int>, DKS <int>,
## #   IML <int>, XML <int>, MLS <int>, WMH <int>, SOS <int>, GLS <int>,
## #   STB <int>, FRC <int>, FRS <int>, FRF <int>, PAC <int>, PAS <int>,
## #   PAF <int>, OCC <int>, OCS <int>, OCF <int>, FOL <int>, NAR <int>,
## #   SSR <int>, PRR <int>, DKR <int>, ZOR <int>, FMR <int>, EKR <int>,
## #   ZMR <int>, AVR <int>, BRR <int>, VRR <int>, LAR <int>, OSR <int>,
## #   BAR <int>, NAA <int>, PRA <int>, BAA <int>, NBA <int>, BBA <int>,
## #   BRA <int>, SSA <int>, NFA <int>, DKA <int>, NDA <int>, SIA <int>,
## #   FRA <int>, PAA <int>, OCA <int>, RFA <int>, RPA <int>, ROA <int>,
## #   BSA <int>, SBA <int>, SLA <int>, TBA <int>

dim(howell) ## checking the dimensions of the howell data frame

## [1] 2524   86

summary(howell)

##        ID         Sex          PopNum       Population        GOL       
##  Min.   :   1.0   F:1156   22     : 111   EGYPT  : 111   Min.   :151.0  
##  1st Qu.: 634.8   M:1368   1      : 110   NORSE  : 110   1st Qu.:173.0  
##  Median :1275.5            9      : 110   PERU   : 110   Median :179.0  
##  Mean   :1458.1            15     : 110   TOLAI  : 110   Mean   :179.2  
##  3rd Qu.:2326.2            3      : 109   BERG   : 109   3rd Qu.:185.0  
##  Max.   :3190.0            26     : 109   BURIAT : 109   Max.   :206.0  
##                            (Other):1865   (Other):1865                  
##       NOL             BNL              BBH             XCB       
##  Min.   :151.0   Min.   : 83.00   Min.   :107.0   Min.   :116.0  
##  1st Qu.:172.0   1st Qu.: 95.00   1st Qu.:127.0   1st Qu.:132.0  
##  Median :177.0   Median : 99.00   Median :131.0   Median :137.0  
##  Mean   :176.9   Mean   : 99.12   Mean   :131.6   Mean   :136.8  
##  3rd Qu.:182.0   3rd Qu.:103.00   3rd Qu.:137.0   3rd Qu.:141.0  
##  Max.   :200.0   Max.   :120.00   Max.   :155.0   Max.   :167.0  
##                                                                  
##       XFB             ZYB             AUB             WCB       
##  Min.   : 95.0   Min.   :105.0   Min.   : 98.0   Min.   :57.00  
##  1st Qu.:109.0   1st Qu.:125.0   1st Qu.:115.0   1st Qu.:68.00  
##  Median :113.0   Median :131.0   Median :120.0   Median :71.00  
##  Mean   :113.4   Mean   :130.8   Mean   :120.6   Mean   :70.99  
##  3rd Qu.:117.0   3rd Qu.:136.0   3rd Qu.:125.0   3rd Qu.:74.00  
##  Max.   :145.0   Max.   :158.0   Max.   :149.0   Max.   :89.00  
##                                                                 
##       ASB             BPL              NPH             NLH       
##  Min.   : 88.0   Min.   : 80.00   Min.   :48.00   Min.   :36.00  
##  1st Qu.:103.0   1st Qu.: 93.00   1st Qu.:62.75   1st Qu.:47.00  
##  Median :107.0   Median : 98.00   Median :66.00   Median :50.00  
##  Mean   :106.9   Mean   : 97.78   Mean   :65.98   Mean   :50.01  
##  3rd Qu.:111.0   3rd Qu.:102.00   3rd Qu.:70.00   3rd Qu.:53.00  
##  Max.   :128.0   Max.   :123.00   Max.   :82.00   Max.   :65.00  
##                                                                  
##       JUB             NLB            MAB             MDH             MDB       
##  Min.   : 97.0   Min.   :19.0   Min.   :52.00   Min.   :16.00   Min.   : 6.00  
##  1st Qu.:111.0   1st Qu.:25.0   1st Qu.:61.00   1st Qu.:25.00   1st Qu.:11.00  
##  Median :115.0   Median :26.0   Median :63.00   Median :27.00   Median :12.00  
##  Mean   :115.2   Mean   :26.3   Mean   :63.55   Mean   :27.35   Mean   :12.31  
##  3rd Qu.:120.0   3rd Qu.:28.0   3rd Qu.:66.00   3rd Qu.:30.00   3rd Qu.:14.00  
##  Max.   :138.0   Max.   :35.0   Max.   :78.00   Max.   :39.00   Max.   :20.00  
##                                                                                
##       OBH             OBB             DKB             NDS        
##  Min.   :26.00   Min.   :33.00   Min.   :13.00   Min.   : 4.000  
##  1st Qu.:32.00   1st Qu.:38.00   1st Qu.:20.00   1st Qu.: 8.000  
##  Median :34.00   Median :39.00   Median :21.00   Median :10.000  
##  Mean   :33.67   Mean   :39.49   Mean   :21.38   Mean   : 9.592  
##  3rd Qu.:35.00   3rd Qu.:41.00   3rd Qu.:23.00   3rd Qu.:11.000  
##  Max.   :41.00   Max.   :46.00   Max.   :32.00   Max.   :17.000  
##                                                                  
##       WNB              SIS             ZMB              SSS       
##  Min.   : 1.400   Min.   :0.100   Min.   : 79.00   Min.   :11.00  
##  1st Qu.: 6.800   1st Qu.:2.200   1st Qu.: 91.00   1st Qu.:21.00  
##  Median : 8.200   Median :3.000   Median : 95.00   Median :23.00  
##  Mean   : 8.315   Mean   :3.049   Mean   : 94.98   Mean   :22.91  
##  3rd Qu.: 9.800   3rd Qu.:3.800   3rd Qu.: 99.00   3rd Qu.:25.00  
##  Max.   :15.900   Max.   :7.700   Max.   :120.00   Max.   :35.00  
##                                                                   
##       FMB              NAS             EKB              DKS        
##  Min.   : 81.00   Min.   : 8.00   Min.   : 83.00   Min.   : 2.000  
##  1st Qu.: 94.00   1st Qu.:14.00   1st Qu.: 95.00   1st Qu.: 8.000  
##  Median : 97.00   Median :16.00   Median : 97.00   Median :10.000  
##  Mean   : 96.97   Mean   :16.26   Mean   : 97.34   Mean   : 9.786  
##  3rd Qu.:100.00   3rd Qu.:18.00   3rd Qu.:100.00   3rd Qu.:11.000  
##  Max.   :112.00   Max.   :25.00   Max.   :113.00   Max.   :18.000  
##                                                                    
##       IML             XML             MLS             WMH       
##  Min.   :20.00   Min.   :38.00   Min.   : 6.00   Min.   :14.00  
##  1st Qu.:33.00   1st Qu.:50.00   1st Qu.:10.00   1st Qu.:21.00  
##  Median :36.00   Median :53.00   Median :11.00   Median :23.00  
##  Mean   :36.11   Mean   :52.68   Mean   :11.29   Mean   :22.74  
##  3rd Qu.:39.00   3rd Qu.:56.00   3rd Qu.:12.00   3rd Qu.:25.00  
##  Max.   :49.00   Max.   :69.00   Max.   :18.00   Max.   :35.00  
##                                                                 
##       SOS              GLS             STB             FRC       
##  Min.   : 2.000   Min.   :0.000   Min.   : 81.0   Min.   : 93.0  
##  1st Qu.: 5.000   1st Qu.:2.000   1st Qu.:104.0   1st Qu.:106.0  
##  Median : 6.000   Median :3.000   Median :109.0   Median :109.0  
##  Mean   : 5.794   Mean   :3.023   Mean   :109.3   Mean   :109.5  
##  3rd Qu.: 7.000   3rd Qu.:4.000   3rd Qu.:115.0   3rd Qu.:113.0  
##  Max.   :11.000   Max.   :9.000   Max.   :140.0   Max.   :128.0  
##                                                                  
##       FRS             FRF             PAC             PAS             PAF      
##  Min.   :16.00   Min.   :36.00   Min.   : 89.0   Min.   :13.00   Min.   :34.0  
##  1st Qu.:23.00   1st Qu.:46.00   1st Qu.:106.0   1st Qu.:22.00   1st Qu.:54.0  
##  Median :26.00   Median :49.00   Median :111.0   Median :24.00   Median :57.0  
##  Mean   :25.43   Mean   :48.77   Mean   :110.6   Mean   :23.56   Mean   :57.4  
##  3rd Qu.:27.00   3rd Qu.:51.00   3rd Qu.:115.0   3rd Qu.:25.00   3rd Qu.:61.0  
##  Max.   :35.00   Max.   :66.00   Max.   :135.0   Max.   :36.00   Max.   :77.0  
##                                                                                
##       OCC              OCS             OCF             FOL       
##  Min.   : 79.00   Min.   :15.00   Min.   :29.00   Min.   :27.00  
##  1st Qu.: 92.00   1st Qu.:25.00   1st Qu.:43.00   1st Qu.:34.00  
##  Median : 95.00   Median :28.00   Median :46.00   Median :36.00  
##  Mean   : 95.69   Mean   :27.87   Mean   :46.51   Mean   :35.78  
##  3rd Qu.:100.00   3rd Qu.:30.00   3rd Qu.:50.00   3rd Qu.:37.00  
##  Max.   :118.00   Max.   :41.00   Max.   :70.00   Max.   :50.00  
##                                                                  
##       NAR             SSR              PRR             DKR       
##  Min.   : 75.0   Min.   : 76.00   Min.   : 81.0   Min.   :67.00  
##  1st Qu.: 89.0   1st Qu.: 90.00   1st Qu.: 96.0   1st Qu.:78.00  
##  Median : 92.0   Median : 94.00   Median :101.0   Median :81.00  
##  Mean   : 92.5   Mean   : 94.08   Mean   :100.6   Mean   :81.68  
##  3rd Qu.: 96.0   3rd Qu.: 98.00   3rd Qu.:105.0   3rd Qu.:85.00  
##  Max.   :110.0   Max.   :114.00   Max.   :124.0   Max.   :98.00  
##                                                                  
##       ZOR             FMR             EKR             ZMR       
##  Min.   :61.00   Min.   :63.00   Min.   :57.00   Min.   :54.00  
##  1st Qu.:76.00   1st Qu.:73.00   1st Qu.:68.00   1st Qu.:68.00  
##  Median :80.00   Median :77.00   Median :71.00   Median :72.00  
##  Mean   :79.75   Mean   :76.91   Mean   :71.64   Mean   :72.01  
##  3rd Qu.:83.00   3rd Qu.:80.00   3rd Qu.:75.00   3rd Qu.:75.00  
##  Max.   :97.00   Max.   :96.00   Max.   :88.00   Max.   :89.00  
##                                                                 
##       AVR             BRR              VRR             LAR        
##  Min.   :61.00   Min.   :  0.00   Min.   :101.0   Min.   :  0.00  
##  1st Qu.:76.00   1st Qu.:  0.00   1st Qu.:117.0   1st Qu.:  0.00  
##  Median :80.00   Median :115.00   Median :121.0   Median :103.00  
##  Mean   :80.05   Mean   : 86.78   Mean   :120.8   Mean   : 78.01  
##  3rd Qu.:84.00   3rd Qu.:120.00   3rd Qu.:125.0   3rd Qu.:108.00  
##  Max.   :98.00   Max.   :138.00   Max.   :140.0   Max.   :133.00  
##                                                                   
##       OSR             BAR            NAA             PRA             BAA       
##  Min.   : 0.00   Min.   : 0.0   Min.   :57.00   Min.   :60.00   Min.   :28.00  
##  1st Qu.: 0.00   1st Qu.: 0.0   1st Qu.:66.00   1st Qu.:69.00   1st Qu.:37.00  
##  Median :39.00   Median :14.0   Median :69.00   Median :71.00   Median :39.00  
##  Mean   :29.98   Mean   :11.2   Mean   :69.42   Mean   :71.48   Mean   :39.12  
##  3rd Qu.:42.00   3rd Qu.:16.0   3rd Qu.:72.00   3rd Qu.:74.00   3rd Qu.:41.00  
##  Max.   :52.00   Max.   :50.0   Max.   :87.00   Max.   :84.00   Max.   :50.00  
##                                                                                
##       NBA             BBA             BRA             SSA             NFA     
##  Min.   :65.00   Min.   :46.00   Min.   :38.00   Min.   :107.0   Min.   :126  
##  1st Qu.:76.00   1st Qu.:53.00   1st Qu.:46.00   1st Qu.:124.0   1st Qu.:140  
##  Median :78.00   Median :54.00   Median :47.00   Median :128.0   Median :143  
##  Mean   :78.06   Mean   :54.53   Mean   :47.42   Mean   :128.6   Mean   :143  
##  3rd Qu.:80.00   3rd Qu.:56.00   3rd Qu.:49.00   3rd Qu.:133.0   3rd Qu.:146  
##  Max.   :90.00   Max.   :64.00   Max.   :55.00   Max.   :153.0   Max.   :161  
##                                                                               
##       DKA             NDA              SIA             FRA       
##  Min.   :128.0   Min.   : 63.00   Min.   : 43.0   Min.   :114.0  
##  1st Qu.:147.0   1st Qu.: 88.00   1st Qu.: 96.0   1st Qu.:126.0  
##  Median :151.0   Median : 96.00   Median :108.0   Median :130.0  
##  Mean   :151.2   Mean   : 96.73   Mean   :109.4   Mean   :129.7  
##  3rd Qu.:155.0   3rd Qu.:105.00   3rd Qu.:121.0   3rd Qu.:133.0  
##  Max.   :174.0   Max.   :138.00   Max.   :178.0   Max.   :145.0  
##                                                                  
##       PAA             OCA             RFA             RPA       
##  Min.   :122.0   Min.   :102.0   Min.   : 0.00   Min.   : 0.00  
##  1st Qu.:131.0   1st Qu.:115.0   1st Qu.: 0.00   1st Qu.: 0.00  
##  Median :134.0   Median :119.0   Median :60.00   Median :57.00  
##  Mean   :133.7   Mean   :119.3   Mean   :45.35   Mean   :43.58  
##  3rd Qu.:136.0   3rd Qu.:123.0   3rd Qu.:63.00   3rd Qu.:61.00  
##  Max.   :147.0   Max.   :142.0   Max.   :71.00   Max.   :71.00  
##                                                                 
##       ROA             BSA             SBA              SLA        
##  Min.   : 0.00   Min.   :  0.0   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.: 0.00   1st Qu.:  0.0   1st Qu.:  0.00   1st Qu.:  0.00  
##  Median :63.00   Median :162.0   Median :101.00   Median : 87.00  
##  Mean   :47.95   Mean   :122.3   Mean   : 75.96   Mean   : 65.16  
##  3rd Qu.:67.00   3rd Qu.:170.0   3rd Qu.:104.00   3rd Qu.: 90.00  
##  Max.   :83.00   Max.   :229.0   Max.   :116.00   Max.   :101.00  
##                                                                   
##       TBA     
##  Min.   :  0  
##  1st Qu.:  0  
##  Median :149  
##  Mean   :112  
##  3rd Qu.:154  
##  Max.   :177  
##

Data Preparation

Removing uncommon variables and adding a variable representative of gross size, ‘GS’

hwl<-howell[1:24] ## making a new data frame that only contains the first 24 columns of the object howell - this includes the more common craniometric and removes subtenses, arcs, angles, etc. Also, the choice of name hwl is just quicker to type now.
hwl$GS<-(hwl$GOL*hwl$XCB*hwl$BBH) ## This is a very basic and general estimation of overall cranial volume that will be used as a metric for gross size.

Calculating group means for each variable

hwl_sum<-hwl %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) ##creating a new data frame that is using the Population label as a grouping variable, which is then being summarized for all variables in the data frame between GOL and GS, with only mean being specified.
as.data.frame(hwl_sum) ## printing the summary showing the mean measures for each variable by population label.

##    Population      GOL      NOL       BNL      BBH      XCB      XFB      ZYB
## 1        AINU 185.0116 182.6047 103.65116 135.8721 140.3721 117.5465 134.2093
## 2     ANDAMAN 164.4857 163.4857  91.64286 126.4571 133.3714 108.3286 120.7143
## 3      ANYANG 181.0000 178.8571 101.28571 140.2619 138.7857 114.8095 135.9524
## 4     ARIKARA 176.2029 174.7681 100.75362 130.7971 139.5652 115.0145 136.8841
## 5      ATAYAL 173.6170 171.2128  96.38298 131.9149 134.2766 111.6596 129.6809
## 6    AUSTRALI 185.8416 181.4752  99.15842 126.6634 129.7921 108.2178 131.4356
## 7        BERG 175.5596 173.0367  95.83486 127.4404 144.0826 121.7615 131.0917
## 8      BURIAT 176.7798 174.8991  99.30275 129.8807 151.6606 124.1743 139.3945
## 9     BUSHMAN 174.7444 172.9667  93.07778 120.8889 130.8556 108.2222 119.7333
## 10      DOGON 173.5657 172.2121  96.58586 130.0101 134.6465 111.5758 125.0909
## 11   EASTER I 187.2442 183.1977 108.34884 141.4884 132.2209 109.8140 131.8605
## 12      EGYPT 180.8288 179.4955  98.81982 130.7207 137.4775 113.5135 124.6396
## 13     ESKIMO 184.5556 181.4630 103.31481 135.8981 132.4815 110.4630 134.8796
## 14       GUAM 180.7018 178.4035 102.63158 140.4912 138.7018 113.7544 137.5614
## 15     HAINAN 173.7470 172.0843  97.30120 134.6145 136.8554 113.4096 130.1084
## 16     MOKAPU 180.9600 178.6300 104.28000 140.3800 141.2500 114.6500 132.9700
## 17    MORIORI 183.5556 181.5093 103.59259 133.9352 140.3611 111.7130 135.5926
## 18    N JAPAN 178.6782 176.2184  99.33333 134.1494 138.6207 114.4138 131.6207
## 19    N MAORI 186.6000 183.0000 105.00000 137.4000 136.9000 111.1000 139.3000
## 20      NORSE 184.2273 182.4364  99.55455 128.8455 139.0818 116.7182 129.4364
## 21       PERU 173.4818 172.1545  93.29091 127.7182 136.4364 113.6545 130.2636
## 22   PHILLIPI 176.9200 174.8200  98.48000 134.8000 139.8000 115.1600 133.1600
## 23   SANTA CR 176.0000 173.9020  95.04902 126.4804 137.4608 111.2745 131.7941
## 24    S JAPAN 177.3297 175.6044  99.09890 134.9670 136.2308 113.0659 129.6154
## 25    S MAORI 187.1000 183.0000 105.60000 139.4000 142.6000 114.3000 142.5000
## 26   TASMANIA 181.7126 176.8506  97.34483 129.3103 135.6782 110.0230 130.8161
## 27      TEITA 178.4096 177.0964  98.72289 126.6867 127.7952 109.3614 126.9518
## 28      TOLAI 179.1364 175.5364  98.60000 131.1091 129.2364 108.2545 131.2000
## 29    ZALAVAR 181.1429 178.7041  99.09184 132.0510 139.3265 117.6327 129.5306
## 30       ZULU 182.3168 180.8812  99.85149 131.4257 133.0198 114.8614 126.6832
##         AUB      WCB      ASB       BPL      NPH      NLH      JUB      NLB
## 1  122.5233 73.96512 109.9186 101.40698 65.95349 49.73256 119.1628 27.16279
## 2  110.9286 67.47143  98.0000  91.84286 58.65714 45.28571 109.0714 24.42857
## 3  125.6905 74.19048 108.2381  97.50000 69.42857 52.47619 121.2143 28.28571
## 4  128.4203 73.17391 107.6087  97.24638 70.10145 52.91304 119.4638 26.59420
## 5  120.5319 72.89362 107.2553  93.17021 63.29787 49.00000 113.5957 26.31915
## 6  116.7921 69.96040 107.1980 103.04950 63.00990 48.14851 116.4356 27.08911
## 7  124.0183 73.00000 110.9725  91.88073 65.75229 50.01835 114.1835 25.18349
## 8  132.7798 78.25688 114.5872  96.75229 71.95413 55.13761 119.9908 27.64220
## 9  110.1556 67.96667 104.2000  92.00000 56.75556 43.26667 108.2667 26.48889
## 10 112.2222 66.95960 101.8485  97.92929 63.07071 46.90909 112.4242 28.04040
## 11 120.4535 66.95349 105.5116 104.18605 65.73256 51.65116 116.3372 27.69767
## 12 115.7207 67.42342 106.0541  94.00000 66.34234 50.41441 109.1351 24.44144
## 13 123.3889 72.51852 107.5556 101.23148 69.37963 52.25000 117.1019 23.50000
## 14 126.6842 72.89474 107.8246  99.10526 68.35088 52.77193 121.2982 27.17544
## 15 120.7952 72.71084 103.9759  95.12048 67.72289 50.96386 115.9398 26.72289
## 16 123.6500 70.66000 105.8700 102.32000 66.23000 51.39000 116.8000 26.72000
## 17 125.9537 69.18519 107.5185 101.00926 71.49074 54.37037 119.6204 26.37963
## 18 121.5862 74.47126 107.5287  96.32184 69.19540 51.87356 115.4943 25.83908
## 19 126.0000 69.10000 110.0000 101.20000 67.30000 52.70000 120.8000 27.00000
## 20 121.1636 69.75455 109.3000  95.49091 66.59091 50.56364 113.5727 24.80000
## 21 120.5364 69.62727 106.7091  91.57273 65.71818 49.00000 112.3364 24.60000
## 22 122.9600 74.66000 107.3200  97.88000 66.78000 51.46000 118.1600 28.26000
## 23 122.7745 73.10784 109.0784  97.02941 66.78431 48.70588 114.7255 24.21569
## 24 120.3516 71.74725 106.1099  97.17582 67.59341 50.68132 114.4396 25.79121
## 25 130.2000 72.20000 110.4000 100.50000 70.10000 54.10000 124.6000 26.40000
## 26 120.1494 70.03448 107.2069 100.81609 60.44828 47.10345 114.3678 28.22989
## 27 114.4096 65.75904 102.3253  98.73494 63.03614 47.92771 114.4337 27.48193
## 28 117.5545 69.35455 104.6000 104.33636 64.43636 47.54545 114.7727 27.24545
## 29 121.3878 72.84694 109.5102  94.79592 66.03061 50.08163 112.7959 25.04082
## 30 114.6832 70.63366 104.3960 100.71287 65.54455 48.77228 114.5941 28.34653
##         MAB      MDH      MDB      OBH      OBB      GS
## 1  64.77907 27.65116 12.76744 34.18605 40.95349 3538458
## 2  59.45714 24.17143 10.81429 32.45714 36.98571 2781390
## 3  66.69048 30.59524 13.92857 32.78571 39.04762 3525032
## 4  65.00000 26.92754 12.17391 34.82609 40.02899 3223608
## 5  62.42553 25.59574 11.42553 33.23404 37.78723 3081822
## 6  64.82178 27.37624 11.76238 33.28713 40.94059 3064683
## 7  62.28440 27.00000 12.34862 33.26606 39.28440 3230445
## 8  66.79817 27.74312 12.29358 35.38532 40.64220 3490759
## 9  58.91111 23.26667 10.28889 30.90000 38.40000 2770051
## 10 62.73737 27.02020 11.22222 33.24242 38.80808 3044807
## 11 62.55814 27.59302 14.24419 33.91860 39.04651 3512974
## 12 61.31532 27.81982 12.02703 32.89189 38.72072 3258136
## 13 63.86111 25.25000 12.53704 35.65741 41.21296 3328639
## 14 65.70175 29.10526 12.59649 35.14035 40.63158 3528384
## 15 63.67470 27.54217 11.84337 33.25301 38.18072 3205744
## 16 63.80000 28.29000 13.89000 34.60000 40.07000 3599953
## 17 64.31481 29.87037 13.70370 36.26852 41.11111 3459399
## 18 65.62069 29.20690 11.75862 34.50575 39.11494 3334221
## 19 64.60000 32.80000 15.70000 35.50000 40.70000 3510752
## 20 61.86364 27.78182 12.42727 33.48182 39.79091 3308171
## 21 62.84545 28.13636 11.66364 34.20909 37.53636 3029428
## 22 64.86000 29.62000 13.50000 33.22000 39.04000 3338169
## 23 65.02941 27.02941 13.01961 34.72549 39.26471 3066291
## 24 64.52747 27.89011 11.62637 33.98901 38.58242 3267251
## 25 65.70000 31.70000 16.00000 36.20000 41.50000 3728305
## 26 65.45977 24.59770 12.17241 30.85057 40.17241 3195916
## 27 60.66265 26.19277 11.39759 32.63855 38.53012 2894007
## 28 64.10000 27.45455 12.76364 32.26364 40.12727 3041819
## 29 62.57143 28.04082 12.72449 32.40816 39.36735 3339968
## 30 64.11881 27.13861 11.41584 33.35644 39.87129 3194899

labs<-hwl_sum$Population ##creating an object of Population labels. 
dsq<-dist(scale(hwl_sum[2:22])) ## creating a distance matrix that compares the distances between each Population based on the observed differences among the scaled craniometric variables.
hc<-hclust(dsq, method='ward.D2') ## subjecting the distance matrix to Hierarchical clustering using Wards criterion.
NbClust(data=as.numeric(scale(hwl_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15) ##this function is a package that provides over 30 different indices for determining the optimum number of clusters within a dataset. Here the data has been specified first making sure the data is read as numeric and is scaled. The 2:22 is an index call saying I only want to use all of the variables represented by column 2 through 22, or in this case, GOL through GS. 6 indices were able to be calculated with this dataset and 4 of those propose an optimal number of clusters of 3.

## Warning in sqrt(zvargss$varbgss/zvargss$vartss): NaNs produced

## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##

## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]

## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]

## ******************************************************************* 
## * Among all indices:                                                
## * 4 proposed 3 as the best number of clusters 
## * 1 proposed 4 as the best number of clusters 
## * 1 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  3 
##  
##  
## *******************************************************************

## $All.index
##         KL        CH  Hartigan      CCC     Scott  Marriot TrCovW   TraceW
## 2   1.2201 -504.0082 4592.6661 -15.2097  492.0031 1115.609     NA 278.9021
## 3  15.2176  201.0612  319.9759 -13.2154  980.9663 1155.113     NA 128.3459
## 4   0.3560  308.6113   61.5617 -13.6771 1304.4185 1228.933     NA  76.8083
## 5   0.4936  269.1802 1635.6517 -16.0943 1493.7914 1421.683     NA  56.8673
## 6   6.1535 1104.2697   57.8197 -17.1974 1680.3266 1522.566     NA  42.2935
## 7   4.3585 1013.5023  155.4864 -17.5674 1859.4061 1559.624     NA  31.8291
## 8   0.2449 1105.9621   46.9671 -15.8811 2085.9875 1421.702     NA  22.2141
## 9   2.1794 1044.9771   96.2327 -14.9890 2266.4794 1351.109     NA  16.6804
## 10  0.2944 1081.7575  340.6208 -15.4984 2382.8832 1386.633     NA  13.8663
## 11  7.9903 1540.0304   92.6788 -16.0790 2484.4294 1428.057     NA  11.8021
## 12  0.9290 1615.4560  134.7729 -16.4850 2581.8839 1455.934     NA  10.1107
## 13  0.7552 1812.0680  136.3710 -16.2660 2692.7865 1432.896     NA   8.4787
## 14  0.3162 2049.5410  232.9606 -15.7909 2805.4229 1389.752     NA   7.0906
## 15  4.1066 2635.2371  120.6605 -15.1169 2918.7839 1332.655     NA   5.9229
##    Friedman    Rubin Cindex     DB Silhouette   Duda Pseudot2  Beale Ratkowsky
## 2    1.1836   2.1836 0.2148 0.7215     0.5124 0.3812 751.6896 0.5400       NaN
## 3    3.7450   4.7450 0.2142 0.6565     0.5181 0.2850 928.0325 0.8338    0.3609
## 4    6.9288   7.9288 0.1473 0.6921     0.5171 0.4400 207.4215 0.4216    0.3862
## 5    9.7091  10.7091 0.1600 0.6100     0.5225 0.2942 218.2928 0.7909    0.3557
## 6   13.3994  14.3994 0.2208 0.5783     0.5243 0.2944 361.8547 0.7935    0.3870
## 7   18.1335  19.1335 0.2346 0.6006     0.4991 0.2540 637.2888 0.9744    0.3600
## 8   26.4150  27.4150 0.1819 0.5936     0.5131 0.2775 393.2187 0.8623    0.3402
## 9   35.5100  36.5100 0.1471 0.5908     0.5118 0.2013 261.9238 1.3031    0.3216
## 10  42.9193  43.9193 0.1374 0.5925     0.5127 0.1912  97.2701 1.3510    0.3066
## 11  50.6009  51.6009 0.1718 0.5468     0.5207 0.3970  95.7078 0.4985    0.2956
## 12  59.2335  60.2335 0.1642 0.5447     0.5138 0.2696 349.3985 0.8959    0.2838
## 13  70.8273  71.8273 0.1455 0.5527     0.5169 0.2407 274.4311 1.0395    0.2735
## 14  84.8887  85.8887 0.1309 0.5496     0.5307 0.2524 254.7418 0.9760    0.2642
## 15 101.8211 102.8211 0.1160 0.5452     0.5185 0.2579 247.4848 0.9482    0.2561
##         Ball Ptbiserial   Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2  1542.2474     0.5085 0.2661  0.2943 0.0071 0.0018  7.1048 0.5202 1.3462
## 3   123.6792     0.6308 1.2025  0.4109 0.0057 0.0025  5.0695 0.3672 0.6489
## 4    61.4167     0.5470 0.0171  0.6806 0.0057 0.0026  5.5803 0.2694 0.5415
## 5    44.7341     0.5590 0.2850  0.6415 0.0069 0.0028  5.1923 0.2445 0.3673
## 6    10.3063     0.5601 1.0283  0.6310 0.0102 0.0030  4.9885 0.2198 0.3244
## 7     8.0849     0.5366 1.0914  0.6781 0.0121 0.0032  6.3314 0.1903 0.2671
## 8     5.6613     0.4756 0.7706  0.8035 0.0117 0.0032  8.0393 0.1532 0.2162
## 9     4.6790     0.4430 0.4903  0.8341 0.0101 0.0032  9.7445 0.1287 0.1729
## 10    3.6461     0.4374 0.1649  0.8209 0.0101 0.0033  9.4811 0.1167 0.1513
## 11    2.1393     0.4374 0.4239  0.8128 0.0129 0.0033  9.4682 0.1114 0.1203
## 12    1.7056     0.4345 1.4640  0.8001 0.0129 0.0033  9.0027 0.1055 0.1235
## 13    1.2926     0.3986 0.8380  0.8887 0.0092 0.0033 14.8892 0.0943 0.1152
## 14    0.9830     0.3820 0.7366  0.9013 0.0092 0.0033 15.4388 0.0850 0.0941
## 15    0.6657     0.3664 0.9704  0.8986 0.0092 0.0034 15.6309 0.0771 0.0951
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.2720          1238.9574       0.4628
## 3          0.2613          1046.2223       0.3618
## 4          0.2100           613.0321       0.5171
## 5          0.1591           480.8357       0.3762
## 6          0.2041           588.6771       0.3744
## 7          0.2303           725.3174       0.3247
## 8          0.2041           588.6771       0.3546
## 9          0.1245           464.0158       0.2578
## 10        -0.0306          -775.8496       0.2570
## 11         0.1191           466.0796       0.4828
## 12         0.1913           545.3661       0.3457
## 13         0.1546           475.7524       0.3108
## 14         0.1534           474.5732       0.3260
## 15         0.1534           474.5732       0.3329
## 
## $Best.nc
##                      KL       CH Hartigan      CCC    Scott  Marriot TrCovW
## Number_clusters  3.0000   15.000     3.00   3.0000   3.0000   4.0000   -Inf
## Value_Index     15.2176 2635.237  4272.69 -13.2154 488.9632 118.9303      3
##                  TraceW Friedman   Rubin Cindex      DB Silhouette   Duda
## Number_clusters 99.0187  16.9324 -1.6857  0.116  0.5447     0.5307 0.3812
## Value_Index     15.0000   9.0000 15.0000 12.000 14.0000     2.0000 2.0000
##                 PseudoT2 Beale Ratkowsky     Ball PtBiserial Frey McClain
## Number_clusters 751.6896  0.54     0.387 1418.568     0.6308   NA  0.2943
## Value_Index       2.0000  6.00     3.000    3.000     1.0000    2 11.0000
##                   Dunn Hubert SDindex Dindex   SDbw
## Number_clusters 0.0129      0  4.9885      0 0.0941
## Value_Index     0.0000      6  0.0000     14 3.0000
## 
## $Best.partition
##   [1] 1 2 3 3 2 1 3 3 3 2 1 3 1 3 2 3 1 3 1 1 2 3 3 3 1 3 3 3 3 3 1 2 3 3 2 1 3
##  [38] 3 3 2 1 3 1 3 2 3 1 3 1 1 2 3 3 3 1 3 3 3 3 1 1 2 3 3 3 3 3 3 2 3 1 3 1 1
##  [75] 3 1 1 3 1 3 2 3 2 3 1 3 3 3 3 3 1 2 1 3 3 2 3 3 2 3 1 3 1 1 3 1 3 3 1 3 3
## [112] 3 2 3 1 3 2 3 3 3 1 3 3 3 3 2 1 1 2 3 3 3 3 3 3 1 1 3 3 3 3 3 3 3 1 3 2 2
## [149] 3 3 1 2 3 3 3 2 1 1 2 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 2 2 1 3 3 2 1 1 3
## [186] 3 3 1 2 2 3 2 1 1 3 3 1 3 1 3 3 3 3 3 1 3 3 3 3 3 3 2 1 1 3 3 3 1 2 2 3 2
## [223] 3 1 3 3 1 3 1 3 3 3 3 3 1 3 2 3 3 2 1 2 1 1 3 3 1 1 2 2 2 2 3 3 3 3 3 1 3
## [260] 3 3 1 1 3 3 3 2 3 3 3 1 2 3 3 3 3 1 1 3 2 3 3 3 3 3 3 3 3 1 1 3 3 1 3 1 3
## [297] 2 3 1 3 1 2 3 3 2 1 2 3 2 3 1 2 1 3 3 1 1 3 1 3 2 3 3 3 1 1 3 1 3 1 3 2 1
## [334] 1 3 3 3 1 2 3 3 3 1 1 3 3 1 1 3 3 3 3 3 3 1 2 3 3 3 3 3 2 1 1 3 3 3 1 2 2
## [371] 3 3 1 1 3 3 1 3 1 3 3 3 3 3 1 2 3 2 3 3 1 2 1 1 3 3 3 1 2 3 3 2 3 1 3 3 1
## [408] 3 1 3 3 3 3 3 1 3 3 3 3 3 3 2 1 3 3 3 3 1 3 1 1 2 2 3 3 3 3 3 3 2 2 1 2 3
## [445] 3 1 1 3 2 1 3 2 1 1 3 3 3 1 2 3 3 2 3 1 3 3 3 1 3 3 3 3 1 3 1 1 2 3 3 3 3
## [482] 2 1 3 2 3 3 3 2 3 3 3 2 1 3 3 1 1 1 3 3 1 3 3 1 2 3 3 3 3 3 2 1 3 3 3 3 3
## [519] 2 2 1 3 3 3 3 1 1 3 1 3 3 1 3 3 1 3 3 3 3 3 3 3 3 1 3 3 3 1 2 3 3 3 1 1 3
## [556] 3 1 3 1 3 3 3 1 3 1 2 3 2 3 3 1 2 3 3 2 1 3 1 3 3 3 3 1 1 2 3 1 3 1 3 2 3
## [593] 3 3 1 3 3 3 3 3 1 2 1 3 3 3 3 1 2 3 1 3 3 1 3 1 1 3 1 3 3 3 3 3 1 3 2 3 3
## [630] 3

plot(hc, labels=labs, hang=-1, cex=0.8) ##hierarchical cluster dendrogram that has been specified to hang the group labels at the same level.
hc_morph<-rect.hclust(hc, k=3, border=2:4) ## graphically depicting the clusters identified by the NbClust call above. The border call indicates that I want different three different colors, one for each of the the k=3 clusters.

morphogroup<-cutree(hc, k=3) ## assigning each Population its determined cluster.
hwl_sum<-hwl_sum %>% mutate(cluster=morphogroup) ##Assigning each Population cluster, here called morphogroup.
hwl_sum$cluster<-as.factor(hwl_sum$cluster) ## Setting assigned cluster as a factor.
mg1<-hwl_sum %>% filter(cluster==1) %>% droplevels() ## subsetting hwl to only include individuals from morphogroup 1.
mg2<-hwl_sum %>% filter(cluster==2) %>% droplevels() ## subsetting hwl to only include individuals from morphogroup 2.
mg3<-hwl_sum%>% filter(cluster==3) %>% droplevels() ## subsetting hwl to only include individuals from morphogroup 3.
c1d<-mg1$Population ## creating an object that is comprised of the Population labels from cluster 1. 
c1<-hwl %>% filter(Population %in% c1d) %>% droplevels() ## filtering original howell dataset  to include populations from morphogroup 1.
c1$Morphogroup=1 ## creating new column Morphogroup and setting it to 1.
c1$Morphogroup <- as.factor(c1$Morphogroup) ## setting Morphogroup as a factor.
c2d<-mg2$Population ## creating an object that is comprised of the Population labels from cluster 2. 
c2<-hwl %>% filter(Population %in% c2d) %>% droplevels() ## filtering original howell dataset  to include populations from morphogroup 2.
c2$Morphogroup=2 ## creating new column Morphogroup and setting it to 2.
c2$Morphogroup<-as.factor(c2$Morphogroup) ## setting Morphogroup as a factor.
c3d<-mg3$Population ## creating an object that is comprised of the Population labels from cluster 3. 
c3<-hwl %>% filter(Population %in% c3d) %>% droplevels() ## filtering original howell dataset  to include populations from morphogroup 3.
c3$Morphogroup=3 ## creating new column Morphogroup and setting it to 3.
c3$Morphogroup<-as.factor(c3$Morphogroup) ## setting Morphogroup as a factor.
hwlr<-rbind(c1, c2) ## combinging the rows of individuals from morphogroups 1 and 2.
hwlr<-rbind(hwlr, c3) ## combining the rows of individuals from morphogroups and 2 with those of 3. 
describeBy(hwlr, group='Morphogroup') ## summary statistics by morphogroup.

## 
##  Descriptive statistics by group 
## Morphogroup: 1
##              vars   n       mean        sd    median    trimmed       mad
## ID              1 766    2060.33    732.11    2148.5    2060.61   1163.84
## Sex*            2 766       1.59      0.49       2.0       1.62      0.00
## PopNum*         3 766       4.98      2.71       5.0       4.94      2.97
## Population*     4 766       5.20      2.67       5.0       5.15      2.97
## GOL             5 766     182.27      8.00     182.0     182.21      8.90
## NOL             6 766     179.72      7.33     180.0     179.69      7.41
## BNL             7 766     103.09      5.34     103.0     102.98      5.93
## BBH             8 766     136.55      6.66     137.0     136.56      5.93
## XCB             9 766     139.80      7.97     139.0     139.43      7.41
## XFB            10 766     114.71      6.58     114.0     114.36      5.93
## ZYB            11 766     135.26      7.06     135.0     135.19      7.41
## AUB            12 766     125.16      6.65     125.0     124.93      7.41
## WCB            13 766      72.37      5.12      72.0      72.32      4.45
## ASB            14 766     108.48      5.65     108.0     108.34      5.93
## BPL            15 766     100.46      5.47     100.0     100.42      5.93
## NPH            16 766      68.58      4.98      68.0      68.51      4.45
## NLH            17 766      52.53      3.53      52.0      52.45      2.97
## JUB            18 766     118.73      5.79     119.0     118.69      5.93
## NLB            19 766      26.73      2.37      27.0      26.78      2.97
## MAB            20 766      64.68      3.91      64.0      64.60      4.45
## MDH            21 766      28.23      3.94      28.0      28.29      4.45
## MDB            22 766      13.29      2.17      13.0      13.21      1.48
## OBH            23 766      34.86      1.97      35.0      34.86      1.48
## OBB            24 766      40.39      1.88      40.0      40.35      1.48
## GS             25 766 3483665.18 347586.26 3463843.0 3475462.57 371784.19
## Morphogroup*   26 766       1.00      0.00       1.0       1.00      0.00
##                  min     max   range  skew kurtosis       se
## ID              1082    3064    1982 -0.09    -1.71    26.45
## Sex*               1       2       1 -0.38    -1.86     0.02
## PopNum*            1      11      10  0.05    -1.19     0.10
## Population*        1      11      10  0.13    -0.90     0.10
## GOL              158     206      48  0.03    -0.24     0.29
## NOL              157     200      43  0.01    -0.18     0.26
## BNL               89     120      31  0.19    -0.26     0.19
## BBH              115     155      40 -0.02    -0.03     0.24
## XCB              120     167      47  0.52     0.47     0.29
## XFB               97     145      48  0.56     0.67     0.24
## ZYB              117     158      41  0.09    -0.35     0.26
## AUB              106     149      43  0.33     0.05     0.24
## WCB               59      89      30  0.11    -0.20     0.19
## ASB               90     128      38  0.23     0.18     0.20
## BPL               82     117      35  0.06    -0.02     0.20
## NPH               53      82      29  0.08    -0.12     0.18
## NLH               42      65      23  0.17     0.10     0.13
## JUB              102     138      36  0.11    -0.11     0.21
## NLB               20      35      15 -0.13    -0.08     0.09
## MAB               52      77      25  0.23     0.10     0.14
## MDH               18      39      21 -0.14    -0.44     0.14
## MDB                7      20      13  0.33    -0.10     0.08
## OBH               30      41      11 -0.02    -0.15     0.07
## OBB               35      46      11  0.22     0.05     0.07
## GS           2640000 4640688 2000688  0.22    -0.37 12558.80
## Morphogroup*       1       1       0   NaN      NaN     0.00
## ------------------------------------------------------------ 
## Morphogroup: 2
##              vars   n       mean        sd    median    trimmed       mad
## ID              1 160    1355.63    756.97     799.5    1206.29    121.57
## Sex*            2 160       1.48      0.50       1.0       1.47      0.00
## PopNum*         3 160      12.44      0.50      12.0      12.42      0.00
## Population*     4 160      12.56      0.50      13.0      12.58      0.00
## GOL             5 160     170.26      8.44     170.0     170.17      8.90
## NOL             6 160     168.82      7.98     169.0     168.69      8.90
## BNL             7 160      92.45      4.22      92.0      92.22      4.45
## BBH             8 160     123.33      5.98     123.0     123.11      5.93
## XCB             9 160     131.96      5.06     132.0     131.87      4.45
## XFB            10 160     108.27      4.98     108.0     108.12      4.45
## ZYB            11 160     120.16      5.69     120.0     120.20      4.45
## AUB            12 160     110.49      5.24     111.0     110.41      4.45
## WCB            13 160      67.75      3.83      68.0      67.71      4.45
## ASB            14 160     101.49      5.99     101.0     101.25      5.93
## BPL            15 160      91.93      4.45      92.0      91.80      4.45
## NPH            16 160      57.59      4.48      58.0      57.47      4.45
## NLH            17 160      44.15      3.01      44.0      44.11      2.97
## JUB            18 160     108.62      5.00     108.0     108.52      4.45
## NLB            19 160      25.59      2.25      25.0      25.47      1.48
## MAB            20 160      59.15      2.94      59.0      59.09      2.97
## MDH            21 160      23.66      3.69      23.5      23.65      3.71
## MDB            22 160      10.52      2.09      10.0      10.39      1.48
## OBH            23 160      31.58      2.06      32.0      31.62      1.48
## OBB            24 160      37.78      1.86      38.0      37.69      1.48
## GS             25 160 2775011.75 270873.08 2756518.5 2764591.17 267584.84
## Morphogroup*   26 160       2.00      0.00       2.0       2.00      0.00
##                  min     max   range  skew kurtosis       se
## ID               716    3190    2474  1.21     0.63    59.84
## Sex*               1       2       1  0.10    -2.00     0.04
## PopNum*           12      13       1  0.25    -1.95     0.04
## Population*       12      13       1 -0.25    -1.95     0.04
## GOL              151     191      40  0.10    -0.61     0.67
## NOL              151     190      39  0.16    -0.58     0.63
## BNL               84     104      20  0.49    -0.15     0.33
## BBH              107     138      31  0.28    -0.13     0.47
## XCB              118     146      28  0.18     0.09     0.40
## XFB               97     120      23  0.25    -0.29     0.39
## ZYB              105     136      31  0.00     0.26     0.45
## AUB               98     126      28  0.19     0.07     0.41
## WCB               59      79      20  0.15    -0.23     0.30
## ASB               88     117      29  0.31    -0.35     0.47
## BPL               81     106      25  0.29    -0.07     0.35
## NPH               48      72      24  0.26    -0.16     0.35
## NLH               36      51      15  0.11    -0.50     0.24
## JUB               97     125      28  0.28     0.42     0.40
## NLB               21      33      12  0.52     0.31     0.18
## MAB               52      70      18  0.33     0.49     0.23
## MDH               16      31      15  0.00    -0.74     0.29
## MDB                7      20      13  0.95     2.18     0.17
## OBH               26      37      11 -0.13     0.05     0.16
## OBB               33      43      10  0.41     0.31     0.15
## GS           2162468 3634348 1471880  0.38    -0.06 21414.40
## Morphogroup*       2       2       0   NaN      NaN     0.00
## ------------------------------------------------------------ 
## Morphogroup: 3
##              vars    n       mean        sd    median    trimmed       mad
## ID              1 1598    1179.74    928.42     889.5    1103.96    915.51
## Sex*            2 1598       1.52      0.50       2.0       1.53      0.00
## PopNum*         3 1598      21.69      4.89      22.0      21.61      5.93
## Population*     4 1598      22.31      4.75      22.0      22.32      5.93
## GOL             5 1598     178.58      8.03     178.0     178.55      7.41
## NOL             6 1598     176.33      7.50     176.0     176.30      7.41
## BNL             7 1598      97.89      4.93      98.0      97.83      4.45
## BBH             8 1598     130.13      6.10     130.0     129.99      5.93
## XCB             9 1598     135.92      6.60     136.0     135.84      5.93
## XFB            10 1598     113.31      6.12     113.0     113.14      5.93
## ZYB            11 1598     129.67      6.83     130.0     129.57      7.41
## AUB            12 1598     119.41      6.38     119.0     119.29      5.93
## WCB            13 1598      70.65      4.48      71.0      70.59      4.45
## ASB            14 1598     106.62      5.31     106.0     106.51      5.93
## BPL            15 1598      97.08      6.37      97.0      96.98      5.93
## NPH            16 1598      65.57      4.91      66.0      65.63      4.45
## NLH            17 1598      49.39      3.40      49.0      49.36      2.97
## JUB            18 1598     114.12      5.58     114.0     114.02      5.93
## NLB            19 1598      26.16      2.28      26.0      26.12      2.97
## MAB            20 1598      63.44      3.88      63.0      63.31      4.45
## MDH            21 1598      27.30      3.63      27.0      27.31      4.45
## MDB            22 1598      12.02      1.91      12.0      11.97      1.48
## OBH            23 1598      33.31      2.08      33.0      33.37      1.48
## OBB            24 1598      39.22      1.92      39.0      39.20      1.48
## GS             25 1598 3165353.90 330867.43 3147312.0 3155852.74 348130.79
## Morphogroup*   26 1598       3.00      0.00       3.0       3.00      0.00
##                  min     max   range  skew kurtosis      se
## ID                 1    3167    3166  0.61    -0.92   23.22
## Sex*               1       2       1 -0.10    -1.99    0.01
## PopNum*           14      30      16  0.07    -1.17    0.12
## Population*       14      30      16 -0.01    -1.17    0.12
## GOL              155     203      48  0.06    -0.23    0.20
## NOL              153     199      46  0.06    -0.12    0.19
## BNL               83     114      31  0.09    -0.20    0.12
## BBH              111     152      41  0.19    -0.22    0.15
## XCB              116     161      45  0.13     0.03    0.17
## XFB               95     135      40  0.32     0.14    0.15
## ZYB              111     151      40  0.14    -0.39    0.17
## AUB               98     143      45  0.22    -0.01    0.16
## WCB               57      87      30  0.14    -0.08    0.11
## ASB               89     127      38  0.17    -0.10    0.13
## BPL               80     123      43  0.18     0.00    0.16
## NPH               50      81      31 -0.07     0.01    0.12
## NLH               40      61      21  0.08    -0.06    0.08
## JUB               99     132      33  0.15    -0.29    0.14
## NLB               19      34      15  0.19    -0.19    0.06
## MAB               54      78      24  0.30    -0.10    0.10
## MDH               17      39      22  0.02    -0.28    0.09
## MDB                6      20      14  0.27     0.39    0.05
## OBH               26      41      15 -0.21     0.17    0.05
## OBB               33      46      13  0.14    -0.13    0.05
## GS           2327314 4399488 2072174  0.28    -0.24 8276.86
## Morphogroup*       3       3       0   NaN      NaN    0.00

pcs<-principal(scale(hwlr[5:25]), rotate = 'varimax', nfactors=3) ## performing PCA with Varimax rotation on the scaled craniometric variables and GS and storing it as an object.
pcs ## examining results of PCA from above.

## Principal Components Analysis
## Call: principal(r = scale(hwlr[5:25]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      RC1   RC2   RC3   h2   u2 com
## GOL 0.83  0.18  0.13 0.74 0.26 1.1
## NOL 0.81  0.17  0.17 0.71 0.29 1.2
## BNL 0.85  0.12  0.28 0.81 0.19 1.3
## BBH 0.62  0.24  0.28 0.52 0.48 1.7
## XCB 0.03  0.87  0.20 0.79 0.21 1.1
## XFB 0.04  0.81  0.13 0.67 0.33 1.1
## ZYB 0.56  0.66  0.25 0.81 0.19 2.3
## AUB 0.34  0.78  0.32 0.82 0.18 1.7
## WCB 0.18  0.78  0.07 0.64 0.36 1.1
## ASB 0.29  0.69  0.10 0.57 0.43 1.4
## BPL 0.85 -0.05 -0.05 0.73 0.27 1.0
## NPH 0.38  0.40  0.68 0.77 0.23 2.3
## NLH 0.39  0.43  0.67 0.78 0.22 2.4
## JUB 0.64  0.57  0.14 0.75 0.25 2.1
## NLB 0.49  0.19 -0.43 0.47 0.53 2.3
## MAB 0.54  0.52  0.03 0.56 0.44 2.0
## MDH 0.45  0.38  0.17 0.38 0.62 2.2
## MDB 0.54  0.33  0.09 0.41 0.59 1.7
## OBH 0.13  0.18  0.79 0.67 0.33 1.2
## OBB 0.59  0.30  0.20 0.48 0.52 1.7
## GS  0.64  0.57  0.27 0.81 0.19 2.3
## 
##                        RC1  RC2  RC3
## SS loadings           6.23 5.33 2.32
## Proportion Var        0.30 0.25 0.11
## Cumulative Var        0.30 0.55 0.66
## Proportion Explained  0.45 0.38 0.17
## Cumulative Proportion 0.45 0.83 1.00
## 
## Mean item complexity =  1.7
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  4739.27  with prob <  0 
## 
## Fit based upon off diagonal values = 0.98

MG<-hwlr$Morphogroup ## storing assigned morphogroup to a new object MG.
MG_pcs<-cbind.data.frame(MG, pcs$scores) ## combining the assigned morphogroup object with the principal component scores
MG_pcs<-as.data.frame(MG_pcs) ##converting to a data frame.
MG_pcs$MG<-as.factor(MG_pcs$MG) ## ensuring Morphogrooup is being read as a factor.
pc_sum<-MG_pcs %>% group_by(MG) %>% dplyr::summarize(across(RC1:RC3, mean)) ##creating a summary of principal component scores for each morphogroup.
pc_sum ##viewing principal components summary. Note - the principal call refers to the components as RC, or rotated component. This is a pedantic distinction that can make this function frustrating to use because RC2 might actually be PC1, PC2, or PC3 as normal nomenclature would indicate. Stats people get hung up on the strangest things sometimes.

## # A tibble: 3 x 4
##   MG       RC1     RC2    RC3
## * <fct>  <dbl>   <dbl>  <dbl>
## 1 1      0.489  0.246   0.464
## 2 2     -1.03  -0.706  -0.980
## 3 3     -0.132 -0.0471 -0.124

sort(pcs$loadings[,1], decreasing=TRUE) ##sorting the loadings for PC1

##        BPL        BNL        GOL        NOL        JUB         GS        BBH 
## 0.84886484 0.84533825 0.83348094 0.80871912 0.64078648 0.63970347 0.61887047 
##        OBB        ZYB        MDB        MAB        NLB        MDH        NLH 
## 0.59177929 0.56257625 0.54179221 0.54003314 0.49374381 0.44884319 0.38542125 
##        NPH        AUB        ASB        WCB        OBH        XFB        XCB 
## 0.38498886 0.34019432 0.28545761 0.17841339 0.13446763 0.04302043 0.03340290

sort(pcs$loadings[,2], decreasing=TRUE) ## sorting the loadings for PC2

##         XCB         XFB         AUB         WCB         ASB         ZYB 
##  0.86576233  0.80758972  0.77950190  0.77517128  0.68825637  0.65866864 
##          GS         JUB         MAB         NLH         NPH         MDH 
##  0.57148964  0.56627946  0.51503998  0.43296798  0.40085281  0.38151118 
##         MDB         OBB         BBH         NLB         OBH         GOL 
##  0.33076946  0.29501929  0.24328752  0.18724449  0.18470945  0.17892803 
##         NOL         BNL         BPL 
##  0.17335255  0.12052499 -0.05188129

sort(pcs$loadings[,3], decreasing=TRUE) ## sorting the loadings for PC3

##         OBH         NPH         NLH         AUB         BBH         BNL 
##  0.78607964  0.67853221  0.66909802  0.31553202  0.28081450  0.27727336 
##          GS         ZYB         OBB         XCB         NOL         MDH 
##  0.26903199  0.24905070  0.20439501  0.19748155  0.17411547  0.16744801 
##         JUB         XFB         GOL         ASB         MDB         WCB 
##  0.14496758  0.13441315  0.13112266  0.10142789  0.08935902  0.06546183 
##         MAB         BPL         NLB 
##  0.02752250 -0.05225522 -0.43292758

ggplot(MG_pcs, aes(x=RC1, y=RC2, color = MG)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_sum, size=5) ## creating a plot of PC1 and PC2 scores with their color assigned by Morphogroup, but it could have been by anything you have information for, like Sex, or SES, or time-period...literally whatever.

ggplot(MG_pcs, aes(x=RC2, y=RC3, color = MG)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_sum, size=5) ## creating a plot of PC2 and PC3

ggplot(MG_pcs, aes(x=RC1, y=RC3, color = MG)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_sum, size=5) ## creating a plot of PC1 and PC3

morpho1<-c1[2:25] ##trimming dataframe.
m1_sum<-morpho1 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) ##creating a new data frame that is using the Population label as a grouping variable, which is then being summarized for all variables in the data frame for Morphogroup 1.
m1_dsq<-dist(scale(m1_sum[2:22])) ## creating a distance matrix here and only subjecting columns 2 through 22, which are only the craniometric variables.
m1_hc<-hclust(m1_dsq, method='ward.D2') ##submitting distance object to hierarchical cluster analysis and storing it as the object m1_hc, or morphogtoup1 hierarchical cluster. 
NbClust(data=as.numeric(scale(m1_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15) ## subjecting the craniometric variables to NbClust, see text for description. Also, look into the package vignette for more detail.

## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##

## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]

## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]

## ******************************************************************* 
## * Among all indices:                                                
## * 1 proposed 2 as the best number of clusters 
## * 1 proposed 4 as the best number of clusters 
## * 2 proposed 5 as the best number of clusters 
## * 1 proposed 12 as the best number of clusters 
## * 1 proposed 14 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  5 
##  
##  
## *******************************************************************

## $All.index
##         KL       CH Hartigan     CCC     Scott  Marriot TrCovW  TraceW Friedman
## 2   0.4204 261.8046  91.0243 -5.7653  234.0843 304.9202     NA 76.2301   1.7548
## 3   0.9686 227.4487  63.3586 -8.3900  355.3257 405.9076     NA 45.1008   3.6562
## 4   0.7690 213.9464 332.1555 -9.9143  450.5239 477.8880     NA 29.8680   6.0309
## 5   5.4841 476.1829  68.4848 -7.2662  605.4017 381.9144     NA 15.2766  12.7465
## 6   0.6861 507.8245 130.5371 -7.0311  695.3266 372.6184     NA 10.3505  19.2889
## 7   0.8700 687.3928  22.8969 -6.3663  782.2613 348.1089     NA  7.1043  28.5597
## 8   1.7275 649.7773  35.8428 -7.4323  823.6833 380.0345     NA  5.9380  34.3652
## 9   0.7926 661.4399  26.1276 -8.4543  858.8367 413.0832     NA  5.0998  40.1781
## 10  3.5513 657.0729  39.4666 -9.0584  897.2112 431.9227     NA  4.3192  47.6198
## 11  0.1708 697.7480  88.7489 -9.1044  942.9303 428.7821     NA  3.5437  58.2609
## 12 19.5802 894.1870  40.3955 -8.8319  991.7168 413.1350     NA  2.8690  72.1964
## 13  0.2100 969.7811  24.1731 -8.6941 1034.4901 402.9022     NA  2.3840  87.0859
## 14  0.4304 991.7348   9.8914 -8.1713 1082.8199 379.0583     NA  1.9340 107.5849
## 15  0.7996 959.1395  60.6120 -7.8456 1124.6180 363.1194     NA  1.6139 129.1225
##       Rubin Cindex     DB Silhouette   Duda Pseudot2  Beale Ratkowsky    Ball
## 2    2.7548 0.2426 0.7094     0.5636 0.4366 180.6376 0.4270    0.5164 48.9910
## 3    4.6562 0.2204 0.5932     0.5473 0.3351 245.9892 0.6560    0.4712 23.3710
## 4    7.0309 0.1793 0.5919     0.4689 0.3043 198.8720 0.7533    0.4297 13.7166
## 5   13.7465 0.2238 0.5870     0.5188 0.2651 196.8016 0.9111    0.4228  4.4548
## 6   20.2889 0.1783 0.5796     0.5480 0.2733 164.8694 0.8723    0.3913  2.8490
## 7   29.5597 0.1383 0.5960     0.5557 0.3914  35.7628 0.4967    0.3681  1.5454
## 8   35.3652 0.1824 0.5535     0.5619 0.2206 105.9746 1.1395    0.3452  1.2268
## 9   41.1781 0.1722 0.5437     0.5524 0.3574  25.1761 0.5595    0.3266  0.9395
## 10  48.6198 0.2788 0.5210     0.5554 0.2328 141.7330 1.0737    0.3105  0.7565
## 11  59.2609 0.2524 0.5201     0.5549 0.7341  18.4741 0.1184    0.2969  0.5835
## 12  73.1964 0.2166 0.5070     0.5515 0.3088  87.2888 0.7274    0.2855  0.3812
## 13  88.0859 0.1899 0.5145     0.5492 0.2522  62.2756 0.9436    0.2748  0.2970
## 14 108.5849 0.1739 0.5141     0.5460 0.2453  33.8392 0.9400    0.2650  0.2483
## 15 130.1225 0.1763 0.5193     0.5506 0.2649 105.4434 0.9012    0.2561  0.2216
##    Ptbiserial   Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2      0.5663 0.0810  0.3763 0.0347 0.0049  6.2123 0.4620 1.6436
## 3      0.6438 1.8709  0.4023 0.0366 0.0067  4.2679 0.3695 0.6118
## 4      0.5478 0.3111  0.6916 0.0209 0.0069  5.3679 0.2806 0.4308
## 5      0.5553 0.8401  0.6812 0.0369 0.0083  4.9575 0.2082 0.3070
## 6      0.5161 0.5849  0.7541 0.0199 0.0086  6.8866 0.1681 0.2660
## 7      0.4903 0.1417  0.7476 0.0199 0.0088  6.6610 0.1382 0.1923
## 8      0.4908 0.9699  0.7297 0.0273 0.0090  6.9035 0.1333 0.1576
## 9      0.4798 0.2967  0.7420 0.0273 0.0091  9.2310 0.1222 0.1210
## 10     0.4791 1.2873  0.7332 0.0452 0.0092  9.2269 0.1159 0.1069
## 11     0.4529 1.0348  0.7793 0.0452 0.0092 11.7444 0.1028 0.1015
## 12     0.4222 1.0392  0.8176 0.0452 0.0092 12.9318 0.0900 0.1038
## 13     0.3966 0.5329  0.8497 0.0452 0.0092 15.5783 0.0809 0.0846
## 14     0.3891 0.4039  0.8272 0.0452 0.0093 15.1048 0.0733 0.0787
## 15     0.3869 1.6769  0.8125 0.0476 0.0094 14.5228 0.0689 0.0644
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.1981           566.7476       0.5145
## 3          0.1879           535.8921       0.4195
## 4          0.1546           475.7524       0.3878
## 5          0.1328           463.4387       0.3431
## 6          0.1172           467.1300       0.3539
## 7         -0.0306          -775.8496       0.4880
## 8          0.0152          1944.8165       0.2943
## 9         -0.1290          -122.4991       0.4669
## 10         0.0698           573.3856       0.3059
## 11         0.0928           498.4066       0.7322
## 12         0.0558           660.2764       0.3989
## 13        -0.0473          -464.7772       0.3424
## 14        -0.1829           -71.1410       0.3531
## 15         0.0520           693.4645       0.3485
## 
## $Best.nc
##                      KL       CH Hartigan     CCC    Scott Marriot TrCovW
## Number_clusters 12.0000  14.0000   4.0000  2.0000   5.0000  5.0000   -Inf
## Value_Index     19.5802 991.7348 268.7969 -5.7653 154.8779 86.6775      3
##                  TraceW Friedman   Rubin  Cindex    DB Silhouette   Duda
## Number_clusters 15.8964  21.5376 -3.4654  0.1383 0.507     0.5636 0.4366
## Value_Index     15.0000   7.0000  7.0000 12.0000 2.000     2.0000 2.0000
##                 PseudoT2 Beale Ratkowsky  Ball PtBiserial Frey McClain   Dunn
## Number_clusters 180.6376 0.427    0.5164 25.62     0.6438   NA  0.3763 0.0476
## Value_Index       2.0000 2.000    3.0000  3.00     1.0000    2 15.0000 0.0000
##                 Hubert SDindex Dindex    SDbw
## Number_clusters      0  4.2679      0  0.0644
## Value_Index          3  0.0000     15 12.0000
## 
## $Best.partition
##   [1] 1 2 3 1 1 2 2 4 1 3 1 1 2 3 1 1 2 2 1 1 3 1 4 2 3 5 4 4 1 4 1 3 1 2 1 3 1
##  [38] 2 1 1 2 4 2 1 4 4 5 3 3 4 1 4 2 4 1 1 4 5 2 2 4 4 2 2 4 4 2 4 1 3 2 1 2 4
##  [75] 1 2 5 2 4 5 3 2 1 2 4 4 2 1 1 1 5 3 4 4 2 2 2 1 4 1 4 5 2 2 2 2 2 1 2 1 1
## [112] 2 3 5 1 2 1 1 1 2 4 2 1 5 2 1 4 2 1 2 2 1 3 4 5 2 4 4 2 1 4 2 1 4 1 4 3 2
## [149] 1 2 4 1 2 5 4 1 1 1 3 4 4 2 4 1 2 4 1 5 3 2 1 2 2 4 4 1 2 1 2 2 3 4 2 1 5
## [186] 4 1 2 4 2 1 2 2 4 4 5 4 5 2 3 1 2 1 1 4 1 1 3 1 1 3 1 3 1 1 2 1 1 3 1 1 4
## [223] 4 4 3 4 1 2 4 3 5

m1_labs<-m1_sum$Population ## creating an object that is only the Population labels from the m1_sum object.
plot(m1_hc, labels=m1_labs, hang=-1, cex=0.8) ## plotting the results of the hierarchical clustering and setting the group labels.
hc_morph_1<-rect.hclust(m1_hc, k=5, border=2:7) ## adding rectangles around the number of clusters (k=5) identified from NbClust.

morphogroup_1<-cutree(m1_hc, k=5) ## assigning each Population its determined cluster.
m1_sum<-m1_sum %>% mutate(cluster=morphogroup_1) ##Assigning each Population its determined subcluster.
m1_sum$cluster<-as.factor(m1_sum$cluster) ## Setting cluster as a factor.
m1.1<-m1_sum %>% filter(cluster==1) %>% droplevels() ## subsetting to only include individuals from morphogroup 1.1.
m1.2<-m1_sum %>% filter(cluster==2) %>% droplevels() ## subsetting to only include individuals from morphogroup 1.2.
m1.3<-m1_sum %>% filter(cluster==3) %>% droplevels() ## subsetting to only include individuals from morphogroup 1.3.
m1.4<-m1_sum %>% filter(cluster==4) %>% droplevels() ## subsetting to only include individuals from morphogroup 1.4
m1.5<-m1_sum %>% filter(cluster==5) %>% droplevels() ## subsetting to only include individuals from morphogroup 1.5

c1.1d<-m1.1$Population ## creating an object that is comprised of the Population labels from subcluster 1.1. 
c1.1<-morpho1 %>% filter(Population %in% c1.1d) %>% droplevels() ## filtering Morphogroup 1 dataset  to include populations from sub morphogroup 1.1.
c1.1$Morphogroup=1.1 ## creating new column Morphogroup and setting it to 1.1.
c1.1$Morphogroup <- as.factor(c1.1$Morphogroup) ## setting Morphogroup as a factor.

c1.2d<-m1.2$Population ## creating an object that is comprised of the Population labels from subcluster 1.2. 
c1.2<-morpho1 %>% filter(Population %in% c1.2d) %>% droplevels() ## filtering Morphogroup 1 dataset  to include populations from sub morphogroup 1.2.
c1.2$Morphogroup=1.2 ## creating new column Morphogroup and setting it to 1.2.
c1.2$Morphogroup <- as.factor(c1.2$Morphogroup) ## setting Morphogroup as a factor.

c1.3d<-m1.3$Population ## creating an object that is comprised of the Population labels from subcluster 1.3. 
c1.3<-morpho1 %>% filter(Population %in% c1.3d) %>% droplevels() ## filtering Morphogroup 1 dataset  to include populations from sub morphogroup 1.3.
c1.3$Morphogroup=1.3 ## creating new column Morphogroup and setting it to 1.3.
c1.3$Morphogroup <- as.factor(c1.3$Morphogroup) ## setting Morphogroup as a factor.

c1.4d<-m1.4$Population ## creating an object that is comprised of the Population labels from subcluster 1.4. 
c1.4<-morpho1 %>% filter(Population %in% c1.4d) %>% droplevels() ## filtering Morphogroup 1 dataset  to include populations from sub morphogroup 1.4.
c1.4$Morphogroup=1.4 ## creating new column Morphogroup and setting it to 1.4.
c1.4$Morphogroup <- as.factor(c1.4$Morphogroup) ## setting Morphogroup as a factor.

c1.5d<-m1.5$Population ## creating an object that is comprised of the Population labels from subcluster 1.5. 
c1.5<-morpho1 %>% filter(Population %in% c1.5d) %>% droplevels() ## filtering Morphogroup 1 dataset  to include populations from sub morphogroup 1.5.
c1.5$Morphogroup=1.5 ## creating new column Morphogroup and setting it to 1.5.
c1.5$Morphogroup <- as.factor(c1.5$Morphogroup) ## setting Morphogroup as a factor.

hwl_c1.1<-rbind(c1.1, c1.2) ## combinging the rows of individuals from morphogroups 1 and 2.
hwlr_c1.1_2<-rbind(c1.3, c1.4) ## combining the rows of individuals from morphogroups and 2 with those of 3. 
hwl_c1.1<-rbind(hwl_c1.1, hwlr_c1.1_2) ## combining the previous two data frames
hwl_c1<-rbind(hwl_c1.1,c1.5) ## combining the last data frame
head(hwl_c1) ## looking at the first 6 entries to check work.

## # A tibble: 6 x 25
##   Sex   PopNum Population   GOL   NOL   BNL   BBH   XCB   XFB   ZYB   AUB   WCB
##   <fct> <fct>  <fct>      <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 M     10     MOKAPU       181   178   104   147   146   115   138   129    77
## 2 M     10     MOKAPU       174   172   105   130   134   109   136   124    70
## 3 M     10     MOKAPU       185   183   101   147   150   125   142   131    74
## 4 M     10     MOKAPU       181   179   102   138   136   112   130   120    68
## 5 M     10     MOKAPU       181   179   109   148   146   121   142   133    70
## 6 M     10     MOKAPU       178   176   102   132   134   105   134   123    69
## # … with 13 more variables: ASB <int>, BPL <int>, NPH <int>, NLH <int>,
## #   JUB <int>, NLB <int>, MAB <int>, MDH <int>, MDB <int>, OBH <int>,
## #   OBB <int>, GS <int>, Morphogroup <fct>

describeBy(hwl_c1, group='Morphogroup') ## summary statistics by morphogroup.

## 
##  Descriptive statistics by group 
## Morphogroup: 1.1
##              vars   n       mean        sd    median    trimmed       mad
## Sex*            1 272       1.54      0.50       2.0       1.56      0.00
## PopNum*         2 272       1.95      0.83       2.0       1.94      1.48
## Population*     3 272       2.05      0.83       2.0       2.06      1.48
## GOL             4 272     184.23      8.32     184.0     184.22      8.90
## NOL             5 272     181.33      7.62     181.0     181.32      7.41
## BNL             6 272     105.37      5.41     105.0     105.40      5.93
## BBH             7 272     139.31      6.18     139.0     139.32      5.93
## XCB             8 272     138.12      6.62     139.0     138.14      5.93
## XFB             9 272     114.04      5.79     114.0     114.07      5.93
## ZYB            10 272     133.01      7.14     132.0     132.97      8.90
## AUB            11 272     122.28      5.96     122.0     122.19      5.93
## WCB            12 272      70.53      4.83      70.5      70.52      5.19
## ASB            13 272     107.04      5.45     107.0     107.04      5.93
## BPL            14 272     102.62      5.43     103.0     102.66      4.45
## NPH            15 272      65.99      4.59      66.0      65.98      4.45
## NLH            16 272      50.95      3.36      51.0      50.92      2.97
## JUB            17 272     117.40      5.80     117.5     117.42      6.67
## NLB            18 272      27.17      1.96      27.0      27.14      1.48
## MAB            19 272      63.72      4.03      64.0      63.62      4.45
## MDH            20 272      27.87      4.00      28.0      27.85      4.45
## MDB            21 272      13.65      2.28      14.0      13.59      2.97
## OBH            22 272      34.25      1.73      34.0      34.25      1.48
## OBB            23 272      40.03      1.87      40.0      39.98      1.48
## GS             24 272 3553009.32 372759.24 3574769.0 3547149.89 421437.95
## Morphogroup*   25 272       1.00      0.00       1.0       1.00      0.00
##                  min     max   range  skew kurtosis       se
## Sex*               1       2       1 -0.18    -1.98     0.03
## PopNum*            1       3       2  0.10    -1.54     0.05
## Population*        1       3       2 -0.10    -1.54     0.05
## GOL              160     203      43 -0.06    -0.43     0.50
## NOL              160     199      39 -0.04    -0.40     0.46
## BNL               94     120      26 -0.01    -0.58     0.33
## BBH              123     155      32 -0.02     0.07     0.37
## XCB              122     156      34 -0.06    -0.32     0.40
## XFB               97     127      30 -0.09    -0.15     0.35
## ZYB              117     154      37  0.08    -0.76     0.43
## AUB              106     138      32  0.14    -0.42     0.36
## WCB               59      83      24  0.03    -0.28     0.29
## ASB               94     122      28  0.03    -0.24     0.33
## BPL               87     117      30 -0.06     0.01     0.33
## NPH               53      79      26  0.01    -0.21     0.28
## NLH               42      60      18  0.08    -0.06     0.20
## JUB              102     135      33  0.04    -0.23     0.35
## NLB               22      35      13  0.21     0.41     0.12
## MAB               52      76      24  0.23    -0.04     0.24
## MDH               18      38      20  0.04    -0.61     0.24
## MDB                8      20      12  0.23    -0.37     0.14
## OBH               30      40      10  0.03     0.05     0.10
## OBB               35      46      11  0.23     0.26     0.11
## GS           2640000 4640688 2000688  0.13    -0.41 22601.85
## Morphogroup*       1       1       0   NaN      NaN     0.00
## ------------------------------------------------------------ 
## Morphogroup: 1.2
##              vars   n       mean        sd  median    trimmed       mad     min
## Sex*            1 149       1.82      0.39       2       1.89      0.00       1
## PopNum*         2 149       4.95      0.79       5       4.93      1.48       4
## Population*     3 149       5.05      0.79       5       5.07      1.48       4
## GOL             4 149     179.52      6.76     180     179.40      7.41     162
## NOL             5 149     177.33      6.50     177     177.29      5.93     160
## BNL             6 149     100.86      4.53     101     100.74      4.45      90
## BBH             7 149     138.52      5.85     138     138.50      5.93     125
## XCB             8 149     139.09      5.10     139     139.17      5.93     126
## XFB             9 149     114.52      4.55     115     114.49      4.45     104
## ZYB            10 149     135.63      6.22     135     135.59      5.93     120
## AUB            11 149     125.15      5.51     125     125.19      4.45     111
## WCB            12 149      73.85      3.75      74      73.80      4.45      63
## ASB            13 149     107.77      4.42     108     107.64      4.45      96
## BPL            14 149      98.24      4.81      97      98.01      4.45      83
## NPH            15 149      68.13      3.49      68      68.09      2.97      59
## NLH            16 149      52.25      2.62      52      52.17      2.97      46
## JUB            17 149     120.22      5.02     120     120.31      4.45     107
## NLB            18 149      27.85      1.91      28      27.87      1.48      23
## MAB            19 149      65.70      3.32      65      65.59      2.97      58
## MDH            20 149      29.70      3.06      30      29.74      2.97      20
## MDB            21 149      13.28      1.73      13      13.23      1.48       9
## OBH            22 149      33.83      1.86      34      33.83      1.48      30
## OBB            23 149      39.65      1.65      40      39.60      1.48      37
## GS             24 149 3463608.38 297228.79 3471942 3461498.78 329991.18 2800332
## Morphogroup*   25 149       2.00      0.00       2       2.00      0.00       2
##                  max   range  skew kurtosis       se
## Sex*               2       1 -1.64     0.69     0.03
## PopNum*            6       2  0.09    -1.39     0.06
## Population*        6       2 -0.09    -1.39     0.06
## GOL              196      34  0.06    -0.51     0.55
## NOL              192      32  0.04    -0.55     0.53
## BNL              112      22  0.22    -0.41     0.37
## BBH              153      28  0.05    -0.59     0.48
## XCB              152      26 -0.11    -0.18     0.42
## XFB              127      23  0.05    -0.29     0.37
## ZYB              153      33  0.04     0.09     0.51
## AUB              141      30 -0.01    -0.12     0.45
## WCB               84      21  0.07    -0.25     0.31
## ASB              120      24  0.17    -0.03     0.36
## BPL              115      32  0.50     0.80     0.39
## NPH               77      18  0.11    -0.08     0.29
## NLH               60      14  0.28     0.19     0.21
## JUB              135      28 -0.09     0.06     0.41
## NLB               33      10 -0.02    -0.02     0.16
## MAB               77      19  0.38     0.14     0.27
## MDH               39      19 -0.14     0.26     0.25
## MDB               20      11  0.31     0.49     0.14
## OBH               41      11  0.28     0.60     0.15
## OBB               44       7  0.35    -0.50     0.14
## GS           4158720 1358388  0.05    -0.80 24349.93
## Morphogroup*       2       0   NaN      NaN     0.00
## ------------------------------------------------------------ 
## Morphogroup: 1.3
##              vars   n       mean        sd  median    trimmed       mad     min
## Sex*            1 109       1.50      0.50       2       1.51      0.00       1
## PopNum*         2 109       7.00      0.00       7       7.00      0.00       7
## Population*     3 109       7.00      0.00       7       7.00      0.00       7
## GOL             4 109     176.78      7.66     176     176.75      7.41     158
## NOL             5 109     174.90      7.00     175     174.90      7.41     157
## BNL             6 109      99.30      4.61      99      99.21      4.45      89
## BBH             7 109     129.88      5.68     130     130.01      5.93     115
## XCB             8 109     151.66      6.77     151     151.47      5.93     138
## XFB             9 109     124.17      5.85     124     124.08      5.93     112
## ZYB            10 109     139.39      7.12     139     139.26      8.90     126
## AUB            11 109     132.78      6.30     133     132.70      7.41     120
## WCB            12 109      78.26      4.12      78      78.30      4.45      68
## ASB            13 109     114.59      5.38     115     114.61      5.93     101
## BPL            14 109      96.75      5.12      97      96.76      5.93      82
## NPH            15 109      71.95      4.90      72      71.92      5.93      61
## NLH            16 109      55.14      3.42      55      55.11      2.97      47
## JUB            17 109     119.99      6.49     119     119.66      7.41     107
## NLB            18 109      27.64      2.13      27      27.65      2.97      22
## MAB            19 109      66.80      4.12      66      66.58      4.45      58
## MDH            20 109      27.74      3.59      28      27.81      4.45      18
## MDB            21 109      12.29      1.97      12      12.21      1.48       7
## OBH            22 109      35.39      1.82      35      35.43      1.48      30
## OBB            23 109      40.64      1.93      40      40.61      1.48      36
## GS             24 109 3490758.89 362730.22 3431484 3480350.76 347500.68 2669884
## Morphogroup*   25 109       3.00      0.00       3       3.00      0.00       3
##                  max   range  skew kurtosis       se
## Sex*               2       1 -0.02    -2.02     0.05
## PopNum*            7       0   NaN      NaN     0.00
## Population*        7       0   NaN      NaN     0.00
## GOL              194      36  0.04    -0.50     0.73
## NOL              191      34  0.00    -0.22     0.67
## BNL              113      24  0.25    -0.01     0.44
## BBH              141      26 -0.22    -0.56     0.54
## XCB              167      29  0.23    -0.45     0.65
## XFB              145      33  0.29     0.65     0.56
## ZYB              158      32  0.20    -0.72     0.68
## AUB              149      29  0.13    -0.70     0.60
## WCB               89      21 -0.09    -0.40     0.39
## ASB              128      27  0.01    -0.36     0.52
## BPL              112      30  0.07     0.40     0.49
## NPH               82      21  0.02    -0.75     0.47
## NLH               62      15  0.08    -0.76     0.33
## JUB              138      31  0.44    -0.31     0.62
## NLB               34      12  0.07     0.02     0.20
## MAB               77      19  0.43    -0.30     0.39
## MDH               35      17 -0.23    -0.36     0.34
## MDB               19      12  0.43     0.58     0.19
## OBH               40      10 -0.19     0.03     0.17
## OBB               46      10  0.25     0.24     0.18
## GS           4306095 1636211  0.23    -0.58 34743.25
## Morphogroup*       3       0   NaN      NaN     0.00
## ------------------------------------------------------------ 
## Morphogroup: 1.4
##              vars   n       mean        sd    median    trimmed       mad
## Sex*            1 108       1.49      0.50       1.0       1.49      0.00
## PopNum*         2 108       8.00      0.00       8.0       8.00      0.00
## Population*     3 108       8.00      0.00       8.0       8.00      0.00
## GOL             4 108     184.56      6.48     184.0     184.20      5.93
## NOL             5 108     181.46      5.73     181.0     181.20      5.93
## BNL             6 108     103.31      4.72     103.0     103.32      4.45
## BBH             7 108     135.90      5.68     136.0     135.82      4.45
## XCB             8 108     132.48      4.73     133.0     132.48      4.45
## XFB             9 108     110.46      4.30     111.0     110.49      3.71
## ZYB            10 108     134.88      6.39     134.0     134.60      7.41
## AUB            11 108     123.39      4.89     123.0     123.20      5.93
## WCB            12 108      72.52      4.00      72.0      72.45      4.45
## ASB            13 108     107.56      4.38     107.0     107.43      5.93
## BPL            14 108     101.23      5.04     101.0     101.40      4.45
## NPH            15 108      69.38      4.34      69.0      69.35      4.45
## NLH            16 108      52.25      3.11      52.0      52.24      2.97
## JUB            17 108     117.10      5.72     116.0     116.92      5.93
## NLB            18 108      23.50      1.63      23.5      23.49      2.22
## MAB            19 108      63.86      3.51      64.0      63.85      4.45
## MDH            20 108      25.25      3.58      25.0      25.22      4.45
## MDB            21 108      12.54      1.99      12.0      12.42      1.48
## OBH            22 108      35.66      1.88      36.0      35.73      1.48
## OBB            23 108      41.21      1.76      41.0      41.09      1.48
## GS             24 108 3328638.82 296464.96 3296670.0 3319319.48 269172.70
## Morphogroup*   25 108       4.00      0.00       4.0       4.00      0.00
##                  min     max   range  skew kurtosis       se
## Sex*               1       2       1  0.04    -2.02     0.05
## PopNum*            8       8       0   NaN      NaN     0.00
## Population*        8       8       0   NaN      NaN     0.00
## GOL              173     206      33  0.58     0.03     0.62
## NOL              171     200      29  0.51     0.00     0.55
## BNL               92     119      27  0.15     0.01     0.45
## BBH              122     150      28  0.12     0.07     0.55
## XCB              120     142      22 -0.04    -0.46     0.45
## XFB              100     122      22 -0.06    -0.19     0.41
## ZYB              119     151      32  0.31    -0.36     0.61
## AUB              114     136      22  0.36    -0.40     0.47
## WCB               63      84      21  0.15     0.06     0.38
## ASB              100     120      20  0.23    -0.45     0.42
## BPL               90     114      24 -0.19    -0.38     0.48
## NPH               58      80      22  0.05    -0.01     0.42
## NLH               45      61      16  0.11    -0.01     0.30
## JUB              106     134      28  0.34    -0.31     0.55
## NLB               20      29       9  0.23     0.43     0.16
## MAB               56      73      17  0.09    -0.54     0.34
## MDH               18      34      16  0.12    -0.79     0.34
## MDB                8      18      10  0.53     0.14     0.19
## OBH               30      40      10 -0.33     0.10     0.18
## OBB               38      46       8  0.57     0.10     0.17
## GS           2750000 4217400 1467400  0.43     0.19 28527.35
## Morphogroup*       4       4       0   NaN      NaN     0.00
## ------------------------------------------------------------ 
## Morphogroup: 1.5
##              vars   n       mean        sd    median    trimmed       mad
## Sex*            1 128       1.60      0.49       2.0       1.62      0.00
## PopNum*         2 128       9.23      0.58       9.0       9.08      0.00
## Population*     3 128       9.23      0.58       9.0       9.08      0.00
## GOL             4 128     184.07      7.01     184.0     184.04      7.41
## NOL             5 128     181.74      6.48     182.0     181.67      5.93
## BNL             6 128     103.86      4.34     104.0     103.63      4.45
## BBH             7 128     134.63      5.31     134.0     134.48      5.93
## XCB             8 128     140.27      5.24     140.0     140.22      5.93
## XFB             9 128     111.87      4.37     112.0     111.87      4.45
## ZYB            10 128     136.42      6.45     136.0     136.48      7.41
## AUB            11 128     126.29      5.47     126.0     126.24      5.93
## WCB            12 128      69.41      3.93      69.5      69.34      4.45
## ASB            13 128     107.94      5.23     108.0     107.93      4.45
## BPL            14 128     100.98      4.27     101.0     100.97      4.45
## NPH            15 128      71.05      4.77      72.0      71.06      4.45
## NLH            16 128      54.22      3.27      54.0      54.17      2.97
## JUB            17 128     120.10      5.02     120.0     120.04      5.19
## NLB            18 128      26.43      1.93      26.0      26.42      1.48
## MAB            19 128      64.45      3.51      64.0      64.44      2.97
## MDH            20 128      30.24      3.45      30.0      30.28      2.97
## MDB            21 128      14.04      2.23      14.0      14.01      2.97
## OBH            22 128      36.20      1.61      36.0      36.20      1.48
## OBB            23 128      41.11      1.66      41.0      41.15      1.48
## GS             24 128 3484418.98 333328.84 3464784.0 3484601.97 380117.88
## Morphogroup*   25 128       5.00      0.00       5.0       5.00      0.00
##                  min     max   range  skew kurtosis       se
## Sex*               1       2       1 -0.41    -1.85     0.04
## PopNum*            9      11       2  2.31     3.89     0.05
## Population*        9      11       2  2.31     3.89     0.05
## GOL              168     199      31  0.01    -0.52     0.62
## NOL              167     196      29  0.08    -0.40     0.57
## BNL               93     118      25  0.45     0.44     0.38
## BBH              122     150      28  0.27    -0.01     0.47
## XCB              127     151      24  0.07    -0.58     0.46
## XFB              100     124      24 -0.02    -0.20     0.39
## ZYB              123     153      30 -0.02    -0.54     0.57
## AUB              114     141      27  0.10    -0.26     0.48
## WCB               61      80      19  0.17    -0.55     0.35
## ASB               90     121      31 -0.19     0.57     0.46
## BPL               92     113      21  0.10    -0.55     0.38
## NPH               62      82      20 -0.08    -0.72     0.42
## NLH               46      65      19  0.26     0.19     0.29
## JUB              109     134      25  0.12    -0.37     0.44
## NLB               22      31       9  0.02    -0.53     0.17
## MAB               56      73      17  0.05    -0.33     0.31
## MDH               21      37      16 -0.11    -0.59     0.31
## MDB                9      19      10  0.11    -0.48     0.20
## OBH               33      40       7  0.01    -0.39     0.14
## OBB               37      45       8 -0.12    -0.18     0.15
## GS           2649474 4204200 1554726  0.02    -0.66 29462.39
## Morphogroup*       5       5       0   NaN      NaN     0.00

pcs1<-principal(scale(hwl_c1[4:24]), rotate = 'varimax', nfactors=3) 
pcs1

## Principal Components Analysis
## Call: principal(r = scale(hwl_c1[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC1   RC2   RC3   h2   u2 com
## GOL  0.08  0.87  0.26 0.82 0.18 1.2
## NOL  0.09  0.84  0.28 0.80 0.20 1.2
## BNL -0.03  0.87  0.19 0.80 0.20 1.1
## BBH  0.06  0.78 -0.10 0.62 0.38 1.0
## XCB  0.86 -0.10  0.14 0.77 0.23 1.1
## XFB  0.83 -0.07  0.07 0.69 0.31 1.0
## ZYB  0.72  0.38  0.39 0.81 0.19 2.1
## AUB  0.81  0.14  0.38 0.82 0.18 1.5
## WCB  0.76 -0.09  0.18 0.62 0.38 1.1
## ASB  0.71  0.16  0.21 0.57 0.43 1.3
## BPL -0.04  0.78  0.11 0.62 0.38 1.0
## NPH  0.41  0.17  0.71 0.70 0.30 1.7
## NLH  0.44  0.18  0.66 0.67 0.33 1.9
## JUB  0.66  0.45  0.29 0.72 0.28 2.2
## NLB  0.53  0.27 -0.33 0.46 0.54 2.2
## MAB  0.67  0.31  0.21 0.59 0.41 1.6
## MDH  0.49  0.47  0.00 0.46 0.54 2.0
## MDB  0.25  0.62 -0.02 0.44 0.56 1.3
## OBH  0.03  0.01  0.82 0.67 0.33 1.0
## OBB  0.26  0.28  0.64 0.57 0.43 1.7
## GS   0.55  0.71  0.14 0.83 0.17 2.0
## 
##                        RC1  RC2  RC3
## SS loadings           5.90 5.26 2.89
## Proportion Var        0.28 0.25 0.14
## Cumulative Var        0.28 0.53 0.67
## Proportion Explained  0.42 0.37 0.21
## Cumulative Proportion 0.42 0.79 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
##  with the empirical chi square  1335.09  with prob <  4.3e-189 
## 
## Fit based upon off diagonal values = 0.98

MG_1<-hwl_c1$Morphogroup
MG_1_pcs<-cbind.data.frame(MG_1, pcs1$scores)
MG_1_pcs<-as.data.frame(MG_1_pcs)
MG_1_pcs$MG_1<-as.factor(MG_1_pcs$MG_1)
pc_1_sum<-MG_1_pcs %>% group_by(MG_1) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_1_sum

## # A tibble: 5 x 4
##   MG_1     RC1     RC2    RC3
## * <fct>  <dbl>   <dbl>  <dbl>
## 1 1.1   -0.292  0.465  -0.522
## 2 1.2    0.353 -0.114  -0.589
## 3 1.3    1.37  -1.12    0.368
## 4 1.4   -0.853 -0.0615  0.923
## 5 1.5   -0.239  0.153   0.703

sort(pcs1$loadings[,1], decreasing=TRUE)

##         XCB         XFB         AUB         WCB         ZYB         ASB 
##  0.86231017  0.82513147  0.80941310  0.76391681  0.71649373  0.70852690 
##         MAB         JUB          GS         NLB         MDH         NLH 
##  0.66889735  0.66055733  0.54858352  0.52907155  0.48862664  0.43826642 
##         NPH         OBB         MDB         NOL         GOL         BBH 
##  0.40943700  0.26468548  0.25147953  0.09112167  0.07655382  0.06418611 
##         OBH         BNL         BPL 
##  0.02948380 -0.02851710 -0.03902677

sort(pcs1$loadings[,2], decreasing=TRUE)

##         BNL         GOL         NOL         BBH         BPL          GS 
##  0.87204124  0.86501177  0.84365973  0.78001482  0.77603352  0.71196906 
##         MDB         MDH         JUB         ZYB         MAB         OBB 
##  0.61615923  0.46736082  0.44775852  0.37515627  0.31087973  0.28482125 
##         NLB         NLH         NPH         ASB         AUB         OBH 
##  0.26808253  0.18372014  0.17495931  0.15779251  0.14370990  0.01223405 
##         XFB         WCB         XCB 
## -0.06706507 -0.08699013 -0.09647033

sort(pcs1$loadings[,3], decreasing=TRUE)

##          OBH          NPH          NLH          OBB          ZYB          AUB 
##  0.818183098  0.707185196  0.663198366  0.644262996  0.394727495  0.376701784 
##          JUB          NOL          GOL          MAB          ASB          BNL 
##  0.292960771  0.281196398  0.259337956  0.210405485  0.205757158  0.190867146 
##          WCB           GS          XCB          BPL          XFB          MDH 
##  0.180266921  0.141892256  0.141389721  0.110466634  0.070436774  0.001881795 
##          MDB          BBH          NLB 
## -0.017477956 -0.102162329 -0.327896830

ggplot(MG_1_pcs, aes(x=RC1, y=RC2, color = MG_1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1_sum, size=5)

ggplot(MG_1_pcs, aes(x=RC2, y=RC3, color = MG_1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1_sum, size=5)

ggplot(MG_1_pcs, aes(x=RC1, y=RC3, color = MG_1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1_sum, size=5)

morpho1.1<-c1.1[2:25] 
m1.1_sum<-morpho1.1 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m1.1_dsq<-dist(scale(m1.1_sum[2:22]))
m1.1_hc<-hclust(m1.1_dsq, method='ward.D2') 
NbClust(data=as.numeric(scale(m1.1_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)

## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##

## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]

## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]

## ******************************************************************* 
## * Among all indices:                                                
## * 2 proposed 3 as the best number of clusters 
## * 2 proposed 5 as the best number of clusters 
## * 1 proposed 13 as the best number of clusters 
## * 1 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  3 
##  
##  
## *******************************************************************

## $All.index
##        KL        CH Hartigan    CCC    Scott Marriot TrCovW TraceW Friedman
## 2  0.9620   96.2662 144.8040 0.5291  93.7609 37.9282     NA 9.4821   3.4294
## 3  1.1152  230.9460  13.6701 1.5573 159.0030 30.2966     NA 3.3663  11.4766
## 4  1.4786  190.3725  57.5772 0.7127 190.4245 32.7089     NA 2.0443  19.5449
## 5  1.4172  291.4842  12.3811 2.5978 242.1715 22.4784     NA 0.8991  45.7116
## 6  1.3393  280.5201  30.9094 1.8312 260.2620 24.2896     NA 0.6747  61.2490
## 7  0.9132  359.2676   7.8383 1.7030 281.5704 23.5734     NA 0.4811  86.3018
## 8  0.5420  345.8774  35.9572 1.3898 298.1831 23.6529     NA 0.3696 112.6434
## 9  1.3944  495.8133  31.4637 1.4927 317.4138 22.0608     NA 0.2724 153.2099
## 10 3.3244  688.0294  17.0733 2.2772 342.7007 18.2314     NA 0.1823 229.3716
## 11 1.9416  804.9301  12.9861 2.1990 357.1362 17.5426     NA 0.1450 288.6951
## 12 0.2538  898.0695  23.5838 1.9678 368.8362 17.3387     NA 0.1204 347.8155
## 13 9.3674 1182.2350   6.5037 1.8755 381.2392 16.7124     NA 0.0989 423.7141
## 14 0.1913 1209.0693  15.9422 2.0730 396.1734 15.2918     NA 0.0780 537.3277
## 15 2.3034 1458.7290  10.5407 2.1399 409.0406 14.3115     NA 0.0636 659.3093
##       Rubin Cindex     DB Silhouette   Duda Pseudot2  Beale Ratkowsky   Ball
## 2    4.4294 0.3226 0.4751     0.6553 0.2434 111.8767 1.0079    0.5532 8.1454
## 3   12.4766 0.3360 0.4584     0.6264 0.2242  83.0504 1.1073    0.5431 1.6095
## 4   20.5449 0.2985 0.5108     0.6284 0.1811 104.0352 1.4449    0.4760 0.9831
## 5   46.7116 0.2941 0.5027     0.6395 0.1049  68.2715 2.5286    0.4365 0.3981
## 6   62.2490 0.2702 0.4559     0.6281 0.2664  27.5371 0.8345    0.4002 0.2734
## 7   87.3018 0.3560 0.4816     0.6375 0.2763  39.2894 0.8185    0.3731 0.1519
## 8  113.6434 0.3165 0.5172     0.5879 0.2596  39.9324 0.8874    0.3496 0.1166
## 9  154.2099 0.2557 0.5312     0.5785 0.0913  59.7280 2.8442    0.3311 0.0627
## 10 230.3716 0.2881 0.5023     0.6033 0.1587  21.1997 1.4133    0.3149 0.0356
## 11 289.6951 0.4001 0.4727     0.6114 0.0525  72.1899 4.8127    0.3005 0.0245
## 12 348.8155 0.3805 0.4338     0.6298 0.2006  27.8937 1.1622    0.2879 0.0180
## 13 424.7141 0.3467 0.4150     0.6330 0.7770   2.5830 0.0861    0.2769 0.0113
## 14 538.3277 0.2939 0.4180     0.6333 0.1447  23.6497 1.5766    0.2668 0.0093
## 15 660.3093 0.2679 0.3989     0.6452 0.1340  19.3883 1.6157    0.2579 0.0066
##    Ptbiserial   Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2      0.7500 0.8188  0.2955 0.1052 0.0237  7.6292 0.3320 0.7015
## 3      0.7410 1.6982  0.3814 0.1688 0.0260  4.9347 0.1960 0.5023
## 4      0.6634 0.8066  0.4791 0.1328 0.0276  6.7967 0.1489 0.2454
## 5      0.6085 1.2238  0.4644 0.2103 0.0293  6.2439 0.1028 0.1440
## 6      0.5922 1.4205  0.4683 0.2103 0.0294  9.3004 0.0864 0.1976
## 7      0.5657 2.5980  0.4798 0.2404 0.0294 11.6032 0.0756 0.0933
## 8      0.5052 1.8028  0.5605 0.1226 0.0299 16.1227 0.0660 0.0913
## 9      0.4433 0.6715  0.6339 0.0931 0.0305 16.9758 0.0563 0.0527
## 10     0.4292 0.8915  0.5976 0.1227 0.0305 17.0763 0.0469 0.0413
## 11     0.4212 1.3436  0.5874 0.1825 0.0305 17.9835 0.0423 0.0304
## 12     0.4111 1.7832  0.5898 0.1836 0.0305 23.2065 0.0375 0.0175
## 13     0.3893 1.8875  0.6119 0.1836 0.0306 27.5891 0.0338 0.0020
## 14     0.3496 0.9812  0.6653 0.1836 0.0308 31.5366 0.0294 0.0127
## 15     0.3389 0.8331  0.6538 0.1836 0.0308 31.1455 0.0266 0.0013
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.0439           784.7908       0.3221
## 3         -0.0229         -1072.0019       0.3031
## 4         -0.0306          -775.8496       0.2416
## 5         -0.2595           -38.8310       0.1505
## 6         -0.2052           -58.7306       0.3825
## 7         -0.1143          -146.1979       0.3799
## 8         -0.1290          -122.4991       0.3622
## 9         -0.3330           -24.0181       0.1427
## 10        -0.4407           -13.0759       0.3003
## 11        -0.4407           -13.0759       0.0933
## 12        -0.2932           -30.8768       0.3167
## 13        -0.2305           -48.0468       0.7758
## 14        -0.4407           -13.0759       0.2776
## 15        -0.5175            -8.7974       0.2933
## 
## $Best.nc
##                      KL       CH Hartigan    CCC   Scott Marriot TrCovW  TraceW
## Number_clusters 13.0000   15.000   3.0000 5.0000  3.0000  5.0000   -Inf  4.7938
## Value_Index      9.3674 1458.729 131.1339 2.5978 65.2421 12.0417      3 15.0000
##                 Friedman    Rubin  Cindex     DB Silhouette   Duda PseudoT2
## Number_clusters 121.9816 -16.8382  0.2557 0.3989     0.6553 0.2434 111.8767
## Value_Index      10.0000   9.0000 15.0000 2.0000     2.0000 2.0000   2.0000
##                  Beale Ratkowsky   Ball PtBiserial Frey McClain   Dunn Hubert
## Number_clusters 1.0079    0.5532 6.5359       0.75   NA  0.2955 0.2404      0
## Value_Index     2.0000    3.0000 2.0000       1.00    2  7.0000 0.0000      3
##                 SDindex Dindex    SDbw
## Number_clusters  4.9347      0  0.0013
## Value_Index      0.0000     15 13.0000
## 
## $Best.partition
##  [1] 1 2 3 2 2 3 3 2 3 3 2 2 2 3 2 2 3 1 2 3 1 1 3 2 2 3 1 2 3 3 3 2 1 1 3 2 3 2
## [39] 2 2 3 3 1 2 3 2 3 1 3 3 2 3 2 2 1 3 2 2 3 1 1 3 2

m1.1_labs<-m1.1_sum$Population
plot(m1.1_hc, labels=m1.1_labs, hang=-1, cex=0.8)
pcs1.1<-principal(scale(c1.1[4:24]), rotate = 'varimax', nfactors=3) 
MG_1.1_pcs<-cbind.data.frame(c1.1$Population, pcs1.1$scores)
MG_1.1_pcs<-MG_1.1_pcs %>% dplyr::rename(Population = 'c1.1$Population')
pc_1.1_sum<-MG_1.1_pcs %>% group_by(Population) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_1.1_sum

## # A tibble: 3 x 4
##   Population    RC1    RC2     RC3
## * <fct>       <dbl>  <dbl>   <dbl>
## 1 AINU       -0.385  0.797 -0.370 
## 2 EASTER I    0.691 -0.939 -0.0613
## 3 MOKAPU     -0.263  0.122  0.371

sort(pcs1.1$loadings[,1], decreasing=TRUE)

##          BNL          GOL          BPL          NOL          BBH           GS 
##  0.880601326  0.832309471  0.804734841  0.795696716  0.768213643  0.693358915 
##          MDB          ZYB          MDH          JUB          NLB          NLH 
##  0.656533236  0.562282022  0.559960448  0.546819622  0.520549387  0.504276399 
##          AUB          MAB          NPH          ASB          OBB          XFB 
##  0.469101674  0.465663053  0.457010657  0.429111267  0.254168890  0.107323393 
##          WCB          XCB          OBH 
##  0.040141143  0.037643028 -0.007834046

sort(pcs1.1$loadings[,2], decreasing=TRUE)

##        WCB        XCB        XFB        ASB        AUB        OBB        JUB 
## 0.84333493 0.83558685 0.82564030 0.69364841 0.67277648 0.66810442 0.66620138 
##        ZYB        MAB         GS        MDH        NOL        NPH        GOL 
## 0.66487481 0.60076264 0.54680269 0.41388669 0.36476410 0.35059623 0.30490677 
##        NLB        NLH        OBH        BPL        MDB        BBH        BNL 
## 0.19637037 0.17728856 0.15726941 0.14800835 0.12972844 0.08048060 0.05046791

sort(pcs1.1$loadings[,3], decreasing=TRUE)

##         OBH         NLH         NPH         AUB         BBH         ZYB 
##  0.81806943  0.70600895  0.67069224  0.28863088  0.25824495  0.25381448 
##          GS         OBB         XCB         BNL         JUB         MDB 
##  0.23215267  0.22000991  0.20466537  0.18720592  0.18362999  0.16698524 
##         MDH         MAB         XFB         NOL         BPL         WCB 
##  0.16034606  0.15857939  0.07899570  0.07692024  0.07515935  0.07336469 
##         GOL         NLB         ASB 
##  0.06260495  0.02234662 -0.03690243

ggplot(MG_1.1_pcs, aes(x=RC1, y=RC2, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.1_sum, size=5)
ggplot(MG_1.1_pcs, aes(x=RC2, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.1_sum, size=5)
ggplot(MG_1.1_pcs, aes(x=RC1, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.1_sum, size=5)

morpho1.2<-c1.2[2:25]
m1.2_sum<-morpho1.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m1.2_dsq<-dist(scale(m1.2_sum[2:22]))
m1.2_hc<-hclust(m1.2_dsq, method='ward.D2') 
NbClust(data=as.numeric(scale(m1.2_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)

## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##

## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]

## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]

## ******************************************************************* 
## * Among all indices:                                                
## * 1 proposed 2 as the best number of clusters 
## * 1 proposed 3 as the best number of clusters 
## * 2 proposed 4 as the best number of clusters 
## * 1 proposed 8 as the best number of clusters 
## * 1 proposed 9 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  4 
##  
##  
## *******************************************************************

## $All.index
##         KL       CH Hartigan    CCC    Scott Marriot TrCovW TraceW Friedman
## 2   1.1027 237.1408  11.7879 3.5892 118.4495 25.6313     NA 6.4078   5.5545
## 3   1.7681 144.9613  85.2503 0.5602 149.1574 35.4214     NA 3.9357   9.6715
## 4   1.6046 257.9958  60.0171 1.3333 196.9431 29.4937     NA 1.8434  21.7845
## 5   1.4128 398.4635  11.2536 0.7311 222.0297 30.9466     NA 1.2379  32.9294
## 6   4.6907 376.2706  23.5153 1.9408 261.4605 23.8318     NA 0.6620  62.4446
## 7   0.1046 438.9974  58.5405 2.3460 288.6632 21.0634     NA 0.4299  96.7052
## 8  13.1025 764.1078  11.7270 2.8814 314.7202 18.1922     NA 0.2843 146.7554
## 9   0.2862 797.8417   4.2400 2.5025 328.6470 18.4580     NA 0.2279 183.3099
## 10  1.5403 751.1753   6.2665 2.5991 346.2893 17.2219     NA 0.1722 242.8751
## 11  0.6030 742.3426   2.9711 2.1635 356.7389 17.6536     NA 0.1459 286.8737
## 12  7.1415 699.9617   9.8053 1.8763 367.8136 17.6224     NA 0.1224 342.1991
## 13  0.6543 750.7935   6.6131 1.5443 377.5311 17.7256     NA 0.1049 399.4375
## 14  1.0378 769.5080   5.9935 1.2465 386.9144 17.7128     NA 0.0904 463.7496
## 15  3.7528 785.9966   6.9404 0.9065 395.2151 17.8234     NA 0.0792 529.2005
##       Rubin Cindex     DB Silhouette   Duda Pseudot2  Beale Ratkowsky   Ball
## 2    6.5545 0.3307 0.4216     0.7324 0.2860  84.8882 0.8085    0.6306 4.2966
## 3   10.6715 0.2588 0.5337     0.6278 0.2897  61.3079 0.7860    0.5255 2.4005
## 4   22.7845 0.1992 0.5699     0.6073 0.1513 106.5373 1.7756    0.4820 0.7437
## 5   33.9294 0.1494 0.5017     0.6180 0.3076  22.5144 0.6823    0.4393 0.2949
## 6   63.4446 0.2301 0.5236     0.6355 0.1610  67.7611 1.6134    0.4022 0.2058
## 7   97.7052 0.1717 0.4863     0.6634 0.2192  21.3782 1.0180    0.3740 0.1249
## 8  147.7554 0.2012 0.4725     0.6621 0.1865   8.7244 0.9694    0.3517 0.0534
## 9  184.3099 0.2343 0.4540     0.6620 0.2418  21.9554 0.9148    0.3319 0.0392
## 10 243.8751 0.2571 0.4704     0.6547 0.0914  19.8888 2.2099    0.3150 0.0327
## 11 287.8737 0.2806 0.4448     0.6426 0.2694  18.9875 0.7911    0.3005 0.0266
## 12 343.1991 0.2614 0.4556     0.6183 0.4942  10.2364 0.3102    0.2877 0.0230
## 13 400.4375 0.3279 0.4842     0.5781 0.5656   9.9837 0.2377    0.2766 0.0178
## 14 464.7496 0.2881 0.4981     0.5364 0.0000      NaN    NaN    0.2666 0.0146
## 15 530.2005 0.0424 0.4577     0.5576 0.3399  15.5387 0.5755    0.2576 0.0122
##    Ptbiserial   Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2      0.8389 3.0525  0.2376 0.1085 0.0251  6.9323 0.2708 0.3206
## 3      0.6834 0.8630  0.4312 0.0925 0.0269  7.3853 0.2075 0.5617
## 4      0.6321 1.2686  0.4646 0.0644 0.0296  6.1178 0.1334 0.2906
## 5      0.5705 0.4383  0.5058 0.0644 0.0296  8.5522 0.0965 0.1402
## 6      0.5608 1.0340  0.4447 0.1273 0.0299  8.3498 0.0803 0.1064
## 7      0.5233 0.7105  0.4169 0.1273 0.0306 10.9778 0.0620 0.0829
## 8      0.5128 0.8107  0.3867 0.1731 0.0307 10.6397 0.0523 0.0502
## 9      0.5099 1.3592  0.3798 0.2091 0.0307 13.3890 0.0481 0.0266
## 10     0.4942 1.2945  0.3703 0.2285 0.0307 17.7750 0.0428 0.0101
## 11     0.4906 2.8468  0.3674 0.2565 0.0307 17.8766 0.0399 0.0080
## 12     0.4699 2.3581  0.3804 0.2318 0.0308 27.1170 0.0362 0.0295
## 13     0.4488 3.2959  0.3891 0.2065 0.0308 27.8693 0.0336 0.0400
## 14     0.4070 0.8020  0.4292 0.1215 0.0310 35.1747 0.0309 0.0390
## 15     0.4060 2.0822  0.4259 0.1322 0.0310 36.5356 0.0285 0.0239
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.0351           934.5356       0.3749
## 3         -0.0157         -1619.3999       0.3838
## 4         -0.0664          -305.0001       0.1984
## 5         -0.2052           -58.7306       0.4281
## 6         -0.1452          -102.5426       0.2263
## 7         -0.3330           -24.0181       0.3519
## 8         -0.6214            -5.2183       0.4286
## 9         -0.2932           -30.8768       0.3707
## 10        -0.6214            -5.2183       0.2755
## 11        -0.2932           -30.8768       0.4033
## 12        -0.2052           -58.7306       0.5898
## 13        -0.1452          -102.5426       0.6340
## 14        -1.0294             0.0000          NaN
## 15        -0.2595           -38.8310       0.4698
## 
## $Best.nc
##                      KL       CH Hartigan    CCC   Scott Marriot TrCovW  TraceW
## Number_clusters  8.0000   9.0000   3.0000 2.0000  4.0000  4.0000   -Inf  1.4869
## Value_Index     13.1025 797.8417  73.4624 3.5892 47.7857  7.3806      4 15.0000
##                 Friedman    Rubin Cindex     DB Silhouette  Duda PseudoT2
## Number_clusters   65.451 -15.5666 0.0424 0.4216     0.7324 0.286  84.8882
## Value_Index       10.000  15.0000 2.0000 2.0000     2.0000 2.000   2.0000
##                  Beale Ratkowsky   Ball PtBiserial   Frey McClain   Dunn Hubert
## Number_clusters 0.8085    0.6306 1.8961     0.8389 3.0525  0.2376 0.2565      0
## Value_Index     2.0000    3.0000 2.0000     2.0000 2.0000 11.0000 0.0000      4
##                 SDindex Dindex  SDbw
## Number_clusters  6.1178      0 0.008
## Value_Index      0.0000     11 8.000
## 
## $Best.partition
##  [1] 1 1 2 1 1 2 1 3 2 1 1 2 4 4 3 1 2 3 1 3 2 1 3 2 1 2 3 3 1 2 4 3 4 3 1 2 1 3
## [39] 2 1 1 2 1 2 1 3 4 4 3 4 4 3 2 1 4 3 4 4 3 4 1 1 2

m1.2_labs<-m1.2_sum$Population
plot(m1.2_hc, labels=m1.2_labs, hang = -1, cex=0.8)
pcs1.2<-principal(scale(c1.2[4:24]), rotate = 'varimax', nfactors=3) 
pcs1.2 #

## Principal Components Analysis
## Call: principal(r = scale(c1.2[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC1   RC3   RC2   h2   u2 com
## GOL  0.85  0.04  0.26 0.80 0.20 1.2
## NOL  0.85  0.04  0.26 0.79 0.21 1.2
## BNL  0.68  0.04  0.49 0.71 0.29 1.8
## BBH  0.50  0.31  0.40 0.51 0.49 2.6
## XCB  0.04  0.84  0.08 0.71 0.29 1.0
## XFB  0.05  0.76  0.06 0.58 0.42 1.0
## ZYB  0.40  0.61  0.50 0.78 0.22 2.7
## AUB  0.34  0.70  0.44 0.80 0.20 2.2
## WCB  0.11  0.70  0.06 0.50 0.50 1.1
## ASB  0.47  0.48  0.03 0.45 0.55 2.0
## BPL  0.64  0.04  0.22 0.46 0.54 1.2
## NPH  0.20  0.07  0.58 0.38 0.62 1.3
## NLH  0.23  0.17  0.64 0.49 0.51 1.4
## JUB  0.44  0.56  0.43 0.69 0.31 2.8
## NLB  0.42  0.28 -0.22 0.31 0.69 2.3
## MAB  0.57  0.40  0.19 0.51 0.49 2.0
## MDH  0.60  0.31 -0.12 0.47 0.53 1.6
## MDB  0.47  0.39 -0.19 0.40 0.60 2.3
## OBH -0.16 -0.01  0.79 0.65 0.35 1.1
## OBB  0.15  0.14  0.76 0.62 0.38 1.2
## GS   0.64  0.53  0.35 0.81 0.19 2.5
## 
##                        RC1  RC3  RC2
## SS loadings           4.91 4.09 3.43
## Proportion Var        0.23 0.19 0.16
## Cumulative Var        0.23 0.43 0.59
## Proportion Explained  0.40 0.33 0.28
## Cumulative Proportion 0.40 0.72 1.00
## 
## Mean item complexity =  1.7
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.08 
##  with the empirical chi square  440.6  with prob <  2.3e-30 
## 
## Fit based upon off diagonal values = 0.95

MG_1.2_pcs<-cbind.data.frame(c1.2$Population, pcs1.2$scores)
MG_1.2_pcs<-MG_1.2_pcs %>% dplyr::rename(Population = 'c1.2$Population')
pc_1.2_sum<-MG_1.2_pcs %>% group_by(Population) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_1.2_sum

## # A tibble: 3 x 4
##   Population     RC1    RC3    RC2
## * <fct>        <dbl>  <dbl>  <dbl>
## 1 ANYANG      0.390   0.106 -0.296
## 2 GUAM       -0.0288 -0.220  0.762
## 3 PHILLIPI   -0.295   0.162 -0.620

sort(pcs1.2$loadings[,1], decreasing=TRUE)

##         GOL         NOL         BNL         BPL          GS         MDH 
##  0.85369775  0.84509704  0.68498458  0.64136934  0.63728778  0.60351946 
##         MAB         BBH         ASB         MDB         JUB         NLB 
##  0.56617011  0.50398076  0.46911225  0.46762767  0.44039432  0.42109267 
##         ZYB         AUB         NLH         NPH         OBB         WCB 
##  0.39997334  0.33796843  0.23194849  0.20161238  0.14976852  0.10572211 
##         XFB         XCB         OBH 
##  0.05145673  0.03992281 -0.15640193

sort(pcs1.2$loadings[,2], decreasing=TRUE)

##          XCB          XFB          AUB          WCB          ZYB          JUB 
##  0.838782492  0.756676309  0.704701474  0.696274430  0.609673001  0.559612849 
##           GS          ASB          MAB          MDB          MDH          BBH 
##  0.525063691  0.477764229  0.399237478  0.388318085  0.311515092  0.307577921 
##          NLB          NLH          OBB          NPH          GOL          BNL 
##  0.280781146  0.166565168  0.143609665  0.068862429  0.043694727  0.037157185 
##          BPL          NOL          OBH 
##  0.036484277  0.035068478 -0.009390017

sort(pcs1.2$loadings[,3], decreasing=TRUE)

##         OBH         OBB         NLH         NPH         ZYB         BNL 
##  0.78779150  0.76116381  0.63636579  0.58080960  0.50034744  0.48749196 
##         AUB         JUB         BBH          GS         NOL         GOL 
##  0.43833347  0.42761768  0.40140117  0.35350149  0.26418920  0.26373387 
##         BPL         MAB         XCB         WCB         XFB         ASB 
##  0.21543907  0.18512482  0.08418060  0.06498061  0.05758335  0.03216290 
##         MDH         MDB         NLB 
## -0.11646655 -0.18745763 -0.22492122

ggplot(MG_1.2_pcs, aes(x=RC1, y=RC2, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.2_sum, size=5)
ggplot(MG_1.2_pcs, aes(x=RC2, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.2_sum, size=5)
ggplot(MG_1.2_pcs, aes(x=RC1, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.2_sum, size=5)

morpho1.3<-c1.3[1:24] 
m1.3_sum<-morpho1.3 %>% group_by(Sex) %>% dplyr::summarize(across(GOL:GS, mean)) 
pcs1.3<-principal(scale(c1.3[4:24]), rotate = 'varimax', nfactors=3) 
MG_1.3_pcs<-cbind.data.frame(c1.3$Sex, pcs1.3$scores)
MG_1.3_pcs<-MG_1.3_pcs %>% dplyr::rename(Sex = 'c1.3$Sex')
pc_1.3_sum<-MG_1.3_pcs %>% group_by(Sex) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_1.3_sum

## # A tibble: 2 x 4
##   Sex      RC1    RC2    RC3
## * <fct>  <dbl>  <dbl>  <dbl>
## 1 F     -0.503 -0.518 -0.237
## 2 M      0.493  0.508  0.233

sort(pcs1.3$loadings[,1], decreasing=TRUE)

##       XCB       ZYB       AUB       JUB       XFB       WCB       ASB        GS 
## 0.8569526 0.7625678 0.7587810 0.7243988 0.7051496 0.6697491 0.6369257 0.6325684 
##       MDH       MAB       MDB       GOL       OBB       NLB       NOL       NLH 
## 0.5912441 0.5272812 0.5257578 0.3619664 0.3444638 0.3160304 0.3064650 0.2841031 
##       BBH       NPH       BNL       OBH       BPL 
## 0.2554415 0.2280223 0.2055569 0.1656299 0.1583243

sort(pcs1.3$loadings[,2], decreasing=TRUE)

##         BNL         BPL         NOL         GOL          GS         JUB 
##  0.85565419  0.76513220  0.73784744  0.73034123  0.55311407  0.52105710 
##         OBB         BBH         ZYB         MDB         NLB         NPH 
##  0.48379684  0.45754507  0.43122731  0.41283387  0.40955693  0.36976932 
##         NLH         AUB         MAB         MDH         ASB         WCB 
##  0.33762392  0.31051096  0.30010460  0.27851035  0.23796932  0.16097404 
##         XCB         XFB         OBH 
##  0.12407954  0.10032700 -0.03272567

sort(pcs1.3$loadings[,3], decreasing=TRUE)

##          OBH          NLH          NPH          NOL           GS          OBB 
##  0.772596605  0.771562555  0.768635037  0.397752763  0.396720558  0.389948265 
##          GOL          MAB          BBH          XFB          AUB          ZYB 
##  0.388152828  0.368743129  0.308273452  0.297441716  0.285879047  0.271828321 
##          XCB          JUB          ASB          WCB          BNL          NLB 
##  0.253482802  0.227008945  0.220668558  0.180549263  0.162594563  0.098554223 
##          MDH          BPL          MDB 
##  0.038918464 -0.006231387 -0.117247479

ggplot(MG_1.3_pcs, aes(x=RC1, y=RC2, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.3_sum, size=5)

ggplot(MG_1.3_pcs, aes(x=RC2, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.3_sum, size=5)

ggplot(MG_1.3_pcs, aes(x=RC1, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.3_sum, size=5)

morpho1.4<-c1.4[1:24] 
m1.4_sum<-morpho1.4 %>% group_by(Sex) %>% dplyr::summarize(across(GOL:GS, mean)) 
pcs1.4<-principal(scale(c1.4[4:24]), rotate = 'varimax', nfactors=3) 
pcs1.4 ## examining results of PCA.

## Principal Components Analysis
## Call: principal(r = scale(c1.4[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      RC2  RC1   RC3   h2   u2 com
## GOL 0.88 0.19  0.20 0.85 0.15 1.2
## NOL 0.88 0.13  0.20 0.84 0.16 1.1
## BNL 0.83 0.31 -0.01 0.79 0.21 1.3
## BBH 0.72 0.27  0.26 0.65 0.35 1.6
## XCB 0.08 0.47  0.67 0.68 0.32 1.8
## XFB 0.17 0.36  0.74 0.71 0.29 1.6
## ZYB 0.33 0.84  0.24 0.87 0.13 1.5
## AUB 0.24 0.75  0.37 0.76 0.24 1.7
## WCB 0.14 0.68  0.19 0.52 0.48 1.2
## ASB 0.27 0.45  0.31 0.37 0.63 2.4
## BPL 0.69 0.16 -0.28 0.58 0.42 1.4
## NPH 0.67 0.35  0.04 0.57 0.43 1.5
## NLH 0.60 0.40  0.18 0.55 0.45 2.0
## JUB 0.19 0.84  0.09 0.76 0.24 1.1
## NLB 0.17 0.27 -0.48 0.33 0.67 1.9
## MAB 0.34 0.71 -0.01 0.62 0.38 1.4
## MDH 0.33 0.59 -0.08 0.46 0.54 1.6
## MDB 0.27 0.51  0.05 0.33 0.67 1.5
## OBH 0.23 0.13  0.54 0.36 0.64 1.5
## OBB 0.10 0.61  0.10 0.39 0.61 1.1
## GS  0.72 0.39  0.47 0.89 0.11 2.3
## 
##                        RC2  RC1  RC3
## SS loadings           5.28 5.24 2.35
## Proportion Var        0.25 0.25 0.11
## Cumulative Var        0.25 0.50 0.61
## Proportion Explained  0.41 0.41 0.18
## Cumulative Proportion 0.41 0.82 1.00
## 
## Mean item complexity =  1.6
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  242.79  with prob <  2.4e-06 
## 
## Fit based upon off diagonal values = 0.97

MG_1.4_pcs<-cbind.data.frame(c1.4$Sex, pcs1.4$scores)
MG_1.4_pcs<-MG_1.4_pcs %>% dplyr::rename(Sex = 'c1.4$Sex')
pc_1.4_sum<-MG_1.4_pcs %>% group_by(Sex) %>% dplyr::summarize(across(RC2:RC3, mean))
pc_1.4_sum

## # A tibble: 2 x 4
##   Sex      RC2    RC1    RC3
## * <fct>  <dbl>  <dbl>  <dbl>
## 1 F     -0.482 -0.616 -0.106
## 2 M      0.500  0.639  0.110

sort(pcs1.4$loadings[,1], decreasing=TRUE)

##        NOL        GOL        BNL         GS        BBH        BPL        NPH 
## 0.88352094 0.88243888 0.83239525 0.71961894 0.71652584 0.69012610 0.66657444 
##        NLH        MAB        ZYB        MDH        MDB        ASB        AUB 
## 0.60070374 0.34145981 0.32888683 0.32611974 0.26558402 0.26520913 0.24425658 
##        OBH        JUB        NLB        XFB        WCB        OBB        XCB 
## 0.23153874 0.19373810 0.16748331 0.16594146 0.14416740 0.09642814 0.07735697

sort(pcs1.4$loadings[,2], decreasing=TRUE)

##       JUB       ZYB       AUB       MAB       WCB       OBB       MDH       MDB 
## 0.8433597 0.8426717 0.7474713 0.7077281 0.6796706 0.6095263 0.5860515 0.5051223 
##       XCB       ASB       NLH        GS       XFB       NPH       BNL       BBH 
## 0.4714933 0.4545556 0.3998124 0.3873867 0.3589494 0.3540436 0.3073552 0.2746377 
##       NLB       GOL       BPL       OBH       NOL 
## 0.2724973 0.1867217 0.1554656 0.1336713 0.1305433

sort(pcs1.4$loadings[,3], decreasing=TRUE)

##          XFB          XCB          OBH           GS          AUB          ASB 
##  0.741299957  0.668882512  0.538768118  0.470106979  0.370171463  0.309895817 
##          BBH          ZYB          NOL          GOL          WCB          NLH 
##  0.255714622  0.235832910  0.200202507  0.199507869  0.185490316  0.180922192 
##          OBB          JUB          MDB          NPH          BNL          MAB 
##  0.102416784  0.091341239  0.048938486  0.038599569 -0.008985247 -0.009169799 
##          MDH          BPL          NLB 
## -0.075796909 -0.282484623 -0.477128605

ggplot(MG_1.4_pcs, aes(x=RC2, y=RC1, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.4_sum, size=5)

ggplot(MG_1.4_pcs, aes(x=RC1, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.4_sum, size=5)

ggplot(MG_1.4_pcs, aes(x=RC2, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.4_sum, size=5)

morpho1.5<-c1.5[1:24] 
m1.5_sum<-morpho1.5 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m1.5_dsq<-dist(scale(m1.5_sum[2:22]))
m1.5_hc<-hclust(m1.5_dsq, method='ward.D2') 
NbClust(data=as.numeric(scale(m1.5_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)

## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##

## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]

## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]

## ******************************************************************* 
## * Among all indices:                                                
## * 2 proposed 4 as the best number of clusters 
## * 2 proposed 5 as the best number of clusters 
## * 2 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  4 
##  
##  
## *******************************************************************

## $All.index
##         KL        CH Hartigan    CCC    Scott Marriot TrCovW TraceW  Friedman
## 2   5.1347  168.2809  40.8142 3.2315 115.5634 26.8328     NA 6.7082    5.2610
## 3   0.8018  158.2078  97.3233 1.1694 155.1729 32.1957     NA 3.5773   10.7407
## 4   2.0387  303.8442  15.7703 2.1936 205.9802 25.5524     NA 1.5970   25.2989
## 5  16.5975  287.7756  24.5800 3.2948 249.6928 19.9488     NA 0.7980   51.6348
## 6   0.0474  326.9660  44.7529 2.7679 270.5103 20.6429     NA 0.5734   72.2454
## 7   1.3653  485.1944  38.7657 3.3276 299.4914 17.7371     NA 0.3620  115.0281
## 8   1.4243  696.6437  32.8413 4.0144 327.2821 14.9035     NA 0.2329  179.3599
## 9   1.3155  959.8726  29.2393 4.8224 354.4560 12.2538     NA 0.1513  276.6291
## 10 15.0481 1294.0453  12.2072 5.4004 377.5266 10.4893     NA 0.1049  399.4085
## 11  0.3235 1407.0482  13.2730 5.2714 391.4543 10.1747     NA 0.0841  498.4765
## 12  0.2822 1575.9394  23.2631 5.4965 408.3004  9.2676     NA 0.0644  651.5962
## 13  0.9886 2064.2090  26.6533 5.9711 427.0863  8.0722     NA 0.0478  878.3188
## 14  1.8451 2864.7271  19.1366 6.8162 449.3080  6.5792     NA 0.0336 1250.2182
## 15  0.9147 3624.8390  23.0218 7.1522 465.2214  5.8668     NA 0.0261 1609.7694
##        Rubin Cindex     DB Silhouette   Duda Pseudot2  Beale Ratkowsky   Ball
## 2     6.2610 0.3385 0.4262     0.7250 0.2861  82.3592 0.8074    0.6058 5.5870
## 3    11.7407 0.3499 0.5247     0.6374 0.1475 150.3148 1.8557    0.5293 2.2316
## 4    26.2989 0.2643 0.4782     0.6531 0.2148  65.8133 1.1546    0.4846 0.6383
## 5    52.6348 0.2864 0.4636     0.6726 0.2805  30.7838 0.7893    0.4364 0.4029
## 6    73.2454 0.3357 0.4503     0.6774 0.1074 108.0424 2.5724    0.4013 0.2358
## 7   116.0281 0.2900 0.4173     0.6964 0.1626  51.5114 1.5610    0.3744 0.1132
## 8   180.3599 0.2555 0.3903     0.7194 0.0482 157.9881 5.8514    0.3516 0.0586
## 9   277.6291 0.2114 0.3137     0.7539 0.2793  15.4791 0.7371    0.3322 0.0326
## 10  400.4085 0.3028 0.3193     0.7580 0.3171  25.8419 0.6626    0.3155 0.0190
## 11  499.4765 0.2579 0.3642     0.7068 0.1051  42.5588 2.3644    0.3010 0.0141
## 12  652.5962 0.2302 0.3299     0.7180 0.3367  11.8215 0.5629    0.2883 0.0103
## 13  879.3188 0.2221 0.3288     0.7157 0.1896  17.0971 1.1398    0.2771 0.0065
## 14 1251.2182 0.2808 0.3278     0.7218 0.2152  21.8758 1.0417    0.2671 0.0039
## 15 1610.7694 0.2514 0.3127     0.7447 0.2481  12.1209 0.8081    0.2581 0.0026
##    Ptbiserial   Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2      0.8292 2.1573  0.2572 0.3117 0.0250  8.3991 0.2871 0.3705
## 3      0.7084 1.0053  0.4210 0.1208 0.0272  7.2778 0.2086 0.3526
## 4      0.6401 0.8568  0.4817 0.1408 0.0301  5.9975 0.1261 0.2564
## 5      0.5951 1.2161  0.4676 0.2183 0.0302  7.1007 0.0926 0.1274
## 6      0.5680 1.2034  0.4717 0.2961 0.0303 10.1268 0.0809 0.0453
## 7      0.5256 1.0554  0.4677 0.3282 0.0311 11.4363 0.0596 0.0234
## 8      0.4969 1.0225  0.4446 0.2999 0.0311 13.5503 0.0471 0.0168
## 9      0.4756 0.9280  0.4174 0.2999 0.0312 14.9004 0.0359 0.0123
## 10     0.4647 3.4491  0.3958 0.4845 0.0312 16.4987 0.0318 0.0020
## 11     0.4072 1.0773  0.4580 0.1624 0.0314 35.2758 0.0269 0.0078
## 12     0.3957 1.1106  0.4395 0.1624 0.0314 35.8182 0.0226 0.0063
## 13     0.3812 1.0770  0.4145 0.1823 0.0315 35.2663 0.0203 0.0061
## 14     0.3699 1.5393  0.3901 0.2637 0.0315 36.9321 0.0175 0.0062
## 15     0.3534 1.4980  0.3743 0.2735 0.0315 45.2984 0.0157 0.0029
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.0304          1050.8179       0.3754
## 3         -0.0088         -2964.0492       0.1848
## 4         -0.0770          -251.6212       0.2968
## 5         -0.1630           -85.6052       0.3918
## 6         -0.1452          -102.5426       0.1327
## 7         -0.2052           -58.7306       0.2400
## 8         -0.2595           -38.8310       0.0419
## 9         -0.3330           -24.0181       0.4236
## 10        -0.1630           -85.6052       0.4315
## 11        -0.3811           -18.1206       0.1847
## 12        -0.3330           -24.0181       0.4815
## 13        -0.4407           -13.0759       0.3458
## 14        -0.3330           -24.0181       0.3468
## 15        -0.4407           -13.0759       0.4195
## 
## $Best.nc
##                      KL       CH Hartigan     CCC   Scott Marriot TrCovW
## Number_clusters  5.0000   15.000    4.000 15.0000  4.0000  5.0000   -Inf
## Value_Index     16.5975 3624.839   81.553  7.1522 50.8073  6.2978      4
##                  TraceW Friedman    Rubin  Cindex      DB Silhouette   Duda
## Number_clusters  1.1812 371.8994 -23.7113  0.2114  0.3127      0.758 0.2861
## Value_Index     14.0000  10.0000   9.0000 15.0000 10.0000      2.000 2.0000
##                 PseudoT2  Beale Ratkowsky   Ball PtBiserial   Frey McClain
## Number_clusters  82.3592 0.8074    0.6058 3.3555     0.8292 1.0053  0.2572
## Value_Index       2.0000 2.0000    3.0000 2.0000     3.0000 2.0000 10.0000
##                   Dunn Hubert SDindex Dindex  SDbw
## Number_clusters 0.4845      0  5.9975      0 0.002
## Value_Index     0.0000      4  0.0000     10 5.000
## 
## $Best.partition
##  [1] 1 2 2 1 2 2 1 2 3 1 2 3 2 1 3 4 4 3 1 2 3 4 4 3 4 4 3 1 2 2 2 3 1 3 1 2 2 1
## [39] 2 4 4 3 4 3 4 4 4 3 1 3 2 1 2 2 2 1 2 2 1 3 4 4 3

m1.5_labs<-m1.5_sum$Population
plot(m1.5_hc, labels=m1.5_labs, hang = -1, cex=0.8)

pcs1.5<-principal(scale(c1.5[4:24]), rotate = 'varimax', nfactors=3) 
MG_1.5_pcs<-cbind.data.frame(c1.5$Population, pcs1.5$scores)
MG_1.5_pcs<-MG_1.5_pcs %>% dplyr::rename(Population = 'c1.5$Population')
pc_1.5_sum<-MG_1.5_pcs %>% group_by(Population) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_1.5_sum

## # A tibble: 3 x 4
##   Population    RC1     RC3    RC2
## * <fct>       <dbl>   <dbl>  <dbl>
## 1 MORIORI    -0.107 -0.0659  0.108
## 2 N MAORI     0.824 -0.106  -0.894
## 3 S MAORI     0.331  0.817  -0.274

sort(pcs1.5$loadings[,1], decreasing=TRUE)

##        BNL        BPL        MDB        GOL        NLB        JUB        NOL 
## 0.73863146 0.72671305 0.67934410 0.66737592 0.65466778 0.65073261 0.63948657 
##        ZYB        OBB        MDH        ASB        MAB         GS        AUB 
## 0.59903467 0.55159083 0.53741862 0.49723438 0.48100058 0.45546100 0.40367782 
##        BBH        WCB        NLH        NPH        XCB        XFB        OBH 
## 0.37603437 0.30623211 0.20375665 0.18103168 0.10779146 0.09462537 0.06560257

sort(pcs1.5$loadings[,2], decreasing=TRUE)

##         XFB         XCB          GS         BBH         WCB         AUB 
## 0.832474292 0.829104663 0.812799710 0.698573970 0.636846141 0.631769246 
##         MAB         ZYB         GOL         NOL         ASB         JUB 
## 0.541806980 0.539588640 0.517410528 0.487776427 0.455489599 0.452566071 
##         NPH         MDH         MDB         NLH         BNL         OBB 
## 0.370534924 0.356620660 0.301312770 0.272262997 0.239486639 0.138354536 
##         BPL         NLB         OBH 
## 0.134186889 0.060477837 0.005274038

sort(pcs1.5$loadings[,3], decreasing=TRUE)

##         NLH         OBH         NPH         OBB         AUB         JUB 
##  0.81034885  0.80776066  0.72563155  0.54491372  0.39992916  0.35891736 
##         ZYB         XCB         WCB         NOL         BNL         GOL 
##  0.32123706  0.30421896  0.28600926  0.27926312  0.23850022  0.20245430 
##         MDH         MAB         XFB          GS         MDB         BPL 
##  0.19745140  0.19302341  0.16666544  0.16097315  0.12804980  0.08300629 
##         NLB         ASB         BBH 
##  0.04644679  0.01800660 -0.08538726

ggplot(MG_1.5_pcs, aes(x=RC1, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sum, size=5)
ggplot(MG_1.5_pcs, aes(x=RC3, y=RC2, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sum, size=5)
ggplot(MG_1.5_pcs, aes(x=RC1, y=RC2, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sum, size=5)

m1.5_sex<-morpho1.5 %>% group_by(Sex) %>% dplyr::summarize(across(GOL:GS, mean))
MG_1.5_sex_pcs<-cbind.data.frame(c1.5$Sex, pcs1.5$scores)
MG_1.5_sex_pcs<-MG_1.5_sex_pcs %>% dplyr::rename(Sex = 'c1.5$Sex')
pc_1.5_sex_sum<-MG_1.5_sex_pcs %>% group_by(Sex) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_1.5_sex_sum

## # A tibble: 2 x 4
##   Sex      RC1    RC3    RC2
## * <fct>  <dbl>  <dbl>  <dbl>
## 1 F     -0.758 -0.450 -0.259
## 2 M      0.502  0.298  0.172

ggplot(MG_1.5_sex_pcs, aes(x=RC1, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sex_sum, size=5)
ggplot(MG_1.5_sex_pcs, aes(x=RC3, y=RC2, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sex_sum, size=5)
ggplot(MG_1.5_sex_pcs, aes(x=RC1, y=RC2, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sex_sum, size=5)

morpho2<-c2[2:25] 
m2_sum<-morpho2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
pcs2<-principal(scale(morpho2[4:24]), rotate = 'varimax', nfactors=3)
pcs2

## Principal Components Analysis
## Call: principal(r = scale(morpho2[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC2   RC3   RC1   h2   u2 com
## GOL  0.89  0.22 -0.01 0.85 0.15 1.1
## NOL  0.89  0.21  0.03 0.84 0.16 1.1
## BNL  0.63  0.29  0.44 0.68 0.32 2.2
## BBH -0.07  0.44  0.59 0.55 0.45 1.9
## XCB  0.03  0.78  0.22 0.65 0.35 1.2
## XFB  0.25  0.60  0.25 0.48 0.52 1.7
## ZYB  0.38  0.59  0.49 0.73 0.27 2.7
## AUB  0.35  0.66  0.39 0.71 0.29 2.2
## WCB  0.48  0.42  0.31 0.50 0.50 2.7
## ASB  0.71  0.37 -0.23 0.70 0.30 1.8
## BPL  0.48  0.20  0.34 0.39 0.61 2.2
## NPH  0.21  0.19  0.79 0.71 0.29 1.3
## NLH  0.15  0.14  0.87 0.79 0.21 1.1
## JUB  0.43  0.54  0.50 0.73 0.27 2.9
## NLB  0.72 -0.04  0.02 0.52 0.48 1.0
## MAB  0.33  0.42  0.45 0.48 0.52 2.8
## MDH  0.13  0.71 -0.03 0.52 0.48 1.1
## MDB  0.14  0.69 -0.01 0.49 0.51 1.1
## OBH -0.12 -0.03  0.76 0.60 0.40 1.1
## OBB  0.71  0.18  0.28 0.62 0.38 1.4
## GS   0.43  0.64  0.38 0.74 0.26 2.4
## 
##                        RC2  RC3  RC1
## SS loadings           4.91 4.42 3.95
## Proportion Var        0.23 0.21 0.19
## Cumulative Var        0.23 0.44 0.63
## Proportion Explained  0.37 0.33 0.30
## Cumulative Proportion 0.37 0.70 1.00
## 
## Mean item complexity =  1.8
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.08 
##  with the empirical chi square  415.94  with prob <  7.6e-27 
## 
## Fit based upon off diagonal values = 0.97

MG_2<-as.factor(c2$Population)
MG_2_pcs<-cbind.data.frame(MG_2, pcs2$scores)
MG_2_pcs<-MG_2_pcs %>% dplyr::rename(Population = 'MG_2')
pc_2_sum<-as.data.frame(MG_2_pcs) %>% group_by(Population) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_2_sum

## # A tibble: 2 x 4
##   Population    RC2    RC3    RC1
## * <fct>       <dbl>  <dbl>  <dbl>
## 1 ANDAMAN    -0.793  0.295  0.491
## 2 BUSHMAN     0.617 -0.229 -0.382

sort(pcs2$loadings[,1], decreasing=TRUE)

##         GOL         NOL         NLB         OBB         ASB         BNL 
##  0.89179286  0.88935805  0.72267645  0.71462988  0.71115150  0.63140916 
##         BPL         WCB          GS         JUB         ZYB         AUB 
##  0.48246877  0.47577743  0.42909472  0.42738850  0.37786946  0.34565047 
##         MAB         XFB         NPH         NLH         MDB         MDH 
##  0.33377389  0.24709220  0.20586389  0.14868990  0.14203667  0.12747957 
##         XCB         BBH         OBH 
##  0.03177141 -0.07348742 -0.12490433

sort(pcs2$loadings[,2], decreasing=TRUE)

##         XCB         MDH         MDB         AUB          GS         XFB 
##  0.77591046  0.71085588  0.68825670  0.66136670  0.64307902  0.59646926 
##         ZYB         JUB         BBH         WCB         MAB         ASB 
##  0.58850821  0.54180861  0.44275489  0.41746955  0.41544908  0.37123435 
##         BNL         GOL         NOL         BPL         NPH         OBB 
##  0.28788727  0.22382599  0.20914885  0.20265335  0.19101062  0.17864474 
##         NLH         OBH         NLB 
##  0.13725060 -0.02636489 -0.04166433

sort(pcs2$loadings[,3], decreasing=TRUE)

##          NLH          NPH          OBH          BBH          JUB          ZYB 
##  0.866793686  0.794132672  0.761480936  0.594093380  0.503441231  0.492616386 
##          MAB          BNL          AUB           GS          BPL          WCB 
##  0.447372792  0.440245355  0.387350797  0.377482758  0.342553158  0.307692960 
##          OBB          XFB          XCB          NOL          NLB          MDB 
##  0.275402219  0.254212263  0.217638977  0.025209638  0.019524569 -0.008901311 
##          GOL          MDH          ASB 
## -0.010160642 -0.027775512 -0.234102974

ggplot(MG_2_pcs, aes(x=RC2, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum, size=5)

ggplot(MG_2_pcs, aes(x=RC2, y=RC1, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum, size=5)

ggplot(MG_2_pcs, aes(x=RC3, y=RC1, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum, size=5)

MG_2_sex<-as.factor(c2$Sex)
MG_2_pcs_sex<-cbind.data.frame(MG_2_sex, pcs2$scores)
MG_2_pcs_sex<-MG_2_pcs_sex %>% dplyr::rename(Sex = 'MG_2_sex')
pc_2_sum_sex<-as.data.frame(MG_2_pcs_sex) %>% group_by(Sex) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_2_sum_sex

## # A tibble: 2 x 4
##   Sex      RC2    RC3    RC1
## * <fct>  <dbl>  <dbl>  <dbl>
## 1 F     -0.286 -0.538 -0.178
## 2 M      0.316  0.595  0.197

ggplot(MG_2_pcs_sex, aes(x=RC2, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum_sex, size=5)

ggplot(MG_2_pcs_sex, aes(x=RC2, y=RC1, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum_sex, size=5)

ggplot(MG_2_pcs_sex, aes(x=RC3, y=RC1, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum_sex, size=5)

morpho3<-c3[2:25] 
m3_sum<-morpho3 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m3_dsq<-dist(scale(m3_sum[2:22]))
m3_hc<-hclust(m3_dsq, method='ward.D2') 
NbClust(data=as.numeric(scale(m3_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)

## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##

## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]

## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]

## ******************************************************************* 
## * Among all indices:                                                
## * 3 proposed 3 as the best number of clusters 
## * 2 proposed 5 as the best number of clusters 
## * 1 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  3 
##  
##  
## *******************************************************************

## $All.index
##         KL        CH Hartigan      CCC     Scott  Marriot TrCovW   TraceW
## 2   0.6477  187.0039  97.5637 -10.4756  297.9688 583.3335     NA 145.8334
## 3   3.7835  167.5075 356.7525  -7.7663  608.6537 549.7280     NA  61.0809
## 4   0.3026  342.1599 610.4937 -10.6240  734.3284 687.2908     NA  42.9557
## 5  20.3200  850.6365 136.9105  -9.2474  924.2592 630.8232     NA  25.2329
## 6   0.1607  969.8126  40.3839  -8.2605 1079.4542 588.1306     NA  16.3370
## 7   7.6750  905.3052 118.1702  -9.7776 1150.9825 655.1674     NA  13.3708
## 8   0.1323 1051.8355  24.5909 -10.0586 1240.4988 665.9454     NA  10.4054
## 9   6.6750  985.4475  91.7895  -9.3661 1344.8620 629.1942     NA   7.7678
## 10  0.1500 1113.9859  15.6844  -9.7756 1411.6006 644.3346     NA   6.4433
## 11  1.3710 1046.4481  28.5598  -9.4340 1491.2156 623.7983     NA   5.1554
## 12  3.0160 1029.4532  72.4018  -9.6097 1551.3285 627.3261     NA   4.3564
## 13  1.1102 1144.4100  44.1594  -9.3767 1617.4079 611.8306     NA   3.6203
## 14  0.4409 1191.9081  27.9724  -9.2370 1676.8452 600.7509     NA   3.0651
## 15  0.8978 1195.5338  73.9191  -8.8531 1739.1889 579.1340     NA   2.5739
##    Friedman    Rubin Cindex     DB Silhouette   Duda Pseudot2  Beale Ratkowsky
## 2    1.3040   2.3040 0.2488 0.7386     0.4945 0.3021 570.5103 0.7668    0.4153
## 3    4.5009   5.5009 0.2338 0.6840     0.5271 0.2568 306.7924 0.9557    0.4026
## 4    6.8220   7.8220 0.2399 0.6121     0.4948 0.2458 512.5322 1.0169    0.4313
## 5   12.3159  13.3159 0.1630 0.6288     0.5325 0.3258 161.3743 0.6809    0.4257
## 6   19.5669  20.5669 0.1933 0.5751     0.5530 0.2725 283.0255 0.8817    0.3942
## 7   24.1295  25.1295 0.1691 0.5858     0.5343 0.2785  90.6723 0.8396    0.3663
## 8   31.2909  32.2909 0.2204 0.5799     0.5295 0.2328 214.1922 1.0818    0.3455
## 9   42.2553  43.2553 0.2008 0.5731     0.5290 0.3847 110.3797 0.5256    0.3262
## 10  51.1468  52.1468 0.1777 0.5789     0.5272 0.2413 185.5434 1.0308    0.3109
## 11  64.1749  65.1749 0.1532 0.5709     0.5386 0.2118  52.1113 1.1580    0.2966
## 12  76.1274  77.1274 0.1725 0.5377     0.5455 0.1459  64.3991 1.7889    0.2844
## 13  91.8100  92.8100 0.2361 0.5121     0.5488 0.2545 149.3607 0.9574    0.2739
## 14 108.6228 109.6228 0.2165 0.5159     0.5522 0.2479 160.8244 0.9927    0.2644
## 15 129.5397 130.5397 0.1942 0.5126     0.5431 0.3469  52.7058 0.6058    0.2556
##        Ball Ptbiserial   Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2  110.0361     0.5079 0.3874  0.3452 0.0059 0.0030  8.0708 0.5287 1.8614
## 3   57.5430     0.6003 0.7133  0.5380 0.0092 0.0041  5.3369 0.3429 0.7517
## 4   21.4951     0.5918 0.7867  0.5867 0.0106 0.0048  5.0758 0.2827 0.5155
## 5    6.3002     0.5375 0.1772  0.7286 0.0106 0.0048  5.5829 0.2054 0.3123
## 6    3.7800     0.5420 1.8833  0.6727 0.0149 0.0053  5.3629 0.1789 0.2244
## 7    2.9057     0.4798 0.3471  0.8358 0.0149 0.0053 10.1160 0.1554 0.2437
## 8    1.9007     0.4769 0.6583  0.8182 0.0208 0.0055  9.8096 0.1390 0.1920
## 9    1.5783     0.4576 0.8596  0.8201 0.0220 0.0056  9.5170 0.1189 0.1451
## 10   1.1240     0.4365 0.7274  0.8405 0.0220 0.0057 10.1393 0.1077 0.1447
## 11   0.9776     0.4163 0.2163  0.8395 0.0220 0.0057 10.9061 0.0945 0.1303
## 12   0.8278     0.4159 0.2137  0.8277 0.0254 0.0057 10.7979 0.0899 0.0990
## 13   0.6316     0.4155 1.2317  0.8186 0.0353 0.0058 10.6204 0.0849 0.0785
## 14   0.5198     0.3954 1.1724  0.8545 0.0353 0.0058 15.8639 0.0773 0.0676
## 15   0.4485     0.3718 0.6708  0.8963 0.0325 0.0058 18.1104 0.0696 0.0584
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.2386           788.3856       0.3821
## 3          0.1739           503.7204       0.3305
## 4          0.2119           621.2252       0.3147
## 5          0.1432           466.8213       0.4118
## 6          0.1739           503.7204       0.3499
## 7          0.0396           849.4841       0.3658
## 8          0.1227           464.5373       0.3022
## 9          0.1296           463.3035       0.4709
## 10         0.1112           471.6144       0.3141
## 11        -0.1290          -122.4991       0.3001
## 12        -0.1829           -71.1410       0.2081
## 13         0.0928           498.4066       0.3325
## 14         0.0978           488.9659       0.3236
## 15         0.0038          7394.6572       0.4429
## 
## $Best.nc
##                    KL       CH Hartigan     CCC    Scott  Marriot TrCovW
## Number_clusters  5.00   15.000   5.0000  3.0000   3.0000   3.0000   -Inf
## Value_Index     20.32 1195.534 473.5832 -7.7663 310.6849 171.1684      3
##                  TraceW Friedman   Rubin  Cindex     DB Silhouette   Duda
## Number_clusters 66.6273  20.9169 -2.6883  0.1532 0.5121      0.553 0.3021
## Value_Index     15.0000   6.0000 11.0000 13.0000 6.0000      2.000 2.0000
##                 PseudoT2  Beale Ratkowsky    Ball PtBiserial Frey McClain
## Number_clusters 570.5103 0.7668    0.4313 52.4931     0.6003   NA  0.3452
## Value_Index       2.0000 4.0000    3.0000  3.0000     1.0000    2 13.0000
##                   Dunn Hubert SDindex Dindex   SDbw
## Number_clusters 0.0353      0  5.0758      0 0.0584
## Value_Index     0.0000      4  0.0000     15 5.0000
## 
## $Best.partition
##   [1] 1 2 3 1 2 3 2 1 3 2 1 1 3 1 1 3 3 1 2 3 2 2 3 2 1 3 2 1 1 1 1 1 3 3 3 1 3
##  [38] 2 1 1 1 3 3 2 2 3 1 1 1 3 3 1 3 2 2 1 1 3 3 1 2 2 3 1 2 1 3 1 3 1 2 3 1 1
##  [75] 1 3 3 1 1 1 1 2 2 3 1 3 1 2 3 1 1 1 1 3 1 1 1 2 2 2 3 1 3 1 3 1 2 2 1 3 1
## [112] 1 3 1 1 2 1 1 2 3 1 1 3 2 2 1 1 1 1 3 1 1 2 1 1 2 3 3 1 3 2 2 3 3 1 1 3 1
## [149] 1 2 1 3 1 1 1 1 3 2 1 2 1 3 1 3 1 1 2 1 3 2 1 2 3 2 1 2 1 1 1 2 1 1 3 1 3
## [186] 1 3 3 2 2 1 2 1 3 3 1 1 1 3 2 2 1 1 1 3 1 1 1 2 3 3 3 3 1 1 3 2 2 2 1 1 3
## [223] 1 3 1 2 2 3 3 1 2 1 1 1 1 1 1 1 1 1 3 1 3 2 1 1 2 2 2 1 3 3 3 2 3 3 1 3 1
## [260] 1 2 1 3 2 1 3 3 3 2 1 1 1 1 2 1 1 1 3 1 3 3 3 1 3 2 2 1 3 1 1 2 1 3 2 1 1
## [297] 1 3 1 3 1 1 2 3 3 2 3 1 1 1 1 1 1 3 1 3 3 3 2 1 2 2 1 3 2 3 1 1 1 2 1 3 2
## [334] 1 1 3 1 3 1 3 1 1 1 3 2 3 1 3 3 2 1 3 1 2 2 3 1

m3_labs<-m3_sum$Population
plot(m3_hc, labels=m3_labs, hang=-1, cex=0.8)
hc_morph_3<-rect.hclust(m3_hc, k=3, border=2:7)

morphogroup_3<-cutree(m3_hc, k=3) 
m3_sum<-m3_sum %>% mutate(cluster=morphogroup_3) 
m3_sum$cluster<-as.factor(m3_sum$cluster) 
m3.1<-m3_sum %>% filter(cluster==1) %>% droplevels() 
m3.2<-m3_sum %>% filter(cluster==2) %>% droplevels() 
m3.3<-m3_sum %>% filter(cluster==3) %>% droplevels() 

c3.1d<-m3.1$Population 
c3.1<-morpho3 %>% filter(Population %in% c3.1d) %>% droplevels() 
c3.1$Morphogroup=3.1 
c3.1$Morphogroup <- as.factor(c3.1$Morphogroup) 

c3.2d<-m3.2$Population  
c3.2<-morpho3 %>% filter(Population %in% c3.2d) %>% droplevels() 
c3.2$Morphogroup=3.2 
c3.2$Morphogroup <- as.factor(c3.2$Morphogroup) 

c3.3d<-m3.3$Population 
c3.3<-morpho3 %>% filter(Population %in% c3.3d) %>% droplevels() 
c3.3$Morphogroup=3.3 
c3.3$Morphogroup <- as.factor(c3.3$Morphogroup) 

hwl_c3.1<-rbind(c3.1, c3.2) 
hwl_c3<-rbind(hwl_c3.1, c3.3) 
head(hwl_c3)

## # A tibble: 6 x 25
##   Sex   PopNum Population   GOL   NOL   BNL   BBH   XCB   XFB   ZYB   AUB   WCB
##   <fct> <fct>  <fct>      <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 M     13     ARIKARA      178   177   101   131   140   112   133   126    70
## 2 M     13     ARIKARA      172   171   103   131   136   114   140   129    76
## 3 M     13     ARIKARA      188   186   103   130   137   113   140   128    79
## 4 M     13     ARIKARA      180   178   103   133   144   117   145   137    76
## 5 M     13     ARIKARA      182   181   104   134   135   115   141   128    75
## 6 M     13     ARIKARA      186   183   105   131   143   116   139   129    67
## # … with 13 more variables: ASB <int>, BPL <int>, NPH <int>, NLH <int>,
## #   JUB <int>, NLB <int>, MAB <int>, MDH <int>, MDB <int>, OBH <int>,
## #   OBB <int>, GS <int>, Morphogroup <fct>

describeBy(hwl_c3, group='Morphogroup')

## 
##  Descriptive statistics by group 
## Morphogroup: 3.1
##              vars   n       mean        sd  median    trimmed       mad     min
## Sex*            1 330       1.58      0.49       2       1.60      0.00       1
## PopNum*         2 330       2.57      1.08       3       2.59      1.48       1
## Population*     3 330       2.61      1.10       3       2.63      1.48       1
## GOL             4 330     176.55      7.59     176     176.53      7.41     157
## NOL             5 330     174.71      7.13     175     174.69      7.41     156
## BNL             6 330      99.05      4.81      99      99.05      5.93      85
## BBH             7 330     133.79      5.90     134     133.82      5.93     118
## XCB             8 330     137.72      5.48     138     137.60      5.93     124
## XFB             9 330     113.92      5.12     114     113.95      4.45     101
## ZYB            10 330     131.79      7.12     132     131.74      7.41     114
## AUB            11 330     122.48      6.24     123     122.28      5.93     109
## WCB            12 330      73.01      4.00      73      72.92      4.45      63
## ASB            13 330     106.26      5.13     106     106.08      4.45      93
## BPL            14 330      96.45      5.35      96      96.47      5.93      81
## NPH            15 330      68.57      4.36      69      68.53      4.45      54
## NLH            16 330      51.53      3.10      51      51.48      2.97      42
## JUB            17 330     116.15      5.90     116     116.18      5.93      99
## NLB            18 330      26.21      1.88      26      26.19      1.48      21
## MAB            19 330      64.70      3.85      65      64.56      4.45      55
## MDH            20 330      27.95      3.61      28      27.98      4.45      19
## MDB            21 330      11.83      1.76      12      11.79      1.48       7
## OBH            22 330      34.12      1.83      34      34.09      1.48      29
## OBB            23 330      38.92      1.76      39      38.92      1.48      35
## GS             24 330 3260311.60 324988.94 3251316 3252909.63 346016.60 2565120
## Morphogroup*   25 330       1.00      0.00       1       1.00      0.00       1
##                  max   range  skew kurtosis       se
## Sex*               2       1 -0.33    -1.90     0.03
## PopNum*            4       3 -0.08    -1.28     0.06
## Population*        4       3 -0.12    -1.32     0.06
## GOL              195      38  0.03    -0.56     0.42
## NOL              192      36  0.04    -0.46     0.39
## BNL              110      25 -0.03    -0.54     0.26
## BBH              148      30 -0.05    -0.53     0.32
## XCB              153      29  0.16    -0.25     0.30
## XFB              128      27 -0.05    -0.17     0.28
## ZYB              151      37  0.07    -0.31     0.39
## AUB              143      34  0.33     0.01     0.34
## WCB               87      24  0.27     0.02     0.22
## ASB              120      27  0.27    -0.09     0.28
## BPL              110      29  0.00    -0.17     0.29
## NPH               81      27  0.07     0.31     0.24
## NLH               61      19  0.19    -0.05     0.17
## JUB              132      33 -0.02    -0.43     0.32
## NLB               32      11  0.02     0.21     0.10
## MAB               78      23  0.34    -0.11     0.21
## MDH               39      20 -0.03    -0.50     0.20
## MDB               18      11  0.27     0.08     0.10
## OBH               41      12  0.06     0.12     0.10
## OBB               43       8  0.02    -0.57     0.10
## GS           4083345 1518225  0.19    -0.44 17890.05
## Morphogroup*       1       0   NaN      NaN     0.00
## ------------------------------------------------------------ 
## Morphogroup: 3.2
##              vars   n       mean        sd  median    trimmed       mad     min
## Sex*            1 687       1.52      0.50       2       1.52      0.00       1
## PopNum*         2 687       7.86      2.01       8       7.82      2.97       5
## Population*     3 687       8.20      1.85       8       8.21      2.97       5
## GOL             4 687     178.20      8.10     178     178.11      7.41     155
## NOL             5 687     176.26      7.82     176     176.18      7.41     153
## BNL             6 687      96.89      5.01      97      96.82      5.93      83
## BBH             7 687     129.06      5.75     129     128.88      5.93     115
## XCB             8 687     138.66      5.82     138     138.43      5.93     123
## XFB             9 687     115.49      6.12     115     115.27      5.93      99
## ZYB            10 687     129.44      6.58     129     129.35      7.41     113
## AUB            11 687     120.86      5.92     121     120.82      5.93     104
## WCB            12 687      71.03      4.38      71      70.97      4.45      61
## ASB            13 687     108.48      5.01     108     108.47      5.93      95
## BPL            14 687      94.02      5.45      94      93.98      5.93      80
## NPH            15 687      66.00      4.40      66      66.03      4.45      52
## NLH            16 687      49.75      3.10      50      49.73      2.97      41
## JUB            17 687     112.82      5.28     113     112.72      5.93      99
## NLB            18 687      24.82      1.84      25      24.78      1.48      19
## MAB            19 687      62.61      3.72      62      62.44      4.45      54
## MDH            20 687      27.50      3.45      27      27.46      2.97      18
## MDB            21 687      12.29      1.79      12      12.23      1.48       7
## OBH            22 687      33.48      1.97      33      33.53      1.48      27
## OBB            23 687      38.90      1.82      39      38.89      1.48      33
## GS             24 687 3196261.58 328110.57 3170440 3183918.88 338732.59 2437344
## Morphogroup*   25 687       2.00      0.00       2       2.00      0.00       2
##                  max   range  skew kurtosis       se
## Sex*               2       1 -0.08    -2.00     0.02
## PopNum*           11       6  0.14    -1.17     0.08
## Population*       11       6 -0.02    -1.15     0.07
## GOL              201      46  0.10    -0.16     0.31
## NOL              199      46  0.08    -0.07     0.30
## BNL              112      29  0.11    -0.20     0.19
## BBH              146      31  0.26    -0.28     0.22
## XCB              161      38  0.42     0.38     0.22
## XFB              135      36  0.36     0.02     0.23
## ZYB              149      36  0.14    -0.44     0.25
## AUB              140      36  0.11    -0.05     0.23
## WCB               85      24  0.15    -0.17     0.17
## ASB              127      32  0.06    -0.05     0.19
## BPL              114      34  0.12    -0.01     0.21
## NPH               79      27 -0.06    -0.05     0.17
## NLH               59      18  0.03    -0.14     0.12
## JUB              127      28  0.15    -0.35     0.20
## NLB               31      12  0.31     0.34     0.07
## MAB               74      20  0.38    -0.04     0.14
## MDH               38      20  0.14    -0.11     0.13
## MDB               20      13  0.31     0.59     0.07
## OBH               39      12 -0.17     0.04     0.08
## OBB               44      11  0.03    -0.26     0.07
## GS           4399488 1962144  0.36    -0.14 12518.20
## Morphogroup*       2       0   NaN      NaN     0.00
## ------------------------------------------------------------ 
## Morphogroup: 3.3
##              vars   n       mean        sd  median    trimmed       mad     min
## Sex*            1 581       1.50      0.50       1       1.49      0.00       1
## PopNum*         2 581      14.59      1.70      15      14.61      2.97      12
## Population*     3 581      14.52      1.75      15      14.53      2.97      12
## GOL             4 581     180.19      7.88     180     180.19      7.41     157
## NOL             5 581     177.35      7.15     177     177.32      7.41     156
## BNL             6 581      98.40      4.69      98      98.29      4.45      86
## BBH             7 581     129.30      5.82     129     129.21      5.93     111
## XCB             8 581     131.67      5.83     131     131.51      5.93     116
## XFB             9 581     110.39      5.44     110     110.21      4.45      95
## ZYB            10 581     128.75      6.69     128     128.66      7.41     111
## AUB            11 581     115.95      5.37     116     115.85      5.93      98
## WCB            12 581      68.86      4.13      69      68.81      4.45      57
## ASB            13 581     104.61      4.97     104     104.47      4.45      89
## BPL            14 581     101.06      5.76     101     100.99      5.93      84
## NPH            15 581      63.35      4.72      63      63.37      4.45      50
## NLH            16 581      47.74      3.08      48      47.73      2.97      40
## JUB            17 581     114.52      5.33     114     114.42      5.93      99
## NLB            18 581      27.73      1.93      28      27.71      1.48      23
## MAB            19 581      63.71      3.85      64      63.64      4.45      54
## MDH            20 581      26.70      3.75      27      26.72      4.45      17
## MDB            21 581      11.81      2.08      12      11.74      1.48       6
## OBH            22 581      32.64      2.13      33      32.68      1.48      26
## OBB            23 581      39.78      2.00      40      39.75      1.48      34
## GS             24 581 3074872.64 315790.45 3063420 3066899.51 327478.17 2327314
## Morphogroup*   25 581       3.00      0.00       3       3.00      0.00       3
##                  max   range  skew kurtosis       se
## Sex*               2       1  0.02    -2.00     0.02
## PopNum*           17       5 -0.02    -1.25     0.07
## Population*       17       5 -0.03    -1.35     0.07
## GOL              203      46  0.01    -0.19     0.33
## NOL              198      42  0.04    -0.11     0.30
## BNL              114      28  0.22    -0.03     0.19
## BBH              152      41  0.18     0.24     0.24
## XCB              149      33  0.26    -0.13     0.24
## XFB              133      38  0.38     0.38     0.23
## ZYB              148      37  0.13    -0.51     0.28
## AUB              132      34  0.16    -0.11     0.22
## WCB               81      24  0.13    -0.08     0.17
## ASB              123      34  0.31     0.26     0.21
## BPL              123      39  0.15     0.15     0.24
## NPH               76      26 -0.03    -0.18     0.20
## NLH               58      18  0.09    -0.04     0.13
## JUB              131      32  0.16    -0.21     0.22
## NLB               34      11  0.15    -0.21     0.08
## MAB               76      22  0.19    -0.16     0.16
## MDH               37      20  0.00    -0.41     0.16
## MDB               19      13  0.32     0.25     0.09
## OBH               39      13 -0.20     0.04     0.09
## OBB               46      12  0.13    -0.13     0.08
## GS           4030152 1702838  0.24    -0.36 13101.20
## Morphogroup*       3       0   NaN      NaN     0.00

pcs3<-principal(scale(hwl_c3[4:24]), rotate = 'varimax', nfactors=3) 
pcs3

## Principal Components Analysis
## Call: principal(r = scale(hwl_c3[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC2   RC1   RC3   h2    u2 com
## GOL  0.71  0.57 -0.03 0.83 0.169 1.9
## NOL  0.67  0.58 -0.02 0.79 0.213 2.0
## BNL  0.73  0.35  0.19 0.69 0.308 1.6
## BBH  0.38  0.50  0.20 0.43 0.565 2.2
## XCB -0.18  0.80  0.37 0.80 0.196 1.5
## XFB -0.12  0.74  0.31 0.66 0.337 1.4
## ZYB  0.45  0.38  0.66 0.78 0.219 2.4
## AUB  0.11  0.53  0.67 0.74 0.256 2.0
## WCB  0.14  0.39  0.60 0.54 0.464 1.8
## ASB  0.11  0.65  0.31 0.53 0.466 1.5
## BPL  0.87 -0.09  0.02 0.77 0.233 1.0
## NPH  0.19  0.31  0.67 0.58 0.421 1.6
## NLH  0.15  0.35  0.66 0.58 0.415 1.6
## JUB  0.60  0.23  0.61 0.77 0.226 2.3
## NLB  0.63 -0.20  0.02 0.44 0.558 1.2
## MAB  0.56  0.13  0.50 0.58 0.424 2.1
## MDH  0.30  0.32  0.37 0.33 0.671 2.9
## MDB  0.34  0.34  0.29 0.31 0.686 2.9
## OBH -0.10 -0.01  0.66 0.45 0.551 1.0
## OBB  0.60  0.14  0.33 0.49 0.512 1.7
## GS   0.39  0.84  0.25 0.92 0.076 1.6
## 
##                        RC2  RC1  RC3
## SS loadings           4.53 4.51 3.99
## Proportion Var        0.22 0.21 0.19
## Cumulative Var        0.22 0.43 0.62
## Proportion Explained  0.35 0.35 0.31
## Cumulative Proportion 0.35 0.69 1.00
## 
## Mean item complexity =  1.8
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  3575.14  with prob <  0 
## 
## Fit based upon off diagonal values = 0.97

MG_3<-hwl_c3$Morphogroup
MG_3_pcs<-cbind.data.frame(MG_3, pcs3$scores)
MG_3_pcs<-as.data.frame(MG_3_pcs)
MG_3_pcs$MG_3<-as.factor(MG_3_pcs$MG_3)
pc_3_sum<-MG_3_pcs %>% group_by(MG_3) %>% dplyr::summarize(across(RC2:RC3, mean))
pc_3_sum

## # A tibble: 3 x 4
##   MG_3     RC2     RC1     RC3
## * <fct>  <dbl>   <dbl>   <dbl>
## 1 3.1   -0.123 -0.0253  0.743 
## 2 3.2   -0.520  0.448   0.0389
## 3 3.3    0.684 -0.515  -0.468

sort(pcs3$loadings[,1], decreasing=TRUE)

##        BPL        BNL        GOL        NOL        NLB        OBB        JUB 
##  0.8710993  0.7308651  0.7079409  0.6671807  0.6337507  0.5996900  0.5964270 
##        MAB        ZYB         GS        BBH        MDB        MDH        NPH 
##  0.5576735  0.4529363  0.3883827  0.3761783  0.3429594  0.2996543  0.1887839 
##        NLH        WCB        ASB        AUB        OBH        XFB        XCB 
##  0.1506211  0.1381008  0.1147203  0.1135620 -0.1016162 -0.1172894 -0.1811767

sort(pcs3$loadings[,2], decreasing=TRUE)

##           GS          XCB          XFB          ASB          NOL          GOL 
##  0.843354214  0.797085123  0.742168802  0.653364247  0.584549600  0.573480731 
##          AUB          BBH          WCB          ZYB          NLH          BNL 
##  0.531768599  0.501500238  0.390029451  0.376174685  0.350512147  0.348513154 
##          MDB          MDH          NPH          JUB          OBB          MAB 
##  0.339469231  0.316323377  0.307465080  0.227594987  0.143479728  0.128951886 
##          OBH          BPL          NLB 
## -0.009567601 -0.085968013 -0.200679729

sort(pcs3$loadings[,3], decreasing=TRUE)

##         NPH         AUB         NLH         OBH         ZYB         JUB 
##  0.66992615  0.66979012  0.66286823  0.66211989  0.65941459  0.60515526 
##         WCB         MAB         MDH         XCB         OBB         XFB 
##  0.60399674  0.49864220  0.37296063  0.36907025  0.32781268  0.31315281 
##         ASB         MDB          GS         BBH         BNL         BPL 
##  0.30606877  0.28511697  0.24867428  0.20457162  0.19043677  0.02105507 
##         NLB         NOL         GOL 
##  0.01891023 -0.02266377 -0.02588954

ggplot(MG_3_pcs, aes(x=RC2, y=RC1, color = MG_3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3_sum, size=5)

ggplot(MG_3_pcs, aes(x=RC1, y=RC3, color = MG_3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3_sum, size=5)

ggplot(MG_3_pcs, aes(x=RC2, y=RC3, color = MG_3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3_sum, size=5)

morpho3.1<-c3.1 
m3.1_sum<-morpho3.1 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m3.1_dsq<-dist(scale(m3.1_sum[2:22]))
m3.1_hc<-hclust(m3.1_dsq, method='ward.D2') 
NbClust(data=as.numeric(scale(m3.1_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)

## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##

## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]

## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]

## ******************************************************************* 
## * Among all indices:                                                
## * 1 proposed 2 as the best number of clusters 
## * 3 proposed 4 as the best number of clusters 
## * 1 proposed 12 as the best number of clusters 
## * 1 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  4 
##  
##  
## *******************************************************************

## $All.index
##        KL       CH Hartigan     CCC    Scott Marriot TrCovW  TraceW Friedman
## 2  8.1284  82.8509 114.2021 -0.2790 115.9762 63.3554     NA 15.8388   2.9776
## 3  0.6119 154.3150  79.6531 -2.4811 161.5094 82.8994     NA  9.2110   5.8396
## 4  0.5503 227.7468  23.2918 -0.4478 235.6759 60.9502     NA  3.8094  15.5381
## 5  1.5418 223.5344  49.2004 -1.8190 259.0263 72.1223     NA  2.8849  20.8379
## 6  6.4904 296.2415  26.1301 -1.9314 290.9921 70.9845     NA  1.9718  30.9506
## 7  0.1013 329.6430  47.0763 -1.0407 331.1090 59.9302     NA  1.2231  50.5099
## 8  4.9989 456.0222  15.8913 -1.0584 356.3610 57.9526     NA  0.9055  68.5741
## 9  0.2989 478.0652  33.8609 -0.5535 385.7130 51.7158     NA  0.6385  97.6739
## 10 2.7089 612.2896  10.0861  0.0483 414.2908 45.4345     NA  0.4543 137.6612
## 11 0.7265 618.7028   7.1557  0.0816 433.9078 43.5259     NA  0.3597 174.1371
## 12 0.5710 609.7725  25.7408  0.1151 452.1565 41.6846     NA  0.2895 216.6341
## 13 4.6352 750.3686   8.6505 -0.2885 463.6110 42.6853     NA  0.2526 248.4299
## 14 0.9490 766.7510  13.2248 -0.4841 476.7747 42.3243     NA  0.2159 290.7475
## 15 1.1434 835.3331   7.1307 -0.5689 490.5348 41.2452     NA  0.1833 342.6762
##       Rubin Cindex     DB Silhouette   Duda Pseudot2  Beale Ratkowsky    Ball
## 2    3.9776 0.3343 0.5758     0.6205 0.2600 125.2242 0.9276    0.5013 15.6687
## 3    6.8396 0.2980 0.5849     0.5415 0.2151 131.3403 1.1832    0.5138  4.3657
## 4   16.5381 0.3439 0.5624     0.5796 0.2456  67.5903 0.9796    0.4731  1.6509
## 5   21.8379 0.3066 0.5608     0.5283 0.1725  95.9648 1.5233    0.4287  1.0229
## 6   31.9506 0.2660 0.5157     0.5662 0.3053  43.2268 0.7204    0.3979  0.5253
## 7   51.5099 0.3041 0.4985     0.5650 0.2115  55.9087 1.1648    0.3708  0.3372
## 8   69.5741 0.3269 0.5139     0.5769 0.1440  71.3589 1.8297    0.3494  0.1831
## 9   98.6739 0.2801 0.4847     0.6133 0.1442  65.2964 1.8138    0.3301  0.1346
## 10 138.6612 0.2404 0.4740     0.6165 0.3601  21.3279 0.5469    0.3141  0.0835
## 11 175.1371 0.2991 0.5109     0.6002 0.1811  40.6974 1.3566    0.2997  0.0668
## 12 217.6341 0.3297 0.4901     0.6236 0.1856  30.7223 1.2801    0.2871  0.0558
## 13 249.4299 0.3207 0.4777     0.6347 0.1383  37.3717 1.7796    0.2763  0.0379
## 14 291.7475 0.3006 0.4595     0.6310 0.2355  22.7187 0.9466    0.2663  0.0314
## 15 343.6762 0.2774 0.4580     0.6320 0.1520  33.4627 1.5935    0.2574  0.0246
##    Ptbiserial   Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2      0.7040 1.6006  0.3422 0.0498 0.0161  7.0193 0.3779 1.1979
## 3      0.6265 0.5254  0.5706 0.0563 0.0182  5.9286 0.2796 0.7588
## 4      0.6086 1.5964  0.6299 0.1026 0.0214  4.6128 0.1847 0.4635
## 5      0.5607 0.8343  0.7373 0.0716 0.0215  7.8344 0.1560 0.2982
## 6      0.5281 0.6246  0.7846 0.0740 0.0228  7.7211 0.1231 0.1256
## 7      0.4986 1.0009  0.7799 0.1076 0.0230  7.4330 0.1017 0.1454
## 8      0.4669 0.7012  0.8195 0.1341 0.0237 10.1993 0.0867 0.1077
## 9      0.4458 0.7092  0.8112 0.1345 0.0238 10.7961 0.0723 0.0689
## 10     0.4263 1.1390  0.7966 0.1345 0.0238 12.3997 0.0608 0.0877
## 11     0.3993 0.9378  0.8279 0.1265 0.0240 17.0822 0.0555 0.0780
## 12     0.3818 1.2088  0.8340 0.1555 0.0240 18.6262 0.0498 0.0194
## 13     0.3694 0.9844  0.8511 0.1607 0.0240 24.6324 0.0459 0.0079
## 14     0.3589 1.0247  0.8566 0.1607 0.0241 23.4729 0.0418 0.0066
## 15     0.3457 1.0191  0.8656 0.1607 0.0242 22.7709 0.0385 0.0168
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.0730           558.9828       0.3408
## 3          0.0439           784.7908       0.2839
## 4         -0.0387          -590.8616       0.3331
## 5         -0.0566          -373.6603       0.2314
## 6         -0.0664          -305.0001       0.4066
## 7         -0.1143          -146.1979       0.2975
## 8         -0.1630           -85.6052       0.2011
## 9         -0.1829           -71.1410       0.2051
## 10        -0.1630           -85.6052       0.4738
## 11        -0.2305           -48.0468       0.2741
## 12        -0.2932           -30.8768       0.2951
## 13        -0.3330           -24.0181       0.2306
## 14        -0.2932           -30.8768       0.3630
## 15        -0.3330           -24.0181       0.2537
## 
## $Best.nc
##                     KL       CH Hartigan     CCC   Scott Marriot TrCovW  TraceW
## Number_clusters 2.0000  15.0000   4.0000 12.0000  4.0000  4.0000   -Inf  4.4772
## Value_Index     8.1284 835.3331  56.3613  0.1151 74.1665 33.1213      4 15.0000
##                 Friedman    Rubin  Cindex     DB Silhouette Duda PseudoT2
## Number_clusters  51.9286 -10.7012  0.2404  0.458     0.6347 0.26 125.2242
## Value_Index      12.0000  10.0000 15.0000 13.000     2.0000 2.00   2.0000
##                  Beale Ratkowsky   Ball PtBiserial   Frey McClain   Dunn Hubert
## Number_clusters 0.9276    0.5138 11.303      0.704 1.6006  0.3422 0.1607      0
## Value_Index     3.0000    3.0000  2.000      2.000 2.0000 13.0000 0.0000      4
##                 SDindex Dindex   SDbw
## Number_clusters  4.6128      0 0.0066
## Value_Index      0.0000     14 2.0000
## 
## $Best.partition
##  [1] 1 2 3 4 1 2 4 4 3 2 4 1 2 4 4 4 3 1 4 2 3 1 4 2 3 1 1 2 3 1 1 1 4 1 3 2 4 2
## [39] 4 1 4 2 1 4 3 2 4 2 3 1 4 2 3 1 1 2 4 3 2 2 4 2 3 1 2 1 3 1 3 1 1 2 3 2 4 1
## [77] 3 2 4 1 1 2 3 4

m3.1_labs<-m3.1_sum$Population
plot(m3.1_hc, labels=m3.1_labs, hang=-1, cex=0.8)
pcs3.1<-principal(scale(c3.1[4:24]), rotate = 'varimax', nfactors=3) 
pcs3.1

## Principal Components Analysis
## Call: principal(r = scale(c3.1[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC1   RC2   RC3   h2    u2 com
## GOL  0.47  0.73  0.11 0.77 0.229 1.8
## NOL  0.44  0.72  0.14 0.74 0.265 1.7
## BNL  0.23  0.77  0.35 0.76 0.236 1.6
## BBH  0.53  0.58 -0.15 0.64 0.356 2.1
## XCB  0.82  0.00  0.32 0.78 0.218 1.3
## XFB  0.78  0.01  0.28 0.68 0.315 1.2
## ZYB  0.57  0.40  0.58 0.82 0.180 2.7
## AUB  0.57  0.23  0.62 0.76 0.243 2.3
## WCB  0.63  0.17  0.28 0.50 0.500 1.6
## ASB  0.56  0.24  0.34 0.49 0.513 2.1
## BPL -0.05  0.83  0.19 0.73 0.270 1.1
## NPH  0.22  0.40  0.68 0.67 0.331 1.9
## NLH  0.26  0.30  0.71 0.66 0.342 1.6
## JUB  0.49  0.47  0.54 0.76 0.243 3.0
## NLB -0.04  0.39  0.28 0.23 0.770 1.9
## MAB  0.27  0.62  0.30 0.55 0.451 1.9
## MDH  0.46  0.38  0.06 0.36 0.644 2.0
## MDB  0.47  0.40  0.16 0.40 0.597 2.2
## OBH  0.13 -0.09  0.70 0.52 0.478 1.1
## OBB  0.28  0.35  0.62 0.59 0.411 2.0
## GS   0.77  0.57  0.11 0.93 0.073 1.9
## 
##                        RC1  RC2  RC3
## SS loadings           4.93 4.75 3.66
## Proportion Var        0.23 0.23 0.17
## Cumulative Var        0.23 0.46 0.63
## Proportion Explained  0.37 0.36 0.27
## Cumulative Proportion 0.37 0.73 1.00
## 
## Mean item complexity =  1.9
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
##  with the empirical chi square  550.92  with prob <  3.5e-47 
## 
## Fit based upon off diagonal values = 0.98

MG_3.1<-c3.1$Population
MG_3.1_pcs<-cbind.data.frame(MG_3.1, pcs3.1$scores)
MG_3.1_pcs$MG_3.1<-as.factor(MG_3.1_pcs$MG_3.1)
pc_3.1_sum<-MG_3.1_pcs %>% group_by(MG_3.1) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.1_sum

## # A tibble: 4 x 4
##   MG_3.1      RC1     RC2     RC3
## * <fct>     <dbl>   <dbl>   <dbl>
## 1 ARIKARA -0.0629 -0.162   1.00  
## 2 HAINAN  -0.0565 -0.144  -0.340 
## 3 N JAPAN  0.237   0.0468 -0.0392
## 4 S JAPAN -0.128   0.210  -0.411

sort(pcs3.1$loadings[,1], decreasing=TRUE)

##         XCB         XFB          GS         WCB         AUB         ZYB 
##  0.82439351  0.77948672  0.76555531  0.62575495  0.56898542  0.56865604 
##         ASB         BBH         JUB         MDB         GOL         MDH 
##  0.55854484  0.52985517  0.48707550  0.46947848  0.46826566  0.45739433 
##         NOL         OBB         MAB         NLH         BNL         NPH 
##  0.44098441  0.27863314  0.27279594  0.25860107  0.23261787  0.21873623 
##         OBH         NLB         BPL 
##  0.13363120 -0.04195184 -0.04833663

sort(pcs3.1$loadings[,2], decreasing=TRUE)

##          BPL          BNL          GOL          NOL          MAB          BBH 
##  0.830876535  0.766500087  0.734969456  0.722268321  0.617566122  0.584445273 
##           GS          JUB          NPH          ZYB          MDB          NLB 
##  0.573752662  0.472376815  0.404349040  0.396101909  0.395451746  0.387039378 
##          MDH          OBB          NLH          ASB          AUB          WCB 
##  0.377744039  0.354080147  0.298879614  0.243719147  0.230118118  0.168361082 
##          XFB          XCB          OBH 
##  0.014994463 -0.001186767 -0.088322916

sort(pcs3.1$loadings[,3], decreasing=TRUE)

##        NLH        OBH        NPH        OBB        AUB        ZYB        JUB 
##  0.7082554  0.7041761  0.6765996  0.6210068  0.6162912  0.5827246  0.5444586 
##        BNL        ASB        XCB        MAB        WCB        NLB        XFB 
##  0.3503080  0.3398187  0.3198096  0.3044928  0.2828325  0.2806770  0.2769057 
##        BPL        MDB        NOL        GOL         GS        MDH        BBH 
##  0.1923248  0.1610242  0.1376745  0.1096684  0.1069208  0.0627041 -0.1464342

ggplot(MG_3.1_pcs, aes(x=RC1, y=RC2, color = MG_3.1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum, size=5)
ggplot(MG_3.1_pcs, aes(x=RC1, y=RC3, color = MG_3.1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum, size=5)
ggplot(MG_3.1_pcs, aes(x=RC2, y=RC3, color = MG_3.1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum, size=5)
MG_3.1_sex<-c3.1$Sex
MG_3.1_pcs_sex<-cbind.data.frame(MG_3.1_sex, pcs3.1$scores)
pc_3.1_sum_sex<-MG_3.1_pcs_sex %>% group_by(MG_3.1_sex) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.1_sum_sex

## # A tibble: 2 x 4
##   MG_3.1_sex    RC1    RC2    RC3
## * <fct>       <dbl>  <dbl>  <dbl>
## 1 F          -0.609 -0.580 -0.245
## 2 M           0.438  0.417  0.176

ggplot(MG_3.1_pcs_sex, aes(x=RC1, y=RC2, color = MG_3.1_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum_sex, size=5)
ggplot(MG_3.1_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.1_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum_sex, size=5)
ggplot(MG_3.1_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.1_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum_sex, size=5)

morphogroup_1<-cutree(m3.1_hc, k=4) 
m3.1.1_sum<-m3.1_sum %>% mutate(cluster=morphogroup_1) 
m3.1.1_sum$cluster<-as.factor(m3.1.1_sum$cluster) 
m3.1.1<-m3.1.1_sum %>% filter(cluster==1) %>% droplevels() 
m3.1.2<-m3.1.1_sum %>% filter(cluster==2) %>% droplevels() 
m3.1.3<-m3.1.1_sum %>% filter(cluster==3) %>% droplevels() 
m3.1.4<-m3.1.1_sum %>% filter(cluster==4) %>% droplevels() 

c3.1.1d<-m3.1.1$Population  
c3.1.1<-c3.1 %>% filter(Population %in% c3.1.1d) %>% droplevels() 
c3.1.1$Morphogroup="3.1.1" 
c3.1.1$Morphogroup <- as.factor(c3.1.1$Morphogroup) 

c3.1.2d<-m3.1.2$Population  
c3.1.2<-c3.1 %>% filter(Population %in% c3.1.2d) %>% droplevels() 
c3.1.2$Morphogroup="3.1.2" 
c3.1.2$Morphogroup <- as.factor(c3.1.2$Morphogroup) 

c3.1.3d<-m3.1.3$Population  
c3.1.3<-c3.1 %>% filter(Population %in% c3.1.3d) %>% droplevels() 
c3.1.3$Morphogroup="3.1.3" 
c3.1.3$Morphogroup <- as.factor(c3.1.3$Morphogroup) 

c3.1.4d<-m3.1.4$Population 
c3.1.4<-c3.1 %>% filter(Population %in% c3.1.4d) %>% droplevels() 
c3.1.4$Morphogroup="3.1.4" 
c3.1.4$Morphogroup <- as.factor(c3.1.4$Morphogroup) 

hwlr_c3.1<-rbind.data.frame(c3.1.1, c3.1.2)
hwlr_c3.1a<-rbind.data.frame(c3.1.3, c3.1.4)
hwlr_c3.1<-rbind.data.frame(hwlr_c3.1, hwlr_c3.1a)

morpho3.2<-c3.2 
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m3.2_dsq<-dist(scale(m3.2_sum[2:22]))
m3.2_hc<-hclust(m3.2_dsq, method='ward.D2') 
NbClust(data=as.numeric(scale(m3.2_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)

## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##

## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]

## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]

## ******************************************************************* 
## * Among all indices:                                                
## * 1 proposed 2 as the best number of clusters 
## * 2 proposed 4 as the best number of clusters 
## * 1 proposed 6 as the best number of clusters 
## * 1 proposed 10 as the best number of clusters 
## * 1 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  4 
##  
##  
## *******************************************************************

## $All.index
##         KL        CH Hartigan     CCC    Scott  Marriot TrCovW  TraceW Friedman
## 2   1.0010  310.6099  46.0055 -2.5443 174.7412 153.5245     NA 38.3811   2.2829
## 3   0.8802  226.0131  30.0344 -6.3707 232.6288 232.9905     NA 25.8878   3.8672
## 4   2.3131  190.7795  53.2699 -5.9015 321.5104 226.2691     NA 14.1418   7.9097
## 5   0.3412  208.2371 175.7052 -6.7374 373.7806 247.7530     NA  9.9101  11.7143
## 6   4.1717  404.9899  28.4278 -5.8670 443.2822 222.3551     NA  6.1765  19.3998
## 7   0.5194  407.3634   9.1972 -6.0161 488.2979 222.8164     NA  4.5473  26.7089
## 8   1.3179  370.7537  15.3621 -6.0812 528.9557 220.7050     NA  3.4485  35.5375
## 9   7.0796  359.5775  29.4713 -5.0443 583.8431 192.2909     NA  2.3740  52.0758
## 10 10.2540  388.3236  29.0455 -4.9070 620.0047 185.6258     NA  1.8563  66.8785
## 11  0.0261  423.3788  16.1535 -4.8681 651.6150 181.1485     NA  1.4971  83.1630
## 12  0.2135  428.8970  85.2390 -4.6931 683.1584 173.9485     NA  1.2080 103.3067
## 13  5.1585  643.6937  34.1210 -4.1066 719.7226 159.1916     NA  0.9420 132.7633
## 14  0.3206  742.5189  71.9828 -3.8927 748.2206 152.0881     NA  0.7760 161.3795
## 15  2.4958 1059.7593  42.0809 -3.4075 779.8839 140.7590     NA  0.6256 200.4080
##       Rubin Cindex     DB Silhouette   Duda Pseudot2  Beale Ratkowsky    Ball
## 2    3.2829 0.2733 0.6604     0.5947 0.3762 129.3582 0.5458    0.5838 20.0500
## 3    4.8672 0.2387 0.6260     0.5307 0.3414 125.4044 0.6334    0.5028 10.1472
## 4    8.9097 0.3419 0.6470     0.5069 0.2959 145.1296 0.7803    0.4472  6.2970
## 5   12.7143 0.3000 0.6329     0.4826 0.2128 177.5325 1.2077    0.4134  3.6703
## 6   20.3998 0.2323 0.6311     0.5065 0.1267 103.4079 2.1543    0.3947  1.3671
## 7   27.7089 0.2155 0.5502     0.5250 0.2149 120.5699 1.1821    0.3676  0.9752
## 8   36.5375 0.1801 0.5502     0.5466 0.2944  35.9557 0.7491    0.3444  0.8007
## 9   53.0758 0.3000 0.5271     0.5537 0.2152  87.5366 1.1672    0.3256  0.6409
## 10  67.8785 0.2737 0.5166     0.5570 0.0993  99.7540 2.7709    0.3102  0.4753
## 11  84.1630 0.2722 0.4929     0.5616 0.2375  83.4938 1.0308    0.2968  0.3565
## 12 104.3067 0.2398 0.4944     0.5783 0.2396  69.8313 1.0120    0.2846  0.2921
## 13 133.7633 0.2045 0.4985     0.5896 0.2238  34.6875 1.0511    0.2750  0.1653
## 14 162.3795 0.2812 0.4920     0.5894 0.1696  68.5599 1.5236    0.2654  0.1223
## 15 201.4080 0.2699 0.4847     0.5957 0.1791  77.9255 1.4431    0.2571  0.0741
##    Ptbiserial   Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2      0.6303 1.0037  0.3714 0.0276 0.0082  6.4886 0.4079 1.0965
## 3      0.6175 0.4821  0.4941 0.0290 0.0100  5.4531 0.3366 0.6896
## 4      0.6258 1.6293  0.5462 0.0551 0.0124  4.4952 0.2690 0.5223
## 5      0.5422 0.6206  0.7703 0.0587 0.0127  6.5102 0.2175 0.3852
## 6      0.5017 0.2399  0.8280 0.0635 0.0131  6.2848 0.1669 0.2879
## 7      0.5009 0.8921  0.8007 0.0635 0.0137  6.2452 0.1438 0.2430
## 8      0.4655 0.2209  0.8540 0.0635 0.0137  8.9232 0.1218 0.1710
## 9      0.4639 0.8730  0.8085 0.1168 0.0141  8.8790 0.1098 0.1214
## 10     0.4419 0.5480  0.8271 0.1209 0.0142 11.0834 0.0957 0.1082
## 11     0.4368 1.3023  0.8169 0.1269 0.0144 10.6920 0.0850 0.1163
## 12     0.4035 0.8556  0.8847 0.0727 0.0145 15.5007 0.0754 0.0895
## 13     0.3782 0.5055  0.8990 0.0727 0.0145 16.0626 0.0663 0.0479
## 14     0.3730 0.7240  0.8801 0.1064 0.0145 15.4584 0.0614 0.0531
## 15     0.3608 1.0353  0.8694 0.1131 0.0145 17.0855 0.0549 0.0423
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.1432           466.8213       0.4623
## 3          0.1227           464.5373       0.4290
## 4          0.1152           468.3892       0.3805
## 5          0.0848           517.8437       0.2773
## 6         -0.1143          -146.1979       0.1628
## 7          0.0304          1050.8179       0.2848
## 8         -0.1143          -146.1979       0.4004
## 9         -0.0229         -1072.0019       0.2907
## 10        -0.1829           -71.1410       0.1242
## 11        -0.0088         -2964.0492       0.3193
## 12        -0.0387          -590.8616       0.3254
## 13        -0.2052           -58.7306       0.3294
## 14        -0.1290          -122.4991       0.2374
## 15        -0.0885          -209.1134       0.2461
## 
## $Best.nc
##                     KL       CH Hartigan     CCC   Scott Marriot TrCovW  TraceW
## Number_clusters 10.000   15.000   6.0000  2.0000  4.0000  4.0000   -Inf  7.5143
## Value_Index     10.254 1059.759 147.2774 -2.5443 88.8816 28.2054      4 15.0000
##                 Friedman   Rubin  Cindex      DB Silhouette   Duda PseudoT2
## Number_clusters  39.0285 -1.7357  0.1801  0.4847     0.5957 0.3762 129.3582
## Value_Index       9.0000  8.0000 15.0000 15.0000     2.0000 2.0000   2.0000
##                  Beale Ratkowsky   Ball PtBiserial   Frey McClain   Dunn Hubert
## Number_clusters 0.5458    0.5838 9.9028     0.6303 1.0037  0.3714 0.1269      0
## Value_Index     2.0000    3.0000 2.0000     2.0000 2.0000 11.0000 0.0000      4
##                 SDindex Dindex    SDbw
## Number_clusters  4.4952      0  0.0423
## Value_Index      0.0000     15 10.0000
## 
## $Best.partition
##   [1] 1 1 2 3 1 1 2 1 1 2 3 1 1 2 1 1 2 3 4 1 2 3 1 2 1 1 4 3 4 3 1 2 1 1 2 1 3
##  [38] 1 2 1 1 2 2 2 4 2 2 2 2 1 3 4 2 1 2 2 2 2 4 1 1 2 2 1 3 4 2 1 2 2 1 1 2 2
##  [75] 4 3 2 4 2 2 2 2 2 2 1 2 2 3 1 4 2 2 2 4 2 1 2 2 3 2 1 1 1 1 2 1 1 1 1 2 3
## [112] 2 4 1 2 2 2 1 2 4 2 1 2 1 3 2 1 1 1 2 2 3 4 4 2 1 3 4 2 2 1 2 2 2 4 1 3

m3.2_labs<-m3.2_sum$Population
plot(m3.2_hc, labels=m3.2_labs, hang=-1, cex=0.8)
hc_morph_3.2<-rect.hclust(m3.2_hc, k=4, border=2:7)
pcs3.2<-principal(scale(c3.2[4:24]), rotate = 'varimax', nfactors=3) 
pcs3.2

## Principal Components Analysis
## Call: principal(r = scale(c3.2[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC2   RC3   RC1   h2    u2 com
## GOL  0.89  0.24  0.12 0.86 0.141 1.2
## NOL  0.90  0.18  0.09 0.84 0.156 1.1
## BNL  0.84  0.16  0.18 0.77 0.234 1.2
## BBH  0.62  0.35  0.01 0.51 0.493 1.6
## XCB  0.21  0.86  0.11 0.79 0.205 1.2
## XFB  0.23  0.82 -0.01 0.72 0.276 1.2
## ZYB  0.24  0.57  0.68 0.85 0.150 2.2
## AUB  0.12  0.71  0.56 0.84 0.159 2.0
## WCB  0.01  0.59  0.53 0.63 0.373 2.0
## ASB  0.26  0.65  0.28 0.57 0.433 1.7
## BPL  0.55 -0.05  0.42 0.48 0.521 1.9
## NPH  0.53  0.08  0.56 0.60 0.403 2.0
## NLH  0.54  0.14  0.46 0.52 0.476 2.1
## JUB  0.27  0.50  0.69 0.81 0.195 2.2
## NLB  0.17  0.19  0.28 0.14 0.859 2.5
## MAB  0.21  0.24  0.70 0.59 0.414 1.4
## MDH  0.43  0.10  0.41 0.36 0.639 2.1
## MDB  0.36  0.22  0.43 0.37 0.634 2.5
## OBH -0.05 -0.05  0.58 0.34 0.659 1.0
## OBB  0.43  0.31  0.41 0.45 0.547 2.8
## GS   0.75  0.61  0.10 0.95 0.055 2.0
## 
##                        RC2  RC3  RC1
## SS loadings           5.01 4.15 3.82
## Proportion Var        0.24 0.20 0.18
## Cumulative Var        0.24 0.44 0.62
## Proportion Explained  0.39 0.32 0.29
## Cumulative Proportion 0.39 0.71 1.00
## 
## Mean item complexity =  1.8
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  1486.9  with prob <  1.3e-218 
## 
## Fit based upon off diagonal values = 0.97

MG_3.2<-c3.2$Population
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum

## # A tibble: 7 x 4
##   MG_3.2      RC2     RC3    RC1
## * <fct>     <dbl>   <dbl>  <dbl>
## 1 ATAYAL   -0.476 -0.162   0.114
## 2 BERG     -0.541  1.06   -0.109
## 3 EGYPT     0.753 -0.621  -0.679
## 4 NORSE     0.673 -0.0470 -0.194
## 5 PERU     -0.623 -0.280   0.273
## 6 SANTA CR -0.490 -0.338   1.01 
## 7 ZALAVAR   0.432  0.323  -0.305

sort(pcs3.2$loadings[,1], decreasing=TRUE)

##          NOL          GOL          BNL           GS          BBH          BPL 
##  0.895903281  0.887071615  0.842810769  0.750678856  0.618526701  0.551149547 
##          NLH          NPH          OBB          MDH          MDB          JUB 
##  0.542108196  0.526050377  0.434333091  0.429249817  0.362847286  0.270484584 
##          ASB          ZYB          XFB          XCB          MAB          NLB 
##  0.259273300  0.241773109  0.226248612  0.211438282  0.205471853  0.168572776 
##          AUB          WCB          OBH 
##  0.120274391  0.007265324 -0.053288600

sort(pcs3.2$loadings[,2], decreasing=TRUE)

##         XCB         XFB         AUB         ASB          GS         WCB 
##  0.85891822  0.82038853  0.71481700  0.64884676  0.60981639  0.58726362 
##         ZYB         JUB         BBH         OBB         GOL         MAB 
##  0.57400348  0.49981751  0.35222278  0.30624755  0.24057337  0.23551827 
##         MDB         NLB         NOL         BNL         NLH         MDH 
##  0.21671218  0.18882950  0.18015571  0.15596295  0.14449093  0.10018006 
##         NPH         OBH         BPL 
##  0.08483362 -0.04703090 -0.05010399

sort(pcs3.2$loadings[,3], decreasing=TRUE)

##          MAB          JUB          ZYB          OBH          AUB          NPH 
##  0.699007687  0.694424638  0.679444615  0.579609368  0.561393584  0.559527217 
##          WCB          NLH          MDB          BPL          OBB          MDH 
##  0.530778355  0.457755831  0.433050563  0.415073543  0.413375434  0.408396880 
##          ASB          NLB          BNL          GOL          XCB           GS 
##  0.280065180  0.276819378  0.176985767  0.117638181  0.110672366  0.098798418 
##          NOL          BBH          XFB 
##  0.092216666  0.009104812 -0.008135666

ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC3, y=RC1, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC2, y=RC1, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
MG_3.2_sex<-c3.2$Sex
MG_3.2_pcs_sex<-cbind.data.frame(MG_3.2_sex, pcs3.2$scores)
pc_3.2_sum_sex<-MG_3.2_pcs_sex %>% group_by(MG_3.2_sex) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum_sex

## # A tibble: 2 x 4
##   MG_3.2_sex    RC2    RC3    RC1
## * <fct>       <dbl>  <dbl>  <dbl>
## 1 F          -0.503 -0.344 -0.448
## 2 M           0.465  0.318  0.414

ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

morphogroup_1<-cutree(m3.2_hc, k=4) 
m3.2.1_sum<-m3.2_sum %>% mutate(cluster=morphogroup_1) 
m3.2.1_sum$cluster<-as.factor(m3.2.1_sum$cluster) 
m3.2.1<-m3.2.1_sum %>% filter(cluster==1) %>% droplevels() 
m3.2.2<-m3.2.1_sum %>% filter(cluster==2) %>% droplevels() 
m3.2.3<-m3.2.1_sum %>% filter(cluster==3) %>% droplevels() 
m3.2.4<-m3.2.1_sum %>% filter(cluster==4) %>% droplevels() 

c3.2.1d<-m3.2.1$Population  
c3.2.1<-c3.2 %>% filter(Population %in% c3.2.1d) %>% droplevels() 
c3.2.1$Morphogroup="3.2.1" 
c3.2.1$Morphogroup <- as.factor(c3.2.1$Morphogroup) 

c3.2.2d<-m3.2.2$Population  
c3.2.2<-c3.2 %>% filter(Population %in% c3.2.2d) %>% droplevels() 
c3.2.2$Morphogroup="3.2.2" 
c3.2.2$Morphogroup <- as.factor(c3.2.2$Morphogroup) 

c3.2.3d<-m3.2.3$Population  
c3.2.3<-c3.2 %>% filter(Population %in% c3.2.3d) %>% droplevels() 
c3.2.3$Morphogroup="3.2.3" 
c3.2.3$Morphogroup <- as.factor(c3.2.3$Morphogroup)

c3.2.4d<-m3.2.4$Population  
c3.2.4<-c3.2 %>% filter(Population %in% c3.2.4d) %>% droplevels() 
c3.2.4$Morphogroup="3.2.4" ## 
c3.2.4$Morphogroup <- as.factor(c3.2.4$Morphogroup) 

hwlr_c3.2<-rbind.data.frame(c3.2.1, c3.2.2)
hwlr_c3.2a<-rbind.data.frame(c3.2.3, c3.2.4)
hwlr_c3.2<-rbind.data.frame(hwlr_c3.2, hwlr_c3.2a)

morpho3.2<-c3.2.1 
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m3.2_dsq<-dist(scale(m3.2_sum[2:22]))
m3.2_hc<-hclust(m3.2_dsq, method='ward.D2')  
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.2.1[4:24]), rotate = 'varimax', nfactors=3) 
pcs3.2

## Principal Components Analysis
## Call: principal(r = scale(c3.2.1[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC1   RC3   RC2   h2   u2 com
## GOL  0.79  0.33  0.23 0.78 0.22 1.5
## NOL  0.76  0.29  0.30 0.76 0.24 1.6
## BNL  0.85  0.23  0.07 0.78 0.22 1.2
## BBH  0.63  0.43 -0.12 0.60 0.40 1.8
## XCB  0.05  0.83  0.31 0.79 0.21 1.3
## XFB  0.18  0.71  0.25 0.59 0.41 1.4
## ZYB  0.60  0.61  0.26 0.81 0.19 2.3
## AUB  0.38  0.80  0.14 0.81 0.19 1.5
## WCB  0.51  0.56 -0.15 0.59 0.41 2.1
## ASB  0.22  0.66  0.08 0.49 0.51 1.2
## BPL  0.85 -0.01 -0.01 0.73 0.27 1.0
## NPH  0.47  0.19  0.76 0.84 0.16 1.8
## NLH  0.47  0.31  0.63 0.72 0.28 2.4
## JUB  0.70  0.46  0.20 0.75 0.25 1.9
## NLB  0.54  0.15 -0.44 0.51 0.49 2.1
## MAB  0.58  0.40  0.14 0.52 0.48 1.9
## MDH  0.44  0.22  0.37 0.38 0.62 2.4
## MDB  0.48  0.31  0.10 0.34 0.66 1.8
## OBH -0.14  0.20  0.77 0.66 0.34 1.2
## OBB  0.48  0.38  0.24 0.44 0.56 2.4
## GS   0.65  0.64  0.15 0.86 0.14 2.1
## 
##                        RC1  RC3  RC2
## SS loadings           6.57 4.67 2.50
## Proportion Var        0.31 0.22 0.12
## Cumulative Var        0.31 0.54 0.65
## Proportion Explained  0.48 0.34 0.18
## Cumulative Proportion 0.48 0.82 1.00
## 
## Mean item complexity =  1.8
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  312.08  with prob <  1.9e-13 
## 
## Fit based upon off diagonal values = 0.98

MG_3.2<-c3.2.1$Population
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum

## # A tibble: 2 x 4
##   MG_3.2    RC2      RC3    RC1
## * <fct>   <dbl>    <dbl>  <dbl>
## 1 ATAYAL -0.766 -0.0100   0.369
## 2 PERU    0.327  0.00427 -0.158

sort(pcs3.2$loadings[,1], decreasing=TRUE)

##        BPL        BNL        GOL        NOL        JUB         GS        BBH 
##  0.8525344  0.8522404  0.7888121  0.7601387  0.7038526  0.6500897  0.6305836 
##        ZYB        MAB        NLB        WCB        OBB        MDB        NPH 
##  0.6007675  0.5844356  0.5423986  0.5053035  0.4821375  0.4813822  0.4724094 
##        NLH        MDH        AUB        ASB        XFB        XCB        OBH 
##  0.4708698  0.4390764  0.3823714  0.2161252  0.1835291  0.0490214 -0.1432866

sort(pcs3.2$loadings[,2], decreasing=TRUE)

##        XCB        AUB        XFB        ASB         GS        ZYB        WCB 
##  0.8347212  0.8000699  0.7050149  0.6607510  0.6430081  0.6133600  0.5618911 
##        JUB        BBH        MAB        OBB        GOL        NLH        MDB 
##  0.4642675  0.4287210  0.3979202  0.3833819  0.3300225  0.3129504  0.3113318 
##        NOL        BNL        MDH        OBH        NPH        NLB        BPL 
##  0.2933008  0.2256500  0.2157440  0.2023385  0.1929322  0.1478814 -0.0121618

sort(pcs3.2$loadings[,3], decreasing=TRUE)

##          OBH          NPH          NLH          MDH          XCB          NOL 
##  0.773637958  0.764008179  0.631241539  0.369075749  0.305149164  0.303833977 
##          ZYB          XFB          OBB          GOL          JUB           GS 
##  0.260848874  0.246715959  0.238915737  0.231974793  0.200916561  0.153087209 
##          AUB          MAB          MDB          ASB          BNL          BPL 
##  0.141299939  0.137860681  0.102021399  0.084457830  0.067086124 -0.008785728 
##          BBH          WCB          NLB 
## -0.120113615 -0.148521768 -0.441542146

ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC3, y=RC1, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC2, y=RC1, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

MG_3.2_sex<-c3.2.1$Sex
MG_3.2_pcs_sex<-cbind.data.frame(MG_3.2_sex, pcs3.2$scores)
pc_3.2_sum_sex<-MG_3.2_pcs_sex %>% group_by(MG_3.2_sex) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum_sex

## # A tibble: 2 x 4
##   MG_3.2_sex    RC2    RC3    RC1
## * <fct>       <dbl>  <dbl>  <dbl>
## 1 F          -0.157 -0.358 -0.672
## 2 M           0.136  0.311  0.584

ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

morpho3.2<-c3.2.2 
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m3.2_dsq<-dist(scale(m3.2_sum[2:22]))
m3.2_hc<-hclust(m3.2_dsq, method='ward.D2') 
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.2.2[4:24]), rotate = 'varimax', nfactors=3) 
pcs3.2

## Principal Components Analysis
## Call: principal(r = scale(c3.2.2[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      RC2   RC1   RC3   h2   u2 com
## GOL 0.09  0.80  0.38 0.80 0.20 1.5
## NOL 0.03  0.79  0.40 0.79 0.21 1.5
## BNL 0.12  0.86  0.14 0.78 0.22 1.1
## BBH 0.25  0.52  0.17 0.36 0.64 1.7
## XCB 0.86  0.02  0.19 0.78 0.22 1.1
## XFB 0.80 -0.06  0.16 0.66 0.34 1.1
## ZYB 0.77  0.42  0.26 0.83 0.17 1.8
## AUB 0.83  0.23  0.24 0.80 0.20 1.3
## WCB 0.75  0.13  0.04 0.57 0.43 1.1
## ASB 0.64  0.29  0.21 0.54 0.46 1.6
## BPL 0.10  0.80 -0.15 0.67 0.33 1.1
## NPH 0.22  0.36  0.74 0.72 0.28 1.6
## NLH 0.23  0.28  0.77 0.72 0.28 1.5
## JUB 0.70  0.47  0.18 0.74 0.26 1.9
## NLB 0.37  0.34 -0.24 0.31 0.69 2.7
## MAB 0.54  0.51  0.03 0.55 0.45 2.0
## MDH 0.23  0.37  0.37 0.33 0.67 2.6
## MDB 0.30  0.48  0.10 0.33 0.67 1.8
## OBH 0.11 -0.09  0.81 0.68 0.32 1.1
## OBB 0.37  0.36  0.40 0.42 0.58 3.0
## GS  0.53  0.63  0.34 0.79 0.21 2.5
## 
##                        RC2  RC1  RC3
## SS loadings           5.31 4.95 2.92
## Proportion Var        0.25 0.24 0.14
## Cumulative Var        0.25 0.49 0.63
## Proportion Explained  0.40 0.38 0.22
## Cumulative Proportion 0.40 0.78 1.00
## 
## Mean item complexity =  1.7
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  596.78  with prob <  1.4e-54 
## 
## Fit based upon off diagonal values = 0.97

MG_3.2<-c3.2.2$Population
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC2:RC3, mean))
pc_3.2_sum

## # A tibble: 3 x 4
##   MG_3.2     RC2    RC1     RC3
## * <fct>    <dbl>  <dbl>   <dbl>
## 1 BERG     0.617 -0.669 -0.0918
## 2 NORSE   -0.492  0.388  0.269 
## 3 ZALAVAR -0.134  0.309 -0.200

sort(pcs3.2$loadings[,1], decreasing=TRUE)

##        XCB        AUB        XFB        ZYB        WCB        JUB        ASB 
## 0.86490100 0.82999142 0.79701842 0.76561854 0.74517035 0.70262111 0.64007475 
##        MAB         GS        NLB        OBB        MDB        BBH        NLH 
## 0.54270025 0.52885844 0.37306130 0.36892056 0.29752109 0.24530769 0.22693889 
##        MDH        NPH        BNL        OBH        BPL        GOL        NOL 
## 0.22660854 0.21771172 0.12224100 0.10848727 0.09883848 0.09169986 0.02951558

sort(pcs3.2$loadings[,2], decreasing=TRUE)

##         BNL         GOL         BPL         NOL          GS         BBH 
##  0.86098754  0.80451272  0.79888348  0.79378560  0.62877825  0.52133201 
##         MAB         MDB         JUB         ZYB         MDH         NPH 
##  0.50680953  0.47727027  0.46628550  0.41504722  0.36792154  0.35617599 
##         OBB         NLB         ASB         NLH         AUB         WCB 
##  0.35506169  0.34496337  0.29012646  0.28018645  0.23234129  0.12566876 
##         XCB         XFB         OBH 
##  0.01509011 -0.05844498 -0.09073198

sort(pcs3.2$loadings[,3], decreasing=TRUE)

##         OBH         NLH         NPH         NOL         OBB         GOL 
##  0.81284267  0.76665315  0.74061046  0.40400980  0.39563577  0.38453217 
##         MDH          GS         ZYB         AUB         ASB         XCB 
##  0.37397263  0.34030031  0.25880915  0.24185482  0.21357967  0.18566600 
##         JUB         BBH         XFB         BNL         MDB         WCB 
##  0.17659953  0.17341415  0.15827605  0.14401426  0.10176963  0.03756273 
##         MAB         BPL         NLB 
##  0.03072741 -0.14721713 -0.23825192

ggplot(MG_3.2_pcs, aes(x=RC2, y=RC1, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

MG_3.2_sex<-c3.2.2$Sex
MG_3.2_pcs_sex<-cbind.data.frame(MG_3.2_sex, pcs3.2$scores)
pc_3.2_sum_sex<-MG_3.2_pcs_sex %>% group_by(MG_3.2_sex) %>% dplyr::summarize(across(RC2:RC3, mean))
pc_3.2_sum_sex

## # A tibble: 2 x 4
##   MG_3.2_sex    RC2    RC1    RC3
## * <fct>       <dbl>  <dbl>  <dbl>
## 1 F          -0.513 -0.507 -0.305
## 2 M           0.478  0.473  0.284

ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC1, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

morpho3.2<-c3.2.3 
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.2.3[4:24]), rotate = 'varimax', nfactors=3) 
pcs3.2

## Principal Components Analysis
## Call: principal(r = scale(c3.2.3[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC1   RC3   RC2   h2    u2 com
## GOL  0.70  0.41  0.28 0.74 0.260 2.0
## NOL  0.69  0.36  0.30 0.70 0.303 1.9
## BNL  0.45  0.60  0.26 0.63 0.374 2.3
## BBH  0.71  0.30  0.11 0.60 0.401 1.4
## XCB  0.84  0.02  0.09 0.71 0.290 1.0
## XFB  0.81 -0.07  0.23 0.71 0.286 1.2
## ZYB  0.62  0.53  0.35 0.78 0.218 2.6
## AUB  0.64  0.42  0.21 0.63 0.367 2.0
## WCB  0.35  0.30  0.34 0.33 0.667 2.9
## ASB  0.62  0.25 -0.17 0.48 0.522 1.5
## BPL  0.16  0.77  0.05 0.62 0.384 1.1
## NPH  0.31  0.32  0.70 0.69 0.309 1.8
## NLH  0.17  0.33  0.76 0.71 0.293 1.5
## JUB  0.52  0.55  0.40 0.74 0.257 2.8
## NLB -0.03  0.56  0.10 0.32 0.675 1.1
## MAB  0.26  0.62  0.13 0.47 0.527 1.4
## MDH  0.26  0.57  0.22 0.44 0.565 1.7
## MDB  0.26  0.67  0.01 0.52 0.482 1.3
## OBH -0.04 -0.22  0.77 0.65 0.354 1.2
## OBB  0.24  0.34  0.59 0.53 0.474 2.0
## GS   0.89  0.30  0.20 0.92 0.079 1.3
## 
##                        RC1  RC3  RC2
## SS loadings           5.82 4.18 2.91
## Proportion Var        0.28 0.20 0.14
## Cumulative Var        0.28 0.48 0.61
## Proportion Explained  0.45 0.32 0.23
## Cumulative Proportion 0.45 0.77 1.00
## 
## Mean item complexity =  1.7
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.08 
##  with the empirical chi square  276.92  with prob <  1.4e-09 
## 
## Fit based upon off diagonal values = 0.97

MG_3.2<-c3.2.3$Sex
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_3.2_sum

## # A tibble: 2 x 4
##   MG_3.2    RC1    RC3    RC2
## * <fct>   <dbl>  <dbl>  <dbl>
## 1 F      -0.476 -0.646 -0.280
## 2 M       0.435  0.591  0.256

sort(pcs3.2$loadings[,1], decreasing=TRUE)

##          GS         XCB         XFB         BBH         GOL         NOL 
##  0.89015553  0.83741294  0.80940160  0.70552502  0.70293105  0.68970460 
##         AUB         ASB         ZYB         JUB         BNL         WCB 
##  0.64315093  0.62065403  0.62019642  0.52482286  0.45344446  0.35295531 
##         NPH         MAB         MDH         MDB         OBB         NLH 
##  0.31254082  0.26352173  0.26049508  0.25873643  0.24169844  0.16989904 
##         BPL         NLB         OBH 
##  0.15593991 -0.03002931 -0.04459484

sort(pcs3.2$loadings[,2], decreasing=TRUE)

##         BPL         MDB         MAB         BNL         MDH         NLB 
##  0.76765937  0.67154226  0.62229287  0.59598262  0.56671740  0.56019762 
##         JUB         ZYB         AUB         GOL         NOL         OBB 
##  0.55301409  0.52542818  0.41719877  0.40841653  0.35823219  0.34062999 
##         NLH         NPH          GS         WCB         BBH         ASB 
##  0.32586266  0.32481984  0.30083763  0.29916361  0.29683548  0.24986661 
##         XCB         XFB         OBH 
##  0.02082444 -0.06856437 -0.21715234

sort(pcs3.2$loadings[,3], decreasing=TRUE)

##          OBH          NLH          NPH          OBB          JUB          ZYB 
##  0.772450823  0.756143106  0.698144070  0.592833420  0.402246590  0.348642967 
##          WCB          NOL          GOL          BNL          XFB          MDH 
##  0.344653735  0.304674016  0.280730359  0.256089813  0.232435434  0.215422836 
##          AUB           GS          MAB          BBH          NLB          XCB 
##  0.213243694  0.196237470  0.128044155  0.113047980  0.099967528  0.090444832 
##          BPL          MDB          ASB 
##  0.051725528  0.009595866 -0.174393720

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5) ##only sex because only Egypt is here.

ggplot(MG_3.2_pcs, aes(x=RC3, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

morpho3.2<-c3.2.4 
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.2.4[4:24]), rotate = 'varimax', nfactors=3) 
pcs3.2

## Principal Components Analysis
## Call: principal(r = scale(c3.2.4[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      RC1   RC2   RC3   h2    u2 com
## GOL 0.58  0.61  0.11 0.72 0.278 2.1
## NOL 0.55  0.60  0.10 0.67 0.329 2.0
## BNL 0.35  0.58  0.48 0.68 0.321 2.6
## BBH 0.62  0.22  0.40 0.60 0.400 2.0
## XCB 0.89  0.07  0.13 0.82 0.185 1.1
## XFB 0.83  0.05  0.23 0.75 0.254 1.2
## ZYB 0.67  0.40  0.53 0.90 0.105 2.6
## AUB 0.80  0.25  0.41 0.87 0.128 1.7
## WCB 0.59  0.48  0.19 0.61 0.389 2.1
## ASB 0.66  0.31  0.30 0.62 0.378 1.8
## BPL 0.03  0.77  0.20 0.63 0.366 1.1
## NPH 0.34  0.32  0.70 0.71 0.285 1.9
## NLH 0.43  0.32  0.71 0.79 0.206 2.1
## JUB 0.59  0.50  0.48 0.83 0.170 2.9
## NLB 0.20  0.63 -0.05 0.44 0.560 1.2
## MAB 0.29  0.58  0.42 0.60 0.403 2.3
## MDH 0.07  0.61  0.46 0.59 0.414 1.9
## MDB 0.23  0.59  0.26 0.47 0.529 1.7
## OBH 0.16 -0.05  0.75 0.60 0.403 1.1
## OBB 0.26  0.35  0.66 0.63 0.373 1.9
## GS  0.86  0.35  0.27 0.93 0.072 1.5
## 
##                        RC1  RC2  RC3
## SS loadings           6.17 4.41 3.87
## Proportion Var        0.29 0.21 0.18
## Cumulative Var        0.29 0.50 0.69
## Proportion Explained  0.43 0.31 0.27
## Cumulative Proportion 0.43 0.73 1.00
## 
## Mean item complexity =  1.9
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.06 
##  with the empirical chi square  167.64  with prob <  0.15 
## 
## Fit based upon off diagonal values = 0.99

MG_3.2<-c3.2.4$Sex
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.2_sum

## # A tibble: 2 x 4
##   MG_3.2    RC1    RC2    RC3
## * <fct>   <dbl>  <dbl>  <dbl>
## 1 F      -0.463 -0.492 -0.365
## 2 M       0.463  0.492  0.365

sort(pcs3.2$loadings[,1], decreasing=TRUE)

##        XCB         GS        XFB        AUB        ZYB        ASB        BBH 
## 0.89105449 0.85742462 0.82991697 0.79705759 0.67393470 0.66330296 0.62453357 
##        WCB        JUB        GOL        NOL        NLH        BNL        NPH 
## 0.59212804 0.58835121 0.58246190 0.54846547 0.43034989 0.34761977 0.34175726 
##        MAB        OBB        MDB        NLB        OBH        MDH        BPL 
## 0.28647961 0.25868761 0.23175159 0.19826026 0.16299742 0.06842462 0.02673032

sort(pcs3.2$loadings[,2], decreasing=TRUE)

##         BPL         NLB         MDH         GOL         NOL         MDB 
##  0.77013201  0.63139805  0.60948339  0.60853319  0.60001462  0.59113274 
##         MAB         BNL         JUB         WCB         ZYB          GS 
##  0.58256368  0.57524705  0.49823495  0.47551119  0.40251979  0.34991925 
##         OBB         NLH         NPH         ASB         AUB         BBH 
##  0.34665523  0.32154084  0.31963602  0.30534270  0.25491521  0.22008509 
##         XCB         XFB         OBH 
##  0.07182635  0.05144901 -0.05130351

sort(pcs3.2$loadings[,3], decreasing=TRUE)

##         OBH         NLH         NPH         OBB         ZYB         JUB 
##  0.75333256  0.71058630  0.70425226  0.66327498  0.52838641  0.48495636 
##         BNL         MDH         MAB         AUB         BBH         ASB 
##  0.47720820  0.45841930  0.41952613  0.41441391  0.40147485  0.29834064 
##          GS         MDB         XFB         BPL         WCB         XCB 
##  0.26516857  0.26000628  0.23306299  0.20101232  0.18555430  0.12683885 
##         GOL         NOL         NLB 
##  0.10994892  0.10069360 -0.04778089

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

morpho3.3<-c3.3 
m3.3_sum<-morpho3.3 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m3.3_dsq<-dist(scale(m3.3_sum[2:22]))
m3.3_hc<-hclust(m3.3_dsq, method='ward.D2') 
NbClust(data=as.numeric(scale(m3.3_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)

## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##

## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]

## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]

## ******************************************************************* 
## * Among all indices:                                                
## * 1 proposed 3 as the best number of clusters 
## * 4 proposed 4 as the best number of clusters 
## * 1 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  4 
##  
##  
## *******************************************************************

## $All.index
##         KL       CH Hartigan     CCC    Scott  Marriot TrCovW  TraceW Friedman
## 2   0.9380  10.0514 199.2684 -1.9551 154.5974 123.1362     NA 30.7841   2.4109
## 3   0.3937 111.8270 243.2780 -2.6629 244.9809 135.2185     NA 15.0243   5.9887
## 4  10.9996 300.6327  57.6027 -1.8599 329.9802 122.4446     NA  7.6528  12.7205
## 5   0.6365 343.4950  38.7303 -3.1133 369.4292 139.8903     NA  5.5956  17.7647
## 6   1.1738 367.4382  32.5137 -3.5203 411.4521 144.3132     NA  4.0087  25.1930
## 7   2.2830 391.2929  41.0732 -3.4465 454.0373 140.0935     NA  2.8591  35.7255
## 8   0.1313 453.1831  75.3643 -2.8321 500.0730 126.9776     NA  1.9840  51.9227
## 9   3.6415 653.6286  16.6279 -2.4626 538.4716 118.4896     NA  1.4628  70.7784
## 10  0.8433 659.7349  38.0383 -2.1766 572.5170 111.6473     NA  1.1165  93.0462
## 11  1.0540 785.4395  10.2768 -2.6025 592.8585 114.9532     NA  0.9500 109.5232
## 12  2.2142 772.0067  25.1089 -2.7893 614.8940 114.8541     NA  0.7976 130.6453
## 13  1.7912 858.0391  15.2087 -2.6863 639.7756 110.6393     NA  0.6547 159.3861
## 14  0.7135 891.8431  12.7757 -2.4149 665.8529 104.3268     NA  0.5323 196.2648
## 15  1.0479 915.2719  18.9546 -2.7253 681.4556 105.8140     NA  0.4703 222.2691
##       Rubin Cindex     DB Silhouette   Duda Pseudot2  Beale Ratkowsky    Ball
## 2    3.4109 0.2962 0.6312     0.5911 0.2384 204.4194 1.0483    0.1936 48.5635
## 3    6.9887 0.2547 0.5861     0.5586 0.2694 157.2620 0.8885    0.4637 12.4187
## 4   13.7205 0.2782 0.5832     0.5718 0.2644  77.9177 0.8956    0.4693  3.1278
## 5   18.7647 0.2772 0.5662     0.5548 0.2576  97.9649 0.9330    0.4287  1.6997
## 6   26.1930 0.2350 0.5636     0.5460 0.2727  66.6831 0.8549    0.3955  1.0730
## 7   36.7255 0.2901 0.5571     0.5305 0.2311 103.1399 1.0744    0.3687  0.7236
## 8   52.9227 0.2794 0.5503     0.5639 0.1107 144.5574 2.5361    0.3472  0.4707
## 9   71.7784 0.2412 0.5234     0.5801 0.2732  45.2190 0.8374    0.3297  0.2553
## 10  94.0462 0.2411 0.5356     0.5969 0.3062  36.2596 0.7110    0.3132  0.2012
## 11 110.5232 0.2206 0.5323     0.5865 0.2372  51.4640 1.0091    0.2993  0.1377
## 12 131.6453 0.1975 0.5352     0.5901 0.2522  20.7608 0.8650    0.2868  0.1159
## 13 160.3861 0.2281 0.5135     0.5933 0.2009  31.8147 1.1783    0.2758  0.0877
## 14 197.2648 0.3115 0.5015     0.5951 0.1475  46.2509 1.7130    0.2660  0.0718
## 15 223.2691 0.2986 0.4841     0.5880 0.2514  44.6653 0.9305    0.2571  0.0601
##    Ptbiserial   Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2      0.6431 0.7624  0.3763 0.0377 0.0095  6.8675 0.4216 1.2302
## 3      0.6411 0.8089  0.5211 0.0446 0.0118  4.8489 0.2892 0.6765
## 4      0.5904 0.7886  0.6591 0.0715 0.0134  5.0024 0.2092 0.4126
## 5      0.5722 1.1297  0.6887 0.0801 0.0145  5.4752 0.1813 0.3848
## 6      0.5274 0.6833  0.7806 0.0801 0.0145  7.2766 0.1474 0.2113
## 7      0.5071 0.9042  0.7938 0.1143 0.0153  6.9966 0.1268 0.1687
## 8      0.4602 0.6219  0.8551 0.0899 0.0153  9.2436 0.1033 0.1251
## 9      0.4426 0.7075  0.8422 0.0899 0.0156  9.0203 0.0854 0.0700
## 10     0.4242 1.0354  0.8347 0.0957 0.0156 11.2233 0.0759 0.0586
## 11     0.4066 1.0360  0.8549 0.0957 0.0157 14.4855 0.0687 0.0480
## 12     0.3853 0.3438  0.8796 0.0957 0.0157 16.7589 0.0621 0.0524
## 13     0.3826 0.4846  0.8547 0.1166 0.0158 16.8058 0.0586 0.0314
## 14     0.3777 0.9736  0.8326 0.1700 0.0159 16.2857 0.0544 0.0210
## 15     0.3709 1.2523  0.8358 0.1700 0.0159 19.3347 0.0503 0.0246
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.1209           465.2205       0.3098
## 3          0.1091           473.6274       0.3498
## 4          0.0038          7394.6572       0.3521
## 5          0.0351           934.5356       0.3409
## 6         -0.0157         -1619.3999       0.3640
## 7          0.0205          1480.6451       0.3080
## 8         -0.0770          -251.6212       0.1287
## 9         -0.0885          -209.1134       0.3729
## 10        -0.1009          -174.6173       0.4116
## 11        -0.1009          -174.6173       0.3301
## 12        -0.2932           -30.8768       0.3833
## 13        -0.2595           -38.8310       0.3093
## 14        -0.2595           -38.8310       0.2269
## 15        -0.1143          -146.1979       0.3500
## 
## $Best.nc
##                      KL       CH Hartigan     CCC   Scott Marriot TrCovW
## Number_clusters  4.0000  15.0000   4.0000  4.0000  3.0000  4.0000   -Inf
## Value_Index     10.9996 915.2719 185.6753 -1.8599 90.3835 30.2196      3
##                  TraceW Friedman    Rubin  Cindex      DB Silhouette   Duda
## Number_clusters  8.3883  36.8788 -10.8745  0.1975  0.4841     0.5969 0.2384
## Value_Index     14.0000  14.0000  12.0000 15.0000 10.0000     2.0000 2.0000
##                 PseudoT2  Beale Ratkowsky    Ball PtBiserial Frey McClain Dunn
## Number_clusters 204.4194 1.0483    0.4693 36.1447     0.6431   NA  0.3763 0.17
## Value_Index       2.0000 4.0000    3.0000  2.0000     1.0000    2 14.0000 0.00
##                 Hubert SDindex Dindex  SDbw
## Number_clusters      0  4.8489      0 0.021
## Value_Index          3  0.0000     14 4.000
## 
## $Best.partition
##   [1] 1 2 3 4 4 3 1 2 4 4 4 1 3 2 2 3 3 1 2 3 4 2 1 1 4 1 1 2 4 3 2 3 4 4 2 1 1
##  [38] 2 3 4 1 4 3 2 1 4 3 4 3 2 3 2 3 1 1 2 1 2 4 4 1 2 4 2 1 4 4 4 2 4 3 1 3 2
##  [75] 2 3 4 1 1 2 4 4 3 4 2 3 1 4 2 1 3 4 1 2 3 3 3 3 2 4 3 3 4 2 3 4 1 4 3 3 2
## [112] 4 4 3 1 2 3 2 3 3 4 4 1 2 4 1

m3.3_labs<-m3.3_sum$Population
plot(m3.3_hc, labels=m3.3_labs, hang=-1, cex=0.8)
hc_morph_3.3<-rect.hclust(m3.3_hc, k=4, border=2:7)
pcs3.3<-principal(scale(c3.3[4:24]), rotate = 'varimax', nfactors=3) 
pcs3.3

## Principal Components Analysis
## Call: principal(r = scale(c3.3[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC1   RC2   RC3   h2    u2 com
## GOL  0.70  0.35  0.26 0.69 0.312 1.8
## NOL  0.62  0.38  0.37 0.66 0.340 2.4
## BNL  0.61  0.19  0.52 0.67 0.325 2.2
## BBH  0.30  0.58  0.31 0.52 0.475 2.1
## XCB  0.12  0.89 -0.05 0.81 0.194 1.0
## XFB  0.02  0.81  0.25 0.72 0.279 1.2
## ZYB  0.82  0.30  0.15 0.78 0.225 1.3
## AUB  0.72  0.43 -0.01 0.70 0.296 1.6
## WCB  0.54  0.46  0.05 0.51 0.487 2.0
## ASB  0.59  0.40 -0.05 0.51 0.486 1.8
## BPL  0.77 -0.07  0.26 0.66 0.342 1.2
## NPH  0.29  0.16  0.79 0.73 0.269 1.4
## NLH  0.32  0.24  0.73 0.68 0.317 1.6
## JUB  0.75  0.26  0.28 0.71 0.288 1.5
## NLB  0.25  0.32  0.12 0.18 0.822 2.2
## MAB  0.66  0.31  0.14 0.56 0.443 1.5
## MDH  0.49  0.14  0.31 0.35 0.646 1.9
## MDB  0.63  0.10 -0.01 0.41 0.591 1.1
## OBH -0.08  0.04  0.75 0.57 0.430 1.0
## OBB  0.71  0.07  0.19 0.54 0.457 1.2
## GS   0.48  0.79  0.23 0.91 0.093 1.8
## 
##                       RC1  RC2  RC3
## SS loadings           6.4 3.75 2.73
## Proportion Var        0.3 0.18 0.13
## Cumulative Var        0.3 0.48 0.61
## Proportion Explained  0.5 0.29 0.21
## Cumulative Proportion 0.5 0.79 1.00
## 
## Mean item complexity =  1.6
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  1075.31  with prob <  1.3e-139 
## 
## Fit based upon off diagonal values = 0.98

MG_3.3<-c3.3$Population
MG_3.3_pcs<-cbind.data.frame(MG_3.3, pcs3.3$scores)
pc_3.3_sum<-MG_3.3_pcs %>% group_by(MG_3.3) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.3_sum

## # A tibble: 6 x 4
##   MG_3.3      RC1    RC2     RC3
## * <fct>     <dbl>  <dbl>   <dbl>
## 1 AUSTRALI  0.639 -0.468  0.0526
## 2 DOGON    -0.875  0.359  0.145 
## 3 TASMANIA  0.378  0.533 -1.08  
## 4 TEITA    -0.368 -0.503  0.280 
## 5 TOLAI     0.458 -0.451 -0.0791
## 6 ZULU     -0.303  0.562  0.595

sort(pcs3.3$loadings[,1], decreasing=TRUE)

##         ZYB         BPL         JUB         AUB         OBB         GOL 
##  0.81513007  0.76624234  0.75283764  0.71812683  0.71014323  0.70431384 
##         MAB         MDB         NOL         BNL         ASB         WCB 
##  0.66369533  0.63143175  0.61659633  0.60830861  0.59254035  0.54434761 
##         MDH          GS         NLH         BBH         NPH         NLB 
##  0.48836250  0.48179066  0.31580502  0.29756664  0.29442714  0.25153577 
##         XCB         XFB         OBH 
##  0.12476891  0.02150161 -0.07591439

sort(pcs3.3$loadings[,2], decreasing=TRUE)

##         XCB         XFB          GS         BBH         WCB         AUB 
##  0.88785083  0.81168217  0.78945084  0.58286810  0.46271837  0.43345943 
##         ASB         NOL         GOL         NLB         MAB         ZYB 
##  0.40074436  0.37677795  0.34959246  0.31796682  0.31248415  0.29590267 
##         JUB         NLH         BNL         NPH         MDH         MDB 
##  0.26321059  0.23993404  0.18797192  0.15986320  0.13660555  0.10191014 
##         OBB         OBH         BPL 
##  0.06631841  0.03510919 -0.06662561

sort(pcs3.3$loadings[,3], decreasing=TRUE)

##         NPH         OBH         NLH         BNL         NOL         MDH 
##  0.78648765  0.75033383  0.72518998  0.51889025  0.37188746  0.31188407 
##         BBH         JUB         GOL         BPL         XFB          GS 
##  0.31080578  0.27518868  0.26460537  0.25704874  0.24855587  0.22699551 
##         OBB         ZYB         MAB         NLB         WCB         AUB 
##  0.18500319  0.15308169  0.13716781  0.11630065  0.04685713 -0.01147652 
##         MDB         XCB         ASB 
## -0.01200930 -0.04584929 -0.05079294

ggplot(MG_3.3_pcs, aes(x=RC1, y=RC2, color = MG_3.3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum)
ggplot(MG_3.3_pcs, aes(x=RC2, y=RC3, color = MG_3.3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum, size=5)
ggplot(MG_3.3_pcs, aes(x=RC1, y=RC3, color = MG_3.3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum, size=5)
MG_3.3_sex<-c3.3$Sex
MG_3.3_pcs_sex<-cbind.data.frame(MG_3.3_sex, pcs3.3$scores)
pc_3.3_sum_sex<-MG_3.3_pcs_sex %>% group_by(MG_3.3_sex) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.3_sum_sex

## # A tibble: 2 x 4
##   MG_3.3_sex    RC1    RC2    RC3
## * <fct>       <dbl>  <dbl>  <dbl>
## 1 F          -0.555 -0.323 -0.261
## 2 M           0.565  0.328  0.266

ggplot(MG_3.3_pcs_sex, aes(x=RC1, y=RC2, color = MG_3.3_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum_sex, size=5)
ggplot(MG_3.3_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.3_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum_sex, size=5)
ggplot(MG_3.3_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.3_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum_sex, size=5)

morphogroup_1<-cutree(m3.3_hc, k=4) 
m3.3.1_sum<-m3.3_sum %>% mutate(cluster=morphogroup_1) 
m3.3.1_sum$cluster<-as.factor(m3.3.1_sum$cluster) 
m3.3.1<-m3.3.1_sum %>% filter(cluster==1) %>% droplevels() 
m3.3.2<-m3.3.1_sum %>% filter(cluster==2) %>% droplevels() 
m3.3.3<-m3.3.1_sum %>% filter(cluster==3) %>% droplevels() 
m3.3.4<-m3.3.1_sum %>% filter(cluster==4) %>% droplevels() 

c3.3.1d<-m3.3.1$Population 
c3.3.1<-c3.3 %>% filter(Population %in% c3.3.1d) %>% droplevels() 
c3.3.1$Morphogroup="3.3.1" 
c3.3.1$Morphogroup <- as.factor(c3.3.1$Morphogroup) 

c3.3.2d<-m3.3.2$Population 
c3.3.2<-c3.3 %>% filter(Population %in% c3.3.2d) %>% droplevels() 
c3.3.2$Morphogroup="3.3.2" 
c3.3.2$Morphogroup <- as.factor(c3.3.2$Morphogroup) 

c3.3.3d<-m3.3.3$Population  
c3.3.3<-c3.3 %>% filter(Population %in% c3.3.3d) %>% droplevels() 
c3.3.3$Morphogroup="3.3.3" 
c3.3.3$Morphogroup <- as.factor(c3.3.3$Morphogroup) 

c3.3.4d<-m3.3.4$Population 
c3.3.4<-c3.3 %>% filter(Population %in% c3.3.4d) %>% droplevels() 
c3.3.4$Morphogroup="3.3.4" 
c3.3.4$Morphogroup <- as.factor(c3.3.4$Morphogroup) 

hwlr_c3.3<-rbind.data.frame(c3.3.1, c3.3.2)
hwlr_c3.3a<-rbind.data.frame(c3.3.3, c3.3.4)
hwlr_c3.3<-rbind.data.frame(hwlr_c3.3, hwlr_c3.3a)

morpho3.2<-c3.3.1 ## Honestly I stored this as a previously used object because I was too lazy to change the code to match...in practice, this variable would be generic and dynamic to be used over and over again for different workflows...that way I just need to change the input data and then have all of the operations I want run on it. A very basic idea of a for loop.
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m3.2_dsq<-dist(scale(m3.2_sum[2:22])) 
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.3.1[4:24]), rotate = 'varimax', nfactors=3) #
pcs3.2 ## .

## Principal Components Analysis
## Call: principal(r = scale(c3.3.1[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC1   RC3   RC2   h2   u2 com
## GOL  0.53  0.36  0.58 0.75 0.25 2.7
## NOL  0.54  0.34  0.60 0.77 0.23 2.6
## BNL  0.34  0.64  0.46 0.74 0.26 2.4
## BBH  0.53  0.54 -0.10 0.59 0.41 2.1
## XCB  0.89  0.10  0.08 0.80 0.20 1.0
## XFB  0.75  0.23  0.00 0.61 0.39 1.2
## ZYB  0.58  0.61  0.26 0.78 0.22 2.4
## AUB  0.71  0.42  0.19 0.72 0.28 1.8
## WCB  0.58  0.34  0.21 0.50 0.50 1.9
## ASB  0.74  0.05  0.29 0.64 0.36 1.3
## BPL  0.13  0.78  0.22 0.67 0.33 1.2
## NPH  0.13  0.58  0.39 0.51 0.49 1.9
## NLH  0.22  0.41  0.63 0.61 0.39 2.0
## JUB  0.52  0.61  0.32 0.74 0.26 2.5
## NLB  0.10  0.64  0.12 0.44 0.56 1.1
## MAB  0.38  0.62  0.31 0.63 0.37 2.2
## MDH  0.42  0.56  0.00 0.49 0.51 1.9
## MDB  0.32  0.67 -0.26 0.62 0.38 1.8
## OBH -0.02 -0.07  0.75 0.57 0.43 1.0
## OBB  0.29  0.50  0.42 0.51 0.49 2.6
## GS   0.80  0.44  0.23 0.90 0.10 1.7
## 
##                        RC1  RC3  RC2
## SS loadings           5.57 5.17 2.83
## Proportion Var        0.27 0.25 0.13
## Cumulative Var        0.27 0.51 0.65
## Proportion Explained  0.41 0.38 0.21
## Cumulative Proportion 0.41 0.79 1.00
## 
## Mean item complexity =  1.9
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  390.39  with prob <  2.6e-23 
## 
## Fit based upon off diagonal values = 0.98

MG_3.2<-c3.3.1$Population
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_3.2_sum

## # A tibble: 2 x 4
##   MG_3.2      RC1    RC3    RC2
## * <fct>     <dbl>  <dbl>  <dbl>
## 1 AUSTRALI  0.124 -0.305  0.532
## 2 TOLAI    -0.114  0.280 -0.488

sort(pcs3.2$loadings[,1], decreasing=TRUE)

##         XCB          GS         XFB         ASB         AUB         ZYB 
##  0.88537343  0.80499186  0.74752131  0.74121016  0.71082425  0.58149911 
##         WCB         NOL         BBH         GOL         JUB         MDH 
##  0.58130207  0.54188336  0.53226545  0.52974124  0.51921850  0.42316296 
##         MAB         BNL         MDB         OBB         NLH         NPH 
##  0.37772634  0.33834952  0.32265066  0.29430046  0.22322938  0.13189999 
##         BPL         NLB         OBH 
##  0.12552713  0.09531118 -0.02108935

sort(pcs3.2$loadings[,2], decreasing=TRUE)

##        BPL        MDB        BNL        NLB        MAB        ZYB        JUB 
##  0.7777084  0.6712337  0.6431414  0.6425547  0.6229960  0.6072098  0.6062875 
##        NPH        MDH        BBH        OBB         GS        AUB        NLH 
##  0.5845295  0.5576770  0.5406984  0.4957012  0.4429494  0.4226782  0.4124539 
##        GOL        WCB        NOL        XFB        XCB        ASB        OBH 
##  0.3593864  0.3403817  0.3351251  0.2346796  0.1026047  0.0475871 -0.0740300

sort(pcs3.2$loadings[,3], decreasing=TRUE)

##          OBH          NLH          NOL          GOL          BNL          OBB 
##  0.748762322  0.625218271  0.599284365  0.583193965  0.460143827  0.418292581 
##          NPH          JUB          MAB          ASB          ZYB           GS 
##  0.391453343  0.323846038  0.312153402  0.292121476  0.264038787  0.229103594 
##          BPL          WCB          AUB          NLB          XCB          XFB 
##  0.215817252  0.213207561  0.191196618  0.117409150  0.079168933 -0.001983384 
##          MDH          BBH          MDB 
## -0.004475984 -0.099970452 -0.257960601

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC3, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

MG_3.2_sex<-c3.3.1$Sex
MG_3.2_pcs_sex<-cbind.data.frame(MG_3.2_sex, pcs3.2$scores)
pc_3.2_sum_sex<-MG_3.2_pcs_sex %>% group_by(MG_3.2_sex) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum_sex

## # A tibble: 2 x 4
##   MG_3.2_sex    RC2    RC3    RC1
## * <fct>       <dbl>  <dbl>  <dbl>
## 1 F          -0.171 -0.625 -0.431
## 2 M           0.163  0.596  0.411

ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

ggplot(MG_3.2_pcs_sex, aes(x=RC3, y=RC2, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC2, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

morpho3.2<-c3.3.2 ## same lazy use as above. I'm not sorry. This is a lot of code. 
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) #
m3.2_dsq<-dist(scale(m3.2_sum[2:22])) 
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.3.2[4:24]), rotate = 'varimax', nfactors=3) 
pcs3.2

## Principal Components Analysis
## Call: principal(r = scale(c3.3.2[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC1   RC3   RC2   h2    u2 com
## GOL  0.83  0.21  0.25 0.80 0.201 1.3
## NOL  0.85  0.18  0.23 0.81 0.186 1.2
## BNL  0.77  0.40  0.02 0.76 0.244 1.5
## BBH  0.24  0.16  0.67 0.54 0.463 1.4
## XCB -0.07  0.26  0.86 0.82 0.182 1.2
## XFB  0.17  0.23  0.76 0.65 0.349 1.3
## ZYB  0.56  0.58  0.27 0.72 0.278 2.4
## AUB  0.58  0.48  0.27 0.64 0.363 2.4
## WCB  0.10  0.63  0.33 0.52 0.484 1.6
## ASB  0.42  0.32  0.37 0.41 0.585 2.8
## BPL  0.53  0.57 -0.06 0.61 0.393 2.0
## NPH  0.62  0.23  0.26 0.50 0.498 1.7
## NLH  0.68  0.17  0.13 0.51 0.492 1.2
## JUB  0.52  0.65  0.12 0.71 0.293 2.0
## NLB  0.04  0.49  0.17 0.27 0.732 1.2
## MAB  0.23  0.58  0.40 0.55 0.446 2.1
## MDH  0.19  0.61  0.23 0.46 0.540 1.5
## MDB  0.23  0.63  0.09 0.45 0.546 1.3
## OBH  0.32 -0.01  0.37 0.24 0.763 2.0
## OBB  0.26  0.65  0.07 0.49 0.507 1.4
## GS   0.42  0.28  0.81 0.91 0.092 1.8
## 
##                        RC1  RC3  RC2
## SS loadings           4.83 4.11 3.41
## Proportion Var        0.23 0.20 0.16
## Cumulative Var        0.23 0.43 0.59
## Proportion Explained  0.39 0.33 0.28
## Cumulative Proportion 0.39 0.72 1.00
## 
## Mean item complexity =  1.7
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.08 
##  with the empirical chi square  472.97  with prob <  4e-35 
## 
## Fit based upon off diagonal values = 0.96

MG_3.2<-c3.3.2$Population
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_3.2_sum

## # A tibble: 2 x 4
##   MG_3.2    RC1    RC3    RC2
## * <fct>   <dbl>  <dbl>  <dbl>
## 1 DOGON  -0.448  0.113  0.473
## 2 TEITA   0.535 -0.135 -0.564

sort(pcs3.2$loadings[,1], decreasing=TRUE)

##         NOL         GOL         BNL         NLH         NPH         AUB 
##  0.85251793  0.83343963  0.77247131  0.68046027  0.61729285  0.57549759 
##         ZYB         BPL         JUB          GS         ASB         OBH 
##  0.56378647  0.53068322  0.51585979  0.42367929  0.42261719  0.31708953 
##         OBB         BBH         MAB         MDB         MDH         XFB 
##  0.26411668  0.23873140  0.23070052  0.22939209  0.18996357  0.16536969 
##         WCB         NLB         XCB 
##  0.10489125  0.04164548 -0.07210014

sort(pcs3.2$loadings[,2], decreasing=TRUE)

##         JUB         OBB         WCB         MDB         MDH         MAB 
##  0.65278241  0.64633604  0.63064543  0.62724828  0.60874956  0.58495638 
##         ZYB         BPL         NLB         AUB         BNL         ASB 
##  0.57656055  0.56712171  0.48802103  0.48456408  0.39905990  0.31686491 
##          GS         XCB         NPH         XFB         GOL         NOL 
##  0.27973055  0.25805211  0.22667136  0.22659246  0.20632959  0.18236802 
##         NLH         BBH         OBH 
##  0.16766249  0.16447259 -0.01319013

sort(pcs3.2$loadings[,3], decreasing=TRUE)

##         XCB          GS         XFB         BBH         MAB         OBH 
##  0.86395400  0.80629841  0.75618449  0.67298885  0.39762229  0.36877633 
##         ASB         WCB         ZYB         AUB         NPH         GOL 
##  0.36816315  0.32809084  0.26778916  0.26552458  0.26414976  0.24829185 
##         NOL         MDH         NLB         NLH         JUB         MDB 
##  0.23150418  0.23085336  0.16627165  0.12928741  0.12008408  0.08650472 
##         OBB         BNL         BPL 
##  0.07178029  0.02030907 -0.06233490

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC3, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

MG_3.2_sex<-c3.3.2$Sex
MG_3.2_pcs_sex<-cbind.data.frame(MG_3.2_sex, pcs3.2$scores)
pc_3.2_sum_sex<-MG_3.2_pcs_sex %>% group_by(MG_3.2_sex) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum_sex

## # A tibble: 2 x 4
##   MG_3.2_sex    RC2    RC3    RC1
## * <fct>       <dbl>  <dbl>  <dbl>
## 1 F          -0.298 -0.426 -0.369
## 2 M           0.380  0.543  0.470

ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

ggplot(MG_3.2_pcs_sex, aes(x=RC3, y=RC2, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC2, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)

morpho3.2<-c3.3.3 ## see previous two sections.
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m3.2_dsq<-dist(scale(m3.2_sum[2:22])) 
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.3.3[4:24]), rotate = 'varimax', nfactors=3) 
pcs3.2

## Principal Components Analysis
## Call: principal(r = scale(c3.3.3[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC1   RC2   RC3   h2    u2 com
## GOL  0.79  0.34  0.20 0.77 0.225 1.5
## NOL  0.78  0.37  0.18 0.77 0.226 1.6
## BNL  0.43  0.66  0.15 0.65 0.353 1.8
## BBH  0.51  0.48  0.08 0.49 0.507 2.0
## XCB  0.90 -0.07  0.11 0.83 0.173 1.0
## XFB  0.69  0.10  0.33 0.60 0.398 1.5
## ZYB  0.48  0.49  0.62 0.86 0.138 2.8
## AUB  0.66  0.26  0.51 0.76 0.238 2.2
## WCB  0.49  0.11  0.68 0.71 0.289 1.9
## ASB  0.61  0.23  0.18 0.46 0.541 1.5
## BPL  0.26  0.55  0.40 0.53 0.475 2.3
## NPH  0.23  0.80  0.08 0.69 0.307 1.2
## NLH  0.40  0.63  0.15 0.58 0.416 1.8
## JUB  0.37  0.54  0.65 0.84 0.156 2.6
## NLB  0.04 -0.13  0.77 0.62 0.384 1.1
## MAB  0.33  0.28  0.61 0.56 0.438 2.0
## MDH  0.15  0.71  0.19 0.56 0.443 1.2
## MDB  0.32  0.50  0.19 0.39 0.607 2.0
## OBH -0.04  0.55 -0.08 0.32 0.685 1.1
## OBB  0.02  0.57  0.54 0.62 0.381 2.0
## GS   0.91  0.30  0.16 0.95 0.053 1.3
## 
##                        RC1  RC2  RC3
## SS loadings           5.72 4.50 3.35
## Proportion Var        0.27 0.21 0.16
## Cumulative Var        0.27 0.49 0.65
## Proportion Explained  0.42 0.33 0.25
## Cumulative Proportion 0.42 0.75 1.00
## 
## Mean item complexity =  1.7
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  180.82  with prob <  0.044 
## 
## Fit based upon off diagonal values = 0.98

MG_3.2<-c3.3.3$Sex
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.2_sum

## # A tibble: 2 x 4
##   MG_3.2    RC1    RC2    RC3
## * <fct>   <dbl>  <dbl>  <dbl>
## 1 F      -0.516 -0.400 -0.394
## 2 M       0.481  0.374  0.368

sort(pcs3.2$loadings[,1], decreasing=TRUE)

##          GS         XCB         GOL         NOL         XFB         AUB 
##  0.91110897  0.89988004  0.78636217  0.77793439  0.69489644  0.65560857 
##         ASB         BBH         WCB         ZYB         BNL         NLH 
##  0.61185957  0.50856490  0.48983860  0.47778892  0.43034036  0.40308876 
##         JUB         MAB         MDB         BPL         NPH         MDH 
##  0.36754451  0.32837809  0.32450106  0.25898349  0.23082695  0.15022414 
##         NLB         OBB         OBH 
##  0.04339999  0.01812516 -0.04193111

sort(pcs3.2$loadings[,2], decreasing=TRUE)

##         NPH         MDH         BNL         NLH         OBB         OBH 
##  0.79556839  0.70634179  0.66288402  0.63181056  0.56957271  0.55430441 
##         BPL         JUB         MDB         ZYB         BBH         NOL 
##  0.54772943  0.53539091  0.50071448  0.49401237  0.47620407  0.37092323 
##         GOL          GS         MAB         AUB         ASB         WCB 
##  0.33846288  0.30194780  0.27969481  0.26245679  0.23017386  0.10962450 
##         XFB         XCB         NLB 
##  0.09752573 -0.06740661 -0.13075338

sort(pcs3.2$loadings[,3], decreasing=TRUE)

##         NLB         WCB         JUB         ZYB         MAB         OBB 
##  0.77255008  0.67756789  0.64983043  0.62441811  0.61300226  0.54257828 
##         AUB         BPL         XFB         GOL         MDB         MDH 
##  0.51265458  0.39777357  0.33114280  0.20452917  0.19120374  0.18791326 
##         ASB         NOL          GS         BNL         NLH         XCB 
##  0.17840881  0.17579290  0.16080620  0.15105051  0.15050225  0.11382825 
##         BBH         NPH         OBH 
##  0.08458573  0.08051274 -0.07745519

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

morpho3.2<-c3.3.4 ## see previous three sections.
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) 
m3.2_dsq<-dist(scale(m3.2_sum[2:22])) 
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.3.4[4:24]), rotate = 'varimax', nfactors=3) 
pcs3.2

## Principal Components Analysis
## Call: principal(r = scale(c3.3.4[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC1   RC2   RC3   h2   u2 com
## GOL  0.71  0.38  0.25 0.70 0.30 1.8
## NOL  0.68  0.37  0.26 0.67 0.33 1.9
## BNL  0.73  0.06  0.39 0.69 0.31 1.5
## BBH  0.42  0.44  0.48 0.60 0.40 3.0
## XCB  0.11  0.84  0.15 0.75 0.25 1.1
## XFB -0.03  0.81  0.22 0.70 0.30 1.2
## ZYB  0.64  0.44  0.32 0.71 0.29 2.3
## AUB  0.53  0.57  0.17 0.63 0.37 2.2
## WCB  0.25  0.65  0.06 0.49 0.51 1.3
## ASB  0.47  0.30  0.08 0.32 0.68 1.7
## BPL  0.80 -0.25  0.20 0.73 0.27 1.3
## NPH  0.35  0.19  0.76 0.73 0.27 1.6
## NLH  0.34  0.22  0.72 0.68 0.32 1.6
## JUB  0.65  0.31  0.32 0.62 0.38 2.0
## NLB  0.43  0.04  0.20 0.23 0.77 1.4
## MAB  0.58  0.19  0.15 0.40 0.60 1.4
## MDH  0.57  0.36 -0.11 0.47 0.53 1.8
## MDB  0.60  0.17 -0.42 0.56 0.44 2.0
## OBH  0.01  0.14  0.73 0.56 0.44 1.1
## OBB  0.37  0.27  0.35 0.33 0.67 2.8
## GS   0.49  0.67  0.37 0.83 0.17 2.5
## 
##                        RC1  RC2  RC3
## SS loadings           5.56 3.85 3.01
## Proportion Var        0.26 0.18 0.14
## Cumulative Var        0.26 0.45 0.59
## Proportion Explained  0.45 0.31 0.24
## Cumulative Proportion 0.45 0.76 1.00
## 
## Mean item complexity =  1.8
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.08 
##  with the empirical chi square  303.74  with prob <  1.7e-12 
## 
## Fit based upon off diagonal values = 0.96

MG_3.2<-c3.3.4$Sex
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.2_sum

## # A tibble: 2 x 4
##   MG_3.2    RC1    RC2    RC3
## * <fct>   <dbl>  <dbl>  <dbl>
## 1 F      -0.527 -0.270 -0.292
## 2 M       0.441  0.226  0.245

sort(pcs3.2$loadings[,1], decreasing=TRUE)

##         BPL         BNL         GOL         NOL         JUB         ZYB 
##  0.79646899  0.73374325  0.70729128  0.68496500  0.64749243  0.63834563 
##         MDB         MAB         MDH         AUB          GS         ASB 
##  0.60000715  0.58305693  0.57363702  0.53371228  0.49082093  0.47344267 
##         NLB         BBH         OBB         NPH         NLH         WCB 
##  0.43143328  0.41879381  0.37345073  0.35315764  0.33624222  0.25339672 
##         XCB         OBH         XFB 
##  0.10995538  0.01379945 -0.02960592

sort(pcs3.2$loadings[,2], decreasing=TRUE)

##         XCB         XFB          GS         WCB         AUB         ZYB 
##  0.84274883  0.80732354  0.67344851  0.64813358  0.56649504  0.44425813 
##         BBH         GOL         NOL         MDH         JUB         ASB 
##  0.43658634  0.37720396  0.36845208  0.36141912  0.31256017  0.29699366 
##         OBB         NLH         MAB         NPH         MDB         OBH 
##  0.26710595  0.21875168  0.19352930  0.19184382  0.16644650  0.14203915 
##         BNL         NLB         BPL 
##  0.06022599  0.04077725 -0.24664747

sort(pcs3.2$loadings[,3], decreasing=TRUE)

##         NPH         OBH         NLH         BBH         BNL          GS 
##  0.75657277  0.73496986  0.71902198  0.48013765  0.38631181  0.37251598 
##         OBB         ZYB         JUB         NOL         GOL         XFB 
##  0.35029154  0.32120944  0.32053041  0.26096012  0.24841074  0.22227832 
##         NLB         BPL         AUB         XCB         MAB         ASB 
##  0.20329387  0.19724088  0.16764502  0.15136001  0.14620609  0.07801167 
##         WCB         MDH         MDB 
##  0.05930469 -0.10672858 -0.41891925

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)

rf_target<-"Morphogroup" ## creating a new generic object that just contains Morphogroup. I will use this to build a generic function to be used in all random forest models.
rf_data<-as.data.frame(hwlr) ## storing the hwlr dataset as a generic object rf_data.
nobs<-nrow(rf_data) ## nobs is shorthand for number of observations, which I am setting to the number of rows, nrow, from the data set.
rf_data[5:25]<-scale(rf_data[5:25]) ## scaling craniometric data because sexes are pooled here still.
rfsamp<-rftrain<-sample(nrow(rf_data), 0.7*nobs) ## creating a training set that is 70% of the data.
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data)), rftrain), 0.15*nobs) ## creating a validation set that is 15% of the data set
rftest<-setdiff(setdiff(seq_len(nrow(rf_data)), rftrain),rfvalidate) ##creating a testing set that is what is left over from the sample and validation sets...or 15%
cranio<-names((rf_data[5:25])) #storing an object that is just the name of each of the craniometric variables.
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE) ## the random forest model with the Morphogroup being a function all of the craniometric variables available and filtering our dataset by the training set to include only the sample of 70% and to only use the variables defined in cranio.
pr<-predict(rf1, newdata=na.omit(rf_data)) ## using the random forest model that was created to predict everyone in the original dataset and store those predictions as an object
ct<-table(pr, rf_data$Morphogroup) ## create a table showing the predicted vs actual morphogroups
caret::confusionMatrix(ct, reference = rf_data$Morphogroup) ## confusion matrix diagnostics that give you much more information.

## Confusion Matrix and Statistics
## 
##    
## pr     1    2    3
##   1  679    0   41
##   2    0  142    7
##   3   87   18 1550
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9394          
##                  95% CI : (0.9294, 0.9484)
##     No Information Rate : 0.6331          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8774          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.8864  0.88750   0.9700
## Specificity            0.9767  0.99704   0.8866
## Pos Pred Value         0.9431  0.95302   0.9366
## Neg Pred Value         0.9518  0.99242   0.9448
## Prevalence             0.3035  0.06339   0.6331
## Detection Rate         0.2690  0.05626   0.6141
## Detection Prevalence   0.2853  0.05903   0.6557
## Balanced Accuracy      0.9316  0.94227   0.9283

rf_target<-"Sex"
rf_data<-as.data.frame(hwlr) ##not scaling data here because examining sex.
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data))
ct<-table(pr, rf_data$Sex)
caret::confusionMatrix(ct, reference = rf_data$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr     F    M
##   F 1095   56
##   M   61 1312
##                                           
##                Accuracy : 0.9536          
##                  95% CI : (0.9447, 0.9615)
##     No Information Rate : 0.542           
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9066          
##                                           
##  Mcnemar's Test P-Value : 0.7115          
##                                           
##             Sensitivity : 0.9472          
##             Specificity : 0.9591          
##          Pos Pred Value : 0.9513          
##          Neg Pred Value : 0.9556          
##              Prevalence : 0.4580          
##          Detection Rate : 0.4338          
##    Detection Prevalence : 0.4560          
##       Balanced Accuracy : 0.9531          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Morphogroup"
rf_data_1<-as.data.frame(hwl_c1)
rf_data_1[4:24]<-scale(rf_data_1[4:24]) ## scaling data because sexes are pooled here still.
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Morphogroup)
caret::confusionMatrix(ct, reference = rf_data_1$Morphogroup)

## Confusion Matrix and Statistics
## 
##      
## pr    1.1 1.2 1.3 1.4 1.5
##   1.1 263  16   0   3  10
##   1.2   5 131   0   0   2
##   1.3   0   0 107   0   0
##   1.4   3   1   1 103   3
##   1.5   1   1   1   2 113
## 
## Overall Statistics
##                                           
##                Accuracy : 0.936           
##                  95% CI : (0.9163, 0.9523)
##     No Information Rate : 0.3551          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9162          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1.1 Class: 1.2 Class: 1.3 Class: 1.4 Class: 1.5
## Sensitivity              0.9669     0.8792     0.9817     0.9537     0.8828
## Specificity              0.9413     0.9887     1.0000     0.9878     0.9922
## Pos Pred Value           0.9007     0.9493     1.0000     0.9279     0.9576
## Neg Pred Value           0.9810     0.9713     0.9970     0.9924     0.9769
## Prevalence               0.3551     0.1945     0.1423     0.1410     0.1671
## Detection Rate           0.3433     0.1710     0.1397     0.1345     0.1475
## Detection Prevalence     0.3812     0.1802     0.1397     0.1449     0.1540
## Balanced Accuracy        0.9541     0.9339     0.9908     0.9708     0.9375

rf_target<-"Sex"
rf_data_1<-as.data.frame(hwl_c1)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr    F   M
##   F 302  25
##   M   9 430
##                                           
##                Accuracy : 0.9556          
##                  95% CI : (0.9385, 0.9691)
##     No Information Rate : 0.594           
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9087          
##                                           
##  Mcnemar's Test P-Value : 0.0101          
##                                           
##             Sensitivity : 0.9711          
##             Specificity : 0.9451          
##          Pos Pred Value : 0.9235          
##          Neg Pred Value : 0.9795          
##              Prevalence : 0.4060          
##          Detection Rate : 0.3943          
##    Detection Prevalence : 0.4269          
##       Balanced Accuracy : 0.9581          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Population"  ##here, each cluster is its own Howell's group.
rf_data_1<-as.data.frame(c1.1)
rf_data_1[4:24]<-scale(rf_data_1[4:24]) ##data scaled.
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)

## Confusion Matrix and Statistics
## 
##           
## pr         AINU EASTER I MOKAPU
##   AINU       84        0      4
##   EASTER I    0       84      1
##   MOKAPU      2        2     95
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9669          
##                  95% CI : (0.9381, 0.9848)
##     No Information Rate : 0.3676          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9502          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: AINU Class: EASTER I Class: MOKAPU
## Sensitivity               0.9767          0.9767        0.9500
## Specificity               0.9785          0.9946        0.9767
## Pos Pred Value            0.9545          0.9882        0.9596
## Neg Pred Value            0.9891          0.9893        0.9711
## Prevalence                0.3162          0.3162        0.3676
## Detection Rate            0.3088          0.3088        0.3493
## Detection Prevalence      0.3235          0.3125        0.3640
## Balanced Accuracy         0.9776          0.9857        0.9634

rf_target<-"Sex"
rf_data_1<-as.data.frame(c1.1) ##assigning non-scaled data for sex estimation.
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr    F   M
##   F 121   6
##   M   3 142
##                                           
##                Accuracy : 0.9669          
##                  95% CI : (0.9381, 0.9848)
##     No Information Rate : 0.5441          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9334          
##                                           
##  Mcnemar's Test P-Value : 0.505           
##                                           
##             Sensitivity : 0.9758          
##             Specificity : 0.9595          
##          Pos Pred Value : 0.9528          
##          Neg Pred Value : 0.9793          
##              Prevalence : 0.4559          
##          Detection Rate : 0.4449          
##    Detection Prevalence : 0.4669          
##       Balanced Accuracy : 0.9676          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Population"
rf_data_1<-as.data.frame(c1.2)
rf_data_1[4:24]<-scale(rf_data_1[4:24]) ##data scaled.
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)

## Confusion Matrix and Statistics
## 
##           
## pr         ANYANG GUAM PHILLIPI
##   ANYANG       36    3        3
##   GUAM          1   51        0
##   PHILLIPI      5    3       47
## 
## Overall Statistics
##                                           
##                Accuracy : 0.8993          
##                  95% CI : (0.8394, 0.9426)
##     No Information Rate : 0.3826          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8482          
##                                           
##  Mcnemar's Test P-Value : 0.2123          
## 
## Statistics by Class:
## 
##                      Class: ANYANG Class: GUAM Class: PHILLIPI
## Sensitivity                 0.8571      0.8947          0.9400
## Specificity                 0.9439      0.9891          0.9192
## Pos Pred Value              0.8571      0.9808          0.8545
## Neg Pred Value              0.9439      0.9381          0.9681
## Prevalence                  0.2819      0.3826          0.3356
## Detection Rate              0.2416      0.3423          0.3154
## Detection Prevalence        0.2819      0.3490          0.3691
## Balanced Accuracy           0.9005      0.9419          0.9296

rf_target<-"Sex"
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr    F   M
##   F  22   0
##   M   5 122
##                                          
##                Accuracy : 0.9664         
##                  95% CI : (0.9234, 0.989)
##     No Information Rate : 0.8188         
##     P-Value [Acc > NIR] : 4.13e-08       
##                                          
##                   Kappa : 0.8781         
##                                          
##  Mcnemar's Test P-Value : 0.07364        
##                                          
##             Sensitivity : 0.8148         
##             Specificity : 1.0000         
##          Pos Pred Value : 1.0000         
##          Neg Pred Value : 0.9606         
##              Prevalence : 0.1812         
##          Detection Rate : 0.1477         
##    Detection Prevalence : 0.1477         
##       Balanced Accuracy : 0.9074         
##                                          
##        'Positive' Class : F              
##

rf_target<-"Sex"
rf_data_1<-as.data.frame(c1.3)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf_target<-"Sex"
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr   F  M
##   F 52  4
##   M  2 51
##                                          
##                Accuracy : 0.945          
##                  95% CI : (0.884, 0.9795)
##     No Information Rate : 0.5046         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.8899         
##                                          
##  Mcnemar's Test P-Value : 0.6831         
##                                          
##             Sensitivity : 0.9630         
##             Specificity : 0.9273         
##          Pos Pred Value : 0.9286         
##          Neg Pred Value : 0.9623         
##              Prevalence : 0.4954         
##          Detection Rate : 0.4771         
##    Detection Prevalence : 0.5138         
##       Balanced Accuracy : 0.9451         
##                                          
##        'Positive' Class : F              
##

rf_target<-"Sex"
rf_data_1<-as.data.frame(c1.4)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf_target<-"Sex"
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr   F  M
##   F 55  0
##   M  0 53
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9664, 1)
##     No Information Rate : 0.5093     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
##                                      
##             Sensitivity : 1.0000     
##             Specificity : 1.0000     
##          Pos Pred Value : 1.0000     
##          Neg Pred Value : 1.0000     
##              Prevalence : 0.5093     
##          Detection Rate : 0.5093     
##    Detection Prevalence : 0.5093     
##       Balanced Accuracy : 1.0000     
##                                      
##        'Positive' Class : F          
##

rf_target<-"Population"
rf_data_1<-as.data.frame(c1.5)
nobs<-nrow(rf_data_1)
rf_data_1[4:24]<-scale(rf_data_1[4:24]) ##data scaled.
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)

## Confusion Matrix and Statistics
## 
##          
## pr        MORIORI N MAORI S MAORI
##   MORIORI     108       4       3
##   N MAORI       0       6       0
##   S MAORI       0       0       7
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9453          
##                  95% CI : (0.8906, 0.9777)
##     No Information Rate : 0.8438          
##     P-Value [Acc > NIR] : 0.0003583       
##                                           
##                   Kappa : 0.7663          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: MORIORI Class: N MAORI Class: S MAORI
## Sensitivity                  1.0000        0.60000        0.70000
## Specificity                  0.6500        1.00000        1.00000
## Pos Pred Value               0.9391        1.00000        1.00000
## Neg Pred Value               1.0000        0.96721        0.97521
## Prevalence                   0.8438        0.07812        0.07812
## Detection Rate               0.8438        0.04688        0.05469
## Detection Prevalence         0.8984        0.04688        0.05469
## Balanced Accuracy            0.8250        0.80000        0.85000

rf_target<-"Sex"
rf_data_1<-as.data.frame(c1.5)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr   F  M
##   F 49  1
##   M  2 76
##                                          
##                Accuracy : 0.9766         
##                  95% CI : (0.933, 0.9951)
##     No Information Rate : 0.6016         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.9509         
##                                          
##  Mcnemar's Test P-Value : 1              
##                                          
##             Sensitivity : 0.9608         
##             Specificity : 0.9870         
##          Pos Pred Value : 0.9800         
##          Neg Pred Value : 0.9744         
##              Prevalence : 0.3984         
##          Detection Rate : 0.3828         
##    Detection Prevalence : 0.3906         
##       Balanced Accuracy : 0.9739         
##                                          
##        'Positive' Class : F              
##

rf_target<-"Population"
rf_data<-as.data.frame(c2)
rf_data[5:25]<-scale(rf_data[5:25])
nobs<-nrow(rf_data)
rfsamp<-rftrain<-sample(nrow(rf_data), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data)), rftrain),rfvalidate)
cranio<-names((rf_data[5:25]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data))
ct<-table(pr, rf_data$Population)
caret::confusionMatrix(ct, reference = rf_data$Population)

## Confusion Matrix and Statistics
## 
##          
## pr        ANDAMAN BUSHMAN
##   ANDAMAN      66       3
##   BUSHMAN       4      87
##                                           
##                Accuracy : 0.9562          
##                  95% CI : (0.9119, 0.9822)
##     No Information Rate : 0.5625          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.911           
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9429          
##             Specificity : 0.9667          
##          Pos Pred Value : 0.9565          
##          Neg Pred Value : 0.9560          
##              Prevalence : 0.4375          
##          Detection Rate : 0.4125          
##    Detection Prevalence : 0.4313          
##       Balanced Accuracy : 0.9548          
##                                           
##        'Positive' Class : ANDAMAN         
##

rf_target<-"Sex"
rf_data<-as.data.frame(c2)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data))
ct<-table(pr, rf_data$Sex)
caret::confusionMatrix(ct, reference = rf_data$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr   F  M
##   F 82  5
##   M  2 71
##                                           
##                Accuracy : 0.9562          
##                  95% CI : (0.9119, 0.9822)
##     No Information Rate : 0.525           
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9121          
##                                           
##  Mcnemar's Test P-Value : 0.4497          
##                                           
##             Sensitivity : 0.9762          
##             Specificity : 0.9342          
##          Pos Pred Value : 0.9425          
##          Neg Pred Value : 0.9726          
##              Prevalence : 0.5250          
##          Detection Rate : 0.5125          
##    Detection Prevalence : 0.5437          
##       Balanced Accuracy : 0.9552          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Morphogroup"
rf_data_1<-as.data.frame(hwl_c3)
nobs<-nrow(rf_data_1)
rf_data_1[4:24]<-scale(rf_data_1[4:24]) ##data scaled.
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Morphogroup)
caret::confusionMatrix(ct, reference = rf_data_1$Morphogroup)

## Confusion Matrix and Statistics
## 
##      
## pr    3.1 3.2 3.3
##   3.1 287  10  10
##   3.2  33 667  14
##   3.3  10  10 557
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9456          
##                  95% CI : (0.9333, 0.9562)
##     No Information Rate : 0.4299          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9145          
##                                           
##  Mcnemar's Test P-Value : 0.004704        
## 
## Statistics by Class:
## 
##                      Class: 3.1 Class: 3.2 Class: 3.3
## Sensitivity              0.8697     0.9709     0.9587
## Specificity              0.9842     0.9484     0.9803
## Pos Pred Value           0.9349     0.9342     0.9653
## Neg Pred Value           0.9667     0.9774     0.9765
## Prevalence               0.2065     0.4299     0.3636
## Detection Rate           0.1796     0.4174     0.3486
## Detection Prevalence     0.1921     0.4468     0.3611
## Balanced Accuracy        0.9270     0.9596     0.9695

rf_target<-"Sex"
rf_data_1<-as.data.frame(hwl_c3)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr    F   M
##   F 729  34
##   M  32 803
##                                           
##                Accuracy : 0.9587          
##                  95% CI : (0.9478, 0.9679)
##     No Information Rate : 0.5238          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9172          
##                                           
##  Mcnemar's Test P-Value : 0.902           
##                                           
##             Sensitivity : 0.9580          
##             Specificity : 0.9594          
##          Pos Pred Value : 0.9554          
##          Neg Pred Value : 0.9617          
##              Prevalence : 0.4762          
##          Detection Rate : 0.4562          
##    Detection Prevalence : 0.4775          
##       Balanced Accuracy : 0.9587          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Morphogroup"
rf_data_1<-as.data.frame(hwlr_c3.1)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Morphogroup)
caret::confusionMatrix(ct, reference = rf_data_1$Morphogroup)

## Confusion Matrix and Statistics
## 
##        
## pr      3.1.1 3.1.2 3.1.3 3.1.4
##   3.1.1    63     2     4     3
##   3.1.2     1    69     1     4
##   3.1.3     2     4    70     8
##   3.1.4     3     8    12    76
## 
## Overall Statistics
##                                         
##                Accuracy : 0.8424        
##                  95% CI : (0.7985, 0.88)
##     No Information Rate : 0.2758        
##     P-Value [Acc > NIR] : <2e-16        
##                                         
##                   Kappa : 0.7892        
##                                         
##  Mcnemar's Test P-Value : 0.5524        
## 
## Statistics by Class:
## 
##                      Class: 3.1.1 Class: 3.1.2 Class: 3.1.3 Class: 3.1.4
## Sensitivity                0.9130       0.8313       0.8046       0.8352
## Specificity                0.9655       0.9757       0.9424       0.9038
## Pos Pred Value             0.8750       0.9200       0.8333       0.7677
## Neg Pred Value             0.9767       0.9451       0.9309       0.9351
## Prevalence                 0.2091       0.2515       0.2636       0.2758
## Detection Rate             0.1909       0.2091       0.2121       0.2303
## Detection Prevalence       0.2182       0.2273       0.2545       0.3000
## Balanced Accuracy          0.9393       0.9035       0.8735       0.8695

rf_target<-"Population"  ##here each morphogroup is its own Howells population.
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)

## Confusion Matrix and Statistics
## 
##          
## pr        ARIKARA HAINAN N JAPAN S JAPAN
##   ARIKARA      63      2       4       3
##   HAINAN        1     69       1       4
##   N JAPAN       2      4      69       7
##   S JAPAN       3      8      13      77
## 
## Overall Statistics
##                                         
##                Accuracy : 0.8424        
##                  95% CI : (0.7985, 0.88)
##     No Information Rate : 0.2758        
##     P-Value [Acc > NIR] : <2e-16        
##                                         
##                   Kappa : 0.7891        
##                                         
##  Mcnemar's Test P-Value : 0.4307        
## 
## Statistics by Class:
## 
##                      Class: ARIKARA Class: HAINAN Class: N JAPAN Class: S JAPAN
## Sensitivity                  0.9130        0.8313         0.7931         0.8462
## Specificity                  0.9655        0.9757         0.9465         0.8996
## Pos Pred Value               0.8750        0.9200         0.8415         0.7624
## Neg Pred Value               0.9767        0.9451         0.9274         0.9389
## Prevalence                   0.2091        0.2515         0.2636         0.2758
## Detection Rate               0.1909        0.2091         0.2091         0.2333
## Detection Prevalence         0.2182        0.2273         0.2485         0.3061
## Balanced Accuracy            0.9393        0.9035         0.8698         0.8729

rf_target<-"Sex"
rf_data_1<-as.data.frame(hwlr_c3.1)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr    F   M
##   F 124   1
##   M  14 191
##                                           
##                Accuracy : 0.9545          
##                  95% CI : (0.9261, 0.9743)
##     No Information Rate : 0.5818          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9053          
##                                           
##  Mcnemar's Test P-Value : 0.001946        
##                                           
##             Sensitivity : 0.8986          
##             Specificity : 0.9948          
##          Pos Pred Value : 0.9920          
##          Neg Pred Value : 0.9317          
##              Prevalence : 0.4182          
##          Detection Rate : 0.3758          
##    Detection Prevalence : 0.3788          
##       Balanced Accuracy : 0.9467          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Morphogroup"
rf_data_1<-as.data.frame(hwlr_c3.2)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Morphogroup)
caret::confusionMatrix(ct, reference = rf_data_1$Morphogroup)

## Confusion Matrix and Statistics
## 
##        
## pr      3.2.1 3.2.2 3.2.3 3.2.4
##   3.2.1   138     3     1     5
##   3.2.2    12   311    14     5
##   3.2.3     2     3    95     1
##   3.2.4     5     0     1    91
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9243         
##                  95% CI : (0.9019, 0.943)
##     No Information Rate : 0.4614         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.8881         
##                                          
##  Mcnemar's Test P-Value : 0.006616       
## 
## Statistics by Class:
## 
##                      Class: 3.2.1 Class: 3.2.2 Class: 3.2.3 Class: 3.2.4
## Sensitivity                0.8790       0.9811       0.8559       0.8922
## Specificity                0.9830       0.9162       0.9896       0.9897
## Pos Pred Value             0.9388       0.9094       0.9406       0.9381
## Neg Pred Value             0.9648       0.9826       0.9727       0.9814
## Prevalence                 0.2285       0.4614       0.1616       0.1485
## Detection Rate             0.2009       0.4527       0.1383       0.1325
## Detection Prevalence       0.2140       0.4978       0.1470       0.1412
## Balanced Accuracy          0.9310       0.9486       0.9227       0.9410

rf_target<-"Population"
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)

## Confusion Matrix and Statistics
## 
##           
## pr         ATAYAL PERU BERG NORSE ZALAVAR EGYPT SANTA CR
##   ATAYAL       30    0    0     0       0     0        0
##   PERU         10   94    1     0       3     0        4
##   BERG          0    3   97     3      11     0        2
##   NORSE         0    2    5    98       6     9        0
##   ZALAVAR       1    0    4     5      76     2        0
##   EGYPT         2    5    1     4       2    98        2
##   SANTA CR      4    6    1     0       0     2       94
## 
## Overall Statistics
##                                         
##                Accuracy : 0.8544        
##                  95% CI : (0.8258, 0.88)
##     No Information Rate : 0.1616        
##     P-Value [Acc > NIR] : < 2.2e-16     
##                                         
##                   Kappa : 0.8283        
##                                         
##  Mcnemar's Test P-Value : NA            
## 
## Statistics by Class:
## 
##                      Class: ATAYAL Class: PERU Class: BERG Class: NORSE
## Sensitivity                0.63830      0.8545      0.8899       0.8909
## Specificity                1.00000      0.9688      0.9671       0.9619
## Pos Pred Value             1.00000      0.8393      0.8362       0.8167
## Neg Pred Value             0.97412      0.9722      0.9790       0.9788
## Prevalence                 0.06841      0.1601      0.1587       0.1601
## Detection Rate             0.04367      0.1368      0.1412       0.1426
## Detection Prevalence       0.04367      0.1630      0.1689       0.1747
## Balanced Accuracy          0.81915      0.9117      0.9285       0.9264
##                      Class: ZALAVAR Class: EGYPT Class: SANTA CR
## Sensitivity                  0.7755       0.8829          0.9216
## Specificity                  0.9796       0.9722          0.9778
## Pos Pred Value               0.8636       0.8596          0.8785
## Neg Pred Value               0.9633       0.9773          0.9862
## Prevalence                   0.1426       0.1616          0.1485
## Detection Rate               0.1106       0.1426          0.1368
## Detection Prevalence         0.1281       0.1659          0.1557
## Balanced Accuracy            0.8776       0.9276          0.9497

rf_target<-"Sex"
rf_data_1<-as.data.frame(hwlr_c3.2)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr    F   M
##   F 313  12
##   M  17 345
##                                           
##                Accuracy : 0.9578          
##                  95% CI : (0.9399, 0.9716)
##     No Information Rate : 0.5197          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9154          
##                                           
##  Mcnemar's Test P-Value : 0.4576          
##                                           
##             Sensitivity : 0.9485          
##             Specificity : 0.9664          
##          Pos Pred Value : 0.9631          
##          Neg Pred Value : 0.9530          
##              Prevalence : 0.4803          
##          Detection Rate : 0.4556          
##    Detection Prevalence : 0.4731          
##       Balanced Accuracy : 0.9574          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Population"
rf_data_1<-as.data.frame(c3.2.1)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)

## Confusion Matrix and Statistics
## 
##         
## pr       ATAYAL PERU
##   ATAYAL     36    1
##   PERU       11  109
##                                           
##                Accuracy : 0.9236          
##                  95% CI : (0.8703, 0.9599)
##     No Information Rate : 0.7006          
##     P-Value [Acc > NIR] : 7.671e-12       
##                                           
##                   Kappa : 0.806           
##                                           
##  Mcnemar's Test P-Value : 0.009375        
##                                           
##             Sensitivity : 0.7660          
##             Specificity : 0.9909          
##          Pos Pred Value : 0.9730          
##          Neg Pred Value : 0.9083          
##              Prevalence : 0.2994          
##          Detection Rate : 0.2293          
##    Detection Prevalence : 0.2357          
##       Balanced Accuracy : 0.8784          
##                                           
##        'Positive' Class : ATAYAL          
##

rf_target<-"Sex"
rf_data_1<-as.data.frame(c3.2.1)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr   F  M
##   F 70  4
##   M  3 80
##                                           
##                Accuracy : 0.9554          
##                  95% CI : (0.9103, 0.9819)
##     No Information Rate : 0.535           
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9105          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9589          
##             Specificity : 0.9524          
##          Pos Pred Value : 0.9459          
##          Neg Pred Value : 0.9639          
##              Prevalence : 0.4650          
##          Detection Rate : 0.4459          
##    Detection Prevalence : 0.4713          
##       Balanced Accuracy : 0.9556          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Population"
rf_data_1<-as.data.frame(c3.2.2)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)

## Confusion Matrix and Statistics
## 
##          
## pr        BERG NORSE ZALAVAR
##   BERG     104     1       4
##   NORSE      4   101       5
##   ZALAVAR    1     8      89
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9274          
##                  95% CI : (0.8931, 0.9535)
##     No Information Rate : 0.347           
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.891           
##                                           
##  Mcnemar's Test P-Value : 0.2316          
## 
## Statistics by Class:
## 
##                      Class: BERG Class: NORSE Class: ZALAVAR
## Sensitivity               0.9541       0.9182         0.9082
## Specificity               0.9760       0.9565         0.9589
## Pos Pred Value            0.9541       0.9182         0.9082
## Neg Pred Value            0.9760       0.9565         0.9589
## Prevalence                0.3438       0.3470         0.3091
## Detection Rate            0.3281       0.3186         0.2808
## Detection Prevalence      0.3438       0.3470         0.3091
## Balanced Accuracy         0.9650       0.9374         0.9335

rf_target<-"Sex"
rf_data_1<-as.data.frame(c3.2.2)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr    F   M
##   F 151   5
##   M   2 159
##                                          
##                Accuracy : 0.9779         
##                  95% CI : (0.955, 0.9911)
##     No Information Rate : 0.5174         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.9558         
##                                          
##  Mcnemar's Test P-Value : 0.4497         
##                                          
##             Sensitivity : 0.9869         
##             Specificity : 0.9695         
##          Pos Pred Value : 0.9679         
##          Neg Pred Value : 0.9876         
##              Prevalence : 0.4826         
##          Detection Rate : 0.4763         
##    Detection Prevalence : 0.4921         
##       Balanced Accuracy : 0.9782         
##                                          
##        'Positive' Class : F              
##

rf_target<-"Sex" ##only only one group, so only classifying sex.
rf_data_1<-as.data.frame(c3.2.3)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr   F  M
##   F 50  2
##   M  3 56
##                                          
##                Accuracy : 0.955          
##                  95% CI : (0.898, 0.9852)
##     No Information Rate : 0.5225         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.9097         
##                                          
##  Mcnemar's Test P-Value : 1              
##                                          
##             Sensitivity : 0.9434         
##             Specificity : 0.9655         
##          Pos Pred Value : 0.9615         
##          Neg Pred Value : 0.9492         
##              Prevalence : 0.4775         
##          Detection Rate : 0.4505         
##    Detection Prevalence : 0.4685         
##       Balanced Accuracy : 0.9545         
##                                          
##        'Positive' Class : F              
##

rf_target<-"Sex" ##only only one group, so only classifying sex.
rf_data_1<-as.data.frame(c3.2.4)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr   F  M
##   F 50  2
##   M  1 49
##                                           
##                Accuracy : 0.9706          
##                  95% CI : (0.9164, 0.9939)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9412          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9804          
##             Specificity : 0.9608          
##          Pos Pred Value : 0.9615          
##          Neg Pred Value : 0.9800          
##              Prevalence : 0.5000          
##          Detection Rate : 0.4902          
##    Detection Prevalence : 0.5098          
##       Balanced Accuracy : 0.9706          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Morphogroup"
rf_data_1<-as.data.frame(hwlr_c3.3)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Morphogroup)
caret::confusionMatrix(ct, reference = rf_data_1$Morphogroup)

## Confusion Matrix and Statistics
## 
##        
## pr      3.3.1 3.3.2 3.3.3 3.3.4
##   3.3.1   204    12    12     4
##   3.3.2     1   165     2    16
##   3.3.3     4     1    73     0
##   3.3.4     2     4     0    81
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9002          
##                  95% CI : (0.8729, 0.9233)
##     No Information Rate : 0.3632          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8593          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 3.3.1 Class: 3.3.2 Class: 3.3.3 Class: 3.3.4
## Sensitivity                0.9668       0.9066       0.8391       0.8020
## Specificity                0.9243       0.9524       0.9899       0.9875
## Pos Pred Value             0.8793       0.8967       0.9359       0.9310
## Neg Pred Value             0.9799       0.9572       0.9722       0.9595
## Prevalence                 0.3632       0.3133       0.1497       0.1738
## Detection Rate             0.3511       0.2840       0.1256       0.1394
## Detection Prevalence       0.3993       0.3167       0.1343       0.1497
## Balanced Accuracy          0.9456       0.9295       0.9145       0.8947

rf_target<-"Population"
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)

## Confusion Matrix and Statistics
## 
##           
## pr         AUSTRALI TOLAI DOGON TEITA TASMANIA ZULU
##   AUSTRALI       91     4     0     3        6    2
##   TOLAI           5   102     2     3        3    0
##   DOGON           0     0    89     3        2    8
##   TEITA           1     0     3    70        1    1
##   TASMANIA        3     3     1     1       75    1
##   ZULU            1     1     4     3        0   89
## 
## Overall Statistics
##                                           
##                Accuracy : 0.8881          
##                  95% CI : (0.8596, 0.9126)
##     No Information Rate : 0.1893          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8654          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: AUSTRALI Class: TOLAI Class: DOGON Class: TEITA
## Sensitivity                   0.9010       0.9273       0.8990       0.8434
## Specificity                   0.9688       0.9724       0.9730       0.9880
## Pos Pred Value                0.8585       0.8870       0.8725       0.9211
## Neg Pred Value                0.9789       0.9828       0.9791       0.9743
## Prevalence                    0.1738       0.1893       0.1704       0.1429
## Detection Rate                0.1566       0.1756       0.1532       0.1205
## Detection Prevalence          0.1824       0.1979       0.1756       0.1308
## Balanced Accuracy             0.9349       0.9498       0.9360       0.9157
##                      Class: TASMANIA Class: ZULU
## Sensitivity                   0.8621      0.8812
## Specificity                   0.9818      0.9812
## Pos Pred Value                0.8929      0.9082
## Neg Pred Value                0.9759      0.9752
## Prevalence                    0.1497      0.1738
## Detection Rate                0.1291      0.1532
## Detection Prevalence          0.1446      0.1687
## Balanced Accuracy             0.9219      0.9312

rf_target<-"Sex"
rf_data_1<-as.data.frame(hwlr_c3.3)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr    F   M
##   F 282  10
##   M  11 278
##                                           
##                Accuracy : 0.9639          
##                  95% CI : (0.9453, 0.9775)
##     No Information Rate : 0.5043          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9277          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9625          
##             Specificity : 0.9653          
##          Pos Pred Value : 0.9658          
##          Neg Pred Value : 0.9619          
##              Prevalence : 0.5043          
##          Detection Rate : 0.4854          
##    Detection Prevalence : 0.5026          
##       Balanced Accuracy : 0.9639          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Population"
rf_data_1<-as.data.frame(c3.3.1)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)

## Confusion Matrix and Statistics
## 
##           
## pr         AUSTRALI TOLAI
##   AUSTRALI       94     4
##   TOLAI           7   106
##                                           
##                Accuracy : 0.9479          
##                  95% CI : (0.9086, 0.9737)
##     No Information Rate : 0.5213          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8954          
##                                           
##  Mcnemar's Test P-Value : 0.5465          
##                                           
##             Sensitivity : 0.9307          
##             Specificity : 0.9636          
##          Pos Pred Value : 0.9592          
##          Neg Pred Value : 0.9381          
##              Prevalence : 0.4787          
##          Detection Rate : 0.4455          
##    Detection Prevalence : 0.4645          
##       Balanced Accuracy : 0.9472          
##                                           
##        'Positive' Class : AUSTRALI        
##

rf_target<-"Sex"
rf_data_1<-as.data.frame(c3.3.1)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr    F   M
##   F  94   2
##   M   9 106
##                                           
##                Accuracy : 0.9479          
##                  95% CI : (0.9086, 0.9737)
##     No Information Rate : 0.5118          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.8955          
##                                           
##  Mcnemar's Test P-Value : 0.07044         
##                                           
##             Sensitivity : 0.9126          
##             Specificity : 0.9815          
##          Pos Pred Value : 0.9792          
##          Neg Pred Value : 0.9217          
##              Prevalence : 0.4882          
##          Detection Rate : 0.4455          
##    Detection Prevalence : 0.4550          
##       Balanced Accuracy : 0.9471          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Population"
rf_data_1<-as.data.frame(c3.3.2)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)

## Confusion Matrix and Statistics
## 
##        
## pr      DOGON TEITA
##   DOGON    97     3
##   TEITA     2    80
##                                          
##                Accuracy : 0.9725         
##                  95% CI : (0.9371, 0.991)
##     No Information Rate : 0.544          
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.9446         
##                                          
##  Mcnemar's Test P-Value : 1              
##                                          
##             Sensitivity : 0.9798         
##             Specificity : 0.9639         
##          Pos Pred Value : 0.9700         
##          Neg Pred Value : 0.9756         
##              Prevalence : 0.5440         
##          Detection Rate : 0.5330         
##    Detection Prevalence : 0.5495         
##       Balanced Accuracy : 0.9718         
##                                          
##        'Positive' Class : DOGON          
##

rf_target<-"Sex"
rf_data_1<-as.data.frame(c3.3.2)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr   F  M
##   F 97  2
##   M  5 78
##                                           
##                Accuracy : 0.9615          
##                  95% CI : (0.9224, 0.9844)
##     No Information Rate : 0.5604          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9223          
##                                           
##  Mcnemar's Test P-Value : 0.4497          
##                                           
##             Sensitivity : 0.9510          
##             Specificity : 0.9750          
##          Pos Pred Value : 0.9798          
##          Neg Pred Value : 0.9398          
##              Prevalence : 0.5604          
##          Detection Rate : 0.5330          
##    Detection Prevalence : 0.5440          
##       Balanced Accuracy : 0.9630          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Sex" ##only only one group, so only classifying sex.
rf_data_1<-as.data.frame(c3.3.3)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr   F  M
##   F 40  1
##   M  2 44
##                                           
##                Accuracy : 0.9655          
##                  95% CI : (0.9025, 0.9928)
##     No Information Rate : 0.5172          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9309          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9524          
##             Specificity : 0.9778          
##          Pos Pred Value : 0.9756          
##          Neg Pred Value : 0.9565          
##              Prevalence : 0.4828          
##          Detection Rate : 0.4598          
##    Detection Prevalence : 0.4713          
##       Balanced Accuracy : 0.9651          
##                                           
##        'Positive' Class : F               
##

rf_target<-"Sex" ##only only one group, so only classifying sex.
rf_data_1<-as.data.frame(c3.3.4)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)

## Confusion Matrix and Statistics
## 
##    
## pr   F  M
##   F 42  6
##   M  4 49
##                                           
##                Accuracy : 0.901           
##                  95% CI : (0.8254, 0.9515)
##     No Information Rate : 0.5446          
##     P-Value [Acc > NIR] : 8.075e-15       
##                                           
##                   Kappa : 0.8011          
##                                           
##  Mcnemar's Test P-Value : 0.7518          
##                                           
##             Sensitivity : 0.9130          
##             Specificity : 0.8909          
##          Pos Pred Value : 0.8750          
##          Neg Pred Value : 0.9245          
##              Prevalence : 0.4554          
##          Detection Rate : 0.4158          
##    Detection Prevalence : 0.4752          
##       Balanced Accuracy : 0.9020          
##                                           
##        'Positive' Class : F               
##

New Theoretical Approach To Ancestry Estimation

Michael Kenyhercz

01/19/2021

Data Summary

Data Preparation

Calculating group means for each variable