A quick note about this markdown file - I have presented my analysis in a step-by-step fashion, so if you had your R environment properly configured, you could copy and paste everything that follows verbatim. Code annotations are shown whenever there is a double hashtag. Most of this framework uses the same types of analyses, just at different scales. I have only annotated the code for the first instance that particular analysis was done. I have mostly adhered to a naming schema for my objects that you can follow the steps, if you are not as familiar with R.
r <- getOption("repos")
r["CRAN"] <- "http://cran.cnr.berkeley.edu/"
options(repos = r)
install.packages("devtools")
## Warning: unable to access index for repository http://cran.cnr.berkeley.edu/src/contrib:
## cannot open URL 'http://cran.cnr.berkeley.edu/src/contrib/PACKAGES'
## Warning: package 'devtools' is not available (for R version 4.0.2)
## Warning: unable to access index for repository http://cran.cnr.berkeley.edu/bin/macosx/contrib/4.0:
## cannot open URL 'http://cran.cnr.berkeley.edu/bin/macosx/contrib/4.0/PACKAGES'
devtools::install_github("geanes/bioanth") ## this installs a package with the Howells dataset
## Skipping install of 'bioanth' from a github remote, the SHA1 (b179b396) has not changed since last install.
## Use `force = TRUE` to force installation
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(NbClust)
library(ggplot2)
library(treemap)
library(caret)
## Loading required package: lattice
library(ggRandomForests)
## Loading required package: randomForestSRC
##
## randomForestSRC 2.9.3
##
## Type rfsrc.news() to see new features, changes, and bug fixes.
##
##
## Attaching package: 'ggRandomForests'
## The following object is masked from 'package:randomForestSRC':
##
## partial.rfsrc
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(randomForestSRC)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:randomForest':
##
## outlier
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(usedist)
library(bioanth)
library(ggdendro)
suppressWarnings(library(ggplot2))
knitr::opts_chunk$set(dev="jpeg")
set.seed(32) ## setting a random number generator for consistent results.
head(howell) ## examining the first six indicies in the howell data frame.
## # A tibble: 6 x 86
## ID Sex PopNum Population GOL NOL BNL BBH XCB XFB ZYB AUB
## <int> <fct> <fct> <fct> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 1 M 1 NORSE 189 185 100 135 143 120 133 119
## 2 2 M 1 NORSE 182 178 102 139 145 120 137 125
## 3 3 M 1 NORSE 191 187 102 123 140 114 134 125
## 4 4 M 1 NORSE 191 188 100 127 141 123 135 127
## 5 5 M 1 NORSE 178 177 97 128 138 117 129 121
## 6 6 M 1 NORSE 194 191 106 132 139 118 136 128
## # … with 74 more variables: WCB <int>, ASB <int>, BPL <int>, NPH <int>,
## # NLH <int>, JUB <int>, NLB <int>, MAB <int>, MDH <int>, MDB <int>,
## # OBH <int>, OBB <int>, DKB <int>, NDS <int>, WNB <dbl>, SIS <dbl>,
## # ZMB <int>, SSS <int>, FMB <int>, NAS <int>, EKB <int>, DKS <int>,
## # IML <int>, XML <int>, MLS <int>, WMH <int>, SOS <int>, GLS <int>,
## # STB <int>, FRC <int>, FRS <int>, FRF <int>, PAC <int>, PAS <int>,
## # PAF <int>, OCC <int>, OCS <int>, OCF <int>, FOL <int>, NAR <int>,
## # SSR <int>, PRR <int>, DKR <int>, ZOR <int>, FMR <int>, EKR <int>,
## # ZMR <int>, AVR <int>, BRR <int>, VRR <int>, LAR <int>, OSR <int>,
## # BAR <int>, NAA <int>, PRA <int>, BAA <int>, NBA <int>, BBA <int>,
## # BRA <int>, SSA <int>, NFA <int>, DKA <int>, NDA <int>, SIA <int>,
## # FRA <int>, PAA <int>, OCA <int>, RFA <int>, RPA <int>, ROA <int>,
## # BSA <int>, SBA <int>, SLA <int>, TBA <int>
dim(howell) ## checking the dimensions of the howell data frame
## [1] 2524 86
summary(howell)
## ID Sex PopNum Population GOL
## Min. : 1.0 F:1156 22 : 111 EGYPT : 111 Min. :151.0
## 1st Qu.: 634.8 M:1368 1 : 110 NORSE : 110 1st Qu.:173.0
## Median :1275.5 9 : 110 PERU : 110 Median :179.0
## Mean :1458.1 15 : 110 TOLAI : 110 Mean :179.2
## 3rd Qu.:2326.2 3 : 109 BERG : 109 3rd Qu.:185.0
## Max. :3190.0 26 : 109 BURIAT : 109 Max. :206.0
## (Other):1865 (Other):1865
## NOL BNL BBH XCB
## Min. :151.0 Min. : 83.00 Min. :107.0 Min. :116.0
## 1st Qu.:172.0 1st Qu.: 95.00 1st Qu.:127.0 1st Qu.:132.0
## Median :177.0 Median : 99.00 Median :131.0 Median :137.0
## Mean :176.9 Mean : 99.12 Mean :131.6 Mean :136.8
## 3rd Qu.:182.0 3rd Qu.:103.00 3rd Qu.:137.0 3rd Qu.:141.0
## Max. :200.0 Max. :120.00 Max. :155.0 Max. :167.0
##
## XFB ZYB AUB WCB
## Min. : 95.0 Min. :105.0 Min. : 98.0 Min. :57.00
## 1st Qu.:109.0 1st Qu.:125.0 1st Qu.:115.0 1st Qu.:68.00
## Median :113.0 Median :131.0 Median :120.0 Median :71.00
## Mean :113.4 Mean :130.8 Mean :120.6 Mean :70.99
## 3rd Qu.:117.0 3rd Qu.:136.0 3rd Qu.:125.0 3rd Qu.:74.00
## Max. :145.0 Max. :158.0 Max. :149.0 Max. :89.00
##
## ASB BPL NPH NLH
## Min. : 88.0 Min. : 80.00 Min. :48.00 Min. :36.00
## 1st Qu.:103.0 1st Qu.: 93.00 1st Qu.:62.75 1st Qu.:47.00
## Median :107.0 Median : 98.00 Median :66.00 Median :50.00
## Mean :106.9 Mean : 97.78 Mean :65.98 Mean :50.01
## 3rd Qu.:111.0 3rd Qu.:102.00 3rd Qu.:70.00 3rd Qu.:53.00
## Max. :128.0 Max. :123.00 Max. :82.00 Max. :65.00
##
## JUB NLB MAB MDH MDB
## Min. : 97.0 Min. :19.0 Min. :52.00 Min. :16.00 Min. : 6.00
## 1st Qu.:111.0 1st Qu.:25.0 1st Qu.:61.00 1st Qu.:25.00 1st Qu.:11.00
## Median :115.0 Median :26.0 Median :63.00 Median :27.00 Median :12.00
## Mean :115.2 Mean :26.3 Mean :63.55 Mean :27.35 Mean :12.31
## 3rd Qu.:120.0 3rd Qu.:28.0 3rd Qu.:66.00 3rd Qu.:30.00 3rd Qu.:14.00
## Max. :138.0 Max. :35.0 Max. :78.00 Max. :39.00 Max. :20.00
##
## OBH OBB DKB NDS
## Min. :26.00 Min. :33.00 Min. :13.00 Min. : 4.000
## 1st Qu.:32.00 1st Qu.:38.00 1st Qu.:20.00 1st Qu.: 8.000
## Median :34.00 Median :39.00 Median :21.00 Median :10.000
## Mean :33.67 Mean :39.49 Mean :21.38 Mean : 9.592
## 3rd Qu.:35.00 3rd Qu.:41.00 3rd Qu.:23.00 3rd Qu.:11.000
## Max. :41.00 Max. :46.00 Max. :32.00 Max. :17.000
##
## WNB SIS ZMB SSS
## Min. : 1.400 Min. :0.100 Min. : 79.00 Min. :11.00
## 1st Qu.: 6.800 1st Qu.:2.200 1st Qu.: 91.00 1st Qu.:21.00
## Median : 8.200 Median :3.000 Median : 95.00 Median :23.00
## Mean : 8.315 Mean :3.049 Mean : 94.98 Mean :22.91
## 3rd Qu.: 9.800 3rd Qu.:3.800 3rd Qu.: 99.00 3rd Qu.:25.00
## Max. :15.900 Max. :7.700 Max. :120.00 Max. :35.00
##
## FMB NAS EKB DKS
## Min. : 81.00 Min. : 8.00 Min. : 83.00 Min. : 2.000
## 1st Qu.: 94.00 1st Qu.:14.00 1st Qu.: 95.00 1st Qu.: 8.000
## Median : 97.00 Median :16.00 Median : 97.00 Median :10.000
## Mean : 96.97 Mean :16.26 Mean : 97.34 Mean : 9.786
## 3rd Qu.:100.00 3rd Qu.:18.00 3rd Qu.:100.00 3rd Qu.:11.000
## Max. :112.00 Max. :25.00 Max. :113.00 Max. :18.000
##
## IML XML MLS WMH
## Min. :20.00 Min. :38.00 Min. : 6.00 Min. :14.00
## 1st Qu.:33.00 1st Qu.:50.00 1st Qu.:10.00 1st Qu.:21.00
## Median :36.00 Median :53.00 Median :11.00 Median :23.00
## Mean :36.11 Mean :52.68 Mean :11.29 Mean :22.74
## 3rd Qu.:39.00 3rd Qu.:56.00 3rd Qu.:12.00 3rd Qu.:25.00
## Max. :49.00 Max. :69.00 Max. :18.00 Max. :35.00
##
## SOS GLS STB FRC
## Min. : 2.000 Min. :0.000 Min. : 81.0 Min. : 93.0
## 1st Qu.: 5.000 1st Qu.:2.000 1st Qu.:104.0 1st Qu.:106.0
## Median : 6.000 Median :3.000 Median :109.0 Median :109.0
## Mean : 5.794 Mean :3.023 Mean :109.3 Mean :109.5
## 3rd Qu.: 7.000 3rd Qu.:4.000 3rd Qu.:115.0 3rd Qu.:113.0
## Max. :11.000 Max. :9.000 Max. :140.0 Max. :128.0
##
## FRS FRF PAC PAS PAF
## Min. :16.00 Min. :36.00 Min. : 89.0 Min. :13.00 Min. :34.0
## 1st Qu.:23.00 1st Qu.:46.00 1st Qu.:106.0 1st Qu.:22.00 1st Qu.:54.0
## Median :26.00 Median :49.00 Median :111.0 Median :24.00 Median :57.0
## Mean :25.43 Mean :48.77 Mean :110.6 Mean :23.56 Mean :57.4
## 3rd Qu.:27.00 3rd Qu.:51.00 3rd Qu.:115.0 3rd Qu.:25.00 3rd Qu.:61.0
## Max. :35.00 Max. :66.00 Max. :135.0 Max. :36.00 Max. :77.0
##
## OCC OCS OCF FOL
## Min. : 79.00 Min. :15.00 Min. :29.00 Min. :27.00
## 1st Qu.: 92.00 1st Qu.:25.00 1st Qu.:43.00 1st Qu.:34.00
## Median : 95.00 Median :28.00 Median :46.00 Median :36.00
## Mean : 95.69 Mean :27.87 Mean :46.51 Mean :35.78
## 3rd Qu.:100.00 3rd Qu.:30.00 3rd Qu.:50.00 3rd Qu.:37.00
## Max. :118.00 Max. :41.00 Max. :70.00 Max. :50.00
##
## NAR SSR PRR DKR
## Min. : 75.0 Min. : 76.00 Min. : 81.0 Min. :67.00
## 1st Qu.: 89.0 1st Qu.: 90.00 1st Qu.: 96.0 1st Qu.:78.00
## Median : 92.0 Median : 94.00 Median :101.0 Median :81.00
## Mean : 92.5 Mean : 94.08 Mean :100.6 Mean :81.68
## 3rd Qu.: 96.0 3rd Qu.: 98.00 3rd Qu.:105.0 3rd Qu.:85.00
## Max. :110.0 Max. :114.00 Max. :124.0 Max. :98.00
##
## ZOR FMR EKR ZMR
## Min. :61.00 Min. :63.00 Min. :57.00 Min. :54.00
## 1st Qu.:76.00 1st Qu.:73.00 1st Qu.:68.00 1st Qu.:68.00
## Median :80.00 Median :77.00 Median :71.00 Median :72.00
## Mean :79.75 Mean :76.91 Mean :71.64 Mean :72.01
## 3rd Qu.:83.00 3rd Qu.:80.00 3rd Qu.:75.00 3rd Qu.:75.00
## Max. :97.00 Max. :96.00 Max. :88.00 Max. :89.00
##
## AVR BRR VRR LAR
## Min. :61.00 Min. : 0.00 Min. :101.0 Min. : 0.00
## 1st Qu.:76.00 1st Qu.: 0.00 1st Qu.:117.0 1st Qu.: 0.00
## Median :80.00 Median :115.00 Median :121.0 Median :103.00
## Mean :80.05 Mean : 86.78 Mean :120.8 Mean : 78.01
## 3rd Qu.:84.00 3rd Qu.:120.00 3rd Qu.:125.0 3rd Qu.:108.00
## Max. :98.00 Max. :138.00 Max. :140.0 Max. :133.00
##
## OSR BAR NAA PRA BAA
## Min. : 0.00 Min. : 0.0 Min. :57.00 Min. :60.00 Min. :28.00
## 1st Qu.: 0.00 1st Qu.: 0.0 1st Qu.:66.00 1st Qu.:69.00 1st Qu.:37.00
## Median :39.00 Median :14.0 Median :69.00 Median :71.00 Median :39.00
## Mean :29.98 Mean :11.2 Mean :69.42 Mean :71.48 Mean :39.12
## 3rd Qu.:42.00 3rd Qu.:16.0 3rd Qu.:72.00 3rd Qu.:74.00 3rd Qu.:41.00
## Max. :52.00 Max. :50.0 Max. :87.00 Max. :84.00 Max. :50.00
##
## NBA BBA BRA SSA NFA
## Min. :65.00 Min. :46.00 Min. :38.00 Min. :107.0 Min. :126
## 1st Qu.:76.00 1st Qu.:53.00 1st Qu.:46.00 1st Qu.:124.0 1st Qu.:140
## Median :78.00 Median :54.00 Median :47.00 Median :128.0 Median :143
## Mean :78.06 Mean :54.53 Mean :47.42 Mean :128.6 Mean :143
## 3rd Qu.:80.00 3rd Qu.:56.00 3rd Qu.:49.00 3rd Qu.:133.0 3rd Qu.:146
## Max. :90.00 Max. :64.00 Max. :55.00 Max. :153.0 Max. :161
##
## DKA NDA SIA FRA
## Min. :128.0 Min. : 63.00 Min. : 43.0 Min. :114.0
## 1st Qu.:147.0 1st Qu.: 88.00 1st Qu.: 96.0 1st Qu.:126.0
## Median :151.0 Median : 96.00 Median :108.0 Median :130.0
## Mean :151.2 Mean : 96.73 Mean :109.4 Mean :129.7
## 3rd Qu.:155.0 3rd Qu.:105.00 3rd Qu.:121.0 3rd Qu.:133.0
## Max. :174.0 Max. :138.00 Max. :178.0 Max. :145.0
##
## PAA OCA RFA RPA
## Min. :122.0 Min. :102.0 Min. : 0.00 Min. : 0.00
## 1st Qu.:131.0 1st Qu.:115.0 1st Qu.: 0.00 1st Qu.: 0.00
## Median :134.0 Median :119.0 Median :60.00 Median :57.00
## Mean :133.7 Mean :119.3 Mean :45.35 Mean :43.58
## 3rd Qu.:136.0 3rd Qu.:123.0 3rd Qu.:63.00 3rd Qu.:61.00
## Max. :147.0 Max. :142.0 Max. :71.00 Max. :71.00
##
## ROA BSA SBA SLA
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.: 0.00
## Median :63.00 Median :162.0 Median :101.00 Median : 87.00
## Mean :47.95 Mean :122.3 Mean : 75.96 Mean : 65.16
## 3rd Qu.:67.00 3rd Qu.:170.0 3rd Qu.:104.00 3rd Qu.: 90.00
## Max. :83.00 Max. :229.0 Max. :116.00 Max. :101.00
##
## TBA
## Min. : 0
## 1st Qu.: 0
## Median :149
## Mean :112
## 3rd Qu.:154
## Max. :177
##
Removing uncommon variables and adding a variable representative of gross size, ‘GS’
hwl<-howell[1:24] ## making a new data frame that only contains the first 24 columns of the object howell - this includes the more common craniometric and removes subtenses, arcs, angles, etc. Also, the choice of name hwl is just quicker to type now.
hwl$GS<-(hwl$GOL*hwl$XCB*hwl$BBH) ## This is a very basic and general estimation of overall cranial volume that will be used as a metric for gross size.
hwl_sum<-hwl %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) ##creating a new data frame that is using the Population label as a grouping variable, which is then being summarized for all variables in the data frame between GOL and GS, with only mean being specified.
as.data.frame(hwl_sum) ## printing the summary showing the mean measures for each variable by population label.
## Population GOL NOL BNL BBH XCB XFB ZYB
## 1 AINU 185.0116 182.6047 103.65116 135.8721 140.3721 117.5465 134.2093
## 2 ANDAMAN 164.4857 163.4857 91.64286 126.4571 133.3714 108.3286 120.7143
## 3 ANYANG 181.0000 178.8571 101.28571 140.2619 138.7857 114.8095 135.9524
## 4 ARIKARA 176.2029 174.7681 100.75362 130.7971 139.5652 115.0145 136.8841
## 5 ATAYAL 173.6170 171.2128 96.38298 131.9149 134.2766 111.6596 129.6809
## 6 AUSTRALI 185.8416 181.4752 99.15842 126.6634 129.7921 108.2178 131.4356
## 7 BERG 175.5596 173.0367 95.83486 127.4404 144.0826 121.7615 131.0917
## 8 BURIAT 176.7798 174.8991 99.30275 129.8807 151.6606 124.1743 139.3945
## 9 BUSHMAN 174.7444 172.9667 93.07778 120.8889 130.8556 108.2222 119.7333
## 10 DOGON 173.5657 172.2121 96.58586 130.0101 134.6465 111.5758 125.0909
## 11 EASTER I 187.2442 183.1977 108.34884 141.4884 132.2209 109.8140 131.8605
## 12 EGYPT 180.8288 179.4955 98.81982 130.7207 137.4775 113.5135 124.6396
## 13 ESKIMO 184.5556 181.4630 103.31481 135.8981 132.4815 110.4630 134.8796
## 14 GUAM 180.7018 178.4035 102.63158 140.4912 138.7018 113.7544 137.5614
## 15 HAINAN 173.7470 172.0843 97.30120 134.6145 136.8554 113.4096 130.1084
## 16 MOKAPU 180.9600 178.6300 104.28000 140.3800 141.2500 114.6500 132.9700
## 17 MORIORI 183.5556 181.5093 103.59259 133.9352 140.3611 111.7130 135.5926
## 18 N JAPAN 178.6782 176.2184 99.33333 134.1494 138.6207 114.4138 131.6207
## 19 N MAORI 186.6000 183.0000 105.00000 137.4000 136.9000 111.1000 139.3000
## 20 NORSE 184.2273 182.4364 99.55455 128.8455 139.0818 116.7182 129.4364
## 21 PERU 173.4818 172.1545 93.29091 127.7182 136.4364 113.6545 130.2636
## 22 PHILLIPI 176.9200 174.8200 98.48000 134.8000 139.8000 115.1600 133.1600
## 23 SANTA CR 176.0000 173.9020 95.04902 126.4804 137.4608 111.2745 131.7941
## 24 S JAPAN 177.3297 175.6044 99.09890 134.9670 136.2308 113.0659 129.6154
## 25 S MAORI 187.1000 183.0000 105.60000 139.4000 142.6000 114.3000 142.5000
## 26 TASMANIA 181.7126 176.8506 97.34483 129.3103 135.6782 110.0230 130.8161
## 27 TEITA 178.4096 177.0964 98.72289 126.6867 127.7952 109.3614 126.9518
## 28 TOLAI 179.1364 175.5364 98.60000 131.1091 129.2364 108.2545 131.2000
## 29 ZALAVAR 181.1429 178.7041 99.09184 132.0510 139.3265 117.6327 129.5306
## 30 ZULU 182.3168 180.8812 99.85149 131.4257 133.0198 114.8614 126.6832
## AUB WCB ASB BPL NPH NLH JUB NLB
## 1 122.5233 73.96512 109.9186 101.40698 65.95349 49.73256 119.1628 27.16279
## 2 110.9286 67.47143 98.0000 91.84286 58.65714 45.28571 109.0714 24.42857
## 3 125.6905 74.19048 108.2381 97.50000 69.42857 52.47619 121.2143 28.28571
## 4 128.4203 73.17391 107.6087 97.24638 70.10145 52.91304 119.4638 26.59420
## 5 120.5319 72.89362 107.2553 93.17021 63.29787 49.00000 113.5957 26.31915
## 6 116.7921 69.96040 107.1980 103.04950 63.00990 48.14851 116.4356 27.08911
## 7 124.0183 73.00000 110.9725 91.88073 65.75229 50.01835 114.1835 25.18349
## 8 132.7798 78.25688 114.5872 96.75229 71.95413 55.13761 119.9908 27.64220
## 9 110.1556 67.96667 104.2000 92.00000 56.75556 43.26667 108.2667 26.48889
## 10 112.2222 66.95960 101.8485 97.92929 63.07071 46.90909 112.4242 28.04040
## 11 120.4535 66.95349 105.5116 104.18605 65.73256 51.65116 116.3372 27.69767
## 12 115.7207 67.42342 106.0541 94.00000 66.34234 50.41441 109.1351 24.44144
## 13 123.3889 72.51852 107.5556 101.23148 69.37963 52.25000 117.1019 23.50000
## 14 126.6842 72.89474 107.8246 99.10526 68.35088 52.77193 121.2982 27.17544
## 15 120.7952 72.71084 103.9759 95.12048 67.72289 50.96386 115.9398 26.72289
## 16 123.6500 70.66000 105.8700 102.32000 66.23000 51.39000 116.8000 26.72000
## 17 125.9537 69.18519 107.5185 101.00926 71.49074 54.37037 119.6204 26.37963
## 18 121.5862 74.47126 107.5287 96.32184 69.19540 51.87356 115.4943 25.83908
## 19 126.0000 69.10000 110.0000 101.20000 67.30000 52.70000 120.8000 27.00000
## 20 121.1636 69.75455 109.3000 95.49091 66.59091 50.56364 113.5727 24.80000
## 21 120.5364 69.62727 106.7091 91.57273 65.71818 49.00000 112.3364 24.60000
## 22 122.9600 74.66000 107.3200 97.88000 66.78000 51.46000 118.1600 28.26000
## 23 122.7745 73.10784 109.0784 97.02941 66.78431 48.70588 114.7255 24.21569
## 24 120.3516 71.74725 106.1099 97.17582 67.59341 50.68132 114.4396 25.79121
## 25 130.2000 72.20000 110.4000 100.50000 70.10000 54.10000 124.6000 26.40000
## 26 120.1494 70.03448 107.2069 100.81609 60.44828 47.10345 114.3678 28.22989
## 27 114.4096 65.75904 102.3253 98.73494 63.03614 47.92771 114.4337 27.48193
## 28 117.5545 69.35455 104.6000 104.33636 64.43636 47.54545 114.7727 27.24545
## 29 121.3878 72.84694 109.5102 94.79592 66.03061 50.08163 112.7959 25.04082
## 30 114.6832 70.63366 104.3960 100.71287 65.54455 48.77228 114.5941 28.34653
## MAB MDH MDB OBH OBB GS
## 1 64.77907 27.65116 12.76744 34.18605 40.95349 3538458
## 2 59.45714 24.17143 10.81429 32.45714 36.98571 2781390
## 3 66.69048 30.59524 13.92857 32.78571 39.04762 3525032
## 4 65.00000 26.92754 12.17391 34.82609 40.02899 3223608
## 5 62.42553 25.59574 11.42553 33.23404 37.78723 3081822
## 6 64.82178 27.37624 11.76238 33.28713 40.94059 3064683
## 7 62.28440 27.00000 12.34862 33.26606 39.28440 3230445
## 8 66.79817 27.74312 12.29358 35.38532 40.64220 3490759
## 9 58.91111 23.26667 10.28889 30.90000 38.40000 2770051
## 10 62.73737 27.02020 11.22222 33.24242 38.80808 3044807
## 11 62.55814 27.59302 14.24419 33.91860 39.04651 3512974
## 12 61.31532 27.81982 12.02703 32.89189 38.72072 3258136
## 13 63.86111 25.25000 12.53704 35.65741 41.21296 3328639
## 14 65.70175 29.10526 12.59649 35.14035 40.63158 3528384
## 15 63.67470 27.54217 11.84337 33.25301 38.18072 3205744
## 16 63.80000 28.29000 13.89000 34.60000 40.07000 3599953
## 17 64.31481 29.87037 13.70370 36.26852 41.11111 3459399
## 18 65.62069 29.20690 11.75862 34.50575 39.11494 3334221
## 19 64.60000 32.80000 15.70000 35.50000 40.70000 3510752
## 20 61.86364 27.78182 12.42727 33.48182 39.79091 3308171
## 21 62.84545 28.13636 11.66364 34.20909 37.53636 3029428
## 22 64.86000 29.62000 13.50000 33.22000 39.04000 3338169
## 23 65.02941 27.02941 13.01961 34.72549 39.26471 3066291
## 24 64.52747 27.89011 11.62637 33.98901 38.58242 3267251
## 25 65.70000 31.70000 16.00000 36.20000 41.50000 3728305
## 26 65.45977 24.59770 12.17241 30.85057 40.17241 3195916
## 27 60.66265 26.19277 11.39759 32.63855 38.53012 2894007
## 28 64.10000 27.45455 12.76364 32.26364 40.12727 3041819
## 29 62.57143 28.04082 12.72449 32.40816 39.36735 3339968
## 30 64.11881 27.13861 11.41584 33.35644 39.87129 3194899
labs<-hwl_sum$Population ##creating an object of Population labels.
dsq<-dist(scale(hwl_sum[2:22])) ## creating a distance matrix that compares the distances between each Population based on the observed differences among the scaled craniometric variables.
hc<-hclust(dsq, method='ward.D2') ## subjecting the distance matrix to Hierarchical clustering using Wards criterion.
NbClust(data=as.numeric(scale(hwl_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15) ##this function is a package that provides over 30 different indices for determining the optimum number of clusters within a dataset. Here the data has been specified first making sure the data is read as numeric and is scaled. The 2:22 is an index call saying I only want to use all of the variables represented by column 2 through 22, or in this case, GOL through GS. 6 indices were able to be calculated with this dataset and 4 of those propose an optimal number of clusters of 3.
## Warning in sqrt(zvargss$varbgss/zvargss$vartss): NaNs produced
## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]
## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]
## *******************************************************************
## * Among all indices:
## * 4 proposed 3 as the best number of clusters
## * 1 proposed 4 as the best number of clusters
## * 1 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## 2 1.2201 -504.0082 4592.6661 -15.2097 492.0031 1115.609 NA 278.9021
## 3 15.2176 201.0612 319.9759 -13.2154 980.9663 1155.113 NA 128.3459
## 4 0.3560 308.6113 61.5617 -13.6771 1304.4185 1228.933 NA 76.8083
## 5 0.4936 269.1802 1635.6517 -16.0943 1493.7914 1421.683 NA 56.8673
## 6 6.1535 1104.2697 57.8197 -17.1974 1680.3266 1522.566 NA 42.2935
## 7 4.3585 1013.5023 155.4864 -17.5674 1859.4061 1559.624 NA 31.8291
## 8 0.2449 1105.9621 46.9671 -15.8811 2085.9875 1421.702 NA 22.2141
## 9 2.1794 1044.9771 96.2327 -14.9890 2266.4794 1351.109 NA 16.6804
## 10 0.2944 1081.7575 340.6208 -15.4984 2382.8832 1386.633 NA 13.8663
## 11 7.9903 1540.0304 92.6788 -16.0790 2484.4294 1428.057 NA 11.8021
## 12 0.9290 1615.4560 134.7729 -16.4850 2581.8839 1455.934 NA 10.1107
## 13 0.7552 1812.0680 136.3710 -16.2660 2692.7865 1432.896 NA 8.4787
## 14 0.3162 2049.5410 232.9606 -15.7909 2805.4229 1389.752 NA 7.0906
## 15 4.1066 2635.2371 120.6605 -15.1169 2918.7839 1332.655 NA 5.9229
## Friedman Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky
## 2 1.1836 2.1836 0.2148 0.7215 0.5124 0.3812 751.6896 0.5400 NaN
## 3 3.7450 4.7450 0.2142 0.6565 0.5181 0.2850 928.0325 0.8338 0.3609
## 4 6.9288 7.9288 0.1473 0.6921 0.5171 0.4400 207.4215 0.4216 0.3862
## 5 9.7091 10.7091 0.1600 0.6100 0.5225 0.2942 218.2928 0.7909 0.3557
## 6 13.3994 14.3994 0.2208 0.5783 0.5243 0.2944 361.8547 0.7935 0.3870
## 7 18.1335 19.1335 0.2346 0.6006 0.4991 0.2540 637.2888 0.9744 0.3600
## 8 26.4150 27.4150 0.1819 0.5936 0.5131 0.2775 393.2187 0.8623 0.3402
## 9 35.5100 36.5100 0.1471 0.5908 0.5118 0.2013 261.9238 1.3031 0.3216
## 10 42.9193 43.9193 0.1374 0.5925 0.5127 0.1912 97.2701 1.3510 0.3066
## 11 50.6009 51.6009 0.1718 0.5468 0.5207 0.3970 95.7078 0.4985 0.2956
## 12 59.2335 60.2335 0.1642 0.5447 0.5138 0.2696 349.3985 0.8959 0.2838
## 13 70.8273 71.8273 0.1455 0.5527 0.5169 0.2407 274.4311 1.0395 0.2735
## 14 84.8887 85.8887 0.1309 0.5496 0.5307 0.2524 254.7418 0.9760 0.2642
## 15 101.8211 102.8211 0.1160 0.5452 0.5185 0.2579 247.4848 0.9482 0.2561
## Ball Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 1542.2474 0.5085 0.2661 0.2943 0.0071 0.0018 7.1048 0.5202 1.3462
## 3 123.6792 0.6308 1.2025 0.4109 0.0057 0.0025 5.0695 0.3672 0.6489
## 4 61.4167 0.5470 0.0171 0.6806 0.0057 0.0026 5.5803 0.2694 0.5415
## 5 44.7341 0.5590 0.2850 0.6415 0.0069 0.0028 5.1923 0.2445 0.3673
## 6 10.3063 0.5601 1.0283 0.6310 0.0102 0.0030 4.9885 0.2198 0.3244
## 7 8.0849 0.5366 1.0914 0.6781 0.0121 0.0032 6.3314 0.1903 0.2671
## 8 5.6613 0.4756 0.7706 0.8035 0.0117 0.0032 8.0393 0.1532 0.2162
## 9 4.6790 0.4430 0.4903 0.8341 0.0101 0.0032 9.7445 0.1287 0.1729
## 10 3.6461 0.4374 0.1649 0.8209 0.0101 0.0033 9.4811 0.1167 0.1513
## 11 2.1393 0.4374 0.4239 0.8128 0.0129 0.0033 9.4682 0.1114 0.1203
## 12 1.7056 0.4345 1.4640 0.8001 0.0129 0.0033 9.0027 0.1055 0.1235
## 13 1.2926 0.3986 0.8380 0.8887 0.0092 0.0033 14.8892 0.0943 0.1152
## 14 0.9830 0.3820 0.7366 0.9013 0.0092 0.0033 15.4388 0.0850 0.0941
## 15 0.6657 0.3664 0.9704 0.8986 0.0092 0.0034 15.6309 0.0771 0.0951
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.2720 1238.9574 0.4628
## 3 0.2613 1046.2223 0.3618
## 4 0.2100 613.0321 0.5171
## 5 0.1591 480.8357 0.3762
## 6 0.2041 588.6771 0.3744
## 7 0.2303 725.3174 0.3247
## 8 0.2041 588.6771 0.3546
## 9 0.1245 464.0158 0.2578
## 10 -0.0306 -775.8496 0.2570
## 11 0.1191 466.0796 0.4828
## 12 0.1913 545.3661 0.3457
## 13 0.1546 475.7524 0.3108
## 14 0.1534 474.5732 0.3260
## 15 0.1534 474.5732 0.3329
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW
## Number_clusters 3.0000 15.000 3.00 3.0000 3.0000 4.0000 -Inf
## Value_Index 15.2176 2635.237 4272.69 -13.2154 488.9632 118.9303 3
## TraceW Friedman Rubin Cindex DB Silhouette Duda
## Number_clusters 99.0187 16.9324 -1.6857 0.116 0.5447 0.5307 0.3812
## Value_Index 15.0000 9.0000 15.0000 12.000 14.0000 2.0000 2.0000
## PseudoT2 Beale Ratkowsky Ball PtBiserial Frey McClain
## Number_clusters 751.6896 0.54 0.387 1418.568 0.6308 NA 0.2943
## Value_Index 2.0000 6.00 3.000 3.000 1.0000 2 11.0000
## Dunn Hubert SDindex Dindex SDbw
## Number_clusters 0.0129 0 4.9885 0 0.0941
## Value_Index 0.0000 6 0.0000 14 3.0000
##
## $Best.partition
## [1] 1 2 3 3 2 1 3 3 3 2 1 3 1 3 2 3 1 3 1 1 2 3 3 3 1 3 3 3 3 3 1 2 3 3 2 1 3
## [38] 3 3 2 1 3 1 3 2 3 1 3 1 1 2 3 3 3 1 3 3 3 3 1 1 2 3 3 3 3 3 3 2 3 1 3 1 1
## [75] 3 1 1 3 1 3 2 3 2 3 1 3 3 3 3 3 1 2 1 3 3 2 3 3 2 3 1 3 1 1 3 1 3 3 1 3 3
## [112] 3 2 3 1 3 2 3 3 3 1 3 3 3 3 2 1 1 2 3 3 3 3 3 3 1 1 3 3 3 3 3 3 3 1 3 2 2
## [149] 3 3 1 2 3 3 3 2 1 1 2 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 2 2 1 3 3 2 1 1 3
## [186] 3 3 1 2 2 3 2 1 1 3 3 1 3 1 3 3 3 3 3 1 3 3 3 3 3 3 2 1 1 3 3 3 1 2 2 3 2
## [223] 3 1 3 3 1 3 1 3 3 3 3 3 1 3 2 3 3 2 1 2 1 1 3 3 1 1 2 2 2 2 3 3 3 3 3 1 3
## [260] 3 3 1 1 3 3 3 2 3 3 3 1 2 3 3 3 3 1 1 3 2 3 3 3 3 3 3 3 3 1 1 3 3 1 3 1 3
## [297] 2 3 1 3 1 2 3 3 2 1 2 3 2 3 1 2 1 3 3 1 1 3 1 3 2 3 3 3 1 1 3 1 3 1 3 2 1
## [334] 1 3 3 3 1 2 3 3 3 1 1 3 3 1 1 3 3 3 3 3 3 1 2 3 3 3 3 3 2 1 1 3 3 3 1 2 2
## [371] 3 3 1 1 3 3 1 3 1 3 3 3 3 3 1 2 3 2 3 3 1 2 1 1 3 3 3 1 2 3 3 2 3 1 3 3 1
## [408] 3 1 3 3 3 3 3 1 3 3 3 3 3 3 2 1 3 3 3 3 1 3 1 1 2 2 3 3 3 3 3 3 2 2 1 2 3
## [445] 3 1 1 3 2 1 3 2 1 1 3 3 3 1 2 3 3 2 3 1 3 3 3 1 3 3 3 3 1 3 1 1 2 3 3 3 3
## [482] 2 1 3 2 3 3 3 2 3 3 3 2 1 3 3 1 1 1 3 3 1 3 3 1 2 3 3 3 3 3 2 1 3 3 3 3 3
## [519] 2 2 1 3 3 3 3 1 1 3 1 3 3 1 3 3 1 3 3 3 3 3 3 3 3 1 3 3 3 1 2 3 3 3 1 1 3
## [556] 3 1 3 1 3 3 3 1 3 1 2 3 2 3 3 1 2 3 3 2 1 3 1 3 3 3 3 1 1 2 3 1 3 1 3 2 3
## [593] 3 3 1 3 3 3 3 3 1 2 1 3 3 3 3 1 2 3 1 3 3 1 3 1 1 3 1 3 3 3 3 3 1 3 2 3 3
## [630] 3
plot(hc, labels=labs, hang=-1, cex=0.8) ##hierarchical cluster dendrogram that has been specified to hang the group labels at the same level.
hc_morph<-rect.hclust(hc, k=3, border=2:4) ## graphically depicting the clusters identified by the NbClust call above. The border call indicates that I want different three different colors, one for each of the the k=3 clusters.
morphogroup<-cutree(hc, k=3) ## assigning each Population its determined cluster.
hwl_sum<-hwl_sum %>% mutate(cluster=morphogroup) ##Assigning each Population cluster, here called morphogroup.
hwl_sum$cluster<-as.factor(hwl_sum$cluster) ## Setting assigned cluster as a factor.
mg1<-hwl_sum %>% filter(cluster==1) %>% droplevels() ## subsetting hwl to only include individuals from morphogroup 1.
mg2<-hwl_sum %>% filter(cluster==2) %>% droplevels() ## subsetting hwl to only include individuals from morphogroup 2.
mg3<-hwl_sum%>% filter(cluster==3) %>% droplevels() ## subsetting hwl to only include individuals from morphogroup 3.
c1d<-mg1$Population ## creating an object that is comprised of the Population labels from cluster 1.
c1<-hwl %>% filter(Population %in% c1d) %>% droplevels() ## filtering original howell dataset to include populations from morphogroup 1.
c1$Morphogroup=1 ## creating new column Morphogroup and setting it to 1.
c1$Morphogroup <- as.factor(c1$Morphogroup) ## setting Morphogroup as a factor.
c2d<-mg2$Population ## creating an object that is comprised of the Population labels from cluster 2.
c2<-hwl %>% filter(Population %in% c2d) %>% droplevels() ## filtering original howell dataset to include populations from morphogroup 2.
c2$Morphogroup=2 ## creating new column Morphogroup and setting it to 2.
c2$Morphogroup<-as.factor(c2$Morphogroup) ## setting Morphogroup as a factor.
c3d<-mg3$Population ## creating an object that is comprised of the Population labels from cluster 3.
c3<-hwl %>% filter(Population %in% c3d) %>% droplevels() ## filtering original howell dataset to include populations from morphogroup 3.
c3$Morphogroup=3 ## creating new column Morphogroup and setting it to 3.
c3$Morphogroup<-as.factor(c3$Morphogroup) ## setting Morphogroup as a factor.
hwlr<-rbind(c1, c2) ## combinging the rows of individuals from morphogroups 1 and 2.
hwlr<-rbind(hwlr, c3) ## combining the rows of individuals from morphogroups and 2 with those of 3.
describeBy(hwlr, group='Morphogroup') ## summary statistics by morphogroup.
##
## Descriptive statistics by group
## Morphogroup: 1
## vars n mean sd median trimmed mad
## ID 1 766 2060.33 732.11 2148.5 2060.61 1163.84
## Sex* 2 766 1.59 0.49 2.0 1.62 0.00
## PopNum* 3 766 4.98 2.71 5.0 4.94 2.97
## Population* 4 766 5.20 2.67 5.0 5.15 2.97
## GOL 5 766 182.27 8.00 182.0 182.21 8.90
## NOL 6 766 179.72 7.33 180.0 179.69 7.41
## BNL 7 766 103.09 5.34 103.0 102.98 5.93
## BBH 8 766 136.55 6.66 137.0 136.56 5.93
## XCB 9 766 139.80 7.97 139.0 139.43 7.41
## XFB 10 766 114.71 6.58 114.0 114.36 5.93
## ZYB 11 766 135.26 7.06 135.0 135.19 7.41
## AUB 12 766 125.16 6.65 125.0 124.93 7.41
## WCB 13 766 72.37 5.12 72.0 72.32 4.45
## ASB 14 766 108.48 5.65 108.0 108.34 5.93
## BPL 15 766 100.46 5.47 100.0 100.42 5.93
## NPH 16 766 68.58 4.98 68.0 68.51 4.45
## NLH 17 766 52.53 3.53 52.0 52.45 2.97
## JUB 18 766 118.73 5.79 119.0 118.69 5.93
## NLB 19 766 26.73 2.37 27.0 26.78 2.97
## MAB 20 766 64.68 3.91 64.0 64.60 4.45
## MDH 21 766 28.23 3.94 28.0 28.29 4.45
## MDB 22 766 13.29 2.17 13.0 13.21 1.48
## OBH 23 766 34.86 1.97 35.0 34.86 1.48
## OBB 24 766 40.39 1.88 40.0 40.35 1.48
## GS 25 766 3483665.18 347586.26 3463843.0 3475462.57 371784.19
## Morphogroup* 26 766 1.00 0.00 1.0 1.00 0.00
## min max range skew kurtosis se
## ID 1082 3064 1982 -0.09 -1.71 26.45
## Sex* 1 2 1 -0.38 -1.86 0.02
## PopNum* 1 11 10 0.05 -1.19 0.10
## Population* 1 11 10 0.13 -0.90 0.10
## GOL 158 206 48 0.03 -0.24 0.29
## NOL 157 200 43 0.01 -0.18 0.26
## BNL 89 120 31 0.19 -0.26 0.19
## BBH 115 155 40 -0.02 -0.03 0.24
## XCB 120 167 47 0.52 0.47 0.29
## XFB 97 145 48 0.56 0.67 0.24
## ZYB 117 158 41 0.09 -0.35 0.26
## AUB 106 149 43 0.33 0.05 0.24
## WCB 59 89 30 0.11 -0.20 0.19
## ASB 90 128 38 0.23 0.18 0.20
## BPL 82 117 35 0.06 -0.02 0.20
## NPH 53 82 29 0.08 -0.12 0.18
## NLH 42 65 23 0.17 0.10 0.13
## JUB 102 138 36 0.11 -0.11 0.21
## NLB 20 35 15 -0.13 -0.08 0.09
## MAB 52 77 25 0.23 0.10 0.14
## MDH 18 39 21 -0.14 -0.44 0.14
## MDB 7 20 13 0.33 -0.10 0.08
## OBH 30 41 11 -0.02 -0.15 0.07
## OBB 35 46 11 0.22 0.05 0.07
## GS 2640000 4640688 2000688 0.22 -0.37 12558.80
## Morphogroup* 1 1 0 NaN NaN 0.00
## ------------------------------------------------------------
## Morphogroup: 2
## vars n mean sd median trimmed mad
## ID 1 160 1355.63 756.97 799.5 1206.29 121.57
## Sex* 2 160 1.48 0.50 1.0 1.47 0.00
## PopNum* 3 160 12.44 0.50 12.0 12.42 0.00
## Population* 4 160 12.56 0.50 13.0 12.58 0.00
## GOL 5 160 170.26 8.44 170.0 170.17 8.90
## NOL 6 160 168.82 7.98 169.0 168.69 8.90
## BNL 7 160 92.45 4.22 92.0 92.22 4.45
## BBH 8 160 123.33 5.98 123.0 123.11 5.93
## XCB 9 160 131.96 5.06 132.0 131.87 4.45
## XFB 10 160 108.27 4.98 108.0 108.12 4.45
## ZYB 11 160 120.16 5.69 120.0 120.20 4.45
## AUB 12 160 110.49 5.24 111.0 110.41 4.45
## WCB 13 160 67.75 3.83 68.0 67.71 4.45
## ASB 14 160 101.49 5.99 101.0 101.25 5.93
## BPL 15 160 91.93 4.45 92.0 91.80 4.45
## NPH 16 160 57.59 4.48 58.0 57.47 4.45
## NLH 17 160 44.15 3.01 44.0 44.11 2.97
## JUB 18 160 108.62 5.00 108.0 108.52 4.45
## NLB 19 160 25.59 2.25 25.0 25.47 1.48
## MAB 20 160 59.15 2.94 59.0 59.09 2.97
## MDH 21 160 23.66 3.69 23.5 23.65 3.71
## MDB 22 160 10.52 2.09 10.0 10.39 1.48
## OBH 23 160 31.58 2.06 32.0 31.62 1.48
## OBB 24 160 37.78 1.86 38.0 37.69 1.48
## GS 25 160 2775011.75 270873.08 2756518.5 2764591.17 267584.84
## Morphogroup* 26 160 2.00 0.00 2.0 2.00 0.00
## min max range skew kurtosis se
## ID 716 3190 2474 1.21 0.63 59.84
## Sex* 1 2 1 0.10 -2.00 0.04
## PopNum* 12 13 1 0.25 -1.95 0.04
## Population* 12 13 1 -0.25 -1.95 0.04
## GOL 151 191 40 0.10 -0.61 0.67
## NOL 151 190 39 0.16 -0.58 0.63
## BNL 84 104 20 0.49 -0.15 0.33
## BBH 107 138 31 0.28 -0.13 0.47
## XCB 118 146 28 0.18 0.09 0.40
## XFB 97 120 23 0.25 -0.29 0.39
## ZYB 105 136 31 0.00 0.26 0.45
## AUB 98 126 28 0.19 0.07 0.41
## WCB 59 79 20 0.15 -0.23 0.30
## ASB 88 117 29 0.31 -0.35 0.47
## BPL 81 106 25 0.29 -0.07 0.35
## NPH 48 72 24 0.26 -0.16 0.35
## NLH 36 51 15 0.11 -0.50 0.24
## JUB 97 125 28 0.28 0.42 0.40
## NLB 21 33 12 0.52 0.31 0.18
## MAB 52 70 18 0.33 0.49 0.23
## MDH 16 31 15 0.00 -0.74 0.29
## MDB 7 20 13 0.95 2.18 0.17
## OBH 26 37 11 -0.13 0.05 0.16
## OBB 33 43 10 0.41 0.31 0.15
## GS 2162468 3634348 1471880 0.38 -0.06 21414.40
## Morphogroup* 2 2 0 NaN NaN 0.00
## ------------------------------------------------------------
## Morphogroup: 3
## vars n mean sd median trimmed mad
## ID 1 1598 1179.74 928.42 889.5 1103.96 915.51
## Sex* 2 1598 1.52 0.50 2.0 1.53 0.00
## PopNum* 3 1598 21.69 4.89 22.0 21.61 5.93
## Population* 4 1598 22.31 4.75 22.0 22.32 5.93
## GOL 5 1598 178.58 8.03 178.0 178.55 7.41
## NOL 6 1598 176.33 7.50 176.0 176.30 7.41
## BNL 7 1598 97.89 4.93 98.0 97.83 4.45
## BBH 8 1598 130.13 6.10 130.0 129.99 5.93
## XCB 9 1598 135.92 6.60 136.0 135.84 5.93
## XFB 10 1598 113.31 6.12 113.0 113.14 5.93
## ZYB 11 1598 129.67 6.83 130.0 129.57 7.41
## AUB 12 1598 119.41 6.38 119.0 119.29 5.93
## WCB 13 1598 70.65 4.48 71.0 70.59 4.45
## ASB 14 1598 106.62 5.31 106.0 106.51 5.93
## BPL 15 1598 97.08 6.37 97.0 96.98 5.93
## NPH 16 1598 65.57 4.91 66.0 65.63 4.45
## NLH 17 1598 49.39 3.40 49.0 49.36 2.97
## JUB 18 1598 114.12 5.58 114.0 114.02 5.93
## NLB 19 1598 26.16 2.28 26.0 26.12 2.97
## MAB 20 1598 63.44 3.88 63.0 63.31 4.45
## MDH 21 1598 27.30 3.63 27.0 27.31 4.45
## MDB 22 1598 12.02 1.91 12.0 11.97 1.48
## OBH 23 1598 33.31 2.08 33.0 33.37 1.48
## OBB 24 1598 39.22 1.92 39.0 39.20 1.48
## GS 25 1598 3165353.90 330867.43 3147312.0 3155852.74 348130.79
## Morphogroup* 26 1598 3.00 0.00 3.0 3.00 0.00
## min max range skew kurtosis se
## ID 1 3167 3166 0.61 -0.92 23.22
## Sex* 1 2 1 -0.10 -1.99 0.01
## PopNum* 14 30 16 0.07 -1.17 0.12
## Population* 14 30 16 -0.01 -1.17 0.12
## GOL 155 203 48 0.06 -0.23 0.20
## NOL 153 199 46 0.06 -0.12 0.19
## BNL 83 114 31 0.09 -0.20 0.12
## BBH 111 152 41 0.19 -0.22 0.15
## XCB 116 161 45 0.13 0.03 0.17
## XFB 95 135 40 0.32 0.14 0.15
## ZYB 111 151 40 0.14 -0.39 0.17
## AUB 98 143 45 0.22 -0.01 0.16
## WCB 57 87 30 0.14 -0.08 0.11
## ASB 89 127 38 0.17 -0.10 0.13
## BPL 80 123 43 0.18 0.00 0.16
## NPH 50 81 31 -0.07 0.01 0.12
## NLH 40 61 21 0.08 -0.06 0.08
## JUB 99 132 33 0.15 -0.29 0.14
## NLB 19 34 15 0.19 -0.19 0.06
## MAB 54 78 24 0.30 -0.10 0.10
## MDH 17 39 22 0.02 -0.28 0.09
## MDB 6 20 14 0.27 0.39 0.05
## OBH 26 41 15 -0.21 0.17 0.05
## OBB 33 46 13 0.14 -0.13 0.05
## GS 2327314 4399488 2072174 0.28 -0.24 8276.86
## Morphogroup* 3 3 0 NaN NaN 0.00
pcs<-principal(scale(hwlr[5:25]), rotate = 'varimax', nfactors=3) ## performing PCA with Varimax rotation on the scaled craniometric variables and GS and storing it as an object.
pcs ## examining results of PCA from above.
## Principal Components Analysis
## Call: principal(r = scale(hwlr[5:25]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## GOL 0.83 0.18 0.13 0.74 0.26 1.1
## NOL 0.81 0.17 0.17 0.71 0.29 1.2
## BNL 0.85 0.12 0.28 0.81 0.19 1.3
## BBH 0.62 0.24 0.28 0.52 0.48 1.7
## XCB 0.03 0.87 0.20 0.79 0.21 1.1
## XFB 0.04 0.81 0.13 0.67 0.33 1.1
## ZYB 0.56 0.66 0.25 0.81 0.19 2.3
## AUB 0.34 0.78 0.32 0.82 0.18 1.7
## WCB 0.18 0.78 0.07 0.64 0.36 1.1
## ASB 0.29 0.69 0.10 0.57 0.43 1.4
## BPL 0.85 -0.05 -0.05 0.73 0.27 1.0
## NPH 0.38 0.40 0.68 0.77 0.23 2.3
## NLH 0.39 0.43 0.67 0.78 0.22 2.4
## JUB 0.64 0.57 0.14 0.75 0.25 2.1
## NLB 0.49 0.19 -0.43 0.47 0.53 2.3
## MAB 0.54 0.52 0.03 0.56 0.44 2.0
## MDH 0.45 0.38 0.17 0.38 0.62 2.2
## MDB 0.54 0.33 0.09 0.41 0.59 1.7
## OBH 0.13 0.18 0.79 0.67 0.33 1.2
## OBB 0.59 0.30 0.20 0.48 0.52 1.7
## GS 0.64 0.57 0.27 0.81 0.19 2.3
##
## RC1 RC2 RC3
## SS loadings 6.23 5.33 2.32
## Proportion Var 0.30 0.25 0.11
## Cumulative Var 0.30 0.55 0.66
## Proportion Explained 0.45 0.38 0.17
## Cumulative Proportion 0.45 0.83 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 4739.27 with prob < 0
##
## Fit based upon off diagonal values = 0.98
MG<-hwlr$Morphogroup ## storing assigned morphogroup to a new object MG.
MG_pcs<-cbind.data.frame(MG, pcs$scores) ## combining the assigned morphogroup object with the principal component scores
MG_pcs<-as.data.frame(MG_pcs) ##converting to a data frame.
MG_pcs$MG<-as.factor(MG_pcs$MG) ## ensuring Morphogrooup is being read as a factor.
pc_sum<-MG_pcs %>% group_by(MG) %>% dplyr::summarize(across(RC1:RC3, mean)) ##creating a summary of principal component scores for each morphogroup.
pc_sum ##viewing principal components summary. Note - the principal call refers to the components as RC, or rotated component. This is a pedantic distinction that can make this function frustrating to use because RC2 might actually be PC1, PC2, or PC3 as normal nomenclature would indicate. Stats people get hung up on the strangest things sometimes.
## # A tibble: 3 x 4
## MG RC1 RC2 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 1 0.489 0.246 0.464
## 2 2 -1.03 -0.706 -0.980
## 3 3 -0.132 -0.0471 -0.124
sort(pcs$loadings[,1], decreasing=TRUE) ##sorting the loadings for PC1
## BPL BNL GOL NOL JUB GS BBH
## 0.84886484 0.84533825 0.83348094 0.80871912 0.64078648 0.63970347 0.61887047
## OBB ZYB MDB MAB NLB MDH NLH
## 0.59177929 0.56257625 0.54179221 0.54003314 0.49374381 0.44884319 0.38542125
## NPH AUB ASB WCB OBH XFB XCB
## 0.38498886 0.34019432 0.28545761 0.17841339 0.13446763 0.04302043 0.03340290
sort(pcs$loadings[,2], decreasing=TRUE) ## sorting the loadings for PC2
## XCB XFB AUB WCB ASB ZYB
## 0.86576233 0.80758972 0.77950190 0.77517128 0.68825637 0.65866864
## GS JUB MAB NLH NPH MDH
## 0.57148964 0.56627946 0.51503998 0.43296798 0.40085281 0.38151118
## MDB OBB BBH NLB OBH GOL
## 0.33076946 0.29501929 0.24328752 0.18724449 0.18470945 0.17892803
## NOL BNL BPL
## 0.17335255 0.12052499 -0.05188129
sort(pcs$loadings[,3], decreasing=TRUE) ## sorting the loadings for PC3
## OBH NPH NLH AUB BBH BNL
## 0.78607964 0.67853221 0.66909802 0.31553202 0.28081450 0.27727336
## GS ZYB OBB XCB NOL MDH
## 0.26903199 0.24905070 0.20439501 0.19748155 0.17411547 0.16744801
## JUB XFB GOL ASB MDB WCB
## 0.14496758 0.13441315 0.13112266 0.10142789 0.08935902 0.06546183
## MAB BPL NLB
## 0.02752250 -0.05225522 -0.43292758
ggplot(MG_pcs, aes(x=RC1, y=RC2, color = MG)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_sum, size=5) ## creating a plot of PC1 and PC2 scores with their color assigned by Morphogroup, but it could have been by anything you have information for, like Sex, or SES, or time-period...literally whatever.
ggplot(MG_pcs, aes(x=RC2, y=RC3, color = MG)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_sum, size=5) ## creating a plot of PC2 and PC3
ggplot(MG_pcs, aes(x=RC1, y=RC3, color = MG)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_sum, size=5) ## creating a plot of PC1 and PC3
morpho1<-c1[2:25] ##trimming dataframe.
m1_sum<-morpho1 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) ##creating a new data frame that is using the Population label as a grouping variable, which is then being summarized for all variables in the data frame for Morphogroup 1.
m1_dsq<-dist(scale(m1_sum[2:22])) ## creating a distance matrix here and only subjecting columns 2 through 22, which are only the craniometric variables.
m1_hc<-hclust(m1_dsq, method='ward.D2') ##submitting distance object to hierarchical cluster analysis and storing it as the object m1_hc, or morphogtoup1 hierarchical cluster.
NbClust(data=as.numeric(scale(m1_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15) ## subjecting the craniometric variables to NbClust, see text for description. Also, look into the package vignette for more detail.
## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]
## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]
## *******************************************************************
## * Among all indices:
## * 1 proposed 2 as the best number of clusters
## * 1 proposed 4 as the best number of clusters
## * 2 proposed 5 as the best number of clusters
## * 1 proposed 12 as the best number of clusters
## * 1 proposed 14 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 5
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman
## 2 0.4204 261.8046 91.0243 -5.7653 234.0843 304.9202 NA 76.2301 1.7548
## 3 0.9686 227.4487 63.3586 -8.3900 355.3257 405.9076 NA 45.1008 3.6562
## 4 0.7690 213.9464 332.1555 -9.9143 450.5239 477.8880 NA 29.8680 6.0309
## 5 5.4841 476.1829 68.4848 -7.2662 605.4017 381.9144 NA 15.2766 12.7465
## 6 0.6861 507.8245 130.5371 -7.0311 695.3266 372.6184 NA 10.3505 19.2889
## 7 0.8700 687.3928 22.8969 -6.3663 782.2613 348.1089 NA 7.1043 28.5597
## 8 1.7275 649.7773 35.8428 -7.4323 823.6833 380.0345 NA 5.9380 34.3652
## 9 0.7926 661.4399 26.1276 -8.4543 858.8367 413.0832 NA 5.0998 40.1781
## 10 3.5513 657.0729 39.4666 -9.0584 897.2112 431.9227 NA 4.3192 47.6198
## 11 0.1708 697.7480 88.7489 -9.1044 942.9303 428.7821 NA 3.5437 58.2609
## 12 19.5802 894.1870 40.3955 -8.8319 991.7168 413.1350 NA 2.8690 72.1964
## 13 0.2100 969.7811 24.1731 -8.6941 1034.4901 402.9022 NA 2.3840 87.0859
## 14 0.4304 991.7348 9.8914 -8.1713 1082.8199 379.0583 NA 1.9340 107.5849
## 15 0.7996 959.1395 60.6120 -7.8456 1124.6180 363.1194 NA 1.6139 129.1225
## Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky Ball
## 2 2.7548 0.2426 0.7094 0.5636 0.4366 180.6376 0.4270 0.5164 48.9910
## 3 4.6562 0.2204 0.5932 0.5473 0.3351 245.9892 0.6560 0.4712 23.3710
## 4 7.0309 0.1793 0.5919 0.4689 0.3043 198.8720 0.7533 0.4297 13.7166
## 5 13.7465 0.2238 0.5870 0.5188 0.2651 196.8016 0.9111 0.4228 4.4548
## 6 20.2889 0.1783 0.5796 0.5480 0.2733 164.8694 0.8723 0.3913 2.8490
## 7 29.5597 0.1383 0.5960 0.5557 0.3914 35.7628 0.4967 0.3681 1.5454
## 8 35.3652 0.1824 0.5535 0.5619 0.2206 105.9746 1.1395 0.3452 1.2268
## 9 41.1781 0.1722 0.5437 0.5524 0.3574 25.1761 0.5595 0.3266 0.9395
## 10 48.6198 0.2788 0.5210 0.5554 0.2328 141.7330 1.0737 0.3105 0.7565
## 11 59.2609 0.2524 0.5201 0.5549 0.7341 18.4741 0.1184 0.2969 0.5835
## 12 73.1964 0.2166 0.5070 0.5515 0.3088 87.2888 0.7274 0.2855 0.3812
## 13 88.0859 0.1899 0.5145 0.5492 0.2522 62.2756 0.9436 0.2748 0.2970
## 14 108.5849 0.1739 0.5141 0.5460 0.2453 33.8392 0.9400 0.2650 0.2483
## 15 130.1225 0.1763 0.5193 0.5506 0.2649 105.4434 0.9012 0.2561 0.2216
## Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 0.5663 0.0810 0.3763 0.0347 0.0049 6.2123 0.4620 1.6436
## 3 0.6438 1.8709 0.4023 0.0366 0.0067 4.2679 0.3695 0.6118
## 4 0.5478 0.3111 0.6916 0.0209 0.0069 5.3679 0.2806 0.4308
## 5 0.5553 0.8401 0.6812 0.0369 0.0083 4.9575 0.2082 0.3070
## 6 0.5161 0.5849 0.7541 0.0199 0.0086 6.8866 0.1681 0.2660
## 7 0.4903 0.1417 0.7476 0.0199 0.0088 6.6610 0.1382 0.1923
## 8 0.4908 0.9699 0.7297 0.0273 0.0090 6.9035 0.1333 0.1576
## 9 0.4798 0.2967 0.7420 0.0273 0.0091 9.2310 0.1222 0.1210
## 10 0.4791 1.2873 0.7332 0.0452 0.0092 9.2269 0.1159 0.1069
## 11 0.4529 1.0348 0.7793 0.0452 0.0092 11.7444 0.1028 0.1015
## 12 0.4222 1.0392 0.8176 0.0452 0.0092 12.9318 0.0900 0.1038
## 13 0.3966 0.5329 0.8497 0.0452 0.0092 15.5783 0.0809 0.0846
## 14 0.3891 0.4039 0.8272 0.0452 0.0093 15.1048 0.0733 0.0787
## 15 0.3869 1.6769 0.8125 0.0476 0.0094 14.5228 0.0689 0.0644
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.1981 566.7476 0.5145
## 3 0.1879 535.8921 0.4195
## 4 0.1546 475.7524 0.3878
## 5 0.1328 463.4387 0.3431
## 6 0.1172 467.1300 0.3539
## 7 -0.0306 -775.8496 0.4880
## 8 0.0152 1944.8165 0.2943
## 9 -0.1290 -122.4991 0.4669
## 10 0.0698 573.3856 0.3059
## 11 0.0928 498.4066 0.7322
## 12 0.0558 660.2764 0.3989
## 13 -0.0473 -464.7772 0.3424
## 14 -0.1829 -71.1410 0.3531
## 15 0.0520 693.4645 0.3485
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW
## Number_clusters 12.0000 14.0000 4.0000 2.0000 5.0000 5.0000 -Inf
## Value_Index 19.5802 991.7348 268.7969 -5.7653 154.8779 86.6775 3
## TraceW Friedman Rubin Cindex DB Silhouette Duda
## Number_clusters 15.8964 21.5376 -3.4654 0.1383 0.507 0.5636 0.4366
## Value_Index 15.0000 7.0000 7.0000 12.0000 2.000 2.0000 2.0000
## PseudoT2 Beale Ratkowsky Ball PtBiserial Frey McClain Dunn
## Number_clusters 180.6376 0.427 0.5164 25.62 0.6438 NA 0.3763 0.0476
## Value_Index 2.0000 2.000 3.0000 3.00 1.0000 2 15.0000 0.0000
## Hubert SDindex Dindex SDbw
## Number_clusters 0 4.2679 0 0.0644
## Value_Index 3 0.0000 15 12.0000
##
## $Best.partition
## [1] 1 2 3 1 1 2 2 4 1 3 1 1 2 3 1 1 2 2 1 1 3 1 4 2 3 5 4 4 1 4 1 3 1 2 1 3 1
## [38] 2 1 1 2 4 2 1 4 4 5 3 3 4 1 4 2 4 1 1 4 5 2 2 4 4 2 2 4 4 2 4 1 3 2 1 2 4
## [75] 1 2 5 2 4 5 3 2 1 2 4 4 2 1 1 1 5 3 4 4 2 2 2 1 4 1 4 5 2 2 2 2 2 1 2 1 1
## [112] 2 3 5 1 2 1 1 1 2 4 2 1 5 2 1 4 2 1 2 2 1 3 4 5 2 4 4 2 1 4 2 1 4 1 4 3 2
## [149] 1 2 4 1 2 5 4 1 1 1 3 4 4 2 4 1 2 4 1 5 3 2 1 2 2 4 4 1 2 1 2 2 3 4 2 1 5
## [186] 4 1 2 4 2 1 2 2 4 4 5 4 5 2 3 1 2 1 1 4 1 1 3 1 1 3 1 3 1 1 2 1 1 3 1 1 4
## [223] 4 4 3 4 1 2 4 3 5
m1_labs<-m1_sum$Population ## creating an object that is only the Population labels from the m1_sum object.
plot(m1_hc, labels=m1_labs, hang=-1, cex=0.8) ## plotting the results of the hierarchical clustering and setting the group labels.
hc_morph_1<-rect.hclust(m1_hc, k=5, border=2:7) ## adding rectangles around the number of clusters (k=5) identified from NbClust.
morphogroup_1<-cutree(m1_hc, k=5) ## assigning each Population its determined cluster.
m1_sum<-m1_sum %>% mutate(cluster=morphogroup_1) ##Assigning each Population its determined subcluster.
m1_sum$cluster<-as.factor(m1_sum$cluster) ## Setting cluster as a factor.
m1.1<-m1_sum %>% filter(cluster==1) %>% droplevels() ## subsetting to only include individuals from morphogroup 1.1.
m1.2<-m1_sum %>% filter(cluster==2) %>% droplevels() ## subsetting to only include individuals from morphogroup 1.2.
m1.3<-m1_sum %>% filter(cluster==3) %>% droplevels() ## subsetting to only include individuals from morphogroup 1.3.
m1.4<-m1_sum %>% filter(cluster==4) %>% droplevels() ## subsetting to only include individuals from morphogroup 1.4
m1.5<-m1_sum %>% filter(cluster==5) %>% droplevels() ## subsetting to only include individuals from morphogroup 1.5
c1.1d<-m1.1$Population ## creating an object that is comprised of the Population labels from subcluster 1.1.
c1.1<-morpho1 %>% filter(Population %in% c1.1d) %>% droplevels() ## filtering Morphogroup 1 dataset to include populations from sub morphogroup 1.1.
c1.1$Morphogroup=1.1 ## creating new column Morphogroup and setting it to 1.1.
c1.1$Morphogroup <- as.factor(c1.1$Morphogroup) ## setting Morphogroup as a factor.
c1.2d<-m1.2$Population ## creating an object that is comprised of the Population labels from subcluster 1.2.
c1.2<-morpho1 %>% filter(Population %in% c1.2d) %>% droplevels() ## filtering Morphogroup 1 dataset to include populations from sub morphogroup 1.2.
c1.2$Morphogroup=1.2 ## creating new column Morphogroup and setting it to 1.2.
c1.2$Morphogroup <- as.factor(c1.2$Morphogroup) ## setting Morphogroup as a factor.
c1.3d<-m1.3$Population ## creating an object that is comprised of the Population labels from subcluster 1.3.
c1.3<-morpho1 %>% filter(Population %in% c1.3d) %>% droplevels() ## filtering Morphogroup 1 dataset to include populations from sub morphogroup 1.3.
c1.3$Morphogroup=1.3 ## creating new column Morphogroup and setting it to 1.3.
c1.3$Morphogroup <- as.factor(c1.3$Morphogroup) ## setting Morphogroup as a factor.
c1.4d<-m1.4$Population ## creating an object that is comprised of the Population labels from subcluster 1.4.
c1.4<-morpho1 %>% filter(Population %in% c1.4d) %>% droplevels() ## filtering Morphogroup 1 dataset to include populations from sub morphogroup 1.4.
c1.4$Morphogroup=1.4 ## creating new column Morphogroup and setting it to 1.4.
c1.4$Morphogroup <- as.factor(c1.4$Morphogroup) ## setting Morphogroup as a factor.
c1.5d<-m1.5$Population ## creating an object that is comprised of the Population labels from subcluster 1.5.
c1.5<-morpho1 %>% filter(Population %in% c1.5d) %>% droplevels() ## filtering Morphogroup 1 dataset to include populations from sub morphogroup 1.5.
c1.5$Morphogroup=1.5 ## creating new column Morphogroup and setting it to 1.5.
c1.5$Morphogroup <- as.factor(c1.5$Morphogroup) ## setting Morphogroup as a factor.
hwl_c1.1<-rbind(c1.1, c1.2) ## combinging the rows of individuals from morphogroups 1 and 2.
hwlr_c1.1_2<-rbind(c1.3, c1.4) ## combining the rows of individuals from morphogroups and 2 with those of 3.
hwl_c1.1<-rbind(hwl_c1.1, hwlr_c1.1_2) ## combining the previous two data frames
hwl_c1<-rbind(hwl_c1.1,c1.5) ## combining the last data frame
head(hwl_c1) ## looking at the first 6 entries to check work.
## # A tibble: 6 x 25
## Sex PopNum Population GOL NOL BNL BBH XCB XFB ZYB AUB WCB
## <fct> <fct> <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 M 10 MOKAPU 181 178 104 147 146 115 138 129 77
## 2 M 10 MOKAPU 174 172 105 130 134 109 136 124 70
## 3 M 10 MOKAPU 185 183 101 147 150 125 142 131 74
## 4 M 10 MOKAPU 181 179 102 138 136 112 130 120 68
## 5 M 10 MOKAPU 181 179 109 148 146 121 142 133 70
## 6 M 10 MOKAPU 178 176 102 132 134 105 134 123 69
## # … with 13 more variables: ASB <int>, BPL <int>, NPH <int>, NLH <int>,
## # JUB <int>, NLB <int>, MAB <int>, MDH <int>, MDB <int>, OBH <int>,
## # OBB <int>, GS <int>, Morphogroup <fct>
describeBy(hwl_c1, group='Morphogroup') ## summary statistics by morphogroup.
##
## Descriptive statistics by group
## Morphogroup: 1.1
## vars n mean sd median trimmed mad
## Sex* 1 272 1.54 0.50 2.0 1.56 0.00
## PopNum* 2 272 1.95 0.83 2.0 1.94 1.48
## Population* 3 272 2.05 0.83 2.0 2.06 1.48
## GOL 4 272 184.23 8.32 184.0 184.22 8.90
## NOL 5 272 181.33 7.62 181.0 181.32 7.41
## BNL 6 272 105.37 5.41 105.0 105.40 5.93
## BBH 7 272 139.31 6.18 139.0 139.32 5.93
## XCB 8 272 138.12 6.62 139.0 138.14 5.93
## XFB 9 272 114.04 5.79 114.0 114.07 5.93
## ZYB 10 272 133.01 7.14 132.0 132.97 8.90
## AUB 11 272 122.28 5.96 122.0 122.19 5.93
## WCB 12 272 70.53 4.83 70.5 70.52 5.19
## ASB 13 272 107.04 5.45 107.0 107.04 5.93
## BPL 14 272 102.62 5.43 103.0 102.66 4.45
## NPH 15 272 65.99 4.59 66.0 65.98 4.45
## NLH 16 272 50.95 3.36 51.0 50.92 2.97
## JUB 17 272 117.40 5.80 117.5 117.42 6.67
## NLB 18 272 27.17 1.96 27.0 27.14 1.48
## MAB 19 272 63.72 4.03 64.0 63.62 4.45
## MDH 20 272 27.87 4.00 28.0 27.85 4.45
## MDB 21 272 13.65 2.28 14.0 13.59 2.97
## OBH 22 272 34.25 1.73 34.0 34.25 1.48
## OBB 23 272 40.03 1.87 40.0 39.98 1.48
## GS 24 272 3553009.32 372759.24 3574769.0 3547149.89 421437.95
## Morphogroup* 25 272 1.00 0.00 1.0 1.00 0.00
## min max range skew kurtosis se
## Sex* 1 2 1 -0.18 -1.98 0.03
## PopNum* 1 3 2 0.10 -1.54 0.05
## Population* 1 3 2 -0.10 -1.54 0.05
## GOL 160 203 43 -0.06 -0.43 0.50
## NOL 160 199 39 -0.04 -0.40 0.46
## BNL 94 120 26 -0.01 -0.58 0.33
## BBH 123 155 32 -0.02 0.07 0.37
## XCB 122 156 34 -0.06 -0.32 0.40
## XFB 97 127 30 -0.09 -0.15 0.35
## ZYB 117 154 37 0.08 -0.76 0.43
## AUB 106 138 32 0.14 -0.42 0.36
## WCB 59 83 24 0.03 -0.28 0.29
## ASB 94 122 28 0.03 -0.24 0.33
## BPL 87 117 30 -0.06 0.01 0.33
## NPH 53 79 26 0.01 -0.21 0.28
## NLH 42 60 18 0.08 -0.06 0.20
## JUB 102 135 33 0.04 -0.23 0.35
## NLB 22 35 13 0.21 0.41 0.12
## MAB 52 76 24 0.23 -0.04 0.24
## MDH 18 38 20 0.04 -0.61 0.24
## MDB 8 20 12 0.23 -0.37 0.14
## OBH 30 40 10 0.03 0.05 0.10
## OBB 35 46 11 0.23 0.26 0.11
## GS 2640000 4640688 2000688 0.13 -0.41 22601.85
## Morphogroup* 1 1 0 NaN NaN 0.00
## ------------------------------------------------------------
## Morphogroup: 1.2
## vars n mean sd median trimmed mad min
## Sex* 1 149 1.82 0.39 2 1.89 0.00 1
## PopNum* 2 149 4.95 0.79 5 4.93 1.48 4
## Population* 3 149 5.05 0.79 5 5.07 1.48 4
## GOL 4 149 179.52 6.76 180 179.40 7.41 162
## NOL 5 149 177.33 6.50 177 177.29 5.93 160
## BNL 6 149 100.86 4.53 101 100.74 4.45 90
## BBH 7 149 138.52 5.85 138 138.50 5.93 125
## XCB 8 149 139.09 5.10 139 139.17 5.93 126
## XFB 9 149 114.52 4.55 115 114.49 4.45 104
## ZYB 10 149 135.63 6.22 135 135.59 5.93 120
## AUB 11 149 125.15 5.51 125 125.19 4.45 111
## WCB 12 149 73.85 3.75 74 73.80 4.45 63
## ASB 13 149 107.77 4.42 108 107.64 4.45 96
## BPL 14 149 98.24 4.81 97 98.01 4.45 83
## NPH 15 149 68.13 3.49 68 68.09 2.97 59
## NLH 16 149 52.25 2.62 52 52.17 2.97 46
## JUB 17 149 120.22 5.02 120 120.31 4.45 107
## NLB 18 149 27.85 1.91 28 27.87 1.48 23
## MAB 19 149 65.70 3.32 65 65.59 2.97 58
## MDH 20 149 29.70 3.06 30 29.74 2.97 20
## MDB 21 149 13.28 1.73 13 13.23 1.48 9
## OBH 22 149 33.83 1.86 34 33.83 1.48 30
## OBB 23 149 39.65 1.65 40 39.60 1.48 37
## GS 24 149 3463608.38 297228.79 3471942 3461498.78 329991.18 2800332
## Morphogroup* 25 149 2.00 0.00 2 2.00 0.00 2
## max range skew kurtosis se
## Sex* 2 1 -1.64 0.69 0.03
## PopNum* 6 2 0.09 -1.39 0.06
## Population* 6 2 -0.09 -1.39 0.06
## GOL 196 34 0.06 -0.51 0.55
## NOL 192 32 0.04 -0.55 0.53
## BNL 112 22 0.22 -0.41 0.37
## BBH 153 28 0.05 -0.59 0.48
## XCB 152 26 -0.11 -0.18 0.42
## XFB 127 23 0.05 -0.29 0.37
## ZYB 153 33 0.04 0.09 0.51
## AUB 141 30 -0.01 -0.12 0.45
## WCB 84 21 0.07 -0.25 0.31
## ASB 120 24 0.17 -0.03 0.36
## BPL 115 32 0.50 0.80 0.39
## NPH 77 18 0.11 -0.08 0.29
## NLH 60 14 0.28 0.19 0.21
## JUB 135 28 -0.09 0.06 0.41
## NLB 33 10 -0.02 -0.02 0.16
## MAB 77 19 0.38 0.14 0.27
## MDH 39 19 -0.14 0.26 0.25
## MDB 20 11 0.31 0.49 0.14
## OBH 41 11 0.28 0.60 0.15
## OBB 44 7 0.35 -0.50 0.14
## GS 4158720 1358388 0.05 -0.80 24349.93
## Morphogroup* 2 0 NaN NaN 0.00
## ------------------------------------------------------------
## Morphogroup: 1.3
## vars n mean sd median trimmed mad min
## Sex* 1 109 1.50 0.50 2 1.51 0.00 1
## PopNum* 2 109 7.00 0.00 7 7.00 0.00 7
## Population* 3 109 7.00 0.00 7 7.00 0.00 7
## GOL 4 109 176.78 7.66 176 176.75 7.41 158
## NOL 5 109 174.90 7.00 175 174.90 7.41 157
## BNL 6 109 99.30 4.61 99 99.21 4.45 89
## BBH 7 109 129.88 5.68 130 130.01 5.93 115
## XCB 8 109 151.66 6.77 151 151.47 5.93 138
## XFB 9 109 124.17 5.85 124 124.08 5.93 112
## ZYB 10 109 139.39 7.12 139 139.26 8.90 126
## AUB 11 109 132.78 6.30 133 132.70 7.41 120
## WCB 12 109 78.26 4.12 78 78.30 4.45 68
## ASB 13 109 114.59 5.38 115 114.61 5.93 101
## BPL 14 109 96.75 5.12 97 96.76 5.93 82
## NPH 15 109 71.95 4.90 72 71.92 5.93 61
## NLH 16 109 55.14 3.42 55 55.11 2.97 47
## JUB 17 109 119.99 6.49 119 119.66 7.41 107
## NLB 18 109 27.64 2.13 27 27.65 2.97 22
## MAB 19 109 66.80 4.12 66 66.58 4.45 58
## MDH 20 109 27.74 3.59 28 27.81 4.45 18
## MDB 21 109 12.29 1.97 12 12.21 1.48 7
## OBH 22 109 35.39 1.82 35 35.43 1.48 30
## OBB 23 109 40.64 1.93 40 40.61 1.48 36
## GS 24 109 3490758.89 362730.22 3431484 3480350.76 347500.68 2669884
## Morphogroup* 25 109 3.00 0.00 3 3.00 0.00 3
## max range skew kurtosis se
## Sex* 2 1 -0.02 -2.02 0.05
## PopNum* 7 0 NaN NaN 0.00
## Population* 7 0 NaN NaN 0.00
## GOL 194 36 0.04 -0.50 0.73
## NOL 191 34 0.00 -0.22 0.67
## BNL 113 24 0.25 -0.01 0.44
## BBH 141 26 -0.22 -0.56 0.54
## XCB 167 29 0.23 -0.45 0.65
## XFB 145 33 0.29 0.65 0.56
## ZYB 158 32 0.20 -0.72 0.68
## AUB 149 29 0.13 -0.70 0.60
## WCB 89 21 -0.09 -0.40 0.39
## ASB 128 27 0.01 -0.36 0.52
## BPL 112 30 0.07 0.40 0.49
## NPH 82 21 0.02 -0.75 0.47
## NLH 62 15 0.08 -0.76 0.33
## JUB 138 31 0.44 -0.31 0.62
## NLB 34 12 0.07 0.02 0.20
## MAB 77 19 0.43 -0.30 0.39
## MDH 35 17 -0.23 -0.36 0.34
## MDB 19 12 0.43 0.58 0.19
## OBH 40 10 -0.19 0.03 0.17
## OBB 46 10 0.25 0.24 0.18
## GS 4306095 1636211 0.23 -0.58 34743.25
## Morphogroup* 3 0 NaN NaN 0.00
## ------------------------------------------------------------
## Morphogroup: 1.4
## vars n mean sd median trimmed mad
## Sex* 1 108 1.49 0.50 1.0 1.49 0.00
## PopNum* 2 108 8.00 0.00 8.0 8.00 0.00
## Population* 3 108 8.00 0.00 8.0 8.00 0.00
## GOL 4 108 184.56 6.48 184.0 184.20 5.93
## NOL 5 108 181.46 5.73 181.0 181.20 5.93
## BNL 6 108 103.31 4.72 103.0 103.32 4.45
## BBH 7 108 135.90 5.68 136.0 135.82 4.45
## XCB 8 108 132.48 4.73 133.0 132.48 4.45
## XFB 9 108 110.46 4.30 111.0 110.49 3.71
## ZYB 10 108 134.88 6.39 134.0 134.60 7.41
## AUB 11 108 123.39 4.89 123.0 123.20 5.93
## WCB 12 108 72.52 4.00 72.0 72.45 4.45
## ASB 13 108 107.56 4.38 107.0 107.43 5.93
## BPL 14 108 101.23 5.04 101.0 101.40 4.45
## NPH 15 108 69.38 4.34 69.0 69.35 4.45
## NLH 16 108 52.25 3.11 52.0 52.24 2.97
## JUB 17 108 117.10 5.72 116.0 116.92 5.93
## NLB 18 108 23.50 1.63 23.5 23.49 2.22
## MAB 19 108 63.86 3.51 64.0 63.85 4.45
## MDH 20 108 25.25 3.58 25.0 25.22 4.45
## MDB 21 108 12.54 1.99 12.0 12.42 1.48
## OBH 22 108 35.66 1.88 36.0 35.73 1.48
## OBB 23 108 41.21 1.76 41.0 41.09 1.48
## GS 24 108 3328638.82 296464.96 3296670.0 3319319.48 269172.70
## Morphogroup* 25 108 4.00 0.00 4.0 4.00 0.00
## min max range skew kurtosis se
## Sex* 1 2 1 0.04 -2.02 0.05
## PopNum* 8 8 0 NaN NaN 0.00
## Population* 8 8 0 NaN NaN 0.00
## GOL 173 206 33 0.58 0.03 0.62
## NOL 171 200 29 0.51 0.00 0.55
## BNL 92 119 27 0.15 0.01 0.45
## BBH 122 150 28 0.12 0.07 0.55
## XCB 120 142 22 -0.04 -0.46 0.45
## XFB 100 122 22 -0.06 -0.19 0.41
## ZYB 119 151 32 0.31 -0.36 0.61
## AUB 114 136 22 0.36 -0.40 0.47
## WCB 63 84 21 0.15 0.06 0.38
## ASB 100 120 20 0.23 -0.45 0.42
## BPL 90 114 24 -0.19 -0.38 0.48
## NPH 58 80 22 0.05 -0.01 0.42
## NLH 45 61 16 0.11 -0.01 0.30
## JUB 106 134 28 0.34 -0.31 0.55
## NLB 20 29 9 0.23 0.43 0.16
## MAB 56 73 17 0.09 -0.54 0.34
## MDH 18 34 16 0.12 -0.79 0.34
## MDB 8 18 10 0.53 0.14 0.19
## OBH 30 40 10 -0.33 0.10 0.18
## OBB 38 46 8 0.57 0.10 0.17
## GS 2750000 4217400 1467400 0.43 0.19 28527.35
## Morphogroup* 4 4 0 NaN NaN 0.00
## ------------------------------------------------------------
## Morphogroup: 1.5
## vars n mean sd median trimmed mad
## Sex* 1 128 1.60 0.49 2.0 1.62 0.00
## PopNum* 2 128 9.23 0.58 9.0 9.08 0.00
## Population* 3 128 9.23 0.58 9.0 9.08 0.00
## GOL 4 128 184.07 7.01 184.0 184.04 7.41
## NOL 5 128 181.74 6.48 182.0 181.67 5.93
## BNL 6 128 103.86 4.34 104.0 103.63 4.45
## BBH 7 128 134.63 5.31 134.0 134.48 5.93
## XCB 8 128 140.27 5.24 140.0 140.22 5.93
## XFB 9 128 111.87 4.37 112.0 111.87 4.45
## ZYB 10 128 136.42 6.45 136.0 136.48 7.41
## AUB 11 128 126.29 5.47 126.0 126.24 5.93
## WCB 12 128 69.41 3.93 69.5 69.34 4.45
## ASB 13 128 107.94 5.23 108.0 107.93 4.45
## BPL 14 128 100.98 4.27 101.0 100.97 4.45
## NPH 15 128 71.05 4.77 72.0 71.06 4.45
## NLH 16 128 54.22 3.27 54.0 54.17 2.97
## JUB 17 128 120.10 5.02 120.0 120.04 5.19
## NLB 18 128 26.43 1.93 26.0 26.42 1.48
## MAB 19 128 64.45 3.51 64.0 64.44 2.97
## MDH 20 128 30.24 3.45 30.0 30.28 2.97
## MDB 21 128 14.04 2.23 14.0 14.01 2.97
## OBH 22 128 36.20 1.61 36.0 36.20 1.48
## OBB 23 128 41.11 1.66 41.0 41.15 1.48
## GS 24 128 3484418.98 333328.84 3464784.0 3484601.97 380117.88
## Morphogroup* 25 128 5.00 0.00 5.0 5.00 0.00
## min max range skew kurtosis se
## Sex* 1 2 1 -0.41 -1.85 0.04
## PopNum* 9 11 2 2.31 3.89 0.05
## Population* 9 11 2 2.31 3.89 0.05
## GOL 168 199 31 0.01 -0.52 0.62
## NOL 167 196 29 0.08 -0.40 0.57
## BNL 93 118 25 0.45 0.44 0.38
## BBH 122 150 28 0.27 -0.01 0.47
## XCB 127 151 24 0.07 -0.58 0.46
## XFB 100 124 24 -0.02 -0.20 0.39
## ZYB 123 153 30 -0.02 -0.54 0.57
## AUB 114 141 27 0.10 -0.26 0.48
## WCB 61 80 19 0.17 -0.55 0.35
## ASB 90 121 31 -0.19 0.57 0.46
## BPL 92 113 21 0.10 -0.55 0.38
## NPH 62 82 20 -0.08 -0.72 0.42
## NLH 46 65 19 0.26 0.19 0.29
## JUB 109 134 25 0.12 -0.37 0.44
## NLB 22 31 9 0.02 -0.53 0.17
## MAB 56 73 17 0.05 -0.33 0.31
## MDH 21 37 16 -0.11 -0.59 0.31
## MDB 9 19 10 0.11 -0.48 0.20
## OBH 33 40 7 0.01 -0.39 0.14
## OBB 37 45 8 -0.12 -0.18 0.15
## GS 2649474 4204200 1554726 0.02 -0.66 29462.39
## Morphogroup* 5 5 0 NaN NaN 0.00
pcs1<-principal(scale(hwl_c1[4:24]), rotate = 'varimax', nfactors=3)
pcs1
## Principal Components Analysis
## Call: principal(r = scale(hwl_c1[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## GOL 0.08 0.87 0.26 0.82 0.18 1.2
## NOL 0.09 0.84 0.28 0.80 0.20 1.2
## BNL -0.03 0.87 0.19 0.80 0.20 1.1
## BBH 0.06 0.78 -0.10 0.62 0.38 1.0
## XCB 0.86 -0.10 0.14 0.77 0.23 1.1
## XFB 0.83 -0.07 0.07 0.69 0.31 1.0
## ZYB 0.72 0.38 0.39 0.81 0.19 2.1
## AUB 0.81 0.14 0.38 0.82 0.18 1.5
## WCB 0.76 -0.09 0.18 0.62 0.38 1.1
## ASB 0.71 0.16 0.21 0.57 0.43 1.3
## BPL -0.04 0.78 0.11 0.62 0.38 1.0
## NPH 0.41 0.17 0.71 0.70 0.30 1.7
## NLH 0.44 0.18 0.66 0.67 0.33 1.9
## JUB 0.66 0.45 0.29 0.72 0.28 2.2
## NLB 0.53 0.27 -0.33 0.46 0.54 2.2
## MAB 0.67 0.31 0.21 0.59 0.41 1.6
## MDH 0.49 0.47 0.00 0.46 0.54 2.0
## MDB 0.25 0.62 -0.02 0.44 0.56 1.3
## OBH 0.03 0.01 0.82 0.67 0.33 1.0
## OBB 0.26 0.28 0.64 0.57 0.43 1.7
## GS 0.55 0.71 0.14 0.83 0.17 2.0
##
## RC1 RC2 RC3
## SS loadings 5.90 5.26 2.89
## Proportion Var 0.28 0.25 0.14
## Cumulative Var 0.28 0.53 0.67
## Proportion Explained 0.42 0.37 0.21
## Cumulative Proportion 0.42 0.79 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.06
## with the empirical chi square 1335.09 with prob < 4.3e-189
##
## Fit based upon off diagonal values = 0.98
MG_1<-hwl_c1$Morphogroup
MG_1_pcs<-cbind.data.frame(MG_1, pcs1$scores)
MG_1_pcs<-as.data.frame(MG_1_pcs)
MG_1_pcs$MG_1<-as.factor(MG_1_pcs$MG_1)
pc_1_sum<-MG_1_pcs %>% group_by(MG_1) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_1_sum
## # A tibble: 5 x 4
## MG_1 RC1 RC2 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 1.1 -0.292 0.465 -0.522
## 2 1.2 0.353 -0.114 -0.589
## 3 1.3 1.37 -1.12 0.368
## 4 1.4 -0.853 -0.0615 0.923
## 5 1.5 -0.239 0.153 0.703
sort(pcs1$loadings[,1], decreasing=TRUE)
## XCB XFB AUB WCB ZYB ASB
## 0.86231017 0.82513147 0.80941310 0.76391681 0.71649373 0.70852690
## MAB JUB GS NLB MDH NLH
## 0.66889735 0.66055733 0.54858352 0.52907155 0.48862664 0.43826642
## NPH OBB MDB NOL GOL BBH
## 0.40943700 0.26468548 0.25147953 0.09112167 0.07655382 0.06418611
## OBH BNL BPL
## 0.02948380 -0.02851710 -0.03902677
sort(pcs1$loadings[,2], decreasing=TRUE)
## BNL GOL NOL BBH BPL GS
## 0.87204124 0.86501177 0.84365973 0.78001482 0.77603352 0.71196906
## MDB MDH JUB ZYB MAB OBB
## 0.61615923 0.46736082 0.44775852 0.37515627 0.31087973 0.28482125
## NLB NLH NPH ASB AUB OBH
## 0.26808253 0.18372014 0.17495931 0.15779251 0.14370990 0.01223405
## XFB WCB XCB
## -0.06706507 -0.08699013 -0.09647033
sort(pcs1$loadings[,3], decreasing=TRUE)
## OBH NPH NLH OBB ZYB AUB
## 0.818183098 0.707185196 0.663198366 0.644262996 0.394727495 0.376701784
## JUB NOL GOL MAB ASB BNL
## 0.292960771 0.281196398 0.259337956 0.210405485 0.205757158 0.190867146
## WCB GS XCB BPL XFB MDH
## 0.180266921 0.141892256 0.141389721 0.110466634 0.070436774 0.001881795
## MDB BBH NLB
## -0.017477956 -0.102162329 -0.327896830
ggplot(MG_1_pcs, aes(x=RC1, y=RC2, color = MG_1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1_sum, size=5)
ggplot(MG_1_pcs, aes(x=RC2, y=RC3, color = MG_1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1_sum, size=5)
ggplot(MG_1_pcs, aes(x=RC1, y=RC3, color = MG_1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1_sum, size=5)
morpho1.1<-c1.1[2:25]
m1.1_sum<-morpho1.1 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m1.1_dsq<-dist(scale(m1.1_sum[2:22]))
m1.1_hc<-hclust(m1.1_dsq, method='ward.D2')
NbClust(data=as.numeric(scale(m1.1_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)
## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]
## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]
## *******************************************************************
## * Among all indices:
## * 2 proposed 3 as the best number of clusters
## * 2 proposed 5 as the best number of clusters
## * 1 proposed 13 as the best number of clusters
## * 1 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman
## 2 0.9620 96.2662 144.8040 0.5291 93.7609 37.9282 NA 9.4821 3.4294
## 3 1.1152 230.9460 13.6701 1.5573 159.0030 30.2966 NA 3.3663 11.4766
## 4 1.4786 190.3725 57.5772 0.7127 190.4245 32.7089 NA 2.0443 19.5449
## 5 1.4172 291.4842 12.3811 2.5978 242.1715 22.4784 NA 0.8991 45.7116
## 6 1.3393 280.5201 30.9094 1.8312 260.2620 24.2896 NA 0.6747 61.2490
## 7 0.9132 359.2676 7.8383 1.7030 281.5704 23.5734 NA 0.4811 86.3018
## 8 0.5420 345.8774 35.9572 1.3898 298.1831 23.6529 NA 0.3696 112.6434
## 9 1.3944 495.8133 31.4637 1.4927 317.4138 22.0608 NA 0.2724 153.2099
## 10 3.3244 688.0294 17.0733 2.2772 342.7007 18.2314 NA 0.1823 229.3716
## 11 1.9416 804.9301 12.9861 2.1990 357.1362 17.5426 NA 0.1450 288.6951
## 12 0.2538 898.0695 23.5838 1.9678 368.8362 17.3387 NA 0.1204 347.8155
## 13 9.3674 1182.2350 6.5037 1.8755 381.2392 16.7124 NA 0.0989 423.7141
## 14 0.1913 1209.0693 15.9422 2.0730 396.1734 15.2918 NA 0.0780 537.3277
## 15 2.3034 1458.7290 10.5407 2.1399 409.0406 14.3115 NA 0.0636 659.3093
## Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky Ball
## 2 4.4294 0.3226 0.4751 0.6553 0.2434 111.8767 1.0079 0.5532 8.1454
## 3 12.4766 0.3360 0.4584 0.6264 0.2242 83.0504 1.1073 0.5431 1.6095
## 4 20.5449 0.2985 0.5108 0.6284 0.1811 104.0352 1.4449 0.4760 0.9831
## 5 46.7116 0.2941 0.5027 0.6395 0.1049 68.2715 2.5286 0.4365 0.3981
## 6 62.2490 0.2702 0.4559 0.6281 0.2664 27.5371 0.8345 0.4002 0.2734
## 7 87.3018 0.3560 0.4816 0.6375 0.2763 39.2894 0.8185 0.3731 0.1519
## 8 113.6434 0.3165 0.5172 0.5879 0.2596 39.9324 0.8874 0.3496 0.1166
## 9 154.2099 0.2557 0.5312 0.5785 0.0913 59.7280 2.8442 0.3311 0.0627
## 10 230.3716 0.2881 0.5023 0.6033 0.1587 21.1997 1.4133 0.3149 0.0356
## 11 289.6951 0.4001 0.4727 0.6114 0.0525 72.1899 4.8127 0.3005 0.0245
## 12 348.8155 0.3805 0.4338 0.6298 0.2006 27.8937 1.1622 0.2879 0.0180
## 13 424.7141 0.3467 0.4150 0.6330 0.7770 2.5830 0.0861 0.2769 0.0113
## 14 538.3277 0.2939 0.4180 0.6333 0.1447 23.6497 1.5766 0.2668 0.0093
## 15 660.3093 0.2679 0.3989 0.6452 0.1340 19.3883 1.6157 0.2579 0.0066
## Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 0.7500 0.8188 0.2955 0.1052 0.0237 7.6292 0.3320 0.7015
## 3 0.7410 1.6982 0.3814 0.1688 0.0260 4.9347 0.1960 0.5023
## 4 0.6634 0.8066 0.4791 0.1328 0.0276 6.7967 0.1489 0.2454
## 5 0.6085 1.2238 0.4644 0.2103 0.0293 6.2439 0.1028 0.1440
## 6 0.5922 1.4205 0.4683 0.2103 0.0294 9.3004 0.0864 0.1976
## 7 0.5657 2.5980 0.4798 0.2404 0.0294 11.6032 0.0756 0.0933
## 8 0.5052 1.8028 0.5605 0.1226 0.0299 16.1227 0.0660 0.0913
## 9 0.4433 0.6715 0.6339 0.0931 0.0305 16.9758 0.0563 0.0527
## 10 0.4292 0.8915 0.5976 0.1227 0.0305 17.0763 0.0469 0.0413
## 11 0.4212 1.3436 0.5874 0.1825 0.0305 17.9835 0.0423 0.0304
## 12 0.4111 1.7832 0.5898 0.1836 0.0305 23.2065 0.0375 0.0175
## 13 0.3893 1.8875 0.6119 0.1836 0.0306 27.5891 0.0338 0.0020
## 14 0.3496 0.9812 0.6653 0.1836 0.0308 31.5366 0.0294 0.0127
## 15 0.3389 0.8331 0.6538 0.1836 0.0308 31.1455 0.0266 0.0013
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.0439 784.7908 0.3221
## 3 -0.0229 -1072.0019 0.3031
## 4 -0.0306 -775.8496 0.2416
## 5 -0.2595 -38.8310 0.1505
## 6 -0.2052 -58.7306 0.3825
## 7 -0.1143 -146.1979 0.3799
## 8 -0.1290 -122.4991 0.3622
## 9 -0.3330 -24.0181 0.1427
## 10 -0.4407 -13.0759 0.3003
## 11 -0.4407 -13.0759 0.0933
## 12 -0.2932 -30.8768 0.3167
## 13 -0.2305 -48.0468 0.7758
## 14 -0.4407 -13.0759 0.2776
## 15 -0.5175 -8.7974 0.2933
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## Number_clusters 13.0000 15.000 3.0000 5.0000 3.0000 5.0000 -Inf 4.7938
## Value_Index 9.3674 1458.729 131.1339 2.5978 65.2421 12.0417 3 15.0000
## Friedman Rubin Cindex DB Silhouette Duda PseudoT2
## Number_clusters 121.9816 -16.8382 0.2557 0.3989 0.6553 0.2434 111.8767
## Value_Index 10.0000 9.0000 15.0000 2.0000 2.0000 2.0000 2.0000
## Beale Ratkowsky Ball PtBiserial Frey McClain Dunn Hubert
## Number_clusters 1.0079 0.5532 6.5359 0.75 NA 0.2955 0.2404 0
## Value_Index 2.0000 3.0000 2.0000 1.00 2 7.0000 0.0000 3
## SDindex Dindex SDbw
## Number_clusters 4.9347 0 0.0013
## Value_Index 0.0000 15 13.0000
##
## $Best.partition
## [1] 1 2 3 2 2 3 3 2 3 3 2 2 2 3 2 2 3 1 2 3 1 1 3 2 2 3 1 2 3 3 3 2 1 1 3 2 3 2
## [39] 2 2 3 3 1 2 3 2 3 1 3 3 2 3 2 2 1 3 2 2 3 1 1 3 2
m1.1_labs<-m1.1_sum$Population
plot(m1.1_hc, labels=m1.1_labs, hang=-1, cex=0.8)
pcs1.1<-principal(scale(c1.1[4:24]), rotate = 'varimax', nfactors=3)
MG_1.1_pcs<-cbind.data.frame(c1.1$Population, pcs1.1$scores)
MG_1.1_pcs<-MG_1.1_pcs %>% dplyr::rename(Population = 'c1.1$Population')
pc_1.1_sum<-MG_1.1_pcs %>% group_by(Population) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_1.1_sum
## # A tibble: 3 x 4
## Population RC1 RC2 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 AINU -0.385 0.797 -0.370
## 2 EASTER I 0.691 -0.939 -0.0613
## 3 MOKAPU -0.263 0.122 0.371
sort(pcs1.1$loadings[,1], decreasing=TRUE)
## BNL GOL BPL NOL BBH GS
## 0.880601326 0.832309471 0.804734841 0.795696716 0.768213643 0.693358915
## MDB ZYB MDH JUB NLB NLH
## 0.656533236 0.562282022 0.559960448 0.546819622 0.520549387 0.504276399
## AUB MAB NPH ASB OBB XFB
## 0.469101674 0.465663053 0.457010657 0.429111267 0.254168890 0.107323393
## WCB XCB OBH
## 0.040141143 0.037643028 -0.007834046
sort(pcs1.1$loadings[,2], decreasing=TRUE)
## WCB XCB XFB ASB AUB OBB JUB
## 0.84333493 0.83558685 0.82564030 0.69364841 0.67277648 0.66810442 0.66620138
## ZYB MAB GS MDH NOL NPH GOL
## 0.66487481 0.60076264 0.54680269 0.41388669 0.36476410 0.35059623 0.30490677
## NLB NLH OBH BPL MDB BBH BNL
## 0.19637037 0.17728856 0.15726941 0.14800835 0.12972844 0.08048060 0.05046791
sort(pcs1.1$loadings[,3], decreasing=TRUE)
## OBH NLH NPH AUB BBH ZYB
## 0.81806943 0.70600895 0.67069224 0.28863088 0.25824495 0.25381448
## GS OBB XCB BNL JUB MDB
## 0.23215267 0.22000991 0.20466537 0.18720592 0.18362999 0.16698524
## MDH MAB XFB NOL BPL WCB
## 0.16034606 0.15857939 0.07899570 0.07692024 0.07515935 0.07336469
## GOL NLB ASB
## 0.06260495 0.02234662 -0.03690243
ggplot(MG_1.1_pcs, aes(x=RC1, y=RC2, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.1_sum, size=5)
ggplot(MG_1.1_pcs, aes(x=RC2, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.1_sum, size=5)
ggplot(MG_1.1_pcs, aes(x=RC1, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.1_sum, size=5)
morpho1.2<-c1.2[2:25]
m1.2_sum<-morpho1.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m1.2_dsq<-dist(scale(m1.2_sum[2:22]))
m1.2_hc<-hclust(m1.2_dsq, method='ward.D2')
NbClust(data=as.numeric(scale(m1.2_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)
## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]
## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]
## *******************************************************************
## * Among all indices:
## * 1 proposed 2 as the best number of clusters
## * 1 proposed 3 as the best number of clusters
## * 2 proposed 4 as the best number of clusters
## * 1 proposed 8 as the best number of clusters
## * 1 proposed 9 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 4
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman
## 2 1.1027 237.1408 11.7879 3.5892 118.4495 25.6313 NA 6.4078 5.5545
## 3 1.7681 144.9613 85.2503 0.5602 149.1574 35.4214 NA 3.9357 9.6715
## 4 1.6046 257.9958 60.0171 1.3333 196.9431 29.4937 NA 1.8434 21.7845
## 5 1.4128 398.4635 11.2536 0.7311 222.0297 30.9466 NA 1.2379 32.9294
## 6 4.6907 376.2706 23.5153 1.9408 261.4605 23.8318 NA 0.6620 62.4446
## 7 0.1046 438.9974 58.5405 2.3460 288.6632 21.0634 NA 0.4299 96.7052
## 8 13.1025 764.1078 11.7270 2.8814 314.7202 18.1922 NA 0.2843 146.7554
## 9 0.2862 797.8417 4.2400 2.5025 328.6470 18.4580 NA 0.2279 183.3099
## 10 1.5403 751.1753 6.2665 2.5991 346.2893 17.2219 NA 0.1722 242.8751
## 11 0.6030 742.3426 2.9711 2.1635 356.7389 17.6536 NA 0.1459 286.8737
## 12 7.1415 699.9617 9.8053 1.8763 367.8136 17.6224 NA 0.1224 342.1991
## 13 0.6543 750.7935 6.6131 1.5443 377.5311 17.7256 NA 0.1049 399.4375
## 14 1.0378 769.5080 5.9935 1.2465 386.9144 17.7128 NA 0.0904 463.7496
## 15 3.7528 785.9966 6.9404 0.9065 395.2151 17.8234 NA 0.0792 529.2005
## Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky Ball
## 2 6.5545 0.3307 0.4216 0.7324 0.2860 84.8882 0.8085 0.6306 4.2966
## 3 10.6715 0.2588 0.5337 0.6278 0.2897 61.3079 0.7860 0.5255 2.4005
## 4 22.7845 0.1992 0.5699 0.6073 0.1513 106.5373 1.7756 0.4820 0.7437
## 5 33.9294 0.1494 0.5017 0.6180 0.3076 22.5144 0.6823 0.4393 0.2949
## 6 63.4446 0.2301 0.5236 0.6355 0.1610 67.7611 1.6134 0.4022 0.2058
## 7 97.7052 0.1717 0.4863 0.6634 0.2192 21.3782 1.0180 0.3740 0.1249
## 8 147.7554 0.2012 0.4725 0.6621 0.1865 8.7244 0.9694 0.3517 0.0534
## 9 184.3099 0.2343 0.4540 0.6620 0.2418 21.9554 0.9148 0.3319 0.0392
## 10 243.8751 0.2571 0.4704 0.6547 0.0914 19.8888 2.2099 0.3150 0.0327
## 11 287.8737 0.2806 0.4448 0.6426 0.2694 18.9875 0.7911 0.3005 0.0266
## 12 343.1991 0.2614 0.4556 0.6183 0.4942 10.2364 0.3102 0.2877 0.0230
## 13 400.4375 0.3279 0.4842 0.5781 0.5656 9.9837 0.2377 0.2766 0.0178
## 14 464.7496 0.2881 0.4981 0.5364 0.0000 NaN NaN 0.2666 0.0146
## 15 530.2005 0.0424 0.4577 0.5576 0.3399 15.5387 0.5755 0.2576 0.0122
## Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 0.8389 3.0525 0.2376 0.1085 0.0251 6.9323 0.2708 0.3206
## 3 0.6834 0.8630 0.4312 0.0925 0.0269 7.3853 0.2075 0.5617
## 4 0.6321 1.2686 0.4646 0.0644 0.0296 6.1178 0.1334 0.2906
## 5 0.5705 0.4383 0.5058 0.0644 0.0296 8.5522 0.0965 0.1402
## 6 0.5608 1.0340 0.4447 0.1273 0.0299 8.3498 0.0803 0.1064
## 7 0.5233 0.7105 0.4169 0.1273 0.0306 10.9778 0.0620 0.0829
## 8 0.5128 0.8107 0.3867 0.1731 0.0307 10.6397 0.0523 0.0502
## 9 0.5099 1.3592 0.3798 0.2091 0.0307 13.3890 0.0481 0.0266
## 10 0.4942 1.2945 0.3703 0.2285 0.0307 17.7750 0.0428 0.0101
## 11 0.4906 2.8468 0.3674 0.2565 0.0307 17.8766 0.0399 0.0080
## 12 0.4699 2.3581 0.3804 0.2318 0.0308 27.1170 0.0362 0.0295
## 13 0.4488 3.2959 0.3891 0.2065 0.0308 27.8693 0.0336 0.0400
## 14 0.4070 0.8020 0.4292 0.1215 0.0310 35.1747 0.0309 0.0390
## 15 0.4060 2.0822 0.4259 0.1322 0.0310 36.5356 0.0285 0.0239
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.0351 934.5356 0.3749
## 3 -0.0157 -1619.3999 0.3838
## 4 -0.0664 -305.0001 0.1984
## 5 -0.2052 -58.7306 0.4281
## 6 -0.1452 -102.5426 0.2263
## 7 -0.3330 -24.0181 0.3519
## 8 -0.6214 -5.2183 0.4286
## 9 -0.2932 -30.8768 0.3707
## 10 -0.6214 -5.2183 0.2755
## 11 -0.2932 -30.8768 0.4033
## 12 -0.2052 -58.7306 0.5898
## 13 -0.1452 -102.5426 0.6340
## 14 -1.0294 0.0000 NaN
## 15 -0.2595 -38.8310 0.4698
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## Number_clusters 8.0000 9.0000 3.0000 2.0000 4.0000 4.0000 -Inf 1.4869
## Value_Index 13.1025 797.8417 73.4624 3.5892 47.7857 7.3806 4 15.0000
## Friedman Rubin Cindex DB Silhouette Duda PseudoT2
## Number_clusters 65.451 -15.5666 0.0424 0.4216 0.7324 0.286 84.8882
## Value_Index 10.000 15.0000 2.0000 2.0000 2.0000 2.000 2.0000
## Beale Ratkowsky Ball PtBiserial Frey McClain Dunn Hubert
## Number_clusters 0.8085 0.6306 1.8961 0.8389 3.0525 0.2376 0.2565 0
## Value_Index 2.0000 3.0000 2.0000 2.0000 2.0000 11.0000 0.0000 4
## SDindex Dindex SDbw
## Number_clusters 6.1178 0 0.008
## Value_Index 0.0000 11 8.000
##
## $Best.partition
## [1] 1 1 2 1 1 2 1 3 2 1 1 2 4 4 3 1 2 3 1 3 2 1 3 2 1 2 3 3 1 2 4 3 4 3 1 2 1 3
## [39] 2 1 1 2 1 2 1 3 4 4 3 4 4 3 2 1 4 3 4 4 3 4 1 1 2
m1.2_labs<-m1.2_sum$Population
plot(m1.2_hc, labels=m1.2_labs, hang = -1, cex=0.8)
pcs1.2<-principal(scale(c1.2[4:24]), rotate = 'varimax', nfactors=3)
pcs1.2 #
## Principal Components Analysis
## Call: principal(r = scale(c1.2[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC3 RC2 h2 u2 com
## GOL 0.85 0.04 0.26 0.80 0.20 1.2
## NOL 0.85 0.04 0.26 0.79 0.21 1.2
## BNL 0.68 0.04 0.49 0.71 0.29 1.8
## BBH 0.50 0.31 0.40 0.51 0.49 2.6
## XCB 0.04 0.84 0.08 0.71 0.29 1.0
## XFB 0.05 0.76 0.06 0.58 0.42 1.0
## ZYB 0.40 0.61 0.50 0.78 0.22 2.7
## AUB 0.34 0.70 0.44 0.80 0.20 2.2
## WCB 0.11 0.70 0.06 0.50 0.50 1.1
## ASB 0.47 0.48 0.03 0.45 0.55 2.0
## BPL 0.64 0.04 0.22 0.46 0.54 1.2
## NPH 0.20 0.07 0.58 0.38 0.62 1.3
## NLH 0.23 0.17 0.64 0.49 0.51 1.4
## JUB 0.44 0.56 0.43 0.69 0.31 2.8
## NLB 0.42 0.28 -0.22 0.31 0.69 2.3
## MAB 0.57 0.40 0.19 0.51 0.49 2.0
## MDH 0.60 0.31 -0.12 0.47 0.53 1.6
## MDB 0.47 0.39 -0.19 0.40 0.60 2.3
## OBH -0.16 -0.01 0.79 0.65 0.35 1.1
## OBB 0.15 0.14 0.76 0.62 0.38 1.2
## GS 0.64 0.53 0.35 0.81 0.19 2.5
##
## RC1 RC3 RC2
## SS loadings 4.91 4.09 3.43
## Proportion Var 0.23 0.19 0.16
## Cumulative Var 0.23 0.43 0.59
## Proportion Explained 0.40 0.33 0.28
## Cumulative Proportion 0.40 0.72 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
## with the empirical chi square 440.6 with prob < 2.3e-30
##
## Fit based upon off diagonal values = 0.95
MG_1.2_pcs<-cbind.data.frame(c1.2$Population, pcs1.2$scores)
MG_1.2_pcs<-MG_1.2_pcs %>% dplyr::rename(Population = 'c1.2$Population')
pc_1.2_sum<-MG_1.2_pcs %>% group_by(Population) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_1.2_sum
## # A tibble: 3 x 4
## Population RC1 RC3 RC2
## * <fct> <dbl> <dbl> <dbl>
## 1 ANYANG 0.390 0.106 -0.296
## 2 GUAM -0.0288 -0.220 0.762
## 3 PHILLIPI -0.295 0.162 -0.620
sort(pcs1.2$loadings[,1], decreasing=TRUE)
## GOL NOL BNL BPL GS MDH
## 0.85369775 0.84509704 0.68498458 0.64136934 0.63728778 0.60351946
## MAB BBH ASB MDB JUB NLB
## 0.56617011 0.50398076 0.46911225 0.46762767 0.44039432 0.42109267
## ZYB AUB NLH NPH OBB WCB
## 0.39997334 0.33796843 0.23194849 0.20161238 0.14976852 0.10572211
## XFB XCB OBH
## 0.05145673 0.03992281 -0.15640193
sort(pcs1.2$loadings[,2], decreasing=TRUE)
## XCB XFB AUB WCB ZYB JUB
## 0.838782492 0.756676309 0.704701474 0.696274430 0.609673001 0.559612849
## GS ASB MAB MDB MDH BBH
## 0.525063691 0.477764229 0.399237478 0.388318085 0.311515092 0.307577921
## NLB NLH OBB NPH GOL BNL
## 0.280781146 0.166565168 0.143609665 0.068862429 0.043694727 0.037157185
## BPL NOL OBH
## 0.036484277 0.035068478 -0.009390017
sort(pcs1.2$loadings[,3], decreasing=TRUE)
## OBH OBB NLH NPH ZYB BNL
## 0.78779150 0.76116381 0.63636579 0.58080960 0.50034744 0.48749196
## AUB JUB BBH GS NOL GOL
## 0.43833347 0.42761768 0.40140117 0.35350149 0.26418920 0.26373387
## BPL MAB XCB WCB XFB ASB
## 0.21543907 0.18512482 0.08418060 0.06498061 0.05758335 0.03216290
## MDH MDB NLB
## -0.11646655 -0.18745763 -0.22492122
ggplot(MG_1.2_pcs, aes(x=RC1, y=RC2, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.2_sum, size=5)
ggplot(MG_1.2_pcs, aes(x=RC2, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.2_sum, size=5)
ggplot(MG_1.2_pcs, aes(x=RC1, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_1.2_sum, size=5)
morpho1.3<-c1.3[1:24]
m1.3_sum<-morpho1.3 %>% group_by(Sex) %>% dplyr::summarize(across(GOL:GS, mean))
pcs1.3<-principal(scale(c1.3[4:24]), rotate = 'varimax', nfactors=3)
MG_1.3_pcs<-cbind.data.frame(c1.3$Sex, pcs1.3$scores)
MG_1.3_pcs<-MG_1.3_pcs %>% dplyr::rename(Sex = 'c1.3$Sex')
pc_1.3_sum<-MG_1.3_pcs %>% group_by(Sex) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_1.3_sum
## # A tibble: 2 x 4
## Sex RC1 RC2 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.503 -0.518 -0.237
## 2 M 0.493 0.508 0.233
sort(pcs1.3$loadings[,1], decreasing=TRUE)
## XCB ZYB AUB JUB XFB WCB ASB GS
## 0.8569526 0.7625678 0.7587810 0.7243988 0.7051496 0.6697491 0.6369257 0.6325684
## MDH MAB MDB GOL OBB NLB NOL NLH
## 0.5912441 0.5272812 0.5257578 0.3619664 0.3444638 0.3160304 0.3064650 0.2841031
## BBH NPH BNL OBH BPL
## 0.2554415 0.2280223 0.2055569 0.1656299 0.1583243
sort(pcs1.3$loadings[,2], decreasing=TRUE)
## BNL BPL NOL GOL GS JUB
## 0.85565419 0.76513220 0.73784744 0.73034123 0.55311407 0.52105710
## OBB BBH ZYB MDB NLB NPH
## 0.48379684 0.45754507 0.43122731 0.41283387 0.40955693 0.36976932
## NLH AUB MAB MDH ASB WCB
## 0.33762392 0.31051096 0.30010460 0.27851035 0.23796932 0.16097404
## XCB XFB OBH
## 0.12407954 0.10032700 -0.03272567
sort(pcs1.3$loadings[,3], decreasing=TRUE)
## OBH NLH NPH NOL GS OBB
## 0.772596605 0.771562555 0.768635037 0.397752763 0.396720558 0.389948265
## GOL MAB BBH XFB AUB ZYB
## 0.388152828 0.368743129 0.308273452 0.297441716 0.285879047 0.271828321
## XCB JUB ASB WCB BNL NLB
## 0.253482802 0.227008945 0.220668558 0.180549263 0.162594563 0.098554223
## MDH BPL MDB
## 0.038918464 -0.006231387 -0.117247479
ggplot(MG_1.3_pcs, aes(x=RC1, y=RC2, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.3_sum, size=5)
ggplot(MG_1.3_pcs, aes(x=RC2, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.3_sum, size=5)
ggplot(MG_1.3_pcs, aes(x=RC1, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.3_sum, size=5)
morpho1.4<-c1.4[1:24]
m1.4_sum<-morpho1.4 %>% group_by(Sex) %>% dplyr::summarize(across(GOL:GS, mean))
pcs1.4<-principal(scale(c1.4[4:24]), rotate = 'varimax', nfactors=3)
pcs1.4 ## examining results of PCA.
## Principal Components Analysis
## Call: principal(r = scale(c1.4[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC2 RC1 RC3 h2 u2 com
## GOL 0.88 0.19 0.20 0.85 0.15 1.2
## NOL 0.88 0.13 0.20 0.84 0.16 1.1
## BNL 0.83 0.31 -0.01 0.79 0.21 1.3
## BBH 0.72 0.27 0.26 0.65 0.35 1.6
## XCB 0.08 0.47 0.67 0.68 0.32 1.8
## XFB 0.17 0.36 0.74 0.71 0.29 1.6
## ZYB 0.33 0.84 0.24 0.87 0.13 1.5
## AUB 0.24 0.75 0.37 0.76 0.24 1.7
## WCB 0.14 0.68 0.19 0.52 0.48 1.2
## ASB 0.27 0.45 0.31 0.37 0.63 2.4
## BPL 0.69 0.16 -0.28 0.58 0.42 1.4
## NPH 0.67 0.35 0.04 0.57 0.43 1.5
## NLH 0.60 0.40 0.18 0.55 0.45 2.0
## JUB 0.19 0.84 0.09 0.76 0.24 1.1
## NLB 0.17 0.27 -0.48 0.33 0.67 1.9
## MAB 0.34 0.71 -0.01 0.62 0.38 1.4
## MDH 0.33 0.59 -0.08 0.46 0.54 1.6
## MDB 0.27 0.51 0.05 0.33 0.67 1.5
## OBH 0.23 0.13 0.54 0.36 0.64 1.5
## OBB 0.10 0.61 0.10 0.39 0.61 1.1
## GS 0.72 0.39 0.47 0.89 0.11 2.3
##
## RC2 RC1 RC3
## SS loadings 5.28 5.24 2.35
## Proportion Var 0.25 0.25 0.11
## Cumulative Var 0.25 0.50 0.61
## Proportion Explained 0.41 0.41 0.18
## Cumulative Proportion 0.41 0.82 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 242.79 with prob < 2.4e-06
##
## Fit based upon off diagonal values = 0.97
MG_1.4_pcs<-cbind.data.frame(c1.4$Sex, pcs1.4$scores)
MG_1.4_pcs<-MG_1.4_pcs %>% dplyr::rename(Sex = 'c1.4$Sex')
pc_1.4_sum<-MG_1.4_pcs %>% group_by(Sex) %>% dplyr::summarize(across(RC2:RC3, mean))
pc_1.4_sum
## # A tibble: 2 x 4
## Sex RC2 RC1 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.482 -0.616 -0.106
## 2 M 0.500 0.639 0.110
sort(pcs1.4$loadings[,1], decreasing=TRUE)
## NOL GOL BNL GS BBH BPL NPH
## 0.88352094 0.88243888 0.83239525 0.71961894 0.71652584 0.69012610 0.66657444
## NLH MAB ZYB MDH MDB ASB AUB
## 0.60070374 0.34145981 0.32888683 0.32611974 0.26558402 0.26520913 0.24425658
## OBH JUB NLB XFB WCB OBB XCB
## 0.23153874 0.19373810 0.16748331 0.16594146 0.14416740 0.09642814 0.07735697
sort(pcs1.4$loadings[,2], decreasing=TRUE)
## JUB ZYB AUB MAB WCB OBB MDH MDB
## 0.8433597 0.8426717 0.7474713 0.7077281 0.6796706 0.6095263 0.5860515 0.5051223
## XCB ASB NLH GS XFB NPH BNL BBH
## 0.4714933 0.4545556 0.3998124 0.3873867 0.3589494 0.3540436 0.3073552 0.2746377
## NLB GOL BPL OBH NOL
## 0.2724973 0.1867217 0.1554656 0.1336713 0.1305433
sort(pcs1.4$loadings[,3], decreasing=TRUE)
## XFB XCB OBH GS AUB ASB
## 0.741299957 0.668882512 0.538768118 0.470106979 0.370171463 0.309895817
## BBH ZYB NOL GOL WCB NLH
## 0.255714622 0.235832910 0.200202507 0.199507869 0.185490316 0.180922192
## OBB JUB MDB NPH BNL MAB
## 0.102416784 0.091341239 0.048938486 0.038599569 -0.008985247 -0.009169799
## MDH BPL NLB
## -0.075796909 -0.282484623 -0.477128605
ggplot(MG_1.4_pcs, aes(x=RC2, y=RC1, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.4_sum, size=5)
ggplot(MG_1.4_pcs, aes(x=RC1, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.4_sum, size=5)
ggplot(MG_1.4_pcs, aes(x=RC2, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse()+ geom_point(data=pc_1.4_sum, size=5)
morpho1.5<-c1.5[1:24]
m1.5_sum<-morpho1.5 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m1.5_dsq<-dist(scale(m1.5_sum[2:22]))
m1.5_hc<-hclust(m1.5_dsq, method='ward.D2')
NbClust(data=as.numeric(scale(m1.5_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)
## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]
## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]
## *******************************************************************
## * Among all indices:
## * 2 proposed 4 as the best number of clusters
## * 2 proposed 5 as the best number of clusters
## * 2 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 4
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman
## 2 5.1347 168.2809 40.8142 3.2315 115.5634 26.8328 NA 6.7082 5.2610
## 3 0.8018 158.2078 97.3233 1.1694 155.1729 32.1957 NA 3.5773 10.7407
## 4 2.0387 303.8442 15.7703 2.1936 205.9802 25.5524 NA 1.5970 25.2989
## 5 16.5975 287.7756 24.5800 3.2948 249.6928 19.9488 NA 0.7980 51.6348
## 6 0.0474 326.9660 44.7529 2.7679 270.5103 20.6429 NA 0.5734 72.2454
## 7 1.3653 485.1944 38.7657 3.3276 299.4914 17.7371 NA 0.3620 115.0281
## 8 1.4243 696.6437 32.8413 4.0144 327.2821 14.9035 NA 0.2329 179.3599
## 9 1.3155 959.8726 29.2393 4.8224 354.4560 12.2538 NA 0.1513 276.6291
## 10 15.0481 1294.0453 12.2072 5.4004 377.5266 10.4893 NA 0.1049 399.4085
## 11 0.3235 1407.0482 13.2730 5.2714 391.4543 10.1747 NA 0.0841 498.4765
## 12 0.2822 1575.9394 23.2631 5.4965 408.3004 9.2676 NA 0.0644 651.5962
## 13 0.9886 2064.2090 26.6533 5.9711 427.0863 8.0722 NA 0.0478 878.3188
## 14 1.8451 2864.7271 19.1366 6.8162 449.3080 6.5792 NA 0.0336 1250.2182
## 15 0.9147 3624.8390 23.0218 7.1522 465.2214 5.8668 NA 0.0261 1609.7694
## Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky Ball
## 2 6.2610 0.3385 0.4262 0.7250 0.2861 82.3592 0.8074 0.6058 5.5870
## 3 11.7407 0.3499 0.5247 0.6374 0.1475 150.3148 1.8557 0.5293 2.2316
## 4 26.2989 0.2643 0.4782 0.6531 0.2148 65.8133 1.1546 0.4846 0.6383
## 5 52.6348 0.2864 0.4636 0.6726 0.2805 30.7838 0.7893 0.4364 0.4029
## 6 73.2454 0.3357 0.4503 0.6774 0.1074 108.0424 2.5724 0.4013 0.2358
## 7 116.0281 0.2900 0.4173 0.6964 0.1626 51.5114 1.5610 0.3744 0.1132
## 8 180.3599 0.2555 0.3903 0.7194 0.0482 157.9881 5.8514 0.3516 0.0586
## 9 277.6291 0.2114 0.3137 0.7539 0.2793 15.4791 0.7371 0.3322 0.0326
## 10 400.4085 0.3028 0.3193 0.7580 0.3171 25.8419 0.6626 0.3155 0.0190
## 11 499.4765 0.2579 0.3642 0.7068 0.1051 42.5588 2.3644 0.3010 0.0141
## 12 652.5962 0.2302 0.3299 0.7180 0.3367 11.8215 0.5629 0.2883 0.0103
## 13 879.3188 0.2221 0.3288 0.7157 0.1896 17.0971 1.1398 0.2771 0.0065
## 14 1251.2182 0.2808 0.3278 0.7218 0.2152 21.8758 1.0417 0.2671 0.0039
## 15 1610.7694 0.2514 0.3127 0.7447 0.2481 12.1209 0.8081 0.2581 0.0026
## Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 0.8292 2.1573 0.2572 0.3117 0.0250 8.3991 0.2871 0.3705
## 3 0.7084 1.0053 0.4210 0.1208 0.0272 7.2778 0.2086 0.3526
## 4 0.6401 0.8568 0.4817 0.1408 0.0301 5.9975 0.1261 0.2564
## 5 0.5951 1.2161 0.4676 0.2183 0.0302 7.1007 0.0926 0.1274
## 6 0.5680 1.2034 0.4717 0.2961 0.0303 10.1268 0.0809 0.0453
## 7 0.5256 1.0554 0.4677 0.3282 0.0311 11.4363 0.0596 0.0234
## 8 0.4969 1.0225 0.4446 0.2999 0.0311 13.5503 0.0471 0.0168
## 9 0.4756 0.9280 0.4174 0.2999 0.0312 14.9004 0.0359 0.0123
## 10 0.4647 3.4491 0.3958 0.4845 0.0312 16.4987 0.0318 0.0020
## 11 0.4072 1.0773 0.4580 0.1624 0.0314 35.2758 0.0269 0.0078
## 12 0.3957 1.1106 0.4395 0.1624 0.0314 35.8182 0.0226 0.0063
## 13 0.3812 1.0770 0.4145 0.1823 0.0315 35.2663 0.0203 0.0061
## 14 0.3699 1.5393 0.3901 0.2637 0.0315 36.9321 0.0175 0.0062
## 15 0.3534 1.4980 0.3743 0.2735 0.0315 45.2984 0.0157 0.0029
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.0304 1050.8179 0.3754
## 3 -0.0088 -2964.0492 0.1848
## 4 -0.0770 -251.6212 0.2968
## 5 -0.1630 -85.6052 0.3918
## 6 -0.1452 -102.5426 0.1327
## 7 -0.2052 -58.7306 0.2400
## 8 -0.2595 -38.8310 0.0419
## 9 -0.3330 -24.0181 0.4236
## 10 -0.1630 -85.6052 0.4315
## 11 -0.3811 -18.1206 0.1847
## 12 -0.3330 -24.0181 0.4815
## 13 -0.4407 -13.0759 0.3458
## 14 -0.3330 -24.0181 0.3468
## 15 -0.4407 -13.0759 0.4195
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW
## Number_clusters 5.0000 15.000 4.000 15.0000 4.0000 5.0000 -Inf
## Value_Index 16.5975 3624.839 81.553 7.1522 50.8073 6.2978 4
## TraceW Friedman Rubin Cindex DB Silhouette Duda
## Number_clusters 1.1812 371.8994 -23.7113 0.2114 0.3127 0.758 0.2861
## Value_Index 14.0000 10.0000 9.0000 15.0000 10.0000 2.000 2.0000
## PseudoT2 Beale Ratkowsky Ball PtBiserial Frey McClain
## Number_clusters 82.3592 0.8074 0.6058 3.3555 0.8292 1.0053 0.2572
## Value_Index 2.0000 2.0000 3.0000 2.0000 3.0000 2.0000 10.0000
## Dunn Hubert SDindex Dindex SDbw
## Number_clusters 0.4845 0 5.9975 0 0.002
## Value_Index 0.0000 4 0.0000 10 5.000
##
## $Best.partition
## [1] 1 2 2 1 2 2 1 2 3 1 2 3 2 1 3 4 4 3 1 2 3 4 4 3 4 4 3 1 2 2 2 3 1 3 1 2 2 1
## [39] 2 4 4 3 4 3 4 4 4 3 1 3 2 1 2 2 2 1 2 2 1 3 4 4 3
m1.5_labs<-m1.5_sum$Population
plot(m1.5_hc, labels=m1.5_labs, hang = -1, cex=0.8)
pcs1.5<-principal(scale(c1.5[4:24]), rotate = 'varimax', nfactors=3)
MG_1.5_pcs<-cbind.data.frame(c1.5$Population, pcs1.5$scores)
MG_1.5_pcs<-MG_1.5_pcs %>% dplyr::rename(Population = 'c1.5$Population')
pc_1.5_sum<-MG_1.5_pcs %>% group_by(Population) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_1.5_sum
## # A tibble: 3 x 4
## Population RC1 RC3 RC2
## * <fct> <dbl> <dbl> <dbl>
## 1 MORIORI -0.107 -0.0659 0.108
## 2 N MAORI 0.824 -0.106 -0.894
## 3 S MAORI 0.331 0.817 -0.274
sort(pcs1.5$loadings[,1], decreasing=TRUE)
## BNL BPL MDB GOL NLB JUB NOL
## 0.73863146 0.72671305 0.67934410 0.66737592 0.65466778 0.65073261 0.63948657
## ZYB OBB MDH ASB MAB GS AUB
## 0.59903467 0.55159083 0.53741862 0.49723438 0.48100058 0.45546100 0.40367782
## BBH WCB NLH NPH XCB XFB OBH
## 0.37603437 0.30623211 0.20375665 0.18103168 0.10779146 0.09462537 0.06560257
sort(pcs1.5$loadings[,2], decreasing=TRUE)
## XFB XCB GS BBH WCB AUB
## 0.832474292 0.829104663 0.812799710 0.698573970 0.636846141 0.631769246
## MAB ZYB GOL NOL ASB JUB
## 0.541806980 0.539588640 0.517410528 0.487776427 0.455489599 0.452566071
## NPH MDH MDB NLH BNL OBB
## 0.370534924 0.356620660 0.301312770 0.272262997 0.239486639 0.138354536
## BPL NLB OBH
## 0.134186889 0.060477837 0.005274038
sort(pcs1.5$loadings[,3], decreasing=TRUE)
## NLH OBH NPH OBB AUB JUB
## 0.81034885 0.80776066 0.72563155 0.54491372 0.39992916 0.35891736
## ZYB XCB WCB NOL BNL GOL
## 0.32123706 0.30421896 0.28600926 0.27926312 0.23850022 0.20245430
## MDH MAB XFB GS MDB BPL
## 0.19745140 0.19302341 0.16666544 0.16097315 0.12804980 0.08300629
## NLB ASB BBH
## 0.04644679 0.01800660 -0.08538726
ggplot(MG_1.5_pcs, aes(x=RC1, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sum, size=5)
ggplot(MG_1.5_pcs, aes(x=RC3, y=RC2, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sum, size=5)
ggplot(MG_1.5_pcs, aes(x=RC1, y=RC2, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sum, size=5)
m1.5_sex<-morpho1.5 %>% group_by(Sex) %>% dplyr::summarize(across(GOL:GS, mean))
MG_1.5_sex_pcs<-cbind.data.frame(c1.5$Sex, pcs1.5$scores)
MG_1.5_sex_pcs<-MG_1.5_sex_pcs %>% dplyr::rename(Sex = 'c1.5$Sex')
pc_1.5_sex_sum<-MG_1.5_sex_pcs %>% group_by(Sex) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_1.5_sex_sum
## # A tibble: 2 x 4
## Sex RC1 RC3 RC2
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.758 -0.450 -0.259
## 2 M 0.502 0.298 0.172
ggplot(MG_1.5_sex_pcs, aes(x=RC1, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sex_sum, size=5)
ggplot(MG_1.5_sex_pcs, aes(x=RC3, y=RC2, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sex_sum, size=5)
ggplot(MG_1.5_sex_pcs, aes(x=RC1, y=RC2, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() +geom_point(data=pc_1.5_sex_sum, size=5)
morpho2<-c2[2:25]
m2_sum<-morpho2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
pcs2<-principal(scale(morpho2[4:24]), rotate = 'varimax', nfactors=3)
pcs2
## Principal Components Analysis
## Call: principal(r = scale(morpho2[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC2 RC3 RC1 h2 u2 com
## GOL 0.89 0.22 -0.01 0.85 0.15 1.1
## NOL 0.89 0.21 0.03 0.84 0.16 1.1
## BNL 0.63 0.29 0.44 0.68 0.32 2.2
## BBH -0.07 0.44 0.59 0.55 0.45 1.9
## XCB 0.03 0.78 0.22 0.65 0.35 1.2
## XFB 0.25 0.60 0.25 0.48 0.52 1.7
## ZYB 0.38 0.59 0.49 0.73 0.27 2.7
## AUB 0.35 0.66 0.39 0.71 0.29 2.2
## WCB 0.48 0.42 0.31 0.50 0.50 2.7
## ASB 0.71 0.37 -0.23 0.70 0.30 1.8
## BPL 0.48 0.20 0.34 0.39 0.61 2.2
## NPH 0.21 0.19 0.79 0.71 0.29 1.3
## NLH 0.15 0.14 0.87 0.79 0.21 1.1
## JUB 0.43 0.54 0.50 0.73 0.27 2.9
## NLB 0.72 -0.04 0.02 0.52 0.48 1.0
## MAB 0.33 0.42 0.45 0.48 0.52 2.8
## MDH 0.13 0.71 -0.03 0.52 0.48 1.1
## MDB 0.14 0.69 -0.01 0.49 0.51 1.1
## OBH -0.12 -0.03 0.76 0.60 0.40 1.1
## OBB 0.71 0.18 0.28 0.62 0.38 1.4
## GS 0.43 0.64 0.38 0.74 0.26 2.4
##
## RC2 RC3 RC1
## SS loadings 4.91 4.42 3.95
## Proportion Var 0.23 0.21 0.19
## Cumulative Var 0.23 0.44 0.63
## Proportion Explained 0.37 0.33 0.30
## Cumulative Proportion 0.37 0.70 1.00
##
## Mean item complexity = 1.8
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
## with the empirical chi square 415.94 with prob < 7.6e-27
##
## Fit based upon off diagonal values = 0.97
MG_2<-as.factor(c2$Population)
MG_2_pcs<-cbind.data.frame(MG_2, pcs2$scores)
MG_2_pcs<-MG_2_pcs %>% dplyr::rename(Population = 'MG_2')
pc_2_sum<-as.data.frame(MG_2_pcs) %>% group_by(Population) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_2_sum
## # A tibble: 2 x 4
## Population RC2 RC3 RC1
## * <fct> <dbl> <dbl> <dbl>
## 1 ANDAMAN -0.793 0.295 0.491
## 2 BUSHMAN 0.617 -0.229 -0.382
sort(pcs2$loadings[,1], decreasing=TRUE)
## GOL NOL NLB OBB ASB BNL
## 0.89179286 0.88935805 0.72267645 0.71462988 0.71115150 0.63140916
## BPL WCB GS JUB ZYB AUB
## 0.48246877 0.47577743 0.42909472 0.42738850 0.37786946 0.34565047
## MAB XFB NPH NLH MDB MDH
## 0.33377389 0.24709220 0.20586389 0.14868990 0.14203667 0.12747957
## XCB BBH OBH
## 0.03177141 -0.07348742 -0.12490433
sort(pcs2$loadings[,2], decreasing=TRUE)
## XCB MDH MDB AUB GS XFB
## 0.77591046 0.71085588 0.68825670 0.66136670 0.64307902 0.59646926
## ZYB JUB BBH WCB MAB ASB
## 0.58850821 0.54180861 0.44275489 0.41746955 0.41544908 0.37123435
## BNL GOL NOL BPL NPH OBB
## 0.28788727 0.22382599 0.20914885 0.20265335 0.19101062 0.17864474
## NLH OBH NLB
## 0.13725060 -0.02636489 -0.04166433
sort(pcs2$loadings[,3], decreasing=TRUE)
## NLH NPH OBH BBH JUB ZYB
## 0.866793686 0.794132672 0.761480936 0.594093380 0.503441231 0.492616386
## MAB BNL AUB GS BPL WCB
## 0.447372792 0.440245355 0.387350797 0.377482758 0.342553158 0.307692960
## OBB XFB XCB NOL NLB MDB
## 0.275402219 0.254212263 0.217638977 0.025209638 0.019524569 -0.008901311
## GOL MDH ASB
## -0.010160642 -0.027775512 -0.234102974
ggplot(MG_2_pcs, aes(x=RC2, y=RC3, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum, size=5)
ggplot(MG_2_pcs, aes(x=RC2, y=RC1, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum, size=5)
ggplot(MG_2_pcs, aes(x=RC3, y=RC1, color = Population)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum, size=5)
MG_2_sex<-as.factor(c2$Sex)
MG_2_pcs_sex<-cbind.data.frame(MG_2_sex, pcs2$scores)
MG_2_pcs_sex<-MG_2_pcs_sex %>% dplyr::rename(Sex = 'MG_2_sex')
pc_2_sum_sex<-as.data.frame(MG_2_pcs_sex) %>% group_by(Sex) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_2_sum_sex
## # A tibble: 2 x 4
## Sex RC2 RC3 RC1
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.286 -0.538 -0.178
## 2 M 0.316 0.595 0.197
ggplot(MG_2_pcs_sex, aes(x=RC2, y=RC3, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum_sex, size=5)
ggplot(MG_2_pcs_sex, aes(x=RC2, y=RC1, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum_sex, size=5)
ggplot(MG_2_pcs_sex, aes(x=RC3, y=RC1, color = Sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_2_sum_sex, size=5)
morpho3<-c3[2:25]
m3_sum<-morpho3 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m3_dsq<-dist(scale(m3_sum[2:22]))
m3_hc<-hclust(m3_dsq, method='ward.D2')
NbClust(data=as.numeric(scale(m3_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)
## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]
## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]
## *******************************************************************
## * Among all indices:
## * 3 proposed 3 as the best number of clusters
## * 2 proposed 5 as the best number of clusters
## * 1 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## 2 0.6477 187.0039 97.5637 -10.4756 297.9688 583.3335 NA 145.8334
## 3 3.7835 167.5075 356.7525 -7.7663 608.6537 549.7280 NA 61.0809
## 4 0.3026 342.1599 610.4937 -10.6240 734.3284 687.2908 NA 42.9557
## 5 20.3200 850.6365 136.9105 -9.2474 924.2592 630.8232 NA 25.2329
## 6 0.1607 969.8126 40.3839 -8.2605 1079.4542 588.1306 NA 16.3370
## 7 7.6750 905.3052 118.1702 -9.7776 1150.9825 655.1674 NA 13.3708
## 8 0.1323 1051.8355 24.5909 -10.0586 1240.4988 665.9454 NA 10.4054
## 9 6.6750 985.4475 91.7895 -9.3661 1344.8620 629.1942 NA 7.7678
## 10 0.1500 1113.9859 15.6844 -9.7756 1411.6006 644.3346 NA 6.4433
## 11 1.3710 1046.4481 28.5598 -9.4340 1491.2156 623.7983 NA 5.1554
## 12 3.0160 1029.4532 72.4018 -9.6097 1551.3285 627.3261 NA 4.3564
## 13 1.1102 1144.4100 44.1594 -9.3767 1617.4079 611.8306 NA 3.6203
## 14 0.4409 1191.9081 27.9724 -9.2370 1676.8452 600.7509 NA 3.0651
## 15 0.8978 1195.5338 73.9191 -8.8531 1739.1889 579.1340 NA 2.5739
## Friedman Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky
## 2 1.3040 2.3040 0.2488 0.7386 0.4945 0.3021 570.5103 0.7668 0.4153
## 3 4.5009 5.5009 0.2338 0.6840 0.5271 0.2568 306.7924 0.9557 0.4026
## 4 6.8220 7.8220 0.2399 0.6121 0.4948 0.2458 512.5322 1.0169 0.4313
## 5 12.3159 13.3159 0.1630 0.6288 0.5325 0.3258 161.3743 0.6809 0.4257
## 6 19.5669 20.5669 0.1933 0.5751 0.5530 0.2725 283.0255 0.8817 0.3942
## 7 24.1295 25.1295 0.1691 0.5858 0.5343 0.2785 90.6723 0.8396 0.3663
## 8 31.2909 32.2909 0.2204 0.5799 0.5295 0.2328 214.1922 1.0818 0.3455
## 9 42.2553 43.2553 0.2008 0.5731 0.5290 0.3847 110.3797 0.5256 0.3262
## 10 51.1468 52.1468 0.1777 0.5789 0.5272 0.2413 185.5434 1.0308 0.3109
## 11 64.1749 65.1749 0.1532 0.5709 0.5386 0.2118 52.1113 1.1580 0.2966
## 12 76.1274 77.1274 0.1725 0.5377 0.5455 0.1459 64.3991 1.7889 0.2844
## 13 91.8100 92.8100 0.2361 0.5121 0.5488 0.2545 149.3607 0.9574 0.2739
## 14 108.6228 109.6228 0.2165 0.5159 0.5522 0.2479 160.8244 0.9927 0.2644
## 15 129.5397 130.5397 0.1942 0.5126 0.5431 0.3469 52.7058 0.6058 0.2556
## Ball Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 110.0361 0.5079 0.3874 0.3452 0.0059 0.0030 8.0708 0.5287 1.8614
## 3 57.5430 0.6003 0.7133 0.5380 0.0092 0.0041 5.3369 0.3429 0.7517
## 4 21.4951 0.5918 0.7867 0.5867 0.0106 0.0048 5.0758 0.2827 0.5155
## 5 6.3002 0.5375 0.1772 0.7286 0.0106 0.0048 5.5829 0.2054 0.3123
## 6 3.7800 0.5420 1.8833 0.6727 0.0149 0.0053 5.3629 0.1789 0.2244
## 7 2.9057 0.4798 0.3471 0.8358 0.0149 0.0053 10.1160 0.1554 0.2437
## 8 1.9007 0.4769 0.6583 0.8182 0.0208 0.0055 9.8096 0.1390 0.1920
## 9 1.5783 0.4576 0.8596 0.8201 0.0220 0.0056 9.5170 0.1189 0.1451
## 10 1.1240 0.4365 0.7274 0.8405 0.0220 0.0057 10.1393 0.1077 0.1447
## 11 0.9776 0.4163 0.2163 0.8395 0.0220 0.0057 10.9061 0.0945 0.1303
## 12 0.8278 0.4159 0.2137 0.8277 0.0254 0.0057 10.7979 0.0899 0.0990
## 13 0.6316 0.4155 1.2317 0.8186 0.0353 0.0058 10.6204 0.0849 0.0785
## 14 0.5198 0.3954 1.1724 0.8545 0.0353 0.0058 15.8639 0.0773 0.0676
## 15 0.4485 0.3718 0.6708 0.8963 0.0325 0.0058 18.1104 0.0696 0.0584
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.2386 788.3856 0.3821
## 3 0.1739 503.7204 0.3305
## 4 0.2119 621.2252 0.3147
## 5 0.1432 466.8213 0.4118
## 6 0.1739 503.7204 0.3499
## 7 0.0396 849.4841 0.3658
## 8 0.1227 464.5373 0.3022
## 9 0.1296 463.3035 0.4709
## 10 0.1112 471.6144 0.3141
## 11 -0.1290 -122.4991 0.3001
## 12 -0.1829 -71.1410 0.2081
## 13 0.0928 498.4066 0.3325
## 14 0.0978 488.9659 0.3236
## 15 0.0038 7394.6572 0.4429
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW
## Number_clusters 5.00 15.000 5.0000 3.0000 3.0000 3.0000 -Inf
## Value_Index 20.32 1195.534 473.5832 -7.7663 310.6849 171.1684 3
## TraceW Friedman Rubin Cindex DB Silhouette Duda
## Number_clusters 66.6273 20.9169 -2.6883 0.1532 0.5121 0.553 0.3021
## Value_Index 15.0000 6.0000 11.0000 13.0000 6.0000 2.000 2.0000
## PseudoT2 Beale Ratkowsky Ball PtBiserial Frey McClain
## Number_clusters 570.5103 0.7668 0.4313 52.4931 0.6003 NA 0.3452
## Value_Index 2.0000 4.0000 3.0000 3.0000 1.0000 2 13.0000
## Dunn Hubert SDindex Dindex SDbw
## Number_clusters 0.0353 0 5.0758 0 0.0584
## Value_Index 0.0000 4 0.0000 15 5.0000
##
## $Best.partition
## [1] 1 2 3 1 2 3 2 1 3 2 1 1 3 1 1 3 3 1 2 3 2 2 3 2 1 3 2 1 1 1 1 1 3 3 3 1 3
## [38] 2 1 1 1 3 3 2 2 3 1 1 1 3 3 1 3 2 2 1 1 3 3 1 2 2 3 1 2 1 3 1 3 1 2 3 1 1
## [75] 1 3 3 1 1 1 1 2 2 3 1 3 1 2 3 1 1 1 1 3 1 1 1 2 2 2 3 1 3 1 3 1 2 2 1 3 1
## [112] 1 3 1 1 2 1 1 2 3 1 1 3 2 2 1 1 1 1 3 1 1 2 1 1 2 3 3 1 3 2 2 3 3 1 1 3 1
## [149] 1 2 1 3 1 1 1 1 3 2 1 2 1 3 1 3 1 1 2 1 3 2 1 2 3 2 1 2 1 1 1 2 1 1 3 1 3
## [186] 1 3 3 2 2 1 2 1 3 3 1 1 1 3 2 2 1 1 1 3 1 1 1 2 3 3 3 3 1 1 3 2 2 2 1 1 3
## [223] 1 3 1 2 2 3 3 1 2 1 1 1 1 1 1 1 1 1 3 1 3 2 1 1 2 2 2 1 3 3 3 2 3 3 1 3 1
## [260] 1 2 1 3 2 1 3 3 3 2 1 1 1 1 2 1 1 1 3 1 3 3 3 1 3 2 2 1 3 1 1 2 1 3 2 1 1
## [297] 1 3 1 3 1 1 2 3 3 2 3 1 1 1 1 1 1 3 1 3 3 3 2 1 2 2 1 3 2 3 1 1 1 2 1 3 2
## [334] 1 1 3 1 3 1 3 1 1 1 3 2 3 1 3 3 2 1 3 1 2 2 3 1
m3_labs<-m3_sum$Population
plot(m3_hc, labels=m3_labs, hang=-1, cex=0.8)
hc_morph_3<-rect.hclust(m3_hc, k=3, border=2:7)
morphogroup_3<-cutree(m3_hc, k=3)
m3_sum<-m3_sum %>% mutate(cluster=morphogroup_3)
m3_sum$cluster<-as.factor(m3_sum$cluster)
m3.1<-m3_sum %>% filter(cluster==1) %>% droplevels()
m3.2<-m3_sum %>% filter(cluster==2) %>% droplevels()
m3.3<-m3_sum %>% filter(cluster==3) %>% droplevels()
c3.1d<-m3.1$Population
c3.1<-morpho3 %>% filter(Population %in% c3.1d) %>% droplevels()
c3.1$Morphogroup=3.1
c3.1$Morphogroup <- as.factor(c3.1$Morphogroup)
c3.2d<-m3.2$Population
c3.2<-morpho3 %>% filter(Population %in% c3.2d) %>% droplevels()
c3.2$Morphogroup=3.2
c3.2$Morphogroup <- as.factor(c3.2$Morphogroup)
c3.3d<-m3.3$Population
c3.3<-morpho3 %>% filter(Population %in% c3.3d) %>% droplevels()
c3.3$Morphogroup=3.3
c3.3$Morphogroup <- as.factor(c3.3$Morphogroup)
hwl_c3.1<-rbind(c3.1, c3.2)
hwl_c3<-rbind(hwl_c3.1, c3.3)
head(hwl_c3)
## # A tibble: 6 x 25
## Sex PopNum Population GOL NOL BNL BBH XCB XFB ZYB AUB WCB
## <fct> <fct> <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 M 13 ARIKARA 178 177 101 131 140 112 133 126 70
## 2 M 13 ARIKARA 172 171 103 131 136 114 140 129 76
## 3 M 13 ARIKARA 188 186 103 130 137 113 140 128 79
## 4 M 13 ARIKARA 180 178 103 133 144 117 145 137 76
## 5 M 13 ARIKARA 182 181 104 134 135 115 141 128 75
## 6 M 13 ARIKARA 186 183 105 131 143 116 139 129 67
## # … with 13 more variables: ASB <int>, BPL <int>, NPH <int>, NLH <int>,
## # JUB <int>, NLB <int>, MAB <int>, MDH <int>, MDB <int>, OBH <int>,
## # OBB <int>, GS <int>, Morphogroup <fct>
describeBy(hwl_c3, group='Morphogroup')
##
## Descriptive statistics by group
## Morphogroup: 3.1
## vars n mean sd median trimmed mad min
## Sex* 1 330 1.58 0.49 2 1.60 0.00 1
## PopNum* 2 330 2.57 1.08 3 2.59 1.48 1
## Population* 3 330 2.61 1.10 3 2.63 1.48 1
## GOL 4 330 176.55 7.59 176 176.53 7.41 157
## NOL 5 330 174.71 7.13 175 174.69 7.41 156
## BNL 6 330 99.05 4.81 99 99.05 5.93 85
## BBH 7 330 133.79 5.90 134 133.82 5.93 118
## XCB 8 330 137.72 5.48 138 137.60 5.93 124
## XFB 9 330 113.92 5.12 114 113.95 4.45 101
## ZYB 10 330 131.79 7.12 132 131.74 7.41 114
## AUB 11 330 122.48 6.24 123 122.28 5.93 109
## WCB 12 330 73.01 4.00 73 72.92 4.45 63
## ASB 13 330 106.26 5.13 106 106.08 4.45 93
## BPL 14 330 96.45 5.35 96 96.47 5.93 81
## NPH 15 330 68.57 4.36 69 68.53 4.45 54
## NLH 16 330 51.53 3.10 51 51.48 2.97 42
## JUB 17 330 116.15 5.90 116 116.18 5.93 99
## NLB 18 330 26.21 1.88 26 26.19 1.48 21
## MAB 19 330 64.70 3.85 65 64.56 4.45 55
## MDH 20 330 27.95 3.61 28 27.98 4.45 19
## MDB 21 330 11.83 1.76 12 11.79 1.48 7
## OBH 22 330 34.12 1.83 34 34.09 1.48 29
## OBB 23 330 38.92 1.76 39 38.92 1.48 35
## GS 24 330 3260311.60 324988.94 3251316 3252909.63 346016.60 2565120
## Morphogroup* 25 330 1.00 0.00 1 1.00 0.00 1
## max range skew kurtosis se
## Sex* 2 1 -0.33 -1.90 0.03
## PopNum* 4 3 -0.08 -1.28 0.06
## Population* 4 3 -0.12 -1.32 0.06
## GOL 195 38 0.03 -0.56 0.42
## NOL 192 36 0.04 -0.46 0.39
## BNL 110 25 -0.03 -0.54 0.26
## BBH 148 30 -0.05 -0.53 0.32
## XCB 153 29 0.16 -0.25 0.30
## XFB 128 27 -0.05 -0.17 0.28
## ZYB 151 37 0.07 -0.31 0.39
## AUB 143 34 0.33 0.01 0.34
## WCB 87 24 0.27 0.02 0.22
## ASB 120 27 0.27 -0.09 0.28
## BPL 110 29 0.00 -0.17 0.29
## NPH 81 27 0.07 0.31 0.24
## NLH 61 19 0.19 -0.05 0.17
## JUB 132 33 -0.02 -0.43 0.32
## NLB 32 11 0.02 0.21 0.10
## MAB 78 23 0.34 -0.11 0.21
## MDH 39 20 -0.03 -0.50 0.20
## MDB 18 11 0.27 0.08 0.10
## OBH 41 12 0.06 0.12 0.10
## OBB 43 8 0.02 -0.57 0.10
## GS 4083345 1518225 0.19 -0.44 17890.05
## Morphogroup* 1 0 NaN NaN 0.00
## ------------------------------------------------------------
## Morphogroup: 3.2
## vars n mean sd median trimmed mad min
## Sex* 1 687 1.52 0.50 2 1.52 0.00 1
## PopNum* 2 687 7.86 2.01 8 7.82 2.97 5
## Population* 3 687 8.20 1.85 8 8.21 2.97 5
## GOL 4 687 178.20 8.10 178 178.11 7.41 155
## NOL 5 687 176.26 7.82 176 176.18 7.41 153
## BNL 6 687 96.89 5.01 97 96.82 5.93 83
## BBH 7 687 129.06 5.75 129 128.88 5.93 115
## XCB 8 687 138.66 5.82 138 138.43 5.93 123
## XFB 9 687 115.49 6.12 115 115.27 5.93 99
## ZYB 10 687 129.44 6.58 129 129.35 7.41 113
## AUB 11 687 120.86 5.92 121 120.82 5.93 104
## WCB 12 687 71.03 4.38 71 70.97 4.45 61
## ASB 13 687 108.48 5.01 108 108.47 5.93 95
## BPL 14 687 94.02 5.45 94 93.98 5.93 80
## NPH 15 687 66.00 4.40 66 66.03 4.45 52
## NLH 16 687 49.75 3.10 50 49.73 2.97 41
## JUB 17 687 112.82 5.28 113 112.72 5.93 99
## NLB 18 687 24.82 1.84 25 24.78 1.48 19
## MAB 19 687 62.61 3.72 62 62.44 4.45 54
## MDH 20 687 27.50 3.45 27 27.46 2.97 18
## MDB 21 687 12.29 1.79 12 12.23 1.48 7
## OBH 22 687 33.48 1.97 33 33.53 1.48 27
## OBB 23 687 38.90 1.82 39 38.89 1.48 33
## GS 24 687 3196261.58 328110.57 3170440 3183918.88 338732.59 2437344
## Morphogroup* 25 687 2.00 0.00 2 2.00 0.00 2
## max range skew kurtosis se
## Sex* 2 1 -0.08 -2.00 0.02
## PopNum* 11 6 0.14 -1.17 0.08
## Population* 11 6 -0.02 -1.15 0.07
## GOL 201 46 0.10 -0.16 0.31
## NOL 199 46 0.08 -0.07 0.30
## BNL 112 29 0.11 -0.20 0.19
## BBH 146 31 0.26 -0.28 0.22
## XCB 161 38 0.42 0.38 0.22
## XFB 135 36 0.36 0.02 0.23
## ZYB 149 36 0.14 -0.44 0.25
## AUB 140 36 0.11 -0.05 0.23
## WCB 85 24 0.15 -0.17 0.17
## ASB 127 32 0.06 -0.05 0.19
## BPL 114 34 0.12 -0.01 0.21
## NPH 79 27 -0.06 -0.05 0.17
## NLH 59 18 0.03 -0.14 0.12
## JUB 127 28 0.15 -0.35 0.20
## NLB 31 12 0.31 0.34 0.07
## MAB 74 20 0.38 -0.04 0.14
## MDH 38 20 0.14 -0.11 0.13
## MDB 20 13 0.31 0.59 0.07
## OBH 39 12 -0.17 0.04 0.08
## OBB 44 11 0.03 -0.26 0.07
## GS 4399488 1962144 0.36 -0.14 12518.20
## Morphogroup* 2 0 NaN NaN 0.00
## ------------------------------------------------------------
## Morphogroup: 3.3
## vars n mean sd median trimmed mad min
## Sex* 1 581 1.50 0.50 1 1.49 0.00 1
## PopNum* 2 581 14.59 1.70 15 14.61 2.97 12
## Population* 3 581 14.52 1.75 15 14.53 2.97 12
## GOL 4 581 180.19 7.88 180 180.19 7.41 157
## NOL 5 581 177.35 7.15 177 177.32 7.41 156
## BNL 6 581 98.40 4.69 98 98.29 4.45 86
## BBH 7 581 129.30 5.82 129 129.21 5.93 111
## XCB 8 581 131.67 5.83 131 131.51 5.93 116
## XFB 9 581 110.39 5.44 110 110.21 4.45 95
## ZYB 10 581 128.75 6.69 128 128.66 7.41 111
## AUB 11 581 115.95 5.37 116 115.85 5.93 98
## WCB 12 581 68.86 4.13 69 68.81 4.45 57
## ASB 13 581 104.61 4.97 104 104.47 4.45 89
## BPL 14 581 101.06 5.76 101 100.99 5.93 84
## NPH 15 581 63.35 4.72 63 63.37 4.45 50
## NLH 16 581 47.74 3.08 48 47.73 2.97 40
## JUB 17 581 114.52 5.33 114 114.42 5.93 99
## NLB 18 581 27.73 1.93 28 27.71 1.48 23
## MAB 19 581 63.71 3.85 64 63.64 4.45 54
## MDH 20 581 26.70 3.75 27 26.72 4.45 17
## MDB 21 581 11.81 2.08 12 11.74 1.48 6
## OBH 22 581 32.64 2.13 33 32.68 1.48 26
## OBB 23 581 39.78 2.00 40 39.75 1.48 34
## GS 24 581 3074872.64 315790.45 3063420 3066899.51 327478.17 2327314
## Morphogroup* 25 581 3.00 0.00 3 3.00 0.00 3
## max range skew kurtosis se
## Sex* 2 1 0.02 -2.00 0.02
## PopNum* 17 5 -0.02 -1.25 0.07
## Population* 17 5 -0.03 -1.35 0.07
## GOL 203 46 0.01 -0.19 0.33
## NOL 198 42 0.04 -0.11 0.30
## BNL 114 28 0.22 -0.03 0.19
## BBH 152 41 0.18 0.24 0.24
## XCB 149 33 0.26 -0.13 0.24
## XFB 133 38 0.38 0.38 0.23
## ZYB 148 37 0.13 -0.51 0.28
## AUB 132 34 0.16 -0.11 0.22
## WCB 81 24 0.13 -0.08 0.17
## ASB 123 34 0.31 0.26 0.21
## BPL 123 39 0.15 0.15 0.24
## NPH 76 26 -0.03 -0.18 0.20
## NLH 58 18 0.09 -0.04 0.13
## JUB 131 32 0.16 -0.21 0.22
## NLB 34 11 0.15 -0.21 0.08
## MAB 76 22 0.19 -0.16 0.16
## MDH 37 20 0.00 -0.41 0.16
## MDB 19 13 0.32 0.25 0.09
## OBH 39 13 -0.20 0.04 0.09
## OBB 46 12 0.13 -0.13 0.08
## GS 4030152 1702838 0.24 -0.36 13101.20
## Morphogroup* 3 0 NaN NaN 0.00
pcs3<-principal(scale(hwl_c3[4:24]), rotate = 'varimax', nfactors=3)
pcs3
## Principal Components Analysis
## Call: principal(r = scale(hwl_c3[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC2 RC1 RC3 h2 u2 com
## GOL 0.71 0.57 -0.03 0.83 0.169 1.9
## NOL 0.67 0.58 -0.02 0.79 0.213 2.0
## BNL 0.73 0.35 0.19 0.69 0.308 1.6
## BBH 0.38 0.50 0.20 0.43 0.565 2.2
## XCB -0.18 0.80 0.37 0.80 0.196 1.5
## XFB -0.12 0.74 0.31 0.66 0.337 1.4
## ZYB 0.45 0.38 0.66 0.78 0.219 2.4
## AUB 0.11 0.53 0.67 0.74 0.256 2.0
## WCB 0.14 0.39 0.60 0.54 0.464 1.8
## ASB 0.11 0.65 0.31 0.53 0.466 1.5
## BPL 0.87 -0.09 0.02 0.77 0.233 1.0
## NPH 0.19 0.31 0.67 0.58 0.421 1.6
## NLH 0.15 0.35 0.66 0.58 0.415 1.6
## JUB 0.60 0.23 0.61 0.77 0.226 2.3
## NLB 0.63 -0.20 0.02 0.44 0.558 1.2
## MAB 0.56 0.13 0.50 0.58 0.424 2.1
## MDH 0.30 0.32 0.37 0.33 0.671 2.9
## MDB 0.34 0.34 0.29 0.31 0.686 2.9
## OBH -0.10 -0.01 0.66 0.45 0.551 1.0
## OBB 0.60 0.14 0.33 0.49 0.512 1.7
## GS 0.39 0.84 0.25 0.92 0.076 1.6
##
## RC2 RC1 RC3
## SS loadings 4.53 4.51 3.99
## Proportion Var 0.22 0.21 0.19
## Cumulative Var 0.22 0.43 0.62
## Proportion Explained 0.35 0.35 0.31
## Cumulative Proportion 0.35 0.69 1.00
##
## Mean item complexity = 1.8
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 3575.14 with prob < 0
##
## Fit based upon off diagonal values = 0.97
MG_3<-hwl_c3$Morphogroup
MG_3_pcs<-cbind.data.frame(MG_3, pcs3$scores)
MG_3_pcs<-as.data.frame(MG_3_pcs)
MG_3_pcs$MG_3<-as.factor(MG_3_pcs$MG_3)
pc_3_sum<-MG_3_pcs %>% group_by(MG_3) %>% dplyr::summarize(across(RC2:RC3, mean))
pc_3_sum
## # A tibble: 3 x 4
## MG_3 RC2 RC1 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 3.1 -0.123 -0.0253 0.743
## 2 3.2 -0.520 0.448 0.0389
## 3 3.3 0.684 -0.515 -0.468
sort(pcs3$loadings[,1], decreasing=TRUE)
## BPL BNL GOL NOL NLB OBB JUB
## 0.8710993 0.7308651 0.7079409 0.6671807 0.6337507 0.5996900 0.5964270
## MAB ZYB GS BBH MDB MDH NPH
## 0.5576735 0.4529363 0.3883827 0.3761783 0.3429594 0.2996543 0.1887839
## NLH WCB ASB AUB OBH XFB XCB
## 0.1506211 0.1381008 0.1147203 0.1135620 -0.1016162 -0.1172894 -0.1811767
sort(pcs3$loadings[,2], decreasing=TRUE)
## GS XCB XFB ASB NOL GOL
## 0.843354214 0.797085123 0.742168802 0.653364247 0.584549600 0.573480731
## AUB BBH WCB ZYB NLH BNL
## 0.531768599 0.501500238 0.390029451 0.376174685 0.350512147 0.348513154
## MDB MDH NPH JUB OBB MAB
## 0.339469231 0.316323377 0.307465080 0.227594987 0.143479728 0.128951886
## OBH BPL NLB
## -0.009567601 -0.085968013 -0.200679729
sort(pcs3$loadings[,3], decreasing=TRUE)
## NPH AUB NLH OBH ZYB JUB
## 0.66992615 0.66979012 0.66286823 0.66211989 0.65941459 0.60515526
## WCB MAB MDH XCB OBB XFB
## 0.60399674 0.49864220 0.37296063 0.36907025 0.32781268 0.31315281
## ASB MDB GS BBH BNL BPL
## 0.30606877 0.28511697 0.24867428 0.20457162 0.19043677 0.02105507
## NLB NOL GOL
## 0.01891023 -0.02266377 -0.02588954
ggplot(MG_3_pcs, aes(x=RC2, y=RC1, color = MG_3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3_sum, size=5)
ggplot(MG_3_pcs, aes(x=RC1, y=RC3, color = MG_3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3_sum, size=5)
ggplot(MG_3_pcs, aes(x=RC2, y=RC3, color = MG_3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3_sum, size=5)
morpho3.1<-c3.1
m3.1_sum<-morpho3.1 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m3.1_dsq<-dist(scale(m3.1_sum[2:22]))
m3.1_hc<-hclust(m3.1_dsq, method='ward.D2')
NbClust(data=as.numeric(scale(m3.1_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)
## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]
## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]
## *******************************************************************
## * Among all indices:
## * 1 proposed 2 as the best number of clusters
## * 3 proposed 4 as the best number of clusters
## * 1 proposed 12 as the best number of clusters
## * 1 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 4
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman
## 2 8.1284 82.8509 114.2021 -0.2790 115.9762 63.3554 NA 15.8388 2.9776
## 3 0.6119 154.3150 79.6531 -2.4811 161.5094 82.8994 NA 9.2110 5.8396
## 4 0.5503 227.7468 23.2918 -0.4478 235.6759 60.9502 NA 3.8094 15.5381
## 5 1.5418 223.5344 49.2004 -1.8190 259.0263 72.1223 NA 2.8849 20.8379
## 6 6.4904 296.2415 26.1301 -1.9314 290.9921 70.9845 NA 1.9718 30.9506
## 7 0.1013 329.6430 47.0763 -1.0407 331.1090 59.9302 NA 1.2231 50.5099
## 8 4.9989 456.0222 15.8913 -1.0584 356.3610 57.9526 NA 0.9055 68.5741
## 9 0.2989 478.0652 33.8609 -0.5535 385.7130 51.7158 NA 0.6385 97.6739
## 10 2.7089 612.2896 10.0861 0.0483 414.2908 45.4345 NA 0.4543 137.6612
## 11 0.7265 618.7028 7.1557 0.0816 433.9078 43.5259 NA 0.3597 174.1371
## 12 0.5710 609.7725 25.7408 0.1151 452.1565 41.6846 NA 0.2895 216.6341
## 13 4.6352 750.3686 8.6505 -0.2885 463.6110 42.6853 NA 0.2526 248.4299
## 14 0.9490 766.7510 13.2248 -0.4841 476.7747 42.3243 NA 0.2159 290.7475
## 15 1.1434 835.3331 7.1307 -0.5689 490.5348 41.2452 NA 0.1833 342.6762
## Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky Ball
## 2 3.9776 0.3343 0.5758 0.6205 0.2600 125.2242 0.9276 0.5013 15.6687
## 3 6.8396 0.2980 0.5849 0.5415 0.2151 131.3403 1.1832 0.5138 4.3657
## 4 16.5381 0.3439 0.5624 0.5796 0.2456 67.5903 0.9796 0.4731 1.6509
## 5 21.8379 0.3066 0.5608 0.5283 0.1725 95.9648 1.5233 0.4287 1.0229
## 6 31.9506 0.2660 0.5157 0.5662 0.3053 43.2268 0.7204 0.3979 0.5253
## 7 51.5099 0.3041 0.4985 0.5650 0.2115 55.9087 1.1648 0.3708 0.3372
## 8 69.5741 0.3269 0.5139 0.5769 0.1440 71.3589 1.8297 0.3494 0.1831
## 9 98.6739 0.2801 0.4847 0.6133 0.1442 65.2964 1.8138 0.3301 0.1346
## 10 138.6612 0.2404 0.4740 0.6165 0.3601 21.3279 0.5469 0.3141 0.0835
## 11 175.1371 0.2991 0.5109 0.6002 0.1811 40.6974 1.3566 0.2997 0.0668
## 12 217.6341 0.3297 0.4901 0.6236 0.1856 30.7223 1.2801 0.2871 0.0558
## 13 249.4299 0.3207 0.4777 0.6347 0.1383 37.3717 1.7796 0.2763 0.0379
## 14 291.7475 0.3006 0.4595 0.6310 0.2355 22.7187 0.9466 0.2663 0.0314
## 15 343.6762 0.2774 0.4580 0.6320 0.1520 33.4627 1.5935 0.2574 0.0246
## Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 0.7040 1.6006 0.3422 0.0498 0.0161 7.0193 0.3779 1.1979
## 3 0.6265 0.5254 0.5706 0.0563 0.0182 5.9286 0.2796 0.7588
## 4 0.6086 1.5964 0.6299 0.1026 0.0214 4.6128 0.1847 0.4635
## 5 0.5607 0.8343 0.7373 0.0716 0.0215 7.8344 0.1560 0.2982
## 6 0.5281 0.6246 0.7846 0.0740 0.0228 7.7211 0.1231 0.1256
## 7 0.4986 1.0009 0.7799 0.1076 0.0230 7.4330 0.1017 0.1454
## 8 0.4669 0.7012 0.8195 0.1341 0.0237 10.1993 0.0867 0.1077
## 9 0.4458 0.7092 0.8112 0.1345 0.0238 10.7961 0.0723 0.0689
## 10 0.4263 1.1390 0.7966 0.1345 0.0238 12.3997 0.0608 0.0877
## 11 0.3993 0.9378 0.8279 0.1265 0.0240 17.0822 0.0555 0.0780
## 12 0.3818 1.2088 0.8340 0.1555 0.0240 18.6262 0.0498 0.0194
## 13 0.3694 0.9844 0.8511 0.1607 0.0240 24.6324 0.0459 0.0079
## 14 0.3589 1.0247 0.8566 0.1607 0.0241 23.4729 0.0418 0.0066
## 15 0.3457 1.0191 0.8656 0.1607 0.0242 22.7709 0.0385 0.0168
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.0730 558.9828 0.3408
## 3 0.0439 784.7908 0.2839
## 4 -0.0387 -590.8616 0.3331
## 5 -0.0566 -373.6603 0.2314
## 6 -0.0664 -305.0001 0.4066
## 7 -0.1143 -146.1979 0.2975
## 8 -0.1630 -85.6052 0.2011
## 9 -0.1829 -71.1410 0.2051
## 10 -0.1630 -85.6052 0.4738
## 11 -0.2305 -48.0468 0.2741
## 12 -0.2932 -30.8768 0.2951
## 13 -0.3330 -24.0181 0.2306
## 14 -0.2932 -30.8768 0.3630
## 15 -0.3330 -24.0181 0.2537
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## Number_clusters 2.0000 15.0000 4.0000 12.0000 4.0000 4.0000 -Inf 4.4772
## Value_Index 8.1284 835.3331 56.3613 0.1151 74.1665 33.1213 4 15.0000
## Friedman Rubin Cindex DB Silhouette Duda PseudoT2
## Number_clusters 51.9286 -10.7012 0.2404 0.458 0.6347 0.26 125.2242
## Value_Index 12.0000 10.0000 15.0000 13.000 2.0000 2.00 2.0000
## Beale Ratkowsky Ball PtBiserial Frey McClain Dunn Hubert
## Number_clusters 0.9276 0.5138 11.303 0.704 1.6006 0.3422 0.1607 0
## Value_Index 3.0000 3.0000 2.000 2.000 2.0000 13.0000 0.0000 4
## SDindex Dindex SDbw
## Number_clusters 4.6128 0 0.0066
## Value_Index 0.0000 14 2.0000
##
## $Best.partition
## [1] 1 2 3 4 1 2 4 4 3 2 4 1 2 4 4 4 3 1 4 2 3 1 4 2 3 1 1 2 3 1 1 1 4 1 3 2 4 2
## [39] 4 1 4 2 1 4 3 2 4 2 3 1 4 2 3 1 1 2 4 3 2 2 4 2 3 1 2 1 3 1 3 1 1 2 3 2 4 1
## [77] 3 2 4 1 1 2 3 4
m3.1_labs<-m3.1_sum$Population
plot(m3.1_hc, labels=m3.1_labs, hang=-1, cex=0.8)
pcs3.1<-principal(scale(c3.1[4:24]), rotate = 'varimax', nfactors=3)
pcs3.1
## Principal Components Analysis
## Call: principal(r = scale(c3.1[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## GOL 0.47 0.73 0.11 0.77 0.229 1.8
## NOL 0.44 0.72 0.14 0.74 0.265 1.7
## BNL 0.23 0.77 0.35 0.76 0.236 1.6
## BBH 0.53 0.58 -0.15 0.64 0.356 2.1
## XCB 0.82 0.00 0.32 0.78 0.218 1.3
## XFB 0.78 0.01 0.28 0.68 0.315 1.2
## ZYB 0.57 0.40 0.58 0.82 0.180 2.7
## AUB 0.57 0.23 0.62 0.76 0.243 2.3
## WCB 0.63 0.17 0.28 0.50 0.500 1.6
## ASB 0.56 0.24 0.34 0.49 0.513 2.1
## BPL -0.05 0.83 0.19 0.73 0.270 1.1
## NPH 0.22 0.40 0.68 0.67 0.331 1.9
## NLH 0.26 0.30 0.71 0.66 0.342 1.6
## JUB 0.49 0.47 0.54 0.76 0.243 3.0
## NLB -0.04 0.39 0.28 0.23 0.770 1.9
## MAB 0.27 0.62 0.30 0.55 0.451 1.9
## MDH 0.46 0.38 0.06 0.36 0.644 2.0
## MDB 0.47 0.40 0.16 0.40 0.597 2.2
## OBH 0.13 -0.09 0.70 0.52 0.478 1.1
## OBB 0.28 0.35 0.62 0.59 0.411 2.0
## GS 0.77 0.57 0.11 0.93 0.073 1.9
##
## RC1 RC2 RC3
## SS loadings 4.93 4.75 3.66
## Proportion Var 0.23 0.23 0.17
## Cumulative Var 0.23 0.46 0.63
## Proportion Explained 0.37 0.36 0.27
## Cumulative Proportion 0.37 0.73 1.00
##
## Mean item complexity = 1.9
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.06
## with the empirical chi square 550.92 with prob < 3.5e-47
##
## Fit based upon off diagonal values = 0.98
MG_3.1<-c3.1$Population
MG_3.1_pcs<-cbind.data.frame(MG_3.1, pcs3.1$scores)
MG_3.1_pcs$MG_3.1<-as.factor(MG_3.1_pcs$MG_3.1)
pc_3.1_sum<-MG_3.1_pcs %>% group_by(MG_3.1) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.1_sum
## # A tibble: 4 x 4
## MG_3.1 RC1 RC2 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 ARIKARA -0.0629 -0.162 1.00
## 2 HAINAN -0.0565 -0.144 -0.340
## 3 N JAPAN 0.237 0.0468 -0.0392
## 4 S JAPAN -0.128 0.210 -0.411
sort(pcs3.1$loadings[,1], decreasing=TRUE)
## XCB XFB GS WCB AUB ZYB
## 0.82439351 0.77948672 0.76555531 0.62575495 0.56898542 0.56865604
## ASB BBH JUB MDB GOL MDH
## 0.55854484 0.52985517 0.48707550 0.46947848 0.46826566 0.45739433
## NOL OBB MAB NLH BNL NPH
## 0.44098441 0.27863314 0.27279594 0.25860107 0.23261787 0.21873623
## OBH NLB BPL
## 0.13363120 -0.04195184 -0.04833663
sort(pcs3.1$loadings[,2], decreasing=TRUE)
## BPL BNL GOL NOL MAB BBH
## 0.830876535 0.766500087 0.734969456 0.722268321 0.617566122 0.584445273
## GS JUB NPH ZYB MDB NLB
## 0.573752662 0.472376815 0.404349040 0.396101909 0.395451746 0.387039378
## MDH OBB NLH ASB AUB WCB
## 0.377744039 0.354080147 0.298879614 0.243719147 0.230118118 0.168361082
## XFB XCB OBH
## 0.014994463 -0.001186767 -0.088322916
sort(pcs3.1$loadings[,3], decreasing=TRUE)
## NLH OBH NPH OBB AUB ZYB JUB
## 0.7082554 0.7041761 0.6765996 0.6210068 0.6162912 0.5827246 0.5444586
## BNL ASB XCB MAB WCB NLB XFB
## 0.3503080 0.3398187 0.3198096 0.3044928 0.2828325 0.2806770 0.2769057
## BPL MDB NOL GOL GS MDH BBH
## 0.1923248 0.1610242 0.1376745 0.1096684 0.1069208 0.0627041 -0.1464342
ggplot(MG_3.1_pcs, aes(x=RC1, y=RC2, color = MG_3.1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum, size=5)
ggplot(MG_3.1_pcs, aes(x=RC1, y=RC3, color = MG_3.1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum, size=5)
ggplot(MG_3.1_pcs, aes(x=RC2, y=RC3, color = MG_3.1)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum, size=5)
MG_3.1_sex<-c3.1$Sex
MG_3.1_pcs_sex<-cbind.data.frame(MG_3.1_sex, pcs3.1$scores)
pc_3.1_sum_sex<-MG_3.1_pcs_sex %>% group_by(MG_3.1_sex) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.1_sum_sex
## # A tibble: 2 x 4
## MG_3.1_sex RC1 RC2 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.609 -0.580 -0.245
## 2 M 0.438 0.417 0.176
ggplot(MG_3.1_pcs_sex, aes(x=RC1, y=RC2, color = MG_3.1_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum_sex, size=5)
ggplot(MG_3.1_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.1_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum_sex, size=5)
ggplot(MG_3.1_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.1_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.1_sum_sex, size=5)
morphogroup_1<-cutree(m3.1_hc, k=4)
m3.1.1_sum<-m3.1_sum %>% mutate(cluster=morphogroup_1)
m3.1.1_sum$cluster<-as.factor(m3.1.1_sum$cluster)
m3.1.1<-m3.1.1_sum %>% filter(cluster==1) %>% droplevels()
m3.1.2<-m3.1.1_sum %>% filter(cluster==2) %>% droplevels()
m3.1.3<-m3.1.1_sum %>% filter(cluster==3) %>% droplevels()
m3.1.4<-m3.1.1_sum %>% filter(cluster==4) %>% droplevels()
c3.1.1d<-m3.1.1$Population
c3.1.1<-c3.1 %>% filter(Population %in% c3.1.1d) %>% droplevels()
c3.1.1$Morphogroup="3.1.1"
c3.1.1$Morphogroup <- as.factor(c3.1.1$Morphogroup)
c3.1.2d<-m3.1.2$Population
c3.1.2<-c3.1 %>% filter(Population %in% c3.1.2d) %>% droplevels()
c3.1.2$Morphogroup="3.1.2"
c3.1.2$Morphogroup <- as.factor(c3.1.2$Morphogroup)
c3.1.3d<-m3.1.3$Population
c3.1.3<-c3.1 %>% filter(Population %in% c3.1.3d) %>% droplevels()
c3.1.3$Morphogroup="3.1.3"
c3.1.3$Morphogroup <- as.factor(c3.1.3$Morphogroup)
c3.1.4d<-m3.1.4$Population
c3.1.4<-c3.1 %>% filter(Population %in% c3.1.4d) %>% droplevels()
c3.1.4$Morphogroup="3.1.4"
c3.1.4$Morphogroup <- as.factor(c3.1.4$Morphogroup)
hwlr_c3.1<-rbind.data.frame(c3.1.1, c3.1.2)
hwlr_c3.1a<-rbind.data.frame(c3.1.3, c3.1.4)
hwlr_c3.1<-rbind.data.frame(hwlr_c3.1, hwlr_c3.1a)
morpho3.2<-c3.2
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m3.2_dsq<-dist(scale(m3.2_sum[2:22]))
m3.2_hc<-hclust(m3.2_dsq, method='ward.D2')
NbClust(data=as.numeric(scale(m3.2_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)
## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]
## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]
## *******************************************************************
## * Among all indices:
## * 1 proposed 2 as the best number of clusters
## * 2 proposed 4 as the best number of clusters
## * 1 proposed 6 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
## * 1 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 4
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman
## 2 1.0010 310.6099 46.0055 -2.5443 174.7412 153.5245 NA 38.3811 2.2829
## 3 0.8802 226.0131 30.0344 -6.3707 232.6288 232.9905 NA 25.8878 3.8672
## 4 2.3131 190.7795 53.2699 -5.9015 321.5104 226.2691 NA 14.1418 7.9097
## 5 0.3412 208.2371 175.7052 -6.7374 373.7806 247.7530 NA 9.9101 11.7143
## 6 4.1717 404.9899 28.4278 -5.8670 443.2822 222.3551 NA 6.1765 19.3998
## 7 0.5194 407.3634 9.1972 -6.0161 488.2979 222.8164 NA 4.5473 26.7089
## 8 1.3179 370.7537 15.3621 -6.0812 528.9557 220.7050 NA 3.4485 35.5375
## 9 7.0796 359.5775 29.4713 -5.0443 583.8431 192.2909 NA 2.3740 52.0758
## 10 10.2540 388.3236 29.0455 -4.9070 620.0047 185.6258 NA 1.8563 66.8785
## 11 0.0261 423.3788 16.1535 -4.8681 651.6150 181.1485 NA 1.4971 83.1630
## 12 0.2135 428.8970 85.2390 -4.6931 683.1584 173.9485 NA 1.2080 103.3067
## 13 5.1585 643.6937 34.1210 -4.1066 719.7226 159.1916 NA 0.9420 132.7633
## 14 0.3206 742.5189 71.9828 -3.8927 748.2206 152.0881 NA 0.7760 161.3795
## 15 2.4958 1059.7593 42.0809 -3.4075 779.8839 140.7590 NA 0.6256 200.4080
## Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky Ball
## 2 3.2829 0.2733 0.6604 0.5947 0.3762 129.3582 0.5458 0.5838 20.0500
## 3 4.8672 0.2387 0.6260 0.5307 0.3414 125.4044 0.6334 0.5028 10.1472
## 4 8.9097 0.3419 0.6470 0.5069 0.2959 145.1296 0.7803 0.4472 6.2970
## 5 12.7143 0.3000 0.6329 0.4826 0.2128 177.5325 1.2077 0.4134 3.6703
## 6 20.3998 0.2323 0.6311 0.5065 0.1267 103.4079 2.1543 0.3947 1.3671
## 7 27.7089 0.2155 0.5502 0.5250 0.2149 120.5699 1.1821 0.3676 0.9752
## 8 36.5375 0.1801 0.5502 0.5466 0.2944 35.9557 0.7491 0.3444 0.8007
## 9 53.0758 0.3000 0.5271 0.5537 0.2152 87.5366 1.1672 0.3256 0.6409
## 10 67.8785 0.2737 0.5166 0.5570 0.0993 99.7540 2.7709 0.3102 0.4753
## 11 84.1630 0.2722 0.4929 0.5616 0.2375 83.4938 1.0308 0.2968 0.3565
## 12 104.3067 0.2398 0.4944 0.5783 0.2396 69.8313 1.0120 0.2846 0.2921
## 13 133.7633 0.2045 0.4985 0.5896 0.2238 34.6875 1.0511 0.2750 0.1653
## 14 162.3795 0.2812 0.4920 0.5894 0.1696 68.5599 1.5236 0.2654 0.1223
## 15 201.4080 0.2699 0.4847 0.5957 0.1791 77.9255 1.4431 0.2571 0.0741
## Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 0.6303 1.0037 0.3714 0.0276 0.0082 6.4886 0.4079 1.0965
## 3 0.6175 0.4821 0.4941 0.0290 0.0100 5.4531 0.3366 0.6896
## 4 0.6258 1.6293 0.5462 0.0551 0.0124 4.4952 0.2690 0.5223
## 5 0.5422 0.6206 0.7703 0.0587 0.0127 6.5102 0.2175 0.3852
## 6 0.5017 0.2399 0.8280 0.0635 0.0131 6.2848 0.1669 0.2879
## 7 0.5009 0.8921 0.8007 0.0635 0.0137 6.2452 0.1438 0.2430
## 8 0.4655 0.2209 0.8540 0.0635 0.0137 8.9232 0.1218 0.1710
## 9 0.4639 0.8730 0.8085 0.1168 0.0141 8.8790 0.1098 0.1214
## 10 0.4419 0.5480 0.8271 0.1209 0.0142 11.0834 0.0957 0.1082
## 11 0.4368 1.3023 0.8169 0.1269 0.0144 10.6920 0.0850 0.1163
## 12 0.4035 0.8556 0.8847 0.0727 0.0145 15.5007 0.0754 0.0895
## 13 0.3782 0.5055 0.8990 0.0727 0.0145 16.0626 0.0663 0.0479
## 14 0.3730 0.7240 0.8801 0.1064 0.0145 15.4584 0.0614 0.0531
## 15 0.3608 1.0353 0.8694 0.1131 0.0145 17.0855 0.0549 0.0423
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.1432 466.8213 0.4623
## 3 0.1227 464.5373 0.4290
## 4 0.1152 468.3892 0.3805
## 5 0.0848 517.8437 0.2773
## 6 -0.1143 -146.1979 0.1628
## 7 0.0304 1050.8179 0.2848
## 8 -0.1143 -146.1979 0.4004
## 9 -0.0229 -1072.0019 0.2907
## 10 -0.1829 -71.1410 0.1242
## 11 -0.0088 -2964.0492 0.3193
## 12 -0.0387 -590.8616 0.3254
## 13 -0.2052 -58.7306 0.3294
## 14 -0.1290 -122.4991 0.2374
## 15 -0.0885 -209.1134 0.2461
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## Number_clusters 10.000 15.000 6.0000 2.0000 4.0000 4.0000 -Inf 7.5143
## Value_Index 10.254 1059.759 147.2774 -2.5443 88.8816 28.2054 4 15.0000
## Friedman Rubin Cindex DB Silhouette Duda PseudoT2
## Number_clusters 39.0285 -1.7357 0.1801 0.4847 0.5957 0.3762 129.3582
## Value_Index 9.0000 8.0000 15.0000 15.0000 2.0000 2.0000 2.0000
## Beale Ratkowsky Ball PtBiserial Frey McClain Dunn Hubert
## Number_clusters 0.5458 0.5838 9.9028 0.6303 1.0037 0.3714 0.1269 0
## Value_Index 2.0000 3.0000 2.0000 2.0000 2.0000 11.0000 0.0000 4
## SDindex Dindex SDbw
## Number_clusters 4.4952 0 0.0423
## Value_Index 0.0000 15 10.0000
##
## $Best.partition
## [1] 1 1 2 3 1 1 2 1 1 2 3 1 1 2 1 1 2 3 4 1 2 3 1 2 1 1 4 3 4 3 1 2 1 1 2 1 3
## [38] 1 2 1 1 2 2 2 4 2 2 2 2 1 3 4 2 1 2 2 2 2 4 1 1 2 2 1 3 4 2 1 2 2 1 1 2 2
## [75] 4 3 2 4 2 2 2 2 2 2 1 2 2 3 1 4 2 2 2 4 2 1 2 2 3 2 1 1 1 1 2 1 1 1 1 2 3
## [112] 2 4 1 2 2 2 1 2 4 2 1 2 1 3 2 1 1 1 2 2 3 4 4 2 1 3 4 2 2 1 2 2 2 4 1 3
m3.2_labs<-m3.2_sum$Population
plot(m3.2_hc, labels=m3.2_labs, hang=-1, cex=0.8)
hc_morph_3.2<-rect.hclust(m3.2_hc, k=4, border=2:7)
pcs3.2<-principal(scale(c3.2[4:24]), rotate = 'varimax', nfactors=3)
pcs3.2
## Principal Components Analysis
## Call: principal(r = scale(c3.2[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC2 RC3 RC1 h2 u2 com
## GOL 0.89 0.24 0.12 0.86 0.141 1.2
## NOL 0.90 0.18 0.09 0.84 0.156 1.1
## BNL 0.84 0.16 0.18 0.77 0.234 1.2
## BBH 0.62 0.35 0.01 0.51 0.493 1.6
## XCB 0.21 0.86 0.11 0.79 0.205 1.2
## XFB 0.23 0.82 -0.01 0.72 0.276 1.2
## ZYB 0.24 0.57 0.68 0.85 0.150 2.2
## AUB 0.12 0.71 0.56 0.84 0.159 2.0
## WCB 0.01 0.59 0.53 0.63 0.373 2.0
## ASB 0.26 0.65 0.28 0.57 0.433 1.7
## BPL 0.55 -0.05 0.42 0.48 0.521 1.9
## NPH 0.53 0.08 0.56 0.60 0.403 2.0
## NLH 0.54 0.14 0.46 0.52 0.476 2.1
## JUB 0.27 0.50 0.69 0.81 0.195 2.2
## NLB 0.17 0.19 0.28 0.14 0.859 2.5
## MAB 0.21 0.24 0.70 0.59 0.414 1.4
## MDH 0.43 0.10 0.41 0.36 0.639 2.1
## MDB 0.36 0.22 0.43 0.37 0.634 2.5
## OBH -0.05 -0.05 0.58 0.34 0.659 1.0
## OBB 0.43 0.31 0.41 0.45 0.547 2.8
## GS 0.75 0.61 0.10 0.95 0.055 2.0
##
## RC2 RC3 RC1
## SS loadings 5.01 4.15 3.82
## Proportion Var 0.24 0.20 0.18
## Cumulative Var 0.24 0.44 0.62
## Proportion Explained 0.39 0.32 0.29
## Cumulative Proportion 0.39 0.71 1.00
##
## Mean item complexity = 1.8
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 1486.9 with prob < 1.3e-218
##
## Fit based upon off diagonal values = 0.97
MG_3.2<-c3.2$Population
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum
## # A tibble: 7 x 4
## MG_3.2 RC2 RC3 RC1
## * <fct> <dbl> <dbl> <dbl>
## 1 ATAYAL -0.476 -0.162 0.114
## 2 BERG -0.541 1.06 -0.109
## 3 EGYPT 0.753 -0.621 -0.679
## 4 NORSE 0.673 -0.0470 -0.194
## 5 PERU -0.623 -0.280 0.273
## 6 SANTA CR -0.490 -0.338 1.01
## 7 ZALAVAR 0.432 0.323 -0.305
sort(pcs3.2$loadings[,1], decreasing=TRUE)
## NOL GOL BNL GS BBH BPL
## 0.895903281 0.887071615 0.842810769 0.750678856 0.618526701 0.551149547
## NLH NPH OBB MDH MDB JUB
## 0.542108196 0.526050377 0.434333091 0.429249817 0.362847286 0.270484584
## ASB ZYB XFB XCB MAB NLB
## 0.259273300 0.241773109 0.226248612 0.211438282 0.205471853 0.168572776
## AUB WCB OBH
## 0.120274391 0.007265324 -0.053288600
sort(pcs3.2$loadings[,2], decreasing=TRUE)
## XCB XFB AUB ASB GS WCB
## 0.85891822 0.82038853 0.71481700 0.64884676 0.60981639 0.58726362
## ZYB JUB BBH OBB GOL MAB
## 0.57400348 0.49981751 0.35222278 0.30624755 0.24057337 0.23551827
## MDB NLB NOL BNL NLH MDH
## 0.21671218 0.18882950 0.18015571 0.15596295 0.14449093 0.10018006
## NPH OBH BPL
## 0.08483362 -0.04703090 -0.05010399
sort(pcs3.2$loadings[,3], decreasing=TRUE)
## MAB JUB ZYB OBH AUB NPH
## 0.699007687 0.694424638 0.679444615 0.579609368 0.561393584 0.559527217
## WCB NLH MDB BPL OBB MDH
## 0.530778355 0.457755831 0.433050563 0.415073543 0.413375434 0.408396880
## ASB NLB BNL GOL XCB GS
## 0.280065180 0.276819378 0.176985767 0.117638181 0.110672366 0.098798418
## NOL BBH XFB
## 0.092216666 0.009104812 -0.008135666
ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC3, y=RC1, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC2, y=RC1, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
MG_3.2_sex<-c3.2$Sex
MG_3.2_pcs_sex<-cbind.data.frame(MG_3.2_sex, pcs3.2$scores)
pc_3.2_sum_sex<-MG_3.2_pcs_sex %>% group_by(MG_3.2_sex) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum_sex
## # A tibble: 2 x 4
## MG_3.2_sex RC2 RC3 RC1
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.503 -0.344 -0.448
## 2 M 0.465 0.318 0.414
ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
morphogroup_1<-cutree(m3.2_hc, k=4)
m3.2.1_sum<-m3.2_sum %>% mutate(cluster=morphogroup_1)
m3.2.1_sum$cluster<-as.factor(m3.2.1_sum$cluster)
m3.2.1<-m3.2.1_sum %>% filter(cluster==1) %>% droplevels()
m3.2.2<-m3.2.1_sum %>% filter(cluster==2) %>% droplevels()
m3.2.3<-m3.2.1_sum %>% filter(cluster==3) %>% droplevels()
m3.2.4<-m3.2.1_sum %>% filter(cluster==4) %>% droplevels()
c3.2.1d<-m3.2.1$Population
c3.2.1<-c3.2 %>% filter(Population %in% c3.2.1d) %>% droplevels()
c3.2.1$Morphogroup="3.2.1"
c3.2.1$Morphogroup <- as.factor(c3.2.1$Morphogroup)
c3.2.2d<-m3.2.2$Population
c3.2.2<-c3.2 %>% filter(Population %in% c3.2.2d) %>% droplevels()
c3.2.2$Morphogroup="3.2.2"
c3.2.2$Morphogroup <- as.factor(c3.2.2$Morphogroup)
c3.2.3d<-m3.2.3$Population
c3.2.3<-c3.2 %>% filter(Population %in% c3.2.3d) %>% droplevels()
c3.2.3$Morphogroup="3.2.3"
c3.2.3$Morphogroup <- as.factor(c3.2.3$Morphogroup)
c3.2.4d<-m3.2.4$Population
c3.2.4<-c3.2 %>% filter(Population %in% c3.2.4d) %>% droplevels()
c3.2.4$Morphogroup="3.2.4" ##
c3.2.4$Morphogroup <- as.factor(c3.2.4$Morphogroup)
hwlr_c3.2<-rbind.data.frame(c3.2.1, c3.2.2)
hwlr_c3.2a<-rbind.data.frame(c3.2.3, c3.2.4)
hwlr_c3.2<-rbind.data.frame(hwlr_c3.2, hwlr_c3.2a)
morpho3.2<-c3.2.1
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m3.2_dsq<-dist(scale(m3.2_sum[2:22]))
m3.2_hc<-hclust(m3.2_dsq, method='ward.D2')
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.2.1[4:24]), rotate = 'varimax', nfactors=3)
pcs3.2
## Principal Components Analysis
## Call: principal(r = scale(c3.2.1[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC3 RC2 h2 u2 com
## GOL 0.79 0.33 0.23 0.78 0.22 1.5
## NOL 0.76 0.29 0.30 0.76 0.24 1.6
## BNL 0.85 0.23 0.07 0.78 0.22 1.2
## BBH 0.63 0.43 -0.12 0.60 0.40 1.8
## XCB 0.05 0.83 0.31 0.79 0.21 1.3
## XFB 0.18 0.71 0.25 0.59 0.41 1.4
## ZYB 0.60 0.61 0.26 0.81 0.19 2.3
## AUB 0.38 0.80 0.14 0.81 0.19 1.5
## WCB 0.51 0.56 -0.15 0.59 0.41 2.1
## ASB 0.22 0.66 0.08 0.49 0.51 1.2
## BPL 0.85 -0.01 -0.01 0.73 0.27 1.0
## NPH 0.47 0.19 0.76 0.84 0.16 1.8
## NLH 0.47 0.31 0.63 0.72 0.28 2.4
## JUB 0.70 0.46 0.20 0.75 0.25 1.9
## NLB 0.54 0.15 -0.44 0.51 0.49 2.1
## MAB 0.58 0.40 0.14 0.52 0.48 1.9
## MDH 0.44 0.22 0.37 0.38 0.62 2.4
## MDB 0.48 0.31 0.10 0.34 0.66 1.8
## OBH -0.14 0.20 0.77 0.66 0.34 1.2
## OBB 0.48 0.38 0.24 0.44 0.56 2.4
## GS 0.65 0.64 0.15 0.86 0.14 2.1
##
## RC1 RC3 RC2
## SS loadings 6.57 4.67 2.50
## Proportion Var 0.31 0.22 0.12
## Cumulative Var 0.31 0.54 0.65
## Proportion Explained 0.48 0.34 0.18
## Cumulative Proportion 0.48 0.82 1.00
##
## Mean item complexity = 1.8
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 312.08 with prob < 1.9e-13
##
## Fit based upon off diagonal values = 0.98
MG_3.2<-c3.2.1$Population
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum
## # A tibble: 2 x 4
## MG_3.2 RC2 RC3 RC1
## * <fct> <dbl> <dbl> <dbl>
## 1 ATAYAL -0.766 -0.0100 0.369
## 2 PERU 0.327 0.00427 -0.158
sort(pcs3.2$loadings[,1], decreasing=TRUE)
## BPL BNL GOL NOL JUB GS BBH
## 0.8525344 0.8522404 0.7888121 0.7601387 0.7038526 0.6500897 0.6305836
## ZYB MAB NLB WCB OBB MDB NPH
## 0.6007675 0.5844356 0.5423986 0.5053035 0.4821375 0.4813822 0.4724094
## NLH MDH AUB ASB XFB XCB OBH
## 0.4708698 0.4390764 0.3823714 0.2161252 0.1835291 0.0490214 -0.1432866
sort(pcs3.2$loadings[,2], decreasing=TRUE)
## XCB AUB XFB ASB GS ZYB WCB
## 0.8347212 0.8000699 0.7050149 0.6607510 0.6430081 0.6133600 0.5618911
## JUB BBH MAB OBB GOL NLH MDB
## 0.4642675 0.4287210 0.3979202 0.3833819 0.3300225 0.3129504 0.3113318
## NOL BNL MDH OBH NPH NLB BPL
## 0.2933008 0.2256500 0.2157440 0.2023385 0.1929322 0.1478814 -0.0121618
sort(pcs3.2$loadings[,3], decreasing=TRUE)
## OBH NPH NLH MDH XCB NOL
## 0.773637958 0.764008179 0.631241539 0.369075749 0.305149164 0.303833977
## ZYB XFB OBB GOL JUB GS
## 0.260848874 0.246715959 0.238915737 0.231974793 0.200916561 0.153087209
## AUB MAB MDB ASB BNL BPL
## 0.141299939 0.137860681 0.102021399 0.084457830 0.067086124 -0.008785728
## BBH WCB NLB
## -0.120113615 -0.148521768 -0.441542146
ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC3, y=RC1, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC2, y=RC1, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
MG_3.2_sex<-c3.2.1$Sex
MG_3.2_pcs_sex<-cbind.data.frame(MG_3.2_sex, pcs3.2$scores)
pc_3.2_sum_sex<-MG_3.2_pcs_sex %>% group_by(MG_3.2_sex) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum_sex
## # A tibble: 2 x 4
## MG_3.2_sex RC2 RC3 RC1
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.157 -0.358 -0.672
## 2 M 0.136 0.311 0.584
ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
morpho3.2<-c3.2.2
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m3.2_dsq<-dist(scale(m3.2_sum[2:22]))
m3.2_hc<-hclust(m3.2_dsq, method='ward.D2')
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.2.2[4:24]), rotate = 'varimax', nfactors=3)
pcs3.2
## Principal Components Analysis
## Call: principal(r = scale(c3.2.2[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC2 RC1 RC3 h2 u2 com
## GOL 0.09 0.80 0.38 0.80 0.20 1.5
## NOL 0.03 0.79 0.40 0.79 0.21 1.5
## BNL 0.12 0.86 0.14 0.78 0.22 1.1
## BBH 0.25 0.52 0.17 0.36 0.64 1.7
## XCB 0.86 0.02 0.19 0.78 0.22 1.1
## XFB 0.80 -0.06 0.16 0.66 0.34 1.1
## ZYB 0.77 0.42 0.26 0.83 0.17 1.8
## AUB 0.83 0.23 0.24 0.80 0.20 1.3
## WCB 0.75 0.13 0.04 0.57 0.43 1.1
## ASB 0.64 0.29 0.21 0.54 0.46 1.6
## BPL 0.10 0.80 -0.15 0.67 0.33 1.1
## NPH 0.22 0.36 0.74 0.72 0.28 1.6
## NLH 0.23 0.28 0.77 0.72 0.28 1.5
## JUB 0.70 0.47 0.18 0.74 0.26 1.9
## NLB 0.37 0.34 -0.24 0.31 0.69 2.7
## MAB 0.54 0.51 0.03 0.55 0.45 2.0
## MDH 0.23 0.37 0.37 0.33 0.67 2.6
## MDB 0.30 0.48 0.10 0.33 0.67 1.8
## OBH 0.11 -0.09 0.81 0.68 0.32 1.1
## OBB 0.37 0.36 0.40 0.42 0.58 3.0
## GS 0.53 0.63 0.34 0.79 0.21 2.5
##
## RC2 RC1 RC3
## SS loadings 5.31 4.95 2.92
## Proportion Var 0.25 0.24 0.14
## Cumulative Var 0.25 0.49 0.63
## Proportion Explained 0.40 0.38 0.22
## Cumulative Proportion 0.40 0.78 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 596.78 with prob < 1.4e-54
##
## Fit based upon off diagonal values = 0.97
MG_3.2<-c3.2.2$Population
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC2:RC3, mean))
pc_3.2_sum
## # A tibble: 3 x 4
## MG_3.2 RC2 RC1 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 BERG 0.617 -0.669 -0.0918
## 2 NORSE -0.492 0.388 0.269
## 3 ZALAVAR -0.134 0.309 -0.200
sort(pcs3.2$loadings[,1], decreasing=TRUE)
## XCB AUB XFB ZYB WCB JUB ASB
## 0.86490100 0.82999142 0.79701842 0.76561854 0.74517035 0.70262111 0.64007475
## MAB GS NLB OBB MDB BBH NLH
## 0.54270025 0.52885844 0.37306130 0.36892056 0.29752109 0.24530769 0.22693889
## MDH NPH BNL OBH BPL GOL NOL
## 0.22660854 0.21771172 0.12224100 0.10848727 0.09883848 0.09169986 0.02951558
sort(pcs3.2$loadings[,2], decreasing=TRUE)
## BNL GOL BPL NOL GS BBH
## 0.86098754 0.80451272 0.79888348 0.79378560 0.62877825 0.52133201
## MAB MDB JUB ZYB MDH NPH
## 0.50680953 0.47727027 0.46628550 0.41504722 0.36792154 0.35617599
## OBB NLB ASB NLH AUB WCB
## 0.35506169 0.34496337 0.29012646 0.28018645 0.23234129 0.12566876
## XCB XFB OBH
## 0.01509011 -0.05844498 -0.09073198
sort(pcs3.2$loadings[,3], decreasing=TRUE)
## OBH NLH NPH NOL OBB GOL
## 0.81284267 0.76665315 0.74061046 0.40400980 0.39563577 0.38453217
## MDH GS ZYB AUB ASB XCB
## 0.37397263 0.34030031 0.25880915 0.24185482 0.21357967 0.18566600
## JUB BBH XFB BNL MDB WCB
## 0.17659953 0.17341415 0.15827605 0.14401426 0.10176963 0.03756273
## MAB BPL NLB
## 0.03072741 -0.14721713 -0.23825192
ggplot(MG_3.2_pcs, aes(x=RC2, y=RC1, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
MG_3.2_sex<-c3.2.2$Sex
MG_3.2_pcs_sex<-cbind.data.frame(MG_3.2_sex, pcs3.2$scores)
pc_3.2_sum_sex<-MG_3.2_pcs_sex %>% group_by(MG_3.2_sex) %>% dplyr::summarize(across(RC2:RC3, mean))
pc_3.2_sum_sex
## # A tibble: 2 x 4
## MG_3.2_sex RC2 RC1 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.513 -0.507 -0.305
## 2 M 0.478 0.473 0.284
ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC1, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
morpho3.2<-c3.2.3
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.2.3[4:24]), rotate = 'varimax', nfactors=3)
pcs3.2
## Principal Components Analysis
## Call: principal(r = scale(c3.2.3[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC3 RC2 h2 u2 com
## GOL 0.70 0.41 0.28 0.74 0.260 2.0
## NOL 0.69 0.36 0.30 0.70 0.303 1.9
## BNL 0.45 0.60 0.26 0.63 0.374 2.3
## BBH 0.71 0.30 0.11 0.60 0.401 1.4
## XCB 0.84 0.02 0.09 0.71 0.290 1.0
## XFB 0.81 -0.07 0.23 0.71 0.286 1.2
## ZYB 0.62 0.53 0.35 0.78 0.218 2.6
## AUB 0.64 0.42 0.21 0.63 0.367 2.0
## WCB 0.35 0.30 0.34 0.33 0.667 2.9
## ASB 0.62 0.25 -0.17 0.48 0.522 1.5
## BPL 0.16 0.77 0.05 0.62 0.384 1.1
## NPH 0.31 0.32 0.70 0.69 0.309 1.8
## NLH 0.17 0.33 0.76 0.71 0.293 1.5
## JUB 0.52 0.55 0.40 0.74 0.257 2.8
## NLB -0.03 0.56 0.10 0.32 0.675 1.1
## MAB 0.26 0.62 0.13 0.47 0.527 1.4
## MDH 0.26 0.57 0.22 0.44 0.565 1.7
## MDB 0.26 0.67 0.01 0.52 0.482 1.3
## OBH -0.04 -0.22 0.77 0.65 0.354 1.2
## OBB 0.24 0.34 0.59 0.53 0.474 2.0
## GS 0.89 0.30 0.20 0.92 0.079 1.3
##
## RC1 RC3 RC2
## SS loadings 5.82 4.18 2.91
## Proportion Var 0.28 0.20 0.14
## Cumulative Var 0.28 0.48 0.61
## Proportion Explained 0.45 0.32 0.23
## Cumulative Proportion 0.45 0.77 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
## with the empirical chi square 276.92 with prob < 1.4e-09
##
## Fit based upon off diagonal values = 0.97
MG_3.2<-c3.2.3$Sex
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_3.2_sum
## # A tibble: 2 x 4
## MG_3.2 RC1 RC3 RC2
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.476 -0.646 -0.280
## 2 M 0.435 0.591 0.256
sort(pcs3.2$loadings[,1], decreasing=TRUE)
## GS XCB XFB BBH GOL NOL
## 0.89015553 0.83741294 0.80940160 0.70552502 0.70293105 0.68970460
## AUB ASB ZYB JUB BNL WCB
## 0.64315093 0.62065403 0.62019642 0.52482286 0.45344446 0.35295531
## NPH MAB MDH MDB OBB NLH
## 0.31254082 0.26352173 0.26049508 0.25873643 0.24169844 0.16989904
## BPL NLB OBH
## 0.15593991 -0.03002931 -0.04459484
sort(pcs3.2$loadings[,2], decreasing=TRUE)
## BPL MDB MAB BNL MDH NLB
## 0.76765937 0.67154226 0.62229287 0.59598262 0.56671740 0.56019762
## JUB ZYB AUB GOL NOL OBB
## 0.55301409 0.52542818 0.41719877 0.40841653 0.35823219 0.34062999
## NLH NPH GS WCB BBH ASB
## 0.32586266 0.32481984 0.30083763 0.29916361 0.29683548 0.24986661
## XCB XFB OBH
## 0.02082444 -0.06856437 -0.21715234
sort(pcs3.2$loadings[,3], decreasing=TRUE)
## OBH NLH NPH OBB JUB ZYB
## 0.772450823 0.756143106 0.698144070 0.592833420 0.402246590 0.348642967
## WCB NOL GOL BNL XFB MDH
## 0.344653735 0.304674016 0.280730359 0.256089813 0.232435434 0.215422836
## AUB GS MAB BBH NLB XCB
## 0.213243694 0.196237470 0.128044155 0.113047980 0.099967528 0.090444832
## BPL MDB ASB
## 0.051725528 0.009595866 -0.174393720
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5) ##only sex because only Egypt is here.
ggplot(MG_3.2_pcs, aes(x=RC3, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
morpho3.2<-c3.2.4
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.2.4[4:24]), rotate = 'varimax', nfactors=3)
pcs3.2
## Principal Components Analysis
## Call: principal(r = scale(c3.2.4[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## GOL 0.58 0.61 0.11 0.72 0.278 2.1
## NOL 0.55 0.60 0.10 0.67 0.329 2.0
## BNL 0.35 0.58 0.48 0.68 0.321 2.6
## BBH 0.62 0.22 0.40 0.60 0.400 2.0
## XCB 0.89 0.07 0.13 0.82 0.185 1.1
## XFB 0.83 0.05 0.23 0.75 0.254 1.2
## ZYB 0.67 0.40 0.53 0.90 0.105 2.6
## AUB 0.80 0.25 0.41 0.87 0.128 1.7
## WCB 0.59 0.48 0.19 0.61 0.389 2.1
## ASB 0.66 0.31 0.30 0.62 0.378 1.8
## BPL 0.03 0.77 0.20 0.63 0.366 1.1
## NPH 0.34 0.32 0.70 0.71 0.285 1.9
## NLH 0.43 0.32 0.71 0.79 0.206 2.1
## JUB 0.59 0.50 0.48 0.83 0.170 2.9
## NLB 0.20 0.63 -0.05 0.44 0.560 1.2
## MAB 0.29 0.58 0.42 0.60 0.403 2.3
## MDH 0.07 0.61 0.46 0.59 0.414 1.9
## MDB 0.23 0.59 0.26 0.47 0.529 1.7
## OBH 0.16 -0.05 0.75 0.60 0.403 1.1
## OBB 0.26 0.35 0.66 0.63 0.373 1.9
## GS 0.86 0.35 0.27 0.93 0.072 1.5
##
## RC1 RC2 RC3
## SS loadings 6.17 4.41 3.87
## Proportion Var 0.29 0.21 0.18
## Cumulative Var 0.29 0.50 0.69
## Proportion Explained 0.43 0.31 0.27
## Cumulative Proportion 0.43 0.73 1.00
##
## Mean item complexity = 1.9
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.06
## with the empirical chi square 167.64 with prob < 0.15
##
## Fit based upon off diagonal values = 0.99
MG_3.2<-c3.2.4$Sex
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.2_sum
## # A tibble: 2 x 4
## MG_3.2 RC1 RC2 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.463 -0.492 -0.365
## 2 M 0.463 0.492 0.365
sort(pcs3.2$loadings[,1], decreasing=TRUE)
## XCB GS XFB AUB ZYB ASB BBH
## 0.89105449 0.85742462 0.82991697 0.79705759 0.67393470 0.66330296 0.62453357
## WCB JUB GOL NOL NLH BNL NPH
## 0.59212804 0.58835121 0.58246190 0.54846547 0.43034989 0.34761977 0.34175726
## MAB OBB MDB NLB OBH MDH BPL
## 0.28647961 0.25868761 0.23175159 0.19826026 0.16299742 0.06842462 0.02673032
sort(pcs3.2$loadings[,2], decreasing=TRUE)
## BPL NLB MDH GOL NOL MDB
## 0.77013201 0.63139805 0.60948339 0.60853319 0.60001462 0.59113274
## MAB BNL JUB WCB ZYB GS
## 0.58256368 0.57524705 0.49823495 0.47551119 0.40251979 0.34991925
## OBB NLH NPH ASB AUB BBH
## 0.34665523 0.32154084 0.31963602 0.30534270 0.25491521 0.22008509
## XCB XFB OBH
## 0.07182635 0.05144901 -0.05130351
sort(pcs3.2$loadings[,3], decreasing=TRUE)
## OBH NLH NPH OBB ZYB JUB
## 0.75333256 0.71058630 0.70425226 0.66327498 0.52838641 0.48495636
## BNL MDH MAB AUB BBH ASB
## 0.47720820 0.45841930 0.41952613 0.41441391 0.40147485 0.29834064
## GS MDB XFB BPL WCB XCB
## 0.26516857 0.26000628 0.23306299 0.20101232 0.18555430 0.12683885
## GOL NOL NLB
## 0.10994892 0.10069360 -0.04778089
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
morpho3.3<-c3.3
m3.3_sum<-morpho3.3 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m3.3_dsq<-dist(scale(m3.3_sum[2:22]))
m3.3_hc<-hclust(m3.3_dsq, method='ward.D2')
NbClust(data=as.numeric(scale(m3.3_sum[2:22])), distance='euclidean', method='ward.D2', min.nc=2, max.nc=15)
## Warning in max(DiffLev[, 5], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]
## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]
## *******************************************************************
## * Among all indices:
## * 1 proposed 3 as the best number of clusters
## * 4 proposed 4 as the best number of clusters
## * 1 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 4
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman
## 2 0.9380 10.0514 199.2684 -1.9551 154.5974 123.1362 NA 30.7841 2.4109
## 3 0.3937 111.8270 243.2780 -2.6629 244.9809 135.2185 NA 15.0243 5.9887
## 4 10.9996 300.6327 57.6027 -1.8599 329.9802 122.4446 NA 7.6528 12.7205
## 5 0.6365 343.4950 38.7303 -3.1133 369.4292 139.8903 NA 5.5956 17.7647
## 6 1.1738 367.4382 32.5137 -3.5203 411.4521 144.3132 NA 4.0087 25.1930
## 7 2.2830 391.2929 41.0732 -3.4465 454.0373 140.0935 NA 2.8591 35.7255
## 8 0.1313 453.1831 75.3643 -2.8321 500.0730 126.9776 NA 1.9840 51.9227
## 9 3.6415 653.6286 16.6279 -2.4626 538.4716 118.4896 NA 1.4628 70.7784
## 10 0.8433 659.7349 38.0383 -2.1766 572.5170 111.6473 NA 1.1165 93.0462
## 11 1.0540 785.4395 10.2768 -2.6025 592.8585 114.9532 NA 0.9500 109.5232
## 12 2.2142 772.0067 25.1089 -2.7893 614.8940 114.8541 NA 0.7976 130.6453
## 13 1.7912 858.0391 15.2087 -2.6863 639.7756 110.6393 NA 0.6547 159.3861
## 14 0.7135 891.8431 12.7757 -2.4149 665.8529 104.3268 NA 0.5323 196.2648
## 15 1.0479 915.2719 18.9546 -2.7253 681.4556 105.8140 NA 0.4703 222.2691
## Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky Ball
## 2 3.4109 0.2962 0.6312 0.5911 0.2384 204.4194 1.0483 0.1936 48.5635
## 3 6.9887 0.2547 0.5861 0.5586 0.2694 157.2620 0.8885 0.4637 12.4187
## 4 13.7205 0.2782 0.5832 0.5718 0.2644 77.9177 0.8956 0.4693 3.1278
## 5 18.7647 0.2772 0.5662 0.5548 0.2576 97.9649 0.9330 0.4287 1.6997
## 6 26.1930 0.2350 0.5636 0.5460 0.2727 66.6831 0.8549 0.3955 1.0730
## 7 36.7255 0.2901 0.5571 0.5305 0.2311 103.1399 1.0744 0.3687 0.7236
## 8 52.9227 0.2794 0.5503 0.5639 0.1107 144.5574 2.5361 0.3472 0.4707
## 9 71.7784 0.2412 0.5234 0.5801 0.2732 45.2190 0.8374 0.3297 0.2553
## 10 94.0462 0.2411 0.5356 0.5969 0.3062 36.2596 0.7110 0.3132 0.2012
## 11 110.5232 0.2206 0.5323 0.5865 0.2372 51.4640 1.0091 0.2993 0.1377
## 12 131.6453 0.1975 0.5352 0.5901 0.2522 20.7608 0.8650 0.2868 0.1159
## 13 160.3861 0.2281 0.5135 0.5933 0.2009 31.8147 1.1783 0.2758 0.0877
## 14 197.2648 0.3115 0.5015 0.5951 0.1475 46.2509 1.7130 0.2660 0.0718
## 15 223.2691 0.2986 0.4841 0.5880 0.2514 44.6653 0.9305 0.2571 0.0601
## Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 0.6431 0.7624 0.3763 0.0377 0.0095 6.8675 0.4216 1.2302
## 3 0.6411 0.8089 0.5211 0.0446 0.0118 4.8489 0.2892 0.6765
## 4 0.5904 0.7886 0.6591 0.0715 0.0134 5.0024 0.2092 0.4126
## 5 0.5722 1.1297 0.6887 0.0801 0.0145 5.4752 0.1813 0.3848
## 6 0.5274 0.6833 0.7806 0.0801 0.0145 7.2766 0.1474 0.2113
## 7 0.5071 0.9042 0.7938 0.1143 0.0153 6.9966 0.1268 0.1687
## 8 0.4602 0.6219 0.8551 0.0899 0.0153 9.2436 0.1033 0.1251
## 9 0.4426 0.7075 0.8422 0.0899 0.0156 9.0203 0.0854 0.0700
## 10 0.4242 1.0354 0.8347 0.0957 0.0156 11.2233 0.0759 0.0586
## 11 0.4066 1.0360 0.8549 0.0957 0.0157 14.4855 0.0687 0.0480
## 12 0.3853 0.3438 0.8796 0.0957 0.0157 16.7589 0.0621 0.0524
## 13 0.3826 0.4846 0.8547 0.1166 0.0158 16.8058 0.0586 0.0314
## 14 0.3777 0.9736 0.8326 0.1700 0.0159 16.2857 0.0544 0.0210
## 15 0.3709 1.2523 0.8358 0.1700 0.0159 19.3347 0.0503 0.0246
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.1209 465.2205 0.3098
## 3 0.1091 473.6274 0.3498
## 4 0.0038 7394.6572 0.3521
## 5 0.0351 934.5356 0.3409
## 6 -0.0157 -1619.3999 0.3640
## 7 0.0205 1480.6451 0.3080
## 8 -0.0770 -251.6212 0.1287
## 9 -0.0885 -209.1134 0.3729
## 10 -0.1009 -174.6173 0.4116
## 11 -0.1009 -174.6173 0.3301
## 12 -0.2932 -30.8768 0.3833
## 13 -0.2595 -38.8310 0.3093
## 14 -0.2595 -38.8310 0.2269
## 15 -0.1143 -146.1979 0.3500
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW
## Number_clusters 4.0000 15.0000 4.0000 4.0000 3.0000 4.0000 -Inf
## Value_Index 10.9996 915.2719 185.6753 -1.8599 90.3835 30.2196 3
## TraceW Friedman Rubin Cindex DB Silhouette Duda
## Number_clusters 8.3883 36.8788 -10.8745 0.1975 0.4841 0.5969 0.2384
## Value_Index 14.0000 14.0000 12.0000 15.0000 10.0000 2.0000 2.0000
## PseudoT2 Beale Ratkowsky Ball PtBiserial Frey McClain Dunn
## Number_clusters 204.4194 1.0483 0.4693 36.1447 0.6431 NA 0.3763 0.17
## Value_Index 2.0000 4.0000 3.0000 2.0000 1.0000 2 14.0000 0.00
## Hubert SDindex Dindex SDbw
## Number_clusters 0 4.8489 0 0.021
## Value_Index 3 0.0000 14 4.000
##
## $Best.partition
## [1] 1 2 3 4 4 3 1 2 4 4 4 1 3 2 2 3 3 1 2 3 4 2 1 1 4 1 1 2 4 3 2 3 4 4 2 1 1
## [38] 2 3 4 1 4 3 2 1 4 3 4 3 2 3 2 3 1 1 2 1 2 4 4 1 2 4 2 1 4 4 4 2 4 3 1 3 2
## [75] 2 3 4 1 1 2 4 4 3 4 2 3 1 4 2 1 3 4 1 2 3 3 3 3 2 4 3 3 4 2 3 4 1 4 3 3 2
## [112] 4 4 3 1 2 3 2 3 3 4 4 1 2 4 1
m3.3_labs<-m3.3_sum$Population
plot(m3.3_hc, labels=m3.3_labs, hang=-1, cex=0.8)
hc_morph_3.3<-rect.hclust(m3.3_hc, k=4, border=2:7)
pcs3.3<-principal(scale(c3.3[4:24]), rotate = 'varimax', nfactors=3)
pcs3.3
## Principal Components Analysis
## Call: principal(r = scale(c3.3[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## GOL 0.70 0.35 0.26 0.69 0.312 1.8
## NOL 0.62 0.38 0.37 0.66 0.340 2.4
## BNL 0.61 0.19 0.52 0.67 0.325 2.2
## BBH 0.30 0.58 0.31 0.52 0.475 2.1
## XCB 0.12 0.89 -0.05 0.81 0.194 1.0
## XFB 0.02 0.81 0.25 0.72 0.279 1.2
## ZYB 0.82 0.30 0.15 0.78 0.225 1.3
## AUB 0.72 0.43 -0.01 0.70 0.296 1.6
## WCB 0.54 0.46 0.05 0.51 0.487 2.0
## ASB 0.59 0.40 -0.05 0.51 0.486 1.8
## BPL 0.77 -0.07 0.26 0.66 0.342 1.2
## NPH 0.29 0.16 0.79 0.73 0.269 1.4
## NLH 0.32 0.24 0.73 0.68 0.317 1.6
## JUB 0.75 0.26 0.28 0.71 0.288 1.5
## NLB 0.25 0.32 0.12 0.18 0.822 2.2
## MAB 0.66 0.31 0.14 0.56 0.443 1.5
## MDH 0.49 0.14 0.31 0.35 0.646 1.9
## MDB 0.63 0.10 -0.01 0.41 0.591 1.1
## OBH -0.08 0.04 0.75 0.57 0.430 1.0
## OBB 0.71 0.07 0.19 0.54 0.457 1.2
## GS 0.48 0.79 0.23 0.91 0.093 1.8
##
## RC1 RC2 RC3
## SS loadings 6.4 3.75 2.73
## Proportion Var 0.3 0.18 0.13
## Cumulative Var 0.3 0.48 0.61
## Proportion Explained 0.5 0.29 0.21
## Cumulative Proportion 0.5 0.79 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 1075.31 with prob < 1.3e-139
##
## Fit based upon off diagonal values = 0.98
MG_3.3<-c3.3$Population
MG_3.3_pcs<-cbind.data.frame(MG_3.3, pcs3.3$scores)
pc_3.3_sum<-MG_3.3_pcs %>% group_by(MG_3.3) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.3_sum
## # A tibble: 6 x 4
## MG_3.3 RC1 RC2 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 AUSTRALI 0.639 -0.468 0.0526
## 2 DOGON -0.875 0.359 0.145
## 3 TASMANIA 0.378 0.533 -1.08
## 4 TEITA -0.368 -0.503 0.280
## 5 TOLAI 0.458 -0.451 -0.0791
## 6 ZULU -0.303 0.562 0.595
sort(pcs3.3$loadings[,1], decreasing=TRUE)
## ZYB BPL JUB AUB OBB GOL
## 0.81513007 0.76624234 0.75283764 0.71812683 0.71014323 0.70431384
## MAB MDB NOL BNL ASB WCB
## 0.66369533 0.63143175 0.61659633 0.60830861 0.59254035 0.54434761
## MDH GS NLH BBH NPH NLB
## 0.48836250 0.48179066 0.31580502 0.29756664 0.29442714 0.25153577
## XCB XFB OBH
## 0.12476891 0.02150161 -0.07591439
sort(pcs3.3$loadings[,2], decreasing=TRUE)
## XCB XFB GS BBH WCB AUB
## 0.88785083 0.81168217 0.78945084 0.58286810 0.46271837 0.43345943
## ASB NOL GOL NLB MAB ZYB
## 0.40074436 0.37677795 0.34959246 0.31796682 0.31248415 0.29590267
## JUB NLH BNL NPH MDH MDB
## 0.26321059 0.23993404 0.18797192 0.15986320 0.13660555 0.10191014
## OBB OBH BPL
## 0.06631841 0.03510919 -0.06662561
sort(pcs3.3$loadings[,3], decreasing=TRUE)
## NPH OBH NLH BNL NOL MDH
## 0.78648765 0.75033383 0.72518998 0.51889025 0.37188746 0.31188407
## BBH JUB GOL BPL XFB GS
## 0.31080578 0.27518868 0.26460537 0.25704874 0.24855587 0.22699551
## OBB ZYB MAB NLB WCB AUB
## 0.18500319 0.15308169 0.13716781 0.11630065 0.04685713 -0.01147652
## MDB XCB ASB
## -0.01200930 -0.04584929 -0.05079294
ggplot(MG_3.3_pcs, aes(x=RC1, y=RC2, color = MG_3.3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum)
ggplot(MG_3.3_pcs, aes(x=RC2, y=RC3, color = MG_3.3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum, size=5)
ggplot(MG_3.3_pcs, aes(x=RC1, y=RC3, color = MG_3.3)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum, size=5)
MG_3.3_sex<-c3.3$Sex
MG_3.3_pcs_sex<-cbind.data.frame(MG_3.3_sex, pcs3.3$scores)
pc_3.3_sum_sex<-MG_3.3_pcs_sex %>% group_by(MG_3.3_sex) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.3_sum_sex
## # A tibble: 2 x 4
## MG_3.3_sex RC1 RC2 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.555 -0.323 -0.261
## 2 M 0.565 0.328 0.266
ggplot(MG_3.3_pcs_sex, aes(x=RC1, y=RC2, color = MG_3.3_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum_sex, size=5)
ggplot(MG_3.3_pcs_sex, aes(x=RC2, y=RC3, color = MG_3.3_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum_sex, size=5)
ggplot(MG_3.3_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.3_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.3_sum_sex, size=5)
morphogroup_1<-cutree(m3.3_hc, k=4)
m3.3.1_sum<-m3.3_sum %>% mutate(cluster=morphogroup_1)
m3.3.1_sum$cluster<-as.factor(m3.3.1_sum$cluster)
m3.3.1<-m3.3.1_sum %>% filter(cluster==1) %>% droplevels()
m3.3.2<-m3.3.1_sum %>% filter(cluster==2) %>% droplevels()
m3.3.3<-m3.3.1_sum %>% filter(cluster==3) %>% droplevels()
m3.3.4<-m3.3.1_sum %>% filter(cluster==4) %>% droplevels()
c3.3.1d<-m3.3.1$Population
c3.3.1<-c3.3 %>% filter(Population %in% c3.3.1d) %>% droplevels()
c3.3.1$Morphogroup="3.3.1"
c3.3.1$Morphogroup <- as.factor(c3.3.1$Morphogroup)
c3.3.2d<-m3.3.2$Population
c3.3.2<-c3.3 %>% filter(Population %in% c3.3.2d) %>% droplevels()
c3.3.2$Morphogroup="3.3.2"
c3.3.2$Morphogroup <- as.factor(c3.3.2$Morphogroup)
c3.3.3d<-m3.3.3$Population
c3.3.3<-c3.3 %>% filter(Population %in% c3.3.3d) %>% droplevels()
c3.3.3$Morphogroup="3.3.3"
c3.3.3$Morphogroup <- as.factor(c3.3.3$Morphogroup)
c3.3.4d<-m3.3.4$Population
c3.3.4<-c3.3 %>% filter(Population %in% c3.3.4d) %>% droplevels()
c3.3.4$Morphogroup="3.3.4"
c3.3.4$Morphogroup <- as.factor(c3.3.4$Morphogroup)
hwlr_c3.3<-rbind.data.frame(c3.3.1, c3.3.2)
hwlr_c3.3a<-rbind.data.frame(c3.3.3, c3.3.4)
hwlr_c3.3<-rbind.data.frame(hwlr_c3.3, hwlr_c3.3a)
morpho3.2<-c3.3.1 ## Honestly I stored this as a previously used object because I was too lazy to change the code to match...in practice, this variable would be generic and dynamic to be used over and over again for different workflows...that way I just need to change the input data and then have all of the operations I want run on it. A very basic idea of a for loop.
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m3.2_dsq<-dist(scale(m3.2_sum[2:22]))
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.3.1[4:24]), rotate = 'varimax', nfactors=3) #
pcs3.2 ## .
## Principal Components Analysis
## Call: principal(r = scale(c3.3.1[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC3 RC2 h2 u2 com
## GOL 0.53 0.36 0.58 0.75 0.25 2.7
## NOL 0.54 0.34 0.60 0.77 0.23 2.6
## BNL 0.34 0.64 0.46 0.74 0.26 2.4
## BBH 0.53 0.54 -0.10 0.59 0.41 2.1
## XCB 0.89 0.10 0.08 0.80 0.20 1.0
## XFB 0.75 0.23 0.00 0.61 0.39 1.2
## ZYB 0.58 0.61 0.26 0.78 0.22 2.4
## AUB 0.71 0.42 0.19 0.72 0.28 1.8
## WCB 0.58 0.34 0.21 0.50 0.50 1.9
## ASB 0.74 0.05 0.29 0.64 0.36 1.3
## BPL 0.13 0.78 0.22 0.67 0.33 1.2
## NPH 0.13 0.58 0.39 0.51 0.49 1.9
## NLH 0.22 0.41 0.63 0.61 0.39 2.0
## JUB 0.52 0.61 0.32 0.74 0.26 2.5
## NLB 0.10 0.64 0.12 0.44 0.56 1.1
## MAB 0.38 0.62 0.31 0.63 0.37 2.2
## MDH 0.42 0.56 0.00 0.49 0.51 1.9
## MDB 0.32 0.67 -0.26 0.62 0.38 1.8
## OBH -0.02 -0.07 0.75 0.57 0.43 1.0
## OBB 0.29 0.50 0.42 0.51 0.49 2.6
## GS 0.80 0.44 0.23 0.90 0.10 1.7
##
## RC1 RC3 RC2
## SS loadings 5.57 5.17 2.83
## Proportion Var 0.27 0.25 0.13
## Cumulative Var 0.27 0.51 0.65
## Proportion Explained 0.41 0.38 0.21
## Cumulative Proportion 0.41 0.79 1.00
##
## Mean item complexity = 1.9
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 390.39 with prob < 2.6e-23
##
## Fit based upon off diagonal values = 0.98
MG_3.2<-c3.3.1$Population
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_3.2_sum
## # A tibble: 2 x 4
## MG_3.2 RC1 RC3 RC2
## * <fct> <dbl> <dbl> <dbl>
## 1 AUSTRALI 0.124 -0.305 0.532
## 2 TOLAI -0.114 0.280 -0.488
sort(pcs3.2$loadings[,1], decreasing=TRUE)
## XCB GS XFB ASB AUB ZYB
## 0.88537343 0.80499186 0.74752131 0.74121016 0.71082425 0.58149911
## WCB NOL BBH GOL JUB MDH
## 0.58130207 0.54188336 0.53226545 0.52974124 0.51921850 0.42316296
## MAB BNL MDB OBB NLH NPH
## 0.37772634 0.33834952 0.32265066 0.29430046 0.22322938 0.13189999
## BPL NLB OBH
## 0.12552713 0.09531118 -0.02108935
sort(pcs3.2$loadings[,2], decreasing=TRUE)
## BPL MDB BNL NLB MAB ZYB JUB
## 0.7777084 0.6712337 0.6431414 0.6425547 0.6229960 0.6072098 0.6062875
## NPH MDH BBH OBB GS AUB NLH
## 0.5845295 0.5576770 0.5406984 0.4957012 0.4429494 0.4226782 0.4124539
## GOL WCB NOL XFB XCB ASB OBH
## 0.3593864 0.3403817 0.3351251 0.2346796 0.1026047 0.0475871 -0.0740300
sort(pcs3.2$loadings[,3], decreasing=TRUE)
## OBH NLH NOL GOL BNL OBB
## 0.748762322 0.625218271 0.599284365 0.583193965 0.460143827 0.418292581
## NPH JUB MAB ASB ZYB GS
## 0.391453343 0.323846038 0.312153402 0.292121476 0.264038787 0.229103594
## BPL WCB AUB NLB XCB XFB
## 0.215817252 0.213207561 0.191196618 0.117409150 0.079168933 -0.001983384
## MDH BBH MDB
## -0.004475984 -0.099970452 -0.257960601
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC3, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
MG_3.2_sex<-c3.3.1$Sex
MG_3.2_pcs_sex<-cbind.data.frame(MG_3.2_sex, pcs3.2$scores)
pc_3.2_sum_sex<-MG_3.2_pcs_sex %>% group_by(MG_3.2_sex) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum_sex
## # A tibble: 2 x 4
## MG_3.2_sex RC2 RC3 RC1
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.171 -0.625 -0.431
## 2 M 0.163 0.596 0.411
ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC3, y=RC2, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC2, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
morpho3.2<-c3.3.2 ## same lazy use as above. I'm not sorry. This is a lot of code.
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean)) #
m3.2_dsq<-dist(scale(m3.2_sum[2:22]))
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.3.2[4:24]), rotate = 'varimax', nfactors=3)
pcs3.2
## Principal Components Analysis
## Call: principal(r = scale(c3.3.2[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC3 RC2 h2 u2 com
## GOL 0.83 0.21 0.25 0.80 0.201 1.3
## NOL 0.85 0.18 0.23 0.81 0.186 1.2
## BNL 0.77 0.40 0.02 0.76 0.244 1.5
## BBH 0.24 0.16 0.67 0.54 0.463 1.4
## XCB -0.07 0.26 0.86 0.82 0.182 1.2
## XFB 0.17 0.23 0.76 0.65 0.349 1.3
## ZYB 0.56 0.58 0.27 0.72 0.278 2.4
## AUB 0.58 0.48 0.27 0.64 0.363 2.4
## WCB 0.10 0.63 0.33 0.52 0.484 1.6
## ASB 0.42 0.32 0.37 0.41 0.585 2.8
## BPL 0.53 0.57 -0.06 0.61 0.393 2.0
## NPH 0.62 0.23 0.26 0.50 0.498 1.7
## NLH 0.68 0.17 0.13 0.51 0.492 1.2
## JUB 0.52 0.65 0.12 0.71 0.293 2.0
## NLB 0.04 0.49 0.17 0.27 0.732 1.2
## MAB 0.23 0.58 0.40 0.55 0.446 2.1
## MDH 0.19 0.61 0.23 0.46 0.540 1.5
## MDB 0.23 0.63 0.09 0.45 0.546 1.3
## OBH 0.32 -0.01 0.37 0.24 0.763 2.0
## OBB 0.26 0.65 0.07 0.49 0.507 1.4
## GS 0.42 0.28 0.81 0.91 0.092 1.8
##
## RC1 RC3 RC2
## SS loadings 4.83 4.11 3.41
## Proportion Var 0.23 0.20 0.16
## Cumulative Var 0.23 0.43 0.59
## Proportion Explained 0.39 0.33 0.28
## Cumulative Proportion 0.39 0.72 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
## with the empirical chi square 472.97 with prob < 4e-35
##
## Fit based upon off diagonal values = 0.96
MG_3.2<-c3.3.2$Population
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC2, mean))
pc_3.2_sum
## # A tibble: 2 x 4
## MG_3.2 RC1 RC3 RC2
## * <fct> <dbl> <dbl> <dbl>
## 1 DOGON -0.448 0.113 0.473
## 2 TEITA 0.535 -0.135 -0.564
sort(pcs3.2$loadings[,1], decreasing=TRUE)
## NOL GOL BNL NLH NPH AUB
## 0.85251793 0.83343963 0.77247131 0.68046027 0.61729285 0.57549759
## ZYB BPL JUB GS ASB OBH
## 0.56378647 0.53068322 0.51585979 0.42367929 0.42261719 0.31708953
## OBB BBH MAB MDB MDH XFB
## 0.26411668 0.23873140 0.23070052 0.22939209 0.18996357 0.16536969
## WCB NLB XCB
## 0.10489125 0.04164548 -0.07210014
sort(pcs3.2$loadings[,2], decreasing=TRUE)
## JUB OBB WCB MDB MDH MAB
## 0.65278241 0.64633604 0.63064543 0.62724828 0.60874956 0.58495638
## ZYB BPL NLB AUB BNL ASB
## 0.57656055 0.56712171 0.48802103 0.48456408 0.39905990 0.31686491
## GS XCB NPH XFB GOL NOL
## 0.27973055 0.25805211 0.22667136 0.22659246 0.20632959 0.18236802
## NLH BBH OBH
## 0.16766249 0.16447259 -0.01319013
sort(pcs3.2$loadings[,3], decreasing=TRUE)
## XCB GS XFB BBH MAB OBH
## 0.86395400 0.80629841 0.75618449 0.67298885 0.39762229 0.36877633
## ASB WCB ZYB AUB NPH GOL
## 0.36816315 0.32809084 0.26778916 0.26552458 0.26414976 0.24829185
## NOL MDH NLB NLH JUB MDB
## 0.23150418 0.23085336 0.16627165 0.12928741 0.12008408 0.08650472
## OBB BNL BPL
## 0.07178029 0.02030907 -0.06233490
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC3, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
MG_3.2_sex<-c3.3.2$Sex
MG_3.2_pcs_sex<-cbind.data.frame(MG_3.2_sex, pcs3.2$scores)
pc_3.2_sum_sex<-MG_3.2_pcs_sex %>% group_by(MG_3.2_sex) %>% dplyr::summarize(across(RC2:RC1, mean))
pc_3.2_sum_sex
## # A tibble: 2 x 4
## MG_3.2_sex RC2 RC3 RC1
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.298 -0.426 -0.369
## 2 M 0.380 0.543 0.470
ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC3, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC3, y=RC2, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
ggplot(MG_3.2_pcs_sex, aes(x=RC1, y=RC2, color = MG_3.2_sex)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum_sex, size=5)
morpho3.2<-c3.3.3 ## see previous two sections.
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m3.2_dsq<-dist(scale(m3.2_sum[2:22]))
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.3.3[4:24]), rotate = 'varimax', nfactors=3)
pcs3.2
## Principal Components Analysis
## Call: principal(r = scale(c3.3.3[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## GOL 0.79 0.34 0.20 0.77 0.225 1.5
## NOL 0.78 0.37 0.18 0.77 0.226 1.6
## BNL 0.43 0.66 0.15 0.65 0.353 1.8
## BBH 0.51 0.48 0.08 0.49 0.507 2.0
## XCB 0.90 -0.07 0.11 0.83 0.173 1.0
## XFB 0.69 0.10 0.33 0.60 0.398 1.5
## ZYB 0.48 0.49 0.62 0.86 0.138 2.8
## AUB 0.66 0.26 0.51 0.76 0.238 2.2
## WCB 0.49 0.11 0.68 0.71 0.289 1.9
## ASB 0.61 0.23 0.18 0.46 0.541 1.5
## BPL 0.26 0.55 0.40 0.53 0.475 2.3
## NPH 0.23 0.80 0.08 0.69 0.307 1.2
## NLH 0.40 0.63 0.15 0.58 0.416 1.8
## JUB 0.37 0.54 0.65 0.84 0.156 2.6
## NLB 0.04 -0.13 0.77 0.62 0.384 1.1
## MAB 0.33 0.28 0.61 0.56 0.438 2.0
## MDH 0.15 0.71 0.19 0.56 0.443 1.2
## MDB 0.32 0.50 0.19 0.39 0.607 2.0
## OBH -0.04 0.55 -0.08 0.32 0.685 1.1
## OBB 0.02 0.57 0.54 0.62 0.381 2.0
## GS 0.91 0.30 0.16 0.95 0.053 1.3
##
## RC1 RC2 RC3
## SS loadings 5.72 4.50 3.35
## Proportion Var 0.27 0.21 0.16
## Cumulative Var 0.27 0.49 0.65
## Proportion Explained 0.42 0.33 0.25
## Cumulative Proportion 0.42 0.75 1.00
##
## Mean item complexity = 1.7
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.07
## with the empirical chi square 180.82 with prob < 0.044
##
## Fit based upon off diagonal values = 0.98
MG_3.2<-c3.3.3$Sex
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.2_sum
## # A tibble: 2 x 4
## MG_3.2 RC1 RC2 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.516 -0.400 -0.394
## 2 M 0.481 0.374 0.368
sort(pcs3.2$loadings[,1], decreasing=TRUE)
## GS XCB GOL NOL XFB AUB
## 0.91110897 0.89988004 0.78636217 0.77793439 0.69489644 0.65560857
## ASB BBH WCB ZYB BNL NLH
## 0.61185957 0.50856490 0.48983860 0.47778892 0.43034036 0.40308876
## JUB MAB MDB BPL NPH MDH
## 0.36754451 0.32837809 0.32450106 0.25898349 0.23082695 0.15022414
## NLB OBB OBH
## 0.04339999 0.01812516 -0.04193111
sort(pcs3.2$loadings[,2], decreasing=TRUE)
## NPH MDH BNL NLH OBB OBH
## 0.79556839 0.70634179 0.66288402 0.63181056 0.56957271 0.55430441
## BPL JUB MDB ZYB BBH NOL
## 0.54772943 0.53539091 0.50071448 0.49401237 0.47620407 0.37092323
## GOL GS MAB AUB ASB WCB
## 0.33846288 0.30194780 0.27969481 0.26245679 0.23017386 0.10962450
## XFB XCB NLB
## 0.09752573 -0.06740661 -0.13075338
sort(pcs3.2$loadings[,3], decreasing=TRUE)
## NLB WCB JUB ZYB MAB OBB
## 0.77255008 0.67756789 0.64983043 0.62441811 0.61300226 0.54257828
## AUB BPL XFB GOL MDB MDH
## 0.51265458 0.39777357 0.33114280 0.20452917 0.19120374 0.18791326
## ASB NOL GS BNL NLH XCB
## 0.17840881 0.17579290 0.16080620 0.15105051 0.15050225 0.11382825
## BBH NPH OBH
## 0.08458573 0.08051274 -0.07745519
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
morpho3.2<-c3.3.4 ## see previous three sections.
m3.2_sum<-morpho3.2 %>% group_by(Population) %>% dplyr::summarize(across(GOL:GS, mean))
m3.2_dsq<-dist(scale(m3.2_sum[2:22]))
m3.2_labs<-m3.2_sum$Population
pcs3.2<-principal(scale(c3.3.4[4:24]), rotate = 'varimax', nfactors=3)
pcs3.2
## Principal Components Analysis
## Call: principal(r = scale(c3.3.4[4:24]), nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## GOL 0.71 0.38 0.25 0.70 0.30 1.8
## NOL 0.68 0.37 0.26 0.67 0.33 1.9
## BNL 0.73 0.06 0.39 0.69 0.31 1.5
## BBH 0.42 0.44 0.48 0.60 0.40 3.0
## XCB 0.11 0.84 0.15 0.75 0.25 1.1
## XFB -0.03 0.81 0.22 0.70 0.30 1.2
## ZYB 0.64 0.44 0.32 0.71 0.29 2.3
## AUB 0.53 0.57 0.17 0.63 0.37 2.2
## WCB 0.25 0.65 0.06 0.49 0.51 1.3
## ASB 0.47 0.30 0.08 0.32 0.68 1.7
## BPL 0.80 -0.25 0.20 0.73 0.27 1.3
## NPH 0.35 0.19 0.76 0.73 0.27 1.6
## NLH 0.34 0.22 0.72 0.68 0.32 1.6
## JUB 0.65 0.31 0.32 0.62 0.38 2.0
## NLB 0.43 0.04 0.20 0.23 0.77 1.4
## MAB 0.58 0.19 0.15 0.40 0.60 1.4
## MDH 0.57 0.36 -0.11 0.47 0.53 1.8
## MDB 0.60 0.17 -0.42 0.56 0.44 2.0
## OBH 0.01 0.14 0.73 0.56 0.44 1.1
## OBB 0.37 0.27 0.35 0.33 0.67 2.8
## GS 0.49 0.67 0.37 0.83 0.17 2.5
##
## RC1 RC2 RC3
## SS loadings 5.56 3.85 3.01
## Proportion Var 0.26 0.18 0.14
## Cumulative Var 0.26 0.45 0.59
## Proportion Explained 0.45 0.31 0.24
## Cumulative Proportion 0.45 0.76 1.00
##
## Mean item complexity = 1.8
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
## with the empirical chi square 303.74 with prob < 1.7e-12
##
## Fit based upon off diagonal values = 0.96
MG_3.2<-c3.3.4$Sex
MG_3.2_pcs<-cbind.data.frame(MG_3.2, pcs3.2$scores)
pc_3.2_sum<-MG_3.2_pcs %>% group_by(MG_3.2) %>% dplyr::summarize(across(RC1:RC3, mean))
pc_3.2_sum
## # A tibble: 2 x 4
## MG_3.2 RC1 RC2 RC3
## * <fct> <dbl> <dbl> <dbl>
## 1 F -0.527 -0.270 -0.292
## 2 M 0.441 0.226 0.245
sort(pcs3.2$loadings[,1], decreasing=TRUE)
## BPL BNL GOL NOL JUB ZYB
## 0.79646899 0.73374325 0.70729128 0.68496500 0.64749243 0.63834563
## MDB MAB MDH AUB GS ASB
## 0.60000715 0.58305693 0.57363702 0.53371228 0.49082093 0.47344267
## NLB BBH OBB NPH NLH WCB
## 0.43143328 0.41879381 0.37345073 0.35315764 0.33624222 0.25339672
## XCB OBH XFB
## 0.10995538 0.01379945 -0.02960592
sort(pcs3.2$loadings[,2], decreasing=TRUE)
## XCB XFB GS WCB AUB ZYB
## 0.84274883 0.80732354 0.67344851 0.64813358 0.56649504 0.44425813
## BBH GOL NOL MDH JUB ASB
## 0.43658634 0.37720396 0.36845208 0.36141912 0.31256017 0.29699366
## OBB NLH MAB NPH MDB OBH
## 0.26710595 0.21875168 0.19352930 0.19184382 0.16644650 0.14203915
## BNL NLB BPL
## 0.06022599 0.04077725 -0.24664747
sort(pcs3.2$loadings[,3], decreasing=TRUE)
## NPH OBH NLH BBH BNL GS
## 0.75657277 0.73496986 0.71902198 0.48013765 0.38631181 0.37251598
## OBB ZYB JUB NOL GOL XFB
## 0.35029154 0.32120944 0.32053041 0.26096012 0.24841074 0.22227832
## NLB BPL AUB XCB MAB ASB
## 0.20329387 0.19724088 0.16764502 0.15136001 0.14620609 0.07801167
## WCB MDH MDB
## 0.05930469 -0.10672858 -0.41891925
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC2, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC2, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
ggplot(MG_3.2_pcs, aes(x=RC1, y=RC3, color = MG_3.2)) + geom_point(alpha = 0.65, position='identity') + stat_ellipse() + geom_point(data=pc_3.2_sum, size=5)
rf_target<-"Morphogroup" ## creating a new generic object that just contains Morphogroup. I will use this to build a generic function to be used in all random forest models.
rf_data<-as.data.frame(hwlr) ## storing the hwlr dataset as a generic object rf_data.
nobs<-nrow(rf_data) ## nobs is shorthand for number of observations, which I am setting to the number of rows, nrow, from the data set.
rf_data[5:25]<-scale(rf_data[5:25]) ## scaling craniometric data because sexes are pooled here still.
rfsamp<-rftrain<-sample(nrow(rf_data), 0.7*nobs) ## creating a training set that is 70% of the data.
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data)), rftrain), 0.15*nobs) ## creating a validation set that is 15% of the data set
rftest<-setdiff(setdiff(seq_len(nrow(rf_data)), rftrain),rfvalidate) ##creating a testing set that is what is left over from the sample and validation sets...or 15%
cranio<-names((rf_data[5:25])) #storing an object that is just the name of each of the craniometric variables.
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE) ## the random forest model with the Morphogroup being a function all of the craniometric variables available and filtering our dataset by the training set to include only the sample of 70% and to only use the variables defined in cranio.
pr<-predict(rf1, newdata=na.omit(rf_data)) ## using the random forest model that was created to predict everyone in the original dataset and store those predictions as an object
ct<-table(pr, rf_data$Morphogroup) ## create a table showing the predicted vs actual morphogroups
caret::confusionMatrix(ct, reference = rf_data$Morphogroup) ## confusion matrix diagnostics that give you much more information.
## Confusion Matrix and Statistics
##
##
## pr 1 2 3
## 1 679 0 41
## 2 0 142 7
## 3 87 18 1550
##
## Overall Statistics
##
## Accuracy : 0.9394
## 95% CI : (0.9294, 0.9484)
## No Information Rate : 0.6331
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8774
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.8864 0.88750 0.9700
## Specificity 0.9767 0.99704 0.8866
## Pos Pred Value 0.9431 0.95302 0.9366
## Neg Pred Value 0.9518 0.99242 0.9448
## Prevalence 0.3035 0.06339 0.6331
## Detection Rate 0.2690 0.05626 0.6141
## Detection Prevalence 0.2853 0.05903 0.6557
## Balanced Accuracy 0.9316 0.94227 0.9283
rf_target<-"Sex"
rf_data<-as.data.frame(hwlr) ##not scaling data here because examining sex.
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data))
ct<-table(pr, rf_data$Sex)
caret::confusionMatrix(ct, reference = rf_data$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 1095 56
## M 61 1312
##
## Accuracy : 0.9536
## 95% CI : (0.9447, 0.9615)
## No Information Rate : 0.542
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9066
##
## Mcnemar's Test P-Value : 0.7115
##
## Sensitivity : 0.9472
## Specificity : 0.9591
## Pos Pred Value : 0.9513
## Neg Pred Value : 0.9556
## Prevalence : 0.4580
## Detection Rate : 0.4338
## Detection Prevalence : 0.4560
## Balanced Accuracy : 0.9531
##
## 'Positive' Class : F
##
rf_target<-"Morphogroup"
rf_data_1<-as.data.frame(hwl_c1)
rf_data_1[4:24]<-scale(rf_data_1[4:24]) ## scaling data because sexes are pooled here still.
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Morphogroup)
caret::confusionMatrix(ct, reference = rf_data_1$Morphogroup)
## Confusion Matrix and Statistics
##
##
## pr 1.1 1.2 1.3 1.4 1.5
## 1.1 263 16 0 3 10
## 1.2 5 131 0 0 2
## 1.3 0 0 107 0 0
## 1.4 3 1 1 103 3
## 1.5 1 1 1 2 113
##
## Overall Statistics
##
## Accuracy : 0.936
## 95% CI : (0.9163, 0.9523)
## No Information Rate : 0.3551
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9162
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 1.1 Class: 1.2 Class: 1.3 Class: 1.4 Class: 1.5
## Sensitivity 0.9669 0.8792 0.9817 0.9537 0.8828
## Specificity 0.9413 0.9887 1.0000 0.9878 0.9922
## Pos Pred Value 0.9007 0.9493 1.0000 0.9279 0.9576
## Neg Pred Value 0.9810 0.9713 0.9970 0.9924 0.9769
## Prevalence 0.3551 0.1945 0.1423 0.1410 0.1671
## Detection Rate 0.3433 0.1710 0.1397 0.1345 0.1475
## Detection Prevalence 0.3812 0.1802 0.1397 0.1449 0.1540
## Balanced Accuracy 0.9541 0.9339 0.9908 0.9708 0.9375
rf_target<-"Sex"
rf_data_1<-as.data.frame(hwl_c1)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 302 25
## M 9 430
##
## Accuracy : 0.9556
## 95% CI : (0.9385, 0.9691)
## No Information Rate : 0.594
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9087
##
## Mcnemar's Test P-Value : 0.0101
##
## Sensitivity : 0.9711
## Specificity : 0.9451
## Pos Pred Value : 0.9235
## Neg Pred Value : 0.9795
## Prevalence : 0.4060
## Detection Rate : 0.3943
## Detection Prevalence : 0.4269
## Balanced Accuracy : 0.9581
##
## 'Positive' Class : F
##
rf_target<-"Population" ##here, each cluster is its own Howell's group.
rf_data_1<-as.data.frame(c1.1)
rf_data_1[4:24]<-scale(rf_data_1[4:24]) ##data scaled.
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)
## Confusion Matrix and Statistics
##
##
## pr AINU EASTER I MOKAPU
## AINU 84 0 4
## EASTER I 0 84 1
## MOKAPU 2 2 95
##
## Overall Statistics
##
## Accuracy : 0.9669
## 95% CI : (0.9381, 0.9848)
## No Information Rate : 0.3676
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9502
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: AINU Class: EASTER I Class: MOKAPU
## Sensitivity 0.9767 0.9767 0.9500
## Specificity 0.9785 0.9946 0.9767
## Pos Pred Value 0.9545 0.9882 0.9596
## Neg Pred Value 0.9891 0.9893 0.9711
## Prevalence 0.3162 0.3162 0.3676
## Detection Rate 0.3088 0.3088 0.3493
## Detection Prevalence 0.3235 0.3125 0.3640
## Balanced Accuracy 0.9776 0.9857 0.9634
rf_target<-"Sex"
rf_data_1<-as.data.frame(c1.1) ##assigning non-scaled data for sex estimation.
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 121 6
## M 3 142
##
## Accuracy : 0.9669
## 95% CI : (0.9381, 0.9848)
## No Information Rate : 0.5441
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9334
##
## Mcnemar's Test P-Value : 0.505
##
## Sensitivity : 0.9758
## Specificity : 0.9595
## Pos Pred Value : 0.9528
## Neg Pred Value : 0.9793
## Prevalence : 0.4559
## Detection Rate : 0.4449
## Detection Prevalence : 0.4669
## Balanced Accuracy : 0.9676
##
## 'Positive' Class : F
##
rf_target<-"Population"
rf_data_1<-as.data.frame(c1.2)
rf_data_1[4:24]<-scale(rf_data_1[4:24]) ##data scaled.
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)
## Confusion Matrix and Statistics
##
##
## pr ANYANG GUAM PHILLIPI
## ANYANG 36 3 3
## GUAM 1 51 0
## PHILLIPI 5 3 47
##
## Overall Statistics
##
## Accuracy : 0.8993
## 95% CI : (0.8394, 0.9426)
## No Information Rate : 0.3826
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8482
##
## Mcnemar's Test P-Value : 0.2123
##
## Statistics by Class:
##
## Class: ANYANG Class: GUAM Class: PHILLIPI
## Sensitivity 0.8571 0.8947 0.9400
## Specificity 0.9439 0.9891 0.9192
## Pos Pred Value 0.8571 0.9808 0.8545
## Neg Pred Value 0.9439 0.9381 0.9681
## Prevalence 0.2819 0.3826 0.3356
## Detection Rate 0.2416 0.3423 0.3154
## Detection Prevalence 0.2819 0.3490 0.3691
## Balanced Accuracy 0.9005 0.9419 0.9296
rf_target<-"Sex"
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 22 0
## M 5 122
##
## Accuracy : 0.9664
## 95% CI : (0.9234, 0.989)
## No Information Rate : 0.8188
## P-Value [Acc > NIR] : 4.13e-08
##
## Kappa : 0.8781
##
## Mcnemar's Test P-Value : 0.07364
##
## Sensitivity : 0.8148
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.9606
## Prevalence : 0.1812
## Detection Rate : 0.1477
## Detection Prevalence : 0.1477
## Balanced Accuracy : 0.9074
##
## 'Positive' Class : F
##
rf_target<-"Sex"
rf_data_1<-as.data.frame(c1.3)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf_target<-"Sex"
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 52 4
## M 2 51
##
## Accuracy : 0.945
## 95% CI : (0.884, 0.9795)
## No Information Rate : 0.5046
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8899
##
## Mcnemar's Test P-Value : 0.6831
##
## Sensitivity : 0.9630
## Specificity : 0.9273
## Pos Pred Value : 0.9286
## Neg Pred Value : 0.9623
## Prevalence : 0.4954
## Detection Rate : 0.4771
## Detection Prevalence : 0.5138
## Balanced Accuracy : 0.9451
##
## 'Positive' Class : F
##
rf_target<-"Sex"
rf_data_1<-as.data.frame(c1.4)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf_target<-"Sex"
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 55 0
## M 0 53
##
## Accuracy : 1
## 95% CI : (0.9664, 1)
## No Information Rate : 0.5093
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.5093
## Detection Rate : 0.5093
## Detection Prevalence : 0.5093
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : F
##
rf_target<-"Population"
rf_data_1<-as.data.frame(c1.5)
nobs<-nrow(rf_data_1)
rf_data_1[4:24]<-scale(rf_data_1[4:24]) ##data scaled.
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)
## Confusion Matrix and Statistics
##
##
## pr MORIORI N MAORI S MAORI
## MORIORI 108 4 3
## N MAORI 0 6 0
## S MAORI 0 0 7
##
## Overall Statistics
##
## Accuracy : 0.9453
## 95% CI : (0.8906, 0.9777)
## No Information Rate : 0.8438
## P-Value [Acc > NIR] : 0.0003583
##
## Kappa : 0.7663
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: MORIORI Class: N MAORI Class: S MAORI
## Sensitivity 1.0000 0.60000 0.70000
## Specificity 0.6500 1.00000 1.00000
## Pos Pred Value 0.9391 1.00000 1.00000
## Neg Pred Value 1.0000 0.96721 0.97521
## Prevalence 0.8438 0.07812 0.07812
## Detection Rate 0.8438 0.04688 0.05469
## Detection Prevalence 0.8984 0.04688 0.05469
## Balanced Accuracy 0.8250 0.80000 0.85000
rf_target<-"Sex"
rf_data_1<-as.data.frame(c1.5)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 49 1
## M 2 76
##
## Accuracy : 0.9766
## 95% CI : (0.933, 0.9951)
## No Information Rate : 0.6016
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9509
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9608
## Specificity : 0.9870
## Pos Pred Value : 0.9800
## Neg Pred Value : 0.9744
## Prevalence : 0.3984
## Detection Rate : 0.3828
## Detection Prevalence : 0.3906
## Balanced Accuracy : 0.9739
##
## 'Positive' Class : F
##
rf_target<-"Population"
rf_data<-as.data.frame(c2)
rf_data[5:25]<-scale(rf_data[5:25])
nobs<-nrow(rf_data)
rfsamp<-rftrain<-sample(nrow(rf_data), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data)), rftrain),rfvalidate)
cranio<-names((rf_data[5:25]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data))
ct<-table(pr, rf_data$Population)
caret::confusionMatrix(ct, reference = rf_data$Population)
## Confusion Matrix and Statistics
##
##
## pr ANDAMAN BUSHMAN
## ANDAMAN 66 3
## BUSHMAN 4 87
##
## Accuracy : 0.9562
## 95% CI : (0.9119, 0.9822)
## No Information Rate : 0.5625
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.911
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9429
## Specificity : 0.9667
## Pos Pred Value : 0.9565
## Neg Pred Value : 0.9560
## Prevalence : 0.4375
## Detection Rate : 0.4125
## Detection Prevalence : 0.4313
## Balanced Accuracy : 0.9548
##
## 'Positive' Class : ANDAMAN
##
rf_target<-"Sex"
rf_data<-as.data.frame(c2)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data))
ct<-table(pr, rf_data$Sex)
caret::confusionMatrix(ct, reference = rf_data$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 82 5
## M 2 71
##
## Accuracy : 0.9562
## 95% CI : (0.9119, 0.9822)
## No Information Rate : 0.525
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9121
##
## Mcnemar's Test P-Value : 0.4497
##
## Sensitivity : 0.9762
## Specificity : 0.9342
## Pos Pred Value : 0.9425
## Neg Pred Value : 0.9726
## Prevalence : 0.5250
## Detection Rate : 0.5125
## Detection Prevalence : 0.5437
## Balanced Accuracy : 0.9552
##
## 'Positive' Class : F
##
rf_target<-"Morphogroup"
rf_data_1<-as.data.frame(hwl_c3)
nobs<-nrow(rf_data_1)
rf_data_1[4:24]<-scale(rf_data_1[4:24]) ##data scaled.
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Morphogroup)
caret::confusionMatrix(ct, reference = rf_data_1$Morphogroup)
## Confusion Matrix and Statistics
##
##
## pr 3.1 3.2 3.3
## 3.1 287 10 10
## 3.2 33 667 14
## 3.3 10 10 557
##
## Overall Statistics
##
## Accuracy : 0.9456
## 95% CI : (0.9333, 0.9562)
## No Information Rate : 0.4299
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9145
##
## Mcnemar's Test P-Value : 0.004704
##
## Statistics by Class:
##
## Class: 3.1 Class: 3.2 Class: 3.3
## Sensitivity 0.8697 0.9709 0.9587
## Specificity 0.9842 0.9484 0.9803
## Pos Pred Value 0.9349 0.9342 0.9653
## Neg Pred Value 0.9667 0.9774 0.9765
## Prevalence 0.2065 0.4299 0.3636
## Detection Rate 0.1796 0.4174 0.3486
## Detection Prevalence 0.1921 0.4468 0.3611
## Balanced Accuracy 0.9270 0.9596 0.9695
rf_target<-"Sex"
rf_data_1<-as.data.frame(hwl_c3)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 729 34
## M 32 803
##
## Accuracy : 0.9587
## 95% CI : (0.9478, 0.9679)
## No Information Rate : 0.5238
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9172
##
## Mcnemar's Test P-Value : 0.902
##
## Sensitivity : 0.9580
## Specificity : 0.9594
## Pos Pred Value : 0.9554
## Neg Pred Value : 0.9617
## Prevalence : 0.4762
## Detection Rate : 0.4562
## Detection Prevalence : 0.4775
## Balanced Accuracy : 0.9587
##
## 'Positive' Class : F
##
rf_target<-"Morphogroup"
rf_data_1<-as.data.frame(hwlr_c3.1)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Morphogroup)
caret::confusionMatrix(ct, reference = rf_data_1$Morphogroup)
## Confusion Matrix and Statistics
##
##
## pr 3.1.1 3.1.2 3.1.3 3.1.4
## 3.1.1 63 2 4 3
## 3.1.2 1 69 1 4
## 3.1.3 2 4 70 8
## 3.1.4 3 8 12 76
##
## Overall Statistics
##
## Accuracy : 0.8424
## 95% CI : (0.7985, 0.88)
## No Information Rate : 0.2758
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.7892
##
## Mcnemar's Test P-Value : 0.5524
##
## Statistics by Class:
##
## Class: 3.1.1 Class: 3.1.2 Class: 3.1.3 Class: 3.1.4
## Sensitivity 0.9130 0.8313 0.8046 0.8352
## Specificity 0.9655 0.9757 0.9424 0.9038
## Pos Pred Value 0.8750 0.9200 0.8333 0.7677
## Neg Pred Value 0.9767 0.9451 0.9309 0.9351
## Prevalence 0.2091 0.2515 0.2636 0.2758
## Detection Rate 0.1909 0.2091 0.2121 0.2303
## Detection Prevalence 0.2182 0.2273 0.2545 0.3000
## Balanced Accuracy 0.9393 0.9035 0.8735 0.8695
rf_target<-"Population" ##here each morphogroup is its own Howells population.
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)
## Confusion Matrix and Statistics
##
##
## pr ARIKARA HAINAN N JAPAN S JAPAN
## ARIKARA 63 2 4 3
## HAINAN 1 69 1 4
## N JAPAN 2 4 69 7
## S JAPAN 3 8 13 77
##
## Overall Statistics
##
## Accuracy : 0.8424
## 95% CI : (0.7985, 0.88)
## No Information Rate : 0.2758
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.7891
##
## Mcnemar's Test P-Value : 0.4307
##
## Statistics by Class:
##
## Class: ARIKARA Class: HAINAN Class: N JAPAN Class: S JAPAN
## Sensitivity 0.9130 0.8313 0.7931 0.8462
## Specificity 0.9655 0.9757 0.9465 0.8996
## Pos Pred Value 0.8750 0.9200 0.8415 0.7624
## Neg Pred Value 0.9767 0.9451 0.9274 0.9389
## Prevalence 0.2091 0.2515 0.2636 0.2758
## Detection Rate 0.1909 0.2091 0.2091 0.2333
## Detection Prevalence 0.2182 0.2273 0.2485 0.3061
## Balanced Accuracy 0.9393 0.9035 0.8698 0.8729
rf_target<-"Sex"
rf_data_1<-as.data.frame(hwlr_c3.1)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 124 1
## M 14 191
##
## Accuracy : 0.9545
## 95% CI : (0.9261, 0.9743)
## No Information Rate : 0.5818
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9053
##
## Mcnemar's Test P-Value : 0.001946
##
## Sensitivity : 0.8986
## Specificity : 0.9948
## Pos Pred Value : 0.9920
## Neg Pred Value : 0.9317
## Prevalence : 0.4182
## Detection Rate : 0.3758
## Detection Prevalence : 0.3788
## Balanced Accuracy : 0.9467
##
## 'Positive' Class : F
##
rf_target<-"Morphogroup"
rf_data_1<-as.data.frame(hwlr_c3.2)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Morphogroup)
caret::confusionMatrix(ct, reference = rf_data_1$Morphogroup)
## Confusion Matrix and Statistics
##
##
## pr 3.2.1 3.2.2 3.2.3 3.2.4
## 3.2.1 138 3 1 5
## 3.2.2 12 311 14 5
## 3.2.3 2 3 95 1
## 3.2.4 5 0 1 91
##
## Overall Statistics
##
## Accuracy : 0.9243
## 95% CI : (0.9019, 0.943)
## No Information Rate : 0.4614
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8881
##
## Mcnemar's Test P-Value : 0.006616
##
## Statistics by Class:
##
## Class: 3.2.1 Class: 3.2.2 Class: 3.2.3 Class: 3.2.4
## Sensitivity 0.8790 0.9811 0.8559 0.8922
## Specificity 0.9830 0.9162 0.9896 0.9897
## Pos Pred Value 0.9388 0.9094 0.9406 0.9381
## Neg Pred Value 0.9648 0.9826 0.9727 0.9814
## Prevalence 0.2285 0.4614 0.1616 0.1485
## Detection Rate 0.2009 0.4527 0.1383 0.1325
## Detection Prevalence 0.2140 0.4978 0.1470 0.1412
## Balanced Accuracy 0.9310 0.9486 0.9227 0.9410
rf_target<-"Population"
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)
## Confusion Matrix and Statistics
##
##
## pr ATAYAL PERU BERG NORSE ZALAVAR EGYPT SANTA CR
## ATAYAL 30 0 0 0 0 0 0
## PERU 10 94 1 0 3 0 4
## BERG 0 3 97 3 11 0 2
## NORSE 0 2 5 98 6 9 0
## ZALAVAR 1 0 4 5 76 2 0
## EGYPT 2 5 1 4 2 98 2
## SANTA CR 4 6 1 0 0 2 94
##
## Overall Statistics
##
## Accuracy : 0.8544
## 95% CI : (0.8258, 0.88)
## No Information Rate : 0.1616
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8283
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: ATAYAL Class: PERU Class: BERG Class: NORSE
## Sensitivity 0.63830 0.8545 0.8899 0.8909
## Specificity 1.00000 0.9688 0.9671 0.9619
## Pos Pred Value 1.00000 0.8393 0.8362 0.8167
## Neg Pred Value 0.97412 0.9722 0.9790 0.9788
## Prevalence 0.06841 0.1601 0.1587 0.1601
## Detection Rate 0.04367 0.1368 0.1412 0.1426
## Detection Prevalence 0.04367 0.1630 0.1689 0.1747
## Balanced Accuracy 0.81915 0.9117 0.9285 0.9264
## Class: ZALAVAR Class: EGYPT Class: SANTA CR
## Sensitivity 0.7755 0.8829 0.9216
## Specificity 0.9796 0.9722 0.9778
## Pos Pred Value 0.8636 0.8596 0.8785
## Neg Pred Value 0.9633 0.9773 0.9862
## Prevalence 0.1426 0.1616 0.1485
## Detection Rate 0.1106 0.1426 0.1368
## Detection Prevalence 0.1281 0.1659 0.1557
## Balanced Accuracy 0.8776 0.9276 0.9497
rf_target<-"Sex"
rf_data_1<-as.data.frame(hwlr_c3.2)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 313 12
## M 17 345
##
## Accuracy : 0.9578
## 95% CI : (0.9399, 0.9716)
## No Information Rate : 0.5197
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9154
##
## Mcnemar's Test P-Value : 0.4576
##
## Sensitivity : 0.9485
## Specificity : 0.9664
## Pos Pred Value : 0.9631
## Neg Pred Value : 0.9530
## Prevalence : 0.4803
## Detection Rate : 0.4556
## Detection Prevalence : 0.4731
## Balanced Accuracy : 0.9574
##
## 'Positive' Class : F
##
rf_target<-"Population"
rf_data_1<-as.data.frame(c3.2.1)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)
## Confusion Matrix and Statistics
##
##
## pr ATAYAL PERU
## ATAYAL 36 1
## PERU 11 109
##
## Accuracy : 0.9236
## 95% CI : (0.8703, 0.9599)
## No Information Rate : 0.7006
## P-Value [Acc > NIR] : 7.671e-12
##
## Kappa : 0.806
##
## Mcnemar's Test P-Value : 0.009375
##
## Sensitivity : 0.7660
## Specificity : 0.9909
## Pos Pred Value : 0.9730
## Neg Pred Value : 0.9083
## Prevalence : 0.2994
## Detection Rate : 0.2293
## Detection Prevalence : 0.2357
## Balanced Accuracy : 0.8784
##
## 'Positive' Class : ATAYAL
##
rf_target<-"Sex"
rf_data_1<-as.data.frame(c3.2.1)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 70 4
## M 3 80
##
## Accuracy : 0.9554
## 95% CI : (0.9103, 0.9819)
## No Information Rate : 0.535
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9105
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9589
## Specificity : 0.9524
## Pos Pred Value : 0.9459
## Neg Pred Value : 0.9639
## Prevalence : 0.4650
## Detection Rate : 0.4459
## Detection Prevalence : 0.4713
## Balanced Accuracy : 0.9556
##
## 'Positive' Class : F
##
rf_target<-"Population"
rf_data_1<-as.data.frame(c3.2.2)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)
## Confusion Matrix and Statistics
##
##
## pr BERG NORSE ZALAVAR
## BERG 104 1 4
## NORSE 4 101 5
## ZALAVAR 1 8 89
##
## Overall Statistics
##
## Accuracy : 0.9274
## 95% CI : (0.8931, 0.9535)
## No Information Rate : 0.347
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.891
##
## Mcnemar's Test P-Value : 0.2316
##
## Statistics by Class:
##
## Class: BERG Class: NORSE Class: ZALAVAR
## Sensitivity 0.9541 0.9182 0.9082
## Specificity 0.9760 0.9565 0.9589
## Pos Pred Value 0.9541 0.9182 0.9082
## Neg Pred Value 0.9760 0.9565 0.9589
## Prevalence 0.3438 0.3470 0.3091
## Detection Rate 0.3281 0.3186 0.2808
## Detection Prevalence 0.3438 0.3470 0.3091
## Balanced Accuracy 0.9650 0.9374 0.9335
rf_target<-"Sex"
rf_data_1<-as.data.frame(c3.2.2)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 151 5
## M 2 159
##
## Accuracy : 0.9779
## 95% CI : (0.955, 0.9911)
## No Information Rate : 0.5174
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9558
##
## Mcnemar's Test P-Value : 0.4497
##
## Sensitivity : 0.9869
## Specificity : 0.9695
## Pos Pred Value : 0.9679
## Neg Pred Value : 0.9876
## Prevalence : 0.4826
## Detection Rate : 0.4763
## Detection Prevalence : 0.4921
## Balanced Accuracy : 0.9782
##
## 'Positive' Class : F
##
rf_target<-"Sex" ##only only one group, so only classifying sex.
rf_data_1<-as.data.frame(c3.2.3)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 50 2
## M 3 56
##
## Accuracy : 0.955
## 95% CI : (0.898, 0.9852)
## No Information Rate : 0.5225
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9097
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9434
## Specificity : 0.9655
## Pos Pred Value : 0.9615
## Neg Pred Value : 0.9492
## Prevalence : 0.4775
## Detection Rate : 0.4505
## Detection Prevalence : 0.4685
## Balanced Accuracy : 0.9545
##
## 'Positive' Class : F
##
rf_target<-"Sex" ##only only one group, so only classifying sex.
rf_data_1<-as.data.frame(c3.2.4)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 50 2
## M 1 49
##
## Accuracy : 0.9706
## 95% CI : (0.9164, 0.9939)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9412
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9804
## Specificity : 0.9608
## Pos Pred Value : 0.9615
## Neg Pred Value : 0.9800
## Prevalence : 0.5000
## Detection Rate : 0.4902
## Detection Prevalence : 0.5098
## Balanced Accuracy : 0.9706
##
## 'Positive' Class : F
##
rf_target<-"Morphogroup"
rf_data_1<-as.data.frame(hwlr_c3.3)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Morphogroup ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Morphogroup)
caret::confusionMatrix(ct, reference = rf_data_1$Morphogroup)
## Confusion Matrix and Statistics
##
##
## pr 3.3.1 3.3.2 3.3.3 3.3.4
## 3.3.1 204 12 12 4
## 3.3.2 1 165 2 16
## 3.3.3 4 1 73 0
## 3.3.4 2 4 0 81
##
## Overall Statistics
##
## Accuracy : 0.9002
## 95% CI : (0.8729, 0.9233)
## No Information Rate : 0.3632
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8593
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 3.3.1 Class: 3.3.2 Class: 3.3.3 Class: 3.3.4
## Sensitivity 0.9668 0.9066 0.8391 0.8020
## Specificity 0.9243 0.9524 0.9899 0.9875
## Pos Pred Value 0.8793 0.8967 0.9359 0.9310
## Neg Pred Value 0.9799 0.9572 0.9722 0.9595
## Prevalence 0.3632 0.3133 0.1497 0.1738
## Detection Rate 0.3511 0.2840 0.1256 0.1394
## Detection Prevalence 0.3993 0.3167 0.1343 0.1497
## Balanced Accuracy 0.9456 0.9295 0.9145 0.8947
rf_target<-"Population"
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)
## Confusion Matrix and Statistics
##
##
## pr AUSTRALI TOLAI DOGON TEITA TASMANIA ZULU
## AUSTRALI 91 4 0 3 6 2
## TOLAI 5 102 2 3 3 0
## DOGON 0 0 89 3 2 8
## TEITA 1 0 3 70 1 1
## TASMANIA 3 3 1 1 75 1
## ZULU 1 1 4 3 0 89
##
## Overall Statistics
##
## Accuracy : 0.8881
## 95% CI : (0.8596, 0.9126)
## No Information Rate : 0.1893
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8654
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: AUSTRALI Class: TOLAI Class: DOGON Class: TEITA
## Sensitivity 0.9010 0.9273 0.8990 0.8434
## Specificity 0.9688 0.9724 0.9730 0.9880
## Pos Pred Value 0.8585 0.8870 0.8725 0.9211
## Neg Pred Value 0.9789 0.9828 0.9791 0.9743
## Prevalence 0.1738 0.1893 0.1704 0.1429
## Detection Rate 0.1566 0.1756 0.1532 0.1205
## Detection Prevalence 0.1824 0.1979 0.1756 0.1308
## Balanced Accuracy 0.9349 0.9498 0.9360 0.9157
## Class: TASMANIA Class: ZULU
## Sensitivity 0.8621 0.8812
## Specificity 0.9818 0.9812
## Pos Pred Value 0.8929 0.9082
## Neg Pred Value 0.9759 0.9752
## Prevalence 0.1497 0.1738
## Detection Rate 0.1291 0.1532
## Detection Prevalence 0.1446 0.1687
## Balanced Accuracy 0.9219 0.9312
rf_target<-"Sex"
rf_data_1<-as.data.frame(hwlr_c3.3)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 282 10
## M 11 278
##
## Accuracy : 0.9639
## 95% CI : (0.9453, 0.9775)
## No Information Rate : 0.5043
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9277
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9625
## Specificity : 0.9653
## Pos Pred Value : 0.9658
## Neg Pred Value : 0.9619
## Prevalence : 0.5043
## Detection Rate : 0.4854
## Detection Prevalence : 0.5026
## Balanced Accuracy : 0.9639
##
## 'Positive' Class : F
##
rf_target<-"Population"
rf_data_1<-as.data.frame(c3.3.1)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)
## Confusion Matrix and Statistics
##
##
## pr AUSTRALI TOLAI
## AUSTRALI 94 4
## TOLAI 7 106
##
## Accuracy : 0.9479
## 95% CI : (0.9086, 0.9737)
## No Information Rate : 0.5213
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8954
##
## Mcnemar's Test P-Value : 0.5465
##
## Sensitivity : 0.9307
## Specificity : 0.9636
## Pos Pred Value : 0.9592
## Neg Pred Value : 0.9381
## Prevalence : 0.4787
## Detection Rate : 0.4455
## Detection Prevalence : 0.4645
## Balanced Accuracy : 0.9472
##
## 'Positive' Class : AUSTRALI
##
rf_target<-"Sex"
rf_data_1<-as.data.frame(c3.3.1)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 94 2
## M 9 106
##
## Accuracy : 0.9479
## 95% CI : (0.9086, 0.9737)
## No Information Rate : 0.5118
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.8955
##
## Mcnemar's Test P-Value : 0.07044
##
## Sensitivity : 0.9126
## Specificity : 0.9815
## Pos Pred Value : 0.9792
## Neg Pred Value : 0.9217
## Prevalence : 0.4882
## Detection Rate : 0.4455
## Detection Prevalence : 0.4550
## Balanced Accuracy : 0.9471
##
## 'Positive' Class : F
##
rf_target<-"Population"
rf_data_1<-as.data.frame(c3.3.2)
rf_data_1[4:24]<-scale(rf_data_1[4:24])
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Population ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Population)
caret::confusionMatrix(ct, reference = rf_data_1$Population)
## Confusion Matrix and Statistics
##
##
## pr DOGON TEITA
## DOGON 97 3
## TEITA 2 80
##
## Accuracy : 0.9725
## 95% CI : (0.9371, 0.991)
## No Information Rate : 0.544
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9446
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9798
## Specificity : 0.9639
## Pos Pred Value : 0.9700
## Neg Pred Value : 0.9756
## Prevalence : 0.5440
## Detection Rate : 0.5330
## Detection Prevalence : 0.5495
## Balanced Accuracy : 0.9718
##
## 'Positive' Class : DOGON
##
rf_target<-"Sex"
rf_data_1<-as.data.frame(c3.3.2)
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 97 2
## M 5 78
##
## Accuracy : 0.9615
## 95% CI : (0.9224, 0.9844)
## No Information Rate : 0.5604
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9223
##
## Mcnemar's Test P-Value : 0.4497
##
## Sensitivity : 0.9510
## Specificity : 0.9750
## Pos Pred Value : 0.9798
## Neg Pred Value : 0.9398
## Prevalence : 0.5604
## Detection Rate : 0.5330
## Detection Prevalence : 0.5440
## Balanced Accuracy : 0.9630
##
## 'Positive' Class : F
##
rf_target<-"Sex" ##only only one group, so only classifying sex.
rf_data_1<-as.data.frame(c3.3.3)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 40 1
## M 2 44
##
## Accuracy : 0.9655
## 95% CI : (0.9025, 0.9928)
## No Information Rate : 0.5172
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9309
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9524
## Specificity : 0.9778
## Pos Pred Value : 0.9756
## Neg Pred Value : 0.9565
## Prevalence : 0.4828
## Detection Rate : 0.4598
## Detection Prevalence : 0.4713
## Balanced Accuracy : 0.9651
##
## 'Positive' Class : F
##
rf_target<-"Sex" ##only only one group, so only classifying sex.
rf_data_1<-as.data.frame(c3.3.4)
nobs<-nrow(rf_data_1)
rfsamp<-rftrain<-sample(nrow(rf_data_1), 0.7*nobs)
rfvalidate<-sample(setdiff(seq_len(nrow(rf_data_1)), rftrain), 0.15*nobs)
rftest<-setdiff(setdiff(seq_len(nrow(rf_data_1)), rftrain),rfvalidate)
cranio<-names((rf_data_1[4:24]))
rf1<-randomForest::randomForest(Sex ~ ., data=rf_data_1[rfsamp,c(cranio, rf_target)],ntree=1000, mtry=5, importance=TRUE, na.action=randomForest::na.roughfix,replace=FALSE)
pr<-predict(rf1, newdata=na.omit(rf_data_1))
ct<-table(pr, rf_data_1$Sex)
caret::confusionMatrix(ct, reference = rf_data_1$Sex)
## Confusion Matrix and Statistics
##
##
## pr F M
## F 42 6
## M 4 49
##
## Accuracy : 0.901
## 95% CI : (0.8254, 0.9515)
## No Information Rate : 0.5446
## P-Value [Acc > NIR] : 8.075e-15
##
## Kappa : 0.8011
##
## Mcnemar's Test P-Value : 0.7518
##
## Sensitivity : 0.9130
## Specificity : 0.8909
## Pos Pred Value : 0.8750
## Neg Pred Value : 0.9245
## Prevalence : 0.4554
## Detection Rate : 0.4158
## Detection Prevalence : 0.4752
## Balanced Accuracy : 0.9020
##
## 'Positive' Class : F
##