library(tidyverse)
package ‘tidyverse’ was built under R version 3.6.2Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.3     ✓ purrr   0.3.4
✓ tibble  3.1.0     ✓ dplyr   1.0.5
✓ tidyr   1.1.3     ✓ stringr 1.4.0
✓ readr   1.4.0     ✓ forcats 0.5.1
package ‘ggplot2’ was built under R version 3.6.2── Conflicts ───────────────────── tidyverse_conflicts() ──
x tidyr::expand() masks Matrix::expand()
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
x tidyr::pack()   masks Matrix::pack()
x tidyr::unpack() masks Matrix::unpack()
phylogeny %>% select(-c(Language, Family)) %>% dist(method='binary') -> phy.dist
bind_cols(phylogeny[, 1:2], as_tibble(cmdscale(phy.dist))) %>% ggplot(aes(x=V1, y=V2)) + geom_point(aes(color=Family)) + geom_text_repel(aes(label=Language))

phylogeny %>% select(-c(Language, Family)) %>% replace(is.na(.), 0) %>% prcomp() -> phyl.pca
summary(phyl.pca)
Importance of components:
                         PC1    PC2     PC3     PC4     PC5     PC6
Standard deviation     1.624 1.4084 1.35083 1.19633 1.18646 1.08662
Proportion of Variance 0.115 0.0865 0.07957 0.06241 0.06139 0.05149
Cumulative Proportion  0.115 0.2015 0.28108 0.34349 0.40488 0.45637
                           PC7     PC8     PC9    PC10    PC11    PC12
Standard deviation     1.05440 1.04160 0.99075 0.95372 0.89394 0.83752
Proportion of Variance 0.04848 0.04731 0.04281 0.03967 0.03485 0.03059
Cumulative Proportion  0.50485 0.55217 0.59497 0.63464 0.66949 0.70008
                          PC13    PC14    PC15    PC16    PC17    PC18
Standard deviation     0.83369 0.80408 0.77162 0.75212 0.72479 0.69319
Proportion of Variance 0.03031 0.02819 0.02596 0.02467 0.02291 0.02095
Cumulative Proportion  0.73038 0.75858 0.78454 0.80921 0.83212 0.85308
                          PC19    PC20    PC21    PC22    PC23    PC24
Standard deviation     0.66735 0.65395 0.60960 0.57704 0.54221 0.54160
Proportion of Variance 0.01942 0.01865 0.01621 0.01452 0.01282 0.01279
Cumulative Proportion  0.87250 0.89115 0.90735 0.92187 0.93469 0.94749
                          PC25    PC26    PC27    PC28    PC29   PC30
Standard deviation     0.52141 0.48892 0.47480 0.44282 0.38471 0.3518
Proportion of Variance 0.01186 0.01042 0.00983 0.00855 0.00645 0.0054
Cumulative Proportion  0.95934 0.96977 0.97960 0.98815 0.99460 1.0000
                            PC31
Standard deviation     7.212e-16
Proportion of Variance 0.000e+00
Cumulative Proportion  1.000e+00
phylogeny %>% select(-c(Language, Family)) %>% replace(is.na(.), 0) %>% predict(phyl.pca, .) %>% as_tibble %>% select(c(PC1, PC2)) %>% bind_cols(phylogeny[, 1:2], .) %>% ggplot(aes(x=PC1, y=PC2, color=Family, label=Language)) + geom_point() + geom_text_repel()
hclust(phy.dist) %>% plot(labels=paste(phylogeny$Language, substr(phylogeny$Family, 1,1)))
hclust(phy.dist) %>% cutree(2)
bind_cols(phylogeny[, 1:2], as_tibble(cmdscale(phy.dist)), cluster=hclust(phy.dist) %>% cutree(4) %>% as.factor) %>% ggplot(aes(x=V1, y=V2)) + geom_point(aes(color=cluster, shape=Family)) + geom_text_repel(aes(label=Language))
library("pvclust")
phylogeny %>% select(-c(Language, Family)) %>% replace(is.na(.), 0) %>% t %>% as.tibble()
pvclust(phylogeny %>% select(-c(Language, Family)) %>% replace(is.na(.), 0) %>% t %>% as.tibble(), method.hclust="single", method.dist="binary") -> phy.pvcl
Bootstrap (r = 0.5)... Done.
Bootstrap (r = 0.6)... Done.
Bootstrap (r = 0.7)... Done.
Bootstrap (r = 0.8)... Done.
Bootstrap (r = 0.9)... Done.
Bootstrap (r = 1.0)... Done.
Bootstrap (r = 1.1)... Done.
Bootstrap (r = 1.2)... Done.
Bootstrap (r = 1.3)... Done.
Bootstrap (r = 1.4)... Done.
phy.pvcl %>% plot(labels=paste(phylogeny$Language, substr(phylogeny$Family, 1,1)))
phy.pvcl %>% pvrect()

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkobGFuZ3VhZ2VSKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGdncmVwZWwpCmBgYAoKYGBge3J9CnBoeWxvZ2VueSAlPiUgc2VsZWN0KC1jKExhbmd1YWdlLCBGYW1pbHkpKSAlPiUgZGlzdChtZXRob2Q9J2JpbmFyeScpIC0+IHBoeS5kaXN0CmBgYApgYGB7cn0KcGh5bG9nZW55WywgMToyXQpgYGAKYGBge3J9CmJpbmRfY29scyhwaHlsb2dlbnlbLCAxOjJdLCBhc190aWJibGUoY21kc2NhbGUocGh5LmRpc3QpKSkgJT4lIGdncGxvdChhZXMoeD1WMSwgeT1WMikpICsgZ2VvbV9wb2ludChhZXMoY29sb3I9RmFtaWx5KSkgKyBnZW9tX3RleHRfcmVwZWwoYWVzKGxhYmVsPUxhbmd1YWdlKSkKYGBgCgpgYGB7cn0KcGh5bG9nZW55ICU+JSBzZWxlY3QoLWMoTGFuZ3VhZ2UsIEZhbWlseSkpICU+JSByZXBsYWNlKGlzLm5hKC4pLCAwKSAlPiUgcHJjb21wKCkgLT4gcGh5bC5wY2EKYGBgCgpgYGB7cn0Kc3VtbWFyeShwaHlsLnBjYSkKYGBgCmBgYHtyfQpwaHlsb2dlbnkgJT4lIHNlbGVjdCgtYyhMYW5ndWFnZSwgRmFtaWx5KSkgJT4lIHJlcGxhY2UoaXMubmEoLiksIDApICU+JSBwcmVkaWN0KHBoeWwucGNhLCAuKSAlPiUgYXNfdGliYmxlICU+JSBzZWxlY3QoYyhQQzEsIFBDMikpICU+JSBiaW5kX2NvbHMocGh5bG9nZW55WywgMToyXSwgLikgJT4lIGdncGxvdChhZXMoeD1QQzEsIHk9UEMyLCBjb2xvcj1GYW1pbHksIGxhYmVsPUxhbmd1YWdlKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3RleHRfcmVwZWwoKQpgYGAKCmBgYHtyfQpoY2x1c3QocGh5LmRpc3QpICU+JSBwbG90KGxhYmVscz1wYXN0ZShwaHlsb2dlbnkkTGFuZ3VhZ2UsIHN1YnN0cihwaHlsb2dlbnkkRmFtaWx5LCAxLDEpKSkKYGBgCgpgYGB7cn0KaGNsdXN0KHBoeS5kaXN0KSAlPiUgY3V0cmVlKDIpCmBgYAoKYGBge3J9CmJpbmRfY29scyhwaHlsb2dlbnlbLCAxOjJdLCBhc190aWJibGUoY21kc2NhbGUocGh5LmRpc3QpKSwgY2x1c3Rlcj1oY2x1c3QocGh5LmRpc3QpICU+JSBjdXRyZWUoNCkgJT4lIGFzLmZhY3RvcikgJT4lIGdncGxvdChhZXMoeD1WMSwgeT1WMikpICsgZ2VvbV9wb2ludChhZXMoY29sb3I9Y2x1c3Rlciwgc2hhcGU9RmFtaWx5KSkgKyBnZW9tX3RleHRfcmVwZWwoYWVzKGxhYmVsPUxhbmd1YWdlKSkKYGBgCgpgYGB7cn0KbGlicmFyeSgicHZjbHVzdCIpCmBgYAoKYGBge3J9CnBoeWxvZ2VueSAlPiUgc2VsZWN0KC1jKExhbmd1YWdlLCBGYW1pbHkpKSAlPiUgcmVwbGFjZShpcy5uYSguKSwgMCkgJT4lIHQgJT4lIGFzLnRpYmJsZSgpCmBgYApgYGB7cn0KcHZjbHVzdChwaHlsb2dlbnkgJT4lIHNlbGVjdCgtYyhMYW5ndWFnZSwgRmFtaWx5KSkgJT4lIHJlcGxhY2UoaXMubmEoLiksIDApICU+JSB0ICU+JSBhcy50aWJibGUoKSwgbWV0aG9kLmhjbHVzdD0ic2luZ2xlIiwgbWV0aG9kLmRpc3Q9ImJpbmFyeSIpIC0+IHBoeS5wdmNsCmBgYAoKYGBge3J9CnBoeS5wdmNsICU+JSBwbG90KGxhYmVscz1wYXN0ZShwaHlsb2dlbnkkTGFuZ3VhZ2UsIHN1YnN0cihwaHlsb2dlbnkkRmFtaWx5LCAxLDEpKSkKcGh5LnB2Y2wgJT4lIHB2cmVjdCgpCmBgYAo=