library(tidyverse)
package ‘tidyverse’ was built under R version 3.6.2Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.3 ✓ purrr 0.3.4
✓ tibble 3.1.0 ✓ dplyr 1.0.5
✓ tidyr 1.1.3 ✓ stringr 1.4.0
✓ readr 1.4.0 ✓ forcats 0.5.1
package ‘ggplot2’ was built under R version 3.6.2── Conflicts ───────────────────── tidyverse_conflicts() ──
x tidyr::expand() masks Matrix::expand()
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
x tidyr::pack() masks Matrix::pack()
x tidyr::unpack() masks Matrix::unpack()
phylogeny %>% select(-c(Language, Family)) %>% dist(method='binary') -> phy.dist
bind_cols(phylogeny[, 1:2], as_tibble(cmdscale(phy.dist))) %>% ggplot(aes(x=V1, y=V2)) + geom_point(aes(color=Family)) + geom_text_repel(aes(label=Language))

phylogeny %>% select(-c(Language, Family)) %>% replace(is.na(.), 0) %>% prcomp() -> phyl.pca
summary(phyl.pca)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6
Standard deviation 1.624 1.4084 1.35083 1.19633 1.18646 1.08662
Proportion of Variance 0.115 0.0865 0.07957 0.06241 0.06139 0.05149
Cumulative Proportion 0.115 0.2015 0.28108 0.34349 0.40488 0.45637
PC7 PC8 PC9 PC10 PC11 PC12
Standard deviation 1.05440 1.04160 0.99075 0.95372 0.89394 0.83752
Proportion of Variance 0.04848 0.04731 0.04281 0.03967 0.03485 0.03059
Cumulative Proportion 0.50485 0.55217 0.59497 0.63464 0.66949 0.70008
PC13 PC14 PC15 PC16 PC17 PC18
Standard deviation 0.83369 0.80408 0.77162 0.75212 0.72479 0.69319
Proportion of Variance 0.03031 0.02819 0.02596 0.02467 0.02291 0.02095
Cumulative Proportion 0.73038 0.75858 0.78454 0.80921 0.83212 0.85308
PC19 PC20 PC21 PC22 PC23 PC24
Standard deviation 0.66735 0.65395 0.60960 0.57704 0.54221 0.54160
Proportion of Variance 0.01942 0.01865 0.01621 0.01452 0.01282 0.01279
Cumulative Proportion 0.87250 0.89115 0.90735 0.92187 0.93469 0.94749
PC25 PC26 PC27 PC28 PC29 PC30
Standard deviation 0.52141 0.48892 0.47480 0.44282 0.38471 0.3518
Proportion of Variance 0.01186 0.01042 0.00983 0.00855 0.00645 0.0054
Cumulative Proportion 0.95934 0.96977 0.97960 0.98815 0.99460 1.0000
PC31
Standard deviation 7.212e-16
Proportion of Variance 0.000e+00
Cumulative Proportion 1.000e+00
phylogeny %>% select(-c(Language, Family)) %>% replace(is.na(.), 0) %>% predict(phyl.pca, .) %>% as_tibble %>% select(c(PC1, PC2)) %>% bind_cols(phylogeny[, 1:2], .) %>% ggplot(aes(x=PC1, y=PC2, color=Family, label=Language)) + geom_point() + geom_text_repel()
hclust(phy.dist) %>% plot(labels=paste(phylogeny$Language, substr(phylogeny$Family, 1,1)))
hclust(phy.dist) %>% cutree(2)
bind_cols(phylogeny[, 1:2], as_tibble(cmdscale(phy.dist)), cluster=hclust(phy.dist) %>% cutree(4) %>% as.factor) %>% ggplot(aes(x=V1, y=V2)) + geom_point(aes(color=cluster, shape=Family)) + geom_text_repel(aes(label=Language))
library("pvclust")
phylogeny %>% select(-c(Language, Family)) %>% replace(is.na(.), 0) %>% t %>% as.tibble()
pvclust(phylogeny %>% select(-c(Language, Family)) %>% replace(is.na(.), 0) %>% t %>% as.tibble(), method.hclust="single", method.dist="binary") -> phy.pvcl
Bootstrap (r = 0.5)... Done.
Bootstrap (r = 0.6)... Done.
Bootstrap (r = 0.7)... Done.
Bootstrap (r = 0.8)... Done.
Bootstrap (r = 0.9)... Done.
Bootstrap (r = 1.0)... Done.
Bootstrap (r = 1.1)... Done.
Bootstrap (r = 1.2)... Done.
Bootstrap (r = 1.3)... Done.
Bootstrap (r = 1.4)... Done.
phy.pvcl %>% plot(labels=paste(phylogeny$Language, substr(phylogeny$Family, 1,1)))
phy.pvcl %>% pvrect()

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkobGFuZ3VhZ2VSKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGdncmVwZWwpCmBgYAoKYGBge3J9CnBoeWxvZ2VueSAlPiUgc2VsZWN0KC1jKExhbmd1YWdlLCBGYW1pbHkpKSAlPiUgZGlzdChtZXRob2Q9J2JpbmFyeScpIC0+IHBoeS5kaXN0CmBgYApgYGB7cn0KcGh5bG9nZW55WywgMToyXQpgYGAKYGBge3J9CmJpbmRfY29scyhwaHlsb2dlbnlbLCAxOjJdLCBhc190aWJibGUoY21kc2NhbGUocGh5LmRpc3QpKSkgJT4lIGdncGxvdChhZXMoeD1WMSwgeT1WMikpICsgZ2VvbV9wb2ludChhZXMoY29sb3I9RmFtaWx5KSkgKyBnZW9tX3RleHRfcmVwZWwoYWVzKGxhYmVsPUxhbmd1YWdlKSkKYGBgCgpgYGB7cn0KcGh5bG9nZW55ICU+JSBzZWxlY3QoLWMoTGFuZ3VhZ2UsIEZhbWlseSkpICU+JSByZXBsYWNlKGlzLm5hKC4pLCAwKSAlPiUgcHJjb21wKCkgLT4gcGh5bC5wY2EKYGBgCgpgYGB7cn0Kc3VtbWFyeShwaHlsLnBjYSkKYGBgCmBgYHtyfQpwaHlsb2dlbnkgJT4lIHNlbGVjdCgtYyhMYW5ndWFnZSwgRmFtaWx5KSkgJT4lIHJlcGxhY2UoaXMubmEoLiksIDApICU+JSBwcmVkaWN0KHBoeWwucGNhLCAuKSAlPiUgYXNfdGliYmxlICU+JSBzZWxlY3QoYyhQQzEsIFBDMikpICU+JSBiaW5kX2NvbHMocGh5bG9nZW55WywgMToyXSwgLikgJT4lIGdncGxvdChhZXMoeD1QQzEsIHk9UEMyLCBjb2xvcj1GYW1pbHksIGxhYmVsPUxhbmd1YWdlKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3RleHRfcmVwZWwoKQpgYGAKCmBgYHtyfQpoY2x1c3QocGh5LmRpc3QpICU+JSBwbG90KGxhYmVscz1wYXN0ZShwaHlsb2dlbnkkTGFuZ3VhZ2UsIHN1YnN0cihwaHlsb2dlbnkkRmFtaWx5LCAxLDEpKSkKYGBgCgpgYGB7cn0KaGNsdXN0KHBoeS5kaXN0KSAlPiUgY3V0cmVlKDIpCmBgYAoKYGBge3J9CmJpbmRfY29scyhwaHlsb2dlbnlbLCAxOjJdLCBhc190aWJibGUoY21kc2NhbGUocGh5LmRpc3QpKSwgY2x1c3Rlcj1oY2x1c3QocGh5LmRpc3QpICU+JSBjdXRyZWUoNCkgJT4lIGFzLmZhY3RvcikgJT4lIGdncGxvdChhZXMoeD1WMSwgeT1WMikpICsgZ2VvbV9wb2ludChhZXMoY29sb3I9Y2x1c3Rlciwgc2hhcGU9RmFtaWx5KSkgKyBnZW9tX3RleHRfcmVwZWwoYWVzKGxhYmVsPUxhbmd1YWdlKSkKYGBgCgpgYGB7cn0KbGlicmFyeSgicHZjbHVzdCIpCmBgYAoKYGBge3J9CnBoeWxvZ2VueSAlPiUgc2VsZWN0KC1jKExhbmd1YWdlLCBGYW1pbHkpKSAlPiUgcmVwbGFjZShpcy5uYSguKSwgMCkgJT4lIHQgJT4lIGFzLnRpYmJsZSgpCmBgYApgYGB7cn0KcHZjbHVzdChwaHlsb2dlbnkgJT4lIHNlbGVjdCgtYyhMYW5ndWFnZSwgRmFtaWx5KSkgJT4lIHJlcGxhY2UoaXMubmEoLiksIDApICU+JSB0ICU+JSBhcy50aWJibGUoKSwgbWV0aG9kLmhjbHVzdD0ic2luZ2xlIiwgbWV0aG9kLmRpc3Q9ImJpbmFyeSIpIC0+IHBoeS5wdmNsCmBgYAoKYGBge3J9CnBoeS5wdmNsICU+JSBwbG90KGxhYmVscz1wYXN0ZShwaHlsb2dlbnkkTGFuZ3VhZ2UsIHN1YnN0cihwaHlsb2dlbnkkRmFtaWx5LCAxLDEpKSkKcGh5LnB2Y2wgJT4lIHB2cmVjdCgpCmBgYAo=