Question 2
a.
manova_result <- manova(cbind(GOL, BBH, XCB, ZYB, AUB, NPH) ~ Sex, data = Howells)
summary (manova_result)
## Df Pillai approx F num Df den Df Pr(>F)
## Sex 1 0.45667 352.59 6 2517 < 2.2e-16 ***
## Residuals 2522
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The p-value is less than 0.01%
b.
manova_population <- manova(cbind(GOL, BBH, XCB, ZYB, AUB, NPH) ~ Population, data = Howells)
summary(manova_population)
## Df Pillai approx F num Df den Df Pr(>F)
## Population 29 2.1104 46.66 174 14964 < 2.2e-16 ***
## Residuals 2494
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The p-value is less than 0.01%
Question 3
a.
sapply(Howells[, c("GOL", "BBH", "XCB", "ZYB", "AUB", "NPH")], range)
## GOL BBH XCB ZYB AUB NPH
## [1,] 151 107 116 105 98 48
## [2,] 206 155 167 158 149 82
# I am going to scale the data because the measurements are different orders of magnitude.
Howells_pca <- prcomp(Howells[, c("GOL", "BBH", "XCB", "ZYB", "AUB", "NPH")], scale = TRUE)
summary(Howells_pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.9149 0.9548 0.6980 0.69234 0.61105 0.28527
## Proportion of Variance 0.6112 0.1520 0.0812 0.07989 0.06223 0.01356
## Cumulative Proportion 0.6112 0.7631 0.8443 0.92421 0.98644 1.00000
Howells_pca$rotation
## PC1 PC2 PC3 PC4 PC5 PC6
## GOL -0.3465310 -0.587149895 -0.36244649 -0.5594148 0.28636548 0.09409394
## BBH -0.3607789 -0.487682042 0.72635325 0.3111983 0.07825679 0.03804233
## XCB -0.3727482 0.563268444 0.23823164 -0.2344307 0.61955648 -0.21960210
## ZYB -0.4740149 0.050671386 -0.04447027 -0.1401316 -0.59165211 -0.63330532
## AUB -0.4708969 0.312355070 0.02343741 -0.1109481 -0.35787016 0.73468400
## NPH -0.4054375 -0.004075318 -0.53081283 0.7094410 0.22337676 -0.02525498
b
Howells_pca <- prcomp(Howells[, c("GOL", "BBH", "XCB", "ZYB", "AUB", "NPH")], scale = TRUE)
plot(Howells_pca$x[, 1], Howells_pca$x[, 2], type = "n", xlab = "PC1", ylab = "PC2", main = "PCA of Cranial Measurements by Sex")
points(Howells_pca$x[Howells$Sex == "M", 1], Howells_pca$x[Howells$Sex == "M", 2], col = "lightgreen", pch = 16)
points(Howells_pca$x[Howells$Sex == "F", 1], Howells_pca$x[Howells$Sex == "F", 2], col = "lightblue", pch = 16)
legend("topright", legend = c("Male", "Female"), col = c("lightgreen", "lightblue"), pch = 16)
centroid_male <- colMeans(Howells_pca$x[Howells$Sex == "M", 1:2])
centroid_female <- colMeans(Howells_pca$x[Howells$Sex == "F", 1:2])
points(centroid_male[1], centroid_male[2], pch = 8, col = "black", cex = 2)
points(centroid_female[1], centroid_female[2], pch = 8, col = "purple4", cex = 2)

c.
eigenvalues <- Howells_pca$sdev ^ 2
barplot(eigenvalues, ylab = "Variance", xlab = "Principal Component",
names.arg = paste0("PC", 1:length(eigenvalues)), main = "Scree Plot of Eigenvalues")

percent_variance <- eigenvalues / sum(eigenvalues) * 100
percent_variance[1:2]
## [1] 61.11668 15.19518
# PC1 explains 61.12% of the variance and PC2 explains 15.20%, so they account for 76.32% of the total variance together.
d.
Howells_pca$rotation
## PC1 PC2 PC3 PC4 PC5 PC6
## GOL -0.3465310 -0.587149895 -0.36244649 -0.5594148 0.28636548 0.09409394
## BBH -0.3607789 -0.487682042 0.72635325 0.3111983 0.07825679 0.03804233
## XCB -0.3727482 0.563268444 0.23823164 -0.2344307 0.61955648 -0.21960210
## ZYB -0.4740149 0.050671386 -0.04447027 -0.1401316 -0.59165211 -0.63330532
## AUB -0.4708969 0.312355070 0.02343741 -0.1109481 -0.35787016 0.73468400
## NPH -0.4054375 -0.004075318 -0.53081283 0.7094410 0.22337676 -0.02525498
# For PC1, biauricular breadth and bizygomatic breadth have the largest negative loadings, which makes them the most significant contributors. Since all variables have negative loadings, PC1 probably represents an overall trend across glabello-occipital length, basion-bregma height, maximum cranial breadth, bizygomatic breadth, biauricular breadth, and nasion-prosthion height. Higher PC1 scores are associated with lower values across these measurements.