Howells <- read.csv("C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Files to Import/howells_hw.csv", header=TRUE)

Question 1

a.

nrow (Howells)
## [1] 2524

b.

table (Howells$Sex)
## 
##    F    M 
## 1156 1368

c. 

table (Howells$Population)
## 
##     AINU  ANDAMAN   ANYANG  ARIKARA   ATAYAL AUSTRALI     BERG   BURIAT 
##       86       70       42       69       47      101      109      109 
##  BUSHMAN    DOGON EASTER I    EGYPT   ESKIMO     GUAM   HAINAN   MOKAPU 
##       90       99       86      111      108       57       83      100 
##  MORIORI  N JAPAN  N MAORI    NORSE     PERU PHILLIPI  S JAPAN  S MAORI 
##      108       87       10      110      110       50       91       10 
## SANTA CR TASMANIA    TEITA    TOLAI  ZALAVAR     ZULU 
##      102       87       83      110       98      101

Question 2

a.

manova_result <- manova(cbind(GOL, BBH, XCB, ZYB, AUB, NPH) ~ Sex, data = Howells)
summary (manova_result)
##             Df  Pillai approx F num Df den Df    Pr(>F)    
## Sex          1 0.45667   352.59      6   2517 < 2.2e-16 ***
## Residuals 2522                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The p-value is less than 0.01%

b.

manova_population <- manova(cbind(GOL, BBH, XCB, ZYB, AUB, NPH) ~ Population, data = Howells)
summary(manova_population)
##              Df Pillai approx F num Df den Df    Pr(>F)    
## Population   29 2.1104    46.66    174  14964 < 2.2e-16 ***
## Residuals  2494                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The p-value is less than 0.01%

Question 3

a.

sapply(Howells[, c("GOL", "BBH", "XCB", "ZYB", "AUB", "NPH")], range)
##      GOL BBH XCB ZYB AUB NPH
## [1,] 151 107 116 105  98  48
## [2,] 206 155 167 158 149  82
# I am going to scale the data because the measurements are different orders of magnitude. 

Howells_pca <- prcomp(Howells[, c("GOL", "BBH", "XCB", "ZYB", "AUB", "NPH")], scale = TRUE)

summary(Howells_pca)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.9149 0.9548 0.6980 0.69234 0.61105 0.28527
## Proportion of Variance 0.6112 0.1520 0.0812 0.07989 0.06223 0.01356
## Cumulative Proportion  0.6112 0.7631 0.8443 0.92421 0.98644 1.00000
Howells_pca$rotation
##            PC1          PC2         PC3        PC4         PC5         PC6
## GOL -0.3465310 -0.587149895 -0.36244649 -0.5594148  0.28636548  0.09409394
## BBH -0.3607789 -0.487682042  0.72635325  0.3111983  0.07825679  0.03804233
## XCB -0.3727482  0.563268444  0.23823164 -0.2344307  0.61955648 -0.21960210
## ZYB -0.4740149  0.050671386 -0.04447027 -0.1401316 -0.59165211 -0.63330532
## AUB -0.4708969  0.312355070  0.02343741 -0.1109481 -0.35787016  0.73468400
## NPH -0.4054375 -0.004075318 -0.53081283  0.7094410  0.22337676 -0.02525498

b

Howells_pca <- prcomp(Howells[, c("GOL", "BBH", "XCB", "ZYB", "AUB", "NPH")], scale = TRUE)

plot(Howells_pca$x[, 1], Howells_pca$x[, 2], type = "n", xlab = "PC1", ylab = "PC2", main = "PCA of Cranial Measurements by Sex")

points(Howells_pca$x[Howells$Sex == "M", 1], Howells_pca$x[Howells$Sex == "M", 2], col = "lightgreen", pch = 16)
points(Howells_pca$x[Howells$Sex == "F", 1], Howells_pca$x[Howells$Sex == "F", 2], col = "lightblue", pch = 16)
legend("topright", legend = c("Male", "Female"), col = c("lightgreen", "lightblue"), pch = 16)

centroid_male <- colMeans(Howells_pca$x[Howells$Sex == "M", 1:2])
centroid_female <- colMeans(Howells_pca$x[Howells$Sex == "F", 1:2])

points(centroid_male[1], centroid_male[2], pch = 8, col = "black", cex = 2)
points(centroid_female[1], centroid_female[2], pch = 8, col = "purple4", cex = 2)

c. 

eigenvalues <- Howells_pca$sdev ^ 2

barplot(eigenvalues, ylab = "Variance", xlab = "Principal Component", 
        names.arg = paste0("PC", 1:length(eigenvalues)), main = "Scree Plot of Eigenvalues")

percent_variance <- eigenvalues / sum(eigenvalues) * 100
percent_variance[1:2] 
## [1] 61.11668 15.19518
# PC1 explains 61.12% of the variance and PC2 explains 15.20%, so they account for 76.32% of the total variance together. 

d. 

Howells_pca$rotation
##            PC1          PC2         PC3        PC4         PC5         PC6
## GOL -0.3465310 -0.587149895 -0.36244649 -0.5594148  0.28636548  0.09409394
## BBH -0.3607789 -0.487682042  0.72635325  0.3111983  0.07825679  0.03804233
## XCB -0.3727482  0.563268444  0.23823164 -0.2344307  0.61955648 -0.21960210
## ZYB -0.4740149  0.050671386 -0.04447027 -0.1401316 -0.59165211 -0.63330532
## AUB -0.4708969  0.312355070  0.02343741 -0.1109481 -0.35787016  0.73468400
## NPH -0.4054375 -0.004075318 -0.53081283  0.7094410  0.22337676 -0.02525498
# For PC1, biauricular breadth and bizygomatic breadth have the largest negative loadings, which makes them the most significant contributors. Since all variables have negative loadings, PC1 probably represents an overall trend across glabello-occipital length, basion-bregma height, maximum cranial breadth, bizygomatic breadth, biauricular breadth, and nasion-prosthion height. Higher PC1 scores are associated with lower values across these measurements.