Du <- read.csv("C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Files to Import/Du_etal_2018.csv", header=TRUE)
Maxwell <- read.csv("C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Files to Import/Maxwell_etal_2018.csv", header=TRUE)
Pearce <- read.csv("C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Files to Import/Pearce_etal_2013.csv", header=TRUE)

Question 1

a.

species_num <- length (unique(Pearce$MSW3sp))
species_num
## [1] 100
# There are 100 species 

num_of_measure <- table(Pearce$MSW3sp)

hist(num_of_measure, main = "Histogram of Measurements Per Species",
     xlab = "Number of Measurements", ylab = "Frequency")

med_measure <- median(num_of_measure)
med_measure
## [1] 2
# Median num of measurements per species is 2

b.

plot(Pearce$mass, Pearce$home_range, 
     main="Home Range as a Function of Body Mass", 
     xlab="Mass (grams)", 
     ylab="Home Range (square km)")

Pearce$log_home_range <- log(Pearce$home_range)
Pearce$log_mass <- log(Pearce$mass)

plot(Pearce$log_mass, Pearce$log_home_range,
     main="Log(Home Range) as a Function of Log(Mass)",
     xlab="Log(Mass) (grams)", 
     ylab="Log(Home Range) (square km)")

c. 

#install.packages("nlme", repos = "https://cloud.r-project.org")
library(nlme)

lmm <- lme(log_home_range ~ log_mass,         
           random = ~ 1 | MSW3sp,   
           data = Pearce)
summary(lmm)
## Linear mixed-effects model fit by REML
##   Data: Pearce 
##        AIC     BIC   logLik
##   851.8781 866.474 -421.939
## 
## Random effects:
##  Formula: ~1 | MSW3sp
##         (Intercept)  Residual
## StdDev:    1.286372 0.7347218
## 
## Fixed effects:  log_home_range ~ log_mass 
##                  Value Std.Error  DF   t-value p-value
## (Intercept) -2.3920396 0.7812137 185 -3.061953  0.0025
## log_mass     0.7668417 0.0938061 185  8.174753  0.0000
##  Correlation: 
##          (Intr)
## log_mass -0.984
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -4.1918108 -0.3499751  0.0178977  0.4572035  2.7159028 
## 
## Number of Observations: 286
## Number of Groups: 100
# The fixed effects estimates: 
# intercept = -2.39 
# p-value = 0.0025 (statistically significant)
# log mass: 
# slope 0.7668417
# p-value < 0.0001 (statistically significant)

# Standard deviation: 
# Intercept: 1.286372
# Residual: 0.7347218

# This shows that body mass significantly influences home range size. The random effects show a standard deviation of 1.29 for the intercept across species, indicating moderate variability in baseline home range sizes across species.

Question 2

a.

species_num_du <- length(unique(Du$species))
species_num_du
## [1] 10
# There are 10 species 

ecv_cc_per_species <- table(Du$species)
ecv_cc_per_species
## 
##      Au. afarensis      Au. africanus          Au. garhi         Au. sediba 
##                  9                 28                  1                  1 
##         H. erectus         H. habilis H. heidelbergensis     P. aethiopicus 
##                 94                 22                  7                  3 
##          P. boisei        P. robustus 
##                 17                  9
ecv_cc_per_individual_id <- table(Du$individual_id)

med_ecv_cc_per_individual_id <- median(ecv_cc_per_individual_id)
med_ecv_cc_per_individual_id 
## [1] 2

b.

hist(Du$ecv_cc, 
     main = "Histogram of ECV Across All Species and Individuals", 
     xlab = "Endocranial Volume (ECV, cc)", 
     ylab = "Frequency",
     col = "orange",
     border = "black")

mean_ecv_cc <- mean(Du$ecv_cc, na.rm = TRUE)  
median_ecv_cc <- median(Du$ecv_cc, na.rm = TRUE)
sd_ecv_cc <- sd(Du$ecv_cc, na.rm = TRUE)  
range_ecv_cc <- range(Du$ecv_cc, na.rm = TRUE)  
iqr_ecv_cc <- IQR(Du$ecv_cc, na.rm = TRUE) 

mean_ecv_cc
## [1] 757.988
median_ecv_cc
## [1] 727
sd_ecv_cc
## [1] 272.0981
range_ecv_cc
## [1]  343 1390
iqr_ecv_cc
## [1] 497.5
# I don't really think the skew is extreme enough to need a log transformation. 

c. 

#install.packages("lme4", repos = "https://cloud.r-project.org")
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
## 
##     lmList
lmm.res <- lmer(ecv_cc ~ 1 + (1 | species) + (1 | species:individual_id), data = Du)
lmm.var <- as.data.frame(VarCorr(lmm.res))$vcov

total_variance <- sum(lmm.var)

prop_variances <- lmm.var / total_variance
prop_variances
## [1] 0.23578673 0.74080053 0.02341274

Question 3

a.

plot(Maxwell$age_Ma, Maxwell$hom_divers, 
     type = "o",                 
     col = "purple",            
     xlab = "Age (Millions of Years Ago)", 
     ylab = "Number of Hominin Species / Collections",
     main = "Hominin Diversity and Collections Over Time")

lines(Maxwell$age_Ma, Maxwell$hom_collect, 
      type = "o",               
      col = "orange")

b.

first_diff_divers <- diff(Maxwell$hom_divers)
first_diff_collect <- diff(Maxwell$hom_collect)

plot(first_diff_divers, type = "l")

plot(first_diff_collect, type = "l")

residual_divers <- lm(Maxwell$hom_divers ~ Maxwell$age_Ma)$residuals
residual_collect <- lm(Maxwell$hom_collect ~ Maxwell$age_Ma)$residuals

plot(residual_divers, type = "l")

plot(residual_collect, type = "l")

acf(first_diff_divers)

acf(first_diff_collect)

acf(residual_divers)

acf(residual_collect)

# For both hom divers and hominin-bearing collections the autocorrelation drops pretty significantly after lag 1, suggesting that there isn't any significant correlation. Also everything stays within the confidence interval lines. 

c. 

diff_divers <- diff(Maxwell$hom_divers)
diff_collect <- diff(Maxwell$hom_collect)

plot(diff_collect, diff_divers)

ccf(diff_collect, diff_divers)

# There is a significant relationship at lags 0 and 5 where the data points cross the blue dotted line which represents the confidence intervals. Honestly I am a little confused about the negative lags still. 

d. 

model <- lm(Maxwell$hom_divers ~ Maxwell$hom_collect)
residuals <- model$residuals
acf(residuals)

# There is no autocorrelation in the residuals. You said to skip this part "Now, fit a generalized least squares model. What are the coefficient estimates, and are they significant?" 

e.

model <- lm(Maxwell$hom_divers ~ Maxwell$hom_collect)
residuals <- model$residuals
plot(Maxwell$age_Ma, residuals, type = "l")