Du <- read.csv("C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Files to Import/Du_etal_2018.csv", header=TRUE)
Maxwell <- read.csv("C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Files to Import/Maxwell_etal_2018.csv", header=TRUE)
Pearce <- read.csv("C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Files to Import/Pearce_etal_2013.csv", header=TRUE)
Question 1
a.
species_num <- length (unique(Pearce$MSW3sp))
species_num
## [1] 100
# There are 100 species
num_of_measure <- table(Pearce$MSW3sp)
hist(num_of_measure, main = "Histogram of Measurements Per Species",
xlab = "Number of Measurements", ylab = "Frequency")

med_measure <- median(num_of_measure)
med_measure
## [1] 2
# Median num of measurements per species is 2
b.
plot(Pearce$mass, Pearce$home_range,
main="Home Range as a Function of Body Mass",
xlab="Mass (grams)",
ylab="Home Range (square km)")

Pearce$log_home_range <- log(Pearce$home_range)
Pearce$log_mass <- log(Pearce$mass)
plot(Pearce$log_mass, Pearce$log_home_range,
main="Log(Home Range) as a Function of Log(Mass)",
xlab="Log(Mass) (grams)",
ylab="Log(Home Range) (square km)")

c.
#install.packages("nlme", repos = "https://cloud.r-project.org")
library(nlme)
lmm <- lme(log_home_range ~ log_mass,
random = ~ 1 | MSW3sp,
data = Pearce)
summary(lmm)
## Linear mixed-effects model fit by REML
## Data: Pearce
## AIC BIC logLik
## 851.8781 866.474 -421.939
##
## Random effects:
## Formula: ~1 | MSW3sp
## (Intercept) Residual
## StdDev: 1.286372 0.7347218
##
## Fixed effects: log_home_range ~ log_mass
## Value Std.Error DF t-value p-value
## (Intercept) -2.3920396 0.7812137 185 -3.061953 0.0025
## log_mass 0.7668417 0.0938061 185 8.174753 0.0000
## Correlation:
## (Intr)
## log_mass -0.984
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -4.1918108 -0.3499751 0.0178977 0.4572035 2.7159028
##
## Number of Observations: 286
## Number of Groups: 100
# The fixed effects estimates:
# intercept = -2.39
# p-value = 0.0025 (statistically significant)
# log mass:
# slope 0.7668417
# p-value < 0.0001 (statistically significant)
# Standard deviation:
# Intercept: 1.286372
# Residual: 0.7347218
# This shows that body mass significantly influences home range size. The random effects show a standard deviation of 1.29 for the intercept across species, indicating moderate variability in baseline home range sizes across species.
Question 2
a.
species_num_du <- length(unique(Du$species))
species_num_du
## [1] 10
# There are 10 species
ecv_cc_per_species <- table(Du$species)
ecv_cc_per_species
##
## Au. afarensis Au. africanus Au. garhi Au. sediba
## 9 28 1 1
## H. erectus H. habilis H. heidelbergensis P. aethiopicus
## 94 22 7 3
## P. boisei P. robustus
## 17 9
ecv_cc_per_individual_id <- table(Du$individual_id)
med_ecv_cc_per_individual_id <- median(ecv_cc_per_individual_id)
med_ecv_cc_per_individual_id
## [1] 2
b.
hist(Du$ecv_cc,
main = "Histogram of ECV Across All Species and Individuals",
xlab = "Endocranial Volume (ECV, cc)",
ylab = "Frequency",
col = "orange",
border = "black")

mean_ecv_cc <- mean(Du$ecv_cc, na.rm = TRUE)
median_ecv_cc <- median(Du$ecv_cc, na.rm = TRUE)
sd_ecv_cc <- sd(Du$ecv_cc, na.rm = TRUE)
range_ecv_cc <- range(Du$ecv_cc, na.rm = TRUE)
iqr_ecv_cc <- IQR(Du$ecv_cc, na.rm = TRUE)
mean_ecv_cc
## [1] 757.988
median_ecv_cc
## [1] 727
sd_ecv_cc
## [1] 272.0981
range_ecv_cc
## [1] 343 1390
iqr_ecv_cc
## [1] 497.5
# I don't really think the skew is extreme enough to need a log transformation.
c.
#install.packages("lme4", repos = "https://cloud.r-project.org")
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
lmm.res <- lmer(ecv_cc ~ 1 + (1 | species) + (1 | species:individual_id), data = Du)
lmm.var <- as.data.frame(VarCorr(lmm.res))$vcov
total_variance <- sum(lmm.var)
prop_variances <- lmm.var / total_variance
prop_variances
## [1] 0.23578673 0.74080053 0.02341274
Question 3
a.
plot(Maxwell$age_Ma, Maxwell$hom_divers,
type = "o",
col = "purple",
xlab = "Age (Millions of Years Ago)",
ylab = "Number of Hominin Species / Collections",
main = "Hominin Diversity and Collections Over Time")
lines(Maxwell$age_Ma, Maxwell$hom_collect,
type = "o",
col = "orange")

b.
first_diff_divers <- diff(Maxwell$hom_divers)
first_diff_collect <- diff(Maxwell$hom_collect)
plot(first_diff_divers, type = "l")

plot(first_diff_collect, type = "l")

residual_divers <- lm(Maxwell$hom_divers ~ Maxwell$age_Ma)$residuals
residual_collect <- lm(Maxwell$hom_collect ~ Maxwell$age_Ma)$residuals
plot(residual_divers, type = "l")

plot(residual_collect, type = "l")

acf(first_diff_divers)

acf(first_diff_collect)

acf(residual_divers)

acf(residual_collect)

# For both hom divers and hominin-bearing collections the autocorrelation drops pretty significantly after lag 1, suggesting that there isn't any significant correlation. Also everything stays within the confidence interval lines.
c.
diff_divers <- diff(Maxwell$hom_divers)
diff_collect <- diff(Maxwell$hom_collect)
plot(diff_collect, diff_divers)

ccf(diff_collect, diff_divers)

# There is a significant relationship at lags 0 and 5 where the data points cross the blue dotted line which represents the confidence intervals. Honestly I am a little confused about the negative lags still.
d.
model <- lm(Maxwell$hom_divers ~ Maxwell$hom_collect)
residuals <- model$residuals
acf(residuals)

# There is no autocorrelation in the residuals. You said to skip this part "Now, fit a generalized least squares model. What are the coefficient estimates, and are they significant?"
e.
model <- lm(Maxwell$hom_divers ~ Maxwell$hom_collect)
residuals <- model$residuals
plot(Maxwell$age_Ma, residuals, type = "l")
