Boddy <- read.csv("C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Files to Import/Boddy et al 2012.csv", header=TRUE)
Miller <- read.csv("C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Files to Import/Miller et al 2014.csv", header=TRUE)
Question 1
a.
nrow (Miller)
## [1] 107
ncol (Miller)
## [1] 15
head (Miller)
## taxon com.name Live Bone major.gp mass.kg
## 1 Acinonyx jubatus Cheetah Y Y Felidae 43.4000
## 2 Acomys sp. Spiny Mouse Y N Muridae 0.0298
## 3 Aepyceros melampus Impala Y Y Antilopini 52.5001
## 4 Aethomys kaiseri Bush Rat Y N Muridae 0.0899
## 5 Alcelaphus buselaphus Kongoni Y Y Alcelaphini 171.0015
## 6 Arvicanthis cf. niloticus Unstriped Grass Rat N Y Muridae 0.1069
## diet.1 diet.2 diet.2.sum shel.hab shel.hab.sum feed.hab feed.hab.sum
## 1 A meat meat grl.bh grl grl grl
## 2 P gr gr cav.ag cav grl.wdl.bh grl_wdl
## 3 P br.gr br wdl wdl wdl.grl grl_wdl
## 4 P gr.br gr cav.ag cav grl.wdl.bh grl_wdl
## 5 P gr gr grl grl grl grl
## 6 P fr.gr fruit grl grl grl grl
## h20.dep act.time
## 1 M D
## 2 L C.N
## 3 H D
## 4 M N
## 5 H D
## 6 M D
colnames(Miller)
## [1] "taxon" "com.name" "Live" "Bone" "major.gp"
## [6] "mass.kg" "diet.1" "diet.2" "diet.2.sum" "shel.hab"
## [11] "shel.hab.sum" "feed.hab" "feed.hab.sum" "h20.dep" "act.time"
sapply(Miller, function(x) length(unique(x)))
## taxon com.name Live Bone major.gp mass.kg
## 107 103 3 2 47 102
## diet.1 diet.2 diet.2.sum shel.hab shel.hab.sum feed.hab
## 3 12 8 20 7 20
## feed.hab.sum h20.dep act.time
## 7 3 10
# The rows represent different species.
b.
hist(Miller$mass.kg, main = "Frequency Distribution of Body Mass",
xlab = "Body Mass (kg)", col = "lightpink2", breaks = 20)

mean_mass <- mean(Miller$mass.kg, na.rm = TRUE)
median_mass <- median(Miller$mass.kg, na.rm = TRUE)
sd_mass <- sd(Miller$mass.kg, na.rm = TRUE)
iqr_mass <- IQR(Miller$mass.kg, na.rm = TRUE)
mean_mass
## [1] 102.945
median_mass
## [1] 3.155
sd_mass
## [1] 428.8769
iqr_mass
## [1] 38.93065
log_mass <- log(Miller$mass.kg)
hist(log_mass, main = "Log-transformed Body Mass",
xlab = "Log Body Mass (kg)", col = "lightpink2", breaks = 20)

Question 2
a.
# It's super clear that this needs to be log transformed first.
Miller$log_mass <- log(Miller$mass.kg + 0.01)
boxplot(Miller$log_mass ~ Miller$diet.1,
main = "Log-Transformed Body Mass by Diet Category",
xlab = "Diet Category",
ylab = "Log Body Mass (kg)",
col = c("lightblue", "lightgreen", "lightpink"))

b.
anova_log_mass <- aov(mass.kg ~ diet.1, data = Miller)
summary(anova_log_mass)
## Df Sum Sq Mean Sq F value Pr(>F)
## diet.1 2 932153 466076 2.611 0.0783 .
## Residuals 104 18564998 178510
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Question 3
a.
diet_habitat <- table(Miller$diet.1, Miller$feed.hab.sum)
diet_habitat
##
## aer arb bh grl grl_bh grl_wdl wdl
## A 9 1 3 5 3 11 6
## O 0 2 1 0 1 11 2
## P 0 1 6 18 6 11 10
chisq_diet <- chisq.test(diet_habitat)
## Warning in chisq.test(diet_habitat): Chi-squared approximation may be incorrect
chisq_diet
##
## Pearson's Chi-squared test
##
## data: diet_habitat
## X-squared = 38.454, df = 12, p-value = 0.0001294
Question 4
a.
Miller$Bone <- as.factor(Miller$Bone)
Miller$log_mass <- log10(Miller$mass.kg + 0.01)
bone_model <- glm(Bone ~ log_mass, data = Miller, family = binomial)
summary(bone_model)
##
## Call:
## glm(formula = Bone ~ log_mass, family = binomial, data = Miller)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3512 0.2211 -1.588 0.112
## log_mass 0.6405 0.1576 4.065 4.81e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 147.88 on 106 degrees of freedom
## Residual deviance: 128.29 on 105 degrees of freedom
## AIC: 132.29
##
## Number of Fisher Scoring iterations: 4
b.
# The slope coefficient is 0.6405, and the p-value is statistically significant. The slope is positive (that is expected). This shows that body mass does impact whether a species ends up in the bone record. When body mass increases by 10x, the chance of being found in the bone record increases by almost 2 times (1.9 specficially). So basically, bigger animals are more likely to be in the bone record, which supports smaller animals not appearing as often.
Question 5
a.
Boddy$log_body_mass <- log10(Boddy$body_mass_g + 0.01)
Boddy$log_brain_mass <- log10(Boddy$brain_mass_g + 0.01)
primates <- Boddy[Boddy$Order == "Primates", ]
non_primates <- Boddy[Boddy$Order != "Primates", ]
plot(non_primates$log_body_mass, non_primates$log_brain_mass,
xlab = "Log Body Mass (g)", ylab = "Log Brain Mass (g)",
main = "Brain Mass vs Body Mass",
col = "lightblue", pch = 16)
points(primates$log_body_mass, primates$log_brain_mass, col = "lavender", pch = 16)
legend("bottomright", legend = c("Non-Primates", "Primates"),
col = c("lightblue", "lavender"), pch = 16)

b.
Boddy$primate_status <- factor(Boddy$Order == "Primates", labels = c("Non-Primate", "Primate"))
Boddy_model <- lm(log_brain_mass ~ log_body_mass * primate_status, data = Boddy)
summary(Boddy_model)
##
## Call:
## lm(formula = log_brain_mass ~ log_body_mass * primate_status,
## data = Boddy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68671 -0.10688 0.00683 0.09282 0.75269
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.234158 0.015890 -77.670 <2e-16 ***
## log_body_mass 0.721976 0.005345 135.083 <2e-16 ***
## primate_statusPrimate 0.220841 0.105626 2.091 0.0369 *
## log_body_mass:primate_statusPrimate 0.055001 0.030346 1.812 0.0704 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1743 on 626 degrees of freedom
## Multiple R-squared: 0.9711, Adjusted R-squared: 0.971
## F-statistic: 7023 on 3 and 626 DF, p-value: < 2.2e-16
# For non-primates, the intercept is -1.23, and for primates, it’s -1.01. This means primates have a 22% higher intercept, which is statistically significant (p-value = 3.69%). Non-primates have a slope of 0.72, and primates have a slope of 0.78. However, this slope difference isn’t significant because the p-value is 7.04%. So while the intercept changes between primates and non-primates, the slope doesn’t change in any meaningful way.