Boddy <- read.csv("C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Files to Import/Boddy et al 2012.csv", header=TRUE)
Miller <- read.csv("C:/Users/kelse/OneDrive/Documents/Research Design Analysis/Files to Import/Miller et al 2014.csv", header=TRUE)

Question 1

a.

nrow (Miller)
## [1] 107
ncol (Miller) 
## [1] 15
head (Miller)
##                       taxon            com.name Live Bone    major.gp  mass.kg
## 1          Acinonyx jubatus             Cheetah    Y    Y     Felidae  43.4000
## 2                Acomys sp.         Spiny Mouse    Y    N     Muridae   0.0298
## 3        Aepyceros melampus              Impala    Y    Y  Antilopini  52.5001
## 4          Aethomys kaiseri            Bush Rat    Y    N     Muridae   0.0899
## 5     Alcelaphus buselaphus             Kongoni    Y    Y Alcelaphini 171.0015
## 6 Arvicanthis cf. niloticus Unstriped Grass Rat    N    Y     Muridae   0.1069
##   diet.1 diet.2 diet.2.sum shel.hab shel.hab.sum   feed.hab feed.hab.sum
## 1      A   meat       meat   grl.bh          grl        grl          grl
## 2      P     gr         gr   cav.ag          cav grl.wdl.bh      grl_wdl
## 3      P  br.gr         br      wdl          wdl    wdl.grl      grl_wdl
## 4      P  gr.br         gr   cav.ag          cav grl.wdl.bh      grl_wdl
## 5      P     gr         gr      grl          grl        grl          grl
## 6      P  fr.gr      fruit      grl          grl        grl          grl
##   h20.dep act.time
## 1       M        D
## 2       L      C.N
## 3       H        D
## 4       M        N
## 5       H        D
## 6       M        D
colnames(Miller)
##  [1] "taxon"        "com.name"     "Live"         "Bone"         "major.gp"    
##  [6] "mass.kg"      "diet.1"       "diet.2"       "diet.2.sum"   "shel.hab"    
## [11] "shel.hab.sum" "feed.hab"     "feed.hab.sum" "h20.dep"      "act.time"
sapply(Miller, function(x) length(unique(x)))
##        taxon     com.name         Live         Bone     major.gp      mass.kg 
##          107          103            3            2           47          102 
##       diet.1       diet.2   diet.2.sum     shel.hab shel.hab.sum     feed.hab 
##            3           12            8           20            7           20 
## feed.hab.sum      h20.dep     act.time 
##            7            3           10
# The rows represent different species. 

b.

hist(Miller$mass.kg, main = "Frequency Distribution of Body Mass", 
     xlab = "Body Mass (kg)", col = "lightpink2", breaks = 20)

mean_mass <- mean(Miller$mass.kg, na.rm = TRUE)
median_mass <- median(Miller$mass.kg, na.rm = TRUE)
sd_mass <- sd(Miller$mass.kg, na.rm = TRUE)
iqr_mass <- IQR(Miller$mass.kg, na.rm = TRUE)

mean_mass
## [1] 102.945
median_mass
## [1] 3.155
sd_mass
## [1] 428.8769
iqr_mass
## [1] 38.93065
log_mass <- log(Miller$mass.kg)
hist(log_mass, main = "Log-transformed Body Mass", 
     xlab = "Log Body Mass (kg)", col = "lightpink2", breaks = 20)

Question 2

a.

# It's super clear that this needs to be log transformed first. 

Miller$log_mass <- log(Miller$mass.kg + 0.01)

boxplot(Miller$log_mass ~ Miller$diet.1, 
        main = "Log-Transformed Body Mass by Diet Category",
        xlab = "Diet Category", 
        ylab = "Log Body Mass (kg)", 
        col = c("lightblue", "lightgreen", "lightpink"))

b.

anova_log_mass <- aov(mass.kg ~ diet.1, data = Miller)
summary(anova_log_mass)
##              Df   Sum Sq Mean Sq F value Pr(>F)  
## diet.1        2   932153  466076   2.611 0.0783 .
## Residuals   104 18564998  178510                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Question 3

a.

diet_habitat <- table(Miller$diet.1, Miller$feed.hab.sum)
diet_habitat
##    
##     aer arb bh grl grl_bh grl_wdl wdl
##   A   9   1  3   5      3      11   6
##   O   0   2  1   0      1      11   2
##   P   0   1  6  18      6      11  10
chisq_diet <- chisq.test(diet_habitat)
## Warning in chisq.test(diet_habitat): Chi-squared approximation may be incorrect
chisq_diet
## 
##  Pearson's Chi-squared test
## 
## data:  diet_habitat
## X-squared = 38.454, df = 12, p-value = 0.0001294

Question 4

a.

Miller$Bone <- as.factor(Miller$Bone)
Miller$log_mass <- log10(Miller$mass.kg + 0.01)
bone_model <- glm(Bone ~ log_mass, data = Miller, family = binomial)
summary(bone_model)
## 
## Call:
## glm(formula = Bone ~ log_mass, family = binomial, data = Miller)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.3512     0.2211  -1.588    0.112    
## log_mass      0.6405     0.1576   4.065 4.81e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 147.88  on 106  degrees of freedom
## Residual deviance: 128.29  on 105  degrees of freedom
## AIC: 132.29
## 
## Number of Fisher Scoring iterations: 4

b.

# The slope coefficient is 0.6405, and the p-value is statistically significant. The slope is positive (that is expected). This shows that body mass does impact whether a species ends up in the bone record. When body mass increases by 10x, the chance of being found in the bone record increases by almost 2 times (1.9 specficially). So basically, bigger animals are more likely to be in the bone record, which supports smaller animals not appearing as often. 

Question 5

a.

Boddy$log_body_mass <- log10(Boddy$body_mass_g + 0.01)
Boddy$log_brain_mass <- log10(Boddy$brain_mass_g + 0.01)

primates <- Boddy[Boddy$Order == "Primates", ]
non_primates <- Boddy[Boddy$Order != "Primates", ]

plot(non_primates$log_body_mass, non_primates$log_brain_mass, 
     xlab = "Log Body Mass (g)", ylab = "Log Brain Mass (g)", 
     main = "Brain Mass vs Body Mass",
     col = "lightblue", pch = 16)
     points(primates$log_body_mass, primates$log_brain_mass, col = "lavender", pch = 16)
     legend("bottomright", legend = c("Non-Primates", "Primates"), 
       col = c("lightblue", "lavender"), pch = 16)

b.

Boddy$primate_status <- factor(Boddy$Order == "Primates", labels = c("Non-Primate", "Primate"))
Boddy_model <- lm(log_brain_mass ~ log_body_mass * primate_status, data = Boddy)
summary(Boddy_model)
## 
## Call:
## lm(formula = log_brain_mass ~ log_body_mass * primate_status, 
##     data = Boddy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68671 -0.10688  0.00683  0.09282  0.75269 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         -1.234158   0.015890 -77.670   <2e-16 ***
## log_body_mass                        0.721976   0.005345 135.083   <2e-16 ***
## primate_statusPrimate                0.220841   0.105626   2.091   0.0369 *  
## log_body_mass:primate_statusPrimate  0.055001   0.030346   1.812   0.0704 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1743 on 626 degrees of freedom
## Multiple R-squared:  0.9711, Adjusted R-squared:  0.971 
## F-statistic:  7023 on 3 and 626 DF,  p-value: < 2.2e-16
# For non-primates, the intercept is -1.23, and for primates, it’s -1.01. This means primates have a 22% higher intercept, which is statistically significant (p-value = 3.69%). Non-primates have a slope of 0.72, and primates have a slope of 0.78. However, this slope difference isn’t significant because the p-value is 7.04%. So while the intercept changes between primates and non-primates, the slope doesn’t change in any meaningful way.