library(readr)
foraging <- read_csv("Desktop/foraging.csv", 
    col_types = cols(`Plant Type` = col_factor(levels = c("1", 
        "3", "4")), `Comment` = col_skip(), 
        ForagingEvent = col_character(), 
        PlantType = col_factor(levels = c("1", 
            "3", "4")), Information = col_skip(), 
        Consumed = col_factor(levels = c("Y", 
            "N"))))
## Warning: The following named parsers don't match the column names: Plant Type,
## Comment
plant <- as.factor(foraging$PlantType)

consume <- as.factor(foraging$Consumed)
levels(plant)
## [1] "1" "3" "4"
levels(consume)
## [1] "Y" "N"

Checking Assumptions

plot(foraging)

hist(foraging$SniffingDuration)

boxplot(foraging$SniffingDuration, foraging$PlantType)

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ dplyr   1.0.2
## ✓ tibble  3.0.4     ✓ stringr 1.4.0
## ✓ tidyr   1.1.2     ✓ forcats 0.5.0
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(ggpubr)
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
class(foraging$SniffingDuration)
## [1] "numeric"
plant.aov <- aov(foraging$SniffingDuration ~ foraging$PlantType + foraging$Consumed)
plant.aov
## Call:
##    aov(formula = foraging$SniffingDuration ~ foraging$PlantType + 
##     foraging$Consumed)
## 
## Terms:
##                 foraging$PlantType foraging$Consumed Residuals
## Sum of Squares             709.221           352.332  6177.535
## Deg. of Freedom                  2                 1       174
## 
## Residual standard error: 5.958446
## Estimated effects may be unbalanced
## 1 observation deleted due to missingness
shapiro.test(foraging$SniffingDuration)
## 
##  Shapiro-Wilk normality test
## 
## data:  foraging$SniffingDuration
## W = 0.73692, p-value < 2.2e-16

Data does not meet the assumptions of an ANOVA.

Equal Variances

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
sniff.vartest <- leveneTest(SniffingDuration ~ Consumed*PlantType, data = foraging)
sniff.vartest
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)   
## group   5  3.5818 0.004164 **
##       172                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Transforming the Data

Inverse log transformation

sniff <- 1/log(foraging$SniffingDuration)

hist(sniff)

foraging$logSniffingDuration<- log(foraging$SniffingDuration)
hist(foraging$logSniffingDuration)

foraging$logSniffingDuration
##   [1] 1.0986123      -Inf      -Inf 0.6931472      -Inf      -Inf      -Inf
##   [8] 2.3025851 0.6931472 1.0986123 1.0986123 0.6931472 2.0794415 2.5649494
##  [15] 1.3862944 1.0986123 0.6931472 1.0986123 2.5649494 2.0794415 1.7917595
##  [22] 1.3862944 1.0986123 2.3025851 2.1972246 2.3025851 0.0000000 0.0000000
##  [29] 2.4069451 2.9454911 3.2180755 2.3272777 2.2159373 1.3056265 1.4374626
##  [36] 1.6272778 0.5538851 2.9729753 2.2782924 0.8458683 2.3075726 2.7363137
##  [43] 0.9895412 1.7732560 0.8415672 1.8437192 1.1568812      -Inf      -Inf
##  [50]      -Inf      -Inf 2.3542283      -Inf      -Inf      -Inf 1.0260416
##  [57]      -Inf      -Inf      -Inf      -Inf 1.4747630      -Inf 1.5282279
##  [64] 1.9501867 1.6253113 3.1135153 1.8066481 2.1815468 1.1537316 2.4006188
##  [71]      -Inf 1.8389611 2.0255132 2.7006898      -Inf      -Inf      -Inf
##  [78]      -Inf      -Inf 0.6931472 0.6931472 1.0986123      -Inf      -Inf
##  [85] 1.0986123      -Inf      -Inf      -Inf      -Inf      -Inf      -Inf
##  [92] 0.6931472      -Inf      -Inf 0.0000000 0.6931472      -Inf      -Inf
##  [99]      -Inf      -Inf      -Inf 1.0986123      -Inf      -Inf      -Inf
## [106]      -Inf      -Inf 0.0000000      -Inf 1.3862944 0.6931472 1.0986123
## [113] 1.7917595 1.0986123 1.6094379 1.3862944 1.0986123      -Inf      -Inf
## [120] 1.3862944 0.6931472 1.3862944 1.6094379 0.0000000 1.3862944 0.0000000
## [127] 0.6931472      -Inf      -Inf      -Inf 1.0986123 1.9459101 0.0000000
## [134] 0.6931472 1.7917595 1.3862944 1.6094379      -Inf      -Inf 2.3978953
## [141] 2.0794415      -Inf 0.6931472        NA 1.0986123 1.0986123 0.6931472
## [148] 1.7917595      -Inf      -Inf 0.0000000 0.6931472      -Inf 1.6094379
## [155]      -Inf      -Inf 0.6931472 1.3862944      -Inf 3.1637859      -Inf
## [162] 2.9866915 3.0056826 2.4397349 2.7173402 2.2905125      -Inf 1.3812818
## [169] 3.4011974 2.0643279 2.2170272 2.8870329 2.1758874 3.3156395      -Inf
## [176] 3.0722302 0.3293037 3.4011974 3.0855730

Consolidated Foraging Event

Testing to see if data can meet assumptions if we consolidate the foraging events to one line per event.

library(readr)
cf <- read_csv("Desktop/consolidated foraging.csv", 
    col_types = cols(ForagingEvent = col_character(), 
        PlantType = col_factor(levels = c("1", 
            "3", "4")), SniffingDuration = col_number(), 
        Consumed = col_factor(levels = c("Y", 
            "N"))))

summary(cf)
##  ForagingEvent         Block           PlantType SniffingDuration Consumed
##  Length:73          Length:73          1:32      Min.   :  0.00   Y:67    
##  Class :character   Class :character   3:16      1st Qu.:  3.00   N: 6    
##  Mode  :character   Mode  :character   4:25      Median :  8.81           
##                                                  Mean   : 11.42           
##                                                  3rd Qu.: 13.00           
##                                                  Max.   :115.00
boxplot(cf$SniffingDuration ~ cf$PlantType)

shapiro.test(cf$SniffingDuration)
## 
##  Shapiro-Wilk normality test
## 
## data:  cf$SniffingDuration
## W = 0.60505, p-value = 1.033e-12
hist(cf$SniffingDuration)

logsniff <- log(cf$SniffingDuration)
hist(logsniff)

shapiro.test(logsniff)
## 
##  Shapiro-Wilk normality test
## 
## data:  logsniff
## W = NaN, p-value = NA
leveneTest(SniffingDuration ~ PlantType*Consumed, data = cf)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  5  0.5341 0.7497
##       67

Consolidated data (cf) meets homogeneity of variances.

Hypothesis 1: Sniffing duration as a function of plant type

One way ANOVA is pretty robust against deviances from normality. So I will try it anyway on the consolidated data.

Data Visualisation

First, I want to visualise the means for each Plant Type.

# Mean plots
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(cf, x = "PlantType", y = "SniffingDuration", 
       add = c("mean_se", "jitter"), 
       order = c("1", "3", "4"),
       ylab = "Sniffing Duration (Seconds)", xlab = "Plant Type")

Transforming the Data

library(readr)
nozero <- read_csv("Desktop/no zero.csv", 
    col_types = cols(PlantType = col_factor(levels = c("1", 
        "3", "4")), SniffingDuration = col_number(), 
        Consumed = col_factor(levels = c("Y", 
            "N"))))
## Warning: 1 parsing failure.
## row              col expected actual                  file
##  69 SniffingDuration a number      z 'Desktop/no zero.csv'
hist(nozero$SniffingDuration)

nozero$SniffingDuration <- log(nozero$SniffingDuration)
hist(nozero$SniffingDuration)

shapiro.test(nozero$SniffingDuration)
## 
##  Shapiro-Wilk normality test
## 
## data:  nozero$SniffingDuration
## W = 0.97924, p-value = 0.3441
leveneTest(SniffingDuration ~ PlantType*Consumed, data = nozero)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  5  1.2667 0.2904
##       59

Normally distributed.

Analysis of Variance

Plant Type

model <- lm(SniffingDuration ~ PlantType + Block, data = nozero)
Anova(model,
      type = "II")
## Anova Table (Type II tests)
## 
## Response: SniffingDuration
##           Sum Sq Df F value Pr(>F)
## PlantType  0.091  2  0.0477 0.9534
## Block     11.082  8  1.4487 0.1981
## Residuals 51.636 54

This shows no significant results.

Histogram and plot of residuals
x = residuals (model)
library(rcompanion)
plotNormalHistogram(x)

Interaction between Plant Type and Consumption

Now, lets see if there is an interaction between plant type and consumption.

model2 = lm(SniffingDuration ~ PlantType*Consumed + Block, data = nozero)

Anova(model2) #Type 1 ANOVA
## Anova Table (Type II tests)
## 
## Response: SniffingDuration
##                    Sum Sq Df F value  Pr(>F)  
## PlantType           0.025  2  0.0131 0.98702  
## Consumed            2.939  1  3.1222 0.08321 .
## Block               7.352  8  0.9763 0.46501  
## PlantType:Consumed  0.694  2  0.3684 0.69366  
## Residuals          48.004 51                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interaction term not significant.

Testing without Blocking

model3 = lm(SniffingDuration ~ PlantType*Consumed, data = nozero)

Anova(model3)
## Anova Table (Type II tests)
## 
## Response: SniffingDuration
##                    Sum Sq Df F value   Pr(>F)   
## PlantType           0.052  2  0.0277 0.972706   
## Consumed            7.085  1  7.5517 0.007941 **
## PlantType:Consumed  0.277  2  0.1478 0.862887   
## Residuals          55.356 59                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Consumption is significant when the block is taken out of it. This shows that the block counts for the variance in the data to be considered significant, and it is not actually due to plant type, consumption or the interaction between the two factors.

Hypothesis 2: Sniffing Duration as a Function of Consumption

## Importing newly formatted data that can be used for a t-test. This data has two columns (consumed, not consumed)
library(readr)
test <- read_csv("Desktop/t test.csv", col_types = cols(NotConsumed = col_number(), 
    Consumed = col_number()))

T-Test

t.test(test$Consumed, test$NotConsumed)
## 
##  Welch Two Sample t-test
## 
## data:  test$Consumed and test$NotConsumed
## t = -3.3367, df = 8.1894, p-value = 0.009944
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -23.235166  -4.289163
## sample estimates:
## mean of x mean of y 
##  10.29284  24.05500

Describing the Data

T value is only -3.3367 so it is not far from 0. Small degrees of freedom due to limited “not consumed” entries. 95% of the time, the true difference between means is between -23.24 and -4.29.

Interpretation

The mean sniffing duration in seconds for not consumed is 24.0550 while the mean sniffing duration for consumed plants is only 10.29284. This suggests that the shorter the sniffing duration, the more likely a wallaby is to consume the plant.

p = 0.009944 with an alpha of 0.05.