Import dataset: dummydata

(On the left: locate the correct working directory, click “dummydata.xlsx” and choose “import” when prompted.) OR, as I did here: save the dummydata tab from the Excel file as .csv IN YOUR WORKING DIRECTORY (to find out what your current working directory is, run getwd(); if you need to change the location: top bar -> session -> set working directory -> browse location [and put it where your R project & script are located]).

# Load the example dataset from the Excel workbook in the working directory.
# CSV alternative, if the sheet was saved as .csv instead:
# dummydata <- read.csv("dummydata.csv")
library(readxl)
dummydata <- read_excel(path = "dummydata.xlsx")

# Inspect the structure: column names, types and the first values.
str(object = dummydata)
## tibble [4 × 6] (S3: tbl_df/tbl/data.frame)
##  $ locoC : num [1:4] 23.3 20.4 16.4 27
##  $ restC : num [1:4] 70.4 75.4 70.9 58.9
##  $ climbC: num [1:4] 6.36 3.25 12.76 14.13
##  $ locoW : num [1:4] 31 36 36 25
##  $ restW : num [1:4] 33 30 28 24
##  $ climbW: num [1:4] 36 35 36 51

Other libraries

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

2. Checking normality

Plotting the histograms

# Visual normality check: draw one histogram per column of dummydata
# (suffix C / W = the two groups compared later on).
# NOTE(review): the str() output shows only 4 observations per column, so the
# shape of these histograms is a rough guide at best.
hist(dummydata$locoC) 

hist(dummydata$restC)  # skewed

hist(dummydata$climbC) # skewed

hist(dummydata$locoW) # skewed

hist(dummydata$restW) 

hist(dummydata$climbW) # skewed

# Formal normality check: Shapiro-Wilk test on each column separately.
# A column is annotated "ok" when p > 0.05 and "NOT normal" when p < 0.05.
shapiro.test(dummydata$locoC) # ok (p > 0.05)
## 
##  Shapiro-Wilk normality test
## 
## data:  dummydata$locoC
## W = 0.99877, p-value = 0.9963
shapiro.test(dummydata$restC)  # ok (p > 0.05)
## 
##  Shapiro-Wilk normality test
## 
## data:  dummydata$restC
## W = 0.87913, p-value = 0.3349
shapiro.test(dummydata$climbC) # ok (p > 0.05)
## 
##  Shapiro-Wilk normality test
## 
## data:  dummydata$climbC
## W = 0.9074, p-value = 0.4687
shapiro.test(dummydata$locoW) # ok (p > 0.05)
## 
##  Shapiro-Wilk normality test
## 
## data:  dummydata$locoW
## W = 0.85877, p-value = 0.2558
shapiro.test(dummydata$restW) # ok (p > 0.05)
## 
##  Shapiro-Wilk normality test
## 
## data:  dummydata$restW
## W = 0.9938, p-value = 0.976
shapiro.test(dummydata$climbW) # NOT normal (p < 0.05)
## 
##  Shapiro-Wilk normality test
## 
## data:  dummydata$climbW
## W = 0.68315, p-value = 0.007261

3. Statistical comparisons

Independent t-test: if normally distributed

Locomotion & Resting

1. Locomotion

# Independent (Welch) two-sample t-test on locomotion:
# x = captive group (locoC), y = wild group (locoW).
t.test(x = dummydata$locoC, y = dummydata$locoW)
## 
##  Welch Two Sample t-test
## 
## data:  dummydata$locoC and dummydata$locoW
## t = -2.9682, df = 5.8699, p-value = 0.02568
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -18.722189  -1.752311
## sample estimates:
## mean of x mean of y 
##  21.76275  32.00000

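Conclusion: There is a significant difference. Whether the wild or the captive animals move more can be read from the sample estimates at the bottom of the output (here the wild mean, 32.0, is higher than the captive mean, 21.8), or by looking at the raw data or a plot.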

  2. Resting
# Independent (Welch) two-sample t-test on resting:
# x = captive group (restC), y = wild group (restW).
t.test(x = dummydata$restC, y = dummydata$restW)
## 
##  Welch Two Sample t-test
## 
## data:  dummydata$restC and dummydata$restW
## t = 10.051, df = 4.5961, p-value = 0.0002664
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  29.57825 50.65125
## sample estimates:
## mean of x mean of y 
##  68.86475  28.75000

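Conclusion: as above – there is a significant difference (here the captive animals rest more: mean 68.9 vs. 28.8).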

Mann Whitney Test

For non-normal data: this test must be used if AT LEAST ONE of the datasets you want to compare is not normally distributed:

# Mann-Whitney (Wilcoxon rank sum) test on climbing: climbC vs climbW.
# The warning below is raised because the data contain ties, so no exact
# p-value can be computed and the continuity-corrected version is reported.
wilcox.test(dummydata$climbC,dummydata$climbW)
## Warning in wilcox.test.default(dummydata$climbC, dummydata$climbW): cannot
## compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  dummydata$climbC and dummydata$climbW
## W = 0, p-value = 0.0294
## alternative hypothesis: true location shift is not equal to 0

PLOT

This needs to be rearranged into “longer format”: we want the columns Behaviour, Environment and Observation (named Duration in the code below). This takes multiple steps. IT MAY BE EASIER TO MANIPULATE IN EXCEL (see below what the table should look like).

For the brave, who’d like to do it ALL in R (which will require a lot of adjustments regarding your column names):

# Stack the six measurement columns into two columns: the former column name
# goes into ID and the measured value into Duration (4 rows x 6 columns
# become 24 rows).
df_L <- dummydata %>%
  pivot_longer(
    cols = 1:6,
    names_to = "ID",
    values_to = "Duration"
  )

str(df_L)
## tibble [24 × 2] (S3: tbl_df/tbl/data.frame)
##  $ ID      : chr [1:24] "locoC" "restC" "climbC" "locoW" ...
##  $ Duration: num [1:24] 23.29 70.35 6.36 31 33 ...

Data “Wrangling” = restructuring

# Split the original column names held in ID into two grouping columns:
#   - Environment: taken from the final letter (C = captive, W = wild)
#   - Behaviour:   taken from the leading stem (climb / loco / rest)
# The patterns are anchored ("C$", "^climb", ...) so that a letter appearing
# elsewhere in a renamed column cannot select the wrong label. IDs that match
# no pattern yield NA, which makes naming mistakes visible in the result.
df_Lb <- df_L %>%
  mutate(
    Environment = case_when(
      grepl("C$", ID) ~ "captive",
      grepl("W$", ID) ~ "wild"
    ),
    Behaviour = case_when(
      grepl("^climb", ID) ~ "Climbing",
      grepl("^loco", ID) ~ "Locomotion",
      grepl("^rest", ID) ~ "Resting"
    )
  ) %>%
  # ID is no longer needed once both labels have been derived from it.
  select(-ID)

THIS is what the table in Excel should look like for the code to work:

# Print the long-format table: one row per observation, with its Duration,
# Environment (captive / wild) and Behaviour label.
df_Lb
## # A tibble: 24 × 3
##    Duration Environment Behaviour 
##       <dbl> <chr>       <chr>     
##  1    23.3  captive     Locomotion
##  2    70.4  captive     Resting   
##  3     6.36 captive     Climbing  
##  4    31    wild        Locomotion
##  5    33    wild        Resting   
##  6    36    wild        Climbing  
##  7    20.4  captive     Locomotion
##  8    75.4  captive     Resting   
##  9     3.25 captive     Climbing  
## 10    36    wild        Locomotion
## # ℹ 14 more rows

This is the plot

# Grouped boxplot: Duration per Behaviour, with one filled box per Environment.
ggplot(
  data = df_Lb,
  mapping = aes(x = Behaviour, y = Duration, fill = Environment)
) +
  geom_boxplot()

# Second alternative: the same comparison drawn with ggpubr, colouring the
# boxes by Environment with a custom two-colour palette.
library(ggpubr)
ggboxplot(
  data = df_Lb,
  x = "Behaviour",
  y = "Duration",
  color = "Environment",
  palette = c("#FC4E07", "#00AFBB")
)