(on the left: locate the correct working directory, click “dummydata.xlsx” and choose “Import” when prompted). OR, as I did here: save the dummydata tab from the Excel file as .csv IN YOUR WORKING DIRECTORY (to find out what your current working directory is, run getwd(); if you need to change the location: top bar -> Session -> Set Working Directory -> browse to the location [and put the file where your R project & script are located])
#dummydata <- read.csv("dummydata.csv")
library(readxl)
# Import the workbook from the working directory; read_excel() reads the
# first sheet unless `sheet` is supplied.
dummydata <- read_excel(path = "dummydata.xlsx")
# Inspect the dimensions and column types of the imported table.
str(dummydata)
## tibble [4 × 6] (S3: tbl_df/tbl/data.frame)
## $ locoC : num [1:4] 23.3 20.4 16.4 27
## $ restC : num [1:4] 70.4 75.4 70.9 58.9
## $ climbC: num [1:4] 6.36 3.25 12.76 14.13
## $ locoW : num [1:4] 31 36 36 25
## $ restW : num [1:4] 33 30 28 24
## $ climbW: num [1:4] 36 35 36 51
Other libraries
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Quick visual check of each column's distribution, one histogram per column.
# NOTE(review): each column holds only 4 observations (see the str() output),
# so the "skewed" judgements below are tentative at best.
hist(dummydata$locoC)
hist(dummydata$restC) # skewed
hist(dummydata$climbC) # skewed
hist(dummydata$locoW) # skewed
hist(dummydata$restW)
hist(dummydata$climbW) # skewed
# Shapiro-Wilk normality test for each column (null hypothesis: the data are
# normally distributed, so p < 0.05 suggests non-normality).
# NOTE(review): each column has only 4 observations (see the str() output), so
# these tests have little power - read "ok" as "no evidence against normality".
shapiro.test(dummydata$locoC) # ok (p > 0.05)
##
## Shapiro-Wilk normality test
##
## data: dummydata$locoC
## W = 0.99877, p-value = 0.9963
shapiro.test(dummydata$restC) # ok (p > 0.05)
##
## Shapiro-Wilk normality test
##
## data: dummydata$restC
## W = 0.87913, p-value = 0.3349
shapiro.test(dummydata$climbC) # ok (p > 0.05)
##
## Shapiro-Wilk normality test
##
## data: dummydata$climbC
## W = 0.9074, p-value = 0.4687
shapiro.test(dummydata$locoW) # ok (p > 0.05)
##
## Shapiro-Wilk normality test
##
## data: dummydata$locoW
## W = 0.85877, p-value = 0.2558
shapiro.test(dummydata$restW) # ok (p > 0.05)
##
## Shapiro-Wilk normality test
##
## data: dummydata$restW
## W = 0.9938, p-value = 0.976
shapiro.test(dummydata$climbW) # NOT normal (p < 0.05)
##
## Shapiro-Wilk normality test
##
## data: dummydata$climbW
## W = 0.68315, p-value = 0.007261
Locomotion & Resting 1. Locomotion
# Two-sample t-test: captive (locoC) vs wild (locoW) locomotion.
# With default arguments t.test() treats the samples as unpaired and runs
# Welch's version (no equal-variance assumption), as the output header shows.
t.test(dummydata$locoC,dummydata$locoW)
##
## Welch Two Sample t-test
##
## data: dummydata$locoC and dummydata$locoW
## t = -2.9682, df = 5.8699, p-value = 0.02568
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -18.722189 -1.752311
## sample estimates:
## mean of x mean of y
## 21.76275 32.00000
Conclusion: There is a significant difference (p = 0.026). Whether the wild or captive animals move more can be read from the sample estimates above (captive mean 21.8 vs wild mean 32.0), or checked by looking at the raw data or a plot.
# Two-sample t-test: captive (restC) vs wild (restW) resting.
# Same defaults as for locomotion: unpaired samples, Welch's version.
t.test(dummydata$restC,dummydata$restW)
##
## Welch Two Sample t-test
##
## data: dummydata$restC and dummydata$restW
## t = 10.051, df = 4.5961, p-value = 0.0002664
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 29.57825 50.65125
## sample estimates:
## mean of x mean of y
## 68.86475 28.75000
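As above: there is a significant difference in resting (p < 0.001); the sample estimates show a captive mean of 68.9 vs a wild mean of 28.8.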
For non-normal data, use the Wilcoxon rank-sum test instead of the t-test. This test needs to be done if AT LEAST ONE of the datasets you want to compare is not normally distributed (here: climbW):
# Wilcoxon rank-sum test: captive (climbC) vs wild (climbW) climbing, used
# because climbW failed the Shapiro-Wilk check.
# The "cannot compute exact p-value with ties" warning appears because the
# data contain tied values; R then reports the test "with continuity
# correction" instead of an exact p-value, as the output header shows.
wilcox.test(dummydata$climbC,dummydata$climbW)
## Warning in wilcox.test.default(dummydata$climbC, dummydata$climbW): cannot
## compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: dummydata$climbC and dummydata$climbW
## W = 0, p-value = 0.0294
## alternative hypothesis: true location shift is not equal to 0
The data needs to be rearranged into “longer format”, with the columns Behaviour, Environment and Observation (called Duration in the code below). This needs multiple steps. IT MAY BE EASIER TO MANIPULATE IN EXCEL (see below what the table should look like)
For the brave, who’d like to do it ALL in R (which will require a lot of adjustments regarding your column names):
# Reshape wide -> long: every value in the six behaviour columns becomes its
# own row, with the former column name stored in ID and the value in Duration.
df_L <- dummydata %>%
  pivot_longer(cols = 1:6, names_to = "ID", values_to = "Duration")
# Confirm the new shape and column types.
str(df_L)
## tibble [24 × 2] (S3: tbl_df/tbl/data.frame)
## $ ID : chr [1:24] "locoC" "restC" "climbC" "locoW" ...
## $ Duration: num [1:24] 23.29 70.35 6.36 31 33 ...
Data “Wrangling” = restructuring
# Split the combined column name in ID (e.g. "locoC") into its two parts:
#   - Environment: from the trailing letter (C = captive, W = wild)
#   - Behaviour:   from the leading word (climb / loco / rest)
# The patterns are anchored ("C$", "^climb") so that a capital C or W, or a
# behaviour word, appearing elsewhere in a column name cannot be mislabelled.
# Names that match no rule become NA (case_when()'s default), which makes
# unexpected column names easy to spot after adapting this to your own data.
df_Lb <- df_L %>%
  mutate(
    Environment = case_when(
      grepl("C$", ID) ~ "captive",
      grepl("W$", ID) ~ "wild"
    ),
    Behaviour = case_when(
      grepl("^climb", ID) ~ "Climbing",
      grepl("^loco", ID) ~ "Locomotion",
      grepl("^rest", ID) ~ "Resting"
    )
  ) %>%
  # ID is now fully encoded in Environment + Behaviour, so drop it.
  select(-ID)
THIS is what the table in Excel should look like for the plotting code below to work:
# Print the reshaped long-format table (the tibble printout shows only the
# first 10 rows and summarises the rest).
df_Lb
## # A tibble: 24 × 3
## Duration Environment Behaviour
## <dbl> <chr> <chr>
## 1 23.3 captive Locomotion
## 2 70.4 captive Resting
## 3 6.36 captive Climbing
## 4 31 wild Locomotion
## 5 33 wild Resting
## 6 36 wild Climbing
## 7 20.4 captive Locomotion
## 8 75.4 captive Resting
## 9 3.25 captive Climbing
## 10 36 wild Locomotion
## # ℹ 14 more rows
# Grouped boxplot: Duration per Behaviour, with one box per Environment
# (distinguished by fill colour).
ggplot(
  data = df_Lb,
  mapping = aes(x = Behaviour, y = Duration, fill = Environment)
) +
  geom_boxplot()
# Second alternative: the same grouped boxplot via ggpubr, with the two
# environments distinguished by outline colour.
library(ggpubr)
ggboxplot(
  data = df_Lb,
  x = "Behaviour",
  y = "Duration",
  color = "Environment",
  palette = c("#FC4E07", "#00AFBB")
)