library(knitr) #for R Markdown
library(MASS) #for negative binomial glm (glm.nb)
library(emmeans) #for posthoc
library(car) #for Anova
## Loading required package: carData
library(tidyverse) #for data processing, put last to avoid function masking
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ dplyr::select() masks MASS::select()
## ✖ purrr::some() masks car::some()
data_raw <- read.csv("DB survey 2013-2018.csv",
# to avoid reading errors
fileEncoding="UTF-8-BOM", na.strings = "")
data_raw <- data_raw %>%
# correct categorical variables data type
mutate_at(c("Year","ID","Color_D","Color_V","Color_D_4","Color_V_4","Sex"), as.factor) %>%
# make numerical color variables that recodes: B = 0, IB = 1, I = 2, IR = 3, R = 4
mutate(Color_D_num = recode(Color_D, B = 0, IB = 1, I = 2, IR = 3, R = 4),
Color_V_num = recode(Color_V, B = 0, IB = 1, I = 2, IR = 3, R = 4))
# set the dataset for subsequent analyses
data <- data_raw
num_summary <- data %>%
drop_na(Year, Sex) %>%
group_by(Year, Sex) %>%
summarise(count = n())
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
plot_num <-
ggplot(data %>% drop_na(Year, Sex),
aes(x = Year, fill = Sex)) +
geom_bar(position = "dodge")+
labs(x = "Year", y = "Number of individuals caught") +
scale_color_discrete(labels = c("Female", "Juvenile", "Male")) +
theme_classic(base_size = 15)
plot_num
Does dorsal color change over time?
Calculate mean and standard deviation of dorsal color scores
dorsal_summary <- data %>%
drop_na(Year, Sex, Color_D_num) %>%
group_by(Year, Sex) %>%
summarise(mean = mean(Color_D_num), sd = sd(Color_D_num), se = sd(Color_D_num) / sqrt(n()))
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
Plot with year on x axis, and dorsal color on y axis, separated by sex
plot_dorsal <-
ggplot(dorsal_summary, aes(x = Year, y= mean, color = Sex, group = Sex)) +
geom_errorbar(aes(ymin=mean - se, ymax = mean + se), colour="black", width=.1, position=position_dodge(0.1)) +
geom_point(position=position_dodge(0.1))+
geom_line(position=position_dodge(0.1)) +
labs(x = "Year", y = "Dorsal color") +
scale_y_continuous(limits = c(0, 4), breaks = seq(0, 4, by = 1),
labels = c("B(0)", "IB(1)", "I(2)", "IR(3)", "R(4)"))+
scale_color_discrete(labels = c("Female", "Juvenile", "Male")) +
theme_classic(base_size = 15)
plot_dorsal
Calculate mean and standard deviation of dorsal color scores
ventral_summary <- data %>%
drop_na(Year, Sex, Color_V_num) %>%
group_by(Year, Sex) %>%
summarise(mean = mean(Color_V_num), sd = sd(Color_V_num), se = sd(Color_V_num) / sqrt(n()))
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
Plot with year on x axis, and dorsal color on y axis, separated by sex
plot_ventral <-
ggplot(ventral_summary, aes(x = Year, y= mean, color = Sex, group = Sex)) +
geom_errorbar(aes(ymin=mean - se, ymax = mean + se), colour="black", width=.1, position=position_dodge(0.1)) +
geom_point(position=position_dodge(0.1))+
geom_line(position=position_dodge(0.1)) +
labs(x = "Year", y = "Ventral color") +
scale_y_continuous(limits = c(0, 4), breaks = seq(0, 4, by = 1),
labels = c("B(0)", "IB(1)", "I(2)", "IR(3)", "R(4)"))+
scale_color_discrete(labels = c("Female", "Juvenile", "Male")) +
theme_classic(base_size = 15)
plot_ventral
How well does dorsal and ventral color correlate with each other?
plot_corr <-
ggplot(data, aes(x = Color_D_num, y = Color_V_num)) +
geom_count() +
stat_smooth(method = "glm", method.args = list(family = "poisson"), color="Black", fill="grey80" )+
scale_size_continuous(breaks = c(0, 10, 50, 100, Inf),
range = c(1, 15))+
labs(x = "Dorsal color", y = "Ventral color") +
scale_y_continuous(limits = c(0, 4), breaks = seq(0, 4, by = 1),
labels = c("B(0)", "IB(1)", "I(2)", "IR(3)", "R(4)"))+
scale_x_continuous(limits = c(0, 4), breaks = seq(0, 4, by = 1),
labels = c("B(0)", "IB(1)", "I(2)", "IR(3)", "R(4)"))+
theme_classic(base_size = 15)
plot_corr
## Warning: Removed 21 rows containing non-finite values (`stat_sum()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (`stat_smooth()`).
plot_corr <-
ggplot(data %>% filter(Sex =="M"),
aes(x = Color_D_num, y = Color_V_num)) +
geom_count() +
stat_smooth(method = "glm", method.args = list(family = "poisson"), color="steelblue3", fill="skyblue1" )+
scale_size_continuous(breaks = c(0, 10, 20, 50, Inf),
range = c(1, 15))+
labs(title = "Males", x = "Dorsal color", y = "Ventral color") +
scale_y_continuous(limits = c(0, 4), breaks = seq(0, 4, by = 1),
labels = c("B(0)", "IB(1)", "I(2)", "IR(3)", "R(4)"))+
scale_x_continuous(limits = c(0, 4), breaks = seq(0, 4, by = 1),
labels = c("B(0)", "IB(1)", "I(2)", "IR(3)", "R(4)"))+
theme_classic(base_size = 15)
plot_corr
## Warning: Removed 8 rows containing non-finite values (`stat_sum()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
plot_corr <-
ggplot(data %>% filter(Sex =="F"),
aes(x = Color_D_num, y = Color_V_num)) +
geom_count() +
stat_smooth(method = "glm", method.args = list(family = "poisson"), color="salmon2", fill="rosybrown1" )+
scale_size_continuous(breaks = c(0, 10, 20, 50, Inf),
range = c(1, 15))+
labs(title = "Females", x = "Dorsal color", y = "Ventral color") +
scale_y_continuous(limits = c(0, 4), breaks = seq(0, 4, by = 1),
labels = c("B(0)", "IB(1)", "I(2)", "IR(3)", "R(4)"))+
scale_x_continuous(limits = c(0, 4), breaks = seq(0, 4, by = 1),
labels = c("B(0)", "IB(1)", "I(2)", "IR(3)", "R(4)"))+
theme_classic(base_size = 15)
plot_corr
## Warning: Removed 10 rows containing non-finite values (`stat_sum()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 10 rows containing non-finite values (`stat_smooth()`).