Exploratory Data Visualization

Author

Maya Frey

library (ggplot2)
library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.3.0      ✔ stringr 1.5.0 
✔ readr   2.1.3      ✔ forcats 0.5.2 
✔ purrr   1.0.1      
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(tinytex)

#Read in data
library(readr)
behavior <- read_csv('behavior_23Feb23.csv')
Rows: 2008 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr   (5): microcolony, date, date_withyr, observer_initials, notes
dbl  (11): replicate, temp, moving, feeding, stationary, incubating, fanning...
lgl   (1): drop
time  (1): time

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(behavior)
# A tibble: 6 × 18
  replicate microcolony date  date_…¹ time   temp moving feeding stati…² incub…³
      <dbl> <chr>       <chr> <chr>   <tim> <dbl>  <dbl>   <dbl>   <dbl>   <dbl>
1         1 37A1        2/22  2/22/22 12:29    37      3       4       0       2
2         2 37A1        2/22  2/22/22 12:46    37      1       2       1       3
3         3 37A1        2/23  2/23/22 11:30    37      4       1       3       2
4         4 37A1        2/23  2/23/22 11:42    37      3       0       4       3
5         5 37A1        2/23  2/23/22 11:51    37      3       3       3       1
6         6 37A1        2/23  2/23/22 12:04    37      0       2       5       3
# … with 8 more variables: fanning <dbl>, total_alive <dbl>,
#   observer_initials <chr>, infected <dbl>, parent <dbl>,
#   day_of_experiment <dbl>, notes <chr>, drop <lgl>, and abbreviated variable
#   names ¹​date_withyr, ²​stationary, ³​incubating
# Formatting data
behavior$infected = as.factor(behavior$infected)
behavior$temp = as.character(behavior$temp)
behavior_noe <- dplyr::filter(behavior, day_of_experiment == "0" | day_of_experiment == "1" | day_of_experiment == "2" | day_of_experiment == "3" | day_of_experiment == "4"| day_of_experiment == "5" | day_of_experiment == "6" | day_of_experiment == "7" | day_of_experiment == "8" | day_of_experiment == "9" | day_of_experiment == "10")
# Mean fanning at each temperature
behavior_meanfanning <- behavior %>%
  group_by(temp) %>%
  drop_na(fanning) %>%
  summarize(meanfanning = mean(fanning), sd = sd(fanning), n = n(), se = sd/sqrt(n))
  
ggplot(data = behavior_meanfanning, aes(x = temp, y = meanfanning)) + geom_point() + geom_errorbar(data = behavior_meanfanning, aes(x = temp, ymin = meanfanning - se, ymax = meanfanning + se), size = 0.5) + theme_bw() + labs (x = "Temperature (C)", y = "Mean bees fanning")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

The mean number of bees exhibiting fanning behavior at the time of observation for each of the different temperatures experimental colonies were kept at. The error bars represent one standard error.

# Mean fanning at each temperature, colored by infection status
behavior_meanfanning_i <- behavior %>%
  group_by(temp, infected) %>%
  drop_na(fanning) %>%
  summarize(meanfanning = mean(fanning), sd = sd(fanning), n = n(), se = sd/sqrt(n))
`summarise()` has grouped output by 'temp'. You can override using the
`.groups` argument.
pd<-position_dodge(width=0.5)
  
ggplot(data = behavior_meanfanning_i, aes(x = temp, y = meanfanning, color = infected)) + geom_point(position = pd) + geom_errorbar(data = behavior_meanfanning_i, aes(x = temp, ymin = meanfanning - se, ymax = meanfanning + se), size = 0.5, position = pd) + theme_bw() + labs (x = "Temperature (C)", y = "Mean bees fanning",  color = "Infected") + scale_color_manual(labels = c("No", "Yes"), values = c("red", "blue"))

The mean number of bees exhibiting fanning behavior at the time of observation for each of the different temperatures experimental colonies were kept at, separated based on whether or not the bees were infected with a gut pathogen. The error bars represent one standard error.

# Total alive by day of experiment (up to 10 days)
ggplot(data = behavior_noe, aes(x = total_alive, fill = temp)) + geom_histogram() + facet_wrap(~day_of_experiment, ncol = 2) +theme(axis.text=element_text(size=15), axis.title=element_text(size=15)) + theme_bw() + theme_bw() + labs(x = "Total bees alive", y = "Count", fill = "Temperature (C)")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Individual histograms for the total number of bees alive (out of 10) at each day of the 10 day long experimental periods. Each histogram is colored to show the distribution of bees alive by temperature treatment.

# Mean incubating at each temperature, colored by infection status
behavior_meaninc_i <- behavior %>%
  group_by(temp, infected) %>%
  drop_na(incubating) %>%
  summarize(meaninc = mean(incubating), sd = sd(incubating), n = n(), se = sd/sqrt(n))
`summarise()` has grouped output by 'temp'. You can override using the
`.groups` argument.
ggplot(data = behavior_meaninc_i, aes(x = temp, y = meaninc, color = infected)) + geom_point(position = pd) + geom_errorbar(data = behavior_meaninc_i, aes(x = temp, ymin = meaninc - se, ymax = meaninc + se), size = 0.5, position = pd) + theme_bw() + labs(x = "Temperature (C)", y = "Mean bees incubating", color = "Infected") + scale_color_manual(labels = c("No", "Yes"), values = c("red", "blue"))

The mean number of bees incubating their colony’s brood at the time of observation for each of the different temperatures experimental colonies were kept at, separated based on whether or not the bees were infected with a gut pathogen. The error bars represent one standard error.

# Mean moving at each temperature, colored by infection status
behavior_meanmov <- behavior %>%
  group_by(temp, infected) %>%
  drop_na(moving) %>%
  summarize(meanmov = mean(moving), sd = sd(moving), n = n(), se = sd/sqrt(n))
`summarise()` has grouped output by 'temp'. You can override using the
`.groups` argument.
ggplot(data = behavior_meanmov, aes(x = temp, y = meanmov, color = infected)) + geom_point() + geom_errorbar(data = behavior_meanmov, aes(x = temp, ymin = meanmov - se, ymax = meanmov + se)) + theme_bw() + labs(x = "Temperature (C)", y = "Mean bees moving", color = "Infected") + scale_color_manual(labels = c("No", "Yes"), values = c("red", "blue"))

The mean number of bees actively moving at the time of observation for each of the different temperatures experimental colonies were kept at, separated based on whether or not the bees were infected with a gut pathogen. The error bars represent one standard error.