Import data

# Load the dataset from the Excel workbook in the shared data folder.
bee_colonies <- read_excel(path = "../00_data/MyData3.xlsx")

Introduction

Questions

Variation

Visualizing distributions

# Bar chart with one bar per `months` value; bar height is the row count.
bee_colonies %>%
  ggplot(aes(x = months)) +
  geom_bar()

# Tabulate the number of rows for each `months` value.
count(bee_colonies, months)
## # A tibble: 4 × 2
##   months               n
##   <chr>            <int>
## 1 April-June         329
## 2 January-March      329
## 3 July-September     282
## 4 October-December   282
# Histogram of `colony_lost` using the default binning.
bee_colonies %>%
  ggplot(aes(x = colony_lost)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 47 rows containing non-finite values (`stat_bin()`).

    # Unusual values
# Same histogram, zoomed to counts between 0 and 50 so sparsely populated
# bins become visible. coord_cartesian() only changes the visible window;
# it does not filter the data before binning.
ggplot(data = bee_colonies, mapping = aes(x = colony_lost)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 50))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 47 rows containing non-finite values (`stat_bin()`).

# Frequency polygons of `colony_lost`, one coloured line per `months` value.
bee_colonies %>%
  ggplot(aes(x = colony_lost, colour = months)) +
  geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 47 rows containing non-finite values (`stat_bin()`).

A categorical and continuous variable

# Boxplots of `colony_lost_pct`, one box per `months` value.
ggplot(
  data = bee_colonies,
  mapping = aes(x = months, y = colony_lost_pct)
) +
  geom_boxplot()
## Warning: Removed 54 rows containing non-finite values (`stat_boxplot()`).

Two categorical variables

# Heat map of how many rows exist for every `months` x `state` combination;
# the fill colour encodes the count `n` produced by count().
ggplot(
  data = count(bee_colonies, months, state),
  mapping = aes(x = months, y = state, fill = n)
) +
  geom_tile()

Two continuous variables

# Scatter plot of `colony_lost` against `colony_size`.
# The table has only 1,222 rows (see the `months` counts above), so an alpha
# of 1/100 leaves isolated points practically invisible. 1/10 still reveals
# overplotting in dense regions while keeping single points readable.
ggplot(data = bee_colonies) +
  geom_point(mapping = aes(x = colony_size, y = colony_lost), alpha = 1 / 10)
## Warning: Removed 47 rows containing missing values (`geom_point()`).

library(hexbin)
# Hexagonal 2-D binning of `colony_size` vs `colony_lost`; an alternative to
# the scatter plot that shows point density directly.
ggplot(
  data = bee_colonies,
  mapping = aes(x = colony_size, y = colony_lost)
) +
  geom_hex()
## Warning: Removed 47 rows containing non-finite values (`stat_binhex()`).

Patterns and models

library(modelr)

# Fit a linear model of log(colony_lost) on log(colony_size).
mod <- lm(formula = log(colony_lost) ~ log(colony_size), data = bee_colonies)

# Attach the model residuals as `resid`, then exponentiate them to move
# from the log scale back to the original scale.
bee_colonies2 <- mutate(
  modelr::add_residuals(bee_colonies, mod),
  resid = exp(resid)
)
 
# Back-transformed residuals plotted against `colony_size`.
ggplot(data = bee_colonies2, mapping = aes(x = colony_size, y = resid)) +
  geom_point()
## Warning: Removed 47 rows containing missing values (`geom_point()`).

# Boxplots of the back-transformed residuals per `months` value, with the
# y-axis view limited to 0-25 (zoom only; no rows are filtered out).
ggplot(data = bee_colonies2, mapping = aes(x = months, y = resid)) +
  geom_boxplot() +
  coord_cartesian(ylim = c(0, 25))
## Warning: Removed 47 rows containing non-finite values (`stat_boxplot()`).