Import Data

# Read the CSV at ../00_data/myData.csv into the data frame `data`,
# which every plot on `data` below draws from.
data <- read.csv(file = "../00_data/myData.csv")

Introduction

Questions

Variation

Visualizing distributions

# Bar chart: one bar per distinct value of colony_added.
ggplot(data = data, mapping = aes(x = colony_added)) +
  geom_bar()
## Warning: Removed 83 rows containing non-finite values (stat_count).

# Histogram of colony_n with bins 200 units wide.
ggplot(data = data, mapping = aes(x = colony_n)) +
  geom_histogram(binwidth = 200)
## Warning: Removed 47 rows containing non-finite values (stat_bin).

# Zoom in on rows where colony_n is below 5000. filter() also drops rows
# where colony_n is NA, so no "non-finite values" warning is raised here.
data %>%
    filter(colony_n < 5000) %>%
    ggplot(aes(x = colony_n)) +
    # A 0.5-wide bin over values up to 5000 produces thousands of sliver
    # bins; 100-wide bins keep the shape of the distribution readable.
    geom_histogram(binwidth = 100)

# Frequency polygons of colony_n, one coloured line per state
# (default binning, as reported in the message below).
ggplot(data = data, mapping = aes(x = colony_n, color = state)) +
  geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 47 rows containing non-finite values (stat_bin).

Typical values

data %>%

    # Keep only rows where colony_lost_pct exceeds 10; filter() also
    # drops rows where colony_lost_pct is NA.
    filter(colony_lost_pct > 10) %>%

    # Plot the distribution of the remaining values. A binwidth of 0.1
    # splits the range into hundreds of mostly empty bins; 1-wide bins
    # show which values are typical.
    # NOTE(review): assumes colony_lost_pct is recorded in whole
    # percentage points -- confirm against the data.
    ggplot(aes(x = colony_lost_pct)) +
    geom_histogram(binwidth = 1)

### Unusual values

# Histogram of colony_lost_pct over all rows, using the default binning.
ggplot(data = data, mapping = aes(x = colony_lost_pct)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 54 rows containing non-finite values (stat_bin).

# Same histogram, with the visible y-axis limited to 0-50 so that
# low-count bins (potential unusual values) become visible.
# coord_cartesian() zooms the view without discarding any data.
ggplot(data = data, mapping = aes(x = colony_lost_pct)) +
  geom_histogram() +
  coord_cartesian(ylim = c(0, 50))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 54 rows containing non-finite values (stat_bin).

## Missing Values

Covariation

A categorical and continuous variable

# Box plot of colony_lost_pct for each state.
ggplot(data = data, mapping = aes(x = state, y = colony_lost_pct)) +
  geom_boxplot()
## Warning: Removed 54 rows containing non-finite values (stat_boxplot).

### Two categorical variables

# Count the rows for every state/year combination (count() stores the
# tally in column `n`) and show the counts as a tile map.
ggplot(
  data = count(data, state, year),
  mapping = aes(x = year, y = state, fill = n)
) +
  geom_tile()

### Two continuous variables

# geom_hex() needs the hexbin package for its hexagonal binning.
library(hexbin)

# Hexagonal 2-D bin plot of colony_lost against colony_lost_pct.
ggplot(data = data, mapping = aes(x = colony_lost, y = colony_lost_pct)) +
  geom_hex()
## Warning: Removed 54 rows containing non-finite values (stat_binhex).

# Box plots of price for diamonds under 3 carats, with carat cut into
# 0.1-wide groups so that each group gets its own box.
ggplot(
  data = filter(diamonds, carat < 3),
  mapping = aes(x = carat, y = price)
) +
  geom_boxplot(mapping = aes(group = cut_width(carat, 0.1)))

## Patterns and models

library(modelr)

# Linear model of log(price) on log(carat), fitted to diamonds.
mod <- lm(formula = log(price) ~ log(carat), data = diamonds)

# Attach the model residuals as column `resid`, then exponentiate them
# to undo the log transform of the response.
diamonds2 <- mutate(
  modelr::add_residuals(diamonds, mod),
  resid = exp(resid)
)

# Scatter plot of the back-transformed residuals against carat.
ggplot(data = diamonds2, mapping = aes(x = carat, y = resid)) +
  geom_point()