Introduction

Questions

Variation

Visualizing Distributions

# Bar chart: one bar per `cut` level, bar height = number of rows.
diamonds %>%
    ggplot(mapping = aes(x = cut)) +
    geom_bar()

# Histogram of `carat` using 0.5-wide bins.
# The layer has to be joined to ggplot() with `+`; without it ggplot() draws
# an empty canvas and geom_histogram() is evaluated (and printed) by itself.
# The bin-width argument is spelled `binwidth`; a misspelled name is ignored
# with an "unknown parameters" warning.
diamonds %>%
    ggplot(mapping = aes(x = carat)) +
    geom_histogram(binwidth = 0.5)

# Histogram of `carat` (0.5-wide bins) for rows with carat below 3.
diamonds %>%
    filter(carat < 3) %>%
    ggplot(mapping = aes(x = carat)) +
    geom_histogram(binwidth = 0.5)

# Frequency polygons of `carat`, one coloured line per `cut` level.
# No bin width is given, so the default binning is used.
diamonds %>%
    ggplot(mapping = aes(x = carat, color = cut)) +
    geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Typical values

# Fine-grained histogram of `carat` (0.01-wide bins) for carat below 3.
diamonds %>%
    filter(carat < 3) %>%  # filter out the bigger diamonds
    ggplot(mapping = aes(x = carat)) +
    geom_histogram(binwidth = 0.01)

# Histogram of the `eruptions` column of `faithful` (default binning).
faithful %>%
    ggplot(mapping = aes(x = eruptions)) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Unusual Values

# Histogram of the `y` column with default binning.
diamonds %>%
    ggplot(mapping = aes(x = y)) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of `y` with the count axis limited to 0-50 so that sparsely
# populated bins become visible. coord_cartesian() only zooms the view;
# the data passed to the histogram is unchanged.
diamonds %>%
    ggplot(mapping = aes(x = y)) +
    geom_histogram() +
    coord_cartesian(ylim = c(0, 50))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Missing values

# Replace `y` values below 3 or above 20 with NA, then plot x against y.
diamonds %>%
    # filter(y<3|y>20) %>%   (disabled alternative step)
    mutate(y = ifelse(y < 3 | y > 20, NA, y)) %>%
    # Plot
    ggplot(mapping = aes(x = x, y = y)) +
    geom_point()
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

Covariation

A categorical and continuous variable

# Boxplot of `price` for each `cut` level.
diamonds %>%
    ggplot(mapping = aes(x = cut, y = price)) +
    geom_boxplot()

Two categorical variables

# Count rows for every (color, cut) pair and draw the counts as a
# tile grid, with fill mapped to the count column `n`.
diamonds %>%
    count(color, cut) %>%
    ggplot(mapping = aes(x = color, y = cut, fill = n)) +
    geom_tile()

Two continuous variables

library(hexbin)
# Hexagonal-bin plot of `price` against `carat`.
diamonds %>%
    ggplot(mapping = aes(x = carat, y = price)) +
    geom_hex()

# Boxplots of `price`, one box per 0.1-wide bin of `carat`
# (the grouping comes from cut_width()).
diamonds %>%
    ggplot(mapping = aes(x = carat, y = price)) +
    geom_boxplot(mapping = aes(group = cut_width(carat, 0.1)))

Patterns and models

library(modelr)
# Linear model of log(price) on log(carat).
mod <- lm(log(price) ~ log(carat), data = diamonds)

# Add the model residuals as `resid`, then exponentiate them.
diamonds4 <- diamonds %>%
    modelr::add_residuals(mod) %>%
    mutate(resid = exp(resid))

# Exponentiated residuals against carat.
diamonds4 %>%
    ggplot(mapping = aes(x = carat, y = resid)) +
    geom_point()

# Exponentiated residuals summarised per 0.5-wide bin of carat.
diamonds4 %>%
    ggplot(mapping = aes(x = cut_width(carat, 0.5), y = resid)) +
    geom_boxplot()