##Variation

###Visualizing distributions

# Bar chart: number of diamonds in each `cut` category.
ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar()

# Histogram of `carat` using 0.5-carat-wide bins.
ggplot(data = diamonds, mapping = aes(x = carat)) +
  geom_histogram(binwidth = 0.5)

# Same 0.5-carat histogram, restricted to diamonds under 3 carats.
diamonds %>%
  filter(carat < 3) %>%
  ggplot(mapping = aes(x = carat)) +
  geom_histogram(binwidth = 0.5)

# Overlaid frequency polygons of `carat` (0.1-carat bins), one line per `cut`.
ggplot(data = diamonds, mapping = aes(x = carat, colour = cut)) +
  geom_freqpoly(binwidth = 0.1)

###Typical values

# Fine-grained histogram (0.01-carat bins) of diamonds under 3 carats.
diamonds %>%
  filter(carat < 3) %>%
  ggplot(mapping = aes(x = carat)) +
  geom_histogram(binwidth = 0.01)

# Histogram of `eruptions` from the `faithful` dataset, 0.25-wide bins.
ggplot(data = faithful, mapping = aes(x = eruptions)) +
  geom_histogram(binwidth = 0.25)

###Unusual values

# Histogram of the `y` column, zoomed in on counts 0-50 so that sparsely
# populated bins (candidate unusual values) become visible.
# - The column `y` is mapped to the x aesthetic explicitly; a bare `aes(y)`
#   does the same thing positionally but reads as if it set the y aesthetic.
# - An explicit binwidth replaces the default of 30 bins, which ggplot2
#   reports with a "Pick better value with `binwidth`" message.
# - coord_cartesian() only changes the visible window; no rows are dropped.
diamonds %>%
  ggplot(mapping = aes(x = y)) +
  geom_histogram(binwidth = 0.5) +
  coord_cartesian(ylim = c(0, 50))

##Missing values

# Keep every row, but blank out implausible `y` values (below 3 or above 20)
# by setting them to NA, then draw a scatterplot of `x` against `y`.
diamonds %>%
  mutate(y = ifelse(y >= 3 & y <= 20, y, NA)) %>%
  ggplot(mapping = aes(x = x, y = y)) +
  geom_point()
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

##Covariation

###Categorical & Continuous

# Boxplots of `price` for each `cut` category.
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
  geom_boxplot()

###Two categorical

# Tally rows per (color, cut) combination and show the counts (`n`) as the
# fill of a tile grid.
diamonds %>%
  count(color, cut) %>%
  ggplot(mapping = aes(x = color, y = cut, fill = n)) +
  geom_tile()

###Two continuous

library(hexbin)
# Hexagonal-bin plot of `price` against `carat`.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_hex()

# For diamonds under 3 carats, draw one boxplot of `price` per 0.1-wide
# interval of `carat` (intervals produced by cut_width()).
diamonds %>%
  filter(carat < 3) %>%
  ggplot(mapping = aes(x = carat, y = price)) +
  geom_boxplot(mapping = aes(group = cut_width(carat, 0.1)))

##Patterns & Models

library(modelr)
# Linear model of log(price) on log(carat), fitted to the full dataset.
mod <- lm(log(price) ~ log(carat), data = diamonds)

# Attach the model residuals as a `resid` column, then exponentiate them
# because the model was fitted on log(price).
diamonds2 <- diamonds %>%
  modelr::add_residuals(mod) %>%
  mutate(resid = exp(resid))

# Scatterplot of the exponentiated residuals against `carat`.
ggplot(data = diamonds2, mapping = aes(x = carat, y = resid)) +
  geom_point()

# Boxplots of the exponentiated residuals for each `cut` category.
ggplot(data = diamonds2, mapping = aes(x = cut, y = resid)) +
  geom_boxplot()