Introduction

Questions

Variation

Visualizing distributions

# Bar chart of the number of rows recorded for each country.
ggplot(myData, aes(x = country)) +
    geom_bar()

Typical values

# Keep only the rows whose `Cancers (%)` value is below 12.
# NOTE(review): the recorded output shows zero matching rows; confirm the
# threshold against the actual range of the column.
filter(myData, `Cancers (%)` < 12)
## # A tibble: 0 × 35
## # ℹ 35 variables: country <chr>, country_code <chr>, year <dbl>,
## #   Cardiovascular diseases (%) <dbl>, Cancers (%) <dbl>,
## #   Respiratory diseases (%) <dbl>, Diabetes (%) <dbl>, Dementia (%) <dbl>,
## #   Lower respiratory infections (%) <dbl>, Neonatal deaths (%) <dbl>,
## #   Diarrheal diseases (%) <dbl>, Road accidents (%) <dbl>,
## #   Liver disease (%) <dbl>, Tuberculosis (%) <dbl>, Kidney disease (%) <dbl>,
## #   Digestive diseases (%) <dbl>, HIV/AIDS (%) <dbl>, Suicide (%) <dbl>, …

Unusual values

Not applicable, did not work

Missing values

Covariation

A categorical and continuous variable

Not applicable, did not work with any data

Two categorical variables

# `Cancers (%)` and `Diabetes (%)` are continuous (<dbl>) columns, so counting
# their raw values would put almost every row in its own group. Bin each
# column into 5 equal-width intervals first so the counts (and therefore the
# tile fill) describe how often each combination of ranges occurs.
myData %>%
    count(
        cancers_bin = cut_interval(`Cancers (%)`, n = 5),
        diabetes_bin = cut_interval(`Diabetes (%)`, n = 5)
    ) %>%
    ggplot(aes(x = cancers_bin, y = diabetes_bin, fill = n)) +
    geom_tile() +
    # Label the axes with the original column names rather than the bin names.
    labs(x = "Cancers (%)", y = "Diabetes (%)")

Two continuous variables

library(hexbin)
## Warning: package 'hexbin' was built under R version 4.3.3
# geom_hex() bins two continuous axes. `country` is a character column, which
# is what made stat_binhex() fail, so plot two numeric columns instead.
myData %>%
    ggplot(aes(x = `Cancers (%)`, y = `Diabetes (%)`)) +
    geom_hex()
## Warning: Computation failed in `stat_binhex()`
## Caused by error in `if (diff(ybnds) <= 0) ...`:
## ! missing value where TRUE/FALSE needed

Patterns and models

library(modelr)
# Fit a linear model of log(price) on log(carat) for the diamonds data.
mod <- lm(log(price) ~ log(carat), data = diamonds)

# Attach the model residuals, then exponentiate them to undo the log scale.
diamonds4 <- mutate(
    modelr::add_residuals(diamonds, mod),
    resid = exp(resid)
)

# Residuals against carat.
ggplot(diamonds4, aes(x = carat, y = resid)) +
    geom_point()

# Residuals by cut.
ggplot(diamonds4, aes(x = cut, y = resid)) +
    geom_boxplot()