Import data
# Read the project spreadsheet into `data`; the relative path is resolved
# against the current working directory.
data <- read_excel(path = "../00_data/myData.xlsx")
Introduction
Questions
Variation
Visualizing distributions
# Bar chart: number of rows in each food_category.
ggplot(data, aes(x = food_category)) +
  geom_bar()

# Histogram of consumption with the default binning (30 bins).
ggplot(data, aes(x = consumption)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Frequency polygons of consumption, one coloured line per food_category,
# with the default binning (30 bins).
ggplot(data) +
  geom_freqpoly(aes(x = consumption, colour = food_category))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Typical values
# Histogram of consumption (default 30 bins) for reading off typical values.
ggplot(data) +
  geom_histogram(aes(x = consumption))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Unusual values
# Histogram of consumption in 0.5-wide bins, with the y-axis view limited to
# counts between 0 and 50 so that low-count bins are readable.
ggplot(data, aes(x = consumption)) +
  geom_histogram(binwidth = 0.5) +
  coord_cartesian(ylim = c(0, 50))

Missing values
# Recode consumption values outside the 0-400 range as NA; every row is kept.
data <- data %>%
  mutate(
    consumption = ifelse(
      !(consumption >= 0 & consumption <= 400),
      NA,
      consumption
    )
  )
# Scatterplot of co2_emmission against consumption after the NA recode;
# rows with a missing value are omitted from the plot with a warning.
ggplot(data) +
  geom_point(aes(x = consumption, y = co2_emmission))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Covariation
A categorical and continuous variable
# Frequency polygons of co2_emmission in 500-wide bins, one coloured line per
# food_category.
ggplot(data, aes(x = co2_emmission, colour = food_category)) +
  geom_freqpoly(binwidth = 500)

# Boxplots of co2_emmission, one box per food_category (categories on the
# y-axis).
ggplot(data) +
  geom_boxplot(aes(x = co2_emmission, y = food_category))

Two categorical variables
# Count plot of food_category against co2_emmission: point size reflects the
# number of rows at each (x, y) combination.
# NOTE(review): this section is titled for two categorical variables, but
# co2_emmission is binned as a continuous variable elsewhere in this
# document - confirm this pairing is intended.
ggplot(data, aes(x = food_category, y = co2_emmission)) +
  geom_count()

# Count plot of food_category against consumption: point size reflects the
# number of rows at each (x, y) combination.
# NOTE(review): consumption is plotted as a continuous variable elsewhere in
# this document, while this section is titled for two categorical
# variables - confirm this pairing is intended.
ggplot(data, aes(x = food_category, y = consumption)) +
  geom_count()
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_sum()`).

# Count plot of consumption against co2_emmission: point size reflects the
# number of rows at each (x, y) combination.
# NOTE(review): both variables are treated as continuous elsewhere in this
# document, while this section is titled for two categorical variables -
# confirm this plot belongs here.
ggplot(data, aes(x = consumption, y = co2_emmission)) +
  geom_count()
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_sum()`).

Two continuous variables
# Scatterplot of co2_emmission against consumption.
ggplot(data, aes(x = consumption, y = co2_emmission)) +
  geom_point()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

# 2D binned heatmap of co2_emmission against consumption; fill shows the
# number of rows in each rectangular bin.
ggplot(data, aes(x = consumption, y = co2_emmission)) +
  geom_bin2d()
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin2d()`).

# Point plot of consumption for each food_category.
# NOTE(review): food_category is used as a grouping variable elsewhere in
# this document, while this section is titled for two continuous variables -
# confirm this plot belongs here.
ggplot(data, aes(x = food_category, y = consumption)) +
  geom_point()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

# 2D binned heatmap of consumption against food_category; fill shows the
# number of rows in each bin.
# NOTE(review): food_category is used as a grouping variable elsewhere in
# this document, while this section is titled for two continuous variables -
# confirm this plot belongs here.
ggplot(data, aes(x = food_category, y = consumption)) +
  geom_bin2d()
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin2d()`).

Patterns and models
# Scatterplot of co2_emmission against consumption, used to look for a
# pattern between the two variables.
ggplot(data, aes(x = consumption, y = co2_emmission)) +
  geom_point()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
