Import data
# Load the project data set from the sibling 00_data directory into `data`.
data <- read.csv(file = "../00_data/myData.csv")
Introduction
Questions
Variation
Visualizing distributions
# Bar chart: number of rows per birth country.
ggplot(data = data, mapping = aes(x = birth_country)) +
  geom_bar(fill = "orange")

# Histogram of birth years using half-unit-wide bins.
# NOTE(review): if birth_year only holds whole years, a 0.5 binwidth leaves
# every other bin empty -- confirm the column type before changing it.
ggplot(data = data, mapping = aes(x = birth_year)) +
  geom_histogram(binwidth = 0.5, fill = "red")

Typical values
# Histogram of birth years restricted to people born after 1950.
data %>%
  # Keep only people born after 1950 (rows with birth_year <= 1950 or NA
  # are dropped by filter()).
  filter(birth_year > 1950) %>%
  # Plot with an explicit binwidth so ggplot2 does not fall back to the
  # arbitrary default of 30 bins (and emit the `stat_bin()` message).
  ggplot(aes(x = birth_year)) +
  geom_histogram(binwidth = 1, fill = "blue")

Unusual values
Missing Values
Covariation
A categorical and continuous variable
# Boxplots of birth month, one box per birth country.
ggplot(data = data, mapping = aes(x = birth_month, y = birth_country)) +
  geom_boxplot(fill = "green")

Two categorical variables
library(hexbin)
## Warning: package 'hexbin' was built under R version 4.4.3
# Covariation of two categorical variables: draw one point per
# (state/province, country) combination, sized by how many rows share it.
# geom_hex() bins continuous coordinates, so it is not meaningful here.
data %>%
  ggplot(aes(x = birth_state_province, y = birth_country)) +
  geom_count()

# Covariation of two categorical variables as a heatmap: count the rows in
# each (state/province, country) combination and map that count to tile fill.
# A boxplot needs a continuous variable to summarise, so it does not apply.
data %>%
  count(birth_state_province, birth_country) %>%
  ggplot(aes(x = birth_state_province, y = birth_country)) +
  geom_tile(aes(fill = n))

Two continuous variables
library(hexbin)
# Hexagonal 2-D binning of birth year against birth month.
ggplot(data = data, mapping = aes(x = birth_year, y = birth_month)) +
  geom_hex()

# Boxplots of birth month across birth-year bands.
# birth_year is continuous, so it is cut into 10-unit-wide groups; without a
# group aesthetic ggplot2 draws a single box and warns
# "Continuous x aesthetic -- did you forget aes(group = ...)?".
data %>%
  ggplot(aes(x = birth_year, y = birth_month)) +
  geom_boxplot(aes(group = cut_width(birth_year, 10)))

Patterns and models
library(modelr)
## Warning: package 'modelr' was built under R version 4.4.3
# Fit a log-log linear model of birth year on birth month.
mod <- lm(log(birth_year) ~ log(birth_month), data = data)

# Attach the model residuals to the data and back-transform them from the
# log scale with exp().
data4 <- mutate(modelr::add_residuals(data, mod), resid = exp(resid))

# Scatter plot of the back-transformed residuals against birth year.
ggplot(data = data4, mapping = aes(birth_year, resid)) +
  geom_point()

# Boxplots of the back-transformed residuals across birth-year bands.
# birth_year is continuous, so it is cut into 10-unit-wide groups; without a
# group aesthetic ggplot2 draws a single box and warns
# "Continuous x aesthetic -- did you forget aes(group = ...)?".
data4 %>%
  ggplot(aes(birth_year, resid)) +
  geom_boxplot(aes(group = cut_width(birth_year, 10)), fill = "navy")
