Import Data
# Read the Excel workbook at the relative path below into `data`.
data <- read_excel(path = "../00_data/MyData.xlsx")
Introduction
Questions
Variation
Visualizing distributions
# Bar chart: one bar per distinct Year, bar height = number of rows.
ggplot(data, aes(x = Year)) +
  geom_bar()

# Histogram of Kilometers_Driven.
# `bins` is stated explicitly: 30 is the value stat_bin() was already using
# implicitly, so the plot is unchanged, but the "Pick better value with
# `binwidth`" message is no longer emitted.
# NOTE(review): a `binwidth` chosen in the variable's own units would be more
# meaningful; pick one after checking the range of Kilometers_Driven.
ggplot(data, aes(x = Kilometers_Driven)) +
  geom_histogram(bins = 30)

# Frequency polygons of Year, one coloured line per Owner_Type.
# An explicit binwidth replaces the implicit 30-bin default (and silences the
# stat_bin() "Pick better value" message).
# NOTE(review): binwidth = 1 assumes Year holds whole calendar years, so each
# bin corresponds to one year; confirm against the data.
ggplot(data, aes(x = Year, colour = Owner_Type)) +
  geom_freqpoly(binwidth = 1)

Typical values
# Histogram of Kilometers_Driven, used to look for the most common values.
# `bins = 30` spells out the value stat_bin() previously chose implicitly, so
# the plot is unchanged and the "Pick better value with `binwidth`" message
# is no longer emitted.
# NOTE(review): consider replacing this with a `binwidth` in the variable's
# own units once the range of Kilometers_Driven has been checked.
data %>%
  ggplot(aes(x = Kilometers_Driven)) +
  geom_histogram(bins = 30)

Unusual values
# Histogram of Year with half-unit bins.
data %>%
  ggplot(aes(x = Year)) +
  geom_histogram(binwidth = 0.5)

# Histogram of Seats with half-unit bins. coord_cartesian() zooms the y-axis
# to 0-85 without discarding any rows, so low-count bins become visible.
data %>%
  ggplot(aes(x = Seats)) +
  geom_histogram(binwidth = 0.5) +
  coord_cartesian(ylim = c(0, 85))

Missing Values
# Blank out prices below 3 or above 20 by setting them to NA, keeping the rest
# of each row intact. The result is written back to Price, the column being
# screened. Assigning it to Brand would overwrite the brand names and leave
# Price unfiltered, so the scatter plot below would still show every price.
data2 <- data %>%
  mutate(Price = ifelse(Price < 3 | Price > 20, NA, Price))

# Scatter plot of Price against Year on the screened data.
# na.rm = TRUE: the NAs were introduced on purpose above, so they are dropped
# without the "removed rows containing missing values" warning.
ggplot(data = data2, mapping = aes(x = Year, y = Price)) +
  geom_point(na.rm = TRUE)

Covariation
A categorical and continuous variable
# Box plots of Price, one box per Brand.
ggplot(data = data, mapping = aes(x = Brand, y = Price)) +
  geom_boxplot()

Two categorical variables
# Tile plot of how many rows fall into each Owner_Type / Brand combination;
# count() adds the tally as column `n`, which drives the tile fill.
ggplot(
  data = count(data, Owner_Type, Brand),
  mapping = aes(x = Owner_Type, y = Brand, fill = n)
) +
  geom_tile()

Two continuous variables
Patterns and models