Import data

# Read the album data from the Excel workbook into `data`.
# The call is namespace-qualified so it works even when readxl has not been
# attached with library(). readxl reports that the workbook's first column has
# an empty header and is renamed to `...1` on import.
data <- readxl::read_excel("../00_data/my.Data.xlsx")
## New names:
## • `` -> `...1`

Introduction

Questions

Variation

library(ggplot2)

# Bar chart: one bar per genre, bar height = number of rows with that genre.
ggplot(data, aes(x = genre)) +
  geom_bar() +
  ggtitle("Count of Albums by Genre") +
  xlab("Genre") +
  ylab("Count") +
  theme_minimal()

library(ggplot2)

# Histogram of `differential` split into 20 bins (blue bars, black outlines).
ggplot(data, aes(x = differential)) +
  geom_histogram(bins = 20, fill = "blue", colour = "black") +
  ggtitle("Histogram of Ranking Differential") +
  xlab("Differential") +
  ylab("Frequency") +
  theme_minimal()

# Frequency polygons of `differential`, one coloured line per genre,
# counted in bins that are 50 units wide.
ggplot(data) +
  geom_freqpoly(aes(x = differential, colour = genre), binwidth = 50) +
  ggtitle("Frequency Polygon of Differential by Genre") +
  xlab("Differential") +
  ylab("Frequency") +
  theme_minimal()

Typical values

# Histogram of `release_year` in 5-year-wide bins (blue bars, black outlines).
ggplot(data) +
  geom_histogram(
    aes(x = release_year),
    binwidth = 5,
    fill = "blue",
    colour = "black"
  ) +
  ggtitle("Distribution of Release Year") +
  xlab("Release Year") +
  ylab("Frequency") +
  theme_minimal()

Unusual values

# Scatter plot of `differential` against `release_year`; semi-transparent red
# points so that overlapping observations remain visible.
ggplot(data, aes(x = release_year, y = differential)) +
  geom_point(colour = "red", alpha = 0.6) +
  ggtitle("Scatter Plot Highlighting Unusual Values") +
  xlab("Release Year") +
  ylab("Differential") +
  theme_minimal()

Covariation

# Frequency polygons of `release_year`, one coloured line per genre,
# counted in 5-year-wide bins. The colour legend is titled "Genre".
ggplot(data) +
  geom_freqpoly(aes(x = release_year, colour = genre), binwidth = 5) +
  ggtitle("Covariation Between Release Year and Genre") +
  xlab("Release Year") +
  ylab("Frequency") +
  labs(colour = "Genre") +
  theme_minimal()

A categorical and continuous variable

library(ggplot2)

# Frequency polygons of `release_year`, one coloured line per genre,
# counted in 10-year-wide bins.
ggplot(data) +
  geom_freqpoly(aes(x = release_year, colour = genre), binwidth = 10) +
  ggtitle("Frequency Polygon of Albums by Release Year and Genre") +
  xlab("Release Year") +
  ylab("Frequency") +
  theme_minimal()

### Two categorical variables

# Two categorical variables: how many albums fall into each decade/genre pair.
# `release_year` is binned into decades and converted to a factor so it acts as
# a discrete variable next to `genre`; geom_count() draws one point per
# combination and scales its size by the number of rows in that combination.
ggplot(data = data) +
  geom_count(
    mapping = aes(x = factor(floor(release_year / 10) * 10), y = genre),
    color = "blue",
    alpha = 0.6
  ) +
  labs(
    title = "Count of Albums by Release Decade and Genre",
    x = "Release Decade",
    y = "Genre",
    size = "Count"
  ) +
  theme_minimal()

Two continuous variables

# Scatter plot of `rank_2020` against `release_year`; semi-transparent blue
# points so that overlapping observations remain visible.
ggplot(data, aes(x = release_year, y = rank_2020)) +
  geom_point(colour = "blue", alpha = 0.6) +
  ggtitle("Scatter Plot of Release Year vs Rank 2020") +
  xlab("Release Year") +
  ylab("Rank in 2020") +
  theme_minimal()

Patterns and models

# Scatter plot of `differential` against `release_year`; semi-transparent blue
# points so that overlapping observations remain visible.
ggplot(data, aes(x = release_year, y = differential)) +
  geom_point(colour = "blue", alpha = 0.6) +
  ggtitle("Scatter Plot of Release Year vs Differential") +
  xlab("Release Year") +
  ylab("Differential") +
  theme_minimal()