library(tidyverse)
library(here)
library(lubridate)
library(ggtext)
library(colorblindr)
knitr::opts_chunk$set(
fig.width = 8,
fig.asp = 0.618,
fig.retina = 3,
dpi = 300,
out.width = "90%"
)
episodes <- read_csv(here::here("data/episodes.csv"))
imdb <- read_csv(here::here("data/imdb.csv"))
Recreate the following plot (Source: https://www.independent.ie/entertainment/doctor-who-suffers-lowest-ratings-since-2005-revival-39028919.html).
series_viewership <- episodes |>
filter(type != "special") |>
mutate(
year = year(first_aired),
year = if_else(year %in% c(2012, 2013), "2012/13", as.character(year))
)|>
filter(year != "2021") |>
group_by(season_number, year) |>
summarise(mean_uk_viewers = round(mean(uk_viewers), 1), .groups = "drop")
ggplot(series_viewership,
aes(x = mean_uk_viewers, y = fct_rev(year))) +
geom_col(fill = "#0081BB", width = 0.75) +
geom_text(
aes(
label = paste0(mean_uk_viewers, "m")),
hjust = 0, fontface = "bold",
nudge_x = 0.1
) +
geom_text(
aes(
x = -0.1, label = year), hjust = 1
) +
labs(
title = "**Doctor Who TV ratings:** series average",
caption = "Recreated plot from Independent.ie, not all values match."
) +
coord_cartesian(clip = "off", xlim = c(-1, 8.5)) +
theme_void() +
theme(
panel.grid = element_blank(),
plot.title = element_markdown()
) +
geom_hline(yintercept = 12.5)
## Task 2: Improve
ggplot(series_viewership,
aes(x = year, y = mean_uk_viewers, group = 1)) +
geom_line(size = 1, color = "#0081BB") +
geom_point(color = "#0081BB") +
geom_point(color = "white", size = 0.5) +
geom_text(
aes(label = paste0(mean_uk_viewers, "m")),
nudge_x = -0.5
) +
theme_minimal() +
labs(
x = NULL, y = "UK viewers (in millions)",
title = "**Doctor Who TV ratings:** series average",
caption = "Recreated plot from Independent.ie, not all values match."
) +
theme(
plot.title = element_markdown(),
axis.text.y = element_blank()
) +
coord_cartesian(clip = "off")
Improve the plot above.
In the revived era there have been five doctors, see here for which doctors were in which seasons. Recreate the previous visualization, this time including doctor information.
doctors <- tribble(
~season_number, ~doctor_no, ~doctor_name,
1, 9, "Christopher Eccleston",
2, 10, "David Tennant",
3, 10, "David Tennant",
4, 10, "David Tennant",
5, 11, "Matt Smith",
6, 11, "Matt Smith",
7, 11, "Matt Smith",
8, 12, "Peter Capaldi",
9, 12, "Peter Capaldi",
10, 12, "Peter Capaldi",
11, 13, "Jodie Whittaker",
12, 13, "Jodie Whittaker",
)
series_viewership |>
left_join(doctors) |>
mutate(doctor_name = fct_reorder(doctor_name, season_number)) |>
ggplot(
aes(x = year,
y = mean_uk_viewers,
color = doctor_name,
shape = doctor_name,
group = 1)
) +
geom_line(color = "gray80") +
geom_point(size = 2) +
geom_text(
aes(label = paste0(mean_uk_viewers, "m")),
nudge_x = -0.5,
show.legend = F
) +
scale_color_OkabeIto(darken = 0.2) +
scale_shape_manual(values = c(8, 15:18)) +
theme_minimal() +
labs(
x = NULL, y = "UK viewers (in millions)",
color = "Doctor",
shape = "Doctor",
title = "**Doctor Who TV ratings:** series average",
caption = "Recreated plot from Independent.ie, not all values match."
) +
theme(
plot.title = element_markdown(),
axis.text.y = element_blank(),
legend.position = c(0.2, 0.35)
) +
coord_cartesian(clip = "off")
## Joining, by = "season_number"
Visualize the common words in episode descriptions.