Doctors of Dr. Who

library(tidyverse)
library(here)
library(lubridate)
library(ggtext)
library(colorblindr)

knitr::opts_chunk$set(
  fig.width = 8,
  fig.asp = 0.618,
  fig.retina = 3,
  dpi = 300,
  out.width = "90%"
)

episodes <- read_csv(here::here("data/episodes.csv"))
imdb <- read_csv(here::here("data/imdb.csv"))

Task 1: Recreate

Recreate the following plot (Source: https://www.independent.ie/entertainment/doctor-who-suffers-lowest-ratings-since-2005-revival-39028919.html).

series_viewership <- episodes |>
  filter(type != "special") |>
  mutate(
    year = year(first_aired),
    year = if_else(year %in% c(2012, 2013), "2012/13", as.character(year))
    )|>
  filter(year != "2021") |>
group_by(season_number, year) |>
  summarise(mean_uk_viewers = round(mean(uk_viewers), 1), .groups = "drop")
  
ggplot(series_viewership,
       aes(x = mean_uk_viewers, y = fct_rev(year))) +
  geom_col(fill = "#0081BB", width = 0.75) +
  geom_text(
    aes(
      label = paste0(mean_uk_viewers, "m")), 
    hjust = 0, fontface = "bold",
    nudge_x = 0.1
    ) +
  geom_text(
    aes(
      x = -0.1, label = year), hjust = 1 
    )  +
  labs(
    title = "**Doctor Who TV ratings:** series average",
    caption = "Recreated plot from Independent.ie, not all values match."
  ) +
  coord_cartesian(clip = "off", xlim = c(-1, 8.5)) +
  theme_void() +
  theme(
    panel.grid = element_blank(),
    plot.title = element_markdown()
  ) + 
  geom_hline(yintercept = 12.5)

## Task 2: Improve

ggplot(series_viewership,
       aes(x = year, y = mean_uk_viewers, group = 1)) +
  geom_line(size = 1, color = "#0081BB") +
  geom_point(color = "#0081BB") +
  geom_point(color = "white", size = 0.5) +
  geom_text(
    aes(label = paste0(mean_uk_viewers, "m")), 
    nudge_x = -0.5
    ) +
  theme_minimal() +
  labs(
    x = NULL, y = "UK viewers (in millions)",
    title = "**Doctor Who TV ratings:** series average",
    caption = "Recreated plot from Independent.ie, not all values match."
  ) + 
  theme(
    plot.title = element_markdown(),
    axis.text.y = element_blank()
  ) +
  coord_cartesian(clip = "off")

Time series line plot of mean UK viewership for Dr. Who between 2005 and 2020. In 2005, an average viewership was 7.9 million and it decreased steadily to 5.4 million in 2020, except for bumps up to 8 million in 2008 and 2018. Particular sharp decrease observed between 2014 and 2015.

Improve the plot above.

Task 3: Doctors and seasons

In the revived era there have been five doctors, see here for which doctors were in which seasons. Recreate the previous visualization, this time including doctor information.

doctors <- tribble(
  ~season_number, ~doctor_no, ~doctor_name,
               1,          9, "Christopher Eccleston",
               2,         10, "David Tennant",
               3,         10, "David Tennant",
               4,         10, "David Tennant",
               5,         11, "Matt Smith",
               6,         11, "Matt Smith",
               7,         11, "Matt Smith",
               8,         12, "Peter Capaldi",
               9,         12, "Peter Capaldi",
              10,         12, "Peter Capaldi",
              11,         13, "Jodie Whittaker",
              12,         13, "Jodie Whittaker",
)

series_viewership |>
  left_join(doctors) |>
  mutate(doctor_name = fct_reorder(doctor_name, season_number)) |>
ggplot(
  aes(x = year, 
      y = mean_uk_viewers, 
      color = doctor_name,
      shape = doctor_name,
      group = 1)
  ) +
  geom_line(color = "gray80") +
  geom_point(size = 2) +
  geom_text(
    aes(label = paste0(mean_uk_viewers, "m")), 
    nudge_x = -0.5,
    show.legend = F
    ) +
  scale_color_OkabeIto(darken = 0.2) +
  scale_shape_manual(values = c(8, 15:18)) +
  theme_minimal() +
  labs(
    x = NULL, y = "UK viewers (in millions)",
    color = "Doctor",
    shape = "Doctor",
    title = "**Doctor Who TV ratings:** series average",
    caption = "Recreated plot from Independent.ie, not all values match."
  ) + 
  theme(
    plot.title = element_markdown(),
    axis.text.y = element_blank(),
    legend.position = c(0.2, 0.35)
  ) +
  coord_cartesian(clip = "off")

## Joining, by = "season_number"

Task 4: Episode descriptions

Visualize the common words in episode descriptions.

Doctors of Dr. Who

Coding out loud, Ep 1

Mine Çetinkaya-Rundel + Evan Dragich

11/30/2021

Task 1: Recreate

Task 3: Doctors and seasons

Task 4: Episode descriptions