gallery

Author

gary

Code

library(tidyverse)
library(ggplot2)
library(extrafont)
library(ggridges)

Code

# Ridgeline plot: distribution of my enjoyment rating, one ridge per genre.
# Rows whose genre is "Romance" are filtered out before plotting.
cleanbooks |>
  filter(genre != "Romance") |>
  ggplot(aes(x = enjoyment, y = genre, colour = genre, fill = genre)) +
  geom_density_ridges(alpha = 0.5) +
  # Outline colours per genre; the legend is suppressed because the y axis
  # already names each genre.
  scale_colour_manual(
    values = c(
      "Unrealistic Fiction" = "#BC1B68",
      "Science Fiction" = "#C44579",
      "Realistic Fiction" = "#CB6E8A",
      "Fantasy" = "#D3989B"
    ),
    guide = "none"
  ) +
  # Fill colours per genre (a darker-to-lighter ramp), legend suppressed too.
  scale_fill_manual(
    values = c(
      "Unrealistic Fiction" = "#440021",
      "Science Fiction" = "#824050",
      "Realistic Fiction" = "#C17F7F",
      "Fantasy" = "#FFBFAE"
    ),
    guide = "none"
  ) +
  scale_alpha(guide = "none") +
  labs(
    title = "Enjoyment distribution per book genre",
    x = "Enjoyment rating",
    caption = "1-10 rating by me of how much I enjoyed books I've read in the past 2 years"
  ) +
  theme(
    plot.background = element_rect(fill = "#FEF5F2"),
    panel.background = element_rect(fill = "#FFF1ED"),
    panel.grid.x = element_line(colour = "#8FC6C9"),
    panel.grid.y = element_line(colour = "#F5D2D2"),
    axis.line = element_line(linewidth = 0.25, colour = "#440021"),
    axis.title.y = element_blank()
  )

Realistic and unrealistic fiction are the two genres with the fewest rows (Romance excluded):

Code

# Count of books in each genre (every genre present in cleanbooks).
cleanbooks |>
  pull(genre) |>
  table()


            Fantasy   Realistic Fiction             Romance     Science Fiction 
                 27                  12                   1                  29 
Unrealistic Fiction 
                 10

And to explain the irregularity of the unrealistic fiction distribution, here’s a table of the authors:

Code

# Keep only the "Unrealistic Fiction" rows, then count books per author.
unrealistic <- filter(cleanbooks, genre == "Unrealistic Fiction")
table(unrealistic[["author"]])


  China Miéville  Chuck Palahniuk    Helen Oyeyemi Mikhail Bulgakov 
               1                2                3                1 
   Rebecca Stead     Sarah Gailey    Stuart Turton 
               1                1                1

My lowest-rated book in the entire dataset is in this small subset (as are two of my top ten).

Code

# Dumbbell-style chart of Goodreads, Amazon and personal ratings, restricted
# to books with more than 200,000 Goodreads ratings.
cleanbooks |>
  filter(numgoodreads > 200000) |>
  ggplot(aes(y = shorttitle)) +
  # Connector between the Goodreads and Amazon ratings of each book. The y
  # position is reordered by numgoodreads in this (first) layer.
  geom_segment(
    aes(
      x = goodreads,
      xend = amazon,
      y = reorder(shorttitle, numgoodreads),
      yend = shorttitle
    ),
    colour = "#A79E9E"
  ) +
  # One point layer per rating source; the constant string inside aes() is
  # what creates the corresponding legend key.
  geom_point(
    aes(x = goodreads, colour = "Goodreads"),
    size = 3, alpha = 0.6
  ) +
  geom_point(
    aes(x = amazon, colour = "Amazon"),
    size = 3, alpha = 0.6
  ) +
  geom_point(
    aes(x = personalrate, colour = "Personal rating"),
    size = 3, alpha = 0.6
  ) +
  scale_colour_manual(
    values = c(
      "Goodreads" = "#33a302",
      "Amazon" = "#f3a3a3",
      "Personal rating" = "#87BFFF"
    )
  ) +
  labs(
    title = "Book ratings, Amazon vs Goodreads (and myself)",
    x = "Rating (0-5)",
    y = "Book"
    # Disabled caption, kept for reference:
    # , caption = "only using books I have read and that are highly popular on Goodreads"
  ) +
  theme(
    panel.background = element_rect(
      fill = "#fff", colour = "#f3a3a3", linewidth = 0.55
    ),
    panel.grid = element_line(colour = "#ECE8E8"),
    # x grid lines are deliberately much paler than the y grid lines.
    # NOTE: they have still been observed to render on top of the y lines.
    panel.grid.major.x = element_line(colour = "#F9FAFA"),
    panel.grid.minor.x = element_line(colour = "#F9FAFA"),
    panel.grid.major.y = element_line(colour = "#D7DADA"),
    panel.grid.minor.y = element_line(colour = "#D7DADA"),
    axis.line = element_line(colour = "#33a302"),
    axis.text.y = element_text(hjust = 0),
    axis.title.y = element_blank(),
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.background = element_blank()
  )

What’s interesting is that I keep the Goodreads statistics updated regularly, and the last published version of this plot showed a different set of books. A few months ago, the books with more than 200,000 ratings were:

The Song of Achilles
Good Omens
The Thursday Murder Club
One for the Money
Howl’s Moving Castle
The Colour of Magic
The Master and Margarita
All Systems Red
Uprooted
Parable of the Sower
Guards Guards
Giovanni’s Room

None of the books now displayed are new additions to my list; only their numbers of ratings changed. I should set up a script that scrapes everything once a month or so and plots the current rating and number of ratings against the previous ones.

Code

# Add the reading-date helper columns to cleanbooks in a single pass:
#   readmy    - "<readmonth> <readyear>" as text, NA when either part is NA
#   readdate2 - readmy parsed to a date with my() (presumably lubridate::my,
#               attached through library(tidyverse) - confirm)
#   roundrate - personalrate passed through round()
# readdate2 is only ever needed as the parsed value, so no intermediate
# string version of it is created.
cleanbooks <- cleanbooks |>
  mutate(
    readmy = if_else(
      !is.na(readmonth) & !is.na(readyear),
      paste(readmonth, readyear),
      NA_character_
    ),
    readdate2 = my(readmy),
    roundrate = round(personalrate)
  )




# Number of books per reading month (all years), keyed by readdate2.
# Reused by the later monthly plots as the colour variable.
readbooksnumber <- cleanbooks |>
  group_by(readdate2) |>
  summarise(bookspermonth = n())

# Mean number of Goodreads ratings of the books read in each month,
# excluding anything read in 2021. `.groups = "drop"` returns a plain tibble
# instead of one still grouped by readmy (and silences the regrouping
# message summarise() would otherwise print).
obscurebooks <- cleanbooks |>
  filter(readyear != "2021") |>
  group_by(readmy, readdate2) |>
  summarise(meanpopularity = mean(numgoodreads), .groups = "drop")

# Attach the monthly book counts and discard rows without a month label.
obscurebooksX <- obscurebooks |>
  left_join(readbooksnumber, by = "readdate2") |>
  filter(!is.na(readmy))

# Line chart: mean Goodreads rating count of the books read each month,
# coloured by how many books were read that month.
ggplot(
  obscurebooksX,
  aes(x = readdate2, y = meanpopularity, colour = bookspermonth)
) +
  # `linewidth` is the line-thickness argument; `size` is deprecated for lines.
  geom_line(linewidth = 0.75) +
  scale_colour_gradient(low = "#A4C6B8", high = "#651461") +
  # `date_breaks` is the documented argument for an interval string.
  scale_x_date(date_breaks = "3 months", date_labels = "%m/%y") +
  theme_light() +
  labs(
    title = "Popularity of books I've read",
    x = "Date",
    y = "Mean number of Goodreads reviews",
    colour = "Books read"
  ) +
  theme(
    # Axis lines end in an open arrow head.
    axis.line = element_line(
      linewidth = 0.6,
      colour = "#7F0078",
      arrow = arrow(
        angle = 30, length = unit(0.1, "inches"),
        ends = "last", type = "open"
      )
    ),
    plot.background = element_rect(
      fill = "#DFDEEE", colour = "#400C3E", linewidth = 1.2
    ),
    plot.margin = margin(0.8, 0.8, 0.8, 0.8, "cm"),
    legend.background = element_rect(fill = "#C1D2DE", linewidth = 0.3),
    # Hide the colourbar tick marks outright rather than drawing long
    # transparent ones.
    legend.ticks = element_blank()
  )

Code

# Mean Goodreads rating of the books read in each month, excluding anything
# read in 2021. `.groups = "drop"` returns an ungrouped tibble.
goodreadbooks <- cleanbooks |>
  filter(readyear != "2021") |>
  group_by(readmy, readdate2) |>
  summarise(meanapproval = mean(goodreads), .groups = "drop")

# Attach the monthly book counts (readbooksnumber) and discard rows without
# a month label, matching how obscurebooksX is built, so the date axis never
# receives NA values.
goodreadbooksX <- goodreadbooks |>
  left_join(readbooksnumber, by = "readdate2") |>
  filter(!is.na(readmy))

# Line chart: mean Goodreads rating of the books read each month, coloured
# by how many books were read that month.
ggplot(
  goodreadbooksX,
  aes(x = readdate2, y = meanapproval, colour = bookspermonth)
) +
  geom_line() +
  scale_colour_gradient(low = "#FCA0F1", high = "#780016") +
  # `date_breaks` is the documented argument for an interval string.
  scale_x_date(date_breaks = "3 months", date_labels = "%m/%y") +
  theme_light() +
  labs(
    title = "Goodreads approval of my monthly books",
    x = "Date",
    y = "Average Goodreads review ratings",
    colour = "Books read"
  ) +
  theme(
    # Axis lines end in an open arrow head.
    axis.line = element_line(
      linewidth = 0.6,
      colour = "#790016",
      arrow = arrow(
        angle = 30, length = unit(0.1, "inches"),
        ends = "last", type = "open"
      )
    ),
    plot.background = element_rect(
      fill = "#FFDCF3", colour = "#6C0014", linewidth = 1
    ),
    plot.margin = margin(0.8, 0.8, 0.8, 0.8, "cm"),
    legend.background = element_rect(fill = "#FFD4F0", linewidth = 0.3),
    # Hide the colourbar tick marks outright rather than drawing long
    # transparent ones.
    legend.ticks = element_blank(),
    # Place the legend inside the panel; coordinates are fractions of the
    # plotting area. A bare numeric `legend.position` is deprecated.
    legend.position = "inside",
    legend.position.inside = c(0.65, 0.23)
  )

Code

# Per-book rows (2021 excluded) annotated with the mean enjoyment and mean
# personal rating of the month in which each book was read.
# Grouping is by readdate2 (month + year) so that the same calendar month in
# different years is not pooled; the result is plotted against readdate2.
# NOTE(review): if a seasonal, across-years average was intended, group by
# readmonth instead.
meanenjoyment <- cleanbooks |>
  filter(readyear != "2021") |>
  group_by(readdate2) |>
  mutate(
    meanenjoy = mean(enjoyment),
    meanrate = mean(personalrate)
  ) |>
  ungroup()

# Attach the monthly book counts (readbooksnumber) used as the line colour.
meanenjoymentX <- meanenjoyment |>
  left_join(readbooksnumber, by = "readdate2")



# Line chart of the monthly mean enjoyment rating, coloured by the number of
# books read that month (the colour legend is hidden).
ggplot(
  meanenjoymentX,
  aes(x = readdate2, y = meanenjoy, colour = bookspermonth)
) +
  # `linewidth` is the line-thickness argument; `size` is deprecated for lines.
  geom_line(linewidth = 0.5) +
  labs(
    title = "How much I enjoy my books, on average, per month",
    x = "Month",
    y = "Average enjoyment rating",
    colour = "Books read"
  ) +
  scale_colour_gradient(low = "#FFBFAE", high = "#440021", guide = "none") +
  scale_x_date(date_breaks = "3 month", date_labels = "%m/%y") +
  theme_light() +
  theme(
    # Axis lines end in an open arrow head.
    axis.line = element_line(
      linewidth = 0.4,
      colour = "#440021",
      arrow = arrow(
        angle = 30, length = unit(0.2, "inches"),
        ends = "last", type = "open"
      )
    ),
    plot.background = element_rect(
      fill = "#fce2db", colour = "black", linewidth = 0.1
    ),
    plot.margin = margin(0.8, 0.8, 0.8, 0.8, "cm")
    # Legend styling is intentionally disabled while the colour guide is
    # hidden. Previously tried settings, kept for reference:
    # , legend.background = element_rect(fill = "#fce2db", linewidth = 0.3)
    # , legend.position = c(.13, .3)
  )

Code

# One row per reading month (2021 excluded) with summary statistics:
# book count, total and mean page count, mean Goodreads rating, mean number
# of Goodreads ratings, number of distinct authors, and the mean author age
# rounded to a whole number.
filteredbooks <- cleanbooks |>
  filter(readyear != "2021") |>
  group_by(readdate2) |>
  summarise(
    bookspermonth = n(),
    sumpages = sum(pages),
    meanpages = mean(pages),
    meanapproval = mean(goodreads),
    meanpopularity = mean(numgoodreads),
    authorcount = n_distinct(author),
    meanage = round(mean(authorage))
  )



# Two lines over time: books read per month and distinct authors read per
# month. The constant strings inside aes() create the legend entries.
ggplot(filteredbooks, aes(x = readdate2)) +
  # alpha is a fixed setting, so it belongs outside aes(); inside aes() it
  # would be treated as data, run through an alpha scale and need its legend
  # hidden separately.
  geom_line(aes(y = bookspermonth, colour = "Books/month"), alpha = 0.5) +
  geom_line(aes(y = authorcount, colour = "Authors/month"), alpha = 0.5) +
  labs(
    title = "Books vs Authors read per month",
    x = "Date",
    y = "Books/authors per month",
    colour = "Guide"
  ) +
  theme_light() +
  theme(
    # The font must be available to the graphics device.
    text = element_text(family = "Franklin Gothic Medium"),
    plot.title = element_text(hjust = 0.5, family = "Franklin Gothic Medium")
  )