Code
library(tidyverse)
library(ggplot2)
library(extrafont)
library(ggridges)library(tidyverse)
library(ggplot2)
library(extrafont)
library(ggridges)

# Ridgeline plot of the enjoyment column, one ridge per genre.
# Rows whose genre is "Romance" are removed before plotting.
cleanbooks |>
  filter(genre != "Romance") |>
  ggplot(aes(x = enjoyment, y = genre, colour = genre, fill = genre)) +
  geom_density_ridges(alpha = 0.5) +
  # Outline colours keyed by genre name; no legend is drawn for them.
  scale_colour_manual(
    values = c(
      "Unrealistic Fiction" = "#BC1B68",
      "Science Fiction" = "#C44579",
      "Realistic Fiction" = "#CB6E8A",
      "Fantasy" = "#D3989B"
    ),
    guide = "none"
  ) +
  # Fill colours keyed by genre name; no legend is drawn for them either.
  scale_fill_manual(
    values = c(
      "Unrealistic Fiction" = "#440021",
      "Science Fiction" = "#824050",
      "Realistic Fiction" = "#C17F7F",
      "Fantasy" = "#FFBFAE"
    ),
    guide = "none"
  ) +
  scale_alpha(guide = "none") +
  labs(
    title = "Enjoyment distribution per book genre",
    x = "Enjoyment rating",
    caption = "1-10 rating by me of how much I enjoyed books I've read in the past 2 years"
  ) +
  theme(
    plot.background = element_rect(fill = "#FEF5F2"),
    panel.background = element_rect(fill = "#FFF1ED"),
    panel.grid.x = element_line(color = "#8FC6C9"),
    panel.grid.y = element_line(color = "#F5D2D2"),
    axis.line = element_line(linewidth = .25, color = "#440021"),
    # The genre names on the axis are self-explanatory, so no y title.
    axis.title.y = element_blank()
  )
Realistic and unrealistic fiction are the two genres with the least rows (romance not included):
# Count how many rows of cleanbooks fall into each genre.
cleanbooks$genre |> table()
Fantasy Realistic Fiction Romance Science Fiction
27 12 1 29
Unrealistic Fiction
10
And to explain the irregularity of the unrealistic fiction distribution, here’s a table of the authors:
# Keep only the "Unrealistic Fiction" rows, then count rows per author.
unrealistic <- filter(cleanbooks, genre == "Unrealistic Fiction")
unrealistic$author |> table()
China Miéville Chuck Palahniuk Helen Oyeyemi Mikhail Bulgakov
1 2 3 1
Rebecca Stead Sarah Gailey Stuart Turton
1 1 1
My lowest-rated book in the entire dataset is in this small subset (along with two of my top ten).
# Dumbbell-style chart for rows with numgoodreads above 200,000.
# Each book (shorttitle) gets a grey segment running from its goodreads value
# to its amazon value, plus one point each for goodreads, amazon and
# personalrate. Books are ordered on the y axis by numgoodreads.
cleanbooks |>
  filter(numgoodreads > 200000) |>
  ggplot(aes(y = shorttitle)) +
  geom_segment(
    aes(
      x = goodreads,
      xend = amazon,
      y = reorder(shorttitle, numgoodreads),
      yend = shorttitle
    ),
    color = "#A79E9E"
  ) +
  # The constant strings inside aes() become the legend keys that
  # scale_color_manual() colours below.
  geom_point(aes(x = goodreads, color = "Goodreads"), size = 3, alpha = 0.6) +
  geom_point(aes(x = amazon, color = "Amazon"), size = 3, alpha = 0.6) +
  geom_point(
    aes(x = personalrate, color = "Personal rating"),
    size = 3,
    alpha = 0.6
  ) +
  scale_color_manual(
    values = c(
      "Goodreads" = "#33a302",
      "Amazon" = "#f3a3a3",
      "Personal rating" = "#87BFFF"
    )
  ) +
  labs(
    title = "Book ratings, Amazon vs Goodreads (and myself)",
    x = "Rating (0-5)",
    y = "Book"
    # Caption currently disabled; restore the comma above to re-enable it:
    # caption = "only using books I have read and that are highly popular on Goodreads"
  ) +
  theme(
    panel.background = element_rect(
      color = "#f3a3a3",
      linewidth = 0.55,
      fill = "#fff"
    ),
    panel.grid = element_line(color = "#ECE8E8"),
    # Horizontal grid lines.
    panel.grid.major.y = element_line(color = "#D7DADA"),
    panel.grid.minor.y = element_line(color = "#D7DADA"),
    # Vertical grid lines, deliberately much paler than the horizontal ones.
    # NOTE(review): these still render on top of the y lines; ggplot2 appears
    # to draw the grid layers in a fixed order regardless of the order of the
    # theme arguments - confirm.
    panel.grid.major.x = element_line(color = "#F9FAFA"),
    panel.grid.minor.x = element_line(color = "#F9FAFA"),
    axis.line = element_line(color = "#33a302"),
    axis.text.y = element_text(hjust = 0),
    axis.title.y = element_blank(),
    legend.position = "bottom",
    legend.background = element_blank(),
    legend.title = element_blank()
  )
What’s interesting is I keep the goodreads statistics updated regularly and the last published version had these on the plot (aka the books with a number of ratings higher than 200,000 a few months ago were this):
The Song of Achilles
Good Omens
The Thursday Murder Club
One for the Money
Howl’s Moving Castle
The Colour of Magik
The Master and Margarita
All Systems Red
Uprooted
Parable of the Sower
Guards Guards
Giovanni’s Room
None of the books now displayed are new additions to my list; only their number of ratings changed. I should set up a script that scrapes everything once a month or so and plots the current rating and number of ratings against the previous ones.
# Add the month-level columns used by the time-series plots further down:
#   readmy    - "<readmonth> <readyear>" as text, NA when either part is NA
#   readdate2 - readmy parsed with lubridate's month-year parser my()
#   roundrate - personalrate passed through round()
# Previously readdate2 was first filled with the same text as readmy and then
# overwritten by the parsed value; that throwaway step is gone and everything
# is computed in a single mutate().
cleanbooks <- cleanbooks |>
  mutate(
    readmy = if_else(
      !is.na(readmonth) & !is.na(readyear),
      paste(readmonth, readyear),
      NA_character_
    ),
    # Namespaced so the call does not rely on lubridate being attached.
    readdate2 = lubridate::my(readmy),
    roundrate = round(personalrate)
  ) |>
  # Keep the column order the previous three-step version produced
  # (readdate2 immediately before readmy).
  relocate(readdate2, .before = readmy)
# Number of rows (books) per readdate2 value, stored as bookspermonth.
# Later chunks join this table by readdate2 to colour their lines.
readbooksnumber <- cleanbooks |>
  count(readdate2, name = "bookspermonth")
# ---- Popularity of the books read each month ---------------------------------
# Mean of numgoodreads per read month, with rows from readyear "2021" removed.
# `.groups = "drop"` returns an ungrouped tibble and silences summarise()'s
# regrouping message.
obscurebooks <- cleanbooks |>
  filter(readyear != "2021") |>
  group_by(readmy, readdate2) |>
  summarise(meanpopularity = mean(numgoodreads), .groups = "drop")

# Attach the per-month book counts (used for the line colour) and drop rows
# whose month label is missing.
obscurebooksX <- obscurebooks |>
  left_join(readbooksnumber, by = "readdate2") |>
  filter(!is.na(readmy))

ggplot(
  obscurebooksX,
  aes(x = readdate2, y = meanpopularity, color = bookspermonth)
) +
  # Line width is set with `linewidth`; `size` is deprecated for lines.
  geom_line(linewidth = 0.75) +
  scale_color_gradient(low = "#A4C6B8", high = "#651461") +
  scale_x_date(date_breaks = "3 month", date_labels = "%m/%y") +
  theme_light() +
  labs(
    title = "Popularity of books I've read",
    x = "Date",
    y = "Mean number of Goodreads reviews",
    color = "Books read"
  ) +
  theme(
    axis.line = element_line(
      linewidth = 0.6,
      colour = "#7F0078",
      arrow = arrow(
        angle = 30,
        length = unit(0.1, "inches"),
        ends = "last",
        type = "open"
      )
    ),
    plot.background = element_rect(
      fill = "#DFDEEE",
      colour = "#400C3E",
      linewidth = 1.2
    ),
    plot.margin = margin(0.8, 0.8, 0.8, 0.8, "cm"),
    legend.background = element_rect(fill = "#C1D2DE", linewidth = 0.3),
    # Ticks are transparent, so no width is needed (the old `size` argument
    # is deprecated for line elements).
    legend.ticks = element_line(color = "transparent"),
    legend.ticks.length = unit(3, "inches")
  )

# ---- Goodreads approval of the books read each month -------------------------
# Mean of the goodreads column per read month, again without 2021.
goodreadbooks <- cleanbooks |>
  filter(readyear != "2021") |>
  group_by(readmy, readdate2) |>
  summarise(meanapproval = mean(goodreads), .groups = "drop")

goodreadbooksX <- goodreadbooks |>
  left_join(readbooksnumber, by = "readdate2")

ggplot(
  goodreadbooksX,
  aes(x = readdate2, y = meanapproval, color = bookspermonth)
) +
  geom_line() +
  scale_color_gradient(low = "#FCA0F1", high = "#780016") +
  scale_x_date(date_breaks = "3 month", date_labels = "%m/%y") +
  theme_light() +
  labs(
    title = "Goodreads approval of my monthly books",
    x = "Date",
    y = "Average Goodreads review ratings",
    color = "Books read"
  ) +
  theme(
    axis.line = element_line(
      linewidth = 0.6,
      colour = "#790016",
      arrow = arrow(
        angle = 30,
        length = unit(0.1, "inches"),
        ends = "last",
        type = "open"
      )
    ),
    plot.background = element_rect(
      fill = "#FFDCF3",
      colour = "#6C0014",
      linewidth = 1
    ),
    plot.margin = margin(0.8, 0.8, 0.8, 0.8, "cm"),
    legend.background = element_rect(fill = "#FFD4F0", linewidth = 0.3),
    legend.ticks = element_line(color = "transparent"),
    legend.ticks.length = unit(3, "inches"),
    # A numeric legend.position is deprecated; in-panel placement is now
    # requested with "inside" plus legend.position.inside.
    legend.position = "inside",
    legend.position.inside = c(.65, .23)
  )

# ---- Mean enjoyment per read month -------------------------------------------
# One row per book is kept (mutate, not summarise), each carrying the means of
# enjoyment and personalrate for its read month.
# The grouping key is readdate2 (month + year). It used to be readmonth, which
# pooled the same calendar month across different years even though the plot
# and the join below are keyed by readdate2.
meanenjoyment <- cleanbooks |>
  filter(readyear != "2021") |>
  group_by(readdate2) |>
  mutate(
    meanenjoy = mean(enjoyment),
    meanrate = mean(personalrate)
  ) |>
  ungroup()

meanenjoymentX <- meanenjoyment |>
  left_join(readbooksnumber, by = "readdate2")

ggplot(
  meanenjoymentX,
  aes(x = readdate2, y = meanenjoy, color = bookspermonth)
) +
  geom_line(linewidth = 0.5) +
  labs(
    title = "How much I enjoy my books, on average, per month",
    x = "Month",
    y = "Average enjoyment rating",
    color = "Books read"
  ) +
  # The colour legend is switched off for this plot.
  scale_color_gradient(high = "#440021", low = "#FFBFAE", guide = "none") +
  scale_x_date(date_breaks = "3 month", date_labels = "%m/%y") +
  theme_light() +
  theme(
    axis.line = element_line(
      linewidth = 0.4,
      colour = "#440021",
      arrow = arrow(
        angle = 30,
        length = unit(0.2, "inches"),
        ends = "last",
        type = "open"
      )
    ),
    plot.background = element_rect(
      fill = "#fce2db",
      colour = "black",
      linewidth = 0.1
    ),
    plot.margin = margin(0.8, 0.8, 0.8, 0.8, "cm")
    # Legend styling kept for reference; unused while the guide is "none":
    # legend.background = element_rect(fill = "#fce2db", linewidth = 0.3),
    # legend.ticks = element_line(color = "transparent"),
    # legend.ticks.length = unit(3, "inches"),
    # legend.position = "inside",
    # legend.position.inside = c(.13, .3)
  )

# ---- Books vs authors per month ----------------------------------------------
# Per-month summary (2021 excluded): book count, page totals and means, mean
# Goodreads values, number of distinct authors, and rounded mean authorage.
filteredbooks <- cleanbooks |>
  filter(readyear != "2021") |>
  group_by(readdate2) |>
  summarise(
    bookspermonth = n(),
    sumpages = sum(pages),
    meanpages = mean(pages),
    meanapproval = mean(goodreads),
    meanpopularity = mean(numgoodreads),
    authorcount = n_distinct(author),
    meanage = round(mean(authorage), 0)
  )

ggplot(filteredbooks, aes(x = readdate2)) +
  # alpha is a fixed setting, so it sits outside aes(); inside aes() it was
  # treated as a mapped variable and needed scale_alpha() to hide its legend.
  geom_line(aes(y = bookspermonth, color = "Books/month"), alpha = 0.5) +
  geom_line(aes(y = authorcount, color = "Authors/month"), alpha = 0.5) +
  labs(
    title = "Books vs Authors read per month",
    x = "Date",
    y = "Books/authors per month",
    color = "Guide"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(hjust = .5, family = "Franklin Gothic Medium"),
    text = element_text(family = "Franklin Gothic Medium")
  )