Author Likeability

Key_Influencers

Moral Outlook Graphs

# Draw a horizontal bar chart of Moral_Outlook frequencies for one data frame.
#
# data:         data frame containing a Moral_Outlook column.
# source_label: text shown as the subtitle, so the otherwise identical charts
#               can be told apart once rendered.
# bar_fill:     fill colour of the bars.
#
# Returns the ggplot object; when called at top level it is auto-printed.
plot_moral_outlook <- function(data, source_label, bar_fill = "steelblue") {
  ggplot(
    data,
    # Negated per-value counts make reorder() sort the levels from the most
    # to the least frequent outlook.
    aes(x = reorder(`Moral_Outlook`, -table(`Moral_Outlook`)[`Moral_Outlook`]))
  ) +
    geom_bar(fill = bar_fill) +
    coord_flip() +  # horizontal bars for readability
    labs(
      title = "Distribution of Moral Outlooks",
      subtitle = source_label,
      x = "Moral Outlook",
      y = "Count"
    ) +
    theme_minimal()
}

# One chart per subset; the subtitle names the data frame being plotted.
plot_moral_outlook(ASL_SS, "ASL_SS")

plot_moral_outlook(ASL_Novelette, "ASL_Novelette")

plot_moral_outlook(ASL_Novellas, "ASL_Novellas")

plot_moral_outlook(ASL_Novels, "ASL_Novels")

plot_moral_outlook(ASL_PF, "ASL_PF")

plot_moral_outlook(ASL_Poetry_unique, "ASL_Poetry_unique", bar_fill = "darkgreen")

Pie Chart of nationality with stars

# Step 1: Get top 5 nationalities
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties, so
# more than five nationalities can be returned when counts are equal.
top5_nationalities <- ASL_unique %>%
  filter(!is.na(Nationality)) %>%
  count(Nationality, sort = TRUE) %>%
  slice_max(order_by = n, n = 5) %>%
  pull(Nationality)

# Step 2: Summarize data
# One row per selected nationality: number of rows, mean star rating, and the
# combined text used as the legend entry.
nationality_summary <- ASL_unique %>%
  filter(Nationality %in% top5_nationalities) %>%
  group_by(Nationality) %>%
  summarise(
    Count = n(),
    Avg_Stars = round(mean(Stars, na.rm = TRUE), 2)
  ) %>%
  mutate(
    Label = paste0(Nationality, " (", Count, ", ", Avg_Stars, "★)")
  )

# Step 3: Pie chart with legend-only labels
# A single stacked column (x = "") wrapped onto polar coordinates gives the pie.
ggplot(nationality_summary, aes(x = "", y = Count, fill = Label)) +
  geom_col(width = 1, color = "white") +
  coord_polar("y") +
  theme_void() +
  labs(title = "Top 5 Nationalities (Count & Avg Stars)") +
  guides(fill = guide_legend(title = "Nationality (Count, Avg Stars)")) +
  theme(
    legend.title = element_text(size = 11, face = "bold"),
    legend.text = element_text(size = 10)
  )

Religion Visualizations

# Summarize top 10 religions by Read Time and build the legend text
# ("<Religion> (<total read time>)") for each of them
religion_read_time <- ASL_unique %>%
  group_by(Religion) %>%
  summarise(Total_Read_Time = sum(Read_Time, na.rm = TRUE)) %>%
  arrange(desc(Total_Read_Time)) %>%
  slice_head(n = 10) %>%
  mutate(Religion_Label = paste0(Religion, " (", round(Total_Read_Time, 1), ")"))

# One colour per row actually present. brewer.pal() needs at least 3 colours,
# so request max(3, rows) and trim; a fixed request of 10 would leave unnamed
# surplus colours whenever fewer than 10 religions exist.
n_religions <- nrow(religion_read_time)
religion_colors <- setNames(
  brewer.pal(n = max(3, n_religions), "Set3")[seq_len(n_religions)],
  religion_read_time$Religion_Label
)

# Plot without geom_text
# NOTE(review): this axis is labelled in hours, while the political-leaning
# chart labels totals of the same Read_Time column as minutes -- confirm the
# unit.
ggplot(religion_read_time, aes(x = reorder(Religion, Total_Read_Time),
                               y = Total_Read_Time,
                               fill = Religion_Label)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(
    values = religion_colors,
    name = "Religion (Total Read Time)"
  ) +
  labs(
    title = "Total Read Time by Religion",
    x = "Religion",
    y = "Total Read Time (hours)"
  ) +
  theme_minimal() +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme(
    legend.position = "right",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 9)
  )

Graph of Genres

# Number of rows per genre, drawn as horizontal bars with the most frequent
# genre at the top
ggplot(
  data = count(ASL_unique, Genre, sort = TRUE),
  mapping = aes(x = reorder(Genre, n), y = n)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "Genre Distribution", x = "Genre", y = "Count") +
  theme_minimal()

# Total pages per genre, drawn the same way
ggplot(
  data = ASL_unique %>%
    group_by(Genre) %>%
    summarise(Total_Pages = sum(Pages, na.rm = TRUE)) %>%
    arrange(desc(Total_Pages)),
  mapping = aes(x = reorder(Genre, Total_Pages), y = Total_Pages)
) +
  geom_col(fill = "darkgreen") +
  coord_flip() +
  labs(title = "Total Pages by Genre", x = "Genre", y = "Pages") +
  theme_minimal()

## In the future I will not use count as much.

# How many genres, and how many authors per genre, the chart shows. The title
# is built from these values so the heading cannot drift from the data.
top_n_genres <- 10
top_n_authors <- 2

# Get top genres by count
top_genres <- ASL_unique %>%
  count(Genre, sort = TRUE) %>%
  slice_head(n = top_n_genres) %>%
  pull(Genre)

# Filter dataset to those genres
ASL_top_genres <- ASL_unique %>%
  filter(Genre %in% top_genres)

# Calculate total pages read per author per genre and keep the leading authors
# of each genre (slice_max() keeps ties, so a genre can contribute more than
# top_n_authors rows)
author_pages <- ASL_top_genres %>%
  group_by(Genre, Author) %>%
  summarise(TotalPages = sum(Pages, na.rm = TRUE), .groups = "drop") %>%
  group_by(Genre) %>%
  slice_max(order_by = TotalPages, n = top_n_authors) %>%
  ungroup()

# Reorder Author factor by TotalPages within each Genre for proper legend order
author_pages <- author_pages %>%
  group_by(Genre) %>%
  mutate(Author = fct_reorder(Author, TotalPages)) %>%
  ungroup()

# Plot bar chart: one dodged bar per author within each genre
ggplot(author_pages, aes(x = reorder(Genre, -TotalPages), y = TotalPages, fill = Author)) +
  geom_col(position = position_dodge()) +
  labs(
    title = paste0(
      "Top ", top_n_authors, " Authors per Top ", top_n_genres,
      " Genres by Pages Read"
    ),
    x = "Genre",
    y = "Total Pages Read",
    fill = "Author"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "right"
  )

% of Fictional

# Share of rows flagged Fictional == 1, as a percentage rounded to one decimal;
# rows with a missing flag are left out of the mean.
fictional_percent <- summarise(
  ASL_unique,
  Percentage_Fictional = round(100 * mean(Fictional == 1, na.rm = TRUE), 1)
)

print(fictional_percent)

## # A tibble: 1 × 1
##   Percentage_Fictional
##                  <dbl>
## 1                 83.1

# Collapse the 0/1 format flags into one label per row. case_when() takes the
# first matching condition, so a row flagged as several formats is labelled by
# the earliest one listed here.
ASL_unique <- ASL_unique %>%
  mutate(
    Length_Category = case_when(
      Novel == 1 ~ "Novel",
      Novella == 1 ~ "Novella",
      Novelette == 1 ~ "Novelette",
      Short_Story == 1 ~ "Short Story",
      Poetry == 1 ~ "Poetry",
      TRUE ~ "Other"
    )
  )

# Row count per (length category, fiction flag), plus each count's share of its
# length category. Ungrouped at the end so later verbs applied to this table
# are not silently evaluated per category.
fiction_by_length <- ASL_unique %>%
  group_by(Length_Category, Fictional) %>%
  summarise(Count = n(), .groups = "drop") %>%
  group_by(Length_Category) %>%
  mutate(
    Total = sum(Count),
    Percent = round(100 * Count / Total, 1)
  ) %>%
  ungroup()

# Legend labels are named by flag value so each text stays attached to the
# right colour even if one of the two values is absent from the data.
ggplot(fiction_by_length, aes(x = reorder(Length_Category, -Total), y = Percent, fill = as.factor(Fictional))) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c("0" = "gray60", "1" = "darkorange"),
    labels = c("0" = "Nonfiction", "1" = "Fiction")
  ) +
  labs(
    title = "Fiction vs. Nonfiction by Length Category",
    x = "Length Category",
    y = "Percentage",
    fill = "Category"
  ) +
  theme_minimal()

## A flat percentage is insufficient, because it fails to consider the size of each book.

Philosophical View

# Derive a single Length label per row from the 0/1 format flags; the first
# matching flag wins and rows matching none become "Other"
ASL_long <- mutate(
  ASL_unique,
  Length = case_when(
    Short_Story == 1 ~ "Short Story",
    Novelette == 1 ~ "Novelette",
    Novella == 1 ~ "Novella",
    Novel == 1 ~ "Novel",
    Poetry == 1 ~ "Poetry",
    TRUE ~ "Other"
  )
)

# Row counts for every Length / Philosophical_View combination
length_philosophy <- ASL_long %>%
  group_by(Length, Philosophical_View) %>%
  summarise(Count = n(), .groups = "drop")

# The novel-only counts are the "Novel" slice of the table above
novel_data <- length_philosophy %>%
  filter(Length == "Novel") %>%
  select(Philosophical_View, Count)

# Bars sorted from the most to the least common view
ggplot(
  novel_data,
  aes(
    x = reorder(Philosophical_View, -Count),
    y = Count,
    fill = Philosophical_View
  )
) +
  geom_col() +
  theme_minimal() +
  labs(title = "Philosophical Views in Novels", x = "Philosophical View", y = "Count") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## Count is insufficient again.

# Total read time per philosophical view, restricted to rows labelled "Novel"
novel_time <- ASL_long %>%
  filter(Length == "Novel") %>%
  group_by(Philosophical_View) %>%
  summarise(
    Time_Read = sum(Read_Time, na.rm = TRUE),
    .groups = "drop"
  )

# Bars sorted from the largest to the smallest total
ggplot(
  data = novel_time,
  mapping = aes(
    x = reorder(Philosophical_View, -Time_Read),
    y = Time_Read,
    fill = Philosophical_View
  )
) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Read Time by Philosophical View (Novels Only)",
    x = "Philosophical View", y = "Total Read Time"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Create a unified Length column (first matching 0/1 flag wins; rows matching
# none are labelled "Other")
ASL_long <- ASL_unique %>%
  mutate(Length = case_when(
    Short_Story == 1 ~ "Short Story",
    Novelette == 1 ~ "Novelette",
    Novella == 1 ~ "Novella",
    Novel == 1 ~ "Novel",
    Poetry == 1 ~ "Poetry",
    TRUE ~ "Other"
  ))

# List of unique lengths to loop over
lengths <- unique(ASL_long$Length)

# Plural wording for the chart titles. Appending "s" to every label would
# produce "Short Storys", "Poetrys" and "Others".
length_plurals <- c(
  "Short Story" = "Short Stories",
  "Novelette" = "Novelettes",
  "Novella" = "Novellas",
  "Novel" = "Novels",
  "Poetry" = "Poetry",
  "Other" = "Other"
)

# Loop through each length and plot total read time per philosophical view
for (len in lengths) {
  plot_data <- ASL_long %>%
    filter(Length == len) %>%
    group_by(Philosophical_View) %>%
    summarise(Time_Read = sum(Read_Time, na.rm = TRUE), .groups = "drop") %>%
    filter(Time_Read > 0)

  # Nothing to draw when no view in this category has positive read time
  if (nrow(plot_data) == 0) {
    next
  }

  p <- ggplot(plot_data, aes(x = reorder(Philosophical_View, -Time_Read), y = Time_Read, fill = Philosophical_View)) +
    geom_bar(stat = "identity") +
    theme_minimal() +
    labs(
      title = paste0("Read Time by Philosophical View (", length_plurals[[len]], ")"),
      x = "Philosophical View",
      y = "Total Read Time"
    ) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")

  # Inside a for loop ggplot objects are not auto-printed
  print(p)
}

Personal Significance

# The seven Personal_Significance values with the largest summed Read_Time,
# listed from largest to smallest
top_significance <- ASL %>%
  group_by(Personal_Significance) %>%
  summarise(TotalReadTime = sum(Read_Time, na.rm = TRUE)) %>%
  arrange(desc(TotalReadTime)) %>%
  head(7) %>%
  pull(Personal_Significance)

# Re-ranking the filtered rows by the same totals yields the same sequence, so
# the factor level order is simply the top-7 vector
sig_order <- top_significance

# Keep only rows in those categories and fix the category order for plotting
filtered_ASL <- ASL %>%
  filter(Personal_Significance %in% top_significance) %>%
  mutate(
    Personal_Significance = factor(Personal_Significance, levels = sig_order)
  )

# Box plot of Read_Time per category; fill repeats the category so a legend is
# drawn
ggplot(
  data = filtered_ASL,
  mapping = aes(
    x = Personal_Significance,
    y = Read_Time,
    fill = Personal_Significance
  )
) +
  geom_boxplot(outlier.shape = 21, outlier.alpha = 0.4) +
  coord_flip() +
  labs(
    title = "Time Spent Reading by Personal Significance (Top 7 Categories)",
    x = "Personal Significance", y = "Read Time",
    fill = "Personal Significance"
  ) +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Moral Outlook (simplified)

# Frequency of every distinct Moral_Outlook value, in alphabetical order
table(ASL[["Moral_Outlook"]])

## 
##          Absurdist           Admiring   Aesthetic Virtue       Affectionate 
##                  3                  1                  1                  2 
##          Ambiguity          Ambiguous         Carpe Diem         Cautionary 
##                  1                 25                  1                  3 
##   Christian Virtue      Collaborative    Creative Genius Creative Integrity 
##                  1                  1                  1                  1 
##           Critical            Cynical               Dark         Devotional 
##                  2                  7                  3                  6 
##        Educational       Entertaining         Fatalistic               Fear 
##                  5                  5                 10                  1 
##          Grotesque           Haunting            Healing             Heroic 
##                  8                  1                  1                  2 
##            Hopeful         Humanistic         Idealistic               Just 
##                  8                  2                 16                 65 
##        Melancholic              Moral         Moralistic             Morbid 
##                 53                  4                 41                  1 
##          Mortality           Mournful           Mystical         Nihilistic 
##                  4                  6                  1                  4 
##         Optimistic            Playful          Practical          Pragmatic 
##                  2                  1                  1                  8 
##           Rational        Rationalist            Realist         Redemptive 
##                  1                 11                  3                 11 
##         Reflective          Reformist           Reverent      Revolutionary 
##                  6                  1                  1                  1 
##           Romantic          Satirical           Sinister          Spiritual 
##                  4                  2                  2                  3 
##             Subtle             Tragic       Transcendent         Unsettling 
##                 12                  2                  1                  2 
##          Uplifting        Utilitarian        Warmhearted 
##                  3                  3                  1

# Same frequencies as a tibble, most common outlook first
count(ASL, Moral_Outlook, sort = TRUE)

## # A tibble: 59 × 2
##    Moral_Outlook     n
##    <chr>         <int>
##  1 Just             65
##  2 Melancholic      53
##  3 Moralistic       41
##  4 Ambiguous        25
##  5 Idealistic       16
##  6 Subtle           12
##  7 Rationalist      11
##  8 Redemptive       11
##  9 Fatalistic       10
## 10 Grotesque         8
## # ℹ 49 more rows

# Map each detailed Moral_Outlook value onto one of five broad groups; any
# value not listed below (including missing values) falls through to "Other"
ASL_Moral_Outlook <- mutate(
  ASL,
  Moral_Outlook_Simplified = case_when(
    Moral_Outlook %in% c(
      "Melancholic", "Mournful", "Tragic", "Morbid", "Mortality", "Grotesque",
      "Nihilistic", "Fatalistic", "Haunting", "Fear", "Unsettling"
    ) ~ "Melancholic",
    Moral_Outlook %in% c(
      "Moralistic", "Just", "Devotional", "Christian Virtue", "Moral",
      "Rationalist", "Utilitarian", "Reverent", "Reformist", "Critical",
      "Cautionary"
    ) ~ "Moralistic",
    Moral_Outlook %in% c(
      "Ambiguous", "Ambiguity", "Dark", "Absurdist", "Reflective", "Cynical",
      "Realist", "Mystical", "Sinister", "Spiritual"
    ) ~ "Ambiguous",
    Moral_Outlook %in% c(
      "Idealistic", "Hopeful", "Romantic", "Humanistic", "Uplifting", "Heroic",
      "Transcendent", "Optimistic", "Redemptive", "Admiring"
    ) ~ "Idealistic",
    Moral_Outlook %in% c(
      "Subtle", "Aesthetic Virtue", "Affectionate", "Healing", "Collaborative",
      "Creative Genius", "Creative Integrity", "Warmhearted", "Playful"
    ) ~ "Subtle",
    TRUE ~ "Other"
  )
)

# Pie chart of the group sizes: one stacked column wrapped onto polar
# coordinates
ggplot(
  data = count(ASL_Moral_Outlook, Moral_Outlook_Simplified),
  mapping = aes(x = "", y = n, fill = Moral_Outlook_Simplified)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Moral Outlook Distribution (Simplified)") +
  theme_void() +
  theme(legend.position = "right")

Era (Box Plot)

# Box plot of Read_Time per Era, with eras ordered by their mean
# Year_Published. na.rm = TRUE is forwarded to mean(), so a single missing
# publication year no longer turns an era's ordering score into NA.
ASL %>%
  mutate(Era = reorder(Era, Year_Published, FUN = mean, na.rm = TRUE)) %>%
  ggplot(aes(x = Era, y = Read_Time, fill = Era)) +
  geom_boxplot() +
  labs(title = "Read Time by Era (Chronologically Ordered)",
       x = "Era", y = "Read Time") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Unique Authors Area

Political Leaning

# Create author_totals by summing read time per author & political leaning
author_totals <- ASL_unique %>%
  group_by(Political_Leaning, Author) %>%
  summarise(Total_Read_Time = sum(Read_Time, na.rm = TRUE), .groups = "drop")

# Identify top author per Political_Leaning (exactly one per leaning: ties are
# broken by row order). Ungrouped so the lookup table carries no grouping into
# later steps.
top_authors <- author_totals %>%
  group_by(Political_Leaning) %>%
  slice_max(order_by = Total_Read_Time, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(Political_Leaning, Top_Author = Author)

# Join and flag categories: the leaning's top author keeps their name, every
# other author is pooled into "Other"
author_totals_flagged <- author_totals %>%
  left_join(top_authors, by = "Political_Leaning") %>%
  mutate(Category = ifelse(Author == Top_Author, Author, "Other")) %>%
  select(-Top_Author)

# Calculate category order for plotting and legend: ascending total read time,
# with "Other" forced to the first level
category_order <- author_totals_flagged %>%
  group_by(Category) %>%
  summarise(Total_Read_Time = sum(Total_Read_Time, na.rm = TRUE), .groups = "drop") %>%
  arrange(Total_Read_Time) %>%
  pull(Category)

category_order <- c("Other", setdiff(category_order, "Other"))

author_totals_flagged <- author_totals_flagged %>%
  mutate(Category = factor(Category, levels = category_order))

# Set palette colors. "Set3" offers 3 to 12 colours: request at least 3 and
# trim to the number needed, or interpolate when more than 12 are required.
n_colors <- length(category_order)
palette_colors <- brewer.pal(max(3, min(n_colors, 12)), "Set3")
if (n_colors > 12) {
  palette_colors <- colorRampPalette(palette_colors)(n_colors)
} else {
  palette_colors <- palette_colors[seq_len(n_colors)]
}
names(palette_colors) <- category_order

# Aggregate for plotting: one row per leaning and category
plot_data <- author_totals_flagged %>%
  group_by(Political_Leaning, Category) %>%
  summarise(Total_Read_Time = sum(Total_Read_Time, na.rm = TRUE), .groups = "drop")

# Plot
# FUN = sum orders the leanings by the full height of their stacked bar;
# reorder()'s default (mean of the segment rows) misplaces a leaning that has
# only one segment.
# NOTE(review): this axis is labelled in minutes, while the religion chart
# labels totals of the same Read_Time column as hours -- confirm the unit.
ggplot(plot_data, aes(x = reorder(Political_Leaning, -Total_Read_Time, FUN = sum),
                      y = Total_Read_Time, fill = Category)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = palette_colors) +
  theme_minimal() +
  labs(
    title = "Total Read Time by Political Leaning: Top Author vs Other",
    x = "Political Leaning",
    y = "Total Read Time (minutes)",
    fill = "Category"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = guide_legend(reverse = TRUE))

Education Level

# One row per author, sorted by total read time (largest first).
# Author-level attributes are taken from the author's first row; the remaining
# columns aggregate over all of that author's rows.
ASL_authors_unique <- ASL_unique %>%
  group_by(Author) %>%
  summarise(
    across(c(Political_Leaning, Education_Level, Era, Mental_Health), first),
    Works = n(),
    Total_Read_Time = sum(Read_Time, na.rm = TRUE),
    Avg_Read_Time = round(mean(Read_Time, na.rm = TRUE), 2),
    Total_Pages = sum(Pages, na.rm = TRUE),
    Avg_Pages = round(mean(Pages, na.rm = TRUE), 1),
    Avg_Stars = round(mean(Stars, na.rm = TRUE), 2),
    Five_Stars = sum(Stars == 5, na.rm = TRUE),
    # Percentage of the author's rows that were rated exactly 5 stars
    Likeability = round(100 * Five_Stars / Works, 1)
  ) %>%
  arrange(desc(Total_Read_Time))


# Number of authors at each Education_Level code, one bar per code
ggplot(
  data = count(ASL_authors_unique, Education_Level),
  mapping = aes(x = factor(Education_Level), y = n)
) +
  geom_col(fill = "steelblue") +
  theme_minimal() +
  labs(
    title = "Author Count by Education Level",
    x = "Education Level (-1 = No HS, 0 = HS, 2 = Assoc, 4 = BA, 6 = MA, 8 = PhD)",
    y = "Number of Authors"
  )

Mental Health

# Total read time per Mental_Health category, largest first, with each
# category's share of the overall total
mental_health_summary <- ASL_authors_unique %>%
  group_by(Mental_Health) %>%
  summarise(Total_Read_Time = sum(Total_Read_Time, na.rm = TRUE)) %>%
  arrange(desc(Total_Read_Time)) %>%
  mutate(
    Percentage = round(100 * Total_Read_Time / sum(Total_Read_Time), 1),
    label = paste0(Mental_Health, " (", Percentage, "%)"),
    # Levels follow the sorted row order, which fixes the legend order
    label = factor(label, levels = label)
  )

# Pie chart: one stacked column wrapped onto polar coordinates
ggplot(
  data = mental_health_summary,
  mapping = aes(x = "", y = Total_Read_Time, fill = label)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  theme(legend.title = element_blank(), legend.position = "right") +
  guides(fill = guide_legend(reverse = FALSE))

A Study in Literature

Marshall Larson

2025-05-12

Author Likeability

Key_Influencers

Moral Outlook Graphs

Pie Chart of nationality with stars

Religion Visualizations

Graph of Genres

% of Fictional

Philosophical View

Personal Significance

Moral Outlook (simplified)

Era (Box Plot)

Unique Authors Area

Political Leaning

Education Level

Mental Health