Project Overiview

Sentiment Analysis of Justice John Paul Stevens’ dissenting and concurring opinions authored between the years of 1976 to 1980. This analysis aims to examine the emotional influence of Justice Stevens personal experiences on the opinions he wrote during the years prior to Stevens divorce, during his divorce and immediately after his divorce, by assessing emotional distribution from the case text.

Data Preparation

# Install packages
if (!require("tidyverse")) install.packages("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
if (!require("tidytext")) install.packages("tidytext")
## Loading required package: tidytext
if (!require("stringr")) install.packages("stringr")
if (!require("ggplot2")) install.packages("ggplot2")
if (!require("scales")) install.packages("scales")
## Loading required package: scales
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
# Load library
library(tidyverse)
library(tidytext)
library(stringr)
library(ggplot2)
library(scales)
# Load Stevens opinions extracted case data
df <- read.csv("stevensdata.csv")
df <- read.csv("stevensdata.csv", stringsAsFactors = FALSE)

Design

# key for column names and items
id_col           <- "file_title"    # column with a case name 
type_col         <- "subfolder"    # column with "concurring" or "dissenting"
text_col         <- "stevens_opinion" # column with full text of Stevens' authored opinion
year_col         <- "year"          # column with the year
# validate date/years
df$case_year <- as.integer(df[[year_col]])

missing_years <- sum(is.na(df$case_year))
if (missing_years > 0) {
  warning(missing_years, " opinions have no year value. They will be excluded from year-based analyses.")
}

Define custom keyword and phrase lexicon for each emotion

# Each vector contains words and short phrases associated with that category.
lexicon <- list(
  Belonging = c(
    "agree", "agreement","appeasement", "consensus", "we", "our", "together", "common",
    "shared", "accepted", "acceptance", "harmony", "unity", "join", "collective",
    "community", "solidarity", "conform", "concur", "majority", "affirm",
    "accommodate", "inclusive", "cooperate", "mutual", "aligned"
  ),
  
  Liberty = c(
    "freedom", "liberty", "free", "autonomy", "right", "rights", "independent",
    "independence", "self-determination", "privacy", "unrestricted", "oppression",
    "oppressive", "restrict", "restriction", "coerce", "coercion", "compel",
    "compelled", "forced", "force", "censorship", "suppress", "suppression",
    "prohibit", "prohibition", "undue burden", "protect", "protection",
    "shield", "civil liberties", "fundamental right", "constitutional right"
  ),
  
  Empathy = c(
    "suffer", "suffering", "pain", "harm", "hurt", "burden", "hardship",
    "vulnerable", "victim", "neglect", "neglected", "misunderstood",
    "marginalized", "disadvantaged", "disproportionate", "injustice",
    "unfair", "inequity", "inequality", "compassion", "sympathize",
    "sympathy", "concern", "affected", "impacted", "plight", "struggle",
    "dignity", "humane", "humanity", "underserved"
  ),
  
  Control = c(
    "authority", "power", "duty", "obligation", "enforce", "enforcement",
    "regulate", "regulation", "govern", "governance", "oversight", "mandate",
    "mandate", "jurisdiction", "sovereign", "sovereignty", "command",
    "control", "administer", "administration", "comply", "compliance",
    "accountability", "responsibility", "sanction", "order", "directive",
    "subordinate", "supervise", "supervision", "preempt", "preemption",
    "deferential", "deference", "check", "balance", "limit", "limitation"
  ),
  
Contentment = c(
    "appropriate", "adequate", "satisf", "reasonable", "proper", "correct",
    "sound", "uphold", "affirm", "support", "sufficient", "well-established",
    "consistent", "coherent", "fair", "just", "balance", "harmonious",
    "endorse", "approve", "commend", "hope", "optimism", "progress",
    "success", "effective", "confident", "settled", "clear", "agree"
  ),
  
  Discontentment = c(
    "disagree", "dissent", "error", "wrong", "incorrect", "flawed",
    "misguided", "regret", "unfortunate", "troubling", "concern",
    "problematic", "disappoint", "disapproval", "reject", "object",
    "objection", "fail", "failure", "inadequate", "insufficient",
    "misinterpret", "misread", "overreach", "unduly", "improper",
    "unjust", "unfair", "inequitable", "arbitrary", "inconsistent", "unease",
    "absurd", "perverse", "alarming", "disturbing", "grave", "serious"
  )
)

Scoring

## Function to score a single opinion text 
score_opinion <- function(text, lexicon) {
  
  # Normalize text
  text_lower <- tolower(text)
  words <- unlist(str_split(text_lower, "\\W+"))  # split on non-word chars
  
  scores <- sapply(names(lexicon), function(category) {
    terms <- lexicon[[category]]
    # Count matches (supports partial matching via grepl for phrases)
    total <- sum(sapply(terms, function(term) {
      sum(grepl(paste0("\\b", term, "\\b"), text_lower))
    }))
    return(total)
  })
  
  scores <- as.data.frame(t(scores))
  
  # Determine dominant category
  max_score <- max(scores)
  if (max_score == 0) {
    scores$dominant_emotion <- "None"
  } else {
    scores$dominant_emotion <- names(which.max(scores[1, 1:length(lexicon)]))
  }
  
  # Normalize scores to proportions (excluding dominant_emotion column)
  numeric_scores <- scores[, 1:length(lexicon)]
  total_words <- max(length(words), 1)
  scores_normalized <- numeric_scores / total_words * 1000  # per 1000 words
  scores$total_emotion_hits <- sum(numeric_scores)
  
  return(cbind(scores_normalized, dominant_emotion = scores$dominant_emotion,
               total_emotion_hits = scores$total_emotion_hits))
}
## Apply scoring to all opinions
results <- df %>%
  rowwise() %>%
  mutate(scores = list(score_opinion(get(text_col), lexicon))) %>%
  unnest_wider(scores)

# Add a "None" flag column
results <- results %>%
  mutate(dominant_emotion = ifelse(total_emotion_hits == 0, "None", dominant_emotion))

Overall Emotional Distribution

Overview per year for all emotions

# Overview of results 
print(results %>% select(all_of(c(id_col, type_col, year_col)),
                          Belonging, Liberty, Empathy, Control,
                          Contentment, Discontentment,
                          dominant_emotion, total_emotion_hits))
## # A tibble: 255 × 11
##    file_title      subfolder  year Belonging Liberty Empathy Control Contentment
##    <chr>           <chr>     <int>     <dbl>   <dbl>   <dbl>   <dbl>       <dbl>
##  1 01 - Kraham v … Concurri…  1979     0       0       0      27.8         0    
##  2 02 - Liles v O… Concurri…  1976    22.7     0       0       0           0    
##  3 03 - Baldasar … Concurri…  1980     0       0       0       0           0    
##  4 04 - US v Jaco… Concurri…  1976     0       0       0       0           0    
##  5 05 - Burrell v… Concurri…  1976     9.68    0       0       6.45       12.9  
##  6 06 - Duke Powe… Concurri…  1978     0.412   0.471   0.294   0.707       0.765
##  7 07 - Drake v Z… Concurri…  1980     3.27    1.64    1.64    0           6.55 
##  8 08 - Arizona v… Concurri…  1976     1.28    0.855   1.71    1.28        0.855
##  9 09 - Goldwater… Concurri…  1979     0       0       0       0           0    
## 10 10 - Secretary… Concurri…  1979     0       0       0       0           0    
## # ℹ 245 more rows
## # ℹ 3 more variables: Discontentment <dbl>, dominant_emotion <chr>,
## #   total_emotion_hits <int>

Total opinions per emotional category

# Dominant emotion distribution of Results
emotion_summary <- results %>%
  group_by(dominant_emotion) %>%
  summarise(count = n()) %>%
  mutate(percent = count / sum(count) * 100)

print(emotion_summary)
## # A tibble: 7 × 3
##   dominant_emotion count percent
##   <chr>            <int>   <dbl>
## 1 Belonging           10   3.92 
## 2 Contentment         23   9.02 
## 3 Control             29  11.4  
## 4 Discontentment       1   0.392
## 5 Empathy              3   1.18 
## 6 Liberty             13   5.10 
## 7 None               176  69.0

Opinion Type by Year

Concurring vs Dissenting (Overall Opinion Type)

# Dominant opinion type per year (concurring vs dissenting)
results_yr <- results %>% filter(!is.na(case_year))
opinion_type_by_year <- results_yr %>%
  group_by(case_year, across(all_of(type_col))) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(case_year) %>%
  mutate(percent = count / sum(count) * 100,
         total_opinions = sum(count))


dominant_type_per_year <- opinion_type_by_year %>%
  group_by(case_year) %>%
  slice_max(order_by = count, n = 1, with_ties = FALSE) %>%
  rename(dominant_type = all_of(type_col),
         opinions_of_type = count,
         pct_of_year = percent)

print(opinion_type_by_year, n = Inf)
## # A tibble: 10 × 5
## # Groups:   case_year [5]
##    case_year subfolder  count percent total_opinions
##        <int> <chr>      <int>   <dbl>          <int>
##  1      1976 Concurring    17    40.5             42
##  2      1976 Dissenting    25    59.5             42
##  3      1977 Concurring    21    37.5             56
##  4      1977 Dissenting    35    62.5             56
##  5      1978 Concurring    16    29.6             54
##  6      1978 Dissenting    38    70.4             54
##  7      1979 Concurring    19    39.6             48
##  8      1979 Dissenting    29    60.4             48
##  9      1980 Concurring    22    40               55
## 10      1980 Dissenting    33    60               55
print(dominant_type_per_year %>% select(case_year, dominant_type, 
                                         opinions_of_type, pct_of_year, 
                                         total_opinions), n = Inf)
## # A tibble: 5 × 5
## # Groups:   case_year [5]
##   case_year dominant_type opinions_of_type pct_of_year total_opinions
##       <int> <chr>                    <int>       <dbl>          <int>
## 1      1976 Dissenting                  25        59.5             42
## 2      1977 Dissenting                  35        62.5             56
## 3      1978 Dissenting                  38        70.4             54
## 4      1979 Dissenting                  29        60.4             48
## 5      1980 Dissenting                  33        60               55

Concurring vs Dissenting (Dominant Emotion)

# Dominant emotion by opinion type (concurring vs dissenting) 
emotion_by_type <- results %>%
  group_by(across(all_of(type_col)), dominant_emotion) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(across(all_of(type_col))) %>%
  mutate(percent = count / sum(count) * 100)

print(emotion_by_type)
## # A tibble: 13 × 4
## # Groups:   subfolder [2]
##    subfolder  dominant_emotion count percent
##    <chr>      <chr>            <int>   <dbl>
##  1 Concurring Belonging            4   4.21 
##  2 Concurring Contentment          9   9.47 
##  3 Concurring Control              6   6.32 
##  4 Concurring Empathy              2   2.11 
##  5 Concurring Liberty              5   5.26 
##  6 Concurring None                69  72.6  
##  7 Dissenting Belonging            6   3.75 
##  8 Dissenting Contentment         14   8.75 
##  9 Dissenting Control             23  14.4  
## 10 Dissenting Discontentment       1   0.625
## 11 Dissenting Empathy              1   0.625
## 12 Dissenting Liberty              8   5    
## 13 Dissenting None               107  66.9

Emotions by Year

Dominant Emotion by Year

# results for dominant emotion by year
results_yr <- results %>% filter(!is.na(case_year))
emotion_by_year <- results_yr %>%
  group_by(case_year, dominant_emotion) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(case_year) %>%
  mutate(percent = count / sum(count) * 100)
print(emotion_by_year)
## # A tibble: 26 × 4
## # Groups:   case_year [5]
##    case_year dominant_emotion count percent
##        <int> <chr>            <int>   <dbl>
##  1      1976 Belonging            3    7.14
##  2      1976 Contentment          3    7.14
##  3      1976 Control              5   11.9 
##  4      1976 Empathy              2    4.76
##  5      1976 Liberty              2    4.76
##  6      1976 None                27   64.3 
##  7      1977 Belonging            3    5.36
##  8      1977 Contentment          7   12.5 
##  9      1977 Control              8   14.3 
## 10      1977 Empathy              1    1.79
## # ℹ 16 more rows

Average Emotion Intensity by Year

# results for avergae emotional intensity for a given year
intensity_by_year <- results_yr %>%
  group_by(case_year) %>%
  summarise(across(c(Belonging, Liberty, Empathy, Control,
                     Contentment, Discontentment), mean),
            opinion_count = n(),
            .groups = "drop")

print(intensity_by_year)
## # A tibble: 5 × 8
##   case_year Belonging Liberty Empathy Control Contentment Discontentment
##       <int>     <dbl>   <dbl>   <dbl>   <dbl>       <dbl>          <dbl>
## 1      1976     1.53    0.211  0.110    0.610       1.09           0.199
## 2      1977     0.663   0.420  0.271    0.908       0.556          0.210
## 3      1978     0.160   0.336  0.0806   0.350       0.276          0.229
## 4      1979     0.134   0.156  0.0288   0.744       0.165          0.113
## 5      1980     2.65    0.274  0.165    0.536       0.500          0.206
## # ℹ 1 more variable: opinion_count <int>

Visualizations of Results

# color-coding

emotion_colors <- c(
  Belonging      = "#4E79A7",
  Liberty        = "#F28E2B",
  Empathy        = "#E15759",
  Control        = "#76B7B2",
  Contentment    = "#59A14F",
  Discontentment = "#EDC948"
)
# Filter out for visualization datasets
emotion_summary_plot   <- emotion_summary   %>% filter(dominant_emotion != "None")
emotion_by_type_plot   <- emotion_by_type   %>% filter(dominant_emotion != "None")
emotion_by_year_plot   <- emotion_by_year   %>% filter(dominant_emotion != "None")
# Find top emotions by year and filter out none from visualizations
# Note: "None" category is retained in scored data and exports but excluded from all plots below for clarity
top_emotion_per_year <- results_yr %>%
  filter(dominant_emotion != "None") %>%
  group_by(case_year, dominant_emotion) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(case_year) %>%
  mutate(percent = count / sum(count) * 100) %>%
  slice_max(order_by = count, n = 1, with_ties = FALSE) %>%
  rename(dominant_emotion_that_year = dominant_emotion,
         opinions_with_that_emotion = count,
         pct_of_year = percent) %>%
  arrange(case_year)

top_emotion_per_year_plot <- top_emotion_per_year %>% filter(dominant_emotion_that_year != "None")

Dominant Emotion by Year, 1. View of Distribution

# stacked bar chart (dominant emotion by year) 
ggplot(emotion_by_year_plot, aes(x = factor(case_year), y = percent, fill = dominant_emotion)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(values = emotion_colors) +
  labs(title = "Emotion Proportion by Year — Stevens Opinions",
       x = "Year", y = "Percentage (%)", fill = "Emotion") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Dominant Emotion by Year, 2. View of Distribution

# Dominant emotion by year line graph
ggplot(emotion_by_year_plot, aes(x = case_year, y = percent,
                                  color = dominant_emotion, group = dominant_emotion)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2.5) +
  scale_y_continuous(labels = percent_format(scale = 1),
                     limits = c(0, max(emotion_by_year_plot$percent) * 1.1),
                     breaks = pretty(c(0, max(emotion_by_year_plot$percent)), n = 8)) +
  scale_x_continuous(breaks = unique(emotion_by_year_plot$case_year)) +
  scale_color_manual(values = emotion_colors) +
  labs(title = "Dominant Emotion % by Year — Stevens Opinions",
       subtitle = "Each line tracks one emotion's share of opinions per year",
       x = "Year", y = "% of Opinions", color = "Emotion") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        panel.grid.minor = element_blank(),
        legend.position = "bottom")

Emotional Proportions by Year

# Individual emotion proportions of each year
ggplot(emotion_by_year_plot, aes(x = factor(case_year), y = percent, fill = dominant_emotion)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = percent_format(scale = 1),
                     limits = c(0, max(emotion_by_year_plot$percent) * 1.1),
                     breaks = pretty(c(0, max(emotion_by_year_plot$percent)), n = 8)) +
  scale_fill_manual(values = emotion_colors) +
  labs(title = "Emotion Proportion by Year — Stevens Opinions",
       x = "Year", y = "% of Opinions", fill = "Emotion") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        panel.grid.minor = element_blank(),
        legend.position = "bottom")