# Install packages
if (!require("tidyverse")) install.packages("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
if (!require("tidytext")) install.packages("tidytext")
## Loading required package: tidytext
if (!require("stringr")) install.packages("stringr")
if (!require("ggplot2")) install.packages("ggplot2")
if (!require("scales")) install.packages("scales")
## Loading required package: scales
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
# Load library
library(tidyverse)
library(tidytext)
library(stringr)
library(ggplot2)
library(scales)
# Load Stevens opinions extracted case data
df <- read.csv("stevensdata.csv")
df <- read.csv("stevensdata.csv", stringsAsFactors = FALSE)
# key for column names and items
id_col <- "file_title" # column with a case name
type_col <- "subfolder" # column with "concurring" or "dissenting"
text_col <- "stevens_opinion" # column with full text of Stevens' authored opinion
year_col <- "year" # column with the year
# validate date/years
df$case_year <- as.integer(df[[year_col]])
missing_years <- sum(is.na(df$case_year))
if (missing_years > 0) {
warning(missing_years, " opinions have no year value. They will be excluded from year-based analyses.")
}
# Each vector contains words and short phrases associated with that category.
lexicon <- list(
Belonging = c(
"agree", "agreement","appeasement", "consensus", "we", "our", "together", "common",
"shared", "accepted", "acceptance", "harmony", "unity", "join", "collective",
"community", "solidarity", "conform", "concur", "majority", "affirm",
"accommodate", "inclusive", "cooperate", "mutual", "aligned"
),
Liberty = c(
"freedom", "liberty", "free", "autonomy", "right", "rights", "independent",
"independence", "self-determination", "privacy", "unrestricted", "oppression",
"oppressive", "restrict", "restriction", "coerce", "coercion", "compel",
"compelled", "forced", "force", "censorship", "suppress", "suppression",
"prohibit", "prohibition", "undue burden", "protect", "protection",
"shield", "civil liberties", "fundamental right", "constitutional right"
),
Empathy = c(
"suffer", "suffering", "pain", "harm", "hurt", "burden", "hardship",
"vulnerable", "victim", "neglect", "neglected", "misunderstood",
"marginalized", "disadvantaged", "disproportionate", "injustice",
"unfair", "inequity", "inequality", "compassion", "sympathize",
"sympathy", "concern", "affected", "impacted", "plight", "struggle",
"dignity", "humane", "humanity", "underserved"
),
Control = c(
"authority", "power", "duty", "obligation", "enforce", "enforcement",
"regulate", "regulation", "govern", "governance", "oversight", "mandate",
"mandate", "jurisdiction", "sovereign", "sovereignty", "command",
"control", "administer", "administration", "comply", "compliance",
"accountability", "responsibility", "sanction", "order", "directive",
"subordinate", "supervise", "supervision", "preempt", "preemption",
"deferential", "deference", "check", "balance", "limit", "limitation"
),
Contentment = c(
"appropriate", "adequate", "satisf", "reasonable", "proper", "correct",
"sound", "uphold", "affirm", "support", "sufficient", "well-established",
"consistent", "coherent", "fair", "just", "balance", "harmonious",
"endorse", "approve", "commend", "hope", "optimism", "progress",
"success", "effective", "confident", "settled", "clear", "agree"
),
Discontentment = c(
"disagree", "dissent", "error", "wrong", "incorrect", "flawed",
"misguided", "regret", "unfortunate", "troubling", "concern",
"problematic", "disappoint", "disapproval", "reject", "object",
"objection", "fail", "failure", "inadequate", "insufficient",
"misinterpret", "misread", "overreach", "unduly", "improper",
"unjust", "unfair", "inequitable", "arbitrary", "inconsistent", "unease",
"absurd", "perverse", "alarming", "disturbing", "grave", "serious"
)
)
## Function to score a single opinion text
score_opinion <- function(text, lexicon) {
# Normalize text
text_lower <- tolower(text)
words <- unlist(str_split(text_lower, "\\W+")) # split on non-word chars
scores <- sapply(names(lexicon), function(category) {
terms <- lexicon[[category]]
# Count matches (supports partial matching via grepl for phrases)
total <- sum(sapply(terms, function(term) {
sum(grepl(paste0("\\b", term, "\\b"), text_lower))
}))
return(total)
})
scores <- as.data.frame(t(scores))
# Determine dominant category
max_score <- max(scores)
if (max_score == 0) {
scores$dominant_emotion <- "None"
} else {
scores$dominant_emotion <- names(which.max(scores[1, 1:length(lexicon)]))
}
# Normalize scores to proportions (excluding dominant_emotion column)
numeric_scores <- scores[, 1:length(lexicon)]
total_words <- max(length(words), 1)
scores_normalized <- numeric_scores / total_words * 1000 # per 1000 words
scores$total_emotion_hits <- sum(numeric_scores)
return(cbind(scores_normalized, dominant_emotion = scores$dominant_emotion,
total_emotion_hits = scores$total_emotion_hits))
}
## Apply scoring to all opinions
results <- df %>%
rowwise() %>%
mutate(scores = list(score_opinion(get(text_col), lexicon))) %>%
unnest_wider(scores)
# Add a "None" flag column
results <- results %>%
mutate(dominant_emotion = ifelse(total_emotion_hits == 0, "None", dominant_emotion))
# Overview of results
print(results %>% select(all_of(c(id_col, type_col, year_col)),
Belonging, Liberty, Empathy, Control,
Contentment, Discontentment,
dominant_emotion, total_emotion_hits))
## # A tibble: 255 × 11
## file_title subfolder year Belonging Liberty Empathy Control Contentment
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 01 - Kraham v … Concurri… 1979 0 0 0 27.8 0
## 2 02 - Liles v O… Concurri… 1976 22.7 0 0 0 0
## 3 03 - Baldasar … Concurri… 1980 0 0 0 0 0
## 4 04 - US v Jaco… Concurri… 1976 0 0 0 0 0
## 5 05 - Burrell v… Concurri… 1976 9.68 0 0 6.45 12.9
## 6 06 - Duke Powe… Concurri… 1978 0.412 0.471 0.294 0.707 0.765
## 7 07 - Drake v Z… Concurri… 1980 3.27 1.64 1.64 0 6.55
## 8 08 - Arizona v… Concurri… 1976 1.28 0.855 1.71 1.28 0.855
## 9 09 - Goldwater… Concurri… 1979 0 0 0 0 0
## 10 10 - Secretary… Concurri… 1979 0 0 0 0 0
## # ℹ 245 more rows
## # ℹ 3 more variables: Discontentment <dbl>, dominant_emotion <chr>,
## # total_emotion_hits <int>
# Dominant emotion distribution of Results
emotion_summary <- results %>%
group_by(dominant_emotion) %>%
summarise(count = n()) %>%
mutate(percent = count / sum(count) * 100)
print(emotion_summary)
## # A tibble: 7 × 3
## dominant_emotion count percent
## <chr> <int> <dbl>
## 1 Belonging 10 3.92
## 2 Contentment 23 9.02
## 3 Control 29 11.4
## 4 Discontentment 1 0.392
## 5 Empathy 3 1.18
## 6 Liberty 13 5.10
## 7 None 176 69.0
# Dominant opinion type per year (concurring vs dissenting)
results_yr <- results %>% filter(!is.na(case_year))
opinion_type_by_year <- results_yr %>%
group_by(case_year, across(all_of(type_col))) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(case_year) %>%
mutate(percent = count / sum(count) * 100,
total_opinions = sum(count))
dominant_type_per_year <- opinion_type_by_year %>%
group_by(case_year) %>%
slice_max(order_by = count, n = 1, with_ties = FALSE) %>%
rename(dominant_type = all_of(type_col),
opinions_of_type = count,
pct_of_year = percent)
print(opinion_type_by_year, n = Inf)
## # A tibble: 10 × 5
## # Groups: case_year [5]
## case_year subfolder count percent total_opinions
## <int> <chr> <int> <dbl> <int>
## 1 1976 Concurring 17 40.5 42
## 2 1976 Dissenting 25 59.5 42
## 3 1977 Concurring 21 37.5 56
## 4 1977 Dissenting 35 62.5 56
## 5 1978 Concurring 16 29.6 54
## 6 1978 Dissenting 38 70.4 54
## 7 1979 Concurring 19 39.6 48
## 8 1979 Dissenting 29 60.4 48
## 9 1980 Concurring 22 40 55
## 10 1980 Dissenting 33 60 55
print(dominant_type_per_year %>% select(case_year, dominant_type,
opinions_of_type, pct_of_year,
total_opinions), n = Inf)
## # A tibble: 5 × 5
## # Groups: case_year [5]
## case_year dominant_type opinions_of_type pct_of_year total_opinions
## <int> <chr> <int> <dbl> <int>
## 1 1976 Dissenting 25 59.5 42
## 2 1977 Dissenting 35 62.5 56
## 3 1978 Dissenting 38 70.4 54
## 4 1979 Dissenting 29 60.4 48
## 5 1980 Dissenting 33 60 55
# Dominant emotion by opinion type (concurring vs dissenting)
emotion_by_type <- results %>%
group_by(across(all_of(type_col)), dominant_emotion) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(across(all_of(type_col))) %>%
mutate(percent = count / sum(count) * 100)
print(emotion_by_type)
## # A tibble: 13 × 4
## # Groups: subfolder [2]
## subfolder dominant_emotion count percent
## <chr> <chr> <int> <dbl>
## 1 Concurring Belonging 4 4.21
## 2 Concurring Contentment 9 9.47
## 3 Concurring Control 6 6.32
## 4 Concurring Empathy 2 2.11
## 5 Concurring Liberty 5 5.26
## 6 Concurring None 69 72.6
## 7 Dissenting Belonging 6 3.75
## 8 Dissenting Contentment 14 8.75
## 9 Dissenting Control 23 14.4
## 10 Dissenting Discontentment 1 0.625
## 11 Dissenting Empathy 1 0.625
## 12 Dissenting Liberty 8 5
## 13 Dissenting None 107 66.9
# results for dominant emotion by year
results_yr <- results %>% filter(!is.na(case_year))
emotion_by_year <- results_yr %>%
group_by(case_year, dominant_emotion) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(case_year) %>%
mutate(percent = count / sum(count) * 100)
print(emotion_by_year)
## # A tibble: 26 × 4
## # Groups: case_year [5]
## case_year dominant_emotion count percent
## <int> <chr> <int> <dbl>
## 1 1976 Belonging 3 7.14
## 2 1976 Contentment 3 7.14
## 3 1976 Control 5 11.9
## 4 1976 Empathy 2 4.76
## 5 1976 Liberty 2 4.76
## 6 1976 None 27 64.3
## 7 1977 Belonging 3 5.36
## 8 1977 Contentment 7 12.5
## 9 1977 Control 8 14.3
## 10 1977 Empathy 1 1.79
## # ℹ 16 more rows
# results for avergae emotional intensity for a given year
intensity_by_year <- results_yr %>%
group_by(case_year) %>%
summarise(across(c(Belonging, Liberty, Empathy, Control,
Contentment, Discontentment), mean),
opinion_count = n(),
.groups = "drop")
print(intensity_by_year)
## # A tibble: 5 × 8
## case_year Belonging Liberty Empathy Control Contentment Discontentment
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1976 1.53 0.211 0.110 0.610 1.09 0.199
## 2 1977 0.663 0.420 0.271 0.908 0.556 0.210
## 3 1978 0.160 0.336 0.0806 0.350 0.276 0.229
## 4 1979 0.134 0.156 0.0288 0.744 0.165 0.113
## 5 1980 2.65 0.274 0.165 0.536 0.500 0.206
## # ℹ 1 more variable: opinion_count <int>
# color-coding
emotion_colors <- c(
Belonging = "#4E79A7",
Liberty = "#F28E2B",
Empathy = "#E15759",
Control = "#76B7B2",
Contentment = "#59A14F",
Discontentment = "#EDC948"
)
# Filter out for visualization datasets
emotion_summary_plot <- emotion_summary %>% filter(dominant_emotion != "None")
emotion_by_type_plot <- emotion_by_type %>% filter(dominant_emotion != "None")
emotion_by_year_plot <- emotion_by_year %>% filter(dominant_emotion != "None")
# Find top emotions by year and filter out none from visualizations
# Note: "None" category is retained in scored data and exports but excluded from all plots below for clarity
top_emotion_per_year <- results_yr %>%
filter(dominant_emotion != "None") %>%
group_by(case_year, dominant_emotion) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(case_year) %>%
mutate(percent = count / sum(count) * 100) %>%
slice_max(order_by = count, n = 1, with_ties = FALSE) %>%
rename(dominant_emotion_that_year = dominant_emotion,
opinions_with_that_emotion = count,
pct_of_year = percent) %>%
arrange(case_year)
top_emotion_per_year_plot <- top_emotion_per_year %>% filter(dominant_emotion_that_year != "None")
# stacked bar chart (dominant emotion by year)
ggplot(emotion_by_year_plot, aes(x = factor(case_year), y = percent, fill = dominant_emotion)) +
geom_bar(stat = "identity", position = "stack") +
scale_fill_manual(values = emotion_colors) +
labs(title = "Emotion Proportion by Year — Stevens Opinions",
x = "Year", y = "Percentage (%)", fill = "Emotion") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Dominant emotion by year line graph
ggplot(emotion_by_year_plot, aes(x = case_year, y = percent,
color = dominant_emotion, group = dominant_emotion)) +
geom_line(linewidth = 1) +
geom_point(size = 2.5) +
scale_y_continuous(labels = percent_format(scale = 1),
limits = c(0, max(emotion_by_year_plot$percent) * 1.1),
breaks = pretty(c(0, max(emotion_by_year_plot$percent)), n = 8)) +
scale_x_continuous(breaks = unique(emotion_by_year_plot$case_year)) +
scale_color_manual(values = emotion_colors) +
labs(title = "Dominant Emotion % by Year — Stevens Opinions",
subtitle = "Each line tracks one emotion's share of opinions per year",
x = "Year", y = "% of Opinions", color = "Emotion") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.minor = element_blank(),
legend.position = "bottom")
# Individual emotion proportions of each year
ggplot(emotion_by_year_plot, aes(x = factor(case_year), y = percent, fill = dominant_emotion)) +
geom_bar(stat = "identity", position = "dodge") +
scale_y_continuous(labels = percent_format(scale = 1),
limits = c(0, max(emotion_by_year_plot$percent) * 1.1),
breaks = pretty(c(0, max(emotion_by_year_plot$percent)), n = 8)) +
scale_fill_manual(values = emotion_colors) +
labs(title = "Emotion Proportion by Year — Stevens Opinions",
x = "Year", y = "% of Opinions", fill = "Emotion") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.minor = element_blank(),
legend.position = "bottom")