# ============================================
# APNews text analysis (Second-level agenda-setting theory version)
# ============================================
# ============================================
# --- Load required libraries ---
# ============================================
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("tidytext")) install.packages("tidytext")
library(tidyverse)
library(tidytext)
# ============================================
# --- Load the APNews data ---
# ============================================
# Read the data from the web
FetchedData <- readRDS(url("https://github.com/drkblake/Data/raw/refs/heads/main/APNews.rds"))
# Save the data on your computer
saveRDS(FetchedData, file = "APNews.rds")
# remove the downloaded data from the environment
rm (FetchedData)
APNews <- readRDS("APNews.rds")
# ============================================
# --- Define and apply FilterTopic ---
# ============================================
# --- Define FilterTopic phrases ---
FilterTopic_phrases <- c(
"tariff",
"tariffs"
)
# --- Escape regex special characters ---
escaped_FilterTopic <- str_replace_all(
FilterTopic_phrases,
"([\\^$.|?*+()\\[\\]{}\\\\])",
"\\\\\\1"
)
# --- Build whole-word/phrase regex pattern ---
FilterTopic_pattern <- paste0("\\b", escaped_FilterTopic, "\\b", collapse = "|")
# --- Flag stories matching the FilterTopic ---
APNews <- APNews %>%
mutate(
Full.Text.clean = str_squish(Full.Text),
FilterTopic = if_else(
str_detect(Full.Text.clean, regex(FilterTopic_pattern, ignore_case = TRUE)),
"Yes",
"No"
)
)
# --- Create a TopicNews data frame consisting only of FilterTopic stories ---
TopicNews <- APNews %>%
filter(FilterTopic == "Yes")
# ============================================
# --- Flag Topic1-related stories (within TopicNews) ---
# ============================================
# --- Define Topic1 phrases ---
phrases <- c(
"trade policy",
"trade deal",
"trade agreement",
"U.S. trade",
"American manufacturing",
"economic growth",
"domestic production",
"jobs",
"economy",
"economic boost",
"recovery"
)
# --- Escape regex special characters ---
escaped_phrases <- str_replace_all(
phrases,
"([\\^$.|?*+()\\[\\]{}\\\\])",
"\\\\\\1"
)
# --- Build pattern and apply matching ---
pattern <- paste0("\\b", escaped_phrases, "\\b", collapse = "|")
TopicNews <- TopicNews %>%
mutate(
Topic1 = if_else(
str_detect(Full.Text.clean, regex(pattern, ignore_case = TRUE)),
"Yes",
"No"
)
)
# ============================================
# --- Flag Topic2-related stories (within TopicNews) ---
# ============================================
# --- Define Topic2 phrases ---
phrases <- c(
"cost",
"prices",
"inflation",
"trade war",
"retaliation",
"economic slowdown",
"loss", "deficit",
"layoffs", "farmers",
"backlash",
"higher prices",
"hurt",
"decline",
"economic pain",
"instability",
"recession",
"criticism",
"tension",
"uncertainty",
"global market"
)
# --- Escape regex special characters ---
escaped_phrases <- str_replace_all(
phrases,
"([\\^$.|?*+()\\[\\]{}\\\\])",
"\\\\\\1"
)
# --- Build pattern and apply matching ---
pattern <- paste0("\\b", escaped_phrases, "\\b", collapse = "|")
TopicNews <- TopicNews %>%
mutate(
Topic2 = if_else(
str_detect(Full.Text.clean, regex(pattern, ignore_case = TRUE)),
"Yes",
"No"
)
)
# ============================================
# --- Visualize weekly counts of Topic1- and Topic2-related stories ---
# ============================================
if (!require("plotly")) install.packages("plotly")
library(plotly)
# --- Summarize weekly counts for Topic1 = "Yes" ---
Topic1_weekly <- TopicNews %>%
filter(Topic1 == "Yes") %>%
group_by(Week) %>%
summarize(Count = n(), .groups = "drop") %>%
mutate(Topic = "Gain") # <====== Note custom Topic2 label
# --- Summarize weekly counts for Topic2 = "Yes" ---
Topic2_weekly <- TopicNews %>%
filter(Topic2 == "Yes") %>%
group_by(Week) %>%
summarize(Count = n(), .groups = "drop") %>%
mutate(Topic = "Loss") # <====== Note custom Topic1 label
# --- Combine both summaries into one data frame ---
Weekly_counts <- bind_rows(Topic2_weekly, Topic1_weekly)
# --- Fill in missing combinations with zero counts ---
Weekly_counts <- Weekly_counts %>%
tidyr::complete(
Topic,
Week = full_seq(range(Week), 1), # generate all week numbers
fill = list(Count = 0)
) %>%
arrange(Topic, Week)
# --- Create interactive plotly line chart ---
AS2 <- plot_ly(
data = Weekly_counts,
x = ~Week,
y = ~Count,
color = ~Topic,
colors = c("steelblue", "firebrick"),
type = "scatter",
mode = "lines+markers",
line = list(width = 2),
marker = list(size = 6)
) %>%
layout(
title = "Weekly Counts of Topic1- and Topic2-Related Stories within the FilterTopic Dataset",
xaxis = list(
title = "Week Number (starting with Week 1 of 2025)",
dtick = 1
),
yaxis = list(title = "Number of Articles"),
legend = list(title = list(text = "Topic")),
hovermode = "x unified"
)
# ============================================
# --- Show the chart ---
# ============================================
AS2
# ============================================================
# Setup: Install and Load Required Packages
# ============================================================
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("plotly")) install.packages("plotly")
if (!require("gt")) install.packages("gt")
if (!require("gtExtras")) install.packages("gtExtras")
if (!require("broom")) install.packages("broom")
library(tidyverse)
library(plotly)
library(gt)
library(gtExtras)
library(broom)
options(scipen = 999)
# ============================================================
# Data Import
# ============================================================
# Reshape to wide form
mydata <- Weekly_counts %>%
pivot_wider(names_from = Topic, values_from = Count)
names(mydata) <- make.names(names(mydata))
# Specify the two variables involved
mydata$V1 <- mydata$Gain # <== Customize this
mydata$V2 <- mydata$Loss # <== Customize this
# ============================================================
# Compute Pair Differences
# ============================================================
mydata$PairDifferences <- mydata$V2 - mydata$V1
# ============================================================
# Interactive Histogram of Pair Differences
# ============================================================
hist_plot <- plot_ly(
data = mydata,
x = ~PairDifferences,
type = "histogram",
marker = list(color = "#1f78b4", line = list(color = "black", width = 1))
) %>%
layout(
title = "Distribution of Pair Differences",
xaxis = list(title = "Pair Differences"),
yaxis = list(title = "Count"),
shapes = list(
list(
type = "line",
x0 = mean(mydata$PairDifferences, na.rm = TRUE),
x1 = mean(mydata$PairDifferences, na.rm = TRUE),
y0 = 0,
y1 = max(table(mydata$PairDifferences)),
line = list(color = "red", dash = "dash")
)
)
)
# ============================================================
# Descriptive Statistics
# ============================================================
desc_stats <- mydata %>%
summarise(
count = n(),
mean = mean(PairDifferences, na.rm = TRUE),
sd = sd(PairDifferences, na.rm = TRUE),
min = min(PairDifferences, na.rm = TRUE),
max = max(PairDifferences, na.rm = TRUE)
)
desc_table <- desc_stats %>%
gt() %>%
gt_theme_538() %>%
tab_header(title = "Descriptive Statistics: Pair Differences") %>%
fmt_number(columns = where(is.numeric), decimals = 3)
# ============================================================
# Normality Test (Shapiro-Wilk)
# ============================================================
shapiro_res <- shapiro.test(mydata$PairDifferences)
shapiro_table <- tidy(shapiro_res) %>%
select(statistic, p.value, method) %>%
gt() %>%
gt_theme_538() %>%
tab_header(title = "Normality Test (Shapiro-Wilk)") %>%
fmt_number(columns = c(statistic, p.value), decimals = 4) %>%
tab_source_note(
source_note = "If the P.VALUE is 0.05 or less, the number of pairs is fewer than 40, and the distribution of pair differences shows obvious non-normality or outliers, consider using the Wilcoxon Signed Rank Test results instead of the Paired-Samples t-Test results."
)
# ============================================================
# Reshape Data for Repeated-Measures Plot
# ============================================================
df_long <- mydata %>%
pivot_longer(cols = c(V1, V2),
names_to = "Measure",
values_to = "Value")
# ============================================================
# Repeated-Measures Boxplot (Interactive, with Means)
# ============================================================
group_means <- df_long %>%
group_by(Measure) %>%
summarise(mean_value = mean(Value), .groups = "drop")
boxplot_measures <- plot_ly() %>%
add_trace(
data = df_long,
x = ~Measure, y = ~Value,
type = "box",
boxpoints = "outliers",
marker = list(color = "red", size = 4),
line = list(color = "black"),
fillcolor = "royalblue",
name = ""
) %>%
add_trace(
data = group_means,
x = ~Measure, y = ~mean_value,
type = "scatter", mode = "markers",
marker = list(
symbol = "diamond", size = 9,
color = "black", line = list(color = "white", width = 1)
),
text = ~paste0("Mean = ", round(mean_value, 2)),
hoverinfo = "text",
name = "Group Mean"
) %>%
layout(
title = "Boxplot of Repeated Measures (V1 vs V2) with Means",
xaxis = list(title = "Measure"),
yaxis = list(title = "Value"),
showlegend = FALSE
)
# ============================================================
# Parametric Test (Paired-Samples t-Test)
# ============================================================
t_res <- t.test(mydata$V2, mydata$V1, paired = TRUE)
t_table <- tidy(t_res) %>%
select(statistic, parameter, p.value, conf.low, conf.high, method) %>%
gt() %>%
gt_theme_538() %>%
tab_header(title = "Paired-Samples t-Test") %>%
fmt_number(columns = c(statistic, p.value, conf.low, conf.high), decimals = 4)
t_summary <- mydata %>%
select(V1, V2) %>%
summarise_all(list(Mean = mean, SD = sd)) %>%
gt() %>%
gt_theme_538() %>%
tab_header(title = "Group Means and SDs (t-Test)") %>%
fmt_number(columns = everything(), decimals = 3)
# ============================================================
# Nonparametric Test (Wilcoxon Signed Rank)
# ============================================================
wilcox_res <- wilcox.test(mydata$V1, mydata$V2, paired = TRUE,
exact = FALSE)
wilcox_table <- tidy(wilcox_res) %>%
select(statistic, p.value, method) %>%
gt() %>%
gt_theme_538() %>%
tab_header(title = "Wilcoxon Signed Rank Test") %>%
fmt_number(columns = c(statistic, p.value), decimals = 4)
wilcox_summary <- mydata %>%
select(V1, V2) %>%
summarise_all(list(Mean = mean, SD = sd)) %>%
gt() %>%
gt_theme_538() %>%
tab_header(title = "Group Means and SDs (Wilcoxon)") %>%
fmt_number(columns = everything(), decimals = 3)
# ============================================================
# Results Summary (in specified order)
# ============================================================
hist_plot
desc_table
shapiro_table
boxplot_measures
t_table
t_summary
wilcox_table
wilcox_summary