This project examines how AP News covers Taylor Swift and the economic influence tied to her Eras Tour. The goal is to understand whether the media frames her primarily as an artist or as an economic force. Using Framing Theory, this study looks at how different perspectives shape public understanding. The artistic frame focuses on creativity, performance, and fan culture, while the economic frame highlights financial growth, tourism, and business impact. Understanding these frames helps show how media connects entertainment and economic narratives in modern culture.
Null Hypothesis (H₀): There is no significant difference between the frequency of artistic and economic coverage of Taylor Swift in AP News.
Alternative Hypothesis (H₁): There is a significant difference, meaning one framing type appears more often than the other.
Stated formally, with μ_d as the mean weekly difference in story counts between the two frames, the test is H₀: μ_d = 0 versus H₁: μ_d ≠ 0.
Independent Variable: The framing type, categorized as either artistic or economic.
Dependent Variable: The number of AP News stories per week associated with each framing type.
A content analysis was used to identify and code AP News stories that mention Taylor Swift. Each article was classified according to whether it contained artistic (labeled the “Cultural Frame” in the analysis) or economic keywords. Artistic terms included words such as “music,” “fans,” and “artistry,” while economic terms included “revenue,” “spending,” and “business.” Weekly counts of each frame were then visualized to track how frequently each topic appeared over time. A paired-samples t-test was conducted in R to determine whether there was a statistically significant difference between the two types of coverage, and a Wilcoxon signed-rank test was added as a robustness check after a Shapiro-Wilk test indicated that the weekly differences were not normally distributed.
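To illustrate the coding rule, the sketch below applies it to an invented one-sentence article; the keywords and example text are only illustrative, and the full keyword lists appear in the script at the end of this report.

library(stringr)
artistic <- c("music", "fans", "artistry")
economic <- c("spending", "business", "revenue")
article <- "Fans filled downtown hotels, and local spending jumped during the concert weekend."
# A story counts toward a frame if any of that frame's keywords appears as a whole word
str_detect(article, regex(paste0("\\b", artistic, "\\b", collapse = "|"), ignore_case = TRUE))  # TRUE ("Fans")
str_detect(article, regex(paste0("\\b", economic, "\\b", collapse = "|"), ignore_case = TRUE))  # TRUE ("spending")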
The results showed that economic framing appeared more often than artistic framing. While coverage about her performances and fan engagement remained present, the data indicated a stronger media focus on her economic influence. Many articles highlighted the boost in travel, local spending, and tourism linked to her tour. Overall, the analysis supports the conclusion that AP News tends to frame Taylor Swift more as an economic phenomenon than as a musical artist.
**Descriptive Statistics: Frame Differences**

| count | mean | sd | min | max |
|---|---|---|---|---|
| 38.000 | 0.763 | 1.478 | −1.000 | 5.000 |

**Normality Test (Shapiro-Wilk)**

| statistic | p.value | method |
|---|---|---|
| 0.8666 | 0.0003 | Shapiro-Wilk normality test |

**Paired-Samples t-Test**

| statistic | parameter | p.value | conf.low | conf.high | method |
|---|---|---|---|---|---|
| 3.1821 | 37 | 0.0030 | 0.2772 | 1.2491 | Paired t-test |

**Wilcoxon Signed Rank Test**

| statistic | p.value | method |
|---|---|---|
| 59.5000 | 0.0025 | Wilcoxon signed rank test with continuity correction |
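As a consistency check, the t statistic can be recovered from the descriptive statistics above: t = 0.763 / (1.478 / √38) ≈ 3.18, which matches the value reported in the paired t-test table.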
# ============================================
# Taylor Swift & The Economy (Framing Theory Version)
# ============================================
# --- Load required libraries ---
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("tidytext")) install.packages("tidytext")
if (!require("plotly")) install.packages("plotly")
if (!require("gt")) install.packages("gt")
if (!require("gtExtras")) install.packages("gtExtras")
if (!require("broom")) install.packages("broom")
library(tidyverse)
library(tidytext)
library(plotly)
library(gt)
library(gtExtras)
library(broom)
# ============================================
# --- Load the APNews data ---
# ============================================
FetchedData <- readRDS(url("https://github.com/drkblake/Data/raw/refs/heads/main/APNews.rds"))
saveRDS(FetchedData, file = "APNews.rds")
rm(FetchedData)
APNews <- readRDS("APNews.rds")
# ============================================
# --- Define and apply FilterTopic ---
# ============================================
# --- Define FilterTopic phrases ---
FilterTopic_phrases <- c(
"Taylor Swift", "Eras Tour", "concert", "music industry",
"tourism", "economic impact", "local economy", "ticket sales"
)
# --- Escape regex special characters ---
escaped_FilterTopic <- str_replace_all(FilterTopic_phrases, "([\\^$.|?*+()\\[\\]{}\\\\])", "\\\\\\1")
# --- Build whole-word/phrase regex pattern ---
FilterTopic_pattern <- paste0("\\b", escaped_FilterTopic, "\\b", collapse = "|")
# --- Flag stories matching the FilterTopic ---
APNews <- APNews %>%
mutate(
Full.Text.clean = str_squish(Full.Text),
FilterTopic = if_else(
str_detect(Full.Text.clean, regex(FilterTopic_pattern, ignore_case = TRUE)),
"Yes", "No"
)
)
# --- Create a TopicNews data frame consisting only of FilterTopic stories ---
TopicNews <- APNews %>% filter(FilterTopic == "Yes")
# ============================================
# --- Flag Topic1-related stories (Economic View) ---
# ============================================
phrases <- c(
"revenue", "growth", "boost", "business", "jobs",
"local economy", "economic impact", "spending", "hotel", "travel"
)
escaped_phrases <- str_replace_all(phrases, "([\\^$.|?*+()\\[\\]{}\\\\])", "\\\\\\1")
pattern <- paste0("\\b", escaped_phrases, "\\b", collapse = "|")
TopicNews <- TopicNews %>%
mutate(
Topic1 = if_else(
str_detect(Full.Text.clean, regex(pattern, ignore_case = TRUE)),
"Yes", "No"
)
)
# ============================================
# --- Flag Topic2-related stories (Cultural View) ---
# ============================================
phrases <- c(
"fandom", "social media", "influence", "celebrity", "trend",
"pop culture", "fans", "viral", "music", "artistry"
)
escaped_phrases <- str_replace_all(phrases, "([\\^$.|?*+()\\[\\]{}\\\\])", "\\\\\\1")
pattern <- paste0("\\b", escaped_phrases, "\\b", collapse = "|")
TopicNews <- TopicNews %>%
mutate(
Topic2 = if_else(
str_detect(Full.Text.clean, regex(pattern, ignore_case = TRUE)),
"Yes", "No"
)
)
# ============================================
# --- Visualize weekly counts of Topic1- and Topic2-related stories ---
# ============================================
Topic1_weekly <- TopicNews %>%
filter(Topic1 == "Yes") %>%
group_by(Week) %>%
summarize(Count = n(), .groups = "drop") %>%
mutate(Topic = "Economic Frame")
Topic2_weekly <- TopicNews %>%
filter(Topic2 == "Yes") %>%
group_by(Week) %>%
summarize(Count = n(), .groups = "drop") %>%
mutate(Topic = "Cultural Frame")
Weekly_counts <- bind_rows(Topic1_weekly, Topic2_weekly) %>%
tidyr::complete(
Topic,
Week = full_seq(range(Week), 1),
fill = list(Count = 0)
) %>%
arrange(Topic, Week)
FR1 <- plot_ly(
data = Weekly_counts,
x = ~Week,
y = ~Count,
color = ~Topic,
colors = c("steelblue", "firebrick"),
type = "scatter",
mode = "lines+markers",
line = list(width = 2),
marker = list(size = 6)
) %>%
layout(
title = "Weekly Counts of Taylor Swift–Related Stories by Frame",
xaxis = list(title = "Week Number (2025)", dtick = 1),
yaxis = list(title = "Number of Articles"),
legend = list(title = list(text = "Framing Type")),
hovermode = "x unified"
)
FR1
# ============================================
# --- Statistical Analysis ---
# ============================================
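# Reshape weekly counts to wide format (one row per week, one column per frame),
# then compute each week's difference in story counts: Cultural minus Economic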
mydata <- Weekly_counts %>% pivot_wider(names_from = Topic, values_from = Count)
names(mydata) <- make.names(names(mydata))
mydata$V1 <- mydata$Economic.Frame
mydata$V2 <- mydata$Cultural.Frame
mydata$PairDifferences <- mydata$V2 - mydata$V1
# --- Histogram of Differences ---
hist_plot <- plot_ly(
data = mydata,
x = ~PairDifferences,
type = "histogram",
marker = list(color = "#1f78b4", line = list(color = "black", width = 1))
) %>%
layout(
title = "Distribution of Frame Differences (Cultural – Economic)",
xaxis = list(title = "Pair Differences"),
yaxis = list(title = "Count"),
shapes = list(list(
type = "line",
x0 = mean(mydata$PairDifferences, na.rm = TRUE),
x1 = mean(mydata$PairDifferences, na.rm = TRUE),
y0 = 0,
y1 = max(table(mydata$PairDifferences)),
line = list(color = "red", dash = "dash")
))
)
# --- Descriptive Stats ---
desc_stats <- mydata %>%
summarise(
count = n(),
mean = mean(PairDifferences, na.rm = TRUE),
sd = sd(PairDifferences, na.rm = TRUE),
min = min(PairDifferences, na.rm = TRUE),
max = max(PairDifferences, na.rm = TRUE)
)
desc_table <- desc_stats %>%
gt() %>%
gt_theme_538() %>%
tab_header(title = "Descriptive Statistics: Frame Differences") %>%
fmt_number(columns = where(is.numeric), decimals = 3)
# --- Normality Test ---
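# Shapiro-Wilk tests whether the paired differences are approximately normal,
# an assumption behind the paired-samples t-test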
shapiro_res <- shapiro.test(mydata$PairDifferences)
shapiro_table <- tidy(shapiro_res) %>%
select(statistic, p.value, method) %>%
gt() %>%
gt_theme_538() %>%
tab_header(title = "Normality Test (Shapiro-Wilk)") %>%
fmt_number(columns = c(statistic, p.value), decimals = 4)
# --- Boxplot of Repeated Measures ---
df_long <- mydata %>%
pivot_longer(cols = c(V1, V2), names_to = "Measure", values_to = "Value")
group_means <- df_long %>%
group_by(Measure) %>%
summarise(mean_value = mean(Value), .groups = "drop")
boxplot_measures <- plot_ly() %>%
add_trace(
data = df_long,
x = ~Measure, y = ~Value, type = "box",
boxpoints = "outliers", fillcolor = "royalblue", name = ""
) %>%
add_trace(
data = group_means, x = ~Measure, y = ~mean_value,
type = "scatter", mode = "markers",
marker = list(symbol = "diamond", size = 9, color = "black"),
text = ~paste0("Mean = ", round(mean_value, 2)),
hoverinfo = "text", name = "Group Mean"
) %>%
layout(
title = "Comparison of Frames (Economic vs Cultural)",
xaxis = list(title = "Frame Type"),
yaxis = list(title = "Count"),
showlegend = FALSE
)
# --- Paired t-Test ---
t_res <- t.test(mydata$V2, mydata$V1, paired = TRUE)
t_table <- tidy(t_res) %>%
select(statistic, parameter, p.value, conf.low, conf.high, method) %>%
gt() %>%
gt_theme_538() %>%
tab_header(title = "Paired-Samples t-Test") %>%
fmt_number(columns = c(statistic, p.value, conf.low, conf.high), decimals = 4)
# --- Wilcoxon Test ---
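# Nonparametric alternative to the paired t-test, reported because the
# Shapiro-Wilk test above suggests the differences are not normally distributed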
wilcox_res <- wilcox.test(mydata$V1, mydata$V2, paired = TRUE, exact = FALSE)
wilcox_table <- tidy(wilcox_res) %>%
select(statistic, p.value, method) %>%
gt() %>%
gt_theme_538() %>%
tab_header(title = "Wilcoxon Signed Rank Test") %>%
fmt_number(columns = c(statistic, p.value), decimals = 4)
# ============================================
# --- Output Summary ---
# ============================================
hist_plot
desc_table
shapiro_table
boxplot_measures
t_table
wilcox_table