Online news outlets published more coverage of Republican presidential candidate Donald Trump than of Democratic rival Kamala Harris throughout most of the 2024 election, an analysis of GDELT data shows.
This chart compares daily counts of online articles mentioning “Donald Trump” and “Kamala Harris” between July 1, 2024, shortly before Democrat Harris entered the race, and Nov. 6, 2024, the day Harris conceded the race to Trump. “Volume” values are the number of articles published per day by all U.S.-based online news outlets tracked by GDELT.
The agenda-setting theory of mass communication suggests that the volume of media coverage about a topic can influence the amount of attention the public pays to that topic. Other factors may also affect the development of public opinion about the topic, though.
Here is the R script that gathered the data and produced the chart:
# ------------------------------
# Install and load required packages
# ------------------------------
# Check for installed packages with requireNamespace() rather than require():
# require() attaches the package as a side effect of the check and prints
# load warnings, whereas requireNamespace() only reports whether the package
# is available. Anything missing is installed in a single call.
required_packages <- c("tidyverse", "plotly", "readr")
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (any(!is_installed)) {
  install.packages(required_packages[!is_installed])
}
# library() stops with an error if a package is still unavailable, so a
# failed installation is reported here instead of further down the script.
library(tidyverse)
library(plotly)
library(readr)
# ------------------------------
# Analysis window
# ------------------------------
# Written as ISO dates for readability, then rendered as the compact
# YYYYMMDD strings that are pasted into the API request URL below.
startdate <- format(as.Date("2024-07-01"), "%Y%m%d")
enddate <- format(as.Date("2024-11-06"), "%Y%m%d")
# ------------------------------
# Helpers for fetching GDELT coverage volume
# ------------------------------

# Build the GDELT DOC API request URL for a raw daily volume timeline.
#   query     - search expression, e.g. "'Donald Trump' SourceCountry:US"
#   startdate - first day of the window as a "YYYYMMDD" string
#   enddate   - last day of the window as a "YYYYMMDD" string
# Returns the URL as a single string, encoded with URLencode(). "000000" is
# appended to each date to form the startdatetime / enddatetime values.
build_gdelt_url <- function(query, startdate, enddate) {
  URLencode(paste0(
    "https://api.gdeltproject.org/api/v2/doc/doc?query=", query,
    "&mode=timelinevolraw",
    "&startdatetime=", startdate, "000000",
    "&enddatetime=", enddate, "000000",
    "&format=CSV"
  ))
}

# Clean a volume table read from the API or from a backup file.
#   volume - data frame with Date, Series and Value columns
# Returns the rows whose Series is "Article Count", with Date as a Date.
# Stops with an error when a required column is missing or no rows remain,
# so that an unusable API response triggers the backup instead of producing
# an empty chart or a confusing failure later in the script.
prepare_volume <- function(volume) {
  missing_cols <- setdiff(c("Date", "Series", "Value"), names(volume))
  if (length(missing_cols) > 0) {
    stop(
      "response is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  # Name the format argument: passed positionally it would be interpreted
  # as `tz` if the column had already been parsed as a date-time.
  volume$Date <- as.Date(volume$Date, format = "%Y-%m-%d")
  volume <- volume %>% filter(Series == "Article Count")
  if (nrow(volume) == 0) {
    stop("response contains no \"Article Count\" rows", call. = FALSE)
  }
  volume
}

# Fetch the daily article counts for one topic.
#   query      - GDELT search expression for the topic
#   backup_url - CSV snapshot to read if the API request fails
#   label      - short topic name used in the fallback message
#   startdate, enddate - analysis window as "YYYYMMDD" strings
# Returns the cleaned volume table. If the API request or its validation
# fails, the reason is reported and the backup file is used instead; an
# error while reading the backup is not caught and stops the script.
fetch_volume <- function(query, backup_url, label, startdate, enddate) {
  api_url <- build_gdelt_url(query, startdate, enddate)
  tryCatch(
    prepare_volume(read_csv(api_url)),
    error = function(e) {
      message("⚠️ ", label, " query failed — loading from GitHub backup")
      message("   Reason: ", conditionMessage(e))
      prepare_volume(read_csv(backup_url))
    }
  )
}

# ------------------------------
# Fetch Topic A: Donald Trump
# ------------------------------
VolumeA <- fetch_volume(
  query = "'Donald Trump' SourceCountry:US",
  backup_url = "https://github.com/drkblake/Data/raw/refs/heads/main/TrumpResultsGDELT.csv",
  label = "Trump",
  startdate = startdate,
  enddate = enddate
)

# ------------------------------
# Fetch Topic B: Kamala Harris
# ------------------------------
VolumeB <- fetch_volume(
  query = "'Kamala Harris' SourceCountry:US",
  backup_url = "https://github.com/drkblake/Data/raw/refs/heads/main/HarrisResultsGDELT.csv",
  label = "Harris",
  startdate = startdate,
  enddate = enddate
)
# ------------------------------
# Combine the two series by date
# ------------------------------
# merge() keeps the dates present in both tables and suffixes the columns
# they share with .x (from VolumeA) and .y (from VolumeB).
VolumeAB <- merge(x = VolumeA, y = VolumeB, by = "Date")
VolumeAB[["VolumeA"]] <- VolumeAB[["Value.x"]] # Trump article counts
VolumeAB[["VolumeB"]] <- VolumeAB[["Value.y"]] # Harris article counts
# ------------------------------
# Build the interactive line chart
# ------------------------------
# First trace: Trump volume, drawn as a red line.
fig <- plot_ly(
  data = VolumeAB,
  x = ~Date,
  y = ~VolumeA,
  name = "Trump",
  type = "scatter",
  mode = "lines",
  line = list(color = "#AE2012")
)
# Second trace: Harris volume, drawn as a blue line on the same axes.
fig <- add_trace(
  fig,
  y = ~VolumeB,
  name = "Harris",
  mode = "lines",
  line = list(color = "#005F73")
)
# Title and axis labels; only horizontal grid lines are shown.
fig <- layout(
  fig,
  title = "U.S. coverage volume",
  xaxis = list(title = "Date", showgrid = FALSE),
  yaxis = list(title = "Volume", showgrid = TRUE)
)
# Display the chart.
fig
# ------------------------------
# Export the merged table
# ------------------------------
# Writes VolumeAB.csv to the current working directory.
readr::write_csv(VolumeAB, "VolumeAB.csv")