According to GDELT’s raw article counts, online news outlets published more coverage of Donald Trump than of Kamala Harris across most of the 2024 election period. “Volume” is the number of U.S.-based online news articles per day that matched each candidate’s query.
# --- Libraries (packages already installed in Step 1) ---
library(tidyverse)
library(plotly)
library(readr)
# --- Dates ---
# Query window as 8-digit YYYYMMDD strings. The URL parts below append
# "000000" to each one (presumably an HHMMSS time of day -- confirm against
# the GDELT API documentation).
startdate <- "20240701"
enddate <- "20241106"
# --- GDELT URL parts ---
# Fixed pieces of the request URL. They are pasted together later in the
# order: vp1, <query>, vp2, <startdate>, vp3, <enddate>, vp4.
# vp1: endpoint plus the start of the query parameter.
vp1 <- "https://api.gdeltproject.org/api/v2/doc/doc?query="
# vp2: selects the "timelinevolraw" mode and opens the start-datetime value.
vp2 <- "&mode=timelinevolraw&startdatetime="
# vp3: finishes the start datetime with "000000" and opens the end datetime.
vp3 <- "000000&enddatetime="
# vp4: finishes the end datetime with "000000" and requests CSV output.
vp4 <- "000000&format=CSV"
# --- Shared fetch helper ---
# Reads a GDELT "timelinevolraw" CSV from `live_url` and returns only the rows
# whose Series is "Article Count", with `Date` converted to class Date.
#
# If any step of the live path fails (download, parsing, or filtering), the
# same steps are applied to the archived CSV at `fallback_url`, and a warning
# reports the topic and the underlying error so the substitution is visible.
#
# Arguments:
#   live_url     Fully built, URL-encoded GDELT request.
#   fallback_url Location of an archived CSV with the same columns.
#   topic        Short label used only in the warning text.
# Returns: the filtered table produced by dplyr::filter().
fetch_article_counts <- function(live_url, fallback_url, topic) {
  read_counts <- function(csv_location) {
    counts <- read_csv(csv_location, show_col_types = FALSE)
    # `format =` is named on purpose: if read_csv() has already parsed the
    # column as a date-time, a positional second argument to as.Date() would
    # be matched to `tz` instead of being treated as a format string.
    counts$Date <- as.Date(counts$Date, format = "%Y-%m-%d")
    dplyr::filter(counts, Series == "Article Count")
  }
  tryCatch(
    read_counts(live_url),
    error = function(e) {
      warning(
        "GDELT request for ", topic, " failed (", conditionMessage(e),
        "); using archived results from ", fallback_url,
        call. = FALSE
      )
      read_counts(fallback_url)
    }
  )
}
# --- Topic A: Donald Trump ---
queryA <- "'Donald Trump' SourceCountry:US"
v_urlA <- URLencode(paste0(vp1, queryA, vp2, startdate, vp3, enddate, vp4))
VolumeA <- fetch_article_counts(
  v_urlA,
  "https://github.com/drkblake/Data/raw/refs/heads/main/TrumpResultsGDELT.csv",
  topic = "Topic A (Donald Trump)"
)
# --- Topic B: Kamala Harris ---
queryB <- "'Kamala Harris' SourceCountry:US"
v_urlB <- URLencode(paste0(vp1, queryB, vp2, startdate, vp3, enddate, vp4))
VolumeB <- fetch_article_counts(
  v_urlB,
  "https://github.com/drkblake/Data/raw/refs/heads/main/HarrisResultsGDELT.csv",
  topic = "Topic B (Kamala Harris)"
)
# --- Merge, label, save ---
# merge() keeps only the dates present in both tables and suffixes the
# clashing Value columns as Value.x (topic A) and Value.y (topic B); those
# are renamed to the candidate names while selecting the final columns.
VolumeAB <- merge(VolumeA, VolumeB, by = "Date") |>
  dplyr::arrange(Date) |>
  dplyr::select(Date, Trump = Value.x, Harris = Value.y)
# Write the combined daily counts to the working directory.
readr::write_csv(VolumeAB, "VolumeAB.csv")
# --- Plot ---
# Interactive line chart built step by step: the Trump series first, then the
# Harris series on the same Date axis, then titles and grid settings.
volume_plot <- plot_ly(
  data = VolumeAB,
  x = ~Date,
  y = ~Trump,
  name = "Trump",
  type = "scatter",
  mode = "lines",
  line = list(color = "#AE2012")
)
volume_plot <- add_trace(
  volume_plot,
  y = ~Harris,
  name = "Harris",
  mode = "lines",
  line = list(color = "#005F73")
)
volume_plot <- layout(
  volume_plot,
  title = "U.S. coverage volume",
  xaxis = list(title = "Date", showgrid = FALSE),
  yaxis = list(title = "Volume", showgrid = TRUE)
)
# Print the finished widget so it renders.
volume_plot
# --- Session info ---
# Print the R version, platform, locale, and the versions of all attached and
# loaded packages, so the run can be reproduced or debugged later.
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Chicago
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plotly_4.11.0 lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1
## [5] dplyr_1.1.4 purrr_1.1.0 readr_2.1.5 tidyr_1.3.1
## [9] tibble_3.3.0 ggplot2_3.5.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 generics_0.1.4 stringi_1.8.7 hms_1.1.3
## [5] digest_0.6.37 magrittr_2.0.3 evaluate_1.0.5 grid_4.5.1
## [9] timechange_0.3.0 RColorBrewer_1.1-3 fastmap_1.2.0 jsonlite_2.0.0
## [13] httr_1.4.7 crosstalk_1.2.2 viridisLite_0.4.2 scales_1.4.0
## [17] lazyeval_0.2.2 jquerylib_0.1.4 cli_3.6.5 rlang_1.1.6
## [21] crayon_1.5.3 bit64_4.6.0-1 withr_3.0.2 cachem_1.1.0
## [25] yaml_2.3.10 tools_4.5.1 parallel_4.5.1 tzdb_0.5.0
## [29] curl_7.0.0 vctrs_0.6.5 R6_2.6.1 lifecycle_1.0.4
## [33] htmlwidgets_1.6.4 bit_4.6.0 vroom_1.6.5 pkgconfig_2.0.3
## [37] pillar_1.11.0 bslib_0.9.0 gtable_0.3.6 glue_1.8.0
## [41] data.table_1.17.8 xfun_0.53 tidyselect_1.2.1 rstudioapi_0.17.1
## [45] knitr_1.50 farver_2.1.2 htmltools_0.5.8.1 rmarkdown_2.29
## [49] compiler_4.5.1