Topic Modeling YLE and HS data on migration and energy politics

R script for building Structured Topic Model using data from YLE and HS web news archives. Datasets are not publicly available. HTML version of this document including all graphics is available on RPubs. Source code is also available.

Energy subcorpus

energy_processed <- textProcessor(
  energy_full$content, 
  metadata = energy_full,
  stem=FALSE,
  customstopwords = stopwords$V1)

energy_out <- prepDocuments(
  energy_processed$documents, 
  energy_processed$vocab,
  energy_processed$meta,
  lower.thresh = 15)

# Let's use automatic selection of topics using parameter K=0
energy_model_fit <- stm(
  energy_out$documents, 
  energy_out$vocab, 
  K = 0,
  prevalence =~ media + s(year), 
  max.em.its = 100,
  data = energy_out$meta, 
  init.type = "Spectral")

energy_model_fit <- readRDS('energy_model_fit.RDs')
energy_out <- readRDS('energy_out.RDs')
labelTopics(energy_model_fit)
energy_prep <- estimateEffect(
  1:72 ~ media + s(year),
  energy_model_fit,
  meta = energy_out$meta,
  uncertainty = "Global")

energy_model_fit <- readRDS('energy_model_fit.RDs')
plot(
  energy_model_fit,
  type = "summary",
  xlim = c(0, .6),
  n = 10,
  labeltype = "frex",
  topics = c(67,25,61,36,30),
  text.cex = 0.6,
  main = "Selected energy-related topics")

Ukrainian problems -topic (topic no 25)

energy_prep <- readRDS('energy_prep.Rds')
plot(
  energy_prep, "year", 
  method = "continuous", 
  topics = 25, 
  printlegend = FALSE,
  xaxt = "n",
  xlab = "Years",
  main = "Ukrainian problems")
axis(1,at=seq(from=2000, to=2016,by=2))

Russia-Ukraine relations (topic no 61)

plot(
  energy_prep, "year", 
  method = "continuous", 
  topics = 61, 
  printlegend = FALSE,
  xaxt = "n",
  xlab = "Years",
  main = "Russia - Ukraine -relations")
axis(1,at=seq(from=2000, to=2016,by=2))

plot(energy_prep, covariate = "media", topics = c(67,25,61,36,30),
  model = energy_model_fit, method = "difference",
  cov.value1 = "HS", cov.value2 = "YLE",
  main = "HS vs YLE",
  xlim = c(-.1, .1),
  labeltype = "custom",
  custom.labels = c(
    "LNG and natural gas",
    "Ukrainan gas pipeline",
    "Russia-Ukraine -relations",
    "Oil price",
    "Price of gasoline"))

How different energy-related topics are correlated?

energy_model_fit <- readRDS('energy_model_fit.RDs')
mod.out.corr <- topicCorr(energy_model_fit)
plot(
  mod.out.corr,
  topics = c(67,25,61,36,30),
  vlabels = c(
    "LNG and natural gas",
    "Ukrainan gas pipeline",
    "Russia-Ukraine -relations",
    "Oil price",
    "Price of gasoline"))

Migration subcorpus

migration_processed <- textProcessor(
  migration_full$content,
  metadata = migration_full,
  stem=FALSE,
  customstopwords = stopwords$V1)

migration_out <- prepDocuments(
  migration_processed$documents,
  migration_processed$vocab,
  migration_processed$meta,
  lower.thresh = 15)

# Let's use automatic selection of topics using parameter K=0
migration_model_fit <- stm(
  migration_out$documents,
  migration_out$vocab,
  K = 0,
  prevalence =~ media + s(year),
  max.em.its = 100,
  data = migration_out$meta,
  init.type = "Spectral")

migration_model_fit <- readRDS('migration_model_fit.RDs')
migration_out <- readRDS('migration_out.RDs')
labelTopics(migration_model_fit)
migration_prep <- estimateEffect(
  1:50 ~ media + s(year),
  migration_model_fit,
  meta = migration_out$meta,
  uncertainty = "Global")

migration_model_fit <- readRDS('migration_model_fit.RDs')
plot(
  migration_model_fit,
  type = "summary",
  xlim = c(0, .6),
  n = 10,
  labeltype = "frex",
  topics = c(3,5,13,28,80,1,8),
  text.cex = 0.6,
  main = "Selected immigration-related topics")

How different immigration-related topics are correlated?

migration_model_fit <- readRDS('migration_model_fit.RDs')
mod.out.corr <- topicCorr(migration_model_fit)
plot(
  mod.out.corr,
  topics = c(3,5,13,28,80,1,8),
  vlabels = c(
    "Asylum process",
    "Mediterranean human trafficking",
    "Deporting",
    "Crime and unrest",
    "Events in Tornio",
    "Syrian civil war",
    "Immigation reception centers"
    )
  
  )

saveRDS(migration_model_fit,file="migration_model_fit.RDs")
saveRDS(migration_processed,file="migration_processed.RDs")
saveRDS(migration_out,file="migration_out.RDs")
saveRDS(migration_prep,file="migration_prep.RDs")

Topic Modeling YLE and HS data on migration and energy politics

Digital Humanities Hackathon 2017, Media & Politics group

5/24/2017

Energy subcorpus

Ukrainian problems -topic (topic no 25)

Russia-Ukraine relations (topic no 61)

Migration subcorpus