R script for building Structured Topic Model using data from YLE and HS web news archives. Datasets are not publicly available. HTML version of this document including all graphics is available on RPubs. Source code is also available.
energy_processed <- textProcessor(
energy_full$content,
metadata = energy_full,
stem=FALSE,
customstopwords = stopwords$V1)
energy_out <- prepDocuments(
energy_processed$documents,
energy_processed$vocab,
energy_processed$meta,
lower.thresh = 15)
# Let's use automatic selection of topics using parameter K=0
energy_model_fit <- stm(
energy_out$documents,
energy_out$vocab,
K = 0,
prevalence =~ media + s(year),
max.em.its = 100,
data = energy_out$meta,
init.type = "Spectral")
energy_model_fit <- readRDS('energy_model_fit.RDs')
energy_out <- readRDS('energy_out.RDs')
labelTopics(energy_model_fit)
energy_prep <- estimateEffect(
1:72 ~ media + s(year),
energy_model_fit,
meta = energy_out$meta,
uncertainty = "Global")
energy_model_fit <- readRDS('energy_model_fit.RDs')
plot(
energy_model_fit,
type = "summary",
xlim = c(0, .6),
n = 10,
labeltype = "frex",
topics = c(67,25,61,36,30),
text.cex = 0.6,
main = "Selected energy-related topics")
energy_prep <- readRDS('energy_prep.Rds')
plot(
energy_prep, "year",
method = "continuous",
topics = 25,
printlegend = FALSE,
xaxt = "n",
xlab = "Years",
main = "Ukrainian problems")
axis(1,at=seq(from=2000, to=2016,by=2))
plot(
energy_prep, "year",
method = "continuous",
topics = 61,
printlegend = FALSE,
xaxt = "n",
xlab = "Years",
main = "Russia - Ukraine -relations")
axis(1,at=seq(from=2000, to=2016,by=2))
plot(energy_prep, covariate = "media", topics = c(67,25,61,36,30),
model = energy_model_fit, method = "difference",
cov.value1 = "HS", cov.value2 = "YLE",
main = "HS vs YLE",
xlim = c(-.1, .1),
labeltype = "custom",
custom.labels = c(
"LNG and natural gas",
"Ukrainan gas pipeline",
"Russia-Ukraine -relations",
"Oil price",
"Price of gasoline"))
migration_processed <- textProcessor(
migration_full$content,
metadata = migration_full,
stem=FALSE,
customstopwords = stopwords$V1)
migration_out <- prepDocuments(
migration_processed$documents,
migration_processed$vocab,
migration_processed$meta,
lower.thresh = 15)
# Let's use automatic selection of topics using parameter K=0
migration_model_fit <- stm(
migration_out$documents,
migration_out$vocab,
K = 0,
prevalence =~ media + s(year),
max.em.its = 100,
data = migration_out$meta,
init.type = "Spectral")
migration_model_fit <- readRDS('migration_model_fit.RDs')
migration_out <- readRDS('migration_out.RDs')
labelTopics(migration_model_fit)
migration_prep <- estimateEffect(
1:50 ~ media + s(year),
migration_model_fit,
meta = migration_out$meta,
uncertainty = "Global")
migration_model_fit <- readRDS('migration_model_fit.RDs')
plot(
migration_model_fit,
type = "summary",
xlim = c(0, .6),
n = 10,
labeltype = "frex",
topics = c(3,5,13,28,80,1,8),
text.cex = 0.6,
main = "Selected immigration-related topics")