Author

Eetu Mäkelä

Published

November 8, 2022

Concentration of quote sources through time

Code
qdby <- tbl(con,sql("
SELECT
  media,
  type,
  year_created,
  direct,
  canonical_name, 
  COUNT(*) AS n 
FROM flopo.quotes_c
INNER JOIN flopo.q_qa_c USING (q_id)
INNER JOIN flopo.quote_authors_c USING (qa_id)
INNER JOIN flopo.quote_author_names_to_canonical_names_c USING (name)
INNER JOIN flopo.articles_c USING (a_id)
INNER JOIN article_types_c USING (a_id)
GROUP BY media,type,year_created,direct,canonical_name
")) %>% 
  anti_join(top_quotes_categorized_c %>% filter(group=="Pois") %>% select(canonical_name)) %>%
  select(-canonical_name) %>%
  collect()
Code
library(ineq)
qdby %>% 
  filter(type=="Domestic general/political/economic news") %>%
  filter(!(type %in% c("Cruft","Other")),!str_detect(type,"opinion")) %>%
  group_by(media,type,direct,year_created) %>% 
  summarize(G=Gini(n),.groups="drop") %>%
  mutate(direct=if_else(direct==1,"direct","indirect")) %>%
  ggplot(aes(x=year_created,y=G,color=media)) +
  geom_line() +
  facet_grid(direct~type) + 
  theme_hsci_discrete() +
  ylab("Gini coefficient") +
  xlab("Year")

Interpretations:

  • More diversity in direct quote sources than in indirect quote source
  • Diversity is mostly decreasing. YLE does best here (but there may be lingering data problems ATM)

Top sources in each subgroup

Code
yearly_top_quotes_local <- top_quotes_categorized_c %>%
  select(-c(indirect:canonical_joined_role_all)) %>%
  inner_join(quote_author_names_to_canonical_names_c) %>%
  inner_join(quote_authors_c) %>%
  inner_join(q_qa_c) %>%
  right_join(
    article_types_c %>% 
      filter(type=="Domestic general/political/economic news") %>% 
      inner_join(quotes_c)) %>%
  inner_join(articles_c) %>%
  filter(is.na(after) | year_created>=after, is.na(before) | year_created<before) %>%
  count(canonical_name,group,subgroup,direct,media,year_created) %>%
  ungroup() %>%
  collect() %>%
  mutate(direct=if_else(direct==1,"direct","indirect"))
Code
yearly_top_quotes_local %>% 
  group_by(canonical_name,subgroup,direct) %>%
  tally(wt=n) %>%
#  filter(n>1000) %>%
  group_by(subgroup,direct) %>% 
  arrange(desc(n)) %>%
  slice_head(n=3) %>%
  gt()
canonical_name n
Etujärjestö - direct
Matti Vanhanen 512
Teemu Lehtinen 365
Jari Koskinen 184
Etujärjestö - indirect
Kuntaliitto 1471
Matti Vanhanen 1321
MTK 931
Itsesääntelyelin - direct
Risto Uimonen 275
Itsesääntelyelin - indirect
Risto Uimonen 457
Järjestöt - direct
liitto 657
yhdistys 194
keskusliitto 57
Järjestöt - indirect
liitto 9395
yhdistys 2223
keskusliitto 2176
Kansainvälinen järjestö - direct
Alexander Stubb 233
komissio 144
IMF 29
Kansainvälinen järjestö - indirect
komissio 4317
IMF 1064
OECD 1038
Kirkko - direct
Jukka Paarma 321
Irja Askola 123
Mitro Repo 24
Kirkko - indirect
Jukka Paarma 560
Irja Askola 167
Mitro Repo 49
Kulttuuri - direct
Olli Saarela 26
Kulttuuri - indirect
Olli Saarela 32
Media - direct
Yle 714
Matti Huutonen 391
sanoma 205
Media - indirect
sanoma 11475
Yle 5313
Iltalehti 2530
Pois - direct
mies 2079
joka 1908
nainen 1420
Pois - indirect
joka 29651
arvio 17850
se 17339
Poliisi - direct
poliisi 1682
Tero Haapala 260
Kari Tolvanen 238
Poliisi - indirect
poliisi 67258
krp 1319
Tero Haapala 516
Politiikka - direct
Jyrki Katainen 4024
Matti Vanhanen 3438
Alexander Stubb 3171
Politiikka - indirect
Jyrki Katainen 10451
Matti Vanhanen 9742
Paavo Lipponen 7286
Rikos - direct
Jari Aarnio 297
Riitta Leppiniemi 279
Heikki Lampela 147
Rikos - indirect
Jari Aarnio 894
Riitta Leppiniemi 457
Anneli Auer 427
Säätiö - direct
Anne Berner 716
Kaarlo Simojoki 187
Antti Kaikkonen 42
Säätiö - indirect
Anne Berner 1357
Kaarlo Simojoki 203
Antti Kaikkonen 95
Tavis - direct
Merja Vanhanen 26
Rauno Koivusaari 21
Tavis - indirect
Merja Vanhanen 49
Rauno Koivusaari 30
Tutkimus - direct
tutkija 1085
tutkimus 446
Pekka Himanen 57
Tutkimus - indirect
tutkimus 18743
tutkija 7715
Pekka Himanen 146
Tutkimuslaitos - direct
Sixten Korkman 379
Teija Tiilikainen 360
Jari Tuovinen 297
Tutkimuslaitos - indirect
THL 1354
Sixten Korkman 764
Teija Tiilikainen 667
Työnantajajärjestö - direct
Timo Lappi 390
Tuomas Aarto 241
Penna Urrila 202
Työnantajajärjestö - indirect
EK 1670
TT 534
Timo Lappi 375
Työntekijäjärjestö - direct
Lauri Lyly 774
Olli Luukkainen 549
Jarkko Eloranta 521
Työntekijäjärjestö - indirect
Lauri Lyly 1790
SAK 1442
Lauri Ihalainen 1055
Ulkomainen toimija - direct
Vladimir Putin 259
Jean-Claude Juncker 229
Angela Merkel 213
Ulkomainen toimija - indirect
Reuters 1715
Vladimir Putin 937
Financial Times 821
Urheilujärjestö - direct
Esko Aho 151
Ilkka Kanerva 91
Urheilujärjestö - indirect
Esko Aho 378
Ilkka Kanerva 246
Viranomainen/julkistoimija - direct
Mikko Paatero 688
Erkki Liikanen 551
oikeus 499
Viranomainen/julkistoimija - indirect
oikeus 12569
pelastuslaitos 11791
tilastokeskus 8361
Yhdistys - direct
Risto Manninen 47
Yhdistys - indirect
Risto Manninen 69
Yliopisto - direct
Matti Tolvanen 575
Erkka Railo 571
professori 456
Yliopisto - indirect
professori 1804
Erkka Railo 896
Matti Tolvanen 735
Yritys - direct
Reijo Heiskanen 548
Jorma Ollila 357
Aki Kangasharju 354
Yritys - indirect
VR 2785
Nokia 2658
Finnair 2412
NA - direct
NA 663030
NA - indirect
NA 1286684

Proportions of quotes categorized in the different categories in domestic general/political/economic news

Code
yearly_top_quotes_local %>%
  group_by(group,media,direct,year_created) %>%
  summarize(n=sum(n),.groups="drop") %>%
  group_by(media,direct,year_created) %>%
  mutate(prop=n/sum(n)) %>%
  ungroup() %>%
  filter(!is.na(group)) %>%
  ggplot(aes(x=year_created,y=prop,color=group)) +
  geom_step() + 
  facet_grid(media~direct) + 
  theme_hsci_discrete(base_family="Arial") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1)) +
  theme(legend.position = 'bottom')

Interpretation:

  • No clear patterns apart from the police massively gaining prominence and the rise in media self-references

Proportions of quotes categorized in the different subcategories in domestic general/political/economic news.

Largest subgroups of Politiikka,Poliisi,Media and Pois removed

Code
yearly_top_quotes_local %>%
  filter(subgroup!="Politiikka",subgroup!="Poliisi",subgroup!="Media",subgroup!="Pois") %>%
  group_by(subgroup,media,direct,year_created) %>%
  summarize(n=sum(n),.groups="drop") %>%
  group_by(media,direct,year_created) %>%
  mutate(prop=n/sum(n)) %>%
  ungroup() %>%
  filter(!is.na(subgroup)) %>%
  ggplot(aes(x=year_created,y=prop,color=subgroup)) +
  geom_step() + 
  facet_grid(media~direct) + 
  theme_hsci_discrete(base_family="Arial") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1)) +
  theme(legend.position = 'bottom')
Warning: This palette returns a maximum of 13 distinct values. You have
requested 19.

Interpretation:

  • Even with the police removed, “Viranomainen/julkistoimija” gains prominence in indirect quotes, mostly consisting of “oikeus” and “pelastuslaitos”

Overall proportions

Code
d <- random_quotes_categorized_local %>%
  mutate(period=as_factor(2009+5*floor((year(time_created)-2009)/5)), direct=if_else(direct==0,"indirect","direct")) %>%
  count(group,media,direct,period) %>%
  group_by(media,direct,period) %>%
  group_modify(~bind_cols(.x,scimp_bmde(.x$n, p=1))) %>%
  mutate(prop=n/sum(n),rest=TRUE) %>%
  ungroup() %>%
  full_join(yearly_top_quotes_local %>%
    mutate(period=as_factor(2009+5*floor((year_created-2009)/5))) %>%
    group_by(group,media,direct,period) %>%
    summarize(n=sum(n),.groups="drop") %>%
    group_by(media,direct,period) %>%
    mutate(prop=n/sum(n)) %>%
    ungroup() %>%
    mutate(rest=is.na(group)),by=c("media","direct","period","rest")) %>%
  mutate(group=coalesce(group.x,group.y), prop=if_else(is.na(group.y),prop.y*prop.x,prop.y),upper_limit=if_else(is.na(group.y),prop.y*upper_limit,prop.y), lower_limit=if_else(is.na(group.y),prop.y*lower_limit,prop.y)) %>%
  group_by(media,direct,period,group) %>% 
  summarize(prop=sum(prop),upper_limit=sum(upper_limit),lower_limit=sum(lower_limit),.groups="drop") %>%
  filter(group!="Pois") %>%
  group_by(media,direct,period) %>%
  mutate(prop=prop/sum(prop), upper_limit=upper_limit/sum(upper_limit), lower_limit=lower_limit/sum(lower_limit)) %>%
  ungroup() %>%
#  mutate(period=if_else(period==FALSE,"1999-2008","2009-2018")) %>%
  identity()

d2 <- random_quotes_categorized_local %>%
  mutate(period=as_factor(2009+5*floor((year(time_created)-2009)/5)), direct=if_else(direct==0,"indirect","direct")) %>%
  count(subgroup,media,direct,period) %>%
  group_by(media,direct,period) %>%
  group_modify(~bind_cols(.x,scimp_bmde(.x$n, p=1))) %>%
  mutate(prop=n/sum(n),rest=TRUE) %>%
  ungroup() %>%
  full_join(yearly_top_quotes_local %>%
    mutate(period=as_factor(2009+5*floor((year_created-2009)/5))) %>%
    group_by(subgroup,media,direct,period) %>%
    summarize(n=sum(n),.groups="drop") %>%
    group_by(media,direct,period) %>%
    mutate(prop=n/sum(n)) %>%
    ungroup() %>%
    mutate(rest=is.na(subgroup)),by=c("media","direct","period","rest")) %>%
  mutate(subgroup=coalesce(subgroup.x,subgroup.y), prop=if_else(is.na(subgroup.y),prop.y*prop.x,prop.y),upper_limit=if_else(is.na(subgroup.y),prop.y*upper_limit,prop.y), lower_limit=if_else(is.na(subgroup.y),prop.y*lower_limit,prop.y)) %>%
  group_by(media,direct,period,subgroup) %>% 
  summarize(prop=sum(prop),upper_limit=sum(upper_limit),lower_limit=sum(lower_limit),.groups="drop") %>%
  filter(subgroup!="Pois") %>%
  group_by(media,direct,period) %>%
  mutate(prop=prop/sum(prop), upper_limit=upper_limit/sum(upper_limit), lower_limit=lower_limit/sum(lower_limit)) %>%
  ungroup() %>%
#  mutate(period=if_else(period==FALSE,"1999-2008","2009-2018")) %>%
  identity()

By group

Code
d %>% 
  ggplot(aes(x=group,color=period)) +
  geom_point(aes(y=prop),position=position_dodge(width=1)) + 
  geom_errorbar(aes(ymin=lower_limit,ymax=upper_limit),position=position_dodge(width=1)) +
  facet_grid(media~direct) +
  coord_flip() +
  theme_hsci_discrete(base_family="Arial") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1)) +
  theme(legend.position="bottom") +
  xlab("Group") +
  ylab("Proportion")

Code
d %>%
  ggplot(aes(x=group,group=media,color=media)) +
  geom_point(aes(y=prop),position=position_dodge(width=1)) + 
  geom_errorbar(aes(ymin=lower_limit,ymax=upper_limit),position=position_dodge(width=1)) +
  facet_grid(period~direct) +
  coord_flip() +
  theme_hsci_discrete(base_family="Arial") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1)) +
  theme(legend.position="bottom") +
  xlab("Group") +
  ylab("Proportion")

Code
d %>%
  ggplot(aes(x=group,group=media,color=direct)) +
  geom_point(aes(y=prop),position=position_dodge(width=1)) + 
  geom_errorbar(aes(ymin=lower_limit,ymax=upper_limit),position=position_dodge(width=1)) +
  facet_grid(media~period) +
  coord_flip() +
  theme_hsci_discrete(base_family="Arial") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1)) +
  theme(legend.position="bottom") +
  xlab("Subgroup") +
  ylab("Proportion")

By subgroup

Code
d2 %>% 
  ggplot(aes(x=subgroup,color=period)) +
  geom_point(aes(y=prop),position=position_dodge(width=1)) + 
  geom_errorbar(aes(ymin=lower_limit,ymax=upper_limit),position=position_dodge(width=1)) +
  facet_grid(media~direct) +
  coord_flip() +
  theme_hsci_discrete(base_family="Arial") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1)) +
  theme(legend.position="bottom") +
  xlab("Subgroup") +
  ylab("Proportion")

Code
d2 %>%
  ggplot(aes(x=subgroup,group=media,color=media)) +
  geom_point(aes(y=prop),position=position_dodge(width=1)) + 
  geom_errorbar(aes(ymin=lower_limit,ymax=upper_limit),position=position_dodge(width=1)) +
  facet_grid(period~direct) +
  coord_flip() +
  theme_hsci_discrete(base_family="Arial") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1)) +
  theme(legend.position="bottom") +
  xlab("Subgroup") +
  ylab("Proportion")

Code
d2 %>%
  ggplot(aes(x=subgroup,group=media,color=direct)) +
  geom_point(aes(y=prop),position=position_dodge(width=1)) + 
  geom_errorbar(aes(ymin=lower_limit,ymax=upper_limit),position=position_dodge(width=1)) +
  facet_grid(media~period) +
  coord_flip() +
  theme_hsci_discrete(base_family="Arial") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1)) +
  theme(legend.position="bottom") +
  xlab("Subgroup") +
  ylab("Proportion")