# load libraries
library(topicmodels)
library(tab)
library(sjPlot)
## Warning: package 'sjPlot' was built under R version 3.5.2
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.15
## Current Matrix version is 1.2.17
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
library(descr)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.2
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(quanteda)
## Warning: package 'quanteda' was built under R version 3.5.2
## Package version: 1.4.3
## Parallel computing: 2 of 12 threads used.
## See https://quanteda.io for tutorials and examples.
##
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
##
## View
library(readtext)
## Warning: package 'readtext' was built under R version 3.5.2
library(topicmodels)
library(ggplot2)
library(stm)
## stm v1.3.3 (2018-1-26) successfully loaded. See ?stm for help.
## Papers, resources, and other materials at structuraltopicmodel.com
library(ldatuning)
library(tidytext)
library(FactoMineR)
library(factoextra)
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Load the main data file; the spreadsheet column "text" holds the document text
data1 <- readtext(
  "~/Dropbox/DBXPAGAANDEARBEID/Statistikk/Rworkdir/Quanteda19/talkofnorway.xlsx",
  text_field = "text"
)
# Number of rows read
count(data1)
## # A tibble: 1 x 1
## n
## <int>
## 1 250373
# Subsetting: 1) keep only rows whose text matches one of the three stems.
# The patterns start at the second letter, presumably so that capitalised and
# lower-case forms both match -- confirm.
data <- data1[grepl("nnvandr|lyktning|sylsøk", data1$text), ]
data1 <- NULL # the full data set is not needed after this point

# Occurrences of each stem per row, and their sum
data$n_innv <- str_count(data$text, "nnvandr")
data$n_flykt <- str_count(data$text, "lyktning")
data$n_asyl <- str_count(data$text, "sylsøk")
data$n <- data$n_asyl + data$n_flykt + data$n_innv # total score

# Text length in characters
data$length <- nchar(data$text)

# Year taken from the first four characters of the date
data$aar <- as.numeric(substring(data$date, 1, 4))

# Multi-year periods. The label for 2003:2007 previously read '2000-7', which
# matched neither its range nor the pattern of the other labels.
data$aar5 <- car::recode(
  data$aar,
  "1998:2002='1998-02'; 2003:2007='2003-07';2008:2012='2008-12'; 2013:2016='2013-16';else=NA"
)
count(data)
## # A tibble: 1 x 1
## n
## <int>
## 1 7423
# Frequency table of the subset by session
freq(data$session)
## data$session
## Frequency Percent
## 1998-1999 514 6.924
## 1999-2000 369 4.971
## 2000-2001 474 6.386
## 2001-2002 411 5.537
## 2002-2003 281 3.786
## 2003-2004 247 3.327
## 2004-2005 255 3.435
## 2005-2006 255 3.435
## 2006-2007 336 4.526
## 2007-2008 321 4.324
## 2008-2009 338 4.553
## 2009-2010 308 4.149
## 2010-2011 477 6.426
## 2011-2012 456 6.143
## 2012-2013 513 6.911
## 2013-2014 477 6.426
## 2014-2015 579 7.800
## 2015-2016 812 10.939
## Total 7423 100.000
# Frequency table of the subset by debate type
freq(data$debate_type)
## data$debate_type
## Frequency Percent Valid Percent
## formalia 83 1.1181 1.1195
## interpellasjon 783 10.5483 10.5611
## muntligsporretime 820 11.0467 11.0602
## ordinarsporretime 669 9.0125 9.0235
## referatsaker 93 1.2529 1.2544
## saksreferat 4939 66.5364 66.6172
## voteringer 27 0.3637 0.3642
## NA's 9 0.1212
## Total 7423 100.0000 100.0000
# Frequency table of the subset by party name
freq(data$party_name)
## data$party_name
## Frequency Percent Valid Percent
## Arbeiderpartiet 1862 25.0842 26.22905
## Fremskrittspartiet 1288 17.3515 18.14340
## Høyre 1138 15.3307 16.03043
## Kristelig Folkeparti 811 10.9255 11.42414
## Kystpartiet 29 0.3907 0.40851
## Miljøpartiet De Grønne 29 0.3907 0.40851
## Senterpartiet 505 6.8032 7.11368
## Sosialistisk Venstreparti 922 12.4209 12.98774
## Tverrpolitisk Folkevalgte (Kystpartiet) 7 0.0943 0.09861
## Venstre 508 6.8436 7.15594
## NA's 324 4.3648
## Total 7423 100.0000 100.00000
# Frequency table of the subset by cabinet
freq(data$cabinet_short)
## data$cabinet_short
## Frequency Percent
## Bondevik I 734 9.888
## Bondevik II 1192 16.058
## Solberg I 1868 25.165
## Stoltenberg I 625 8.420
## Stoltenberg II 1278 17.217
## Stoltenberg III 1726 23.252
## Total 7423 100.000
# Bar chart of the party_id by aar5 cross-tabulation, without value labels
sjplot(
  data,
  party_id,
  aar5,
  fun = "xtab",
  type = "bar",
  show.values = FALSE
)
# Cross-table of party name by session showing column percentages only
sjt.xtab(
  data$party_name,
  data$session,
  show.col.prc = TRUE,
  show.obs = FALSE
)
| party_name / session | 1998-1999 | 1999-2000 | 2000-2001 | 2001-2002 | 2002-2003 | 2003-2004 | 2004-2005 | 2005-2006 | 2006-2007 | 2007-2008 | 2008-2009 | 2009-2010 | 2010-2011 | 2011-2012 | 2012-2013 | 2013-2014 | 2014-2015 | 2015-2016 | Total |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Arbeiderpartiet | 18.8 % | 25.5 % | 37.6 % | 15.4 % | 13.6 % | 12.2 % | 14.9 % | 33.7 % | 32.2 % | 34 % | 29.6 % | 30.6 % | 34.7 % | 32.9 % | 37 % | 24.2 % | 16.3 % | 23.7 % | 26.2 % |
| Fremskrittspartiet | 23 % | 21.5 % | 13.8 % | 17.3 % | 19.5 % | 14.8 % | 14.5 % | 20.6 % | 19.3 % | 19.3 % | 24.5 % | 20.3 % | 18 % | 14.3 % | 16 % | 20.8 % | 19.2 % | 14.2 % | 18.1 % |
| Høyre | 6.1 % | 10 % | 14 % | 27 % | 26.5 % | 29.3 % | 21.7 % | 9.5 % | 15.1 % | 13.1 % | 10 % | 9.6 % | 13.2 % | 15.9 % | 11.1 % | 17.1 % | 16.5 % | 24.2 % | 16 % |
| Kristelig Folkeparti | 22.8 % | 15.5 % | 13.6 % | 14.1 % | 12.5 % | 11.8 % | 12.8 % | 11.5 % | 11.1 % | 8.5 % | 15.4 % | 12.6 % | 6.6 % | 9.4 % | 7.1 % | 7.7 % | 8.6 % | 8.9 % | 11.4 % |
| Kystpartiet | 0 % | 0 % | 0 % | 1.3 % | 2.3 % | 3.5 % | 4.3 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0.4 % |
| Miljøpartiet De Grønne | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0.6 % | 2.7 % | 1.4 % | 0.4 % |
| Senterpartiet | 8.5 % | 6.6 % | 6.7 % | 7.9 % | 6.2 % | 8.3 % | 8.5 % | 3.2 % | 3.3 % | 4.2 % | 3.3 % | 3 % | 3.3 % | 6 % | 9.5 % | 7.7 % | 12.7 % | 10.1 % | 7.1 % |
| Sosialistisk Venstreparti | 13.5 % | 7.4 % | 9.8 % | 12 % | 12.5 % | 15.7 % | 18.3 % | 13.9 % | 10.5 % | 12.1 % | 11.2 % | 13.3 % | 16.3 % | 16.1 % | 14.7 % | 12.8 % | 14.9 % | 11 % | 13 % |
| Tverrpolitisk Folkevalgte (Kystpartiet) | 0.4 % | 1.4 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0 % | 0.1 % |
| Venstre | 6.9 % | 12 % | 4.7 % | 5 % | 7 % | 4.4 % | 5.1 % | 7.5 % | 8.4 % | 8.8 % | 6 % | 10.6 % | 7.9 % | 5.5 % | 4.6 % | 9 % | 9.1 % | 6.5 % | 7.2 % |
| Total | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % | 100 % |

χ2=1043.072 · df=153 · Cramer’s V=0.128 · Fisher’s p=0.000
# 2) Keep rows where the matched terms occur at least twice in total (n > 1)
#    and turn them into a quanteda corpus
storting <- corpus(subset(data, n > 1))
# Overview of the first documents and their document variables
summary(storting, 5)
## Warning in seq_len(n): first element used of 'length.out' argument
## Corpus consisting of 4035 documents:
##
## Text Types Tokens Sentences id url_rep_id rep_id
## talkofnorway.xlsx.164 56 69 4 tale000163 OHE OHE
## talkofnorway.xlsx.166 106 177 9 tale000165 OHE OHE
## rep_first_name rep_last_name rep_name rep_from rep_to
## Øystein Hedstrøm Øystein Hedstrøm 35704 37164
## Øystein Hedstrøm Øystein Hedstrøm 35704 37164
## rep_type county list_number party_id party_name party_role
## Representant Østfold 3 FrP Fremskrittspartiet Opposition
## Representant Østfold 3 FrP Fremskrittspartiet Opposition
## party_seats cabinet_short cabinet_start cabinet_end cabinet_composition
## 25 Bondevik I 35720 36601 Coalition
## 25 Bondevik I 35720 36601 Coalition
## rep_gender rep_birth rep_death parl_period parl_size party_seats_lagting
## mann 07.08.1946 <NA> 1997-2001 165 6
## mann 07.08.1946 <NA> 1997-2001 165 6
## party_seats_odelsting
## 19
## 19
## com_member
## Næringskomiteen ; Næringskomiteen ; Valgkomiteen ; Næringskomiteen
## Næringskomiteen ; Næringskomiteen ; Valgkomiteen ; Næringskomiteen
## com_date
## 16.03.1999 - 27.04.1999 ; 27.04.1999 - 30.09.2001 ; 08.10.1997 - 30.09.2001 ; 21.10.1997 - 16.03.1999
## 16.03.1999 - 27.04.1999 ; 27.04.1999 - 30.09.2001 ; 08.10.1997 - 30.09.2001 ; 21.10.1997 - 16.03.1999
## com_role case_id
## Fung. leder gruppestyret ; Nestleder ; Medlem ; Nestleder NA
## Fung. leder gruppestyret ; Nestleder ; Medlem ; Nestleder NA
## debate_reference
## Saker-og-publikasjoner/Publikasjoner/Referater/Stortinget/1998-1999/981021/ordinarsporretime/11/
## Saker-og-publikasjoner/Publikasjoner/Referater/Stortinget/1998-1999/981021/ordinarsporretime/11/
## debate_title debate_subject debate_type proposition_id
## Spørsmål 11 <NA> ordinarsporretime NA
## Spørsmål 11 <NA> ordinarsporretime NA
## proposition_text document_group document_references
## <NA> <NA> <NA>
## <NA> <NA> <NA>
## document_subject_short decision_short document_note case_source_id
## <NA> <NA> <NA> <NA>
## <NA> <NA> <NA> <NA>
## case_chair_id case_type decision_text question_number question_from_id
## <NA> <NA> <NA> NA <NA>
## <NA> <NA> <NA> NA <NA>
## question_to_id question_answered_by_id question_answered_by_ministry_id
## <NA> <NA> <NA>
## <NA> <NA> <NA>
## question_answered_by_minister_title subject_ids subject_names
## <NA> <NA> <NA>
## <NA> <NA> <NA>
## is_main_subject main_subject_id subject_committee_id
## <NA> <NA> <NA>
## <NA> <NA> <NA>
## subject_committee_name agenda_case_number agenda_case_reference
## <NA> NA <NA>
## <NA> NA <NA>
## agenda_case_text agenda_case_type agenda_number meeting_id procedure_id
## <NA> <NA> NA NA <NA>
## <NA> <NA> NA NA <NA>
## procedure_name procedure_stepnumber publication_export_id
## <NA> <NA> <NA>
## <NA> <NA> <NA>
## publication_link_text publication_link_url publication_type
## <NA> <NA> <NA>
## <NA> <NA> <NA>
## publication_undertype related_case_id related_case_type
## <NA> <NA> <NA>
## <NA> <NA> <NA>
## related_case_title_short keyword keywords language transcript order
## <NA> <NA> <NA> nob s981021a 117
## <NA> <NA> <NA> nob s981021a 119
## session time date speaker_role n_innv
## 1998-1999 1998-10-21T00:00:00+02:00 1998-10-21 Representant 2
## 1998-1999 1998-10-21T00:00:00+02:00 1998-10-21 Representant 2
## n_flykt n_asyl n length aar aar5
## 0 0 2 438 1998 1998-02
## 0 0 2 895 1998 1998-02
##
## Source: /Users/janfredrikhovden/Dropbox/DBXPAGAANDEARBEID/Statistikk/Rworkdir/Quanteda19/* on x86_64 by janfredrikhovden
## Created: Mon May 13 09:57:02 2019
## Notes:
# Use the Norwegian stemmer for tokens_wordstem()
quanteda_options(language_stemmer = "no")
# Make tokens: drop punctuation and numbers, remove Norwegian stopwords,
# lower-case, then stem.
# TODO: does the "norwegian" stopword list also cover Nynorsk?
toks <- storting %>%
  tokens(remove_punct = TRUE, remove_numbers = TRUE) %>%
  tokens_remove(stopwords("norwegian")) %>%
  tokens_tolower() %>%
  tokens_wordstem()
# Check typical "double words": first 100 two-token collocations that occur
# at least 20 times
toks %>%
  textstat_collocations(size = 2, min_count = 20) %>%
  head(n = 100)
## collocation count count_nested length lambda
## 1 kr kr 4460 0 2 4.782903
## 2 driftsutgift forhøy 794 0 2 6.031204
## 3 ta imot 880 0 2 5.831699
## 4 overfør forhøy 683 0 2 5.723315
## 5 overfør nedsett 602 0 2 6.169664
## 6 nytt und 619 0 2 6.046325
## 7 forslag nr 797 0 2 5.942027
## 8 spesiell driftsutgift 683 0 2 7.497881
## 9 hvert fall 457 0 2 7.518283
## 10 und post 522 0 2 6.753788
## 11 ens mindreår 612 0 2 9.565936
## 12 står overfor 405 0 2 5.762166
## 13 mindreår asylsøker 441 0 2 6.129043
## 14 storting ber 517 0 2 6.472043
## 15 frivil organisasjon 344 0 2 6.482924
## 16 størr grad 370 0 2 5.755106
## 17 norsk samfunn 639 0 2 3.996796
## 18 nr lyd 424 0 2 7.927981
## 19 søk asyl 302 0 2 6.464634
## 20 nest år 432 0 2 5.097394
## 21 post forhøy 360 0 2 5.828216
## 22 offent sektor 312 0 2 6.747714
## 23 sist åren 296 0 2 6.102355
## 24 lagt fram 318 0 2 5.789688
## 25 legg rett 416 0 2 4.367578
## 26 driftsutgift nedsett 314 0 2 5.097324
## 27 lyd storting 396 0 2 6.554418
## 28 ber regjering 586 0 2 5.735055
## 29 mill kr 1057 0 2 6.860223
## 30 fremm forslag 355 0 2 4.579274
## 31 andr land 618 0 2 3.183851
## 32 komm tilbak 386 0 2 4.330036
## 33 lang tid 269 0 2 5.208461
## 34 overfør nytt 264 0 2 4.936027
## 35 humanitær bistand 190 0 2 5.914237
## 36 stor grad 297 0 2 4.481677
## 37 barn ung 291 0 2 4.605247
## 38 driftsutgift nytt 241 0 2 4.821017
## 39 folkeparti venstr 214 0 2 5.156605
## 40 først fremst 397 0 2 7.972572
## 41 psykisk hels 148 0 2 6.912620
## 42 dokument nr 182 0 2 6.921478
## 43 jf kap 300 0 2 10.508030
## 44 still krav 182 0 2 5.455615
## 45 fns høykommissær 188 0 2 8.267627
## 46 legg fram 258 0 2 4.370014
## 47 stor utfordring 327 0 2 3.834138
## 48 asyl flyktningpolitikk 168 0 2 7.302988
## 49 overslagsbevilgning forhøy 205 0 2 6.048440
## 50 kr tilskudd 391 0 2 3.669043
## 51 tar imot 224 0 2 4.590194
## 52 alternativ budsjett 162 0 2 5.923926
## 53 milliard kr 367 0 2 5.191678
## 54 komm norg 671 0 2 2.534368
## 55 antall asylsøker 227 0 2 4.452867
## 56 post nedsett 172 0 2 5.230290
## 57 kriminell handling 114 0 2 7.244207
## 58 komm hit 310 0 2 5.983571
## 59 million mennesk 185 0 2 5.233528
## 60 end avslag 135 0 2 6.085162
## 61 all flest 155 0 2 5.399274
## 62 opphold norg 367 0 2 3.469441
## 63 overslagsbevilgning nedsett 150 0 2 6.013311
## 64 behov beskytt 188 0 2 4.800975
## 65 mennesk flukt 176 0 2 5.499748
## 66 post post 138 0 2 5.663678
## 67 driftsutgift overfør 206 0 2 4.355491
## 68 imot flyktning 259 0 2 3.837986
## 69 kap post 118 0 2 6.191654
## 70 humanitært grunnlag 175 0 2 7.949317
## 71 kvinn menn 138 0 2 5.668750
## 72 bred enig 137 0 2 5.615379
## 73 internasjonal samfunn 216 0 2 4.092143
## 74 kort tid 166 0 2 5.108903
## 75 forr uke 115 0 2 8.208197
## 76 europeisk land 233 0 2 4.108626
## 77 rund omkring 138 0 2 7.631978
## 78 nedsett kr 1633 0 2 8.534363
## 79 barn best 206 0 2 4.173399
## 80 hel tatt 157 0 2 4.664960
## 81 indr marked 85 0 2 8.756609
## 82 kap driftsutgift 132 0 2 5.476980
## 83 bevilg ny 130 0 2 5.321501
## 84 vedlikehold overfør 207 0 2 7.742309
## 85 én ting 110 0 2 5.745505
## 86 rett plikt 155 0 2 5.168117
## 87 helt nødvend 181 0 2 4.192400
## 88 norsk økonomi 204 0 2 4.274609
## 89 legg vekt 135 0 2 5.173462
## 90 opphold humanitært 122 0 2 6.617918
## 91 lukk mottak 112 0 2 6.453483
## 92 internasjonal konvensjon 118 0 2 6.031752
## 93 ta var 164 0 2 4.684031
## 94 still spørsmål 156 0 2 4.518511
## 95 organiser kriminalit 82 0 2 6.880459
## 96 rett slett 224 0 2 7.039008
## 97 midlertid arbeidstillat 79 0 2 7.139765
## 98 stort sett 151 0 2 4.608466
## 99 videregå skol 118 0 2 7.197767
## 100 flertall komite 139 0 2 4.683810
## z
## 1 207.62177
## 2 117.26276
## 3 116.82521
## 4 109.37167
## 5 106.98668
## 6 106.33675
## 7 105.77562
## 8 104.61241
## 9 97.81687
## 10 93.84564
## 11 91.85092
## 12 88.98577
## 13 87.76371
## 14 87.65574
## 15 87.48205
## 16 86.03864
## 17 85.81081
## 18 84.44813
## 19 83.32265
## 20 83.09528
## 21 81.57724
## 22 81.16415
## 23 79.77947
## 24 79.74653
## 25 76.76428
## 26 75.77674
## 27 75.41729
## 28 74.71834
## 29 73.00972
## 30 71.49643
## 31 71.40519
## 32 69.93051
## 33 69.00042
## 34 68.84469
## 35 66.78090
## 36 65.76648
## 37 65.38921
## 38 65.10459
## 39 64.79318
## 40 63.53017
## 41 62.98336
## 42 62.84324
## 43 62.64447
## 44 62.60682
## 45 62.54484
## 46 62.44144
## 47 62.26100
## 48 61.46432
## 49 61.40719
## 50 61.39419
## 51 60.87962
## 52 60.71023
## 53 60.63707
## 54 60.52251
## 55 59.28840
## 56 58.97758
## 57 58.66349
## 58 58.27176
## 59 58.23741
## 60 58.14661
## 61 58.07091
## 62 57.83676
## 63 57.68721
## 64 57.34505
## 65 57.29919
## 66 56.93171
## 67 56.30548
## 68 55.78729
## 69 55.46043
## 70 55.28171
## 71 55.02154
## 72 54.64268
## 73 54.51026
## 74 54.38274
## 75 54.22304
## 76 53.49586
## 77 53.43609
## 78 52.89111
## 79 52.65318
## 80 52.48084
## 81 52.41558
## 82 52.39778
## 83 52.22060
## 84 51.96859
## 85 51.85150
## 86 51.46917
## 87 51.35425
## 88 51.17587
## 89 50.98343
## 90 50.98179
## 91 50.95701
## 92 50.95640
## 93 50.80475
## 94 50.61125
## 95 50.32787
## 96 50.32307
## 97 50.23582
## 98 50.21433
## 99 50.00767
## 100 49.91998
# Join selected two-token sequences (already stemmed) into single tokens
toks <- tokens_compound(
  toks,
  list(
    c("ta", "imot"),
    c("står", "overfor"),
    c("frivil", "organisasjon"),
    c("størr", "grad"),
    c("nest", "år"),
    c("offent", "sektor"),
    c("mindreår", "asylsøker"),
    c("still", "krav"),
    c("internasjonal", "samfunn"),
    c("humanitær", "bistand")
  )
)
# merge similar tokens (currently disabled)
#toks <- tokens_replace(toks, "ta_var", "tar_vare")
#toks <- tokens_replace(toks, "små_forskjell", "lit forskjell")
# Build the document-feature matrix; note that this overwrites the corpus
# object of the same name
storting <- dfm(toks)
# Removing additional stopwords, if any (disabled example below)
#dfm_typisk <- dfm_remove(dfm_typisk,pattern=c("000", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0"))
# The 50 most frequent features
topfeatures(storting, n = 50)
## kr norg regjering må
## 11009 10676 9560 8896
## komm få land vikt
## 6822 6730 6722 6572
## norsk gjør andr arbeid
## 6563 5551 5480 5134
## dag forslag barn kommun
## 5092 4888 4700 4637
## flyktning gjeld stor mer
## 4378 4360 4331 4175
## får fler mul derfor
## 4102 4033 3962 3777
## men tiltak rett år
## 3708 3698 3619 3600
## fremskrittsparti storting del god
## 3512 3494 3490 3334
## sak ta tid situasjon
## 3118 3094 3059 2939
## ønsk ser mennesk forhold
## 2927 2906 2840 2763
## gjennom samfunn forhøy bruk
## 2741 2719 2704 2690
## sett nye spørsmål und
## 2644 2571 2473 2470
## behov asylsøker
## 2466 2466
# Dot plot of the 20 most frequent features, ordered by frequency
typisk_freqplot <- ggplot(
  textstat_frequency(storting, n = 20),
  aes(x = reorder(feature, frequency), y = frequency)
) +
  geom_point() +
  coord_flip() +
  labs(title = "Most common words", x = NULL, y = "Frequency") +
  theme_minimal()
typisk_freqplot
# Word cloud of features occurring at least 10 times; seed fixed for the layout
set.seed(97)
textplot_wordcloud(
  storting,
  min_count = 10,
  random_order = FALSE,
  rotation = 0.25,
  color = RColorBrewer::brewer.pal(8, "Dark2")
)
# Keywords in context for tokens matching the glob pattern rasis*
kw_rasis <- kwic(toks, pattern = "rasis*")
# Show the first 20 matches
kw_rasis %>%
  head(n = 20)
##
## [talkofnorway.xlsx.927, 79]
## [talkofnorway.xlsx.4591, 210]
## [talkofnorway.xlsx.4990, 135]
## [talkofnorway.xlsx.7345, 78]
## [talkofnorway.xlsx.10499, 1018]
## [talkofnorway.xlsx.14532, 2590]
## [talkofnorway.xlsx.14780, 226]
## [talkofnorway.xlsx.14783, 118]
## [talkofnorway.xlsx.17809, 550]
## [talkofnorway.xlsx.17845, 341]
## [talkofnorway.xlsx.17873, 185]
## [talkofnorway.xlsx.19882, 28]
## [talkofnorway.xlsx.19882, 140]
## [talkofnorway.xlsx.20180, 8]
## [talkofnorway.xlsx.20180, 30]
## [talkofnorway.xlsx.23430, 53]
## [talkofnorway.xlsx.24495, 142]
## [talkofnorway.xlsx.24502, 91]
## [talkofnorway.xlsx.24502, 113]
## [talkofnorway.xlsx.26927, 102]
##
## vold bl.a blant innvandrer del | rasistisk |
## mer forebygg art innenfor områd | rasism |
## ann grupp kirkeasylant ifølg sos | rasism |
## lov påpek beskyld fremmedfiendt pisk | rasistisk |
## lik høv jent gut fordomm | rasism |
## rapport fns rasediskrimineringskomité framgår omfang | rasistisk |
## form liberal norg samfunn intolerans | rasism |
## men tilheng regl sett grens | rasismebestemm |
## lykk integreringsarbeid gi grobunn økt | rasism |
## ta utgangspunkt enkelt mennesk egenverd | rasism |
## mer motstand nør und fremmedhat | rasism |
## mangl integrering størst bidrag dag | rasism |
## tas ytterliger styrk grunnlag fremvekst | rasistisk |
## jakobs uttalt frihetsparti østerrik parti | rasistisk |
## standpunkt henholdsvis haid frihetsparti program | rasistisk |
## vekk harm sinn forarg fremprovoser | rasism |
## stat restriktiv politikk bruk legitimer | rasism |
## sett forslag arbeidsled norg fremelsk | rasism |
## imot asylsøker norg altså fremelsk | rasism |
## gunders utsagn varm sametelt forvrengt | rasistisk |
##
## tilnærming politiker forhold innvandrergrupp medfør
## diskriminering levekår storby framgår nevnt
## satt søker land kirkeasyl pr
## stemning avgjør poeng situasjon gjør
## diskriminering motarbeid jent innvandrarbakgrunn ei
## diskriminer hending gått bruk ftalat
## fordomm mer mer akterutseilt plasser
## lovverk går f.eks ytringsfrihetskommisjon juster
## må gjør alt unngå mennesk
## måt destruktiv nedbryt vikt motarbeid
## kommun må få beting gjør
## statsråd innlegg illustrer nettopp poeng
## fremmedfiendt holdning
## langt vei nazistisk holdning gav
## nazistisk østerrik tross alt vår
## oppfatt sak svært uheld forskjellsbehandling
## fremmedfrykt andr europeisk internasjonal sammenheng
## nettopp typ argumentasjon trekk frem
## ønsk fremelsk god mat viss
## angrep sam sett debatt frihetsparti
#textplot_xray(head((kw_rasis),n=10))
#textplot_xray(
# kw_demokra,kw_likestil) +
# ggtitle("Lexical dispersion")
# Keep only features with a total frequency of at least 20
dfm_storting2 <- storting %>%
  dfm_trim(min_termfreq = 20)
# Store each document's remaining token count as a docvar, so that documents
# left with only zeroes can be dropped afterwards
docvars(dfm_storting2, "ntoken") <- ntoken(dfm_storting2)
# Number of documents before dropping
ndoc(dfm_storting2)
## [1] 4035
# Drop documents that have no tokens left after trimming
dfm_storting3 <- dfm_subset(dfm_storting2, ntoken > 0)
# Number of documents after dropping
ndoc(dfm_storting3)
## [1] 4035
# idf (not ready)
# The weighted matrix is not used further in the visible code.
dfm_typiskidf <- dfm_tfidf(dfm_storting3) # tf-idf weighting
# plot idf
# Compare fit statistics for different numbers of topics (2 to 20) with
# Gibbs-sampled LDA models; the run is timed.
start_time <- Sys.time()
result <- FindTopicsNumber(
  dfm_storting3,
  topics = seq(2, 20, by = 1),
  metrics = c(
    "Griffiths2004",
    "CaoJuan2009",
    "Arun2010",
    "Deveaud2014"
  ),
  method = "Gibbs",
  control = list(seed = 77),
  mc.cores = 6L,
  verbose = TRUE
)
## fit models... done.
## calculate metrics:
## Griffiths2004... done.
## CaoJuan2009... done.
## Arun2010... done.
## Deveaud2014... done.
# Stop the timer and print the elapsed time of the FindTopicsNumber() run
end_time <- Sys.time()
end_time - start_time
## Time difference of 9.761897 mins
# Cache the metrics table on disk so the slow FindTopicsNumber() run does not
# have to be repeated
saveRDS(result, file = "ldaresult.rds")
# Restore the object. The value returned by readRDS() must be assigned;
# previously it was only printed, so nothing was actually restored.
result <- readRDS(file = "ldaresult.rds")
result
## topics Griffiths2004 CaoJuan2009 Arun2010 Deveaud2014
## 1 2 -7419673 0.3897934 3461.641 1.633206
## 2 3 -7188097 0.3705460 3252.242 1.905457
## 3 4 -7051651 0.3147871 3082.183 2.200614
## 4 5 -6900867 0.2807516 2969.080 2.378753
## 5 6 -6825633 0.2552440 2867.229 2.401576
## 6 7 -6779510 0.2566874 2801.621 2.315328
## 7 8 -6694365 0.2375158 2750.057 2.362587
## 8 9 -6621406 0.2259752 2676.389 2.418058
## 9 10 -6581611 0.2307424 2623.028 2.395197
## 10 11 -6509591 0.2086781 2594.498 2.455785
## 11 12 -6458494 0.1998226 2539.348 2.466442
## 12 13 -6413261 0.1883297 2485.406 2.470284
## 13 14 -6397861 0.1807736 2436.515 2.499721
## 14 15 -6337269 0.1803551 2410.658 2.510703
## 15 16 -6306687 0.1783214 2372.614 2.508143
## 16 17 -6265585 0.1588345 2343.769 2.579914
## 17 18 -6243628 0.1638607 2313.290 2.541017
## 18 19 -6173256 0.1473172 2265.370 2.616734
## 19 20 -6167027 0.1437151 2243.875 2.640733
# Print the topic-number metrics table
result
## topics Griffiths2004 CaoJuan2009 Arun2010 Deveaud2014
## 1 2 -7419673 0.3897934 3461.641 1.633206
## 2 3 -7188097 0.3705460 3252.242 1.905457
## 3 4 -7051651 0.3147871 3082.183 2.200614
## 4 5 -6900867 0.2807516 2969.080 2.378753
## 5 6 -6825633 0.2552440 2867.229 2.401576
## 6 7 -6779510 0.2566874 2801.621 2.315328
## 7 8 -6694365 0.2375158 2750.057 2.362587
## 8 9 -6621406 0.2259752 2676.389 2.418058
## 9 10 -6581611 0.2307424 2623.028 2.395197
## 10 11 -6509591 0.2086781 2594.498 2.455785
## 11 12 -6458494 0.1998226 2539.348 2.466442
## 12 13 -6413261 0.1883297 2485.406 2.470284
## 13 14 -6397861 0.1807736 2436.515 2.499721
## 14 15 -6337269 0.1803551 2410.658 2.510703
## 15 16 -6306687 0.1783214 2372.614 2.508143
## 16 17 -6265585 0.1588345 2343.769 2.579914
## 17 18 -6243628 0.1638607 2313.290 2.541017
## 18 19 -6173256 0.1473172 2265.370 2.616734
## 19 20 -6167027 0.1437151 2243.875 2.640733
# Plot the computed metrics against the number of topics
FindTopicsNumber_plot(result)
# Number of topics used for the final LDA and STM fits below
topic.count <- 9
# Shared Gibbs-sampler settings for the model sweep below.
# alpha is the 50 / k heuristic evaluated at topic.count; the sweep overrides
# it per model so that every candidate k gets its own 50 / k.
control_LDA_Gibbs <- list(
  alpha = 50 / topic.count,
  estimate.beta = TRUE,
  verbose = 0,
  prefix = tempfile(),
  save = 0,
  keep = 50,
  seed = 980,
  nstart = 1,
  best = TRUE,
  delta = 0.1,
  iter = 200,
  burnin = 100,
  thin = 200
)

# Candidate numbers of topics, defined once for both fitting and plotting
candidate_k <- 2:35

# Convert the dfm explicitly, as the final LDA fit below also does, rather
# than relying on implicit coercion inside LDA()
dtm_storting <- quanteda::convert(dfm_storting3, to = "topicmodels")

# Fit one Gibbs-sampled LDA model per candidate k. Previously alpha stayed at
# 50 / topic.count for every k, so the prior did not match the model size.
many_models <- lapply(candidate_k, function(k) {
  topicmodels::LDA(
    dtm_storting,
    k = k,
    method = "Gibbs",
    control = modifyList(control_LDA_Gibbs, list(alpha = 50 / k))
  )
})

# Log-likelihood of each model (object shape kept as before), plotted
# against the number of topics
result.logLik <- as.data.frame(as.matrix(lapply(many_models, logLik)))
plot(candidate_k, unlist(result.logLik), xlab="Number of Topics", ylab="Log-Likelihood")
# quanteda -> topicmodels: fit the final LDA model with topic.count topics
# using the default (VEM) estimation.
# set.seed() alone does not make this fit reproducible: topicmodels takes the
# seed for VEM estimation from the control list, so it is passed there too.
set.seed(100)
my_lda_fit <- LDA(
  quanteda::convert(dfm_storting3, to = "topicmodels"),
  k = topic.count,
  control = list(seed = 100L)
)
# The 30 highest-ranked terms for each topic
get_terms(my_lda_fit, 30)
## Topic 1 Topic 2 Topic 3 Topic 4
## [1,] "flyktning" "norg" "forslag" "ei"
## [2,] "norg" "land" "sak" "frå"
## [3,] "regjering" "må" "regjering" "regjering"
## [4,] "komm" "vikt" "barn" "noreg"
## [5,] "mennesk" "norsk" "storting" "òg"
## [6,] "må" "politisk" "asylsøker" "auk"
## [7,] "hjelp" "arbeid" "norg" "må"
## [8,] "gjør" "samarbeid" "opphold" "arbeid"
## [9,] "land" "internasjonal" "person" "vikt"
## [10,] "situasjon" "andr" "nr" "meir"
## [11,] "verd" "støtt" "komite" "gjer"
## [12,] "krist" "situasjon" "rett" "fram"
## [13,] "fler" "utvikling" "få" "fleir"
## [14,] "ta_imot" "konflikt" "men" "få"
## [15,] "dag" "styrk" "utlending" "norsk"
## [16,] "syri" "bidr" "vikt" "land"
## [17,] "folkeparti" "stor" "behandling" "gjeld"
## [18,] "mer" "humanitær" "udi" "år"
## [19,] "få" "områd" "behandl" "andr"
## [20,] "stor" "europ" "gjeld" "kommun"
## [21,] "ta" "fns" "ber" "stor"
## [22,] "andr" "gjennom" "mottak" "dess"
## [23,] "men" "afghanistan" "politi" "rett"
## [24,] "treng" "del" "mul" "tiltak"
## [25,] "vikt" "dag" "må" "dag"
## [26,] "Ã¥r" "russland" "hensyn" "vert"
## [27,] "ser" "nato" "land" "pst"
## [28,] "veld" "rett" "vurder" "storting"
## [29,] "million" "løsning" "derfor" "framstegsparti"
## [30,] "kutt" "derfor" "asyl" "betr"
## Topic 5 Topic 6 Topic 7 Topic 8
## [1,] "regjering" "norg" "kommun" "fremskrittsparti"
## [2,] "norg" "norsk" "barn" "komm"
## [3,] "mer" "må" "vikt" "norg"
## [4,] "eu" "samfunn" "må" "få"
## [5,] "norsk" "vikt" "regjering" "gjør"
## [6,] "nye" "komm" "få" "statsråd"
## [7,] "land" "innvandrer" "arbeid" "må"
## [8,] "økt" "arbeid" "tiltak" "representant"
## [9,] "arbeid" "gjør" "god" "land"
## [10,] "vikt" "få" "fler" "gjeld"
## [11,] "må" "jobb" "derfor" "andr"
## [12,] "utvikling" "kvinn" "flyktning" "jo"
## [13,] "gjennom" "andr" "får" "får"
## [14,] "stor" "arbeidsliv" "komm" "dag"
## [15,] "derfor" "arbeidskraft" "skol" "syn"
## [16,] "budsjett" "dag" "bedr" "spørsmål"
## [17,] "legg" "arbeidsinnvandring" "stor" "forslag"
## [18,] "økonomisk" "rett" "år" "faktisk"
## [19,] "europ" "land" "mul" "altså"
## [20,] "storting" "mul" "integrering" "men"
## [21,] "bruk" "god" "gjør" "helt"
## [22,] "bedr" "fler" "barnehag" "forhold"
## [23,] "vekst" "tiltak" "mer" "debatt"
## [24,] "eus" "del" "bosetting" "veld"
## [25,] "komm" "mer" "ung" "regjering"
## [26,] "politikk" "gjeld" "dag" "arbeiderparti"
## [27,] "mul" "ta" "oslo" "sak"
## [28,] "fler" "stor" "barnevern" "tror"
## [29,] "pst" "får" "godt" "sier"
## [30,] "utfordring" "men" "tilbud" "ønsk"
## Topic 9
## [1,] "kr"
## [2,] "forhøy"
## [3,] "overfør"
## [4,] "driftsutgift"
## [5,] "nedsett"
## [6,] "tilskudd"
## [7,] "post"
## [8,] "und"
## [9,] "spesiell"
## [10,] "nytt"
## [11,] "kap"
## [12,] "stat"
## [13,] "bevilg"
## [14,] "overslagsbevilgning"
## [15,] "ny"
## [16,] "tiltak"
## [17,] "jf"
## [18,] "mv"
## [19,] "vedlikehold"
## [20,] "størr"
## [21,] "nr"
## [22,] "forslag"
## [23,] "utstyrsanskaff"
## [24,] "utvikling"
## [25,] "hels"
## [26,] "internasjonal"
## [27,] "norg"
## [28,] "andr"
## [29,] "forskning"
## [30,] "flyktning"
# Posterior estimates from the fitted LDA model, kept for later inspection
lda_inf <-posterior(my_lda_fit)
# lda_inf
# STM: convert the dfm, with its document variables, to stm's input format
stm_storting <- quanteda::convert(
  dfm_storting3,
  to = "stm",
  docvars = docvars(dfm_storting3)
)
# find k (disabled; refers to objects from a different analysis)
#meta <- stm_blogs$meta
#findingk <- searchK(stm_blogs$documents, stm_blogs$vocab, K = 25, prevalence =~r13P5_2+ r13P4_1+ r13P1, data=meta)
#plot(findingk)
# Run the structural topic model with the same number of topics as the LDA fit.
# NOTE(review): no prevalence or content formula is given, so the metadata is
# passed along but presumably does not enter the estimation -- confirm.
stm_object <- stm(
  documents = stm_storting$documents,
  vocab = stm_storting$vocab,
  data = stm_storting$meta,
  K = topic.count,
  seed = 12345
)
## Beginning Spectral Initialization
## Calculating the gram matrix...
## Finding anchor words...
## .........
## Recovering initialization...
## ................................................
## Initialization complete.
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 1 (approx. per word bound = -7.203)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 2 (approx. per word bound = -7.053, relative change = 2.092e-02)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 3 (approx. per word bound = -7.008, relative change = 6.306e-03)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 4 (approx. per word bound = -6.990, relative change = 2.603e-03)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 5 (approx. per word bound = -6.981, relative change = 1.264e-03)
## Topic 1: kr, forhøy, driftsutgift, overfør, nedsett
## Topic 2: stat, offent, kommun, oslo, jf
## Topic 3: norg, asylsøker, sak, få, komm
## Topic 4: fremskrittsparti, norg, må, komm, gjør
## Topic 5: forslag, regjering, storting, nr, ber
## Topic 6: kommun, flyktning, regjering, må, norg
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, arbeid, må, tiltak
## Topic 9: ei, regjering, må, land, frå
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 6 (approx. per word bound = -6.976, relative change = 7.060e-04)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 7 (approx. per word bound = -6.973, relative change = 4.507e-04)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 8 (approx. per word bound = -6.971, relative change = 3.172e-04)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 9 (approx. per word bound = -6.969, relative change = 2.390e-04)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 10 (approx. per word bound = -6.968, relative change = 1.883e-04)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: stat, kommun, offent, oslo, bruk
## Topic 3: norg, asylsøker, sak, opphold, barn
## Topic 4: norg, fremskrittsparti, komm, må, gjør
## Topic 5: forslag, regjering, storting, nr, komite
## Topic 6: kommun, regjering, flyktning, komm, må
## Topic 7: norg, land, norsk, eu, vikt
## Topic 8: barn, vikt, arbeid, må, tiltak
## Topic 9: ei, regjering, frå, må, land
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 11 (approx. per word bound = -6.967, relative change = 1.560e-04)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 12 (approx. per word bound = -6.966, relative change = 1.359e-04)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 13 (approx. per word bound = -6.965, relative change = 1.171e-04)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 14 (approx. per word bound = -6.964, relative change = 9.823e-05)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 15 (approx. per word bound = -6.964, relative change = 8.578e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: stat, kommun, regjering, offent, arbeid
## Topic 3: asylsøker, norg, sak, barn, opphold
## Topic 4: komm, norg, fremskrittsparti, gjør, må
## Topic 5: forslag, regjering, storting, nr, komite
## Topic 6: kommun, regjering, flyktning, komm, må
## Topic 7: norg, land, norsk, eu, vikt
## Topic 8: barn, vikt, arbeid, må, tiltak
## Topic 9: ei, regjering, frå, må, noreg
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 16 (approx. per word bound = -6.963, relative change = 7.785e-05)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 17 (approx. per word bound = -6.963, relative change = 7.225e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 18 (approx. per word bound = -6.962, relative change = 6.847e-05)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 19 (approx. per word bound = -6.962, relative change = 6.723e-05)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 20 (approx. per word bound = -6.961, relative change = 6.647e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, kommun, stat, offent, arbeid
## Topic 3: asylsøker, norg, sak, barn, opphold
## Topic 4: komm, norg, fremskrittsparti, gjør, må
## Topic 5: forslag, storting, regjering, komite, nr
## Topic 6: kommun, regjering, flyktning, komm, budsjett
## Topic 7: norg, land, norsk, eu, vikt
## Topic 8: barn, vikt, arbeid, må, tiltak
## Topic 9: ei, frå, regjering, noreg, må
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 21 (approx. per word bound = -6.961, relative change = 6.730e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 22 (approx. per word bound = -6.960, relative change = 7.050e-05)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 23 (approx. per word bound = -6.960, relative change = 7.638e-05)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 24 (approx. per word bound = -6.959, relative change = 8.609e-05)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 25 (approx. per word bound = -6.958, relative change = 9.344e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, kommun, stat, arbeid, offent
## Topic 3: asylsøker, norg, barn, sak, opphold
## Topic 4: komm, norg, fremskrittsparti, gjør, må
## Topic 5: forslag, storting, regjering, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, norg
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, arbeid, må, samfunn
## Topic 9: ei, frå, regjering, noreg, må
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 26 (approx. per word bound = -6.958, relative change = 9.557e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 27 (approx. per word bound = -6.957, relative change = 1.006e-04)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 28 (approx. per word bound = -6.956, relative change = 1.034e-04)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 29 (approx. per word bound = -6.956, relative change = 9.982e-05)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 30 (approx. per word bound = -6.955, relative change = 9.125e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, kommun, stat, arbeid, offent
## Topic 3: asylsøker, norg, barn, sak, opphold
## Topic 4: komm, norg, fremskrittsparti, gjør, må
## Topic 5: forslag, regjering, storting, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, norg
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, arbeid, må, samfunn
## Topic 9: ei, frå, noreg, regjering, må
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 31 (approx. per word bound = -6.954, relative change = 8.351e-05)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 32 (approx. per word bound = -6.954, relative change = 7.785e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 33 (approx. per word bound = -6.953, relative change = 7.258e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 34 (approx. per word bound = -6.953, relative change = 7.114e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 35 (approx. per word bound = -6.952, relative change = 7.974e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, stat, arbeid, kommun, nye
## Topic 3: asylsøker, norg, barn, sak, opphold
## Topic 4: komm, norg, fremskrittsparti, gjør, få
## Topic 5: forslag, regjering, storting, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, norg
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, arbeid, må, samfunn
## Topic 9: ei, frå, noreg, regjering, òg
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 36 (approx. per word bound = -6.952, relative change = 9.751e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 37 (approx. per word bound = -6.951, relative change = 1.027e-04)
## ....................................................................................................
## Completed E-Step (1 seconds).
## Completed M-Step.
## Completing Iteration 38 (approx. per word bound = -6.950, relative change = 7.537e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 39 (approx. per word bound = -6.950, relative change = 6.546e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 40 (approx. per word bound = -6.950, relative change = 5.704e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, arbeid, mer, stat, nye
## Topic 3: asylsøker, norg, barn, sak, opphold
## Topic 4: komm, norg, fremskrittsparti, gjør, få
## Topic 5: forslag, storting, regjering, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, fler
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, arbeid, må, samfunn
## Topic 9: ei, frå, regjering, noreg, òg
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 41 (approx. per word bound = -6.949, relative change = 4.810e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 42 (approx. per word bound = -6.949, relative change = 5.050e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 43 (approx. per word bound = -6.949, relative change = 6.186e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 44 (approx. per word bound = -6.948, relative change = 6.484e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 45 (approx. per word bound = -6.948, relative change = 5.393e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, mer, arbeid, nye, økt
## Topic 3: asylsøker, norg, barn, sak, opphold
## Topic 4: komm, norg, fremskrittsparti, gjør, få
## Topic 5: forslag, storting, regjering, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, fler
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, arbeid, må, samfunn
## Topic 9: ei, frå, regjering, noreg, òg
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 46 (approx. per word bound = -6.947, relative change = 5.219e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 47 (approx. per word bound = -6.947, relative change = 5.431e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 48 (approx. per word bound = -6.947, relative change = 5.811e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 49 (approx. per word bound = -6.946, relative change = 5.026e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 50 (approx. per word bound = -6.946, relative change = 5.344e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, mer, arbeid, nye, økt
## Topic 3: asylsøker, norg, barn, sak, opphold
## Topic 4: komm, norg, gjør, fremskrittsparti, få
## Topic 5: forslag, storting, regjering, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, fler
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, arbeid, må, samfunn
## Topic 9: ei, frå, regjering, noreg, òg
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 51 (approx. per word bound = -6.945, relative change = 4.740e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 52 (approx. per word bound = -6.945, relative change = 3.693e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 53 (approx. per word bound = -6.945, relative change = 3.267e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 54 (approx. per word bound = -6.945, relative change = 3.607e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 55 (approx. per word bound = -6.944, relative change = 3.720e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, mer, arbeid, nye, økt
## Topic 3: asylsøker, norg, barn, sak, opphold
## Topic 4: komm, norg, gjør, fremskrittsparti, få
## Topic 5: forslag, storting, regjering, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, fler
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, arbeid, må, samfunn
## Topic 9: ei, frå, regjering, noreg, òg
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 56 (approx. per word bound = -6.944, relative change = 2.982e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 57 (approx. per word bound = -6.944, relative change = 3.139e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 58 (approx. per word bound = -6.944, relative change = 3.085e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 59 (approx. per word bound = -6.944, relative change = 2.823e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 60 (approx. per word bound = -6.943, relative change = 2.560e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, mer, arbeid, må, vikt
## Topic 3: asylsøker, norg, sak, barn, opphold
## Topic 4: komm, norg, gjør, fremskrittsparti, få
## Topic 5: forslag, storting, regjering, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, fler
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, arbeid, må, samfunn
## Topic 9: ei, frå, regjering, noreg, òg
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 61 (approx. per word bound = -6.943, relative change = 2.307e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 62 (approx. per word bound = -6.943, relative change = 2.297e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 63 (approx. per word bound = -6.943, relative change = 2.352e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 64 (approx. per word bound = -6.943, relative change = 2.582e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 65 (approx. per word bound = -6.943, relative change = 2.764e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, mer, arbeid, må, vikt
## Topic 3: asylsøker, norg, sak, barn, opphold
## Topic 4: komm, norg, gjør, fremskrittsparti, få
## Topic 5: forslag, storting, regjering, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, fler
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, arbeid, må, samfunn
## Topic 9: ei, frå, regjering, noreg, òg
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 66 (approx. per word bound = -6.942, relative change = 2.876e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 67 (approx. per word bound = -6.942, relative change = 2.661e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 68 (approx. per word bound = -6.942, relative change = 2.395e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 69 (approx. per word bound = -6.942, relative change = 2.521e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 70 (approx. per word bound = -6.942, relative change = 2.595e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, mer, arbeid, må, vikt
## Topic 3: asylsøker, norg, sak, barn, opphold
## Topic 4: komm, norg, gjør, fremskrittsparti, få
## Topic 5: forslag, storting, regjering, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, fler
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, arbeid, må, samfunn
## Topic 9: ei, frå, regjering, noreg, òg
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 71 (approx. per word bound = -6.942, relative change = 2.400e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 72 (approx. per word bound = -6.941, relative change = 2.023e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 73 (approx. per word bound = -6.941, relative change = 1.732e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 74 (approx. per word bound = -6.941, relative change = 1.786e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 75 (approx. per word bound = -6.941, relative change = 2.227e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, arbeid, mer, må, vikt
## Topic 3: asylsøker, norg, sak, barn, opphold
## Topic 4: komm, norg, gjør, fremskrittsparti, få
## Topic 5: forslag, storting, regjering, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, fler
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, må, arbeid, samfunn
## Topic 9: ei, frå, regjering, noreg, òg
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 76 (approx. per word bound = -6.941, relative change = 3.571e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 77 (approx. per word bound = -6.941, relative change = 3.684e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 78 (approx. per word bound = -6.940, relative change = 1.982e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 79 (approx. per word bound = -6.940, relative change = 1.423e-05)
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Completing Iteration 80 (approx. per word bound = -6.940, relative change = 1.069e-05)
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett
## Topic 2: regjering, arbeid, mer, må, norg
## Topic 3: asylsøker, norg, sak, barn, opphold
## Topic 4: komm, norg, gjør, fremskrittsparti, få
## Topic 5: forslag, storting, regjering, komite, nr
## Topic 6: kommun, flyktning, regjering, komm, fler
## Topic 7: norg, land, norsk, eu, må
## Topic 8: barn, vikt, må, arbeid, samfunn
## Topic 9: ei, frå, regjering, noreg, òg
## ....................................................................................................
## Completed E-Step (0 seconds).
## Completed M-Step.
## Model Converged
# Persist the fitted models and the STM input so they can be reloaded without
# re-estimation. Paths are relative, i.e. resolved against the working
# directory.
saveRDS(my_lda_fit, "my_lda_fit.rds")
saveRDS(stm_object, "stm_object.rds")
saveRDS(stm_storting, "stm_storting.rds")
# Restore objects (readRDS() returns the object, so assign the result):
# my_lda_fit <- readRDS("my_lda_fit.rds")
# stm_object <- readRDS("stm_object.rds")
# stm_storting <- readRDS("stm_storting.rds")
# Tidy the LDA word-topic matrix; the `topic`, `term` and `beta` columns are
# used below.
ap_topics <- tidy(my_lda_fit, matrix = "beta")
# ap_topics

# The 30 highest-beta terms of every topic, sorted by topic and then by
# descending beta.
ap_top_terms <- ap_topics %>%
  group_by(topic) %>%
  top_n(30, beta) %>%
  ungroup() %>%
  arrange(topic, desc(beta))

# Bar chart of the most characteristic words, one facet per topic.
# NOTE(review): reorder() ranks terms over the whole table, so bars may not be
# strictly sorted inside each facet when a term occurs in several topics.
ap_top_terms %>%
  mutate(term = reorder(term, beta)) %>%
  ggplot(aes(x = term, y = beta, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  ggtitle("Mest karakteristiske ord i kvar topic") +
  facet_wrap(~ topic, scales = "free") +
  coord_flip()
# TODO: exemplary ("paragon") texts for each topic?
# Diverging topics: compare term probabilities between topic 1 and topic 2.
# The table is widened to one column per topic, restricted to terms whose
# beta exceeds 0.001 in topic 1 or topic 2, and the log2 ratio topic2/topic1
# is added (positive values mean the term is more probable in topic 2).
beta_spread <- ap_topics %>%
  mutate(topic = paste0("topic", topic)) %>%
  spread(key = topic, value = beta) %>%
  filter(topic1 > .001 | topic2 > .001) %>%
  mutate(log_ratio = log2(topic2 / topic1))
beta_spread
## # A tibble: 345 x 11
## term topic1 topic2 topic3 topic4 topic5 topic6 topic7
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 afgh… 2.00e- 6 3.92e-3 1.25e-4 1.14e- 5 8.77e- 6 3.51e-15 5.27e-14
## 2 afgh… 7.60e-18 1.79e-3 2.47e-9 2.97e-15 2.34e-18 3.26e-31 1.54e-36
## 3 afrik 3.79e- 5 1.59e-3 1.06e-6 1.18e- 4 3.63e- 7 9.63e- 5 1.71e- 6
## 4 aktiv 3.24e- 6 1.81e-3 3.46e-4 1.55e- 3 1.51e- 3 9.66e- 4 8.20e- 4
## 5 aktør 5.64e- 5 1.05e-3 6.70e-5 3.25e- 4 5.27e- 4 8.27e- 5 3.57e- 4
## 6 aldri 1.00e- 3 2.72e-4 1.27e-5 1.63e- 4 1.06e- 4 6.61e- 4 8.92e- 5
## 7 all 2.43e- 3 4.72e-4 4.79e-4 7.38e- 4 4.52e- 4 1.21e- 3 1.26e- 3
## 8 aller 1.08e- 3 7.75e-4 1.60e-3 1.43e-23 1.14e- 3 1.01e- 3 8.63e- 4
## 9 alte… 1.30e- 3 1.78e-4 2.95e-4 7.77e- 4 9.03e- 4 1.09e- 4 8.88e- 4
## 10 altså 1.34e- 3 4.35e-4 6.07e-4 1.04e- 3 3.83e- 4 1.26e- 3 9.16e- 4
## # … with 335 more rows, and 3 more variables: topic8 <dbl>, topic9 <dbl>,
## # log_ratio <dbl>
# Average probability of each topic over all documents.
# `lda_inf$topics` is a matrix (documents in rows, topics in columns per the
# topicmodels documentation), so a topic's values are selected with
# `[, topic]`. A single subscript such as `lda_inf$topics[topic]` picks one
# matrix cell, which made mean() return that single value instead of an
# average.
# NOTE: the "##" output recorded directly below was produced by the earlier
# single-cell indexing and does not reflect the per-topic averages.
# Assumes the LDA model has `topic.count` topics -- TODO confirm against the
# fitting call.
for (topic in seq_len(topic.count)) {
  print(paste(topic, " ", mean(lda_inf$topics[, topic])))
}
## [1] "1 0.00266604304525932"
## [1] "2 0.00114055564187702"
## [1] "3 0.0589393447844829"
## [1] "4 0.000560157061494367"
## [1] "5 0.0011584685124563"
## [1] "6 0.401551243001264"
## [1] "7 0.000307732664124251"
## [1] "8 0.000962027933507623"
## [1] "9 0.00121576136539665"
# TODO: exemplary ("paragon") texts for each topic?

# Summary plot of the fitted STM.
plot(stm_object, type = "summary", text.cex = 0.8)

# Pairwise "perspectives" plots contrasting the vocabulary of two topics.
# The pairs are hard-coded for a 9-topic model; topic 9 is paired with topic 1.
plot(stm_object, type = "perspectives", topics = c(1, 2), n = 50)
plot(stm_object, type = "perspectives", topics = c(3, 4), n = 50)
plot(stm_object, type = "perspectives", topics = c(5, 6), n = 50)
plot(stm_object, type = "perspectives", topics = c(7, 8), n = 50)
plot(stm_object, type = "perspectives", topics = c(1, 9), n = 50)

# Histograms for 9 randomly drawn topics (all of them, in random order, when
# topic.count is 9).
plot(stm_object, type = "hist", topics = sample(seq_len(topic.count), size = 9))

# Topic labels.
plot(stm_object, type = "labels")

# Correlations between topics 1 to 9.
mod.out.corr <- topicCorr(stm_object)
plot(mod.out.corr, topics = 1:9)
# pick and label topics of interest
#topics_of_interest <- c(5, 2, 4, 3)
#topic_labels <- c("Positiv", "Reklame", "Neg_Tilpassing", "Overvåkning")
# Print the top words of topics 1 to 9; the recorded output below lists them
# under the Highest Prob, FREX, Lift and Score headings.
labelTopics(stm_object, topics = 1:9)
## Topic 1 Top Words:
## Highest Prob: kr, forhøy, overfør, driftsutgift, nedsett, tilskudd, post
## FREX: forhøy, driftsutgift, overfør, overslagsbevilgning, nedsett, post, utstyrsanskaff
## Lift: friluftsområd, frivillighetsformål, innbyggertilskudd, overslagsbevilgning, pensjonskass, rhf, utredningsvirksom
## Score: forhøy, driftsutgift, kr, nedsett, overfør, overslagsbevilgning, kap
## Topic 2 Top Words:
## Highest Prob: regjering, arbeid, mer, norg, må, vikt, arbeidskraft
## FREX: arbeidskraft, bedrift, arbeidsmarked, arbeidsled, arbeidsinnvandring, dumping, velferd
## Lift: fellesskapsløsning, gasskraftverk, fastlandsøkonomi, tannhelsetjenest, vekstfremm, salgsinntekt, sykefravær
## Score: salgsinntekt, arbeidskraft, mer, arbeidsinnvandring, må, vikt, arbeidsmarked
## Topic 3 Top Words:
## Highest Prob: asylsøker, norg, sak, opphold, barn, person, land
## FREX: asyl, asylinstitutt, avslag, udi, asylsøknad, returner, une
## Lift: bevæpning, eurodac, fremdrift, kat, lengevær, oppholdstid, praksisendring
## Score: asylsøker, opphold, udi, asyl, oppholdstillat, utlending, une
## Topic 4 Top Words:
## Highest Prob: komm, norg, gjør, fremskrittsparti, få, må, andr
## FREX: jo, tror, gansk, egent, litt, snakk, faktisk
## Lift: innvandrerungdom, kolberg, putt, unnskyld, tybring-gjedd, kongsberg, voldsomt
## Score: fremskrittsparti, tror, veld, helt, jo, sier, få
## Topic 5 Top Words:
## Highest Prob: forslag, storting, regjering, komite, nr, sak, flertall
## FREX: lyd, ber, komite, forslag, dokument, nr, innstilling
## Lift: bønnerop, familieoppholdstillat, lyd, referanseperson, underholdskrav, vedlegg, forslagsstillern
## Score: lyd, flertall, nr, ber, forslag, fremskrittsparti, familieoppholdstillat
## Topic 6 Top Words:
## Highest Prob: kommun, flyktning, regjering, komm, fler, hjelp, må
## FREX: ta_imot, kommun, kutt, bosett, bosetting, kvoteflyktning, flyktning
## Lift: investeringstilskudd, bosettingsarbeid, beregningsutvalg, skattekutt, bosett, kvoteflyktning, barnevernstiltak
## Score: kommun, syri, ta_imot, flyktning, budsjett, bosett, bosetting
## Topic 7 Top Words:
## Highest Prob: norg, land, norsk, eu, må, vikt, europ
## FREX: eu, utenriksminister, sikkerhetsråd, militær, nato, eøs-avtal, palestinsk
## Lift: liby, afghanern, arktis, arktisk, bilateralt, delegasjon, efta-land
## Score: eu, nato, europ, militær, humanitær, sikkerhetsråd, må
## Topic 8 Top Words:
## Highest Prob: barn, vikt, må, arbeid, samfunn, kvinn, norsk
## FREX: innvandrerbakgrunn, menn, homofil, minoritetsbakgrunn, kvinn, innvandrerkvinn, foreldr
## Lift: behersk, hivpositiv, innvandrerbarn, innvandrerfamili, kvotering, lavinntekt, lavinntektsfamili
## Score: vikt, innvandrer, innvandrerbakgrunn, innvandrerkvinn, må, minoritetsbakgrunn, kvinn
## Topic 9 Top Words:
## Highest Prob: ei, frå, regjering, noreg, òg, auk, må
## FREX: ei, frå, noreg, òg, auk, meir, gjer
## Lift: arbeidd, breitt, bustad, eig, føreslår, haldning, handter
## Score: frå, ei, noreg, auk, gjer, fleir, meir
# Topic prevalence over time.
# Collect the topic labels; the first three entries of each topic's `prob`
# row are used as the panel titles below.
model.stm.labels <- labelTopics(stm_object, seq_len(topic.count))
# Estimate the effect of `aar` (presumably the year -- confirm against the
# metadata) on every topic, using the metadata stored in stm_storting$meta.
model.stm.ee <- estimateEffect(1:topic.count ~ aar, stm_object, meta = stm_storting$meta)
# 3 x 3 panel grid; with more than nine topics the plots continue on a new page.
par(mfrow = c(3, 3))
# One panel per topic. The earlier `seq_along(sample(1:topic.count, size = 9))`
# always evaluated to 1:9: the sample() only consumed random-number state and
# raised an error whenever topic.count was below 9.
for (i in seq_len(topic.count)) {
  plot(
    model.stm.ee, "aar",
    method = "continuous",
    topics = i,
    main = paste0(model.stm.labels$prob[i, 1:3], collapse = ", "),
    printlegend = FALSE
  )
}
# Keep a copy of the fitted model under the name `out`, then write the whole
# workspace to disk (presumably for the stminsights calls commented out
# below -- confirm).
out <- stm_object
save.image(file = 'stm_storting.RData')
#library(stminsights)
#run_stminsights()