# load libraries
library(topicmodels)
library(tab)
library(sjPlot)
## Warning: package 'sjPlot' was built under R version 3.5.2
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.15
## Current Matrix version is 1.2.17
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
library(descr)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1       ✔ purrr   0.3.2  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.2
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(quanteda)
## Warning: package 'quanteda' was built under R version 3.5.2
## Package version: 1.4.3
## Parallel computing: 2 of 12 threads used.
## See https://quanteda.io for tutorials and examples.
## 
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
## 
##     View
library(readtext)
## Warning: package 'readtext' was built under R version 3.5.2
library(topicmodels)
library(ggplot2)
library(stm)
## stm v1.3.3 (2018-1-26) successfully loaded. See ?stm for help. 
##  Papers, resources, and other materials at structuraltopicmodel.com
library(ldatuning)
library(tidytext)
library(FactoMineR)
library(factoextra)
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Read the main spreadsheet with readtext(); its `text` column becomes the
# document text. Then print how many rows were read.
data1 <- readtext(
  "~/Dropbox/DBXPAGAANDEARBEID/Statistikk/Rworkdir/Quanteda19/talkofnorway.xlsx",
  text_field = "text"
)
data1 %>% count()
## # A tibble: 1 x 1
##        n
##    <int>
## 1 250373
# Subsetting: 1) keep only rows whose text matches one of the search terms.
# The patterns omit the first letter, presumably so that both capitalised and
# lower-case spellings match (NOTE(review): confirm this is the intent).
data <- data1[grepl("nnvandr|lyktning|sylsøk", data1$text), ]
data1 <- NULL # drop the reference to the full data set

# Occurrences of each search term per row, and their sum.
data$n_innv <- str_count(data$text, "nnvandr")
data$n_flykt <- str_count(data$text, "lyktning")
data$n_asyl <- str_count(data$text, "sylsøk")
data$n <- data$n_asyl + data$n_flykt + data$n_innv # total score

# Text length in characters, and the year taken from the first four
# characters of `date`.
data$length <- nchar(data$text)
data$aar <- as.numeric(substring(data$date, 1, 4))

# Collapse years into four periods; years outside 1998-2016 become NA.
# The label for 2003:2007 used to read '2000-7', which matched neither the
# range it covers nor the 'YYYY-YY' scheme of the other labels.
data$aar5 <- car::recode(
  data$aar,
  paste0(
    "1998:2002='1998-02'; 2003:2007='2003-07'; ",
    "2008:2012='2008-12'; 2013:2016='2013-16'; else=NA"
  )
)

count(data)
## # A tibble: 1 x 1
##       n
##   <int>
## 1  7423
# Frequency table of the subset by `session`.
freq(data$session)

## data$session 
##           Frequency Percent
## 1998-1999       514   6.924
## 1999-2000       369   4.971
## 2000-2001       474   6.386
## 2001-2002       411   5.537
## 2002-2003       281   3.786
## 2003-2004       247   3.327
## 2004-2005       255   3.435
## 2005-2006       255   3.435
## 2006-2007       336   4.526
## 2007-2008       321   4.324
## 2008-2009       338   4.553
## 2009-2010       308   4.149
## 2010-2011       477   6.426
## 2011-2012       456   6.143
## 2012-2013       513   6.911
## 2013-2014       477   6.426
## 2014-2015       579   7.800
## 2015-2016       812  10.939
## Total          7423 100.000
# Frequency table of the subset by `debate_type` (missing values are listed
# separately in the printed table).
freq(data$debate_type)

## data$debate_type 
##                   Frequency  Percent Valid Percent
## formalia                 83   1.1181        1.1195
## interpellasjon          783  10.5483       10.5611
## muntligsporretime       820  11.0467       11.0602
## ordinarsporretime       669   9.0125        9.0235
## referatsaker             93   1.2529        1.2544
## saksreferat            4939  66.5364       66.6172
## voteringer               27   0.3637        0.3642
## NA's                      9   0.1212              
## Total                  7423 100.0000      100.0000
# Frequency table of the subset by `party_name`.
freq(data$party_name)

## data$party_name 
##                                         Frequency  Percent Valid Percent
## Arbeiderpartiet                              1862  25.0842      26.22905
## Fremskrittspartiet                           1288  17.3515      18.14340
## Høyre                                        1138  15.3307      16.03043
## Kristelig Folkeparti                          811  10.9255      11.42414
## Kystpartiet                                    29   0.3907       0.40851
## Miljøpartiet De Grønne                         29   0.3907       0.40851
## Senterpartiet                                 505   6.8032       7.11368
## Sosialistisk Venstreparti                     922  12.4209      12.98774
## Tverrpolitisk Folkevalgte (Kystpartiet)         7   0.0943       0.09861
## Venstre                                       508   6.8436       7.15594
## NA's                                          324   4.3648              
## Total                                        7423 100.0000     100.00000
# Frequency table of the subset by `cabinet_short`.
freq(data$cabinet_short)

## data$cabinet_short 
##                 Frequency Percent
## Bondevik I            734   9.888
## Bondevik II          1192  16.058
## Solberg I            1868  25.165
## Stoltenberg I         625   8.420
## Stoltenberg II       1278  17.217
## Stoltenberg III      1726  23.252
## Total                7423 100.000
# Bar chart of the `party_id` by `aar5` (period) cross-tabulation, without
# value labels on the bars.
data %>% sjplot(party_id,aar5, fun="xtab", type="bar", show.values=FALSE)

# Cross-tabulation of `party_name` by `session`: column percentages are shown
# and observed counts are hidden.
sjt.xtab(data$party_name, data$session,show.col.prc = TRUE, show.obs=FALSE)
party_name session Total
1998-1999 1999-2000 2000-2001 2001-2002 2002-2003 2003-2004 2004-2005 2005-2006 2006-2007 2007-2008 2008-2009 2009-2010 2010-2011 2011-2012 2012-2013 2013-2014 2014-2015 2015-2016
Arbeiderpartiet 18.8 % 25.5 % 37.6 % 15.4 % 13.6 % 12.2 % 14.9 % 33.7 % 32.2 % 34 % 29.6 % 30.6 % 34.7 % 32.9 % 37 % 24.2 % 16.3 % 23.7 % 26.2 %
Fremskrittspartiet 23 % 21.5 % 13.8 % 17.3 % 19.5 % 14.8 % 14.5 % 20.6 % 19.3 % 19.3 % 24.5 % 20.3 % 18 % 14.3 % 16 % 20.8 % 19.2 % 14.2 % 18.1 %
Høyre 6.1 % 10 % 14 % 27 % 26.5 % 29.3 % 21.7 % 9.5 % 15.1 % 13.1 % 10 % 9.6 % 13.2 % 15.9 % 11.1 % 17.1 % 16.5 % 24.2 % 16 %
Kristelig Folkeparti 22.8 % 15.5 % 13.6 % 14.1 % 12.5 % 11.8 % 12.8 % 11.5 % 11.1 % 8.5 % 15.4 % 12.6 % 6.6 % 9.4 % 7.1 % 7.7 % 8.6 % 8.9 % 11.4 %
Kystpartiet 0 % 0 % 0 % 1.3 % 2.3 % 3.5 % 4.3 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0.4 %
Miljøpartiet De
Grønne
0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0.6 % 2.7 % 1.4 % 0.4 %
Senterpartiet 8.5 % 6.6 % 6.7 % 7.9 % 6.2 % 8.3 % 8.5 % 3.2 % 3.3 % 4.2 % 3.3 % 3 % 3.3 % 6 % 9.5 % 7.7 % 12.7 % 10.1 % 7.1 %
Sosialistisk
Venstreparti
13.5 % 7.4 % 9.8 % 12 % 12.5 % 15.7 % 18.3 % 13.9 % 10.5 % 12.1 % 11.2 % 13.3 % 16.3 % 16.1 % 14.7 % 12.8 % 14.9 % 11 % 13 %
Tverrpolitisk
Folkevalgte
(Kystpartiet)
0.4 % 1.4 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0 % 0.1 %
Venstre 6.9 % 12 % 4.7 % 5 % 7 % 4.4 % 5.1 % 7.5 % 8.4 % 8.8 % 6 % 10.6 % 7.9 % 5.5 % 4.6 % 9 % 9.1 % 6.5 % 7.2 %
Total 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 % 100 %
χ2=1043.072 · df=153 · Cramer’s V=0.128 · Fisher’s p=0.000
# 2) Keep only rows where the search terms occur at least twice in total
# (n > 1), and turn the result into a quanteda corpus.
storting <- data %>%
  subset(n > 1) %>%
  corpus()

# Show the summary for the first documents of the corpus.
summary(storting, n = 5)
## Warning in seq_len(n): first element used of 'length.out' argument
## Corpus consisting of 4035 documents:
## 
##                   Text Types Tokens Sentences         id url_rep_id rep_id
##  talkofnorway.xlsx.164    56     69         4 tale000163        OHE    OHE
##  talkofnorway.xlsx.166   106    177         9 tale000165        OHE    OHE
##  rep_first_name rep_last_name         rep_name rep_from rep_to
##         Øystein      Hedstrøm Øystein Hedstrøm    35704  37164
##         Øystein      Hedstrøm Øystein Hedstrøm    35704  37164
##      rep_type  county list_number party_id         party_name party_role
##  Representant Østfold           3      FrP Fremskrittspartiet Opposition
##  Representant Østfold           3      FrP Fremskrittspartiet Opposition
##  party_seats cabinet_short cabinet_start cabinet_end cabinet_composition
##           25    Bondevik I         35720       36601           Coalition
##           25    Bondevik I         35720       36601           Coalition
##  rep_gender  rep_birth rep_death parl_period parl_size party_seats_lagting
##        mann 07.08.1946      <NA>   1997-2001       165                   6
##        mann 07.08.1946      <NA>   1997-2001       165                   6
##  party_seats_odelsting
##                     19
##                     19
##                                                          com_member
##  Næringskomiteen ; Næringskomiteen ; Valgkomiteen ; Næringskomiteen
##  Næringskomiteen ; Næringskomiteen ; Valgkomiteen ; Næringskomiteen
##                                                                                               com_date
##  16.03.1999 - 27.04.1999 ; 27.04.1999 - 30.09.2001 ; 08.10.1997 - 30.09.2001 ; 21.10.1997 - 16.03.1999
##  16.03.1999 - 27.04.1999 ; 27.04.1999 - 30.09.2001 ; 08.10.1997 - 30.09.2001 ; 21.10.1997 - 16.03.1999
##                                                   com_role case_id
##  Fung. leder gruppestyret ; Nestleder ; Medlem ; Nestleder      NA
##  Fung. leder gruppestyret ; Nestleder ; Medlem ; Nestleder      NA
##                                                                                  debate_reference
##  Saker-og-publikasjoner/Publikasjoner/Referater/Stortinget/1998-1999/981021/ordinarsporretime/11/
##  Saker-og-publikasjoner/Publikasjoner/Referater/Stortinget/1998-1999/981021/ordinarsporretime/11/
##  debate_title debate_subject       debate_type proposition_id
##   Spørsmål 11           <NA> ordinarsporretime             NA
##   Spørsmål 11           <NA> ordinarsporretime             NA
##  proposition_text document_group document_references
##              <NA>           <NA>                <NA>
##              <NA>           <NA>                <NA>
##  document_subject_short decision_short document_note case_source_id
##                    <NA>           <NA>          <NA>           <NA>
##                    <NA>           <NA>          <NA>           <NA>
##  case_chair_id case_type decision_text question_number question_from_id
##           <NA>      <NA>          <NA>              NA             <NA>
##           <NA>      <NA>          <NA>              NA             <NA>
##  question_to_id question_answered_by_id question_answered_by_ministry_id
##            <NA>                    <NA>                             <NA>
##            <NA>                    <NA>                             <NA>
##  question_answered_by_minister_title subject_ids subject_names
##                                 <NA>        <NA>          <NA>
##                                 <NA>        <NA>          <NA>
##  is_main_subject main_subject_id subject_committee_id
##             <NA>            <NA>                 <NA>
##             <NA>            <NA>                 <NA>
##  subject_committee_name agenda_case_number agenda_case_reference
##                    <NA>                 NA                  <NA>
##                    <NA>                 NA                  <NA>
##  agenda_case_text agenda_case_type agenda_number meeting_id procedure_id
##              <NA>             <NA>            NA         NA         <NA>
##              <NA>             <NA>            NA         NA         <NA>
##  procedure_name procedure_stepnumber publication_export_id
##            <NA>                 <NA>                  <NA>
##            <NA>                 <NA>                  <NA>
##  publication_link_text publication_link_url publication_type
##                   <NA>                 <NA>             <NA>
##                   <NA>                 <NA>             <NA>
##  publication_undertype related_case_id related_case_type
##                   <NA>            <NA>              <NA>
##                   <NA>            <NA>              <NA>
##  related_case_title_short keyword keywords language transcript order
##                      <NA>    <NA>     <NA>      nob   s981021a   117
##                      <NA>    <NA>     <NA>      nob   s981021a   119
##    session                      time       date speaker_role n_innv
##  1998-1999 1998-10-21T00:00:00+02:00 1998-10-21 Representant      2
##  1998-1999 1998-10-21T00:00:00+02:00 1998-10-21 Representant      2
##  n_flykt n_asyl n length  aar    aar5
##        0      0 2    438 1998 1998-02
##        0      0 2    895 1998 1998-02
## 
## Source: /Users/janfredrikhovden/Dropbox/DBXPAGAANDEARBEID/Statistikk/Rworkdir/Quanteda19/* on x86_64 by janfredrikhovden
## Created: Mon May 13 09:57:02 2019
## Notes:
# Use the Norwegian stemmer ("no") as quanteda's default stemming language.
quanteda_options(language_stemmer = "no")

# Make tokens: strip punctuation and numbers, remove Norwegian stop words,
# lower-case, then stem.
# TODO: does the "norwegian" stop-word list also cover Nynorsk?
toks <- storting %>%
  tokens(remove_punct = TRUE, remove_numbers = TRUE) %>%
  tokens_remove(stopwords("norwegian")) %>%
  tokens_tolower() %>%
  tokens_wordstem()

# Check typical "double words": the first 100 two-word collocations that
# occur at least 20 times.
toks %>%
  textstat_collocations(size = 2, min_count = 20) %>%
  head(n = 100)
##                     collocation count count_nested length    lambda
## 1                         kr kr  4460            0      2  4.782903
## 2           driftsutgift forhøy   794            0      2  6.031204
## 3                       ta imot   880            0      2  5.831699
## 4                overfør forhøy   683            0      2  5.723315
## 5               overfør nedsett   602            0      2  6.169664
## 6                      nytt und   619            0      2  6.046325
## 7                    forslag nr   797            0      2  5.942027
## 8         spesiell driftsutgift   683            0      2  7.497881
## 9                    hvert fall   457            0      2  7.518283
## 10                     und post   522            0      2  6.753788
## 11                 ens mindreår   612            0      2  9.565936
## 12                 står overfor   405            0      2  5.762166
## 13           mindreår asylsøker   441            0      2  6.129043
## 14                 storting ber   517            0      2  6.472043
## 15          frivil organisasjon   344            0      2  6.482924
## 16                   størr grad   370            0      2  5.755106
## 17                norsk samfunn   639            0      2  3.996796
## 18                       nr lyd   424            0      2  7.927981
## 19                     søk asyl   302            0      2  6.464634
## 20                      nest år   432            0      2  5.097394
## 21                  post forhøy   360            0      2  5.828216
## 22                offent sektor   312            0      2  6.747714
## 23                    sist åren   296            0      2  6.102355
## 24                    lagt fram   318            0      2  5.789688
## 25                    legg rett   416            0      2  4.367578
## 26         driftsutgift nedsett   314            0      2  5.097324
## 27                 lyd storting   396            0      2  6.554418
## 28                ber regjering   586            0      2  5.735055
## 29                      mill kr  1057            0      2  6.860223
## 30                fremm forslag   355            0      2  4.579274
## 31                    andr land   618            0      2  3.183851
## 32                  komm tilbak   386            0      2  4.330036
## 33                     lang tid   269            0      2  5.208461
## 34                 overfør nytt   264            0      2  4.936027
## 35            humanitær bistand   190            0      2  5.914237
## 36                    stor grad   297            0      2  4.481677
## 37                     barn ung   291            0      2  4.605247
## 38            driftsutgift nytt   241            0      2  4.821017
## 39            folkeparti venstr   214            0      2  5.156605
## 40                 først fremst   397            0      2  7.972572
## 41                 psykisk hels   148            0      2  6.912620
## 42                  dokument nr   182            0      2  6.921478
## 43                       jf kap   300            0      2 10.508030
## 44                   still krav   182            0      2  5.455615
## 45             fns høykommissær   188            0      2  8.267627
## 46                    legg fram   258            0      2  4.370014
## 47              stor utfordring   327            0      2  3.834138
## 48       asyl flyktningpolitikk   168            0      2  7.302988
## 49   overslagsbevilgning forhøy   205            0      2  6.048440
## 50                  kr tilskudd   391            0      2  3.669043
## 51                     tar imot   224            0      2  4.590194
## 52          alternativ budsjett   162            0      2  5.923926
## 53                  milliard kr   367            0      2  5.191678
## 54                    komm norg   671            0      2  2.534368
## 55             antall asylsøker   227            0      2  4.452867
## 56                 post nedsett   172            0      2  5.230290
## 57           kriminell handling   114            0      2  7.244207
## 58                     komm hit   310            0      2  5.983571
## 59              million mennesk   185            0      2  5.233528
## 60                   end avslag   135            0      2  6.085162
## 61                    all flest   155            0      2  5.399274
## 62                 opphold norg   367            0      2  3.469441
## 63  overslagsbevilgning nedsett   150            0      2  6.013311
## 64                behov beskytt   188            0      2  4.800975
## 65                mennesk flukt   176            0      2  5.499748
## 66                    post post   138            0      2  5.663678
## 67         driftsutgift overfør   206            0      2  4.355491
## 68               imot flyktning   259            0      2  3.837986
## 69                     kap post   118            0      2  6.191654
## 70          humanitært grunnlag   175            0      2  7.949317
## 71                   kvinn menn   138            0      2  5.668750
## 72                    bred enig   137            0      2  5.615379
## 73        internasjonal samfunn   216            0      2  4.092143
## 74                     kort tid   166            0      2  5.108903
## 75                     forr uke   115            0      2  8.208197
## 76               europeisk land   233            0      2  4.108626
## 77                 rund omkring   138            0      2  7.631978
## 78                   nedsett kr  1633            0      2  8.534363
## 79                    barn best   206            0      2  4.173399
## 80                     hel tatt   157            0      2  4.664960
## 81                  indr marked    85            0      2  8.756609
## 82             kap driftsutgift   132            0      2  5.476980
## 83                    bevilg ny   130            0      2  5.321501
## 84          vedlikehold overfør   207            0      2  7.742309
## 85                      én ting   110            0      2  5.745505
## 86                   rett plikt   155            0      2  5.168117
## 87                 helt nødvend   181            0      2  4.192400
## 88                norsk økonomi   204            0      2  4.274609
## 89                    legg vekt   135            0      2  5.173462
## 90           opphold humanitært   122            0      2  6.617918
## 91                  lukk mottak   112            0      2  6.453483
## 92     internasjonal konvensjon   118            0      2  6.031752
## 93                       ta var   164            0      2  4.684031
## 94               still spørsmål   156            0      2  4.518511
## 95         organiser kriminalit    82            0      2  6.880459
## 96                   rett slett   224            0      2  7.039008
## 97      midlertid arbeidstillat    79            0      2  7.139765
## 98                   stort sett   151            0      2  4.608466
## 99                videregå skol   118            0      2  7.197767
## 100             flertall komite   139            0      2  4.683810
##             z
## 1   207.62177
## 2   117.26276
## 3   116.82521
## 4   109.37167
## 5   106.98668
## 6   106.33675
## 7   105.77562
## 8   104.61241
## 9    97.81687
## 10   93.84564
## 11   91.85092
## 12   88.98577
## 13   87.76371
## 14   87.65574
## 15   87.48205
## 16   86.03864
## 17   85.81081
## 18   84.44813
## 19   83.32265
## 20   83.09528
## 21   81.57724
## 22   81.16415
## 23   79.77947
## 24   79.74653
## 25   76.76428
## 26   75.77674
## 27   75.41729
## 28   74.71834
## 29   73.00972
## 30   71.49643
## 31   71.40519
## 32   69.93051
## 33   69.00042
## 34   68.84469
## 35   66.78090
## 36   65.76648
## 37   65.38921
## 38   65.10459
## 39   64.79318
## 40   63.53017
## 41   62.98336
## 42   62.84324
## 43   62.64447
## 44   62.60682
## 45   62.54484
## 46   62.44144
## 47   62.26100
## 48   61.46432
## 49   61.40719
## 50   61.39419
## 51   60.87962
## 52   60.71023
## 53   60.63707
## 54   60.52251
## 55   59.28840
## 56   58.97758
## 57   58.66349
## 58   58.27176
## 59   58.23741
## 60   58.14661
## 61   58.07091
## 62   57.83676
## 63   57.68721
## 64   57.34505
## 65   57.29919
## 66   56.93171
## 67   56.30548
## 68   55.78729
## 69   55.46043
## 70   55.28171
## 71   55.02154
## 72   54.64268
## 73   54.51026
## 74   54.38274
## 75   54.22304
## 76   53.49586
## 77   53.43609
## 78   52.89111
## 79   52.65318
## 80   52.48084
## 81   52.41558
## 82   52.39778
## 83   52.22060
## 84   51.96859
## 85   51.85150
## 86   51.46917
## 87   51.35425
## 88   51.17587
## 89   50.98343
## 90   50.98179
## 91   50.95701
## 92   50.95640
## 93   50.80475
## 94   50.61125
## 95   50.32787
## 96   50.32307
## 97   50.23582
## 98   50.21433
## 99   50.00767
## 100  49.91998
# Join frequent two-word expressions (stemmed forms, picked from the
# collocation list) into single compound tokens.
toks <- tokens_compound(
  toks,
  pattern = list(
    c("ta", "imot"),
    c("står", "overfor"),
    c("frivil", "organisasjon"),
    c("størr", "grad"),
    c("nest", "år"),
    c("offent", "sektor"),
    c("mindreår", "asylsøker"),
    c("still", "krav"),
    c("internasjonal", "samfunn"),
    c("humanitær", "bistand")
  )
)

# Merge similar tokens (currently disabled)
#toks <- tokens_replace(toks, "ta_var", "tar_vare")
#toks <- tokens_replace(toks, "små_forskjell", "lit forskjell")

# Build the document-feature matrix; note that this overwrites the corpus
# object previously stored in `storting`.
storting <- toks %>% dfm()

# Remove additional stop words (if any - just an example below)
#dfm_typisk <- dfm_remove(dfm_typisk,pattern=c("000", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0"))

# Most frequent words
topfeatures(storting, n = 50)
##               kr             norg        regjering               må 
##            11009            10676             9560             8896 
##             komm               få             land             vikt 
##             6822             6730             6722             6572 
##            norsk             gjør             andr           arbeid 
##             6563             5551             5480             5134 
##              dag          forslag             barn           kommun 
##             5092             4888             4700             4637 
##        flyktning            gjeld             stor              mer 
##             4378             4360             4331             4175 
##              får             fler              mul           derfor 
##             4102             4033             3962             3777 
##              men           tiltak             rett               år 
##             3708             3698             3619             3600 
## fremskrittsparti         storting              del              god 
##             3512             3494             3490             3334 
##              sak               ta              tid        situasjon 
##             3118             3094             3059             2939 
##             ønsk              ser          mennesk          forhold 
##             2927             2906             2840             2763 
##          gjennom          samfunn           forhøy             bruk 
##             2741             2719             2704             2690 
##             sett              nye         spørsmål              und 
##             2644             2571             2473             2470 
##            behov        asylsøker 
##             2466             2466
# Dot plot of the 20 most frequent features, ordered by frequency.
typisk_freqplot <- ggplot(
  textstat_frequency(storting, n = 20),
  aes(x = reorder(feature, frequency), y = frequency)
) +
  geom_point() +
  coord_flip() +
  theme_minimal() +
  labs(title = "Most common words", x = NULL, y = "Frequency")
typisk_freqplot

# Word cloud of features occurring at least 10 times; the seed is fixed
# before drawing.
set.seed(97)
textplot_wordcloud(
  storting,
  min_count = 10,
  random_order = FALSE,
  rotation = 0.25,
  color = RColorBrewer::brewer.pal(8, "Dark2")
)

# Keywords in context: every token matching the pattern "rasis*".
kw_rasis <- toks %>% kwic(pattern = "rasis*")

kw_rasis %>% head(n = 20)
##                                 
##      [talkofnorway.xlsx.927, 79]
##    [talkofnorway.xlsx.4591, 210]
##    [talkofnorway.xlsx.4990, 135]
##     [talkofnorway.xlsx.7345, 78]
##  [talkofnorway.xlsx.10499, 1018]
##  [talkofnorway.xlsx.14532, 2590]
##   [talkofnorway.xlsx.14780, 226]
##   [talkofnorway.xlsx.14783, 118]
##   [talkofnorway.xlsx.17809, 550]
##   [talkofnorway.xlsx.17845, 341]
##   [talkofnorway.xlsx.17873, 185]
##    [talkofnorway.xlsx.19882, 28]
##   [talkofnorway.xlsx.19882, 140]
##     [talkofnorway.xlsx.20180, 8]
##    [talkofnorway.xlsx.20180, 30]
##    [talkofnorway.xlsx.23430, 53]
##   [talkofnorway.xlsx.24495, 142]
##    [talkofnorway.xlsx.24502, 91]
##   [talkofnorway.xlsx.24502, 113]
##   [talkofnorway.xlsx.26927, 102]
##                                                                         
##                        vold bl.a blant innvandrer del |   rasistisk    |
##                       mer forebygg art innenfor områd |     rasism     |
##                      ann grupp kirkeasylant ifølg sos |     rasism     |
##                  lov påpek beskyld fremmedfiendt pisk |   rasistisk    |
##                              lik høv jent gut fordomm |     rasism     |
##  rapport fns rasediskrimineringskomité framgår omfang |   rasistisk    |
##                  form liberal norg samfunn intolerans |     rasism     |
##                           men tilheng regl sett grens | rasismebestemm |
##                lykk integreringsarbeid gi grobunn økt |     rasism     |
##               ta utgangspunkt enkelt mennesk egenverd |     rasism     |
##                       mer motstand nør und fremmedhat |     rasism     |
##                   mangl integrering størst bidrag dag |     rasism     |
##               tas ytterliger styrk grunnlag fremvekst |   rasistisk    |
##             jakobs uttalt frihetsparti østerrik parti |   rasistisk    |
##      standpunkt henholdsvis haid frihetsparti program |   rasistisk    |
##                    vekk harm sinn forarg fremprovoser |     rasism     |
##               stat restriktiv politikk bruk legitimer |     rasism     |
##                 sett forslag arbeidsled norg fremelsk |     rasism     |
##                    imot asylsøker norg altså fremelsk |     rasism     |
##                gunders utsagn varm sametelt forvrengt |   rasistisk    |
##                                                      
##  tilnærming politiker forhold innvandrergrupp medfør 
##  diskriminering levekår storby framgår nevnt         
##  satt søker land kirkeasyl pr                        
##  stemning avgjør poeng situasjon gjør                
##  diskriminering motarbeid jent innvandrarbakgrunn ei 
##  diskriminer hending gått bruk ftalat                
##  fordomm mer mer akterutseilt plasser                
##  lovverk går f.eks ytringsfrihetskommisjon juster    
##  må gjør alt unngå mennesk                           
##  måt destruktiv nedbryt vikt motarbeid               
##  kommun må få beting gjør                            
##  statsråd innlegg illustrer nettopp poeng            
##  fremmedfiendt holdning                              
##  langt vei nazistisk holdning gav                    
##  nazistisk østerrik tross alt vår                    
##  oppfatt sak svært uheld forskjellsbehandling        
##  fremmedfrykt andr europeisk internasjonal sammenheng
##  nettopp typ argumentasjon trekk frem                
##  ønsk fremelsk god mat viss                          
##  angrep sam sett debatt frihetsparti
#textplot_xray(head((kw_rasis),n=10))
#textplot_xray(
#    kw_demokra,kw_likestil) + 
#    ggtitle("Lexical dispersion")
# keep only features whose total frequency in the dfm is at least 20
dfm_storting2 <- storting %>%
  dfm_trim(min_termfreq = 20)

# store each document's remaining token count as the docvar "ntoken",
# then keep only documents that still contain at least one token
docvars(dfm_storting2, "ntoken") <- ntoken(dfm_storting2)
ndoc(dfm_storting2)
## [1] 4035
dfm_storting3 <- dfm_subset(dfm_storting2, ntoken > 0)
ndoc(dfm_storting3)
## [1] 4035
# tf-idf weighted copy of the trimmed dfm (work in progress)
dfm_typiskidf <- dfm_storting3 %>%
  dfm_tfidf()
# plot idf (TODO: no plotting code yet)
# Score candidate topic counts (2 to 20) with four selection metrics.
# The call is bracketed by Sys.time() so the elapsed time can be reported.
start_time <- Sys.time()
result <- dfm_storting3 %>%
  FindTopicsNumber(
    topics = seq(2, 20, by = 1),
    metrics = c(
      "Griffiths2004",
      "CaoJuan2009",
      "Arun2010",
      "Deveaud2014"
    ),
    method = "Gibbs",
    control = list(seed = 77),
    mc.cores = 6L,
    verbose = TRUE
  )
## fit models... done.
## calculate metrics:
##   Griffiths2004... done.
##   CaoJuan2009... done.
##   Arun2010... done.
##   Deveaud2014... done.
# report how long the topic-number search took
end_time <- Sys.time()
end_time - start_time
## Time difference of 9.761897 mins

# persist the metrics table so the search does not have to be re-run
saveRDS(result, file = "ldaresult.rds")

# Restore the object: assign the value read from disk back to `result`
# (previously it was only printed and then discarded). The surrounding
# parentheses keep the restored table printing as before.
(result <- readRDS(file = "ldaresult.rds"))
##    topics Griffiths2004 CaoJuan2009 Arun2010 Deveaud2014
## 1       2      -7419673   0.3897934 3461.641    1.633206
## 2       3      -7188097   0.3705460 3252.242    1.905457
## 3       4      -7051651   0.3147871 3082.183    2.200614
## 4       5      -6900867   0.2807516 2969.080    2.378753
## 5       6      -6825633   0.2552440 2867.229    2.401576
## 6       7      -6779510   0.2566874 2801.621    2.315328
## 7       8      -6694365   0.2375158 2750.057    2.362587
## 8       9      -6621406   0.2259752 2676.389    2.418058
## 9      10      -6581611   0.2307424 2623.028    2.395197
## 10     11      -6509591   0.2086781 2594.498    2.455785
## 11     12      -6458494   0.1998226 2539.348    2.466442
## 12     13      -6413261   0.1883297 2485.406    2.470284
## 13     14      -6397861   0.1807736 2436.515    2.499721
## 14     15      -6337269   0.1803551 2410.658    2.510703
## 15     16      -6306687   0.1783214 2372.614    2.508143
## 16     17      -6265585   0.1588345 2343.769    2.579914
## 17     18      -6243628   0.1638607 2313.290    2.541017
## 18     19      -6173256   0.1473172 2265.370    2.616734
## 19     20      -6167027   0.1437151 2243.875    2.640733
# print the topic-number metrics table (one row per candidate topic count)
result
##    topics Griffiths2004 CaoJuan2009 Arun2010 Deveaud2014
## 1       2      -7419673   0.3897934 3461.641    1.633206
## 2       3      -7188097   0.3705460 3252.242    1.905457
## 3       4      -7051651   0.3147871 3082.183    2.200614
## 4       5      -6900867   0.2807516 2969.080    2.378753
## 5       6      -6825633   0.2552440 2867.229    2.401576
## 6       7      -6779510   0.2566874 2801.621    2.315328
## 7       8      -6694365   0.2375158 2750.057    2.362587
## 8       9      -6621406   0.2259752 2676.389    2.418058
## 9      10      -6581611   0.2307424 2623.028    2.395197
## 10     11      -6509591   0.2086781 2594.498    2.455785
## 11     12      -6458494   0.1998226 2539.348    2.466442
## 12     13      -6413261   0.1883297 2485.406    2.470284
## 13     14      -6397861   0.1807736 2436.515    2.499721
## 14     15      -6337269   0.1803551 2410.658    2.510703
## 15     16      -6306687   0.1783214 2372.614    2.508143
## 16     17      -6265585   0.1588345 2343.769    2.579914
## 17     18      -6243628   0.1638607 2313.290    2.541017
## 18     19      -6173256   0.1473172 2265.370    2.616734
## 19     20      -6167027   0.1437151 2243.875    2.640733
# plot the metrics in `result` against the candidate topic counts
FindTopicsNumber_plot(values = result)

# number of topics used for the final LDA and STM fits further down
topic.count <- 9

# Gibbs-sampler settings for topicmodels::LDA. `alpha` here follows the
# 50 / k heuristic for k = topic.count; the sweep below overrides it per k.
control_LDA_Gibbs <- list(alpha = 50 / topic.count, estimate.beta = TRUE,
                          verbose = 0, prefix = tempfile(),
                          save = 0,
                          keep = 50,
                          seed = 980,
                          nstart = 1, best = TRUE,
                          delta = 0.1,
                          iter = 200,
                          burnin = 100,
                          thin = 200)

# Fit one Gibbs LDA per candidate number of topics and compare the
# log-likelihoods of the fitted models.
candidate_k <- seq(2, 35, by = 1)
many_models <- lapply(candidate_k, function(k) {
  # alpha = 50 / k has to track the k being fitted; reusing the value
  # computed for topic.count would give every model in the sweep the prior
  # meant for a 9-topic model and confound the comparison across k
  control_k <- modifyList(control_LDA_Gibbs, list(alpha = 50 / k))
  topicmodels::LDA(dfm_storting3, k, method = "Gibbs", control = control_k)
})
result.logLik <- as.data.frame(as.matrix(lapply(many_models, logLik)))
plot(candidate_k, unlist(result.logLik),
     xlab = "Number of Topics", ylab = "Log-Likelihood")

# quanteda -> topicmodels: fit the final LDA with topic.count topics
set.seed(100)
my_lda_fit <- LDA(
  quanteda::convert(dfm_storting3, to = "topicmodels"),
  k = topic.count,
  # topicmodels seeds its estimation from control$seed (which defaults to
  # the system clock) rather than from R's global RNG, so set.seed() alone
  # does not make this fit reproducible
  control = list(seed = 100L)
)
# show the 30 highest-ranked terms of every topic
get_terms(my_lda_fit, 30)
##       Topic 1      Topic 2         Topic 3      Topic 4         
##  [1,] "flyktning"  "norg"          "forslag"    "ei"            
##  [2,] "norg"       "land"          "sak"        "frå"           
##  [3,] "regjering"  "må"            "regjering"  "regjering"     
##  [4,] "komm"       "vikt"          "barn"       "noreg"         
##  [5,] "mennesk"    "norsk"         "storting"   "òg"            
##  [6,] "må"         "politisk"      "asylsøker"  "auk"           
##  [7,] "hjelp"      "arbeid"        "norg"       "må"            
##  [8,] "gjør"       "samarbeid"     "opphold"    "arbeid"        
##  [9,] "land"       "internasjonal" "person"     "vikt"          
## [10,] "situasjon"  "andr"          "nr"         "meir"          
## [11,] "verd"       "støtt"         "komite"     "gjer"          
## [12,] "krist"      "situasjon"     "rett"       "fram"          
## [13,] "fler"       "utvikling"     "få"         "fleir"         
## [14,] "ta_imot"    "konflikt"      "men"        "få"            
## [15,] "dag"        "styrk"         "utlending"  "norsk"         
## [16,] "syri"       "bidr"          "vikt"       "land"          
## [17,] "folkeparti" "stor"          "behandling" "gjeld"         
## [18,] "mer"        "humanitær"     "udi"        "år"            
## [19,] "få"         "områd"         "behandl"    "andr"          
## [20,] "stor"       "europ"         "gjeld"      "kommun"        
## [21,] "ta"         "fns"           "ber"        "stor"          
## [22,] "andr"       "gjennom"       "mottak"     "dess"          
## [23,] "men"        "afghanistan"   "politi"     "rett"          
## [24,] "treng"      "del"           "mul"        "tiltak"        
## [25,] "vikt"       "dag"           "må"         "dag"           
## [26,] "Ã¥r"         "russland"      "hensyn"     "vert"          
## [27,] "ser"        "nato"          "land"       "pst"           
## [28,] "veld"       "rett"          "vurder"     "storting"      
## [29,] "million"    "løsning"       "derfor"     "framstegsparti"
## [30,] "kutt"       "derfor"        "asyl"       "betr"          
##       Topic 5      Topic 6              Topic 7       Topic 8           
##  [1,] "regjering"  "norg"               "kommun"      "fremskrittsparti"
##  [2,] "norg"       "norsk"              "barn"        "komm"            
##  [3,] "mer"        "må"                 "vikt"        "norg"            
##  [4,] "eu"         "samfunn"            "må"          "få"              
##  [5,] "norsk"      "vikt"               "regjering"   "gjør"            
##  [6,] "nye"        "komm"               "få"          "statsråd"        
##  [7,] "land"       "innvandrer"         "arbeid"      "må"              
##  [8,] "økt"        "arbeid"             "tiltak"      "representant"    
##  [9,] "arbeid"     "gjør"               "god"         "land"            
## [10,] "vikt"       "få"                 "fler"        "gjeld"           
## [11,] "må"         "jobb"               "derfor"      "andr"            
## [12,] "utvikling"  "kvinn"              "flyktning"   "jo"              
## [13,] "gjennom"    "andr"               "får"         "får"             
## [14,] "stor"       "arbeidsliv"         "komm"        "dag"             
## [15,] "derfor"     "arbeidskraft"       "skol"        "syn"             
## [16,] "budsjett"   "dag"                "bedr"        "spørsmål"        
## [17,] "legg"       "arbeidsinnvandring" "stor"        "forslag"         
## [18,] "økonomisk"  "rett"               "år"          "faktisk"         
## [19,] "europ"      "land"               "mul"         "altså"           
## [20,] "storting"   "mul"                "integrering" "men"             
## [21,] "bruk"       "god"                "gjør"        "helt"            
## [22,] "bedr"       "fler"               "barnehag"    "forhold"         
## [23,] "vekst"      "tiltak"             "mer"         "debatt"          
## [24,] "eus"        "del"                "bosetting"   "veld"            
## [25,] "komm"       "mer"                "ung"         "regjering"       
## [26,] "politikk"   "gjeld"              "dag"         "arbeiderparti"   
## [27,] "mul"        "ta"                 "oslo"        "sak"             
## [28,] "fler"       "stor"               "barnevern"   "tror"            
## [29,] "pst"        "får"                "godt"        "sier"            
## [30,] "utfordring" "men"                "tilbud"      "ønsk"            
##       Topic 9              
##  [1,] "kr"                 
##  [2,] "forhøy"             
##  [3,] "overfør"            
##  [4,] "driftsutgift"       
##  [5,] "nedsett"            
##  [6,] "tilskudd"           
##  [7,] "post"               
##  [8,] "und"                
##  [9,] "spesiell"           
## [10,] "nytt"               
## [11,] "kap"                
## [12,] "stat"               
## [13,] "bevilg"             
## [14,] "overslagsbevilgning"
## [15,] "ny"                 
## [16,] "tiltak"             
## [17,] "jf"                 
## [18,] "mv"                 
## [19,] "vedlikehold"        
## [20,] "størr"              
## [21,] "nr"                 
## [22,] "forslag"            
## [23,] "utstyrsanskaff"     
## [24,] "utvikling"          
## [25,] "hels"               
## [26,] "internasjonal"      
## [27,] "norg"               
## [28,] "andr"               
## [29,] "forskning"          
## [30,] "flyktning"
# posterior distributions of the fitted LDA model
lda_inf <- my_lda_fit %>%
  posterior()
# lda_inf

# STM

# convert the dfm, together with its docvars, to the input format of stm
stm_storting <- dfm_storting3 %>%
  quanteda::convert(to = "stm", docvars = docvars(dfm_storting3))

# find k
#meta <- stm_blogs$meta
#findingk <- searchK(stm_blogs$documents, stm_blogs$vocab, K = 25,  prevalence =~r13P5_2+ r13P4_1+ r13P1, data=meta)

#plot(findingk)

# run structural topic model with the same number of topics as the LDA fit

stm_object <- stm(
  documents = stm_storting$documents,
  vocab = stm_storting$vocab,
  K = topic.count,
  data = stm_storting$meta,
  seed = 12345
)
## Beginning Spectral Initialization 
##   Calculating the gram matrix...
##   Finding anchor words...
##      .........
##   Recovering initialization...
##      ................................................
## Initialization complete.
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 1 (approx. per word bound = -7.203) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 2 (approx. per word bound = -7.053, relative change = 2.092e-02) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 3 (approx. per word bound = -7.008, relative change = 6.306e-03) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 4 (approx. per word bound = -6.990, relative change = 2.603e-03) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 5 (approx. per word bound = -6.981, relative change = 1.264e-03) 
## Topic 1: kr, forhøy, driftsutgift, overfør, nedsett 
##  Topic 2: stat, offent, kommun, oslo, jf 
##  Topic 3: norg, asylsøker, sak, få, komm 
##  Topic 4: fremskrittsparti, norg, må, komm, gjør 
##  Topic 5: forslag, regjering, storting, nr, ber 
##  Topic 6: kommun, flyktning, regjering, må, norg 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, arbeid, må, tiltak 
##  Topic 9: ei, regjering, må, land, frå 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 6 (approx. per word bound = -6.976, relative change = 7.060e-04) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 7 (approx. per word bound = -6.973, relative change = 4.507e-04) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 8 (approx. per word bound = -6.971, relative change = 3.172e-04) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 9 (approx. per word bound = -6.969, relative change = 2.390e-04) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 10 (approx. per word bound = -6.968, relative change = 1.883e-04) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: stat, kommun, offent, oslo, bruk 
##  Topic 3: norg, asylsøker, sak, opphold, barn 
##  Topic 4: norg, fremskrittsparti, komm, må, gjør 
##  Topic 5: forslag, regjering, storting, nr, komite 
##  Topic 6: kommun, regjering, flyktning, komm, må 
##  Topic 7: norg, land, norsk, eu, vikt 
##  Topic 8: barn, vikt, arbeid, må, tiltak 
##  Topic 9: ei, regjering, frå, må, land 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 11 (approx. per word bound = -6.967, relative change = 1.560e-04) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 12 (approx. per word bound = -6.966, relative change = 1.359e-04) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 13 (approx. per word bound = -6.965, relative change = 1.171e-04) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 14 (approx. per word bound = -6.964, relative change = 9.823e-05) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 15 (approx. per word bound = -6.964, relative change = 8.578e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: stat, kommun, regjering, offent, arbeid 
##  Topic 3: asylsøker, norg, sak, barn, opphold 
##  Topic 4: komm, norg, fremskrittsparti, gjør, må 
##  Topic 5: forslag, regjering, storting, nr, komite 
##  Topic 6: kommun, regjering, flyktning, komm, må 
##  Topic 7: norg, land, norsk, eu, vikt 
##  Topic 8: barn, vikt, arbeid, må, tiltak 
##  Topic 9: ei, regjering, frå, må, noreg 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 16 (approx. per word bound = -6.963, relative change = 7.785e-05) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 17 (approx. per word bound = -6.963, relative change = 7.225e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 18 (approx. per word bound = -6.962, relative change = 6.847e-05) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 19 (approx. per word bound = -6.962, relative change = 6.723e-05) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 20 (approx. per word bound = -6.961, relative change = 6.647e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, kommun, stat, offent, arbeid 
##  Topic 3: asylsøker, norg, sak, barn, opphold 
##  Topic 4: komm, norg, fremskrittsparti, gjør, må 
##  Topic 5: forslag, storting, regjering, komite, nr 
##  Topic 6: kommun, regjering, flyktning, komm, budsjett 
##  Topic 7: norg, land, norsk, eu, vikt 
##  Topic 8: barn, vikt, arbeid, må, tiltak 
##  Topic 9: ei, frå, regjering, noreg, må 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 21 (approx. per word bound = -6.961, relative change = 6.730e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 22 (approx. per word bound = -6.960, relative change = 7.050e-05) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 23 (approx. per word bound = -6.960, relative change = 7.638e-05) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 24 (approx. per word bound = -6.959, relative change = 8.609e-05) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 25 (approx. per word bound = -6.958, relative change = 9.344e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, kommun, stat, arbeid, offent 
##  Topic 3: asylsøker, norg, barn, sak, opphold 
##  Topic 4: komm, norg, fremskrittsparti, gjør, må 
##  Topic 5: forslag, storting, regjering, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, norg 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, arbeid, må, samfunn 
##  Topic 9: ei, frå, regjering, noreg, må 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 26 (approx. per word bound = -6.958, relative change = 9.557e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 27 (approx. per word bound = -6.957, relative change = 1.006e-04) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 28 (approx. per word bound = -6.956, relative change = 1.034e-04) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 29 (approx. per word bound = -6.956, relative change = 9.982e-05) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 30 (approx. per word bound = -6.955, relative change = 9.125e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, kommun, stat, arbeid, offent 
##  Topic 3: asylsøker, norg, barn, sak, opphold 
##  Topic 4: komm, norg, fremskrittsparti, gjør, må 
##  Topic 5: forslag, regjering, storting, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, norg 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, arbeid, må, samfunn 
##  Topic 9: ei, frå, noreg, regjering, må 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 31 (approx. per word bound = -6.954, relative change = 8.351e-05) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 32 (approx. per word bound = -6.954, relative change = 7.785e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 33 (approx. per word bound = -6.953, relative change = 7.258e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 34 (approx. per word bound = -6.953, relative change = 7.114e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 35 (approx. per word bound = -6.952, relative change = 7.974e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, stat, arbeid, kommun, nye 
##  Topic 3: asylsøker, norg, barn, sak, opphold 
##  Topic 4: komm, norg, fremskrittsparti, gjør, få 
##  Topic 5: forslag, regjering, storting, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, norg 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, arbeid, må, samfunn 
##  Topic 9: ei, frå, noreg, regjering, òg 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 36 (approx. per word bound = -6.952, relative change = 9.751e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 37 (approx. per word bound = -6.951, relative change = 1.027e-04) 
## ....................................................................................................
## Completed E-Step (1 seconds). 
## Completed M-Step. 
## Completing Iteration 38 (approx. per word bound = -6.950, relative change = 7.537e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 39 (approx. per word bound = -6.950, relative change = 6.546e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 40 (approx. per word bound = -6.950, relative change = 5.704e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, arbeid, mer, stat, nye 
##  Topic 3: asylsøker, norg, barn, sak, opphold 
##  Topic 4: komm, norg, fremskrittsparti, gjør, få 
##  Topic 5: forslag, storting, regjering, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, fler 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, arbeid, må, samfunn 
##  Topic 9: ei, frå, regjering, noreg, òg 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 41 (approx. per word bound = -6.949, relative change = 4.810e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 42 (approx. per word bound = -6.949, relative change = 5.050e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 43 (approx. per word bound = -6.949, relative change = 6.186e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 44 (approx. per word bound = -6.948, relative change = 6.484e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 45 (approx. per word bound = -6.948, relative change = 5.393e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, mer, arbeid, nye, økt 
##  Topic 3: asylsøker, norg, barn, sak, opphold 
##  Topic 4: komm, norg, fremskrittsparti, gjør, få 
##  Topic 5: forslag, storting, regjering, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, fler 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, arbeid, må, samfunn 
##  Topic 9: ei, frå, regjering, noreg, òg 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 46 (approx. per word bound = -6.947, relative change = 5.219e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 47 (approx. per word bound = -6.947, relative change = 5.431e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 48 (approx. per word bound = -6.947, relative change = 5.811e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 49 (approx. per word bound = -6.946, relative change = 5.026e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 50 (approx. per word bound = -6.946, relative change = 5.344e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, mer, arbeid, nye, økt 
##  Topic 3: asylsøker, norg, barn, sak, opphold 
##  Topic 4: komm, norg, gjør, fremskrittsparti, få 
##  Topic 5: forslag, storting, regjering, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, fler 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, arbeid, må, samfunn 
##  Topic 9: ei, frå, regjering, noreg, òg 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 51 (approx. per word bound = -6.945, relative change = 4.740e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 52 (approx. per word bound = -6.945, relative change = 3.693e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 53 (approx. per word bound = -6.945, relative change = 3.267e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 54 (approx. per word bound = -6.945, relative change = 3.607e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 55 (approx. per word bound = -6.944, relative change = 3.720e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, mer, arbeid, nye, økt 
##  Topic 3: asylsøker, norg, barn, sak, opphold 
##  Topic 4: komm, norg, gjør, fremskrittsparti, få 
##  Topic 5: forslag, storting, regjering, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, fler 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, arbeid, må, samfunn 
##  Topic 9: ei, frå, regjering, noreg, òg 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 56 (approx. per word bound = -6.944, relative change = 2.982e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 57 (approx. per word bound = -6.944, relative change = 3.139e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 58 (approx. per word bound = -6.944, relative change = 3.085e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 59 (approx. per word bound = -6.944, relative change = 2.823e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 60 (approx. per word bound = -6.943, relative change = 2.560e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, mer, arbeid, må, vikt 
##  Topic 3: asylsøker, norg, sak, barn, opphold 
##  Topic 4: komm, norg, gjør, fremskrittsparti, få 
##  Topic 5: forslag, storting, regjering, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, fler 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, arbeid, må, samfunn 
##  Topic 9: ei, frå, regjering, noreg, òg 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 61 (approx. per word bound = -6.943, relative change = 2.307e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 62 (approx. per word bound = -6.943, relative change = 2.297e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 63 (approx. per word bound = -6.943, relative change = 2.352e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 64 (approx. per word bound = -6.943, relative change = 2.582e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 65 (approx. per word bound = -6.943, relative change = 2.764e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, mer, arbeid, må, vikt 
##  Topic 3: asylsøker, norg, sak, barn, opphold 
##  Topic 4: komm, norg, gjør, fremskrittsparti, få 
##  Topic 5: forslag, storting, regjering, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, fler 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, arbeid, må, samfunn 
##  Topic 9: ei, frå, regjering, noreg, òg 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 66 (approx. per word bound = -6.942, relative change = 2.876e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 67 (approx. per word bound = -6.942, relative change = 2.661e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 68 (approx. per word bound = -6.942, relative change = 2.395e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 69 (approx. per word bound = -6.942, relative change = 2.521e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 70 (approx. per word bound = -6.942, relative change = 2.595e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, mer, arbeid, må, vikt 
##  Topic 3: asylsøker, norg, sak, barn, opphold 
##  Topic 4: komm, norg, gjør, fremskrittsparti, få 
##  Topic 5: forslag, storting, regjering, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, fler 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, arbeid, må, samfunn 
##  Topic 9: ei, frå, regjering, noreg, òg 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 71 (approx. per word bound = -6.942, relative change = 2.400e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 72 (approx. per word bound = -6.941, relative change = 2.023e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 73 (approx. per word bound = -6.941, relative change = 1.732e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 74 (approx. per word bound = -6.941, relative change = 1.786e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 75 (approx. per word bound = -6.941, relative change = 2.227e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, arbeid, mer, må, vikt 
##  Topic 3: asylsøker, norg, sak, barn, opphold 
##  Topic 4: komm, norg, gjør, fremskrittsparti, få 
##  Topic 5: forslag, storting, regjering, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, fler 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, må, arbeid, samfunn 
##  Topic 9: ei, frå, regjering, noreg, òg 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 76 (approx. per word bound = -6.941, relative change = 3.571e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 77 (approx. per word bound = -6.941, relative change = 3.684e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 78 (approx. per word bound = -6.940, relative change = 1.982e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 79 (approx. per word bound = -6.940, relative change = 1.423e-05) 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Completing Iteration 80 (approx. per word bound = -6.940, relative change = 1.069e-05) 
## Topic 1: kr, forhøy, overfør, driftsutgift, nedsett 
##  Topic 2: regjering, arbeid, mer, må, norg 
##  Topic 3: asylsøker, norg, sak, barn, opphold 
##  Topic 4: komm, norg, gjør, fremskrittsparti, få 
##  Topic 5: forslag, storting, regjering, komite, nr 
##  Topic 6: kommun, flyktning, regjering, komm, fler 
##  Topic 7: norg, land, norsk, eu, må 
##  Topic 8: barn, vikt, må, arbeid, samfunn 
##  Topic 9: ei, frå, regjering, noreg, òg 
## ....................................................................................................
## Completed E-Step (0 seconds). 
## Completed M-Step. 
## Model Converged
# Persist the fitted LDA model, the fitted STM model and the STM input object
# (whose $meta is used further down) as individual .rds files, so a later
# session can reload them instead of re-estimating.
invisible(Map(
  saveRDS,
  list(my_lda_fit, stm_object, stm_storting),
  file = c("my_lda_fit.rds", "stm_object.rds", "stm_storting.rds")
))

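# restore objects (readRDS() only returns the object, so assign the result)
#my_lda_fit <- readRDS(file = "my_lda_fit.rds")
#stm_object <- readRDS(file = "stm_object.rds")
#stm_storting <- readRDS(file = "stm_storting.rds")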
# Long table of per-topic term probabilities (beta) from the fitted LDA model.
ap_topics <- tidy(my_lda_fit, matrix = "beta")
# ap_topics

# The 30 highest-beta terms within each topic, ordered by topic and then by
# descending beta.
ap_top_terms <- ap_topics %>%
  group_by(topic) %>%
  top_n(n = 30, wt = beta) %>%
  ungroup() %>%
  arrange(topic, desc(beta))

# Horizontal bar chart of the most characteristic words, one facet per topic.
# NOTE(review): reorder() ranks each term by its mean beta over all rows, so
# bars are not guaranteed to be strictly sorted inside every facet when a term
# appears in more than one topic.
ggplot(
  mutate(ap_top_terms, term = reorder(term, beta)),
  aes(x = term, y = beta, fill = factor(topic))
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  coord_flip() +
  ggtitle("Mest karakteristiske ord i kvar topic")

# TODO: show the most representative (exemplar) texts for each topic?

# Diverging topics: compare term probabilities between topic 1 and topic 2.
# First reshape beta to one column per topic (topic1, topic2, ...).
beta_spread <- spread(
  mutate(ap_topics, topic = paste0("topic", topic)),
  key = topic,
  value = beta
)

# Keep terms whose beta exceeds 0.001 in topic 1 or topic 2, then add
# log2(topic2 / topic1): positive values lean towards topic 2, negative
# values towards topic 1.
beta_spread <- beta_spread %>%
  filter(topic1 > 0.001 | topic2 > 0.001) %>%
  mutate(log_ratio = log2(topic2 / topic1))

beta_spread
## # A tibble: 345 x 11
##    term    topic1  topic2  topic3   topic4   topic5   topic6   topic7
##    <chr>    <dbl>   <dbl>   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
##  1 afgh… 2.00e- 6 3.92e-3 1.25e-4 1.14e- 5 8.77e- 6 3.51e-15 5.27e-14
##  2 afgh… 7.60e-18 1.79e-3 2.47e-9 2.97e-15 2.34e-18 3.26e-31 1.54e-36
##  3 afrik 3.79e- 5 1.59e-3 1.06e-6 1.18e- 4 3.63e- 7 9.63e- 5 1.71e- 6
##  4 aktiv 3.24e- 6 1.81e-3 3.46e-4 1.55e- 3 1.51e- 3 9.66e- 4 8.20e- 4
##  5 aktør 5.64e- 5 1.05e-3 6.70e-5 3.25e- 4 5.27e- 4 8.27e- 5 3.57e- 4
##  6 aldri 1.00e- 3 2.72e-4 1.27e-5 1.63e- 4 1.06e- 4 6.61e- 4 8.92e- 5
##  7 all   2.43e- 3 4.72e-4 4.79e-4 7.38e- 4 4.52e- 4 1.21e- 3 1.26e- 3
##  8 aller 1.08e- 3 7.75e-4 1.60e-3 1.43e-23 1.14e- 3 1.01e- 3 8.63e- 4
##  9 alte… 1.30e- 3 1.78e-4 2.95e-4 7.77e- 4 9.03e- 4 1.09e- 4 8.88e- 4
## 10 altså 1.34e- 3 4.35e-4 6.07e-4 1.04e- 3 3.83e- 4 1.26e- 3 9.16e- 4
## # … with 335 more rows, and 3 more variables: topic8 <dbl>, topic9 <dbl>,
## #   log_ratio <dbl>
# Probability of being in a class: print, for every topic, the mean of that
# topic's column in lda_inf$topics.
# Assumes lda_inf$topics is a documents x topics matrix (as returned by
# topicmodels::posterior()) -- TODO confirm against where lda_inf is built.
# The column must be selected with `[, topic]`: a bare `[topic]` is a linear
# index into the matrix and returns a single cell, so mean() would just echo
# one document's value instead of averaging over documents.
# Any printed output recorded from an earlier run predates this fix; re-run
# to refresh it.
for (topic in seq_len(topic.count)) {
  print(paste(topic, " ", mean(lda_inf$topics[, topic])))
}
## [1] "1   0.00266604304525932"
## [1] "2   0.00114055564187702"
## [1] "3   0.0589393447844829"
## [1] "4   0.000560157061494367"
## [1] "5   0.0011584685124563"
## [1] "6   0.401551243001264"
## [1] "7   0.000307732664124251"
## [1] "8   0.000962027933507623"
## [1] "9   0.00121576136539665"
# TODO: show the most representative (exemplar) texts for each topic? (stm::findThoughts() may help)
# Summary plot of the fitted STM model (one row per topic).
plot(stm_object, type = "summary", text.cex = 0.8)

# Pairwise "perspectives" plots contrasting the vocabulary of two topics,
# using 50 words per plot. The pairs are drawn in this fixed order.
invisible(lapply(
  list(c(1, 2), c(3, 4), c(5, 6), c(7, 8), c(1, 9)),
  function(topic_pair) {
    plot(stm_object, type = "perspectives", topics = topic_pair, n = 50)
  }
))

# Histograms for nine topics drawn at random from 1..topic.count.
# With nine topics this only shuffles the panel order.
plot(stm_object, type = "hist", topics = sample(1:topic.count, size = 9))

# Label plot for all topics.
plot(stm_object, type = "labels")

# Correlation between topics, plotted for topics 1 to 9.
mod.out.corr <- topicCorr(stm_object)
plot(mod.out.corr, topics = 1:9)

# pick and label topics of interest
#topics_of_interest <- c(5, 2, 4, 3)
#topic_labels <- c("Positiv", "Reklame", "Neg_Tilpassing", "Overvåkning")

# Top words in each of the nine topics (printed to the console).
labelTopics(stm_object, topics = 1:9)
## Topic 1 Top Words:
##       Highest Prob: kr, forhøy, overfør, driftsutgift, nedsett, tilskudd, post 
##       FREX: forhøy, driftsutgift, overfør, overslagsbevilgning, nedsett, post, utstyrsanskaff 
##       Lift: friluftsområd, frivillighetsformål, innbyggertilskudd, overslagsbevilgning, pensjonskass, rhf, utredningsvirksom 
##       Score: forhøy, driftsutgift, kr, nedsett, overfør, overslagsbevilgning, kap 
## Topic 2 Top Words:
##       Highest Prob: regjering, arbeid, mer, norg, må, vikt, arbeidskraft 
##       FREX: arbeidskraft, bedrift, arbeidsmarked, arbeidsled, arbeidsinnvandring, dumping, velferd 
##       Lift: fellesskapsløsning, gasskraftverk, fastlandsøkonomi, tannhelsetjenest, vekstfremm, salgsinntekt, sykefravær 
##       Score: salgsinntekt, arbeidskraft, mer, arbeidsinnvandring, må, vikt, arbeidsmarked 
## Topic 3 Top Words:
##       Highest Prob: asylsøker, norg, sak, opphold, barn, person, land 
##       FREX: asyl, asylinstitutt, avslag, udi, asylsøknad, returner, une 
##       Lift: bevæpning, eurodac, fremdrift, kat, lengevær, oppholdstid, praksisendring 
##       Score: asylsøker, opphold, udi, asyl, oppholdstillat, utlending, une 
## Topic 4 Top Words:
##       Highest Prob: komm, norg, gjør, fremskrittsparti, få, må, andr 
##       FREX: jo, tror, gansk, egent, litt, snakk, faktisk 
##       Lift: innvandrerungdom, kolberg, putt, unnskyld, tybring-gjedd, kongsberg, voldsomt 
##       Score: fremskrittsparti, tror, veld, helt, jo, sier, få 
## Topic 5 Top Words:
##       Highest Prob: forslag, storting, regjering, komite, nr, sak, flertall 
##       FREX: lyd, ber, komite, forslag, dokument, nr, innstilling 
##       Lift: bønnerop, familieoppholdstillat, lyd, referanseperson, underholdskrav, vedlegg, forslagsstillern 
##       Score: lyd, flertall, nr, ber, forslag, fremskrittsparti, familieoppholdstillat 
## Topic 6 Top Words:
##       Highest Prob: kommun, flyktning, regjering, komm, fler, hjelp, må 
##       FREX: ta_imot, kommun, kutt, bosett, bosetting, kvoteflyktning, flyktning 
##       Lift: investeringstilskudd, bosettingsarbeid, beregningsutvalg, skattekutt, bosett, kvoteflyktning, barnevernstiltak 
##       Score: kommun, syri, ta_imot, flyktning, budsjett, bosett, bosetting 
## Topic 7 Top Words:
##       Highest Prob: norg, land, norsk, eu, må, vikt, europ 
##       FREX: eu, utenriksminister, sikkerhetsråd, militær, nato, eøs-avtal, palestinsk 
##       Lift: liby, afghanern, arktis, arktisk, bilateralt, delegasjon, efta-land 
##       Score: eu, nato, europ, militær, humanitær, sikkerhetsråd, må 
## Topic 8 Top Words:
##       Highest Prob: barn, vikt, må, arbeid, samfunn, kvinn, norsk 
##       FREX: innvandrerbakgrunn, menn, homofil, minoritetsbakgrunn, kvinn, innvandrerkvinn, foreldr 
##       Lift: behersk, hivpositiv, innvandrerbarn, innvandrerfamili, kvotering, lavinntekt, lavinntektsfamili 
##       Score: vikt, innvandrer, innvandrerbakgrunn, innvandrerkvinn, må, minoritetsbakgrunn, kvinn 
## Topic 9 Top Words:
##       Highest Prob: ei, frå, regjering, noreg, òg, auk, må 
##       FREX: ei, frå, noreg, òg, auk, meir, gjer 
##       Lift: arbeidd, breitt, bustad, eig, føreslår, haldning, handter 
##       Score: frå, ei, noreg, auk, gjer, fleir, meir
# topic prevalence over time
# Top-word labels for every topic; the three highest-probability words of each
# topic are used as panel titles below.
model.stm.labels <- labelTopics(stm_object, seq_len(topic.count))

# Estimate the effect of `aar` (taken from the document metadata in
# stm_storting$meta) on the prevalence of every topic.
model.stm.ee <- estimateEffect(1:topic.count ~ aar, stm_object, meta = stm_storting$meta)

# One panel per topic on a 3 x 3 grid; further topics spill onto a new page.
# The loop runs over the actual topics. The previous
# seq_along(sample(1:topic.count, size = 9)) discarded the sampled values and
# always yielded 1..9, and it errored for fewer than nine topics.
par(mfrow = c(3, 3))
for (i in seq_len(topic.count)) {
  plot(
    model.stm.ee, "aar",
    method = "continuous",
    topics = i,
    main = paste0(model.stm.labels$prob[i, 1:3], collapse = ", "),
    printlegend = FALSE
  )
}

# Keep a second binding of the fitted STM model under the name `out`.
# NOTE(review): presumably the name expected by the stminsights app referenced
# in the commented-out lines at the end of the script -- confirm.
out <- stm_object

# Write the whole workspace to disk.
save.image(file = "stm_storting.RData")
#library(stminsights)
#run_stminsights()