## Corpus consisting of 39 documents, showing 39 documents:
## 
##                                          Text Types Tokens Sentences num
##         054_AnnualReport_HuskyEnergy_2019.txt  6996  88257      2349  54
##         055_AnnualReport_HuskyEnergy_2018.txt  6756  88275      2368  55
##         056_AnnualReport_HuskyEnergy_2017.txt  6901  89446      2118  56
##         057_AnnualReport_HuskyEnergy_2016.txt  6927  84807      1949  57
##         058_AnnualReport_HuskyEnergy_2015.txt  6538  74582      1738  58
##         071_AnnualReport_HuskyEnergy_2013.txt  6731  75466      1868  71
##         072_AnnualReport_HuskyEnergy_2012.txt  6848  82056      2046  72
##         073_AnnualReport_HuskyEnergy_2010.txt  6790  66654      1895  73
##         073_AnnualReport_HuskyEnergy_2011.txt  7732  74912      1900  73
##       075_AnnualReport_CenovusEnergy_2020.txt  7023  89771      2237  75
##       076_AnnualReport_CenovusEnergy_2019.txt  7117  89565      2219  76
##       077_AnnualReport_CenovusEnergy_2018.txt  7240  91739      2317  77
##       078_AnnualReport_CenovusEnergy_2017.txt  6584  69431      1837  78
##       079_AnnualReport_CenovusEnergy_2016.txt  6807  66181      1728  79
##       080_AnnualReport_CenovusEnergy_2015.txt  7235  80427      1992  80
##       081_AnnualReport_CenovusEnergy_2014.txt  7233  81619      2070  81
##       082_AnnualReport_CenovusEnergy_2013.txt  7616  86863      1790  82
##       083_AnnualReport_CenovusEnergy_2012.txt  7730  97091      1723  83
##       084_AnnualReport_CenovusEnergy_2011.txt  9860  90597      1478  84
##  194_AnnualReport_IndustrialAlliance_2010.txt  8423 103862      2575 194
##  195_AnnualReport_IndustrialAlliance_2011.txt  8866 110223      2604 195
##  196_AnnualReport_IndustrialAlliance_2012.txt  8201  91466      2226 196
##  197_AnnualReport_IndustrialAlliance_2013.txt  8210  96588      2406 197
##  198_AnnualReport_IndustrialAlliance_2014.txt  7873  88822      2113 198
##  199_AnnualReport_IndustrialAlliance_2015.txt  8359  88368      1953 199
##  200_AnnualReport_IndustrialAlliance_2016.txt  8012  88846      1980 200
##  201_AnnualReport_IndustrialAlliance_2017.txt  7054  79247      1911 201
##  202_AnnualReport_IndustrialAlliance_2018.txt  7636  85051      1949 202
##  203_AnnualReport_IndustrialAlliance_2019.txt  8140  81083      1208 203
##               247_AnnualReport_Shell_2020.txt 16210 234186      5628 247
##               248_AnnualReport_Shell_2019.txt 15471 223496      5373 248
##               249_AnnualReport_Shell_2018.txt 15021 208112      4878 249
##               250_AnnualReport_Shell_2017.txt 14651 192587      4596 250
##               251_AnnualReport_Shell_2016.txt 13091 165700      4115 251
##               252_AnnualReport_Shell_2015.txt 12071 143206      3349 252
##               253_AnnualReport_Shell_2014.txt 11754 138184      3242 253
##               254_AnnualReport_Shell_2013.txt 11513 130806      3014 254
##               255_AnnualReport_Shell_2012.txt 11669 131995      3077 255
##               256_AnnualReport_Shell_2011.txt 11420 130451      3044 256
##          type            company year
##  AnnualReport        HuskyEnergy 2019
##  AnnualReport        HuskyEnergy 2018
##  AnnualReport        HuskyEnergy 2017
##  AnnualReport        HuskyEnergy 2016
##  AnnualReport        HuskyEnergy 2015
##  AnnualReport        HuskyEnergy 2013
##  AnnualReport        HuskyEnergy 2012
##  AnnualReport        HuskyEnergy 2010
##  AnnualReport        HuskyEnergy 2011
##  AnnualReport      CenovusEnergy 2020
##  AnnualReport      CenovusEnergy 2019
##  AnnualReport      CenovusEnergy 2018
##  AnnualReport      CenovusEnergy 2017
##  AnnualReport      CenovusEnergy 2016
##  AnnualReport      CenovusEnergy 2015
##  AnnualReport      CenovusEnergy 2014
##  AnnualReport      CenovusEnergy 2013
##  AnnualReport      CenovusEnergy 2012
##  AnnualReport      CenovusEnergy 2011
##  AnnualReport IndustrialAlliance 2010
##  AnnualReport IndustrialAlliance 2011
##  AnnualReport IndustrialAlliance 2012
##  AnnualReport IndustrialAlliance 2013
##  AnnualReport IndustrialAlliance 2014
##  AnnualReport IndustrialAlliance 2015
##  AnnualReport IndustrialAlliance 2016
##  AnnualReport IndustrialAlliance 2017
##  AnnualReport IndustrialAlliance 2018
##  AnnualReport IndustrialAlliance 2019
##  AnnualReport              Shell 2020
##  AnnualReport              Shell 2019
##  AnnualReport              Shell 2018
##  AnnualReport              Shell 2017
##  AnnualReport              Shell 2016
##  AnnualReport              Shell 2015
##  AnnualReport              Shell 2014
##  AnnualReport              Shell 2013
##  AnnualReport              Shell 2012
##  AnnualReport              Shell 2011

# Dictionary approach

## Document-feature matrix of: 39 documents, 4 features (0.0% sparse) and 4 docvars.
##                                        features
## docs                                    risk climate government country
##   054_AnnualReport_HuskyEnergy_2019.txt  176      53        236     120
##   055_AnnualReport_HuskyEnergy_2018.txt  190      53        250     135
##   056_AnnualReport_HuskyEnergy_2017.txt  180      40        251     171
##   057_AnnualReport_HuskyEnergy_2016.txt  177      41        241     157
##   058_AnnualReport_HuskyEnergy_2015.txt  167      37        197     129
##   071_AnnualReport_HuskyEnergy_2013.txt  176      31        185     118
## [ reached max_ndoc ... 33 more documents ]
## Loading required package: Matrix

## 
## Call:
## textmodel_wordfish.dfm(x = wordfishdfm, dir = c(9, 30), dispersion = "poisson")
## 
## Estimated Document Positions:
##                                                 theta        se
## 054_AnnualReport_HuskyEnergy_2019.txt         0.33362 4.898e-03
## 055_AnnualReport_HuskyEnergy_2018.txt         0.31532 5.146e-03
## 056_AnnualReport_HuskyEnergy_2017.txt         0.29806 5.423e-03
## 057_AnnualReport_HuskyEnergy_2016.txt         0.32350 5.182e-03
## 058_AnnualReport_HuskyEnergy_2015.txt         0.31185 5.742e-03
## 071_AnnualReport_HuskyEnergy_2013.txt         0.29458 5.994e-03
## 072_AnnualReport_HuskyEnergy_2012.txt         0.30881 5.564e-03
## 073_AnnualReport_HuskyEnergy_2010.txt         0.40225 4.678e-03
## 073_AnnualReport_HuskyEnergy_2011.txt         0.04527 1.016e-02
## 075_AnnualReport_CenovusEnergy_2020.txt       0.29265 5.470e-03
## 076_AnnualReport_CenovusEnergy_2019.txt       0.27788 5.681e-03
## 077_AnnualReport_CenovusEnergy_2018.txt       0.29153 5.458e-03
## 078_AnnualReport_CenovusEnergy_2017.txt       0.26888 6.656e-03
## 079_AnnualReport_CenovusEnergy_2016.txt       0.24164 7.310e-03
## 080_AnnualReport_CenovusEnergy_2015.txt       0.31304 5.407e-03
## 081_AnnualReport_CenovusEnergy_2014.txt       0.30275 5.646e-03
## 082_AnnualReport_CenovusEnergy_2013.txt       0.33820 4.914e-03
## 083_AnnualReport_CenovusEnergy_2012.txt       0.33408 4.581e-03
## 084_AnnualReport_CenovusEnergy_2011.txt       0.34417 5.194e-03
## 194_AnnualReport_IndustrialAlliance_2010.txt -1.19839 9.251e-03
## 195_AnnualReport_IndustrialAlliance_2011.txt -1.28158 8.130e-03
## 196_AnnualReport_IndustrialAlliance_2012.txt -1.28666 8.892e-03
## 197_AnnualReport_IndustrialAlliance_2013.txt -1.24827 8.966e-03
## 198_AnnualReport_IndustrialAlliance_2014.txt -1.28477 8.990e-03
## 199_AnnualReport_IndustrialAlliance_2015.txt -1.29349 9.044e-03
## 200_AnnualReport_IndustrialAlliance_2016.txt -1.26842 9.157e-03
## 201_AnnualReport_IndustrialAlliance_2017.txt -1.22840 9.954e-03
## 202_AnnualReport_IndustrialAlliance_2018.txt -1.15401 1.056e-02
## 203_AnnualReport_IndustrialAlliance_2019.txt -3.67991 2.221e-03
## 247_AnnualReport_Shell_2020.txt               0.95724 5.535e-05
## 248_AnnualReport_Shell_2019.txt               0.96093 5.503e-05
## 249_AnnualReport_Shell_2018.txt               0.96827 5.358e-05
## 250_AnnualReport_Shell_2017.txt               0.97335 5.371e-05
## 251_AnnualReport_Shell_2016.txt               0.93203 7.667e-05
## 252_AnnualReport_Shell_2015.txt               0.90203 9.804e-05
## 253_AnnualReport_Shell_2014.txt               0.89945 1.015e-04
## 254_AnnualReport_Shell_2013.txt               0.89821 1.046e-04
## 255_AnnualReport_Shell_2012.txt               0.89684 1.043e-04
## 256_AnnualReport_Shell_2011.txt               0.89745 1.046e-04
## 
## Estimated Feature Scores:
##      energi annual report corpor    huski integr compani   base calgari alberta
## beta  1.051 0.0993  0.465 0.1515 -0.02412 0.8057  -0.140 0.2889 -0.2598  0.3477
## psi   4.359 5.3266  5.675 4.7080  4.41735 3.5555   6.478 4.9964  2.6417  3.5305
##       common share public  trade toronto   stock exchang   oper   canada
## beta -0.3069 0.283 0.1821 0.7915  -0.417 -0.2792  0.3221 0.2702 -0.04413
## psi   4.4827 6.020 3.2831 3.9417   2.511  4.3433  4.5876 5.8850  4.54058
##          unit  state  asia     pacif  region upstream downstream     busi
## beta -0.02858 0.1857 2.585 -0.002966 0.02217    1.727      1.859 -0.08095
## psi   4.30396 3.6002 2.699  2.675478 3.42303    3.644      3.093  5.17173
##      segment    two     main
## beta  0.1893 0.1037 -0.09506
## psi   4.0062 3.6939  3.64682

## $fit
##                                                      fit         lwr
## 054_AnnualReport_HuskyEnergy_2019.txt         0.33362113  0.32402164
## 055_AnnualReport_HuskyEnergy_2018.txt         0.31532162  0.30523509
## 056_AnnualReport_HuskyEnergy_2017.txt         0.29806357  0.28743442
## 057_AnnualReport_HuskyEnergy_2016.txt         0.32350357  0.31334653
## 058_AnnualReport_HuskyEnergy_2015.txt         0.31185356  0.30059888
## 071_AnnualReport_HuskyEnergy_2013.txt         0.29458079  0.28283347
## 072_AnnualReport_HuskyEnergy_2012.txt         0.30880525  0.29789970
## 073_AnnualReport_HuskyEnergy_2010.txt         0.40225024  0.39308235
## 073_AnnualReport_HuskyEnergy_2011.txt         0.04526666  0.02535787
## 075_AnnualReport_CenovusEnergy_2020.txt       0.29264616  0.28192515
## 076_AnnualReport_CenovusEnergy_2019.txt       0.27788079  0.26674718
## 077_AnnualReport_CenovusEnergy_2018.txt       0.29153309  0.28083547
## 078_AnnualReport_CenovusEnergy_2017.txt       0.26888133  0.25583666
## 079_AnnualReport_CenovusEnergy_2016.txt       0.24164150  0.22731370
## 080_AnnualReport_CenovusEnergy_2015.txt       0.31304111  0.30244282
## 081_AnnualReport_CenovusEnergy_2014.txt       0.30275408  0.29168869
## 082_AnnualReport_CenovusEnergy_2013.txt       0.33820487  0.32857375
## 083_AnnualReport_CenovusEnergy_2012.txt       0.33408204  0.32510425
## 084_AnnualReport_CenovusEnergy_2011.txt       0.34416673  0.33398715
## 194_AnnualReport_IndustrialAlliance_2010.txt -1.19839036 -1.21652211
## 195_AnnualReport_IndustrialAlliance_2011.txt -1.28157615 -1.29751150
## 196_AnnualReport_IndustrialAlliance_2012.txt -1.28666136 -1.30408871
## 197_AnnualReport_IndustrialAlliance_2013.txt -1.24826718 -1.26584102
## 198_AnnualReport_IndustrialAlliance_2014.txt -1.28476852 -1.30238790
## 199_AnnualReport_IndustrialAlliance_2015.txt -1.29349448 -1.31122121
## 200_AnnualReport_IndustrialAlliance_2016.txt -1.26842413 -1.28637099
## 201_AnnualReport_IndustrialAlliance_2017.txt -1.22840294 -1.24791325
## 202_AnnualReport_IndustrialAlliance_2018.txt -1.15401351 -1.17471677
## 203_AnnualReport_IndustrialAlliance_2019.txt -3.67990885 -3.68426143
## 247_AnnualReport_Shell_2020.txt               0.95724112  0.95713263
## 248_AnnualReport_Shell_2019.txt               0.96093289  0.96082503
## 249_AnnualReport_Shell_2018.txt               0.96826551  0.96816049
## 250_AnnualReport_Shell_2017.txt               0.97334566  0.97324039
## 251_AnnualReport_Shell_2016.txt               0.93203354  0.93188327
## 252_AnnualReport_Shell_2015.txt               0.90202954  0.90183738
## 253_AnnualReport_Shell_2014.txt               0.89945094  0.89925197
## 254_AnnualReport_Shell_2013.txt               0.89821332  0.89800830
## 255_AnnualReport_Shell_2012.txt               0.89684307  0.89663863
## 256_AnnualReport_Shell_2011.txt               0.89745381  0.89724871
##                                                      upr
## 054_AnnualReport_HuskyEnergy_2019.txt         0.34322062
## 055_AnnualReport_HuskyEnergy_2018.txt         0.32540815
## 056_AnnualReport_HuskyEnergy_2017.txt         0.30869273
## 057_AnnualReport_HuskyEnergy_2016.txt         0.33366060
## 058_AnnualReport_HuskyEnergy_2015.txt         0.32310824
## 071_AnnualReport_HuskyEnergy_2013.txt         0.30632811
## 072_AnnualReport_HuskyEnergy_2012.txt         0.31971080
## 073_AnnualReport_HuskyEnergy_2010.txt         0.41141814
## 073_AnnualReport_HuskyEnergy_2011.txt         0.06517546
## 075_AnnualReport_CenovusEnergy_2020.txt       0.30336718
## 076_AnnualReport_CenovusEnergy_2019.txt       0.28901440
## 077_AnnualReport_CenovusEnergy_2018.txt       0.30223071
## 078_AnnualReport_CenovusEnergy_2017.txt       0.28192599
## 079_AnnualReport_CenovusEnergy_2016.txt       0.25596930
## 080_AnnualReport_CenovusEnergy_2015.txt       0.32363939
## 081_AnnualReport_CenovusEnergy_2014.txt       0.31381947
## 082_AnnualReport_CenovusEnergy_2013.txt       0.34783598
## 083_AnnualReport_CenovusEnergy_2012.txt       0.34305983
## 084_AnnualReport_CenovusEnergy_2011.txt       0.35434630
## 194_AnnualReport_IndustrialAlliance_2010.txt -1.18025862
## 195_AnnualReport_IndustrialAlliance_2011.txt -1.26564081
## 196_AnnualReport_IndustrialAlliance_2012.txt -1.26923401
## 197_AnnualReport_IndustrialAlliance_2013.txt -1.23069333
## 198_AnnualReport_IndustrialAlliance_2014.txt -1.26714915
## 199_AnnualReport_IndustrialAlliance_2015.txt -1.27576776
## 200_AnnualReport_IndustrialAlliance_2016.txt -1.25047726
## 201_AnnualReport_IndustrialAlliance_2017.txt -1.20889264
## 202_AnnualReport_IndustrialAlliance_2018.txt -1.13331025
## 203_AnnualReport_IndustrialAlliance_2019.txt -3.67555627
## 247_AnnualReport_Shell_2020.txt               0.95734961
## 248_AnnualReport_Shell_2019.txt               0.96104076
## 249_AnnualReport_Shell_2018.txt               0.96837053
## 250_AnnualReport_Shell_2017.txt               0.97345093
## 251_AnnualReport_Shell_2016.txt               0.93218381
## 252_AnnualReport_Shell_2015.txt               0.90222170
## 253_AnnualReport_Shell_2014.txt               0.89964990
## 254_AnnualReport_Shell_2013.txt               0.89841835
## 255_AnnualReport_Shell_2012.txt               0.89704751
## 256_AnnualReport_Shell_2011.txt               0.89765891
# Fit a 20-topic LDA model on a trimmed document-feature matrix and list the
# top terms of each topic.
# library() is used instead of require(): require() only returns FALSE when a
# package is missing, which defers the failure to a later, less obvious line.
library(quanteda.textmodels)
library(topicmodels)

# Keep only features that occur at least 500 times in total.
# NOTE(review): max_docfreq = 10000 is a document count far above the 39
# documents shown in the corpus summary, so it presumably filters nothing --
# confirm whether max_termfreq or a proportion was intended.
dfm_stopicmod <- dfm_trim(
  data_texts_dfm,
  min_termfreq = 500,
  max_docfreq = 10000
)

# Fix the seed so that topic numbering and top terms are reproducible across
# runs; without it every fit starts from a different random initialisation.
LDA_fit_20 <- convert(dfm_stopicmod, to = "topicmodels") %>%
  LDA(k = 20, control = list(seed = 1234))

# get top five terms per topic
get_terms(LDA_fit_20, 5)
##      Topic 1   Topic 2   Topic 3 Topic 4  Topic 5   Topic 6 Topic 7    Topic 8
## [1,] "financi" "insur"   "uif"   "invest" "million" "oil"   "shell"    "n"    
## [2,] "cost"    "compani" "pg"    "incom"  "huski"   "gas"   "report"   "e"    
## [3,] "oil"     "financi" "boe"   "asset"  "compani" "year"  "director" "s"    
## [4,] "oper"    "risk"    "jo"    "rate"   "decemb"  "huski" "million"  "o"    
## [5,] "tax"     "manag"   "d"     "liabil" "oper"    "valu"  "share"    "d"    
##      Topic 9   Topic 10 Topic 11    Topic 12  Topic 13  Topic 14  Topic 15 
## [1,] "oil"     "oper"   "shell"     "compani" "compani" "oil"     "compani"
## [2,] "compani" "asset"  "compani"   "financi" "oper"    "cenovus" "financi"
## [3,] "price"   "price"  "share"     "ia"      "product" "product" "oil"    
## [4,] "cash"    "loss"   "statement" "risk"    "manag"   "crude"   "share"  
## [5,] "million" "decemb" "financi"   "valu"    "million" "cash"    "product"
##      Topic 16   Topic 17  Topic 18   Topic 19  Topic 20 
## [1,] "compani"  "financi" "share"    "shell"   "shell"  
## [2,] "industri" "asset"   "report"   "gas"     "compani"
## [3,] "allianc"  "cash"    "interest" "report"  "product"
## [4,] "servic"   "decemb"  "gas"      "share"   "million"
## [5,] "valu"     "risk"    "million"  "compani" "financi"
# The groups of topics identified by the model.