## Corpus consisting of 39 documents, showing 39 documents:
##
## Text Types Tokens Sentences num
## 054_AnnualReport_HuskyEnergy_2019.txt 6996 88257 2349 54
## 055_AnnualReport_HuskyEnergy_2018.txt 6756 88275 2368 55
## 056_AnnualReport_HuskyEnergy_2017.txt 6901 89446 2118 56
## 057_AnnualReport_HuskyEnergy_2016.txt 6927 84807 1949 57
## 058_AnnualReport_HuskyEnergy_2015.txt 6538 74582 1738 58
## 071_AnnualReport_HuskyEnergy_2013.txt 6731 75466 1868 71
## 072_AnnualReport_HuskyEnergy_2012.txt 6848 82056 2046 72
## 073_AnnualReport_HuskyEnergy_2010.txt 6790 66654 1895 73
## 073_AnnualReport_HuskyEnergy_2011.txt 7732 74912 1900 73
## 075_AnnualReport_CenovusEnergy_2020.txt 7023 89771 2237 75
## 076_AnnualReport_CenovusEnergy_2019.txt 7117 89565 2219 76
## 077_AnnualReport_CenovusEnergy_2018.txt 7240 91739 2317 77
## 078_AnnualReport_CenovusEnergy_2017.txt 6584 69431 1837 78
## 079_AnnualReport_CenovusEnergy_2016.txt 6807 66181 1728 79
## 080_AnnualReport_CenovusEnergy_2015.txt 7235 80427 1992 80
## 081_AnnualReport_CenovusEnergy_2014.txt 7233 81619 2070 81
## 082_AnnualReport_CenovusEnergy_2013.txt 7616 86863 1790 82
## 083_AnnualReport_CenovusEnergy_2012.txt 7730 97091 1723 83
## 084_AnnualReport_CenovusEnergy_2011.txt 9860 90597 1478 84
## 194_AnnualReport_IndustrialAlliance_2010.txt 8423 103862 2575 194
## 195_AnnualReport_IndustrialAlliance_2011.txt 8866 110223 2604 195
## 196_AnnualReport_IndustrialAlliance_2012.txt 8201 91466 2226 196
## 197_AnnualReport_IndustrialAlliance_2013.txt 8210 96588 2406 197
## 198_AnnualReport_IndustrialAlliance_2014.txt 7873 88822 2113 198
## 199_AnnualReport_IndustrialAlliance_2015.txt 8359 88368 1953 199
## 200_AnnualReport_IndustrialAlliance_2016.txt 8012 88846 1980 200
## 201_AnnualReport_IndustrialAlliance_2017.txt 7054 79247 1911 201
## 202_AnnualReport_IndustrialAlliance_2018.txt 7636 85051 1949 202
## 203_AnnualReport_IndustrialAlliance_2019.txt 8140 81083 1208 203
## 247_AnnualReport_Shell_2020.txt 16210 234186 5628 247
## 248_AnnualReport_Shell_2019.txt 15471 223496 5373 248
## 249_AnnualReport_Shell_2018.txt 15021 208112 4878 249
## 250_AnnualReport_Shell_2017.txt 14651 192587 4596 250
## 251_AnnualReport_Shell_2016.txt 13091 165700 4115 251
## 252_AnnualReport_Shell_2015.txt 12071 143206 3349 252
## 253_AnnualReport_Shell_2014.txt 11754 138184 3242 253
## 254_AnnualReport_Shell_2013.txt 11513 130806 3014 254
## 255_AnnualReport_Shell_2012.txt 11669 131995 3077 255
## 256_AnnualReport_Shell_2011.txt 11420 130451 3044 256
## type company year
## AnnualReport HuskyEnergy 2019
## AnnualReport HuskyEnergy 2018
## AnnualReport HuskyEnergy 2017
## AnnualReport HuskyEnergy 2016
## AnnualReport HuskyEnergy 2015
## AnnualReport HuskyEnergy 2013
## AnnualReport HuskyEnergy 2012
## AnnualReport HuskyEnergy 2010
## AnnualReport HuskyEnergy 2011
## AnnualReport CenovusEnergy 2020
## AnnualReport CenovusEnergy 2019
## AnnualReport CenovusEnergy 2018
## AnnualReport CenovusEnergy 2017
## AnnualReport CenovusEnergy 2016
## AnnualReport CenovusEnergy 2015
## AnnualReport CenovusEnergy 2014
## AnnualReport CenovusEnergy 2013
## AnnualReport CenovusEnergy 2012
## AnnualReport CenovusEnergy 2011
## AnnualReport IndustrialAlliance 2010
## AnnualReport IndustrialAlliance 2011
## AnnualReport IndustrialAlliance 2012
## AnnualReport IndustrialAlliance 2013
## AnnualReport IndustrialAlliance 2014
## AnnualReport IndustrialAlliance 2015
## AnnualReport IndustrialAlliance 2016
## AnnualReport IndustrialAlliance 2017
## AnnualReport IndustrialAlliance 2018
## AnnualReport IndustrialAlliance 2019
## AnnualReport Shell 2020
## AnnualReport Shell 2019
## AnnualReport Shell 2018
## AnnualReport Shell 2017
## AnnualReport Shell 2016
## AnnualReport Shell 2015
## AnnualReport Shell 2014
## AnnualReport Shell 2013
## AnnualReport Shell 2012
## AnnualReport Shell 2011
# Dictionary approach
## Document-feature matrix of: 39 documents, 4 features (0.0% sparse) and 4 docvars.
## features
## docs risk climate government country
## 054_AnnualReport_HuskyEnergy_2019.txt 176 53 236 120
## 055_AnnualReport_HuskyEnergy_2018.txt 190 53 250 135
## 056_AnnualReport_HuskyEnergy_2017.txt 180 40 251 171
## 057_AnnualReport_HuskyEnergy_2016.txt 177 41 241 157
## 058_AnnualReport_HuskyEnergy_2015.txt 167 37 197 129
## 071_AnnualReport_HuskyEnergy_2013.txt 176 31 185 118
## [ reached max_ndoc ... 33 more documents ]
## Loading required package: Matrix
##
## Call:
## textmodel_wordfish.dfm(x = wordfishdfm, dir = c(9, 30), dispersion = "poisson")
##
## Estimated Document Positions:
## theta se
## 054_AnnualReport_HuskyEnergy_2019.txt 0.33362 4.898e-03
## 055_AnnualReport_HuskyEnergy_2018.txt 0.31532 5.146e-03
## 056_AnnualReport_HuskyEnergy_2017.txt 0.29806 5.423e-03
## 057_AnnualReport_HuskyEnergy_2016.txt 0.32350 5.182e-03
## 058_AnnualReport_HuskyEnergy_2015.txt 0.31185 5.742e-03
## 071_AnnualReport_HuskyEnergy_2013.txt 0.29458 5.994e-03
## 072_AnnualReport_HuskyEnergy_2012.txt 0.30881 5.564e-03
## 073_AnnualReport_HuskyEnergy_2010.txt 0.40225 4.678e-03
## 073_AnnualReport_HuskyEnergy_2011.txt 0.04527 1.016e-02
## 075_AnnualReport_CenovusEnergy_2020.txt 0.29265 5.470e-03
## 076_AnnualReport_CenovusEnergy_2019.txt 0.27788 5.681e-03
## 077_AnnualReport_CenovusEnergy_2018.txt 0.29153 5.458e-03
## 078_AnnualReport_CenovusEnergy_2017.txt 0.26888 6.656e-03
## 079_AnnualReport_CenovusEnergy_2016.txt 0.24164 7.310e-03
## 080_AnnualReport_CenovusEnergy_2015.txt 0.31304 5.407e-03
## 081_AnnualReport_CenovusEnergy_2014.txt 0.30275 5.646e-03
## 082_AnnualReport_CenovusEnergy_2013.txt 0.33820 4.914e-03
## 083_AnnualReport_CenovusEnergy_2012.txt 0.33408 4.581e-03
## 084_AnnualReport_CenovusEnergy_2011.txt 0.34417 5.194e-03
## 194_AnnualReport_IndustrialAlliance_2010.txt -1.19839 9.251e-03
## 195_AnnualReport_IndustrialAlliance_2011.txt -1.28158 8.130e-03
## 196_AnnualReport_IndustrialAlliance_2012.txt -1.28666 8.892e-03
## 197_AnnualReport_IndustrialAlliance_2013.txt -1.24827 8.966e-03
## 198_AnnualReport_IndustrialAlliance_2014.txt -1.28477 8.990e-03
## 199_AnnualReport_IndustrialAlliance_2015.txt -1.29349 9.044e-03
## 200_AnnualReport_IndustrialAlliance_2016.txt -1.26842 9.157e-03
## 201_AnnualReport_IndustrialAlliance_2017.txt -1.22840 9.954e-03
## 202_AnnualReport_IndustrialAlliance_2018.txt -1.15401 1.056e-02
## 203_AnnualReport_IndustrialAlliance_2019.txt -3.67991 2.221e-03
## 247_AnnualReport_Shell_2020.txt 0.95724 5.535e-05
## 248_AnnualReport_Shell_2019.txt 0.96093 5.503e-05
## 249_AnnualReport_Shell_2018.txt 0.96827 5.358e-05
## 250_AnnualReport_Shell_2017.txt 0.97335 5.371e-05
## 251_AnnualReport_Shell_2016.txt 0.93203 7.667e-05
## 252_AnnualReport_Shell_2015.txt 0.90203 9.804e-05
## 253_AnnualReport_Shell_2014.txt 0.89945 1.015e-04
## 254_AnnualReport_Shell_2013.txt 0.89821 1.046e-04
## 255_AnnualReport_Shell_2012.txt 0.89684 1.043e-04
## 256_AnnualReport_Shell_2011.txt 0.89745 1.046e-04
##
## Estimated Feature Scores:
## energi annual report corpor huski integr compani base calgari alberta
## beta 1.051 0.0993 0.465 0.1515 -0.02412 0.8057 -0.140 0.2889 -0.2598 0.3477
## psi 4.359 5.3266 5.675 4.7080 4.41735 3.5555 6.478 4.9964 2.6417 3.5305
## common share public trade toronto stock exchang oper canada
## beta -0.3069 0.283 0.1821 0.7915 -0.417 -0.2792 0.3221 0.2702 -0.04413
## psi 4.4827 6.020 3.2831 3.9417 2.511 4.3433 4.5876 5.8850 4.54058
## unit state asia pacif region upstream downstream busi
## beta -0.02858 0.1857 2.585 -0.002966 0.02217 1.727 1.859 -0.08095
## psi 4.30396 3.6002 2.699 2.675478 3.42303 3.644 3.093 5.17173
## segment two main
## beta 0.1893 0.1037 -0.09506
## psi 4.0062 3.6939 3.64682
## $fit
## fit lwr
## 054_AnnualReport_HuskyEnergy_2019.txt 0.33362113 0.32402164
## 055_AnnualReport_HuskyEnergy_2018.txt 0.31532162 0.30523509
## 056_AnnualReport_HuskyEnergy_2017.txt 0.29806357 0.28743442
## 057_AnnualReport_HuskyEnergy_2016.txt 0.32350357 0.31334653
## 058_AnnualReport_HuskyEnergy_2015.txt 0.31185356 0.30059888
## 071_AnnualReport_HuskyEnergy_2013.txt 0.29458079 0.28283347
## 072_AnnualReport_HuskyEnergy_2012.txt 0.30880525 0.29789970
## 073_AnnualReport_HuskyEnergy_2010.txt 0.40225024 0.39308235
## 073_AnnualReport_HuskyEnergy_2011.txt 0.04526666 0.02535787
## 075_AnnualReport_CenovusEnergy_2020.txt 0.29264616 0.28192515
## 076_AnnualReport_CenovusEnergy_2019.txt 0.27788079 0.26674718
## 077_AnnualReport_CenovusEnergy_2018.txt 0.29153309 0.28083547
## 078_AnnualReport_CenovusEnergy_2017.txt 0.26888133 0.25583666
## 079_AnnualReport_CenovusEnergy_2016.txt 0.24164150 0.22731370
## 080_AnnualReport_CenovusEnergy_2015.txt 0.31304111 0.30244282
## 081_AnnualReport_CenovusEnergy_2014.txt 0.30275408 0.29168869
## 082_AnnualReport_CenovusEnergy_2013.txt 0.33820487 0.32857375
## 083_AnnualReport_CenovusEnergy_2012.txt 0.33408204 0.32510425
## 084_AnnualReport_CenovusEnergy_2011.txt 0.34416673 0.33398715
## 194_AnnualReport_IndustrialAlliance_2010.txt -1.19839036 -1.21652211
## 195_AnnualReport_IndustrialAlliance_2011.txt -1.28157615 -1.29751150
## 196_AnnualReport_IndustrialAlliance_2012.txt -1.28666136 -1.30408871
## 197_AnnualReport_IndustrialAlliance_2013.txt -1.24826718 -1.26584102
## 198_AnnualReport_IndustrialAlliance_2014.txt -1.28476852 -1.30238790
## 199_AnnualReport_IndustrialAlliance_2015.txt -1.29349448 -1.31122121
## 200_AnnualReport_IndustrialAlliance_2016.txt -1.26842413 -1.28637099
## 201_AnnualReport_IndustrialAlliance_2017.txt -1.22840294 -1.24791325
## 202_AnnualReport_IndustrialAlliance_2018.txt -1.15401351 -1.17471677
## 203_AnnualReport_IndustrialAlliance_2019.txt -3.67990885 -3.68426143
## 247_AnnualReport_Shell_2020.txt 0.95724112 0.95713263
## 248_AnnualReport_Shell_2019.txt 0.96093289 0.96082503
## 249_AnnualReport_Shell_2018.txt 0.96826551 0.96816049
## 250_AnnualReport_Shell_2017.txt 0.97334566 0.97324039
## 251_AnnualReport_Shell_2016.txt 0.93203354 0.93188327
## 252_AnnualReport_Shell_2015.txt 0.90202954 0.90183738
## 253_AnnualReport_Shell_2014.txt 0.89945094 0.89925197
## 254_AnnualReport_Shell_2013.txt 0.89821332 0.89800830
## 255_AnnualReport_Shell_2012.txt 0.89684307 0.89663863
## 256_AnnualReport_Shell_2011.txt 0.89745381 0.89724871
## upr
## 054_AnnualReport_HuskyEnergy_2019.txt 0.34322062
## 055_AnnualReport_HuskyEnergy_2018.txt 0.32540815
## 056_AnnualReport_HuskyEnergy_2017.txt 0.30869273
## 057_AnnualReport_HuskyEnergy_2016.txt 0.33366060
## 058_AnnualReport_HuskyEnergy_2015.txt 0.32310824
## 071_AnnualReport_HuskyEnergy_2013.txt 0.30632811
## 072_AnnualReport_HuskyEnergy_2012.txt 0.31971080
## 073_AnnualReport_HuskyEnergy_2010.txt 0.41141814
## 073_AnnualReport_HuskyEnergy_2011.txt 0.06517546
## 075_AnnualReport_CenovusEnergy_2020.txt 0.30336718
## 076_AnnualReport_CenovusEnergy_2019.txt 0.28901440
## 077_AnnualReport_CenovusEnergy_2018.txt 0.30223071
## 078_AnnualReport_CenovusEnergy_2017.txt 0.28192599
## 079_AnnualReport_CenovusEnergy_2016.txt 0.25596930
## 080_AnnualReport_CenovusEnergy_2015.txt 0.32363939
## 081_AnnualReport_CenovusEnergy_2014.txt 0.31381947
## 082_AnnualReport_CenovusEnergy_2013.txt 0.34783598
## 083_AnnualReport_CenovusEnergy_2012.txt 0.34305983
## 084_AnnualReport_CenovusEnergy_2011.txt 0.35434630
## 194_AnnualReport_IndustrialAlliance_2010.txt -1.18025862
## 195_AnnualReport_IndustrialAlliance_2011.txt -1.26564081
## 196_AnnualReport_IndustrialAlliance_2012.txt -1.26923401
## 197_AnnualReport_IndustrialAlliance_2013.txt -1.23069333
## 198_AnnualReport_IndustrialAlliance_2014.txt -1.26714915
## 199_AnnualReport_IndustrialAlliance_2015.txt -1.27576776
## 200_AnnualReport_IndustrialAlliance_2016.txt -1.25047726
## 201_AnnualReport_IndustrialAlliance_2017.txt -1.20889264
## 202_AnnualReport_IndustrialAlliance_2018.txt -1.13331025
## 203_AnnualReport_IndustrialAlliance_2019.txt -3.67555627
## 247_AnnualReport_Shell_2020.txt 0.95734961
## 248_AnnualReport_Shell_2019.txt 0.96104076
## 249_AnnualReport_Shell_2018.txt 0.96837053
## 250_AnnualReport_Shell_2017.txt 0.97345093
## 251_AnnualReport_Shell_2016.txt 0.93218381
## 252_AnnualReport_Shell_2015.txt 0.90222170
## 253_AnnualReport_Shell_2014.txt 0.89964990
## 254_AnnualReport_Shell_2013.txt 0.89841835
## 255_AnnualReport_Shell_2012.txt 0.89704751
## 256_AnnualReport_Shell_2011.txt 0.89765891
# Topic model (LDA) on the trimmed document-feature matrix ----
# library() is used instead of require() so that a missing package stops the
# script immediately rather than returning FALSE and failing later.
library(quanteda.textmodels)
library(topicmodels)

# Keep only terms that occur at least 500 times in total.
# NOTE(review): max_docfreq = 10000 is passed as given; if data_texts_dfm holds
# only the 39 documents listed in the corpus summary, a count threshold of
# 10000 can never exclude a term -- confirm whether a proportion was intended.
dfm_stopicmod <- dfm_trim(
  data_texts_dfm,
  min_termfreq = 500,
  max_docfreq = 10000
)

# Fit a 20-topic LDA model. The seed is fixed because the fit starts from a
# random initialisation; without it each run returns different topics, so any
# topic table printed from an earlier unseeded run will not match exactly.
# NOTE(review): previously printed topics contain single-character terms
# ("n", "e", "s", ...), which looks like text-extraction noise -- consider
# dropping such tokens upstream before fitting.
LDA_fit_20 <- convert(dfm_stopicmod, to = "topicmodels") %>%
  LDA(k = 20, control = list(seed = 1234))

# get top five terms per topic
get_terms(LDA_fit_20, 5)
## Topic 1 Topic 2 Topic 3 Topic 4 Topic 5 Topic 6 Topic 7 Topic 8
## [1,] "financi" "insur" "uif" "invest" "million" "oil" "shell" "n"
## [2,] "cost" "compani" "pg" "incom" "huski" "gas" "report" "e"
## [3,] "oil" "financi" "boe" "asset" "compani" "year" "director" "s"
## [4,] "oper" "risk" "jo" "rate" "decemb" "huski" "million" "o"
## [5,] "tax" "manag" "d" "liabil" "oper" "valu" "share" "d"
## Topic 9 Topic 10 Topic 11 Topic 12 Topic 13 Topic 14 Topic 15
## [1,] "oil" "oper" "shell" "compani" "compani" "oil" "compani"
## [2,] "compani" "asset" "compani" "financi" "oper" "cenovus" "financi"
## [3,] "price" "price" "share" "ia" "product" "product" "oil"
## [4,] "cash" "loss" "statement" "risk" "manag" "crude" "share"
## [5,] "million" "decemb" "financi" "valu" "million" "cash" "product"
## Topic 16 Topic 17 Topic 18 Topic 19 Topic 20
## [1,] "compani" "financi" "share" "shell" "shell"
## [2,] "industri" "asset" "report" "gas" "compani"
## [3,] "allianc" "cash" "interest" "report" "product"
## [4,] "servic" "decemb" "gas" "share" "million"
## [5,] "valu" "risk" "million" "compani" "financi"
# The identified groups of topics.