library(pdftools);library(tidyverse);library(data.table)
## Warning: package 'pdftools' was built under R version 4.0.2
## Using poppler version 0.73.0
## -- Attaching packages -------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.1     √ purrr   0.3.4
## √ tibble  3.0.1     √ dplyr   1.0.0
## √ tidyr   1.1.0     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0
## -- Conflicts ----------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
library(splitstackshape);library(tidytext);library(topicmodels)
## Warning: package 'splitstackshape' was built under R version 4.0.2
## Warning: package 'tidytext' was built under R version 4.0.2
## Warning: package 'topicmodels' was built under R version 4.0.2

The input text looks roughly like this. Ideally, trim the unnecessary parts out first, such as meeting titles, dates, organizational sign-offs, and so on.
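
A minimal sketch of how a file like MPR_data.rds could be built with pdftools (the folder name here is illustrative, not the author's):

files <- list.files('mpr_pdfs', pattern = '\\.pdf$', full.names = TRUE)
Text <- tibble(date = basename(files),
               Text = map_chr(files, ~ paste(pdf_text(.x), collapse = ' ')))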

#the input looks roughly like this
Text <- readRDS("C:\\Few_pila\\MPR_BOT\\MPR_data.rds")
head(Text,3)
## # A tibble: 3 x 2
##   date        Text                                                              
##   <chr>       <chr>                                                             
## 1 2017_Q1.pdf Advanced economies continued to recover. This was particularly th~
## 2 2017_Q2.pdf The global economy was projected to continue expanding. The U.S. ~
## 3 2017_Q3.pdf The global economy was projected to continue expanding. Advanced ~

#split the big Text into pieces at each sentence boundary (end of sentence >> .)

#preprocess ***
#the most important step: splitting the text by sentence (ending with .)
#the regex requires two lowercase letters before the period, so abbreviations
#like "U.S." are not treated as sentence ends
Text$Text <- gsub('([a-z])([a-z])(\\.)','\\1\\2$~',Text$Text)
Text <- cSplit(Text,'Text','$~','long')
Text <- Text %>% ungroup()
Text$Text <- as.character(Text$Text)
Text$Text[1:5]
## [1] "Advanced economies continued to recover$"                                                                                                                                                                                                                                                                                                 
## [2] "This was particularly the case for the U.S. economy where labor market conditions strengthened with investment poised to increase$"                                                                                                                                                                                                       
## [3] "The euro area economies gained traction through domestic consumption, while the Japanese economy continued to recover at a gradual pace$"                                                                                                                                                                                                 
## [4] "The Chinese economy was expected to slow down as a result of the ongoing economic reforms; however, the economic recovery in other Asian economies was expected to continue, driven by exports, especially those of electronic goods which benefited from the technological upcycle and the uptrend of Internet of Things (IoT) products$"
## [5] "Overall, trading partner growth was revised upward$"
#clean_stopword func
preprocess <- function(text_data){
  temp_text = c()
  for (i in text_data){
    i1 = str_split(i,' ') %>% unlist()
    #drop stopwords using tidytext's stop_words lexicon
    i1 = i1[!(tolower(i1) %in% stop_words$word)]
    #combine
    i1 = paste(i1,collapse = ' ')
    temp_text <- append(temp_text,i1)
  }
  return(temp_text)
}
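
A note on design: the loop above keeps every sentence as a single string, which is what the n-gram tokenization later needs. The more usual tidytext route is sketched below (not the author's code; note that unnest_tokens lowercases tokens, and sentences that end up empty are dropped):

remove_stopwords <- function(text_data){
  tibble(id = seq_along(text_data), Text = text_data) %>%
    unnest_tokens(word, Text) %>%           #one row per token, lowercased
    anti_join(stop_words, by = 'word') %>%  #tidytext's stopword lexicon
    group_by(id) %>%
    summarise(Text = paste(word, collapse = ' ')) %>% #rebuild one string per sentence
    pull(Text)
}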
#clean_text
Text$Text <- str_squish(Text$Text)
Text$Text <- gsub("[0-9]+","",Text$Text)
Text$Text <- gsub("\\s*\\([^\\)]+\\)","",Text$Text)
Text$Text <- gsub("'s|´s|'|’s","",Text$Text)
Text$Text <- gsub("[[:punct:]]+"," ",Text$Text)
Text$Text <- gsub("\\s+"," ",Text$Text)
Text$Text <- preprocess(Text$Text)
Text$Text <- tolower(Text$Text)
Text$Text <- textstem::lemmatize_strings(Text$Text)
Text$Text[1:5]
## [1] "advance economy continue recover"                                                                                                                                                         
## [2] "economy labor market condition strengthen investment poise increase"                                                                                                                      
## [3] "euro economy gain traction domestic consumption japanese economy continue recover gradual pace"                                                                                           
## [4] "chinese economy expect slow result ongoing economic reform economic recovery asian economy expect continue drive export electronic benefit technological upcycle uptrend internet product"
## [5] "trade partner growth revise upward"
Text$doc_id <- 1:nrow(Text)
colnames(Text)
## [1] "date"   "Text"   "doc_id"

#cast into a DTM to feed the topic model

count_word <- Text %>% unnest_tokens(input = 'Text',output = 'Terms',
                                     token = 'ngrams' ,n = 3,n_min = 2) %>%
  count(Terms,doc_id)
count_word <- count_word[!(grepl('quarter|million|billion|trillion|due|previous|prior|percent',count_word$Terms)),]
count_word <- count_word[!(grepl('committee|due|monetary policy|thai|ahead',count_word$Terms)),]
count_word <- count_word[!(grepl('[0-9]',count_word$Terms)),]
count_word <- count_word[!(is.na(count_word$Terms)),]
count_word
##                            Terms doc_id n
##     1:        ability government    905 1
##     2: ability government agency    905 1
##     3:         ability household    822 1
##     4:         ability household    885 1
##     5:         ability household    951 1
##    ---                                   
## 23288:       yield seek behavior     58 1
## 23289:           yield snapbacks    444 1
## 23290:    yield snapbacks affect    444 1
## 23291:              yield tenure     86 1
## 23292:      yield tenure decline     86 1
the_dtm <- cast_dtm(count_word,doc_id,Terms,n)
the_dtm
## <<DocumentTermMatrix (documents: 1074, terms: 14153)>>
## Non-/sparse entries: 23292/15177030
## Sparsity           : 100%
## Maximal term length: 45
## Weighting          : term frequency (tf)

MODELING

In this part, try increasing or decreasing k against the data, and tune alpha and delta (in the 0.01 - 2 range) together with k. Keep trial-and-erroring until you get a model you like.
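
Before settling on k = 12, a quick way to line up candidate fits (a sketch; for a Gibbs fit, logLik() reports the log-likelihood of the sampled state, so treat it as a rough guide alongside actually reading the topics):

ks <- c(8, 10, 12, 15)
fits <- lapply(ks, function(k) LDA(the_dtm, k = k, method = 'Gibbs',
                                   control = list(alpha = 0.1, delta = 2, seed = 999)))
data.frame(k = ks, loglik = sapply(fits, function(m) as.numeric(logLik(m))))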

model = LDA(the_dtm,k = 12,
             method = 'Gibbs',
             control = list(alpha = 0.1,delta = 2,seed = 999))
#a helper function to plot and explore with
def <- function(k=8,alpha=0.1,delta=0.3){
  model = LDA(the_dtm,k = k,
               method = 'Gibbs',
               control = list(alpha = alpha,delta = delta,seed = 999))
  all_topics = tidy(model,matrix = 'beta')
  all_topics$beta <- all_topics$beta*1000
  all_top_terms <- all_topics %>% arrange(desc(beta)) %>%
    group_by(topic) %>% filter(row_number() <= 10)
  
  x = all_top_terms %>%
    mutate(term = reorder_within(term, beta, topic)) %>%
    ggplot(aes(term, beta, fill = factor(topic))) +
    geom_col(show.legend = FALSE) +
    facet_wrap(~ topic, scales = "free") +
    coord_flip() +
    scale_x_reordered() +ggdark::dark_theme_grey()
  return(x)
}
#def(k=12,alpha = 0.1,delta = 2) #great ***
(Figures: faceted bar charts of the top-10 terms per topic from def(), used to compare candidate models)

Prepare the input for the model

We split the data into two parts. The first Text set was used purely for fitting the model, with doc_id = number (documents separated by arbitrary running numbers). Our input, by contrast, converts Text back into its original document form, e.g. 2017-Q1.

#prepare input
input = Text
count_word_input <- input %>% unnest_tokens(input = 'Text',output = 'Terms',
                                     token = 'ngrams' ,n = 3,n_min = 2) %>%
  count(Terms,date) #changed here: count by date instead of doc_id
count_word_input
##                         Terms        date n
##     1:             4 continue 2018_Q2.pdf 1
##     2:      4 continue excess 2018_Q2.pdf 1
##     3:           4 government 2020_Q1.pdf 1
##     4: 4 government implement 2020_Q1.pdf 1
##     5:           4 oversupply 2018_Q1.pdf 1
##    ---                                     
## 24003:                   <NA> 2019_Q1.pdf 1
## 24004:                   <NA> 2019_Q2.pdf 1
## 24005:                   <NA> 2019_Q3.pdf 1
## 24006:                   <NA> 2019_Q4.pdf 1
## 24007:                   <NA> 2020_Q2.pdf 1
input_dtm <- cast_dtm(count_word_input,date,Terms,n)
model <- readRDS('C:\\Few_pila\\MPR_BOT\\lda_model.rds') #from my macbook
#predict with our model
test_lda <- posterior(model,input_dtm)

classifier <- function(test_lda){
  test_topic <- data.table(test_lda[['topics']] %>% as.matrix(),
                           'doc' = row.names(test_lda[['topics']] %>% 
                                               as.matrix()))
  #turn "2017_Q1.pdf" into a sortable number like 20171
  test_topic$doc_num <- test_topic$doc  %>% as.character() %>%
    gsub('_Q|\\.pdf',"",.) %>% as.numeric() 
  test_topic <- test_topic %>% arrange(doc_num) %>% select(-doc_num)
  return(test_topic)
}

result <- classifier(test_lda = test_lda)
colnames(result) <- c(paste0('num',c(1:12)),'doc')
result
##             num1         num2         num3         num4      num5         num6
##  1: 7.551729e-05 7.551729e-05 2.855309e-01 7.551729e-05 0.1073101 7.551729e-05
##  2: 1.288185e-03 6.134217e-05 5.588271e-02 6.134217e-05 0.4024660 6.134217e-05
##  3: 4.959825e-05 9.775816e-02 1.042059e-01 4.513441e-03 0.2053864 3.025494e-03
##  4: 6.034275e-05 2.227251e-01 8.514362e-02 6.034275e-05 0.2553102 6.637702e-04
##  5: 5.248793e-05 2.777136e-01 2.614424e-01 5.773672e-04 0.3113059 5.248793e-05
##  6: 1.591652e-01 1.313208e-02 1.395488e-01 5.448997e-05 0.5335113 5.448997e-05
##  7: 2.398412e-01 3.145627e-03 2.089006e-01 5.156766e-05 0.4724113 5.156766e-05
##  8: 1.071476e-01 6.692545e-05 6.692545e-05 6.692545e-05 0.7235310 6.692545e-05
##  9: 3.346806e-01 1.066790e-02 6.626027e-05 6.626027e-05 0.5897827 7.354890e-03
## 10: 1.806843e-01 1.057116e-01 6.815703e-05 6.815703e-05 0.5153353 3.482824e-02
## 11: 2.282007e-01 4.980937e-03 6.149305e-05 6.149305e-05 0.5036896 6.149305e-05
## 12: 1.267542e-01 1.462448e-01 6.178534e-02 6.496881e-05 0.4256107 6.496881e-05
## 13: 1.083772e-01 1.008640e-01 2.473704e-01 6.260957e-05 0.3124843 6.260957e-05
## 14: 1.560537e-01 5.324247e-05 6.128208e-02 5.324247e-05 0.4526142 1.118092e-03
##             num7         num8         num9        num10        num11
##  1: 1.971001e-02 7.551729e-05 7.551729e-05 7.551729e-05 4.818758e-01
##  2: 6.134217e-05 6.134217e-05 6.134217e-05 6.134217e-05 4.257760e-01
##  3: 4.959825e-05 4.959825e-05 4.959825e-05 4.959825e-05 2.768079e-01
##  4: 6.034275e-05 6.034275e-05 6.034275e-05 6.034275e-05 1.032464e-01
##  5: 5.248793e-05 5.773672e-04 5.248793e-05 5.248793e-05 9.453076e-02
##  6: 4.413688e-03 5.448997e-05 5.448997e-05 5.448997e-05 1.499019e-01
##  7: 5.156766e-05 5.156766e-05 5.672442e-04 5.156766e-05 7.379332e-02
##  8: 6.692545e-05 6.692545e-05 6.692545e-05 6.692545e-05 5.695355e-02
##  9: 1.331831e-02 6.626027e-05 6.626027e-05 1.391466e-03 6.626027e-05
## 10: 6.815703e-05 6.815703e-05 6.815703e-05 6.815703e-05 1.301799e-02
## 11: 6.149305e-05 6.764236e-04 6.149305e-05 6.149305e-05 2.263559e-01
## 12: 6.496881e-05 6.496881e-05 6.496881e-05 6.496881e-05 7.023129e-02
## 13: 3.449787e-02 6.260957e-05 1.314801e-03 6.260957e-05 1.947784e-01
## 14: 2.359174e-01 1.118092e-03 5.324247e-05 5.324247e-05 9.163028e-02
##            num12         doc
##  1: 1.050446e-01 2017_Q1.pdf
##  2: 1.141578e-01 2017_Q2.pdf
##  3: 3.080548e-01 2017_Q3.pdf
##  4: 3.325489e-01 2017_Q4.pdf
##  5: 5.359017e-02 2018_Q1.pdf
##  6: 5.448997e-05 2018_Q2.pdf
##  7: 1.082921e-03 2018_Q3.pdf
##  8: 1.118324e-01 2018_Q4.pdf
##  9: 4.247283e-02 2019_Q1.pdf
## 10: 1.500136e-01 2019_Q2.pdf
## 11: 3.572746e-02 2019_Q3.pdf
## 12: 1.689839e-01 2019_Q4.pdf
## 13: 6.260957e-05 2020_Q1.pdf
## 14: 5.324247e-05 2020_Q2.pdf
#group the topics by our own interpretation (no right or wrong here, since they are clusters)
topic_combine <- data.table('doc' = result$doc,
                            'monetary_policy' = result$num1,
                            'macro&fiscal' = result$num2,
                            'macro' = result$num4+
                              result$num8+result$num10+result$num6,
                            'exchange_rate' = result$num9,
                            'price&inflation' =result$num11,
                            'financial worry' = result$num12,
                            'financial' = result$num3,
                            'Trade' = result$num5,
                            'Tourism' = result$num7)
topic_combine[,c(2:10)] <- topic_combine[,c(2:10)] * 100
topic_combine
##             doc monetary_policy macro&fiscal      macro exchange_rate
##  1: 2017_Q1.pdf     0.007551729  0.007551729 0.03020692   0.007551729
##  2: 2017_Q2.pdf     0.128818550  0.006134217 0.02453687   0.006134217
##  3: 2017_Q3.pdf     0.004959825  9.775815891 0.76381311   0.004959825
##  4: 2017_Q4.pdf     0.006034275 22.272507845 0.08447985   0.006034275
##  5: 2018_Q1.pdf     0.005248793 27.771362587 0.12597103   0.005248793
##  6: 2018_Q2.pdf    15.916521360  1.313208370 0.02179599   0.005448997
##  7: 2018_Q3.pdf    23.984117162  0.314562706 0.02062706   0.056724422
##  8: 2018_Q4.pdf    10.714763753  0.006692545 0.02677018   0.006692545
##  9: 2019_Q1.pdf    33.468062550  1.066790353 0.88788762   0.006626027
## 10: 2019_Q2.pdf    18.068429662 10.571155943 3.50327154   0.006815703
## 11: 2019_Q3.pdf    22.820071332  0.498093715 0.08609027   0.006149305
## 12: 2019_Q4.pdf    12.675415800 14.624480249 0.02598753   0.006496881
## 13: 2020_Q1.pdf    10.837716003 10.086401202 0.02504383   0.131480090
## 14: 2020_Q2.pdf    15.605366841  0.005324247 0.23426685   0.005324247
##     price&inflation financial worry    financial    Trade      Tourism
##  1:    48.187584957    10.504455520 28.553088657 10.73101  1.971001359
##  2:    42.577597841    11.415777205  5.588271378 40.24660  0.006134217
##  3:    27.680785636    30.805475647 10.420593195 20.53864  0.004959825
##  4:    10.324643978    33.254887762  8.514361574 25.53102  0.006034275
##  5:     9.453075793     5.359017426 26.144236826 31.13059  0.005248793
##  6:    14.990191805     0.005448997 13.954882302 53.35113  0.441368788
##  7:     7.379331683     0.108292079 20.890057756 47.24113  0.005156766
##  8:     5.695355374    11.183241869  0.006692545 72.35310  0.006692545
##  9:     0.006626027     4.247283329  0.006626027 58.97827  1.331831434
## 10:     1.301799346    15.001363141  0.006815703 51.53353  0.006815703
## 11:    22.635592178     3.572746280  0.006149305 50.36896  0.006149305
## 12:     7.023128898    16.898388773  6.178534304 42.56107  0.006496881
## 13:    19.477836213     0.006260957 24.737039820 31.24843  3.449787127
## 14:     9.163028431     0.005324247  6.128207859 45.26142 23.591736769

Sentiment Part

#for every sentence containing word_dum, grab the word just to its left and
#right; if a captured chunk runs long (> 20 chars), retry with fallback
#patterns; finally, count how often each side word appears on each side
#(note: relies on the global `input` lining up row-for-row with input_text)
extract_side_words <- function(word_dum,input_text){
  num <- input[grepl(word_dum,input_text),]
  num$left <-gsub(paste0('(.*) (.*?) ',word_dum,' (.*?) (.*)'),'\\2',num$Text)
  num$right <-gsub(paste0('(.*) (.*?) ',word_dum,' (.*?) (.*)'),'\\3',num$Text)
  #left side
  for (i in 1:nrow(num)) {
    if(nchar(num$left[i]) > 20 ){
      word1 <- gsub(paste0('(.*?) ',word_dum,' (.*?) (.*)'),'\\1',num$Text[i])
      word2 <- gsub(paste0('(.*) (.*?) ',word_dum,' (.*)'),'\\2',num$Text[i])
      word3 <- gsub(paste0('(.*) (.*?) ',word_dum),'\\2',num$Text[i])
      
      score <- rbind(word1,word2,word3) %>% as.vector()
      score <- score %>%  data.frame('text' = .)%>% mutate(len = nchar(text)) %>% filter(len < 20)
      if(nrow(score) == 0){
        word1 <- gsub(paste0('(.*) (.*?) ',word_dum,' (.*?)'),'\\3',num$Text[i])
        word2 <- gsub(paste0('(.*?) ',word_dum,' (.*?) (.*)'),'\\1',num$Text[i])
        word3 <- gsub(paste0(word_dum,' (.*?) (.*)'),'\\1',num$Text[i])
        word4 <- gsub(paste0('(.*)',word_dum,' (.*)'),'\\2',num$Text[i])
        score <- rbind(word1,word2,word3,word4) %>% as.vector()
        score <- score %>%  data.frame('text' = .)%>% mutate(len = nchar(text)) %>% filter(len < 20)
        if(nrow(score) == 0){
          num$left[i] <- num$left[i]
        }else{
          num$left[i] <- score$text[1]
        }
      }else if (nrow(score) != 0){
        num$left[i] <- score$text[1]
      }
    }
  }
  #right side
  for (i in 1:nrow(num)) {
    if(nchar(num$right[i]) > 20 ){
      word1 <- gsub(paste0('(.*?) ',word_dum,' (.*?) (.*)'),'\\1',num$Text[i])
      word2 <- gsub(paste0('(.*) (.*?) ',word_dum,' (.*)'),'\\2',num$Text[i])
      word3 <- gsub(paste0('(.*) (.*?) ',word_dum),'\\2',num$Text[i])
      
      score <- rbind(word1,word2,word3) %>% as.vector()
      score <- score %>%  data.frame('text' = .)%>% mutate(len = nchar(text)) %>% filter(len < 20)
      if(nrow(score) == 0){
        word1 <- gsub(paste0('(.*) (.*?) ',word_dum,' (.*?)'),'\\3',num$Text[i])
        word2 <- gsub(paste0('(.*?) ',word_dum,' (.*?) (.*)'),'\\1',num$Text[i])
        word3 <- gsub(paste0(word_dum,' (.*?) (.*)'),'\\1',num$Text[i])
        word4 <- gsub(paste0('(.*)',word_dum,' (.*)'),'\\2',num$Text[i])
        score <- rbind(word1,word2,word3,word4) %>% as.vector()
        score <- score %>%  data.frame('text' = .)%>% mutate(len = nchar(text)) %>% filter(len < 20)
        if(nrow(score) == 0){
          num$right[i] <- num$right[i]
        }else{
          num$right[i] <- score$text[1]
        }
      }else if (nrow(score) != 0){
        num$right[i] <- score$text[1]
      }
    }
  }
  #check position
  word <- append(num$left,num$right) %>% unique()
  left <- data.table()
  for (i in 1:length(word)) {
    left_word <- paste(word[i],word_dum)
    count_left = sum(grepl(left_word,num$Text))
    if(count_left != 0){
      data_left <- data.table('side_word' = word[i],'direction' = 'left_side',
                              'count' = count_left)
      left <- rbind(left,data_left)
    }else{next}  
  }
  right <- data.table()
  for (i in 1:length(word)) {
    right_word <- paste(word_dum,word[i])
    count_right = sum(grepl(right_word,num$Text))
    if(count_right != 0){
      data_right <- data.table('side_word' = word[i],'direction' = 'right_side',
                               'count' = count_right)
      right <- rbind(right,data_right)
    }else{next} 
  }
  combine <- rbind(left,right)
  combine$target <- word_dum
  combine <- combine %>% select(target,side_word,direction,count) %>% arrange(desc(count))
  return(combine)
}

#the idea: take the word to the left or right of each keyword extracted from the LDA and read sentiment from it, e.g. policy rate cut >> a cut to the policy rate, and which direction that points

#example
extract_side_words('policy rate',input$Text)
##          target       side_word  direction count
##  1: policy rate         percent right_side    25
##  2: policy rate        maintain  left_side    16
##  3: policy rate           raise  left_side    13
##  4: policy rate             cut  left_side    13
##  5: policy rate      percentage right_side    11
##  6: policy rate             cut right_side     8
##  7: policy rate            real  left_side     5
##  8: policy rate            hold right_side     5
##  9: policy rate        increase right_side     4
## 10: policy rate            vote  left_side     3
## 11: policy rate          remain  left_side     3
## 12: policy rate      assessment  left_side     3
## 13: policy rate        increase  left_side     3
## 14: policy rate          reduce  left_side     3
## 15: policy rate         include  left_side     3
## 16: policy rate            line  left_side     3
## 17: policy rate          remain right_side     3
## 18: policy rate         support right_side     3
## 19: policy rate       unchanged right_side     3
## 20: policy rate             due right_side     3
## 21: policy rate microprudential right_side     3
## 22: policy rate            bank  left_side     2
## 23: policy rate            view  left_side     2
## 24: policy rate          expect  left_side     2
## 25: policy rate           lower  left_side     2
## 26: policy rate             low  left_side     2
## 27: policy rate            curb right_side     2
## 28: policy rate            half right_side     2
## 29: policy rate           close  left_side     1
## 30: policy rate         current  left_side     1
## 31: policy rate     unanimously  left_side     1
## 32: policy rate            rise  left_side     1
## 33: policy rate           ahead  left_side     1
## 34: policy rate   accommodative  left_side     1
## 35: policy rate        currency  left_side     1
## 36: policy rate   international  left_side     1
## 37: policy rate        negative  left_side     1
## 38: policy rate        purchase  left_side     1
## 39: policy rate            hold  left_side     1
## 40: policy rate            bank right_side     1
## 41: policy rate           raise right_side     1
## 42: policy rate             low right_side     1
## 43: policy rate         federal right_side     1
## 44: policy rate           march right_side     1
## 45: policy rate      government right_side     1
## 46: policy rate       stabilize right_side     1
## 47: policy rate            weak right_side     1
## 48: policy rate               4 right_side     1
## 49: policy rate        previous right_side     1
## 50: policy rate    continuously right_side     1
## 51: policy rate      additional right_side     1
## 52: policy rate        majority right_side     1
## 53: policy rate       september right_side     1
## 54: policy rate         october right_side     1
## 55: policy rate         decline right_side     1
## 56: policy rate       financial right_side     1
## 57: policy rate           range right_side     1
## 58: policy rate        announce right_side     1
## 59: policy rate        regional right_side     1
## 60: policy rate         compute right_side     1
## 61: policy rate       reduction right_side     1
##          target       side_word  direction count
#select our main word from each topic
all_topics = tidy(model,matrix = 'beta')
## Warning: `tbl_df()` is deprecated as of dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
all_topics$beta <- all_topics$beta*1000
all_top_terms <- all_topics %>% arrange(desc(beta)) %>%
  group_by(topic) %>% filter(row_number() <= 100)
#keep the top-100 terms per topic
filtered_word <- all_top_terms %>% group_by(topic) %>% filter(beta > median(beta))
#then keep only terms with beta above the topic median
#map to topic_name
group_topic <- function(topic_num){
  topic_num <- as.character(topic_num)
  topic_num <- str_split(topic_num,pattern = ',') %>% unlist()
  if(length(topic_num) > 1){
    topic_num = topic_num[1]
  }
  
  if(topic_num == '1'){
    value = 'monetary_policy'
  }else if(topic_num == '2'){
    value = 'macro&fiscal'
  }else if(topic_num == '4'|topic_num =='6'|
           topic_num == '8'|topic_num == '10'){
    value = 'macro'
  }else if(topic_num == '3'){
    value ='financial'
  }else if(topic_num == '9'){
    value = 'exchange_rate'
  }else if(topic_num == '5'){
    value = 'Trade'
  }else if(topic_num == '7'){
    value = 'Tourism'
  }else if(topic_num == '11'){
    value = 'price&inflation'
  }else if(topic_num == '12'){
    value = 'financial worry'
  }
  return(value)
}
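
The same mapping written as a lookup table, for reference (an equivalent sketch, not the author's code):

topic_map <- c('1' = 'monetary_policy', '2' = 'macro&fiscal', '3' = 'financial',
               '4' = 'macro', '5' = 'Trade', '6' = 'macro', '7' = 'Tourism',
               '8' = 'macro', '9' = 'exchange_rate', '10' = 'macro',
               '11' = 'price&inflation', '12' = 'financial worry')
group_topic2 <- function(topic_num){
  first <- str_split(as.character(topic_num), ',')[[1]][1] #same first-element rule as above
  unname(topic_map[first])
}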
filtered_word <- filtered_word %>% mutate(topic_name = group_topic(topic))
filtered_word
## # A tibble: 435 x 4
## # Groups:   topic [12]
##    topic term                 beta topic_name     
##    <int> <chr>               <dbl> <chr>          
##  1     1 policy rate          2.45 monetary_policy
##  2    12 financial stability  1.92 financial worry
##  3    11 headline inflation   1.71 price&inflation
##  4     5 trade partner        1.71 Trade          
##  5     1 central bank         1.50 monetary_policy
##  6     3 bond yield           1.39 financial      
##  7    11 oil price            1.32 price&inflation
##  8     5 merchandise export   1.22 Trade          
##  9     2 private consumption  1.19 macro&fiscal   
## 10    11 core inflation       1.19 price&inflation
## # ... with 425 more rows

At this step we have to work out for ourselves which words carry the sentiment we want. Splitting them out, labeling them in Excel, and bringing them back in works fine.

topic_name1 <- filtered_word %>% filter(topic_name == 'monetary_policy')
topic_side_word <- data.table()
for (i in topic_name1$term) {
  temp <- extract_side_words(i,input$Text)
  topic_side_word <- rbind(topic_side_word,temp)
}
topic_side_word
##                     target  side_word  direction count
##   1:           policy rate    percent right_side    25
##   2:           policy rate   maintain  left_side    16
##   3:           policy rate      raise  left_side    13
##   4:           policy rate        cut  left_side    13
##   5:           policy rate percentage right_side    11
##  ---                                                  
## 474:      regional central       rate  left_side     1
## 475: regional central bank    include right_side     2
## 476: regional central bank       ease right_side     2
## 477: regional central bank       hand  left_side     1
## 478: regional central bank       rate  left_side     1
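
One way to hand those candidate side words to Excel for labeling (a sketch; the file name is illustrative):

fwrite(topic_side_word, 'side_words_to_label.csv')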

After splitting the words out and labeling them in Excel:

#after compiling the full sentiment lists
sentiment_bot <- read.csv('C:\\Few_pila\\MPR_BOT\\sentiment_bot-Sheet1.csv')
sentiment_bot_special <-read.csv('C:\\Few_pila\\MPR_BOT\\sentiment_bot_special-Sheet1.csv')
sentiment_bot
##        topic         terms direction
## 1          1      increase        up
## 2          1         raise        up
## 3          1          rise        up
## 4          1    facilitate        up
## 5          1       improve        up
## 6          1        expand        up
## 7          1     implement        up
## 8          1   improvement        up
## 9          1    outperform        up
## 10         1  acommodative        up
## 11         1      increase        up
## 12         1         raise        up
## 13         1       improve        up
## 14         1    relaxation        up
## 15         1      maintain   neutral
## 16         1        remain   neutral
## 17         1          hold   neutral
## 18         1           cut      down
## 19         1      volatile      down
## 20         1        weaken      down
## 21         1        dovish      down
## 22         1        reduce      down
## 23         1          weak      down
## 24         1     reduction      down
## 25         1  acommodative      down
## 26         2  acommodative        up
## 27         2      increase        up
## 28         2         raise        up
## 29         2       improve        up
## 30         2    relaxation        up
## 31         2        remain   neutral
## 32         2      maintain   neutral
## 33         2          hold   neutral
## 34         2          stay   neutral
## 35         2      decrease      down
## 36         2          fall      down
## 37         2      negative      down
## 38         2 vulnerability      down
## 39         2      negative      down
## 40         2           cut      down
## 41         2       decline      down
## 42         7        expand        up
## 43         7          rise        up
## 44         7       improve        up
## 45         7      increase        up
## 46         7          lead        up
## 47         7    relaxation        up
## 48         7   improvement        up
## 49         7     expansion        up
## 50         7      slowdown      down
## 51         7         covid      down
## 52         7    disruption      down
## 53         7         short      down
## 54         7  decreasedrop      down
## 55         7       fragile      down
## 56         7      pandemic      down
## 57         7      outbreak      down
## 58         7    decelerate      down
## 59         5      continue        up
## 60         5   improvement        up
## 61         5    outperform        up
## 62         5       sustain        up
## 63         5      strength        up
## 64         5     stimulate        up
## 65         5      stimulus        up
## 66         5        remain   neutral
## 67         5      maintain   neutral
## 68         5      slowdown      down
## 69         5     intensify      down
## 70         5      slowdown      down
## 71         5       tension      down
## 72         5   contraction      down
## 73         5      pandemic      down
## 74         5 protectionist      down
## 75         5         short      down
## 76         5       downard      down
## 77         5       decline      down
## 78         9    strengthen        up
## 79         9        remain   neutral
## 80         9    depreciate      down
## 81         3  accomodative        up
## 82         3      increase        up
## 83         3         raise        up
## 84         3          rise        up
## 85         3      addition        up
## 86         3       improve        up
## 87         3        remain   neutral
## 88         3          hold   neutral
## 89         3      unchange   neutral
## 90         3          stay   neutral
## 91         3           cut      down
## 92         3       decline      down
## 93         3          fall      down
## 94         3       tighten      down
## 95         3     reduction      down
## 96        11      increase        up
## 97        11          rise        up
## 98        11          pull        up
## 99        11       improve        up
## 100       11        upward        up
## 101       11        remain   neutral
## 102       11          hold   neutral
## 103       11     unchanged   neutral
## 104       11           low      down
## 105       11       decline      down
## 106       11      downward      down
## 107       11      negative      down
## 108       11          fall      down
## 109       11      downside      down
## 110       11      decrease      down
## 111       11    decelerate      down
## 112       11     reduction      down
## 113 4,6,8,10      continue        up
## 114 4,6,8,10     expansion        up
## 115 4,6,8,10    accelerate        up
## 116 4,6,8,10       achieve        up
## 117 4,6,8,10          gain        up
## 118 4,6,8,10     extension        up
## 119 4,6,8,10    continuous        up
## 120 4,6,8,10      increase        up
## 121 4,6,8,10        remain   neutral
## 122 4,6,8,10       concern      down
## 123 4,6,8,10 deterioration      down
## 124 4,6,8,10    disruption      down
## 125 4,6,8,10       tighten      down
## 126 4,6,8,10    facilitate      down
## 127 4,6,8,10          weak      down
## 128 4,6,8,10      decrease      down
## 129 4,6,8,10   contraction      down

For cases where we already know the full phrase, we can add it in directly:

sentiment_bot_special 
##    topic                     special_word direction
## 1      1            accomodative monetary        up
## 2      2                  continue expand        up
## 3      2              trade protectionism      down
## 4     12      concern financial stability      down
## 5     12         risk financial stability      down
## 6     12       facilitate economic growth      down
## 7     12    pose vulnerabillity financial      down
## 8     12   build vulnerabillity financial      down
## 9     12 buildup vulnerabillity financial      down
## 10    12       consistent economic growth   neutral
## 11    12          remain inflation target   neutral
## 12    12       remain financial stability        up
## 13    12      improve financial stability        up
## 14    12       ensure financial stability        up
## 15    12    continutation economic growth        up
## 16    12         continue economic growth        up
## 17    12             rise economic growth        up
## 18    12       continuous economic growth        up
## 19    12        expansion economic growth        up
## 20    12             gain economic growth        up
## 21    12               remain pocket risk        up
## 22    12            rise inflation target        up
## 23    12        increase inflation target        up
sentiment_bot <- sentiment_bot  %>% group_by(topic) %>% mutate(topic_name = group_topic(topic))

sentiment_bot_special <- sentiment_bot_special  %>% group_by(topic) %>% mutate(topic_name = group_topic(topic))

sentiment_bot
## # A tibble: 129 x 4
## # Groups:   topic [8]
##    topic terms        direction topic_name     
##    <chr> <chr>        <chr>     <chr>          
##  1 1     increase     up        monetary_policy
##  2 1     raise        up        monetary_policy
##  3 1     rise         up        monetary_policy
##  4 1     facilitate   up        monetary_policy
##  5 1     improve      up        monetary_policy
##  6 1     expand       up        monetary_policy
##  7 1     implement    up        monetary_policy
##  8 1     improvement  up        monetary_policy
##  9 1     outperform   up        monetary_policy
## 10 1     acommodative up        monetary_policy
## # ... with 119 more rows

#extract keyword + sentiment pairs and search for them in the input (our documents)

topic_name_x <- unique(filtered_word$topic_name)[1]
topic_word <- filtered_word %>% filter(topic_name == topic_name_x)
side_word <-data.table()
for (i in topic_word$term) {
  temp <- extract_side_words(i,input$Text)
  side_word <- rbind(side_word,temp)
}
direction_word <- sentiment_bot %>% filter(topic_name == topic_name_x)
colnames(direction_word)[2] <- 'side_word'
colnames(direction_word)[3] <- 'sentiment'
combine = direction_word %>% inner_join(side_word,by = 'side_word') %>% mutate(full_word = ifelse(direction == 'right_side',paste(target,side_word),
                            paste(side_word,target))) %>% ungroup()
combine
## # A tibble: 81 x 8
##    topic side_word sentiment topic_name   target   direction count full_word    
##    <chr> <chr>     <chr>     <chr>        <chr>    <chr>     <int> <chr>        
##  1 1     increase  up        monetary_po~ policy ~ right_si~     4 policy rate ~
##  2 1     increase  up        monetary_po~ policy ~ left_side     3 increase pol~
##  3 1     increase  up        monetary_po~ inflati~ left_side     1 increase inf~
##  4 1     increase  up        monetary_po~ bank re~ right_si~     1 bank region ~
##  5 1     increase  up        monetary_po~ build p~ left_side     2 increase bui~
##  6 1     increase  up        monetary_po~ central~ right_si~     1 central bank~
##  7 1     increase  up        monetary_po~ domesti~ right_si~     1 domestic dem~
##  8 1     raise     up        monetary_po~ policy ~ left_side    13 raise policy~
##  9 1     raise     up        monetary_po~ policy ~ right_si~     1 policy rate ~
## 10 1     raise     up        monetary_po~ central~ right_si~     3 central bank~
## # ... with 71 more rows
sentiment_word <- combine %>% select(full_word,sentiment,topic_name)
sentiment_word
## # A tibble: 81 x 3
##    full_word                    sentiment topic_name     
##    <chr>                        <chr>     <chr>          
##  1 policy rate increase         up        monetary_policy
##  2 increase policy rate         up        monetary_policy
##  3 increase inflation target    up        monetary_policy
##  4 bank region increase         up        monetary_policy
##  5 increase build policy        up        monetary_policy
##  6 central bank region increase up        monetary_policy
##  7 domestic demand increase     up        monetary_policy
##  8 raise policy rate            up        monetary_policy
##  9 policy rate raise            up        monetary_policy
## 10 central bank raise           up        monetary_policy
## # ... with 71 more rows
#by date
sentiment_count <- data.table()
for (i in unique(input$date)) {
  input_data <- input %>% filter(date == i)
  temp <- sentiment_word %>% group_by(full_word) %>% mutate(count = sum(grepl(full_word,input_data$Text)))
  temp$doc <- i
  sentiment_count <- rbind(sentiment_count,temp)
}
sentiment_count
##                          full_word sentiment      topic_name count         doc
##    1:         policy rate increase        up monetary_policy     0 2017_Q1.pdf
##    2:         increase policy rate        up monetary_policy     0 2017_Q1.pdf
##    3:    increase inflation target        up monetary_policy     0 2017_Q1.pdf
##    4:         bank region increase        up monetary_policy     0 2017_Q1.pdf
##    5:        increase build policy        up monetary_policy     0 2017_Q1.pdf
##   ---                                                                         
## 1130:         policy stance dovish      down monetary_policy     0 2020_Q2.pdf
## 1131: central bank maintain dovish      down monetary_policy     0 2020_Q2.pdf
## 1132:           reduce policy rate      down monetary_policy     0 2020_Q2.pdf
## 1133:             policy rate weak      down monetary_policy     0 2020_Q2.pdf
## 1134:        policy rate reduction      down monetary_policy     1 2020_Q2.pdf
sentiment_only <- sentiment_count %>% group_by(sentiment,doc) %>%
  summarise(count = sum(count)) %>% ungroup()
## `summarise()` regrouping output by 'sentiment' (override with `.groups` argument)
sentiment_only
## # A tibble: 42 x 3
##    sentiment doc         count
##    <chr>     <chr>       <int>
##  1 down      2017_Q1.pdf     1
##  2 down      2017_Q2.pdf     1
##  3 down      2017_Q3.pdf     1
##  4 down      2017_Q4.pdf     0
##  5 down      2018_Q1.pdf     0
##  6 down      2018_Q2.pdf     1
##  7 down      2018_Q3.pdf     2
##  8 down      2018_Q4.pdf     1
##  9 down      2019_Q1.pdf     0
## 10 down      2019_Q2.pdf     6
## # ... with 32 more rows
sentiment_ratio <- sentiment_only %>% spread(sentiment,count) %>%
  mutate(sentiment_ratio = (up-down)/(sum(up+down+neutral)) * 100)
sentiment_ratio #for one topic
## # A tibble: 14 x 5
##    doc          down neutral    up sentiment_ratio
##    <chr>       <int>   <int> <int>           <dbl>
##  1 2017_Q1.pdf     1       1     0          -0.483
##  2 2017_Q2.pdf     1       3     3           0.966
##  3 2017_Q3.pdf     1       4     6           2.42 
##  4 2017_Q4.pdf     0       4     9           4.35 
##  5 2018_Q1.pdf     0       4     9           4.35 
##  6 2018_Q2.pdf     1       5    22          10.1  
##  7 2018_Q3.pdf     2       9    23          10.1  
##  8 2018_Q4.pdf     1       3    10           4.35 
##  9 2019_Q1.pdf     0      10     5           2.42 
## 10 2019_Q2.pdf     6       7     2          -1.93 
## 11 2019_Q3.pdf    12       3     3          -4.35 
## 12 2019_Q4.pdf     9       6     1          -3.86 
## 13 2020_Q1.pdf     8       5     1          -3.38 
## 14 2020_Q2.pdf     2       4     1          -0.483

Wrap it up

sentiment_data <- data.table()
for (name in unique(filtered_word$topic_name)) {
  topic_name_x = name  
  topic_word <- filtered_word %>% filter(topic_name == topic_name_x)
  side_word <-data.table()
  for (i in topic_word$term) {
    temp <- extract_side_words(i,input$Text)
    side_word <- rbind(side_word,temp)
  }
  direction_word <- sentiment_bot %>% filter(topic_name == topic_name_x)
  colnames(direction_word)[2] <- 'side_word'
  colnames(direction_word)[3] <- 'sentiment'
  combine = direction_word %>% inner_join(side_word,by = 'side_word') %>%
    mutate(full_word = ifelse(direction == 'right_side',paste(target,side_word),
                              paste(side_word,target))) %>% ungroup()
  
  sentiment_word <- combine %>% select(full_word,sentiment,topic_name)
  
  #combine with special case
  special_word <- sentiment_bot_special %>% filter(topic_name == topic_name_x)
  if(nrow(special_word) != 0){
    special_word <- special_word %>% ungroup() %>% select(special_word,direction,topic_name)
    colnames(special_word) <- c('full_word','sentiment','topic_name')
    sentiment_word <- rbind(sentiment_word,special_word)
  }else{sentiment_word = sentiment_word}
  
  #by date
  sentiment_count <- data.table()
  for (i in unique(input$date)) {
    input_data <- input %>% filter(date == i)
    temp <- sentiment_word %>% group_by(full_word) %>% 
      mutate(count = sum(grepl(full_word,input_data$Text)))
    temp$doc <- i
    sentiment_count <- rbind(sentiment_count,temp)
  }
  #combine count of sentiment
  sentiment_only <- sentiment_count %>% group_by(sentiment,doc) %>%
    summarise(count = sum(count)) %>% ungroup()
  
  if(any(grepl('neutral',sentiment_only$sentiment))){
  #sentiment ratio = (up - down) / (up + down + neutral)
  sentiment_ratio <- sentiment_only %>% spread(sentiment,count) %>%
    mutate(sentiment_ratio = (up-down)/(sum(up+down+neutral)) * 100)
  }else{
    sentiment_ratio <- sentiment_only %>% spread(sentiment,count) %>%
    mutate(sentiment_ratio = (up-down)/(sum(up+down)) * 100)
    sentiment_ratio$neutral = 0
    sentiment_ratio <- sentiment_ratio %>% select(doc,down,neutral,up,
                                                  sentiment_ratio)
  }
  
  sentiment_ratio$topic_name <- paste0('sentiment_',topic_name_x)
  #BIND
  sentiment_data <- rbind(sentiment_data,sentiment_ratio)
}
sentiment_data
##              doc down neutral up sentiment_ratio                topic_name
##   1: 2017_Q1.pdf    1       0  0      -0.7194245 sentiment_monetary_policy
##   2: 2017_Q2.pdf    1       0  3       1.4388489 sentiment_monetary_policy
##   3: 2017_Q3.pdf    1       0  6       3.5971223 sentiment_monetary_policy
##   4: 2017_Q4.pdf    0       0  9       6.4748201 sentiment_monetary_policy
##   5: 2018_Q1.pdf    0       0  9       6.4748201 sentiment_monetary_policy
##  ---                                                                      
## 122: 2019_Q2.pdf    5       0  0      -7.3529412         sentiment_Tourism
## 123: 2019_Q3.pdf    3       0  0      -4.4117647         sentiment_Tourism
## 124: 2019_Q4.pdf    0       0  0       0.0000000         sentiment_Tourism
## 125: 2020_Q1.pdf   11       0  1     -14.7058824         sentiment_Tourism
## 126: 2020_Q2.pdf   14       0  5     -13.2352941         sentiment_Tourism
#combine them together
col_word = colnames(topic_combine)[c(2:10)]
topic_sentiment <- data.table()
for (word in col_word) {
  key_word <- paste0('sentiment_',word)
  one_topic = sentiment_data %>% filter(topic_name == key_word) %>% select(sentiment_ratio)
  
  colnames(one_topic) = key_word
  topic_sentiment <- cbind(topic_sentiment,one_topic)
}
#final result
topic_modeling <- data.table(topic_combine,topic_sentiment)
topic_modeling <- topic_modeling %>% select(doc,monetary_policy,sentiment_monetary_policy,
                                            `macro&fiscal`,`sentiment_macro&fiscal`,
                                            macro,sentiment_macro,
                                            exchange_rate,sentiment_exchange_rate,
                                            `price&inflation`,`sentiment_price&inflation`,
                                            `financial worry`,`sentiment_financial worry`,
                                            financial,sentiment_financial,
                                            Trade,sentiment_Trade,
                                            Tourism,sentiment_Tourism)
topic_modeling
##             doc monetary_policy sentiment_monetary_policy macro&fiscal
##  1: 2017_Q1.pdf     0.007551729                -0.7194245  0.007551729
##  2: 2017_Q2.pdf     0.128818550                 1.4388489  0.006134217
##  3: 2017_Q3.pdf     0.004959825                 3.5971223  9.775815891
##  4: 2017_Q4.pdf     0.006034275                 6.4748201 22.272507845
##  5: 2018_Q1.pdf     0.005248793                 6.4748201 27.771362587
##  6: 2018_Q2.pdf    15.916521360                15.1079137  1.313208370
##  7: 2018_Q3.pdf    23.984117162                15.1079137  0.314562706
##  8: 2018_Q4.pdf    10.714763753                 6.4748201  0.006692545
##  9: 2019_Q1.pdf    33.468062550                 3.5971223  1.066790353
## 10: 2019_Q2.pdf    18.068429662                -2.8776978 10.571155943
## 11: 2019_Q3.pdf    22.820071332                -6.4748201  0.498093715
## 12: 2019_Q4.pdf    12.675415800                -5.7553957 14.624480249
## 13: 2020_Q1.pdf    10.837716003                -5.0359712 10.086401202
## 14: 2020_Q2.pdf    15.605366841                -0.7194245  0.005324247
##     sentiment_macro&fiscal      macro sentiment_macro exchange_rate
##  1:               3.846154 0.03020692        0.000000   0.007551729
##  2:               8.974359 0.02453687        7.608696   0.006134217
##  3:               8.974359 0.76381311        6.521739   0.004959825
##  4:              11.538462 0.08447985        6.521739   0.006034275
##  5:              16.666667 0.12597103       10.869565   0.005248793
##  6:               6.410256 0.02179599        3.260870   0.005448997
##  7:              -6.410256 0.02062706        4.347826   0.056724422
##  8:             -10.256410 0.02677018       -1.086957   0.006692545
##  9:               3.846154 0.88788762        3.260870   0.006626027
## 10:               2.564103 3.50327154       -4.347826   0.006815703
## 11:               1.282051 0.08609027        1.086957   0.006149305
## 12:               1.282051 0.02598753        1.086957   0.006496881
## 13:               0.000000 0.02504383       -1.086957   0.131480090
## 14:              -2.564103 0.23426685       -3.260870   0.005324247
##     sentiment_exchange_rate price&inflation sentiment_price&inflation
##  1:                 0.00000    48.187584957                 2.0270270
##  2:                 0.00000    42.577597841                -2.0270270
##  3:                 0.00000    27.680785636                -5.4054054
##  4:                 0.00000    10.324643978                 2.0270270
##  5:                 0.00000     9.453075793                 0.6756757
##  6:                33.33333    14.990191805                 5.4054054
##  7:                 0.00000     7.379331683                 2.7027027
##  8:               -33.33333     5.695355374                -2.7027027
##  9:                 0.00000     0.006626027                -2.0270270
## 10:                 0.00000     1.301799346                 0.0000000
## 11:                 0.00000    22.635592178                 4.0540541
## 12:                 0.00000     7.023128898                 1.3513514
## 13:               -33.33333    19.477836213                -4.0540541
## 14:                 0.00000     9.163028431                -2.0270270
##     financial worry sentiment_financial worry    financial sentiment_financial
##  1:    10.504455520                  3.448276 28.553088657           6.8376068
##  2:    11.415777205                 10.344828  5.588271378          -2.5641026
##  3:    30.805475647                 13.793103 10.420593195          -7.6923077
##  4:    33.254887762                 -6.896552  8.514361574           4.2735043
##  5:     5.359017426                 -3.448276 26.144236826           5.1282051
##  6:     0.005448997                  3.448276 13.954882302           9.4017094
##  7:     0.108292079                 10.344828 20.890057756          11.1111111
##  8:    11.183241869                  3.448276  0.006692545           4.2735043
##  9:     4.247283329                 -3.448276  0.006626027           3.4188034
## 10:    15.001363141                  0.000000  0.006815703          -0.8547009
## 11:     3.572746280                  3.448276  0.006149305          -4.2735043
## 12:    16.898388773                 10.344828  6.178534304          -5.1282051
## 13:     0.006260957                  0.000000 24.737039820          -5.9829060
## 14:     0.005324247                  0.000000  6.128207859          -5.1282051
##        Trade sentiment_Trade      Tourism sentiment_Tourism
##  1: 10.73101       1.9108280  1.971001359          0.000000
##  2: 40.24660       3.1847134  0.006134217          1.470588
##  3: 20.53864       6.3694268  0.004959825          2.941176
##  4: 25.53102       3.1847134  0.006034275          8.823529
##  5: 31.13059       3.8216561  0.005248793          4.411765
##  6: 53.35113       3.8216561  0.441368788          1.470588
##  7: 47.24113       1.2738854  0.005156766          2.941176
##  8: 72.35310      -3.1847134  0.006692545          1.470588
##  9: 58.97827      -4.4585987  1.331831434          1.470588
## 10: 51.53353      -9.5541401  0.006815703         -7.352941
## 11: 50.36896      -7.6433121  0.006149305         -4.411765
## 12: 42.56107      -3.1847134  0.006496881          0.000000
## 13: 31.24843      -0.6369427  3.449787127        -14.705882
## 14: 45.26142      -1.9108280 23.591736769        -13.235294
#topic_modeling %>% select()
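
A quick way to eyeball the final table (a sketch: one line per topic's sentiment ratio across quarters, using the columns built above):

plot_data <- topic_modeling %>%
  select(doc, starts_with('sentiment_')) %>%
  pivot_longer(-doc, names_to = 'topic', values_to = 'ratio')
ggplot(plot_data, aes(doc, ratio, colour = topic, group = topic)) +
  geom_line() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))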