1. CoreNLP

1.1 Environment Setup and Package Installation

library(dplyr)
library(stringr)
library(ggplot2)
library(xml2)
library(httr)
library(jsonlite)
library(NLP)
library(igraph)
library(sentimentr)
library(tidytext)
library(wordcloud2)
library(tidyr)
library(scales)
library(lubridate)
library(data.table)
library(data.tree)
load(file = "tweets_coreNLP.RData") # restore previously saved coreNLP results

Data-loading function

fun <- function(t){
  # collect every csv file path under the directory (recursively)
  files <- list.files(path = t, pattern = "\\.csv$", recursive = TRUE, full.names = TRUE)
  df2 <- data.frame()
  
  for(file in files) {
    df1 <- read.csv(file) # read one file
    df2 <- rbind(df2, df1) # append it to the combined data frame
  }
  return(df2)
}
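
A minimal usage sketch; the ./reddit/ directory name is hypothetical:

#df <- fun("./reddit/") # combine every csv under ./reddit/ into one data frame
#str(df)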

Data-wrangling functions

Extract the tokenization results from the returned objects, and output them in tidy-data format

coreNLP_tokens_parser <- function(coreNLP_objects){
  
  result <- do.call(rbind, lapply(coreNLP_objects, function(obj){
    original_data <- obj$data
    doc <- obj$doc
    sentences <- doc$sentences
    # only the first sentence of each document is parsed here
    sen <- sentences[[1]]
    
    tokens <- do.call(rbind, lapply(sen$tokens, function(x){
      data.frame(word=x$word, lemma=x$lemma, pos=x$pos, ner=x$ner)
    }))
    
    # turn the original row (a named vector) back into a one-row data frame,
    # repeat it once per token, then bind the token columns on
    tokens <- original_data %>%
      t() %>% 
      data.frame() %>% 
      select(-text) %>% 
      slice(rep(1:n(), each = nrow(tokens))) %>% 
      bind_cols(tokens)
    
    tokens
  }))
  return(result)
}
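
Note that the parser above keeps only the first sentence of each document (sentences[[1]]). A sketch of a variant that flattens every sentence instead, with a sentence index (illustrative, not part of the original pipeline):

coreNLP_all_tokens <- function(doc){
  # flatten the tokens of every sentence, tagging each with its sentence index
  do.call(rbind, lapply(seq_along(doc$sentences), function(i){
    toks <- do.call(rbind, lapply(doc$sentences[[i]]$tokens, function(x){
      data.frame(word=x$word, lemma=x$lemma, pos=x$pos, ner=x$ner)
    }))
    toks$sentence_id <- i
    toks
  }))
}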

#Extract the dependency relations between words from the returned core-nlp objects, output in tidy-data format

coreNLP_dependency_parser <- function(coreNLP_objects){
  result <- do.call(rbind, lapply(coreNLP_objects, function(obj){
    original_data <- obj$data
    doc <- obj$doc
    sentences <- doc$sentences
    # only the first sentence of each document is parsed here
    sen <- sentences[[1]]
    dependencies <- do.call(rbind, lapply(sen$basicDependencies, function(x){
      data.frame(dep=x$dep, governor=x$governor, governorGloss=x$governorGloss,
                 dependent=x$dependent, dependentGloss=x$dependentGloss)
    }))
  
    # repeat the original row once per dependency, then bind the columns on
    dependencies <- original_data %>%
      t() %>% 
      data.frame() %>% 
      select(-text) %>% 
      slice(rep(1:n(), each = nrow(dependencies))) %>% 
      bind_cols(dependencies)
    dependencies
  }))
  return(result)
}

#Extract the sentence-level sentiment from the returned core-nlp objects, output in tidy-data format

coreNLP_sentiment_parser <- function(coreNLP_objects){
  result <- do.call(rbind, lapply(coreNLP_objects, function(obj){
    original_data <- obj$data
    doc <- obj$doc
    sentences <- doc$sentences
    # only the first sentence's sentiment is kept
    sen <- sentences[[1]]
    
    sentiment <- original_data %>%
      t() %>% 
      data.frame() %>% 
      bind_cols(data.frame(sentiment=sen$sentiment, sentimentValue=sen$sentimentValue))
  
    sentiment
  }))
  return(result)
}

#Display the parse result graphically as a tree

parse2tree <- function(ptext) {
  stopifnot(require(NLP) && require(igraph) && require(data.tree))

  ## Replace words with unique versions
  ms <- gregexpr("[^() ]+", ptext)                                      # everything that isn't a bracket or space
  words <- regmatches(ptext, ms)[[1]]                                   # just the words
  regmatches(ptext, ms) <- list(paste0(words, seq.int(length(words))))  # append an id to each word

  ## Going to construct an edgelist and build a tree from it
  ## allocate here since we know the size (number of nodes - 1, and -1 more to exclude 'TOP')
  edgelist <- matrix('', nrow=length(words)-2, ncol=2)

  ## Function to fill in the edgelist in place
  edgemaker <- (function() {
    i <- 0                                       # row counter
    g <- function(node) {                        # the recursive function
      if (inherits(node, "Tree")) {              # only recurse into subtrees
        if ((val <- node$value) != 'TOP1') {     # skip the 'TOP' node (the '1' was appended above)
          for (child in node$children) {
            childval <- if(inherits(child, "Tree")) child$value else child
            i <<- i+1
            edgelist[i,1:2] <<- c(val, childval)
          }
        }
        invisible(lapply(node$children, g))
      }
    }
  })()

  ## Create the edgelist from the parse tree, then convert it to a data.tree
  edgemaker(Tree_parse(ptext))
  tree <- FromDataFrameNetwork(as.data.frame(edgelist))
  return (tree)
}

Helper for data cleaning

clean = function(txt) {
  txt = iconv(txt, "latin1", "ASCII", sub="") # normalize encoding, dropping non-ASCII characters
  txt = gsub("(@|#)\\w+", "", txt) # strip @mentions and #hashtags
  txt = gsub("(http|https)://.*", "", txt) # strip URLs (and everything after them)
  txt = gsub("[ \t]{2,}", " ", txt) # collapse runs of two or more spaces/tabs into one space
  txt = gsub("\\n"," ",txt) # replace newlines with spaces
  txt = gsub("\\s+"," ",txt) # collapse remaining whitespace runs into a single space
  txt = gsub("^\\s+|\\s+$","",txt) # trim leading/trailing whitespace
  txt = gsub("&#?\\w+;","",txt) # strip html entities such as &amp; or &#39;
  txt = gsub("[^a-zA-Z0-9?!. ']","",txt) # keep only letters, digits, ?!.', and spaces
  txt }
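
A quick check on a made-up tweet shows what survives the cleaning:

clean("I love this!!! @user #tag http://example.com")
## [1] "I love this!!!"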

Preparing the tweets data

tw <- fread('./tweets.csv') %>% # read the file
  mutate(from = "tweets") %>% # add a source column
  select(screen_name, text, timestamp, from)
names(tw)[1] = "user" 
names(tw)[2] = "text"
names(tw)[3] = "date"
tw$text = clean(tw$text)
tw$date = as.Date(tw$date)
#data <- bind_rows(df, tw) # combine the reddit and tweets data
data <- tw
data$id <- seq.int(nrow(data)) # per-document id
data <- data[,c(5,1,2,3,4)] # move id to the front

head(data)
##   id          user
## 1  1   Rebel_of_Oz
## 2  2  andresg_0122
## 3  3        Shugah
## 4  4         suv49
## 5  5 HowGeneration
## 6  6      PostFomo
##                                                                                                                                                                                                                                                                                  text
## 1                           he Democratic primaries have broken down into chaos after the party encountered its archnemesis headon for the first time basic math. While progressives were optimistic going into the primaries they'd forgotten that numbers counting and addition ...
## 2                  If Bernie isn't the nominee we don't get universal healthcare. If the Democratic party dies we don't get universal healthcare. They wanna rig primary elections then scold progressives for not backing the centrist in the general? LOL the party deserves to die
## 3                                                                    Well the way the democratic party is demoralizing voters rigging this primary and taking all of the bribes from Bloomberg the GOP won't need black votes because young black voters won't be voting in November.
## 4                      If the Democratic ? Party wont allow DEMOCRACY to prevail in the primaries wo sticking their grubby fingers into each 1 to make corrections resulting in THEIR choice then why have them?Just choose your candidate well stay home  you can lose the WH again.
## 5                                                                                                                                                                                                          1 in 4 chance the Democratic party collapses before the primaries are over
## 6 Does the DNC suck? Totally. But this isn't about the DNC. This is about whether the more moderate part of the Democratic party the actual voters are able to stop Bernie in the primary. If they are and I hope they aren't the responsible thing to do is vote for that candidate.
##         date   from
## 1 2020-02-05 tweets
## 2 2020-02-05 tweets
## 3 2020-02-05 tweets
## 4 2020-02-05 tweets
## 5 2020-02-05 tweets
## 6 2020-02-05 tweets

Connecting to the CoreNLP API

Server side:

+ Start a CoreNLP server in a terminal first.
+ From the CoreNLP directory, run: java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000
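
Before sending documents, it helps to confirm the server is reachable. A minimal sketch using httr; the /ready endpoint is the CoreNLP server's readiness probe (treat the exact response behavior as an assumption):

server_ok <- tryCatch(
  httr::status_code(httr::GET("http://127.0.0.1:9000/ready")) == 200, # 200 once the server is up
  error = function(e) FALSE) # an unreachable server raises an error
server_ok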

# Build the core-nlp api url; the tokenization behavior and annotation tasks are configurable
generate_API_url <- function(host, port="9000",
                    tokenize.whitespace="false", annotators=""){ # "false": don't tokenize on whitespace alone
    url <- sprintf('http://%s:%s/?properties={"tokenize.whitespace":"%s","annotators":"%s"}',
                     host, port, tokenize.whitespace, annotators)
    url <- URLencode(url)
}
generate_API_url("127.0.0.1")
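
Printing the generated value shows that the properties JSON is percent-encoded into the query string:

cat(generate_API_url("127.0.0.1", annotators = "tokenize,ssplit,pos"))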
# Call the core-nlp api
call_coreNLP <- function(server_host, text, language="eng",
                    tokenize.whitespace="true", ssplit.eolonly="true", annotators=c("tokenize","ssplit","pos","lemma","ner","parse","sentiment")){
  # assume two core-nlp servers: one for English (port 9000), another for Chinese (port 9001)
  port <- ifelse(language=="eng", 9000, 9001);
  # build the api url
  url <- generate_API_url(server_host, port=port,
                    tokenize.whitespace=tokenize.whitespace, annotators=paste0(annotators, collapse = ','))
  
  result <- POST(url, body = text, encode = "json")
  doc <- httr::content(result, "parsed","application/json",encoding = "UTF-8")
  return (doc)
}
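
A single-document call is a quick way to inspect the parsed JSON before batching (commented out here since it assumes the server above is running; the example sentence is made up):

#doc <- call_coreNLP("127.0.0.1", "Bernie Sanders spoke in Iowa.")
#str(doc$sentences[[1]], max.level = 1) # tokens, parse, dependency and sentiment fields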
host = "127.0.0.1"
coreNLP <- function(data,host){
  # Send each document to core-nlp in turn; each comes back as parsed json.
  # The results are stored in R as a list of objects.
  result <- apply(data, 1 , function(x){
    object <- tryCatch({
        call_coreNLP(host, x['text'])
    }, error = function(e) {
      print("error occur here")
      print(x['text'])
      NULL # keep going; store NULL for the failed document
    })
    
    list(doc=object, data=x)
  })
  
  return(result)
}

取得coreNLP回傳的物件

gc() # free unused memory

t0 = Sys.time()
obj = data %>% filter(text != "") %>% coreNLP(host) # the twitter posts
# the object passed to coreNLP must be a data.frame with a text column
Sys.time() - t0 # elapsed time
#Time difference of 14 mins

save.image("tweets_coreNLP.RData")

tokens

#load twitter
load(file = "tweets_coreNLP.RData")
tokens = coreNLP_tokens_parser(obj)
head(tokens)
##     id        user       date   from       word      lemma pos      ner
## 1    1 Rebel_of_Oz 2020-02-05 tweets         he         he PRP        O
## 2    1 Rebel_of_Oz 2020-02-05 tweets Democratic democratic  JJ IDEOLOGY
## 3    1 Rebel_of_Oz 2020-02-05 tweets  primaries    primary NNS        O
## 4    1 Rebel_of_Oz 2020-02-05 tweets       have       have VBP        O
## 5    1 Rebel_of_Oz 2020-02-05 tweets     broken      break VBN        O
## 6    1 Rebel_of_Oz 2020-02-05 tweets       down       down  RP        O

#Which entity types were recognized

levels(tokens$ner)
##  [1] "O"                 "IDEOLOGY"          "ORDINAL"          
##  [4] "PERSON"            "ORGANIZATION"      "NUMBER"           
##  [7] "STATE_OR_PROVINCE" "DATE"              "TITLE"            
## [10] "COUNTRY"           "DURATION"          "MISC"             
## [13] "NATIONALITY"       "CRIMINAL_CHARGE"   "RELIGION"         
## [16] "LOCATION"          "SET"               "CITY"             
## [19] "CAUSE_OF_DEATH"    "TIME"              "URL"              
## [22] "MONEY"             "PERCENT"

#Excluding the "O" (other) tag, how many distinct words were tagged as entities

length(unique(tokens$word[tokens$ner != "O"])) 
## [1] 3290
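
The tagged tokens can also be broken down by entity type; a short sketch:

tokens %>%
  filter(ner != "O") %>% # drop untagged tokens
  count(ner, sort = TRUE) %>% # frequency of each entity type
  head(10)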

#People involved in the Democratic-primary discussion on Twitter

tokens %>%
  filter(ner == "PERSON") %>%  #篩選NER為PERSION
  group_by(word) %>% #根據word分組
  summarize(count = n()) %>% #計算每組
  top_n(n = 10, count) %>%
  ungroup() %>% 
  mutate(word = reorder(word, count)) %>%
  ggplot(aes(word, count)) + 
  geom_col()+
  ggtitle("Word Frequency (NER is PERSON)") +
  theme(text=element_text(size=14))+
  coord_flip()

#Countries involved

tokens %>%
  filter(ner == "COUNTRY") %>%  #篩選NER為COUNTRY
  group_by(word) %>% #根據word分組
  summarize(count = n()) %>% #計算每組
  top_n(n = 10, count) %>%
  ungroup() %>% 
  mutate(word = reorder(word, count)) %>%
  ggplot(aes(word, count)) + 
  geom_col()+
  ggtitle("Word Frequency (NER is COUNTRY)") +
  theme(text=element_text(size=14))+
  coord_flip()

#Ideologies involved

tokens %>%
  filter(ner == "IDEOLOGY") %>%  #篩選NER為IDEOLOGY
  group_by(word) %>% #根據word分組
  summarize(count = n()) %>% #計算每組
  top_n(n = 10, count) %>%
  ungroup() %>% 
  mutate(word = reorder(word, count)) %>%
  ggplot(aes(word, count)) + 
  geom_col()+
  ggtitle("Word Frequency (NER is IDEOLOGY)") +
  theme(text=element_text(size=14))+
  coord_flip()
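
The three plots above differ only in the NER tag; a small helper (an illustrative refactor, not part of the original code) removes the repetition:

plot_ner <- function(tokens, tag, n = 10) {
  tokens %>%
    filter(ner == tag) %>% # keep tokens with the requested tag
    count(word, name = "count") %>% # word frequencies
    top_n(n, count) %>%
    mutate(word = reorder(word, count)) %>%
    ggplot(aes(word, count)) +
    geom_col() +
    ggtitle(paste0("Word Frequency (NER is ", tag, ")")) +
    theme(text = element_text(size = 14)) +
    coord_flip()
}
#plot_ner(tokens, "ORGANIZATION")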

dependencies = coreNLP_dependency_parser(obj)
head(dependencies)
##     id        user       date   from          dep governor governorGloss
## 1    1 Rebel_of_Oz 2020-02-05 tweets         ROOT        0          ROOT
## 2    1 Rebel_of_Oz 2020-02-05 tweets        nsubj        5        broken
## 3    1 Rebel_of_Oz 2020-02-05 tweets         amod        3     primaries
## 4    1 Rebel_of_Oz 2020-02-05 tweets          dep        1            he
## 5    1 Rebel_of_Oz 2020-02-05 tweets          aux        5        broken
## 6    1 Rebel_of_Oz 2020-02-05 tweets compound:prt        5        broken
##   dependent dependentGloss
## 1         5         broken
## 2         1             he
## 3         2     Democratic
## 4         3      primaries
## 5         4           have
## 6         6           down
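
One handy use of the dependency table is pulling out subject-verb pairs; a sketch:

dependencies %>%
  filter(dep == "nsubj") %>% # nominal-subject relations
  select(id, verb = governorGloss, subject = dependentGloss) %>%
  head()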

#Visualizing the parse tree

#parse_tree <- obj[[400]]$doc[[1]][[1]]$parse
#tree <- parse2tree(parse_tree)
#SetNodeStyle(tree, style = "filled,rounded", shape = "box")
#plot(tree)

sentiment

sentiment = coreNLP_sentiment_parser(obj)
head(sentiment)
##     id          user
## 1    1   Rebel_of_Oz
## 2    2  andresg_0122
## 3    3        Shugah
## 4    4         suv49
## 5    5 HowGeneration
## 6    6      PostFomo
##                                                                                                                                                                                                                                                                                  text
## 1                           he Democratic primaries have broken down into chaos after the party encountered its archnemesis headon for the first time basic math. While progressives were optimistic going into the primaries they'd forgotten that numbers counting and addition ...
## 2                  If Bernie isn't the nominee we don't get universal healthcare. If the Democratic party dies we don't get universal healthcare. They wanna rig primary elections then scold progressives for not backing the centrist in the general? LOL the party deserves to die
## 3                                                                    Well the way the democratic party is demoralizing voters rigging this primary and taking all of the bribes from Bloomberg the GOP won't need black votes because young black voters won't be voting in November.
## 4                      If the Democratic ? Party wont allow DEMOCRACY to prevail in the primaries wo sticking their grubby fingers into each 1 to make corrections resulting in THEIR choice then why have them?Just choose your candidate well stay home  you can lose the WH again.
## 5                                                                                                                                                                                                          1 in 4 chance the Democratic party collapses before the primaries are over
## 6 Does the DNC suck? Totally. But this isn't about the DNC. This is about whether the more moderate part of the Democratic party the actual voters are able to stop Bernie in the primary. If they are and I hope they aren't the responsible thing to do is vote for that candidate.
##         date   from sentiment sentimentValue
## 1 2020-02-05 tweets  Negative              1
## 2 2020-02-05 tweets  Negative              1
## 3 2020-02-05 tweets  Negative              1
## 4 2020-02-05 tweets   Neutral              2
## 5 2020-02-05 tweets  Negative              1
## 6 2020-02-05 tweets  Negative              1

#Sentence sentiment classes; sentimentValue runs from 0 (very negative) to 4 (very positive)

levels(sentiment$sentiment)
## [1] "Negative"     "Neutral"      "Positive"     "Verynegative" "Verypositive"

#Trend of the average sentiment score over time

#tw$date = as.Date(tw$date)
sentiment$sentimentValue = as.numeric(sentiment$sentimentValue) 
sentiment$date = as.Date(sentiment$date)

sentiment %>% 
#  merge(data[,c("id")]) %>%
  group_by(date) %>% 
  summarise(avg_sentiment = mean(sentimentValue,na.rm=T)) %>% 
  ggplot(aes(x=date,y=avg_sentiment)) + 
  geom_line()

tweets_partial <- sentiment
# tag each tweet with the first candidate pattern it matches (first match wins,
# preserving the priority order of the original nested ifelse chain)
tweets_partial$candidate <- case_when(
  grepl("Andrew|Yang", tweets_partial$text, ignore.case = TRUE)       ~ "Andrew Yang",
  grepl("Michael|Bloomberg", tweets_partial$text, ignore.case = TRUE) ~ "Michael Bloomberg",
  grepl("Biden|Joe", tweets_partial$text, ignore.case = TRUE)         ~ "Joe Biden",
  grepl("Bernie|Sanders", tweets_partial$text, ignore.case = TRUE)    ~ "Bernie Sanders",
  grepl("Warren", tweets_partial$text, ignore.case = TRUE)            ~ "Elizabeth Warren",
  grepl("Klobuchar", tweets_partial$text, ignore.case = TRUE)         ~ "Amy Klobuchar",
  grepl("Gabbard", tweets_partial$text, ignore.case = TRUE)           ~ "Tulsi Gabbard",
  grepl("Buttigieg", tweets_partial$text, ignore.case = TRUE)         ~ "Pete Buttigieg",
  grepl("Tulsi", tweets_partial$text, ignore.case = TRUE)             ~ "Tulsi Gabbard",
  TRUE                                                                ~ "Others"
)
tweets_sentiment_candidate <- tweets_partial%>%filter(candidate!="Others")

#Trend of the average sentiment score per candidate

#tw$date = as.Date(tw$date)
tweets_sentiment_candidate$sentimentValue = as.numeric(tweets_sentiment_candidate$sentimentValue) 
tweets_sentiment_candidate$date = as.Date(tweets_sentiment_candidate$date)

tweets_sentiment_candidate %>% 
  #filter(candidate == "Bernie Sanders") %>% 
  group_by(date, candidate) %>% 
  summarise(avg_sentiment = mean(sentimentValue,na.rm=T)) %>% 
  ggplot(aes(x=date, y=avg_sentiment, colour=candidate)) + 
  geom_line()

#Distribution of posts by sentiment class

sentiment$sentiment %>% table()
## .
##     Negative      Neutral     Positive Verynegative Verypositive 
##         4289          731          137           61            2
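
As proportions, the skew toward negative posts is even clearer:

sentiment$sentiment %>% table() %>% prop.table() %>% round(3)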

#Word usage in positive posts

sentiment %>% 
  merge(tokens) %>% 
  anti_join(stop_words) %>% 
  filter(!word %in% c('Bernie','Sanders','Bernie Sanders')) %>% 
  filter(sentiment == "Verypositive" | sentiment =='Positive') %>%
  group_by(lemma) %>% # group by lemma
  summarize(count = n()) %>% 
  filter(count >5)%>%
  wordcloud2()
## Joining, by = "word"
## Warning: Column `word` joining factor and character vector, coercing into
## character vector

#Word usage in negative posts

sentiment %>% 
  merge(tokens) %>% 
  anti_join(stop_words) %>% 
  filter(!word %in% c('Bernie','Sanders','Bernie Sanders')) %>% 
  filter(sentiment == "Verynegative" | sentiment =='Negative') %>%
  group_by(lemma) %>% # group by lemma
  summarize(count = n()) %>% 
  filter(count >10)%>%
  wordcloud2()
## Joining, by = "word"
## Warning: Column `word` joining factor and character vector, coercing into
## character vector
sentimentr

#data$text[1]
mytext <- get_sentences(data$text) # split each document into sentences
head(mytext)
## [[1]]
## [1] "he Democratic primaries have broken down into chaos after the party encountered its archnemesis headon for the first time basic math."
## [2] "While progressives were optimistic going into the primaries they'd forgotten that numbers counting and addition ..."                  
## 
## [[2]]
## [1] "If Bernie isn't the nominee we don't get universal healthcare."                                       
## [2] "If the Democratic party dies we don't get universal healthcare."                                      
## [3] "They wanna rig primary elections then scold progressives for not backing the centrist in the general?"
## [4] "LOL the party deserves to die"                                                                        
## 
## [[3]]
## [1] "Well the way the democratic party is demoralizing voters rigging this primary and taking all of the bribes from Bloomberg the GOP won't need black votes because young black voters won't be voting in November."
## 
## [[4]]
## [1] "If the Democratic ?"                                                                                                                                                  
## [2] "Party wont allow DEMOCRACY to prevail in the primaries wo sticking their grubby fingers into each 1 to make corrections resulting in THEIR choice then why have them?"
## [3] "Just choose your candidate well stay home  you can lose the WH again."                                                                                                
## 
## [[5]]
## [1] "1 in 4 chance the Democratic party collapses before the primaries are over"
## 
## [[6]]
## [1] "Does the DNC suck?"                                                                                                            
## [2] "Totally."                                                                                                                      
## [3] "But this isn't about the DNC."                                                                                                 
## [4] "This is about whether the more moderate part of the Democratic party the actual voters are able to stop Bernie in the primary."
## [5] "If they are and I hope they aren't the responsible thing to do is vote for that candidate."
sentimentr_doc = sentiment_by(mytext) # sentiment_by(): average sentiment score per document
sentimentr_sen = sentiment(mytext) # sentiment(): scores at the sentence level
sentiment_by(mytext) %>% highlight() # highlight() shows each sentence's sentiment in context
## Saved in /var/folders/6w/xrn440qn32g4zh49tsgprbk80000gn/T//RtmpAzkc5e/polarity.html
## Opening /var/folders/6w/xrn440qn32g4zh49tsgprbk80000gn/T//RtmpAzkc5e/polarity.html ...

Using dates to track sentiment fluctuation

Note: in the table below, date prints as the underlying day count since 1970-01-01 (18263 is 2020-01-02).

#tweets$date = format(tweets$created_at,'%Y%m%d')

(out = data %>% with(
    sentiment_by(
        get_sentences(text),
        list(date)
    )
))
##      date word_count          sd ave_sentiment
##  1: 18263        422 0.027983585   0.038163346
##  2: 18264        878 0.128566561  -0.074266211
##  3: 18265       6570 0.257232030   0.024012818
##  4: 18266         88 0.176397111  -0.135658601
##  5: 18267        296 0.148590655   0.177476253
##  6: 18268        911 0.117364066   0.102460949
##  7: 18269        661 0.228716971  -0.053682048
##  8: 18270       5879 0.284164154   0.029346142
##  9: 18271        174 0.101984772   0.076710263
## 10: 18272        199 0.065203088   0.013413108
## 11: 18273        268 0.080618766  -0.005089300
## 12: 18274       7832 0.273717822   0.034621502
## 13: 18275        199 0.071950709   0.083237069
## 14: 18276        610 0.254651517  -0.056435595
## 15: 18277        584 0.171587669   0.118109536
## 16: 18278        408 0.133437253   0.023856790
## 17: 18279       6251 0.291881981   0.044398858
## 18: 18280        386 0.074777550   0.096557529
## 19: 18281        374 0.151707857   0.174990809
## 20: 18282        122 0.345992474   0.180438061
## 21: 18283       7727 0.239267156   0.036351399
## 22: 18284        213 0.139618497  -0.039936969
## 23: 18285        115 0.039998207   0.040138843
## 24: 18286        126 0.142515053   0.205476594
## 25: 18287        600 0.219174359   0.037039477
## 26: 18288       7036 0.254215261   0.082358744
## 27: 18289        358 0.045884890   0.037607843
## 28: 18290        144 0.163122955  -0.023036728
## 29: 18291        617 0.138075212   0.089347006
## 30: 18292       7203 0.202589698   0.081378972
## 31: 18294        384 0.099871402   0.025014529
## 32: 18295        444 0.052851442   0.036529075
## 33: 18296       1020 0.183864223   0.124503514
## 34: 18297       7970 0.239731555   0.001768664
## 35: 18299        262 0.092448576   0.063933466
## 36: 18300        597 0.169340723  -0.006705792
## 37: 18301       8146 0.216336912   0.072981236
## 38: 18304       1068 0.127527554  -0.003981143
## 39: 18305        272 0.137858084   0.049114033
## 40: 18306       7355 0.217977721   0.064330654
## 41: 18307        222 0.496516902   0.152727558
## 42: 18308        258 0.153100241   0.050355035
## 43: 18309        266 0.140857816  -0.109765952
## 44: 18310        196 0.056738578   0.077890248
## 45: 18311       6592 0.237628429   0.047279738
## 46: 18312        284 0.260865751   0.016644108
## 47: 18313         66 0.119256959  -0.135530935
## 48: 18314        164 0.201665474   0.017157911
## 49: 18315       7241 0.234600630   0.079552900
## 50: 18316        390 0.135663615   0.117300653
## 51: 18317         76 0.000000000   0.040555355
## 52: 18318        144 0.003817127  -0.020713494
## 53: 18320       6999 0.233004038   0.078350098
## 54: 18322        462 0.306106895  -0.038465557
## 55: 18323        148 0.098063269   0.099621770
## 56: 18324       6984 0.231445886   0.120485694
## 57: 18325        360 0.165319195  -0.041071742
## 58: 18326        282 0.150044140  -0.087483848
## 59: 18328         98 0.155013909   0.247635325
## 60: 18329       7346 0.246519660   0.023075009
## 61: 18330        134 0.282435185   0.215521307
## 62: 18331         42 0.000000000  -0.021821789
## 63: 18332        819 0.188332026   0.035123350
## 64: 18333       6152 0.257179196   0.019291352
## 65: 18334        222 0.120817962   0.181799736
## 66: 18335        398 0.196699237  -0.063401988
## 67: 18336        300 0.331535503  -0.092453200
## 68: 18337        784 0.259060460  -0.038609309
## 69: 18338       5127 0.246811653   0.022719956
## 70: 18339        104 0.089219724   0.087535093
## 71: 18340       1127 0.100664751   0.033045317
## 72: 18341        634 0.217478479   0.015879814
## 73: 18342       7103 0.278019705   0.011527535
## 74: 18343        667 0.290834974   0.034715517
## 75: 18344        232 0.301674776   0.188611271
## 76: 18345        450 0.118970262   0.064102936
## 77: 18346        688 0.218970056  -0.089497443
## 78: 18347       6227 0.276173118   0.032892225
## 79: 18348        248 0.107782802  -0.016472718
## 80: 18349         81 0.142275229   0.066444247
## 81: 18350        220 0.114640049   0.123904281
## 82: 18351       1141 0.178774841   0.074074754
## 83: 18352       6334 0.255260816  -0.003561798
##      date word_count          sd ave_sentiment
plot(out) # per-date average sentiment over time

plot(uncombine(out)) # uncombine() reverts the grouping, giving sentence-level scores