library(dplyr)
library(stringr)
library(ggplot2)
library(xml2)
library(httr)
library(jsonlite)
library(NLP)
library(igraph)
library(sentimentr)
library(tidytext)
library(wordcloud2)
library(tidyr)
library(scales)
library(lubridate)
library(data.table)
library(data.tree)
load(file = "tweets_coreNLP.RData")
# Read every .csv file under a folder (recursively) and bind them into one data frame
fun <- function(t){
  files <- list.files(path = t, pattern = "\\.csv$", recursive = TRUE) # list the file paths
  df2 <- data.frame()
  for(file in files) {
    df1 <- read.csv(file.path(t, file)) # read one file
    df2 <- rbind(df2, df1)              # append it to the combined data frame
  }
  return(df2)
}
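This helper presumably built the reddit data frame df expected by the commented-out bind_rows(df, tw) below; a hypothetical call (the folder name is illustrative only):
#df <- fun("./reddit_data/") # hypothetical folder of exported reddit .csv files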
Extract the tokenization results from the returned objects and output them in tidy-data format
coreNLP_tokens_parser <- function(coreNLP_objects){
result <- do.call(rbind, lapply(coreNLP_objects, function(obj){
original_data <- obj$data
doc <- obj$doc
# take only the first sentence of each document (any further sentences are ignored)
sentences <- doc$sentences
sen <- sentences[[1]]
tokens <- do.call(rbind, lapply(sen$tokens, function(x){
result <- data.frame(word=x$word, lemma=x$lemma, pos=x$pos, ner=x$ner)
result
}))
tokens <- original_data %>%
t() %>%
data.frame() %>%
select(-text) %>%
slice(rep(1:n(), each = nrow(tokens))) %>%
bind_cols(tokens)
tokens
}))
return(result)
}
# Extract the word dependency relations from the returned CoreNLP objects, in tidy-data format
coreNLP_dependency_parser <- function(coreNLP_objects){
result <- do.call(rbind, lapply(coreNLP_objects, function(obj){
original_data <- obj$data
doc <- obj$doc
# take only the first sentence of each document (any further sentences are ignored)
sentences <- doc$sentences
sen <- sentences[[1]]
dependencies <- do.call(rbind, lapply(sen$basicDependencies, function(x){
result <- data.frame(dep=x$dep, governor=x$governor, governorGloss=x$governorGloss, dependent=x$dependent, dependentGloss=x$dependentGloss)
result
}))
dependencies <- original_data %>%
t() %>%
data.frame() %>%
select(-text) %>%
slice(rep(1:n(), each = nrow(dependencies))) %>%
bind_cols(dependencies)
dependencies
}))
return(result)
}
# Extract the sentence-level sentiment from the returned CoreNLP objects, in tidy-data format
coreNLP_sentiment_parser <- function(coreNLP_objects){
result <- do.call(rbind, lapply(coreNLP_objects, function(obj){
original_data <- obj$data
doc <- obj$doc
# take only the first sentence of each document (any further sentences are ignored)
sentences <- doc$sentences
sen <- sentences[[1]]
sentiment <- original_data %>%
t() %>%
data.frame() %>%
bind_cols(data.frame(sentiment=sen$sentiment, sentimentValue=sen$sentimentValue))
sentiment
}))
return(result)
}
# Display a parse result graphically as a tree
parse2tree <- function(ptext) {
stopifnot(require(NLP), require(igraph), require(data.tree))
## Replace words with unique versions
ms <- gregexpr("[^() ]+", ptext) # match every run of characters that is not a space or a bracket
words <- regmatches(ptext, ms)[[1]] # the node labels and words
regmatches(ptext, ms) <- list(paste0(words, seq.int(length(words)))) # append an index so every label is unique
## Going to construct an edgelist and pass that to igraph
## allocate here since we know the size (number of nodes - 1) and -1 more to exclude 'TOP'
edgelist <- matrix('', nrow=length(words)-2, ncol=2)
## Function to fill in edgelist in place
edgemaker <- (function() {
i <- 0 # row counter
g <- function(node) { # the recursive function
if (inherits(node, "Tree")) { # only recurse subtrees
if ((val <- node$value) != 'TOP1') { # skip 'TOP' node (added '1' above)
for (child in node$children) {
childval <- if(inherits(child, "Tree")) child$value else child
i <<- i+1
edgelist[i,1:2] <<- c(val, childval)
}
}
invisible(lapply(node$children, g))
}
}
})()
## Create the edgelist from the parse tree
edgemaker(Tree_parse(ptext))
tree <- FromDataFrameNetwork(as.data.frame(edgelist))
return (tree)
}
clean = function(txt) {
  txt = iconv(txt, "latin1", "ASCII", sub="") # convert encoding, dropping non-ASCII characters
  txt = gsub("(@|#)\\w+", "", txt)            # remove @mentions and #hashtags
  txt = gsub("(http|https)://.*", "", txt)    # remove URLs (and everything after them)
  txt = gsub("[ \t]{2,}", "", txt)            # remove runs of two or more spaces/tabs
  txt = gsub("\\n"," ",txt)                   # replace line breaks with spaces
  txt = gsub("\\s+"," ",txt)                  # collapse repeated whitespace into a single space
  txt = gsub("^\\s+|\\s+$","",txt)            # trim leading/trailing whitespace
  txt = gsub("&.*;","",txt)                   # remove HTML entities
  txt = gsub("[^a-zA-Z0-9?!. ']","",txt)      # keep only letters, digits, ? ! . ' and spaces
  txt }
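A quick check of clean() on a made-up tweet (the string below is illustrative, not from the dataset):
clean("Vote! @someone says the #primaries are rigged &amp; broken... see https://example.com")
# mentions, hashtags, HTML entities and everything from the URL onward are removed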
tw <- fread('./tweets.csv') %>% # read the tweets file
mutate(from = "tweets") %>% # add a column marking the data source
select(screen_name, text, timestamp, from)
names(tw)[1] = "user"
names(tw)[2] = "text"
names(tw)[3] = "date"
tw$text = clean(tw$text)
tw$date = as.Date(tw$date)
#data <- bind_rows(df, tw) # combine the reddit and tweets data
data <- tw
data$id <- seq.int(nrow(data))
data <- data[,c(5,1,2,3,4)]
head(data)
## id user
## 1 1 Rebel_of_Oz
## 2 2 andresg_0122
## 3 3 Shugah
## 4 4 suv49
## 5 5 HowGeneration
## 6 6 PostFomo
## text
## 1 he Democratic primaries have broken down into chaos after the party encountered its archnemesis headon for the first time basic math. While progressives were optimistic going into the primaries they'd forgotten that numbers counting and addition ...
## 2 If Bernie isn't the nominee we don't get universal healthcare. If the Democratic party dies we don't get universal healthcare. They wanna rig primary elections then scold progressives for not backing the centrist in the general? LOL the party deserves to die
## 3 Well the way the democratic party is demoralizing voters rigging this primary and taking all of the bribes from Bloomberg the GOP won't need black votes because young black voters won't be voting in November.
## 4 If the Democratic ? Party wont allow DEMOCRACY to prevail in the primaries wo sticking their grubby fingers into each 1 to make corrections resulting in THEIR choice then why have them?Just choose your candidate well stay home you can lose the WH again.
## 5 1 in 4 chance the Democratic party collapses before the primaries are over
## 6 Does the DNC suck? Totally. But this isn't about the DNC. This is about whether the more moderate part of the Democratic party the actual voters are able to stop Bernie in the primary. If they are and I hope they aren't the responsible thing to do is vote for that candidate.
## date from
## 1 2020-02-05 tweets
## 2 2020-02-05 tweets
## 3 2020-02-05 tweets
## 4 2020-02-05 tweets
## 5 2020-02-05 tweets
## 6 2020-02-05 tweets
Server side:
+ The CoreNLP server must be started in a terminal first.
+ Open a terminal in the CoreNLP directory and run: java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000
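A minimal reachability check from R before sending any documents (assumes the default host and port above):
ping <- try(httr::GET("http://127.0.0.1:9000/"), silent = TRUE)
if (inherits(ping, "try-error") || httr::status_code(ping) != 200) message("CoreNLP server is not reachable on port 9000")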
# Build the CoreNLP API URL; the tokenizer behaviour and the annotation tasks are set here
generate_API_url <- function(host, port="9000",
                             tokenize.whitespace="false", annotators=""){ # "false": do not tokenize on whitespace only
  url <- sprintf('http://%s:%s/?properties={"tokenize.whitespace":"%s","annotators":"%s"}',
                 host, port, tokenize.whitespace, annotators)
  url <- URLencode(url)
  return(url)
}
generate_API_url("127.0.0.1")
# Call the CoreNLP API for one document
call_coreNLP <- function(server_host, text, host="localhost", language="eng",
                         tokenize.whitespace="true", ssplit.eolonly="true",
                         annotators=c("tokenize","ssplit","pos","lemma","ner","parse","sentiment")){
  # assume two CoreNLP servers: one for English (port 9000) and one for Chinese (port 9001)
  port <- ifelse(language=="eng", 9000, 9001)
  # build the API URL
  url <- generate_API_url(server_host, port=port,
                          tokenize.whitespace=tokenize.whitespace, annotators=paste0(annotators, collapse = ','))
  result <- POST(url, body = text, encode = "json")
  doc <- httr::content(result, "parsed", "application/json", encoding = "UTF-8")
  return(doc)
}
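A single-document sketch of the structure the parsers above rely on (assumes the local server is running; the sentence is made up):
#doc <- call_coreNLP("127.0.0.1", "The Democratic primaries are in chaos.")
#doc$sentences[[1]]$sentiment # sentence-level sentiment label
#doc$sentences[[1]]$tokens[[1]]$pos # POS tag of the first token
#doc$sentences[[1]]$basicDependencies[[1]]$dep # first dependency relation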
host = "127.0.0.1"
coreNLP <- function(data, host){
  # send each document to the CoreNLP server in turn; each reply is parsed JSON
  # the results are collected in a list of R objects
  result <- apply(data, 1, function(x){
    object <- tryCatch({
      call_coreNLP(host, x['text'])
    }, error = function(e) {
      message("error occurred for this text:")
      message(x['text'])
      NULL # keep a placeholder so the document is not silently dropped
    })
    list(doc = object, data = x)
  })
  return(result)
}
Retrieve the objects returned by CoreNLP
gc() # free memory that is no longer used
t0 = Sys.time()
obj = data %>% filter(text != "") %>% coreNLP(host) # the Twitter documents
# the object passed to coreNLP() must be a data.frame with a text column
Sys.time() - t0 # elapsed time
#Time difference of 14 mins
save.image("tweets_coreNLP.RData")
#load twitter
load(file = "tweets_coreNLP.RData")
tokens = coreNLP_tokens_parser(obj)
head(tokens)
## id user date from word lemma pos ner
## 1 1 Rebel_of_Oz 2020-02-05 tweets he he PRP O
## 2 1 Rebel_of_Oz 2020-02-05 tweets Democratic democratic JJ IDEOLOGY
## 3 1 Rebel_of_Oz 2020-02-05 tweets primaries primary NNS O
## 4 1 Rebel_of_Oz 2020-02-05 tweets have have VBP O
## 5 1 Rebel_of_Oz 2020-02-05 tweets broken break VBN O
## 6 1 Rebel_of_Oz 2020-02-05 tweets down down RP O
# Which types of entities were recognized
levels(tokens$ner)
## [1] "O" "IDEOLOGY" "ORDINAL"
## [4] "PERSON" "ORGANIZATION" "NUMBER"
## [7] "STATE_OR_PROVINCE" "DATE" "TITLE"
## [10] "COUNTRY" "DURATION" "MISC"
## [13] "NATIONALITY" "CRIMINAL_CHARGE" "RELIGION"
## [16] "LOCATION" "SET" "CITY"
## [19] "CAUSE_OF_DEATH" "TIME" "URL"
## [22] "MONEY" "PERCENT"
# Excluding the 'O' (Other) tag, how many distinct words were tagged as an entity
length(unique(tokens$word[tokens$ner != "O"]))
## [1] 3290
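Before drilling into specific entity types, a quick overall count (a small sketch on the same tidy tokens table) shows which NER tags dominate:
tokens %>%
filter(ner != "O") %>% # drop untagged tokens
count(ner, sort = TRUE) %>%
head(10)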
# People mentioned in the Twitter discussion of the Democratic primaries
tokens %>%
filter(ner == "PERSON") %>% # keep tokens whose NER tag is PERSON
group_by(word) %>% # group by word
summarize(count = n()) %>% # count each group
top_n(n = 10, count) %>%
ungroup() %>%
mutate(word = reorder(word, count)) %>%
ggplot(aes(word, count)) +
geom_col()+
ggtitle("Word Frequency (NER is PERSON)") +
theme(text=element_text(size=14))+
coord_flip()
# Countries mentioned
tokens %>%
filter(ner == "COUNTRY") %>% # keep tokens whose NER tag is COUNTRY
group_by(word) %>% # group by word
summarize(count = n()) %>% # count each group
top_n(n = 10, count) %>%
ungroup() %>%
mutate(word = reorder(word, count)) %>%
ggplot(aes(word, count)) +
geom_col()+
ggtitle("Word Frequency (NER is COUNTRY)") +
theme(text=element_text(size=14))+
coord_flip()
# Ideologies mentioned
tokens %>%
filter(ner == "IDEOLOGY") %>% # keep tokens whose NER tag is IDEOLOGY
group_by(word) %>% # group by word
summarize(count = n()) %>% # count each group
top_n(n = 10, count) %>%
ungroup() %>%
mutate(word = reorder(word, count)) %>%
ggplot(aes(word, count)) +
geom_col()+
ggtitle("Word Frequency (NER is IDEOLOGY)") +
theme(text=element_text(size=14))+
coord_flip()
dependencies = coreNLP_dependency_parser(obj)
head(dependencies)
## id user date from dep governor governorGloss
## 1 1 Rebel_of_Oz 2020-02-05 tweets ROOT 0 ROOT
## 2 1 Rebel_of_Oz 2020-02-05 tweets nsubj 5 broken
## 3 1 Rebel_of_Oz 2020-02-05 tweets amod 3 primaries
## 4 1 Rebel_of_Oz 2020-02-05 tweets dep 1 he
## 5 1 Rebel_of_Oz 2020-02-05 tweets aux 5 broken
## 6 1 Rebel_of_Oz 2020-02-05 tweets compound:prt 5 broken
## dependent dependentGloss
## 1 5 broken
## 2 1 he
## 3 2 Democratic
## 4 3 primaries
## 5 4 have
## 6 6 down
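The tidy dependency table can be queried directly; for example (a sketch, not part of the original analysis), the words that most often act as the grammatical subject of a sentence:
dependencies %>%
filter(dep == "nsubj") %>% # nominal-subject relations
count(dependentGloss, sort = TRUE) %>% # the words most often acting as subjects
head(10)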
# Visualize the parse tree
#parse_tree <- obj[[400]]$doc[[1]][[1]]$parse
#tree <- parse2tree(parse_tree)
#SetNodeStyle(tree, style = "filled,rounded", shape = "box")
#plot(tree)
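If the saved objects are not at hand, parse2tree() can also be tried on a hand-written bracketed parse (the string below is illustrative, not taken from the data):
demo_parse <- "(TOP (S (NP (DT The) (NN party)) (VP (VBD collapsed))))"
demo_tree <- parse2tree(demo_parse)
SetNodeStyle(demo_tree, style = "filled,rounded", shape = "box")
plot(demo_tree)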
sentiment = coreNLP_sentiment_parser(obj)
head(sentiment)
## id user
## 1 1 Rebel_of_Oz
## 2 2 andresg_0122
## 3 3 Shugah
## 4 4 suv49
## 5 5 HowGeneration
## 6 6 PostFomo
## text
## 1 he Democratic primaries have broken down into chaos after the party encountered its archnemesis headon for the first time basic math. While progressives were optimistic going into the primaries they'd forgotten that numbers counting and addition ...
## 2 If Bernie isn't the nominee we don't get universal healthcare. If the Democratic party dies we don't get universal healthcare. They wanna rig primary elections then scold progressives for not backing the centrist in the general? LOL the party deserves to die
## 3 Well the way the democratic party is demoralizing voters rigging this primary and taking all of the bribes from Bloomberg the GOP won't need black votes because young black voters won't be voting in November.
## 4 If the Democratic ? Party wont allow DEMOCRACY to prevail in the primaries wo sticking their grubby fingers into each 1 to make corrections resulting in THEIR choice then why have them?Just choose your candidate well stay home you can lose the WH again.
## 5 1 in 4 chance the Democratic party collapses before the primaries are over
## 6 Does the DNC suck? Totally. But this isn't about the DNC. This is about whether the more moderate part of the Democratic party the actual voters are able to stop Bernie in the primary. If they are and I hope they aren't the responsible thing to do is vote for that candidate.
## date from sentiment sentimentValue
## 1 2020-02-05 tweets Negative 1
## 2 2020-02-05 tweets Negative 1
## 3 2020-02-05 tweets Negative 1
## 4 2020-02-05 tweets Neutral 2
## 5 2020-02-05 tweets Negative 1
## 6 2020-02-05 tweets Negative 1
# Sentence sentiment labels
levels(sentiment$sentiment)
## [1] "Negative" "Neutral" "Positive" "Verynegative" "Verypositive"
# Trend of the average sentiment score over time
#tw$date = as.Date(tw$date)
sentiment$sentimentValue = as.numeric(sentiment$sentimentValue)
sentiment$date = as.Date(sentiment$date)
sentiment %>%
# merge(data[,c("id")]) %>%
group_by(date) %>%
summarise(avg_sentiment = mean(sentimentValue,na.rm=T)) %>%
ggplot(aes(x=date,y=avg_sentiment)) +
geom_line()
tweets_partial <- sentiment
# tag each tweet with the candidate it mentions (first matching pattern wins)
tweets_partial$candidate <- case_when(
  grepl("Andrew",    tweets_partial$text, ignore.case = TRUE) ~ "Andrew Yang",
  grepl("Yang",      tweets_partial$text, ignore.case = TRUE) ~ "Andrew Yang",
  grepl("Michael",   tweets_partial$text, ignore.case = TRUE) ~ "Michael Bloomberg",
  grepl("Bloomberg", tweets_partial$text, ignore.case = TRUE) ~ "Michael Bloomberg",
  grepl("Biden",     tweets_partial$text, ignore.case = TRUE) ~ "Joe Biden",
  grepl("Joe",       tweets_partial$text, ignore.case = TRUE) ~ "Joe Biden",
  grepl("Bernie",    tweets_partial$text, ignore.case = TRUE) ~ "Bernie Sanders",
  grepl("Sanders",   tweets_partial$text, ignore.case = TRUE) ~ "Bernie Sanders",
  grepl("Warren",    tweets_partial$text, ignore.case = TRUE) ~ "Elizabeth Warren",
  grepl("Klobuchar", tweets_partial$text, ignore.case = TRUE) ~ "Amy Klobuchar",
  grepl("Gabbard",   tweets_partial$text, ignore.case = TRUE) ~ "Tulsi Gabbard",
  grepl("Buttigieg", tweets_partial$text, ignore.case = TRUE) ~ "Pete Buttigieg",
  grepl("Tulsi",     tweets_partial$text, ignore.case = TRUE) ~ "Tulsi Gabbard",
  TRUE ~ "Others"
)
tweets_sentiment_candidate <- tweets_partial%>%filter(candidate!="Others")
# Trend of the average sentiment score over time, by candidate
#tw$date = as.Date(tw$date)
tweets_sentiment_candidate$sentimentValue = as.numeric(tweets_sentiment_candidate$sentimentValue)
tweets_sentiment_candidate$date = as.Date(tweets_sentiment_candidate$date)
tweets_sentiment_candidate %>%
#filter(candidate == "Bernie Sanders") %>%
group_by(date, candidate) %>%
summarise(avg_sentiment = mean(sentimentValue,na.rm=T)) %>%
ggplot(aes(x=date, y=avg_sentiment, colour=candidate)) +
geom_line()
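It is worth checking how many tweets actually mention each candidate, since thinly mentioned candidates will have noisy daily averages (a quick sketch):
tweets_sentiment_candidate %>%
count(candidate, sort = TRUE)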
# Distribution of tweets across sentiment classes
sentiment$sentiment %>% table()
## .
## Negative Neutral Positive Verynegative Verypositive
## 4289 731 137 61 2
# Words used in positive tweets
sentiment %>%
merge(tokens) %>%
anti_join(stop_words) %>%
filter(!word %in% c('Bernie','Sanders','Bernie Sanders')) %>%
filter(sentiment == "Verypositive" | sentiment =='Positive') %>%
group_by(lemma) %>% # group by lemma
summarize(count = n()) %>%
filter(count >5)%>%
wordcloud2()
## Joining, by = "word"
## Warning: Column `word` joining factor and character vector, coercing into
## character vector
# Words used in negative tweets
sentiment %>%
merge(tokens) %>%
anti_join(stop_words) %>%
filter(!word %in% c('Bernie','Sanders','Bernie Sanders')) %>%
filter(sentiment == "Verynegative" | sentiment =='Negative') %>%
group_by(lemma) %>% # group by lemma
summarize(count = n()) %>%
filter(count >10)%>%
wordcloud2()
## Joining, by = "word"
## Warning: Column `word` joining factor and character vector, coercing into
## character vector
#data$text[1]
mytext <- get_sentences(data$text)
head(mytext)
## [[1]]
## [1] "he Democratic primaries have broken down into chaos after the party encountered its archnemesis headon for the first time basic math."
## [2] "While progressives were optimistic going into the primaries they'd forgotten that numbers counting and addition ..."
##
## [[2]]
## [1] "If Bernie isn't the nominee we don't get universal healthcare."
## [2] "If the Democratic party dies we don't get universal healthcare."
## [3] "They wanna rig primary elections then scold progressives for not backing the centrist in the general?"
## [4] "LOL the party deserves to die"
##
## [[3]]
## [1] "Well the way the democratic party is demoralizing voters rigging this primary and taking all of the bribes from Bloomberg the GOP won't need black votes because young black voters won't be voting in November."
##
## [[4]]
## [1] "If the Democratic ?"
## [2] "Party wont allow DEMOCRACY to prevail in the primaries wo sticking their grubby fingers into each 1 to make corrections resulting in THEIR choice then why have them?"
## [3] "Just choose your candidate well stay home you can lose the WH again."
##
## [[5]]
## [1] "1 in 4 chance the Democratic party collapses before the primaries are over"
##
## [[6]]
## [1] "Does the DNC suck?"
## [2] "Totally."
## [3] "But this isn't about the DNC."
## [4] "This is about whether the more moderate part of the Democratic party the actual voters are able to stop Bernie in the primary."
## [5] "If they are and I hope they aren't the responsible thing to do is vote for that candidate."
sentimentr_doc = sentiment_by(mytext) # sentiment_by(): average sentiment score for each document
sentimentr_sen = sentiment(mytext) # sentiment(): scores at the sentence level
sentiment_by(mytext) %>% highlight() ## highlight() shows the sentiment of each sentence within each document
## Saved in /var/folders/6w/xrn440qn32g4zh49tsgprbk80000gn/T//RtmpAzkc5e/polarity.html
## Opening /var/folders/6w/xrn440qn32g4zh49tsgprbk80000gn/T//RtmpAzkc5e/polarity.html ...
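The sentence-level scores can also be inspected directly; for example (a sketch using the standard sentimentr output columns), the most negative sentences:
sentimentr_sen %>%
arrange(sentiment) %>% # most negative sentences first
head(5)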
Use dates to examine how sentiment fluctuates over time
#tweets$date = format(tweets$created_at,'%Y%m%d')
(out = data %>% with(
sentiment_by(
get_sentences(text),
list( date)
)
))
## date word_count sd ave_sentiment
## 1: 18263 422 0.027983585 0.038163346
## 2: 18264 878 0.128566561 -0.074266211
## 3: 18265 6570 0.257232030 0.024012818
## 4: 18266 88 0.176397111 -0.135658601
## 5: 18267 296 0.148590655 0.177476253
## 6: 18268 911 0.117364066 0.102460949
## 7: 18269 661 0.228716971 -0.053682048
## 8: 18270 5879 0.284164154 0.029346142
## 9: 18271 174 0.101984772 0.076710263
## 10: 18272 199 0.065203088 0.013413108
## 11: 18273 268 0.080618766 -0.005089300
## 12: 18274 7832 0.273717822 0.034621502
## 13: 18275 199 0.071950709 0.083237069
## 14: 18276 610 0.254651517 -0.056435595
## 15: 18277 584 0.171587669 0.118109536
## 16: 18278 408 0.133437253 0.023856790
## 17: 18279 6251 0.291881981 0.044398858
## 18: 18280 386 0.074777550 0.096557529
## 19: 18281 374 0.151707857 0.174990809
## 20: 18282 122 0.345992474 0.180438061
## 21: 18283 7727 0.239267156 0.036351399
## 22: 18284 213 0.139618497 -0.039936969
## 23: 18285 115 0.039998207 0.040138843
## 24: 18286 126 0.142515053 0.205476594
## 25: 18287 600 0.219174359 0.037039477
## 26: 18288 7036 0.254215261 0.082358744
## 27: 18289 358 0.045884890 0.037607843
## 28: 18290 144 0.163122955 -0.023036728
## 29: 18291 617 0.138075212 0.089347006
## 30: 18292 7203 0.202589698 0.081378972
## 31: 18294 384 0.099871402 0.025014529
## 32: 18295 444 0.052851442 0.036529075
## 33: 18296 1020 0.183864223 0.124503514
## 34: 18297 7970 0.239731555 0.001768664
## 35: 18299 262 0.092448576 0.063933466
## 36: 18300 597 0.169340723 -0.006705792
## 37: 18301 8146 0.216336912 0.072981236
## 38: 18304 1068 0.127527554 -0.003981143
## 39: 18305 272 0.137858084 0.049114033
## 40: 18306 7355 0.217977721 0.064330654
## 41: 18307 222 0.496516902 0.152727558
## 42: 18308 258 0.153100241 0.050355035
## 43: 18309 266 0.140857816 -0.109765952
## 44: 18310 196 0.056738578 0.077890248
## 45: 18311 6592 0.237628429 0.047279738
## 46: 18312 284 0.260865751 0.016644108
## 47: 18313 66 0.119256959 -0.135530935
## 48: 18314 164 0.201665474 0.017157911
## 49: 18315 7241 0.234600630 0.079552900
## 50: 18316 390 0.135663615 0.117300653
## 51: 18317 76 0.000000000 0.040555355
## 52: 18318 144 0.003817127 -0.020713494
## 53: 18320 6999 0.233004038 0.078350098
## 54: 18322 462 0.306106895 -0.038465557
## 55: 18323 148 0.098063269 0.099621770
## 56: 18324 6984 0.231445886 0.120485694
## 57: 18325 360 0.165319195 -0.041071742
## 58: 18326 282 0.150044140 -0.087483848
## 59: 18328 98 0.155013909 0.247635325
## 60: 18329 7346 0.246519660 0.023075009
## 61: 18330 134 0.282435185 0.215521307
## 62: 18331 42 0.000000000 -0.021821789
## 63: 18332 819 0.188332026 0.035123350
## 64: 18333 6152 0.257179196 0.019291352
## 65: 18334 222 0.120817962 0.181799736
## 66: 18335 398 0.196699237 -0.063401988
## 67: 18336 300 0.331535503 -0.092453200
## 68: 18337 784 0.259060460 -0.038609309
## 69: 18338 5127 0.246811653 0.022719956
## 70: 18339 104 0.089219724 0.087535093
## 71: 18340 1127 0.100664751 0.033045317
## 72: 18341 634 0.217478479 0.015879814
## 73: 18342 7103 0.278019705 0.011527535
## 74: 18343 667 0.290834974 0.034715517
## 75: 18344 232 0.301674776 0.188611271
## 76: 18345 450 0.118970262 0.064102936
## 77: 18346 688 0.218970056 -0.089497443
## 78: 18347 6227 0.276173118 0.032892225
## 79: 18348 248 0.107782802 -0.016472718
## 80: 18349 81 0.142275229 0.066444247
## 81: 18350 220 0.114640049 0.123904281
## 82: 18351 1141 0.178774841 0.074074754
## 83: 18352 6334 0.255260816 -0.003561798
## date word_count sd ave_sentiment
plot(out)
plot(uncombine(out))
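In the grouped output above the date column prints as the integer day count since 1970-01-01 (18263 is 2020-01-02); for a hand-rolled plot it can be converted back to Date first (a sketch, assuming the column has been coerced to numeric as printed):
out_dates <- as.data.frame(out)
out_dates$date <- as.Date(out_dates$date, origin = "1970-01-01")
ggplot(out_dates, aes(x = date, y = ave_sentiment)) +
geom_line() +
ggtitle("Daily average sentiment (sentimentr)")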