Load data
# Read the tweet data set from the CSV file in the working directory.
fifa <- read_csv(file = "FIFA.csv")
#sum(is.na(fifa$Place))
Frequency of words
# One row per token: split the Tweet column into individual words.
TweetTidy <- fifa %>%
  select(Tweet) %>%
  unnest_tokens(word, Tweet)

# Occurrence count per word after removing stop words.
# The join key is spelled out so the join does not depend on whichever
# columns the two tables happen to share (and no "Joining, by" message).
TweetFreqWord <- TweetTidy %>%
  anti_join(stop_words, by = "word") %>%
  count(word)

# The 30 most frequent words (ties at the cut-off are kept).
# `wt = n` makes the ranking column explicit; without it top_n() silently
# ranks by the last column of the table and prints "Selecting by n".
TopWords <- TweetFreqWord %>%
  top_n(30, wt = n)
#------------------------ plot top words with highchart
# Keep every token row whose word is one of the top words.
topword_list <- filter(TweetTidy, word %in% TopWords[["word"]])
#barplot(topword_list$word)

# Chart built by hchart() from the character vector of retained words.
hc2 <- hc_title(
  hchart(topword_list[["word"]], colorByPoint = TRUE, name = "Words"),
  text = "Top 30 Words in Tweets, during 2018 World Cup Period"
)

# Same data added as a series to an empty highchart(); this object is
# created here but is not displayed or saved below.
hc <- hc_add_series(
  hc_title(
    highchart(),
    text = "Top 30 Words in Tweets, during 2018 World Cup Period"
  ),
  topword_list[["word"]]
)

# Display the chart, then export it to an HTML file.
hc2
save_d3_html(hc2, file = "hc2.html")
#----------------------------------top words in hashtag------------------
#
# hash_top_word <- fifa %>%
# select(Hashtags) %>%
# unnest_tokens(word,Hashtags) %>%
# count(word) %>%
# top_n(30)
#
# hashword_list <- fifa %>%
# select(Hashtags) %>%
# unnest_tokens(word,Hashtags) %>%
# filter(word %in% hash_top_word)
#
# hc2<- hchart(hashword_list$word, colorByPoint = TRUE, name = "Words") %>%
# hc_title(text = "Top 30 Words in Tweets Hashtags, during 2018 World Cup Period")
#TweetFreqWord <- HashTidy %>%
# anti_join(stop_words) %>%
# count(word)
#TopWords <- TweetFreqWord %>%
# top_n(30)
# hc2 <- hchart(topword_list,type ="column")
# hc2 %>% hc_add_theme(hc_theme_google())
## -------------------------plot top words with ggplot
#
#
# ggplot(TopWords, aes(x=fct_reorder(word,n), y = n)) + geom_bar(stat = "identity",position = "dodge") +
# coord_flip() +
# theme_bw()+
# labs(title='Top 30 Words during 2018 World Cup Period',
# x='Count',
# y= 'Word') +
# scale_fill_brewer(palette = 1)
Frequency of words from different sources
#sort(table(fifa$Source))
# source_list = c('Instagram','Facebook')
#
# TweetTidy2 <- fifa %>%
# select(Tweet,Source) %>%
# filter(Source %in% source_list) %>%
# unnest_tokens(word,Tweet)
#
#
# freq_word = TweetTidy2 %>%
# group_by(Source) %>%
# anti_join(stop_words) %>%
# count(word)
#
# highchart() %>%
# hc_chart(type = 'bubble') %>%
# hc_xAxis(categories = TopWords$word) %>%
# hc_yAxis(categories = TopWords$n) #%>%
# #hc_add_series(freq_word, group = freq_word$Source, showInLegend = FALSE)
#
#
# hchart(freq_word, "scatter", hcaes(x = word, y = n, group = Source))
Top words through time
#
# tweet_word <- fifa %>%
# select(Tweet,Date) %>%
# unnest_tokens(word,Tweet)
#
# tweet_word <- tweet_word %>% mutate(Date = cut(Date, 'day'))
#
#
# freq_word = tweet_word %>%
# group_by(Date) %>%
# anti_join(stop_words) %>%
# count(word)
#
# max_day = freq_word %>%
# group_by(Date) %>%
# mutate(day_max = max(n)) %>%
# filter(n == day_max)
#library(xts)
#antenne <- as.xts(ts(start = c(2018,6), frequency = 365,
# data = max_day$day_max))
#ts_count <- ts(max_day$day_max, start = c(2018,6), frequency = 365)
#plot(ts_count)
#dygraph(ts_count)
#dygraph(ts_count, main = "Top word of the Day") %>%
# dyRangeSelector(dateWindow = c("2018-06-29", "2018-07-15"))
WordCloud
# The 100 most frequent words for the word cloud (ties at the cut-off kept).
# NOTE: `topWords` (lower-case t) is distinct from `TopWords`, the top-30
# table used for the bar chart.
# `wt = n` makes the ranking column explicit; without it top_n() silently
# ranks by the last column of the table and prints "Selecting by n".
topWords <- TweetFreqWord %>%
  top_n(100, wt = n)

# Draw the cloud: words are passed with their counts as frequencies,
# using the 8-colour "Set2" brewer palette.
wordcloud(
  topWords$word,
  topWords$n,
  scale = c(3, 0.4),
  colors = brewer.pal(8, "Set2")
)

Word network
# library(widyr)
#
# title_word_pairs <- TweetFreqWord %>%
# pairwise_count(word, id, sort = TRUE, upper = FALSE)
#
# title_word_pairs
Sentiment Analysis
WordCloud D3
#words=c("big", "data", "machine", "learning", "wordcloud", "R", "d3js", "algorithm", "analytics", "science", "API")
#freq=c(50, 50, 30, 30, 100, 10, 10, 10, 5, 5, 5 )
#json<-jsonWordCloud(TopWords$word, TopWords$n )
#D3WordCloud(json, file_out="word_cloud.html")