Load data

# Read the tweet data set from FIFA.csv in the current working directory.
fifa <- read_csv(file = "FIFA.csv")

#sum(is.na(fifa$Place))

Frequency of words

# Split every tweet into one token per row, stored in a `word` column.
TweetTidy <- fifa %>%
  select(Tweet) %>%
  unnest_tokens(word, Tweet)

# Count occurrences of each word after dropping stop words. The join key is
# spelled out so the match cannot silently change (and no "Joining, by = ..."
# message is emitted) if either table later gains another shared column.
TweetFreqWord <- TweetTidy %>%
  anti_join(stop_words, by = "word") %>%
  count(word)

# Keep the 30 most frequent words. The weight column is named explicitly
# rather than relying on top_n() defaulting to the last column. Ties at the
# cut-off are all retained, so this may hold more than 30 rows.
TopWords <- TweetFreqWord %>%
  top_n(30, wt = n)

#------------------------ plot top words with highchart
# Every token occurrence whose word is one of the top words; hchart() is
# handed the raw character vector of these occurrences.
topword_list <- filter(TweetTidy, word %in% TopWords$word)

#barplot(topword_list$word)

# Plain highchart built from the same word vector (not displayed here).
hc <- hc_add_series(
  hc_title(
    highchart(),
    text = "Top 30 Words in Tweets, during 2018 World Cup Period"
  ),
  topword_list$word
)

# Chart with one colour per point, titled, then shown and exported.
hc2 <- hchart(topword_list$word, colorByPoint = TRUE, name = "Words")
hc2 <- hc_title(
  hc2,
  text = "Top 30 Words in Tweets, during 2018 World Cup Period"
)

hc2
save_d3_html(hc2, file = "hc2.html")
#----------------------------------top words in hashtag------------------
# 
# hash_top_word <- fifa %>%
#   select(Hashtags) %>%
#   unnest_tokens(word,Hashtags) %>%
#   count(word) %>%
#   top_n(30)
# 
# hashword_list <- fifa %>%
#   select(Hashtags) %>%
#   unnest_tokens(word,Hashtags) %>%
#   filter(word %in% hash_top_word)
# 
# hc2<- hchart(hashword_list$word, colorByPoint = TRUE, name = "Words")  %>%
#   hc_title(text = "Top 30 Words in Tweets Hashtags, during 2018 World Cup Period")



#TweetFreqWord <- HashTidy %>%
#  anti_join(stop_words) %>%
#  count(word)


#TopWords <- TweetFreqWord %>%
#  top_n(30)



# hc2 <- hchart(topword_list,type ="column")
# hc2 %>% hc_add_theme(hc_theme_google())

## -------------------------plot top words with ggplot


# 
# 
# ggplot(TopWords, aes(x=fct_reorder(word,n), y = n)) + geom_bar(stat = "identity",position = "dodge") +
#   coord_flip() + 
#   theme_bw()+
#   labs(title='Top 30 Words during 2018 World Cup Period',
#        x='Count',
#        y= 'Word') +
#   scale_fill_brewer(palette = 1)

Word frequency by tweet source

#sort(table(fifa$Source))

# source_list = c('Instagram','Facebook')
# 
# TweetTidy2 <- fifa %>%
#   select(Tweet,Source) %>%
#   filter(Source %in% source_list) %>%
#   unnest_tokens(word,Tweet)
# 
# 
# freq_word = TweetTidy2 %>%
#   group_by(Source) %>%
#   anti_join(stop_words) %>%
#   count(word)
# 
#  highchart() %>%
#     hc_chart(type = 'bubble') %>%
#     hc_xAxis(categories = TopWords$word) %>%
#     hc_yAxis(categories = TopWords$n) #%>%
#     #hc_add_series(freq_word, group = freq_word$Source, showInLegend = FALSE)
# 
# 
# hchart(freq_word, "scatter", hcaes(x = word, y = n, group = Source))

Top words through time

# 
# tweet_word <- fifa %>%
#   select(Tweet,Date) %>%
#   unnest_tokens(word,Tweet)
# 
# tweet_word <- tweet_word %>% mutate(Date = cut(Date, 'day'))
# 
# 
# freq_word = tweet_word %>% 
#   group_by(Date) %>%
#   anti_join(stop_words) %>% 
#   count(word)
# 
# max_day = freq_word %>%
#   group_by(Date) %>% 
#   mutate(day_max = max(n)) %>%
#   filter(n == day_max)

#library(xts)
#antenne <- as.xts(ts(start = c(2018,6), frequency = 365,
 # data = max_day$day_max))


#ts_count <- ts(max_day$day_max, start = c(2018,6), frequency = 365)

#plot(ts_count)
#dygraph(ts_count)

#dygraph(ts_count, main = "Top word of the Day") %>% 
 # dyRangeSelector(dateWindow = c("2018-06-29", "2018-07-15"))

WordCloud

# The 100 highest-count words from the stop-word-filtered frequency table
# (note: `topWords` here is distinct from the 30-word `TopWords` above).
topWords <- top_n(TweetFreqWord, 100)

# Draw the word cloud, sizing each word by its count.
wordcloud(
  words = topWords$word,
  freq = topWords$n,
  scale = c(3, 0.4),
  colors = brewer.pal(n = 8, name = "Set2")
)

Word network

# library(widyr)
# 
# title_word_pairs <- TweetFreqWord %>% 
#   pairwise_count(word, id, sort = TRUE, upper = FALSE)
# 
# title_word_pairs

Sentiment Analysis

WordCloud D3

#words=c("big", "data", "machine", "learning", "wordcloud", "R", "d3js", "algorithm", "analytics", "science", "API")
#freq=c(50, 50, 30, 30, 100, 10, 10, 10, 5, 5, 5 )
#json<-jsonWordCloud(TopWords$word, TopWords$n )
#D3WordCloud(json, file_out="word_cloud.html")