We want to find out which heroes are most often requested for Heroes of the Storm in Reddit comments.
First, we’ll load the required packages.
suppressPackageStartupMessages({
library(tidytext)
library(tidyverse)
library(ggplot2)
})
Next, we load the flat file of comments (this will take some time) and count how often each word appears across all comments. We then load our character names and convert them to lower case so that they match the lower-cased word tokens.
# Read the raw comment dump; `body` holds the comment text. Coerce it to
# character so it can be tokenized regardless of how read.csv() typed it.
hots <- read.csv("heroes_data.csv")
hots$body <- as.character(hots$body)

# Split every comment into one row per word, then tally each distinct word
# across all comments, most frequent first. The result has columns `word`
# and `n`.
hots_words <- hots %>%
  unnest_tokens(word, body) %>%
  count(word, sort = TRUE) %>%
  ungroup()
# Hero name list: one column per franchise.
characters <- read.csv("Heroes of the Storm Character List.csv")

# Lower-case every franchise column so the names can be compared against the
# word tokens counted from the comments.
franchise_cols <- c(
  "WarCraft", "Diablo", "StarCraft",
  "Overwatch", "Hearthstone", "Classics"
)
for (franchise in franchise_cols) {
  characters[[franchise]] <- tolower(characters[[franchise]])
}
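We then filter the word counts by franchise and plot the results. For WarCraft, StarCraft, Diablo, and Overwatch, only names mentioned at least 100 times are shown; Hearthstone and the Blizzard Classics are shown in full.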
# Keep only the word counts that correspond to the given hero names.
#
# word_counts:  data frame with a `word` column and an `n` (count) column.
# hero_names:   character vector of lower-cased hero names to keep.
# min_mentions: drop names mentioned fewer than this many times; the default
#               of 0 keeps every matching name.
# Returns the matching rows of `word_counts`.
# NOTE(review): matching is done on single word tokens, so any multi-word
# name in the character list can never match -- confirm against the list.
filter_heroes <- function(word_counts, hero_names, min_mentions = 0) {
  word_counts %>%
    filter(word %in% hero_names, n >= min_mentions)
}

# Horizontal bar chart of mention counts, one bar per hero, ordered by count.
#
# counts:      data frame with `word` and `n` columns (see filter_heroes()).
# title:       plot title.
# label_hjust: horizontal justification of the count label placed at the end
#              of each bar (0 starts the label at the bar end, 1 ends it
#              there).
# Returns the ggplot object; at top level it is printed automatically.
plot_hero_mentions <- function(counts, title, label_hjust = 0) {
  ggplot(data = counts, aes(x = reorder(word, n), y = n, fill = word)) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = n), hjust = label_hjust) +
    labs(x = NULL, y = "Word Frequency", title = title) +
    theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) +
    coord_flip()
}

# The four larger franchises are limited to names with at least 100 mentions.
warcraft <- filter_heroes(hots_words, characters$WarCraft, min_mentions = 100)
plot_hero_mentions(warcraft, "Most Requested Warcraft Heroes")

starcraft <- filter_heroes(hots_words, characters$StarCraft, min_mentions = 100)
plot_hero_mentions(starcraft, "Most Requested Starcraft Heroes", label_hjust = 1)

diablo <- filter_heroes(hots_words, characters$Diablo, min_mentions = 100)
plot_hero_mentions(diablo, "Most Requested Diablo Heroes")

overwatch <- filter_heroes(hots_words, characters$Overwatch, min_mentions = 100)
plot_hero_mentions(overwatch, "Most Requested Overwatch Heroes")

# Hearthstone and the Blizzard Classics are plotted without a mention cutoff.
hearthstone <- filter_heroes(hots_words, characters$Hearthstone)
plot_hero_mentions(hearthstone, "Most Requested Hearthstone Heroes")

classic <- filter_heroes(hots_words, characters$Classics)
plot_hero_mentions(classic, "Most Requested Blizzard Classic Heroes")
Finally, we find the top 10 most requested heroes across all franchises and plot.
# Ten most-mentioned hero names across all six franchises.
overall <- hots_words %>%
  # A word qualifies if it appears in any franchise's (lower-cased) name list.
  filter(
    word %in% c(
      characters$WarCraft, characters$StarCraft, characters$Diablo,
      characters$Overwatch, characters$Hearthstone, characters$Classics
    )
  ) %>%
  # Rank explicitly by the count column `n` rather than letting top_n() fall
  # back to whichever column happens to be last. Ties at the cutoff are kept,
  # so more than ten rows can be returned.
  top_n(n = 10, wt = n)
## Selecting by n
# Horizontal bar chart of the overall top heroes, ordered by mention count,
# with each bar's count printed so that it ends at the bar's tip.
ggplot(data = overall, aes(x = reorder(word, n), y = n, fill = word)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = n), hjust = 1) +
  labs(x = NULL, y = "Word Frequency", title = "Most Requested Heroes") +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  coord_flip()
Fenix has the most mentions, followed by Kel’thuzad. Note that these comments span the period from launch (06/2015) to 02/2017.