#install the necessary packages
# install.packages("readr")
# install.packages("plyr")
# install.packages("stringr")
# install.packages("stringi")
# install.packages("magrittr")
# install.packages("dplyr")
# install.packages('tm')
# install.packages('RColorBrewer')
# install.packages('wordcloud')
# install.packages('plotly')
# install.packages("ggplot2")
# install.packages("twitteR")
library('readr')
# Load the saved Zynga data set from disk and keep its text field for analysis
zynga <- readRDS(file = "Zynga.rds")
zyngaTweets <- zynga[["text"]]
#********************************************
# Clean tweets
#********************************************
#use this function to clean the tweets
# Normalise raw tweet text for word counting.
#
# x: character vector of tweets (may be empty).
# Returns a character vector of the same length, lower-cased, with links,
# @mentions, the standalone retweet marker "rt", punctuation and digits
# removed, and whitespace collapsed to single spaces and trimmed.
clean.text <- function(x)
{
  # replace everything outside printable ASCII (this includes tabs) by a space
  x <- gsub("[^\x20-\x7E]", " ", x)
  # lower-case first so that "RT" and "HTTP..." are caught by the rules below
  x <- tolower(x)
  # remove links while their punctuation is still intact
  x <- gsub("http\\S*", " ", x)
  # remove @mentions
  x <- gsub("@\\w+", " ", x)
  # remove the retweet marker only when it is a whole word, so that words
  # such as "party" or "smart" keep their letters
  x <- gsub("\\brt\\b", " ", x)
  # remove punctuation
  x <- gsub("[[:punct:]]", "", x)
  # remove numbers
  x <- gsub("[[:digit:]]", "", x)
  # collapse whitespace runs into ONE space; replacing them with nothing
  # would glue the neighbouring words together
  x <- gsub("[[:space:]]+", " ", x)
  # remove blank spaces at the beginning and at the end
  trimws(x)
}
# clean tweets
zyngaTweets <- clean.text(zyngaTweets)
# Attach the text-mining and plotting packages. library() stops with an error
# when a package is not installed; require() would only return FALSE and let
# the script fail later at the first call into the missing package.
library(tm)
## Loading required package: NLP
library(wordcloud)
## Loading required package: RColorBrewer
library(RColorBrewer)
# build a corpus from the cleaned tweets
corpus <- Corpus(VectorSource(zyngaTweets))
# corpus = Corpus(VectorSource(cmail))
# create term-document matrix: keep terms of 3 to 40 characters and drop
# punctuation, numbers and stop words
tdm <- TermDocumentMatrix(
  corpus,
  control = list(
    wordLengths = c(3, 40),
    removePunctuation = TRUE,
    stopwords = c("the", "a", stopwords("english")),
    removeNumbers = TRUE
  )
)
# convert as matrix
tdm <- as.matrix(tdm)
# get word counts in decreasing order (row sums of the matrix, one per term)
word_freqs <- sort(rowSums(tdm), decreasing = TRUE)
# create a data frame with words and their frequencies
dm <- data.frame(word = names(word_freqs), freq = word_freqs)
# remove the top words which we don't want to generate insights such as
# "the", "a", "and", etc.
# NOTE(review): dm was built before this trim, so the word cloud below still
# contains these nine terms; only the head() listing excludes them - confirm
# that this is intended.
word_freqs <- word_freqs[-(1:9)]
# Plot the 100 most frequent terms as a colored word cloud; needs the
# RColorBrewer package for the palette
wordcloud(
  head(dm$word, 100),
  head(dm$freq, 100),
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)
# check top 50 most mentioned words
head(word_freqs, 50)
## looking can prized now adult
## 258 232 219 196 187
## petra game jeneva found trees
## 187 184 174 167 166
## points car rthere bit video
## 142 142 140 140 139
## sponsorship needing shook rewards gotas
## 139 138 138 137 137
## betty play get mobile win
## 125 121 121 116 116
## fruit kathryn career farm use
## 113 112 111 110 109
## check spring won hand hat
## 107 106 95 94 93
## grew black farmvilleonweb crafting king
## 93 92 89 88 88
## county players new horse farmville
## 88 84 84 83 83
## nesting online dolls market ribbon
## 83 82 82 82 81
# Read the positive and negative word lists as character vectors; scan()
# ignores whatever follows a ';' on a line of these files
pos.words <- scan(
  file = "positive-words.txt",
  what = "character",
  comment.char = ";"
)
neg.words <- scan(
  file = "negative-words.txt",
  what = "character",
  comment.char = ";"
)
# add two extra terms to the end of the negative list
neg.words <- append(neg.words, c("wtf", "fail"))
#Implementing our sentiment scoring algorithm
require(plyr)
## Loading required package: plyr
require(stringr)
## Loading required package: stringr
# Score each sentence as (number of positive words) - (number of negative words).
#
# sentences: vector of texts to score (may be empty).
# pos.words: character vector of positive terms (lower case).
# neg.words: character vector of negative terms (lower case).
# .progress: name of a plyr progress bar; the default 'none' uses a base R
#            loop, any other value is handed to plyr::laply().
# Returns a data frame with one row per sentence and the columns
# `score` (the sentiment score) and `text` (the sentence as passed in).
score.sentiment <- function(sentences, pos.words, neg.words, .progress='none')
{
  # clean up all sentences at once: gsub() and tolower() are vectorised, so
  # the regex work does not have to be repeated inside the per-sentence loop
  cleaned <- gsub('[[:punct:]]', '', sentences)
  cleaned <- gsub('[[:cntrl:]]', '', cleaned)
  cleaned <- gsub('\\d+', '', cleaned)
  # and convert to lower case:
  cleaned <- tolower(cleaned)
  # split into words: one character vector of words per sentence
  word.lists <- strsplit(cleaned, '\\s+')
  # compare the words to the dictionaries of positive & negative terms;
  # %in% yields TRUE/FALSE, which sum() counts as 1/0
  score.words <- function(words) {
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }
  if (identical(.progress, 'none')) {
    # no progress bar wanted: base R is enough and always gives integers
    scores <- vapply(word.lists, score.words, integer(1), USE.NAMES = FALSE)
  } else {
    # keep plyr's progress reporting for callers that ask for it
    scores <- plyr::laply(word.lists, score.words, .progress = .progress)
  }
  data.frame(score = scores, text = sentences)
}
# Score every cleaned tweet against the two word lists and keep the
# numeric score column on its own for plotting
sentiment.scores <- score.sentiment(
  sentences = zyngaTweets,
  pos.words = pos.words,
  neg.words = neg.words,
  .progress = "none"
)
score <- sentiment.scores[["score"]]
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Histogram of the sentiment scores; the formula ~score refers to the
# `score` vector created above
p <- plot_ly(x = ~score, type = "histogram")
# a bare object name at top level prints it, which displays the plot
p
# Name of the weekday for each value in zynga$created
# (presumably the time each tweet was posted - confirm against the data)
zynga$days <- weekdays(as.POSIXlt(zynga$created))
Involvement <- zynga$days
# plotly is already attached earlier in this script; this call repeats it
library(plotly)
# Histogram of the weekday names, i.e. how many rows fall on each day
p <- plot_ly(x = ~Involvement, type = "histogram")
p
For a game company to increase its active users, it has to increase communication among the players and between the players and the company. Zynga is no exception. One technique for increasing the number of users is offering incentives: almost everyone loves incentives, and for Zynga, one of the largest game companies, the number of good users matters more than it does in other kinds of business, because without them it can lose everything.
Incentives to increase active users on Zynga:
Provide a gift for inactive players who come back to the games.
Provide a daily gift: players who want to achieve something in the game will keep logging in to redeem a specific item.
Reach those who are currently active and give them a weekend challenge event with prizes, and perhaps increased bonus points, not only for those who participate in the event but also for everyone who is active during the weekend.