Install required packages

#install the necessary packages
# install.packages("readr")
# install.packages("plyr")
# install.packages("stringr")
# install.packages("stringi")
# install.packages("magrittr")
# install.packages("dplyr")
# install.packages('tm')
# install.packages('RColorBrewer')
# install.packages('wordcloud')
# install.packages('plotly')
# install.packages("ggplot2")
# install.packages("twitteR")

Clean Data

# Attach readr, then read the saved data set from the RDS file on disk.
library(readr)
zynga <- readRDS(file = "Zynga.rds")
# Keep the `text` column on its own for the cleaning and text-mining steps.
zyngaTweets <- zynga$text

#********************************************
#         Clean tweets
#********************************************
#use this function to clean the tweets
# Normalise raw tweet text for text mining.
#
# Args:
#   x: character vector of tweets.
#
# Returns:
#   A character vector of the same length as `x`, lower-cased, with
#   non-printable / non-ASCII characters, links, the retweet marker "rt",
#   @mentions, punctuation and digits removed, runs of whitespace collapsed
#   to a single space, and leading/trailing whitespace trimmed.
clean.text <- function(x) {
  # replace everything outside printable ASCII (this includes tabs and
  # newlines) with a space
  x <- gsub("[^\x20-\x7E]", " ", x)
  # lower-case first so the patterns below also match "RT", "HTTP", ...
  x <- tolower(x)
  # remove links while the URL is still a single token, i.e. before
  # punctuation is stripped
  x <- gsub("http\\S+", "", x)
  # remove the retweet marker only as a whole word, so words that merely
  # contain "rt" (e.g. "party") stay intact
  x <- gsub("\\brt\\b", "", x, perl = TRUE)
  # remove @mentions
  x <- gsub("@\\w+", "", x)
  # remove punctuation
  x <- gsub("[[:punct:]]", "", x)
  # remove numbers
  x <- gsub("[[:digit:]]", "", x)
  # collapse runs of whitespace into ONE space; deleting them outright would
  # fuse the neighbouring words together
  x <- gsub("\\s+", " ", x)
  # remove blank space at the beginning and at the end
  trimws(x)
}
# Run every tweet through clean.text(), overwriting the raw text vector.
zyngaTweets <- clean.text(zyngaTweets)

Word Cloud of Zynga Users Tweets

require(tm)
## Loading required package: tm
## Loading required package: NLP
require(wordcloud)
## Loading required package: wordcloud
## Loading required package: RColorBrewer
require(RColorBrewer)

# Wrap the cleaned tweets in a tm corpus built from the character vector.
corpus <- Corpus(VectorSource(zyngaTweets))
# corpus = Corpus(VectorSource(cmail))

# Term-document matrix: restrict term length to the 3..40 range and let tm
# strip punctuation, numbers and English stop words (plus "the" and "a").
tdm <- TermDocumentMatrix(
  corpus,
  control = list(
    wordLengths = c(3, 40),
    removePunctuation = TRUE,
    stopwords = c("the", "a", stopwords("english")),
    removeNumbers = TRUE
  )
)

# Convert to an ordinary matrix (terms in rows).
tdm <- as.matrix(tdm)

# Total count per term, most frequent first.
word_freqs <- sort(rowSums(tdm), decreasing = TRUE)

# Term / frequency table used for the word cloud.
dm <- data.frame(word = names(word_freqs), freq = word_freqs)

# Drop the nine most frequent terms from the frequency vector.
# NOTE(review): `dm` was built before this step, so the word cloud below
# still includes those nine terms -- confirm that this is intended.
word_freqs <- word_freqs[-seq_len(9)]

# Colored word cloud of the first 100 rows of `dm`; the palette comes from
# the RColorBrewer package.
wordcloud(
  words = head(dm$word, 100),
  freq = head(dm$freq, 100),
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)

# Show the 50 most frequent of the remaining terms.
head(word_freqs, 50)
##        looking            can         prized            now          adult 
##            258            232            219            196            187 
##          petra           game         jeneva          found          trees 
##            187            184            174            167            166 
##         points            car         rthere            bit          video 
##            142            142            140            140            139 
##    sponsorship        needing          shook        rewards          gotas 
##            139            138            138            137            137 
##          betty           play            get         mobile            win 
##            125            121            121            116            116 
##          fruit        kathryn         career           farm            use 
##            113            112            111            110            109 
##          check         spring            won           hand            hat 
##            107            106             95             94             93 
##           grew          black farmvilleonweb       crafting           king 
##             93             92             89             88             88 
##         county        players            new          horse      farmville 
##             88             84             84             83             83 
##        nesting         online          dolls         market         ribbon 
##             83             82             82             82             81

Sentiment Analysis of Zynga Users

# Read the positive and negative word lists; anything after a ';' in those
# files is treated as a comment and skipped.
pos.words <- scan("positive-words.txt", what = "character", comment.char = ";")
neg.words <- scan("negative-words.txt", what = "character", comment.char = ";")

# Extend the negative list with two extra terms.
neg.words <- append(neg.words, c("wtf", "fail"))

#Implementing our sentiment scoring algorithm
require(plyr)
## Loading required package: plyr
require(stringr)
## Loading required package: stringr
# Score each sentence as (number of positive words) - (number of negative
# words).
#
# Args:
#   sentences: character vector (or list) of texts to score.
#   pos.words: character vector of positive terms.
#   neg.words: character vector of negative terms.
#   .progress: name of the progress bar, passed straight on to laply().
#
# Returns:
#   A data.frame with one row per sentence and two columns: `score` (the
#   sentiment score) and `text` (the sentence as it was passed in).
score.sentiment <- function(sentences, pos.words, neg.words, .progress = 'none') {
  # plyr handles a list or a vector as an "l" input and we want a simple
  # array of scores back, hence "l" + "a" + "ply" = laply()
  scores <- laply(sentences, function(sentence, pos.words, neg.words) {
    # strip punctuation
    sentence <- gsub('[[:punct:]]', '', sentence)
    # turn control characters (tab, newline, ...) into a space; deleting
    # them would fuse the words on either side and hide lexicon matches
    sentence <- gsub('[[:cntrl:]]', ' ', sentence)
    # strip digits
    sentence <- gsub('\\d+', '', sentence)
    # and convert to lower case
    sentence <- tolower(sentence)

    # split on whitespace (str_split is in the stringr package) and flatten
    # the resulting one-element list into a plain character vector
    words <- unlist(str_split(sentence, '\\s+'))

    # %in% yields TRUE/FALSE per word, which sum() counts as 1/0
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }, pos.words, neg.words, .progress = .progress)

  data.frame(score = scores, text = sentences)
}

# Score every cleaned tweet against the two word lists.
sentiment.scores <- score.sentiment(
  sentences = zyngaTweets,
  pos.words = pos.words,
  neg.words = neg.words,
  .progress = "none"
)
# Pull the score column out on its own for plotting.
score <- sentiment.scores$score

library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
## 
##     annotate
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Histogram of the sentiment scores; evaluating `p` on its own line renders it.
p <- plot_ly(type = "histogram", x = ~score)
p

Zynga Users Involvement On Each Weekday

# Weekday name for each `created` timestamp, also stored on the data set as
# the `days` column.
Involvement <- weekdays(as.POSIXlt(zynga$created))
zynga$days <- Involvement

# Histogram of how many entries fall on each weekday.
library(plotly)
p <- plot_ly(type = "histogram", x = ~Involvement)
p

Recommendations For Zynga to Increase Active Users

For a game company to increase its active users, it has to increase communication both among the players themselves and between the players and the company. Zynga is no exception. One technique for increasing the number of users is to offer incentives, since almost everyone loves incentives. For Zynga, one of the largest game companies, the number of good users matters more than it does in other kinds of business, because without them the company can lose everything.

Incentives to increase Active users on Zynga: