data file

# Load the IGN review data set.
df<-read.csv('/Users/jonathanbouchet/Desktop/WORK/PROJECT/gaming/IGN/ign.csv',sep=',')
# Fix the one entry whose release date is recorded as 1970-01-01.
# The affected row indices are captured once, before any column is changed:
# re-evaluating `release_year < 1980` after the year has been set to 2012
# selects no rows, so the month and day would silently keep their old values.
# which() also drops NA comparisons, which data-frame subscripted assignment
# does not accept.
badDate <- which(df$release_year < 1980)
df[badDate,]
##       X score_phrase                                              title
## 517 516        Great The Walking Dead: The Game -- Episode 1: A New Day
##                                                            url platform
## 517 /games/the-walking-dead-season-1-episode-1/xbox-360-135866 Xbox 360
##     score     genre editors_choice release_year release_month release_day
## 517   8.5 Adventure              N         1970             1           1
df[badDate,'release_year']<-2012
df[badDate,'release_month']<-4
df[badDate,'release_day']<-27
library(ggplot2)
library(gridExtra)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(circlize)

data preparation

I define new vectors to summarize the genres and platforms.

#platform
# Platform labels grouped by manufacturer / ecosystem. newManufacturer() tests
# membership in these vectors with %in%, which is case-sensitive.
sony <- c(
    'PlayStation', 'PlayStation 2', 'PlayStation 3', 'PlayStation 4',
    'PlayStation Portable', 'PlayStation Vita'
)
micro <- c('Xbox', 'Xbox 360', 'Xbox One')
bigN <- c(
    'Game Boy', 'Game Boy Advance', 'Game Boy Color', 'GameCube', 'NES',
    'New Nintendo 3DS', 'Nintendo 3DS', 'Nintendo 64', 'Nintendo 64DD',
    'Nintendo DS', 'Nintendo DSi', 'Super NES', 'Wii', 'Wii U'
)
sega <- c(
    'Dreamcast', 'Dreamcast VMU', 'Genesis', 'Master System', 'Saturn',
    'Sega 32X', 'Sega CD'
)
bandai <- c('WonderSwan', 'WonderSwan Color')
nokia <- c('N-Gage')
neogeo <- c('NeoGeo', 'NeoGeo Pocket Color')
nec <- c('TurboGrafx-16', 'TurboGrafx-CD')
atari <- c('Atari 5200', 'Atari 2600', 'Lynx')
apple <- c('iPad', 'iPhone', 'iPod', 'Macintosh')
win <- c('Pocket PC', 'PC', 'Windows Phone', 'Windows Surface')
and <- c('Android', 'Ouya')
# 'Linux' is listed next to the original 'linux': every other label in this
# section is capitalised, and a case mismatch would send those rows to 'OTHER'.
# NOTE(review): confirm the spelling used in the data. The lowercase entry is
# kept so anything that matched before still matches.
lin <- c('SteamOS', 'linux', 'Linux')

#genre
# Raw genre labels grouped under one summary genre each; genreBetter() tests
# membership in these vectors.
action <- c(
    'Action', 'Action, Adventure', 'Action, Compilation', 'Action, Platformer',
    'Action, Puzzle', 'Action, RPG', 'Action, Simulation', 'Action, Editor',
    'Action, Strategy'
)
adventure <- c(
    'Adventure', 'Adventure, Adventure', 'Adventure, Compilation',
    'Adventure, Episodic', 'Adventure, Platformer', 'Adventure, RPG',
    'Adventure, Adult'
)
sport <- c(
    'Baseball', 'Sports', 'Sports, Action', 'Sports, Baseball',
    'Sports, Compilation', 'Sports, Racing', 'Sports, Simulation',
    'Sports, Editor', 'Sports, Fighting', 'Sports, Golf', 'Sports, Other',
    'Sports, Party'
)
fighting <- c(
    'Fighting', 'Fighting, Action', 'Fighting, Adventure',
    'Fighting, Compilation', 'Fighting, RPG', 'Fighting, Simulation'
)
platform <- c('Platformer', 'Platformer, Action', 'Platformer, Adventure')
racing <- c(
    'Racing', 'Racing, Action', 'Racing, Shooter', 'Racing, Simulation',
    'Racing, Editor', 'Racing, Compilation'
)
rpg <- c('RPG', 'RPG, Editor', 'RPG, Simulation', 'RPG, Action', 'RPG, Compilation')
sim <- c('Simulation', 'Simulation, Adventure')
music <- c(
    'Music', 'Music, Action', 'Music, Compilation', 'Music, Editor',
    'Music, Adventure', 'Music, RPG'
)
# 'Puzzle, Compilation' appears twice in this list; membership tests are
# unaffected by the repeat.
puzzle <- c(
    'Puzzle', 'Puzzle, Action', 'Puzzle, Compilation', 'Puzzle, Adventure',
    'Puzzle, Platformer', 'Puzzle, RPG', 'Puzzle, Word Game',
    'Puzzle, Compilation'
)
strategy <- c(
    'Strategy', 'Strategy, RPG', 'Strategy, Compilation', 'Strategy, Simulation'
)
edu <- c(
    'Educational', 'Educational, Action', 'Educational, Puzzle',
    'Educational, Trivia', 'Educational, Adventure', 'Educational, Card',
    'Educational, Productivity', 'Educational, Simulation'
)
flight <- c('Flight', 'Flight, Action', 'Flight, Racing', 'Flight, Simulation')
shooter <- c(
    'Shooter', 'Shooter, Platformer', 'Shooter, RPG', 'Shooter, Adventure',
    'Shooter, First-Person'
)
party <- c('Party')
card <- c('Card', 'Card, Battle', 'Card, Compilation', 'Card, RPG')
pinball <- c('Pinball', 'Pinball, Compilation')
compil <- c('Compilation', 'Compilation, RPG')
hunting <- c('Hunting', 'Hunting, Action', 'Hunting, Simulation')
wrestling <- c('Wrestling', 'Wrestling, Simulation')
prod <- c('Productivity', 'Productivity, Action')

I create new columns for these 2 new features

# Map a single platform label to its manufacturer label.
#
# x: one platform label.
# Returns the label of the first group (in the order listed below) whose
# vector contains x, or 'OTHER' when no group contains it.
newManufacturer<-function(x){
    # Ordered lookup: manufacturer label -> platform labels in that group.
    makers <- list(
        SONY = sony,
        MICROSOFT = micro,
        NINTENDO = bigN,
        SEGA = sega,
        BANDAI = bandai,
        NOKIA = nokia,
        NEOGEO = neogeo,
        NEC = nec,
        ATARI = atari,
        APPLE = apple,
        WINDOWS = win,
        ANDROID = and,
        LINUX = lin
    )
    for (label in names(makers)) {
        if (x %in% makers[[label]]) {
            return(label)
        }
    }
    'OTHER'
}

# Map a single raw genre label to its summary genre label.
#
# x: one genre label.
# Returns the label of the first group (in the order listed below) whose
# vector contains x, or 'OTHER' when no group contains it.
genreBetter<-function(x){
    # Ordered lookup: summary label -> raw genre labels in that group.
    genres <- list(
        ACTION = action,
        ADVENTURE = adventure,
        SPORTS = sport,
        FIGHTING = fighting,
        PLATFORM = platform,
        RACING = racing,
        RPG = rpg,
        SIMULATION = sim,
        MUSICAL = music,
        PUZZLE = puzzle,
        STRATEGY = strategy,
        EDUCATIONAL = edu,
        FLIGHT = flight,
        SHOOTER = shooter,
        PARTY = party,
        CARD = card,
        PINBALL = pinball,
        COMPIL = compil,
        PRODUCTIVITY = prod,
        WRESTLING = wrestling,
        HUNTING = hunting
    )
    for (label in names(genres)) {
        if (x %in% genres[[label]]) {
            return(label)
        }
    }
    'OTHER'
}
# Derive the two summary columns, one label per row. vapply() pins the result
# to a character vector (sapply() returns an empty list for zero-length
# input), and USE.NAMES = FALSE keeps the raw genre/platform strings from
# being attached as element names.
df$newGenre<-vapply(df$genre, genreBetter, character(1), USE.NAMES = FALSE)
df$newPlatform<-vapply(df$platform, newManufacturer, character(1), USE.NAMES = FALSE)

summary plots

With these plots we can estimate the most popular genres, as well as the most productive years over the last 20 years. It appears that ACTION, SPORTS and SHOOTER games are the most popular across all consoles. SHOOTER games are less present on NINTENDO consoles, though. 2008 appears to be the golden year, in large part due to NINTENDO (Wii console).

# Number of rows per summary genre (most frequent genre first), stacked by
# manufacturer.
ggplot(df, aes(x = reorder(newGenre, newGenre, function(g) -length(g)))) +
    geom_bar(aes(fill = newPlatform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Genre') +
    theme(legend.title = element_blank())

# Number of rows per release year, stacked by manufacturer.
ggplot(df, aes(x = factor(release_year))) +
    geom_bar(aes(fill = newPlatform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Year') +
    theme(legend.title = element_blank())

# Number of rows per release year, stacked by summary genre.
ggplot(df, aes(x = factor(release_year))) +
    geom_bar(aes(fill = newGenre), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Year') +
    theme(legend.title = element_blank())

Sony

# SONY rows only: counts per summary genre (most frequent first), stacked by
# platform, with the legend drawn inside the panel.
sonyGenre <- ggplot(
    df[df$newPlatform == 'SONY', ],
    aes(x = reorder(newGenre, newGenre, function(g) -length(g)))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Genre') +
    theme(legend.position = c(.7, .7))

# SONY rows only: counts per release year, stacked by platform, no legend.
sonyYear <- ggplot(
    df[df$newPlatform == 'SONY', ],
    aes(x = factor(release_year))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Year') +
    theme(legend.position = "none")

# Show both plots side by side.
grid.arrange(sonyGenre, sonyYear, ncol = 2)

Microsoft

# MICROSOFT rows only: counts per summary genre (most frequent first), stacked
# by platform, with the legend drawn inside the panel.
microsoftGenre <- ggplot(
    df[df$newPlatform == 'MICROSOFT', ],
    aes(x = reorder(newGenre, newGenre, function(g) -length(g)))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Genre') +
    theme(legend.position = c(.7, .7))

# MICROSOFT rows only: counts per release year, stacked by platform, no legend.
microsoftYear <- ggplot(
    df[df$newPlatform == 'MICROSOFT', ],
    aes(x = factor(release_year))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Year') +
    theme(legend.position = "none")

# Show both plots side by side.
grid.arrange(microsoftGenre, microsoftYear, ncol = 2)

Nintendo

# NINTENDO rows only: counts per summary genre (most frequent first), stacked
# by platform, with the legend drawn inside the panel.
nintendoGenre <- ggplot(
    df[df$newPlatform == 'NINTENDO', ],
    aes(x = reorder(newGenre, newGenre, function(g) -length(g)))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Genre') +
    theme(legend.position = c(.8, .6))

# NINTENDO rows only: counts per release year, stacked by platform, no legend.
nintendoYear <- ggplot(
    df[df$newPlatform == 'NINTENDO', ],
    aes(x = factor(release_year))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Year') +
    theme(legend.position = "none")

# Show both plots side by side.
grid.arrange(nintendoGenre, nintendoYear, ncol = 2)

Sega

# SEGA rows only: counts per summary genre (most frequent first), stacked by
# platform, with the legend drawn inside the panel.
segaGenre <- ggplot(
    df[df$newPlatform == 'SEGA', ],
    aes(x = reorder(newGenre, newGenre, function(g) -length(g)))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Genre') +
    theme(legend.position = c(.7, .7))

# SEGA rows only: counts per release year, stacked by platform, no legend.
segaYear <- ggplot(
    df[df$newPlatform == 'SEGA', ],
    aes(x = factor(release_year))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Year') +
    theme(legend.position = "none")

# Show both plots side by side.
grid.arrange(segaGenre, segaYear, ncol = 2)

Apple

# APPLE rows only: counts per summary genre (most frequent first), stacked by
# platform, with the legend drawn inside the panel.
appleGenre <- ggplot(
    df[df$newPlatform == 'APPLE', ],
    aes(x = reorder(newGenre, newGenre, function(g) -length(g)))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Genre') +
    theme(legend.position = c(.8, .6))

# APPLE rows only: counts per release year, stacked by platform, no legend.
appleYear <- ggplot(
    df[df$newPlatform == 'APPLE', ],
    aes(x = factor(release_year))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Year') +
    theme(legend.position = "none")

# Show both plots side by side.
grid.arrange(appleGenre, appleYear, ncol = 2)

Bandai

# BANDAI rows only: counts per summary genre (most frequent first), stacked by
# platform, with the legend drawn inside the panel.
bandaiGenre <- ggplot(
    df[df$newPlatform == 'BANDAI', ],
    aes(x = reorder(newGenre, newGenre, function(g) -length(g)))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Genre') +
    theme(legend.position = c(.8, .6))

# BANDAI rows only: counts per release year, stacked by platform, no legend.
bandaiYear <- ggplot(
    df[df$newPlatform == 'BANDAI', ],
    aes(x = factor(release_year))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Year') +
    theme(legend.position = "none")

# Show both plots side by side.
grid.arrange(bandaiGenre, bandaiYear, ncol = 2)

Nec

# NEC rows only: counts per summary genre (most frequent first), stacked by
# platform, with the legend drawn inside the panel.
necGenre <- ggplot(
    df[df$newPlatform == 'NEC', ],
    aes(x = reorder(newGenre, newGenre, function(g) -length(g)))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Genre') +
    theme(legend.position = c(.8, .6))

# NEC rows only: counts per release year, stacked by platform, no legend.
necYear <- ggplot(
    df[df$newPlatform == 'NEC', ],
    aes(x = factor(release_year))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Year') +
    theme(legend.position = "none")

# Show both plots side by side.
grid.arrange(necGenre, necYear, ncol = 2)

NeoGeo

# NEOGEO rows only: counts per summary genre (most frequent first), stacked by
# platform, with the legend drawn inside the panel.
neogeoGenre <- ggplot(
    df[df$newPlatform == 'NEOGEO', ],
    aes(x = reorder(newGenre, newGenre, function(g) -length(g)))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Genre') +
    theme(legend.position = c(.8, .6))

# NEOGEO rows only: counts per release year, stacked by platform, no legend.
neogeoYear <- ggplot(
    df[df$newPlatform == 'NEOGEO', ],
    aes(x = factor(release_year))
) +
    geom_bar(aes(fill = platform), color = 'black') +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab('Year') +
    theme(legend.position = "none")

# Show both plots side by side.
grid.arrange(neogeoGenre, neogeoYear, ncol = 2)

Chord Diagrams

Chord diagrams are useful for representing relationships between data arranged in a matrix. In R, we can use the circlize package to make a chordDiagram.

Sony

Below is a representation of the 5 best genres for SONY consoles.

# Count rows per summary genre on SONY platforms, in decreasing order
bestSony <- as.data.frame(
    sort(table(df[df$newPlatform == 'SONY', 'newGenre']), decreasing = TRUE)
)

# Keep the names of the first five genres (the five most frequent)
bestSonyTop <- as.vector(bestSony[[1]][1:5])

# Subset the initial data frame to SONY rows in those genres, then count the
# number of rows for each (platform, summary genre) pair
d1 <- subset(df, newPlatform == 'SONY' & newGenre %in% bestSonyTop)
t1 <- aggregate(genre ~ platform + newGenre, d1, FUN = length)

# Plot the chord diagram
chordSony <- chordDiagram(t1)

Microsoft

Below is a representation of the 5 best genres for MICROSOFT consoles.

# Same steps for MICROSOFT platforms: count rows per summary genre in
# decreasing order
bestMicro <- as.data.frame(
    sort(table(df[df$newPlatform == 'MICROSOFT', 'newGenre']), decreasing = TRUE)
)
# Keep the names of the first five genres (the five most frequent)
bestMicroTop <- as.vector(bestMicro[[1]][1:5])
# Count the rows for each (platform, summary genre) pair within those genres
d2 <- subset(df, newPlatform == 'MICROSOFT' & newGenre %in% bestMicroTop)
t2 <- aggregate(genre ~ platform + newGenre, d2, FUN = length)
# Plot the chord diagram
chordMicro <- chordDiagram(t2)

#grid.arrange(chordSony,chordMicro,ncol=2)