->Reading dataset into R
# Load the raw video game sales data.
# stringsAsFactors = TRUE is spelled out because the rest of the analysis
# (level counts in table()/xtabs(), describe(), factor-to-number conversions)
# expects text columns to be factors, and read.csv() stopped converting them
# by default in R 4.0.
# NOTE(review): the doubled ".csv.csv" extension is kept as written -- confirm
# it matches the file on disk.
vgame1.df <- read.csv(
  "Video Game Sales Data.csv.csv",
  stringsAsFactors = TRUE
)
# Interactive, spreadsheet-style preview of the full data set.
View(vgame1.df)
-Dimensions of the dataset
# Number of rows and columns in the full data set.
dim(vgame1.df)
## [1] 16719 16
-Visualizing the data: frequency count based on the year of release of a video game
# Frequency of each distinct Year_of_Release value in the full data set.
table(vgame1.df$Year_of_Release)
##
## 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994
## 9 46 36 17 14 14 21 16 15 17 16 41 43 62 121
## 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009
## 219 263 289 379 338 350 482 829 775 762 939 1006 1197 1427 1426
## 2010 2011 2012 2013 2014 2015 2016 2017 2020 N/A
## 1255 1136 653 544 581 606 502 3 1 269
->Cleaning the data
-1) Viewing the subset-1 containing data of only year 2016
# Subset 1: keep only the rows released in 2016 (the year is matched as text).
vgame2016 <- vgame1.df[vgame1.df$Year_of_Release %in% "2016", ]
# Interactive preview of the 2016 subset.
View(vgame2016)
-Dimensions of subset-1(year 2016)
# Number of rows and columns in the 2016 subset.
dim(vgame2016)
## [1] 502 16
-Summarizing the subset-1 (year 2016)
# psych supplies describe(): per-column summary statistics for the subset.
library(psych)
psych::describe(vgame2016)
## vars n mean sd median trimmed mad min
## Name* 1 502 5739.26 3456.23 5894.00 5739.69 4452.25 3.00
## Platform* 2 502 19.83 7.47 19.00 20.43 2.97 3.00
## Year_of_Release* 3 502 37.00 0.00 37.00 37.00 0.00 37.00
## Genre* 4 502 5.85 3.89 5.00 5.53 4.45 2.00
## Publisher* 5 502 309.33 178.72 354.00 315.46 231.29 7.00
## NA_Sales 6 502 0.09 0.26 0.01 0.03 0.01 0.00
## EU_Sales 7 502 0.10 0.36 0.01 0.03 0.01 0.00
## JP_Sales 8 502 0.04 0.13 0.00 0.02 0.00 0.00
## Other_Sales 9 502 0.03 0.09 0.00 0.01 0.00 0.00
## Global_Sales 10 502 0.26 0.70 0.06 0.11 0.07 0.01
## Critic_Score 11 232 73.16 11.74 74.50 74.28 11.12 31.00
## Critic_Count 12 232 30.25 23.89 22.00 26.98 19.27 4.00
## User_Score* 13 502 41.07 36.42 49.50 39.55 52.63 1.00
## User_Count 14 262 264.97 671.24 57.00 126.51 69.68 5.00
## Developer* 15 502 545.22 580.25 378.00 484.34 558.94 1.00
## Rating* 16 502 3.86 3.13 3.00 3.58 2.97 1.00
## max range skew kurtosis se
## Name* 11536.00 11533.00 -0.04 -1.25 154.26
## Platform* 31.00 28.00 -0.45 0.36 0.33
## Year_of_Release* 37.00 0.00 NaN NaN 0.00
## Genre* 13.00 11.00 0.41 -1.45 0.17
## Publisher* 576.00 569.00 -0.26 -1.32 7.98
## NA_Sales 2.98 2.98 5.69 42.87 0.01
## EU_Sales 5.75 5.75 9.76 128.76 0.02
## JP_Sales 2.26 2.26 11.51 169.13 0.01
## Other_Sales 1.11 1.11 6.80 57.74 0.00
## Global_Sales 7.59 7.58 6.52 52.98 0.03
## Critic_Score 93.00 62.00 -0.85 0.64 0.77
## Critic_Count 113.00 109.00 1.12 0.66 1.57
## User_Score* 97.00 96.00 0.01 -1.69 1.63
## User_Count 7064.00 7059.00 6.43 52.11 41.47
## Developer* 1671.00 1670.00 0.60 -1.15 25.90
## Rating* 9.00 8.00 0.57 -1.29 0.14
-One-way Contingency tables of subset-1( year 2016)
# One-way frequency table of Genre for 2016.
mytable1 <- table(Genre = vgame2016$Genre)
mytable1
## Genre
## Action Adventure Fighting Misc
## 0 178 56 16 32
## Platform Puzzle Racing Role-Playing Shooter
## 15 1 24 54 47
## Simulation Sports Strategy
## 18 48 13
# One-way frequency table of Platform for 2016.
mytable2 <- table(Platform = vgame2016$Platform)
mytable2
## Platform
## 2600 3DO 3DS DC DS GB GBA GC GEN GG N64 NES NG PC PCFX
## 0 0 46 0 0 0 0 0 0 0 0 0 0 54 0
## PS PS2 PS3 PS4 PSP PSV SAT SCD SNES TG16 Wii WiiU WS X360 XB
## 0 0 38 164 0 85 0 0 0 0 1 14 0 13 0
## XOne
## 87
# One-way frequency table of Rating for 2016.
mytable3 <- table(Rating = vgame2016$Rating)
mytable3
## Rating
## AO E E10+ EC K-A M RP T
## 222 0 66 50 0 0 78 0 86
-Two-way Contingency tables of subset-1( year 2016)
# Two-way contingency table for 2016: Genre (rows) by Rating (columns).
mytable <- xtabs(
  formula = ~ Genre + Rating,
  data = vgame2016
)
mytable
## Rating
## Genre AO E E10+ EC K-A M RP T
## 0 0 0 0 0 0 0 0 0
## Action 88 0 9 20 0 0 34 0 27
## Adventure 38 0 0 2 0 0 10 0 6
## Fighting 5 0 0 1 0 0 0 0 10
## Misc 16 0 5 6 0 0 1 0 4
## Platform 2 0 2 8 0 0 0 0 3
## Puzzle 0 0 0 1 0 0 0 0 0
## Racing 7 0 17 0 0 0 0 0 0
## Role-Playing 31 0 1 2 0 0 8 0 12
## Shooter 5 0 0 7 0 0 25 0 10
## Simulation 10 0 6 0 0 0 0 0 2
## Sports 12 0 26 2 0 0 0 0 8
## Strategy 8 0 0 1 0 0 0 0 4
# Two-way contingency table for 2016: Genre (rows) by Platform (columns).
mytable11 <- xtabs(
  formula = ~ Genre + Platform,
  data = vgame2016
)
mytable11
## Platform
## Genre 2600 3DO 3DS DC DS GB GBA GC GEN GG N64 NES NG PC PCFX PS
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Action 0 0 22 0 0 0 0 0 0 0 0 0 0 7 0 0
## Adventure 0 0 5 0 0 0 0 0 0 0 0 0 0 5 0 0
## Fighting 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0
## Misc 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0
## Platform 0 0 2 0 0 0 0 0 0 0 0 0 0 1 0 0
## Puzzle 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Racing 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0
## Role-Playing 0 0 7 0 0 0 0 0 0 0 0 0 0 4 0 0
## Shooter 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0
## Simulation 0 0 3 0 0 0 0 0 0 0 0 0 0 8 0 0
## Sports 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0
## Strategy 0 0 1 0 0 0 0 0 0 0 0 0 0 8 0 0
## Platform
## Genre PS2 PS3 PS4 PSP PSV SAT SCD SNES TG16 Wii WiiU WS X360 XB
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Action 0 13 59 0 35 0 0 0 0 0 6 0 3 0
## Adventure 0 2 14 0 25 0 0 0 0 0 1 0 1 0
## Fighting 0 2 7 0 2 0 0 0 0 0 1 0 0 0
## Misc 0 6 10 0 3 0 0 0 0 1 2 0 1 0
## Platform 0 1 5 0 0 0 0 0 0 0 2 0 1 0
## Puzzle 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## Racing 0 0 9 0 0 0 0 0 0 0 0 0 0 0
## Role-Playing 0 5 18 0 15 0 0 0 0 0 1 0 0 0
## Shooter 0 1 20 0 0 0 0 0 0 0 1 0 1 0
## Simulation 0 0 4 0 1 0 0 0 0 0 0 0 0 0
## Sports 0 8 16 0 2 0 0 0 0 0 0 0 6 0
## Strategy 0 0 2 0 1 0 0 0 0 0 0 0 0 0
## Platform
## Genre XOne
## 0
## Action 33
## Adventure 3
## Fighting 2
## Misc 4
## Platform 3
## Puzzle 0
## Racing 9
## Role-Playing 4
## Shooter 15
## Simulation 2
## Sports 11
## Strategy 1
# Two-way contingency table for 2016: Rating (rows) by Platform (columns).
mytable12 <- xtabs(
  formula = ~ Rating + Platform,
  data = vgame2016
)
mytable12
## Platform
## Rating 2600 3DO 3DS DC DS GB GBA GC GEN GG N64 NES NG PC PCFX PS PS2 PS3
## 0 0 32 0 0 0 0 0 0 0 0 0 0 17 0 0 0 24
## AO 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## E 0 0 5 0 0 0 0 0 0 0 0 0 0 12 0 0 0 4
## E10+ 0 0 4 0 0 0 0 0 0 0 0 0 0 2 0 0 0 4
## EC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## K-A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## M 0 0 1 0 0 0 0 0 0 0 0 0 0 14 0 0 0 1
## RP 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## T 0 0 4 0 0 0 0 0 0 0 0 0 0 9 0 0 0 5
## Platform
## Rating PS4 PSP PSV SAT SCD SNES TG16 Wii WiiU WS X360 XB XOne
## 61 0 69 0 0 0 0 0 2 0 1 0 16
## AO 0 0 0 0 0 0 0 0 0 0 0 0 0
## E 20 0 0 0 0 0 0 0 2 0 5 0 18
## E10+ 16 0 4 0 0 0 0 1 7 0 4 0 8
## EC 0 0 0 0 0 0 0 0 0 0 0 0 0
## K-A 0 0 0 0 0 0 0 0 0 0 0 0 0
## M 32 0 2 0 0 0 0 0 0 0 1 0 27
## RP 0 0 0 0 0 0 0 0 0 0 0 0 0
## T 35 0 10 0 0 0 0 0 3 0 2 0 18
-2) Viewing the subset-2 containing data of year 2015
# Subset 2: keep only the rows released in 2015 (the year is matched as text).
vgame2015 <- vgame1.df[vgame1.df$Year_of_Release %in% "2015", ]
# Interactive preview of the 2015 subset.
View(vgame2015)
-Dimensions of subset-2( year 2015)
# Number of rows and columns in the 2015 subset.
dim(vgame2015)
## [1] 606 16
-Summarizing the subset-2 (year 2015)
# psych supplies describe(): per-column summary statistics for the subset.
library(psych)
psych::describe(vgame2015)
## vars n mean sd median trimmed mad min
## Name* 1 606 5816.44 3367.04 5832.00 5832.54 4175.00 4.00
## Platform* 2 606 19.14 8.21 19.00 19.67 2.97 3.00
## Year_of_Release* 3 606 36.00 0.00 36.00 36.00 0.00 36.00
## Genre* 4 606 5.50 3.88 3.00 5.10 1.48 2.00
## Publisher* 5 606 309.81 178.25 354.00 316.18 231.29 7.00
## NA_Sales 6 606 0.18 0.49 0.02 0.07 0.03 0.00
## EU_Sales 7 606 0.16 0.48 0.02 0.06 0.03 0.00
## JP_Sales 8 606 0.06 0.19 0.01 0.02 0.01 0.00
## Other_Sales 9 606 0.05 0.16 0.01 0.02 0.01 0.00
## Global_Sales 10 606 0.44 1.10 0.09 0.20 0.10 0.01
## Critic_Score 11 225 72.87 12.44 74.00 74.05 10.38 19.00
## Critic_Count 12 225 32.31 24.35 26.00 29.15 22.24 4.00
## User_Score* 13 606 38.71 36.44 42.00 36.61 60.79 1.00
## User_Count 14 297 393.37 1166.83 65.00 139.76 78.58 4.00
## Developer* 15 606 522.15 603.88 180.00 453.34 265.39 1.00
## Rating* 16 606 3.64 3.13 3.00 3.31 2.97 1.00
## max range skew kurtosis se
## Name* 11534.00 11530.00 -0.02 -1.20 136.78
## Platform* 31.00 28.00 -0.56 -0.13 0.33
## Year_of_Release* 36.00 0.00 NaN NaN 0.00
## Genre* 13.00 11.00 0.58 -1.28 0.16
## Publisher* 574.00 567.00 -0.28 -1.23 7.24
## NA_Sales 6.03 6.03 6.17 51.98 0.02
## EU_Sales 6.12 6.12 7.64 76.86 0.02
## JP_Sales 2.79 2.79 9.42 109.84 0.01
## Other_Sales 2.38 2.38 8.20 93.11 0.01
## Global_Sales 14.63 14.62 6.40 58.93 0.04
## Critic_Score 96.00 77.00 -1.36 3.33 0.83
## Critic_Count 103.00 99.00 1.00 0.21 1.62
## User_Score* 97.00 96.00 0.12 -1.68 1.48
## User_Count 10665.00 10661.00 6.02 42.34 67.71
## Developer* 1677.00 1676.00 0.67 -1.15 24.53
## Rating* 9.00 8.00 0.73 -1.09 0.13
-One-way Contingency tables of subset-2( year 2015)
# One-way frequency table of Genre for 2015 (reuses the name mytable1).
mytable1 <- table(Genre = vgame2015$Genre)
mytable1
## Genre
## Action Adventure Fighting Misc
## 0 253 54 21 39
## Platform Puzzle Racing Role-Playing Shooter
## 13 6 18 78 34
## Simulation Sports Strategy
## 15 59 16
# One-way frequency table of Platform for 2015 (reuses the name mytable2).
mytable2 <- table(Platform = vgame2015$Platform)
mytable2
## Platform
## 2600 3DO 3DS DC DS GB GBA GC GEN GG N64 NES NG PC PCFX
## 0 0 86 0 0 0 0 0 0 0 0 0 0 50 0
## PS PS2 PS3 PS4 PSP PSV SAT SCD SNES TG16 Wii WiiU WS X360 XB
## 0 0 73 137 3 110 0 0 0 0 4 28 0 35 0
## XOne
## 80
# One-way frequency table of Rating for 2015 (reuses the name mytable3).
mytable3 <- table(Rating = vgame2015$Rating)
mytable3
## Rating
## AO E E10+ EC K-A M RP T
## 291 0 87 51 0 0 71 0 106
-Two-way Contingency tables of subset-2(year 2015)
# Two-way contingency table for 2015: Genre (rows) by Rating (columns).
mytable <- xtabs(
  formula = ~ Genre + Rating,
  data = vgame2015
)
mytable
## Rating
## Genre AO E E10+ EC K-A M RP T
## 0 0 0 0 0 0 0 0 0
## Action 132 0 19 29 0 0 35 0 38
## Adventure 40 0 0 1 0 0 7 0 6
## Fighting 7 0 0 0 0 0 3 0 11
## Misc 19 0 3 8 0 0 1 0 8
## Platform 2 0 10 1 0 0 0 0 0
## Puzzle 3 0 2 1 0 0 0 0 0
## Racing 7 0 10 1 0 0 0 0 0
## Role-Playing 45 0 1 2 0 0 9 0 21
## Shooter 12 0 0 1 0 0 16 0 5
## Simulation 6 0 4 1 0 0 0 0 4
## Sports 8 0 38 5 0 0 0 0 8
## Strategy 10 0 0 1 0 0 0 0 5
# Two-way contingency table for 2015: Genre (rows) by Platform (columns).
mytable11 <- xtabs(
  formula = ~ Genre + Platform,
  data = vgame2015
)
mytable11
## Platform
## Genre 2600 3DO 3DS DC DS GB GBA GC GEN GG N64 NES NG PC PCFX PS
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Action 0 0 39 0 0 0 0 0 0 0 0 0 0 16 0 0
## Adventure 0 0 4 0 0 0 0 0 0 0 0 0 0 2 0 0
## Fighting 0 0 2 0 0 0 0 0 0 0 0 0 0 1 0 0
## Misc 0 0 10 0 0 0 0 0 0 0 0 0 0 2 0 0
## Platform 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0
## Puzzle 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0
## Racing 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0
## Role-Playing 0 0 15 0 0 0 0 0 0 0 0 0 0 3 0 0
## Shooter 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0
## Simulation 0 0 3 0 0 0 0 0 0 0 0 0 0 6 0 0
## Sports 0 0 1 0 0 0 0 0 0 0 0 0 0 4 0 0
## Strategy 0 0 4 0 0 0 0 0 0 0 0 0 0 8 0 0
## Platform
## Genre PS2 PS3 PS4 PSP PSV SAT SCD SNES TG16 Wii WiiU WS X360 XB
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Action 0 35 52 3 54 0 0 0 0 3 11 0 13 0
## Adventure 0 7 10 0 21 0 0 0 0 0 0 0 4 0
## Fighting 0 5 9 0 0 0 0 0 0 0 0 0 1 0
## Misc 0 3 4 0 5 0 0 0 0 1 7 0 2 0
## Platform 0 0 2 0 2 0 0 0 0 0 4 0 1 0
## Puzzle 0 0 1 0 0 0 0 0 0 0 1 0 0 0
## Racing 0 2 5 0 1 0 0 0 0 0 0 0 1 0
## Role-Playing 0 5 25 0 20 0 0 0 0 0 3 0 0 0
## Shooter 0 3 11 0 0 0 0 0 0 0 1 0 3 0
## Simulation 0 1 2 0 1 0 0 0 0 0 0 0 1 0
## Sports 0 12 15 0 4 0 0 0 0 0 1 0 9 0
## Strategy 0 0 1 0 2 0 0 0 0 0 0 0 0 0
## Platform
## Genre XOne
## 0
## Action 27
## Adventure 6
## Fighting 3
## Misc 5
## Platform 0
## Puzzle 0
## Racing 6
## Role-Playing 7
## Shooter 11
## Simulation 1
## Sports 13
## Strategy 1
# Two-way contingency table for 2015: Rating (rows) by Platform (columns).
mytable12 <- xtabs(
  formula = ~ Rating + Platform,
  data = vgame2015
)
mytable12
## Platform
## Rating 2600 3DO 3DS DC DS GB GBA GC GEN GG N64 NES NG PC PCFX PS PS2 PS3
## 0 0 57 0 0 0 0 0 0 0 0 0 0 14 0 0 0 39
## AO 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## E 0 0 10 0 0 0 0 0 0 0 0 0 0 9 0 0 0 9
## E10+ 0 0 13 0 0 0 0 0 0 0 0 0 0 3 0 0 0 5
## EC 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## K-A 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## M 0 0 0 0 0 0 0 0 0 0 0 0 0 15 0 0 0 4
## RP 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## T 0 0 6 0 0 0 0 0 0 0 0 0 0 9 0 0 0 16
## Platform
## Rating PS4 PSP PSV SAT SCD SNES TG16 Wii WiiU WS X360 XB XOne
## 52 3 82 0 0 0 0 1 7 0 10 0 26
## AO 0 0 0 0 0 0 0 0 0 0 0 0 0
## E 18 0 4 0 0 0 0 2 11 0 11 0 13
## E10+ 8 0 3 0 0 0 0 1 7 0 4 0 7
## EC 0 0 0 0 0 0 0 0 0 0 0 0 0
## K-A 0 0 0 0 0 0 0 0 0 0 0 0 0
## M 21 0 6 0 0 0 0 0 1 0 4 0 20
## RP 0 0 0 0 0 0 0 0 0 0 0 0 0
## T 38 0 15 0 0 0 0 0 2 0 6 0 14
-> Box plots of different variables, shown independently, comparing 2016 & 2015
# North America sales: 2016 on top, 2015 below (two stacked panels).
par(mfrow = c(2, 1))
boxplot(
  vgame2016$NA_Sales,
  horizontal = TRUE,
  col = "yellow",
  xlab = "NA sales",
  main = "Boxplot of North America sales in 2016"
)
boxplot(
  vgame2015$NA_Sales,
  horizontal = TRUE,
  col = "yellow",
  xlab = "NA sales",
  main = "Boxplot of North America sales in 2015"
)
# Europe sales: 2016 on top, 2015 below (two stacked panels).
# NOTE(review): with horizontal = TRUE, xlim appears to apply to the
# box-position axis rather than the sales axis -- confirm c(0, 3) is intended.
par(mfrow = c(2, 1))
boxplot(
  vgame2016$EU_Sales,
  horizontal = TRUE,
  xlim = c(0, 3),
  col = "yellow",
  xlab = "EU sales",
  main = "Boxplot of Europe sales in 2016"
)
boxplot(
  vgame2015$EU_Sales,
  horizontal = TRUE,
  xlim = c(0, 3),
  col = "yellow",
  xlab = "EU sales",
  main = "Boxplot of Europe sales in 2015"
)
# Japan sales: 2016 on top, 2015 below (two stacked panels).
par(mfrow = c(2, 1))
boxplot(
  vgame2016$JP_Sales,
  horizontal = TRUE,
  col = "yellow",
  xlab = "JP sales",
  main = "Boxplot of Japan sales in 2016"
)
boxplot(
  vgame2015$JP_Sales,
  horizontal = TRUE,
  col = "yellow",
  xlab = "JP sales",
  main = "Boxplot of Japan sales in 2015"
)
# Global sales: 2016 on top, 2015 below (two stacked panels).
par(mfrow = c(2, 1))
boxplot(
  vgame2016$Global_Sales,
  horizontal = TRUE,
  col = "yellow",
  xlab = "Global sales",
  main = "Boxplot of Global sales in 2016"
)
boxplot(
  vgame2015$Global_Sales,
  horizontal = TRUE,
  col = "yellow",
  xlab = "Global sales",
  main = "Boxplot of Global sales in 2015"
)
# Other-region sales: 2016 on top, 2015 below (two stacked panels).
par(mfrow = c(2, 1))
boxplot(
  vgame2016$Other_Sales,
  horizontal = TRUE,
  col = "yellow",
  xlab = "Other sales",
  main = "Boxplot of Other sales in 2016"
)
boxplot(
  vgame2015$Other_Sales,
  horizontal = TRUE,
  col = "yellow",
  xlab = "Other sales",
  main = "Boxplot of Other sales in 2015"
)
–Changing the User_Score variable from a factor vector to a numeric vector for year 2016
# Inspect the column types of the 2016 subset before converting User_Score.
str(vgame2016)
## 'data.frame': 502 obs. of 16 variables:
## $ Name : Factor w/ 11563 levels "","'98 Koshien",..: 3121 7423 10726 1238 727 10403 3121 1238 3053 727 ...
## $ Platform : Factor w/ 31 levels "2600","3DO","3DS",..: 19 3 19 19 19 19 31 31 19 31 ...
## $ Year_of_Release: Factor w/ 40 levels "1980","1981",..: 37 37 37 37 37 37 37 37 37 37 ...
## $ Genre : Factor w/ 13 levels "","Action","Adventure",..: 12 9 10 10 10 10 12 10 2 10 ...
## $ Publisher : Factor w/ 582 levels "10TACLE Studios",..: 140 371 467 17 140 536 140 17 536 140 ...
## $ NA_Sales : num 0.66 2.98 1.85 1.61 1.1 1.35 0.43 1.46 0.6 1.28 ...
## $ EU_Sales : num 5.75 1.45 2.5 2 2.15 1.7 2.05 0.74 1.25 0.77 ...
## $ JP_Sales : num 0.08 2.26 0.19 0.15 0.21 0.15 0 0 0.06 0 ...
## $ Other_Sales : num 1.11 0.45 0.85 0.71 0.61 0.6 0.17 0.22 0.35 0.2 ...
## $ Global_Sales : num 7.59 7.14 5.38 4.46 4.08 3.8 2.65 2.42 2.26 2.25 ...
## $ Critic_Score : int 85 NA 93 77 88 80 84 78 76 87 ...
## $ Critic_Count : int 41 NA 113 82 31 64 50 17 91 37 ...
## $ User_Score : Factor w/ 97 levels "","0","0.2","0.3",..: 49 1 78 33 83 69 54 30 62 81 ...
## $ User_Count : int 398 NA 7064 1129 809 2219 201 290 635 440 ...
## $ Developer : Factor w/ 1697 levels "","10tacle Studios",..: 455 1 1002 733 440 910 455 733 1561 440 ...
## $ Rating : Factor w/ 9 levels "","AO","E","E10+",..: 3 1 9 7 7 7 3 7 7 7 ...
# Convert User_Score to real numeric scores.
# User_Score is read as a factor (levels "", "0", "0.2", ...). Calling
# as.integer() on a factor returns the internal level codes (1..97), not the
# scores, so the labels are converted via character instead.
user_score_2016 <- as.character(vgame2016$User_Score)
# Blank or non-numeric labels carry no score: mark them missing up front so the
# numeric conversion is warning-free.
user_score_2016[!grepl("^[0-9]+(\\.[0-9]+)?$", user_score_2016)] <- NA
vgame2016$User_Score <- as.numeric(user_score_2016)
# NOTE(review): output captured before this fix (level codes) needs regenerating.
str(vgame2016$User_Score)
## int [1:502] 49 1 78 33 83 69 54 30 62 81 ...
–Changing the User_Score variable from a factor vector to a numeric vector for year 2015
# Inspect the column types of the 2015 subset before converting User_Score.
str(vgame2015)
## 'data.frame': 606 obs. of 16 variables:
## $ Name : Factor w/ 11563 levels "","'98 Koshien",..: 1234 3120 9144 1234 2986 10729 3935 9043 2986 10206 ...
## $ Platform : Factor w/ 31 levels "2600","3DO","3DS",..: 19 19 19 31 19 19 31 27 31 19 ...
## $ Year_of_Release: Factor w/ 40 levels "1980","1981",..: 36 36 36 36 36 36 36 36 36 36 ...
## $ Genre : Factor w/ 13 levels "","Action","Adventure",..: 10 12 10 10 9 2 10 10 9 9 ...
## $ Publisher : Factor w/ 582 levels "10TACLE Studios",..: 17 140 140 17 65 467 330 371 65 354 ...
## $ NA_Sales : num 6.03 1.12 2.99 4.59 2.53 2.07 2.78 1.54 2.51 1.02 ...
## $ EU_Sales : num 5.86 6.12 3.49 2.11 3.27 1.71 1.27 1.18 1.32 2.13 ...
## $ JP_Sales : num 0.36 0.06 0.22 0.01 0.24 0.08 0.03 1.46 0.01 0.23 ...
## $ Other_Sales : num 2.38 1.28 1.28 0.68 1.13 0.76 0.41 0.26 0.38 0.59 ...
## $ Global_Sales : num 14.63 8.57 7.98 7.39 7.16 ...
## $ Critic_Score : int NA 82 NA NA 87 86 84 81 88 92 ...
## $ Critic_Count : int NA 42 NA NA 58 78 101 88 39 79 ...
## $ User_Score : Factor w/ 97 levels "","0","0.2","0.3",..: 1 42 1 1 64 80 63 84 61 91 ...
## $ User_Count : int NA 896 NA NA 4228 1264 2438 1184 1749 10179 ...
## $ Developer : Factor w/ 1697 levels "","10tacle Studios",..: 1 452 1 1 176 233 20 1035 176 282 ...
## $ Rating : Factor w/ 9 levels "","AO","E","E10+",..: 1 3 1 1 7 9 9 4 7 7 ...
# Convert User_Score to real numeric scores.
# User_Score is read as a factor (levels "", "0", "0.2", ...). Calling
# as.integer() on a factor returns the internal level codes (1..97), not the
# scores, so the labels are converted via character instead.
user_score_2015 <- as.character(vgame2015$User_Score)
# Blank or non-numeric labels carry no score: mark them missing up front so the
# numeric conversion is warning-free.
user_score_2015[!grepl("^[0-9]+(\\.[0-9]+)?$", user_score_2015)] <- NA
vgame2015$User_Score <- as.numeric(user_score_2015)
# NOTE(review): output captured before this fix (level codes) needs regenerating.
str(vgame2015$User_Score)
## int [1:606] 1 42 1 1 64 80 63 84 61 91 ...
# 2 x 2 grid, filled row by row: 2016 critic/user scores on the first row,
# 2015 critic/user scores on the second.
par(mfrow = c(2, 2))
boxplot(
  vgame2016$Critic_Score,
  horizontal = TRUE,
  col = "yellow",
  xlab = "Critic score",
  main = "Boxplot of Critic score in 2016"
)
boxplot(
  vgame2016$User_Score,
  horizontal = TRUE,
  col = "yellow",
  xlab = "User score",
  main = "Boxplot of User score in 2016"
)
boxplot(
  vgame2015$Critic_Score,
  horizontal = TRUE,
  col = "yellow",
  xlab = "Critic score",
  main = "Boxplot of Critic score in 2015"
)
boxplot(
  vgame2015$User_Score,
  horizontal = TRUE,
  col = "yellow",
  xlab = "User score",
  main = "Boxplot of User score in 2015"
)
# 2 x 2 grid, filled row by row: 2016 critic/user review counts on the first
# row, 2015 critic/user review counts on the second.
par(mfrow = c(2, 2))
boxplot(
  vgame2016$Critic_Count,
  horizontal = TRUE,
  col = "yellow",
  xlab = "Critic count",
  main = "Boxplot of Critic count in 2016"
)
boxplot(
  vgame2016$User_Count,
  horizontal = TRUE,
  col = "yellow",
  xlab = "User count",
  main = "Boxplot of User count in 2016"
)
boxplot(
  vgame2015$Critic_Count,
  horizontal = TRUE,
  col = "yellow",
  xlab = "Critic count",
  main = "Boxplot of Critic count in 2015"
)
boxplot(
  vgame2015$User_Count,
  horizontal = TRUE,
  col = "yellow",
  xlab = "User count",
  main = "Boxplot of User count in 2015"
)
-> Boxplots of variables correlated pair-wise, and their comparison between years 2016 & 2015
# Global sales split by Genre: 2016 on top, 2015 below.
# The axis() call used to be passed as an extra argument to with(), which never
# evaluates it, so the plots had no group ticks at all (yaxt = "n" suppresses
# the default axis). It now runs after each boxplot, with one tick per group
# actually drawn instead of a hard-coded 1:12. Tick k is the k-th Genre group
# in the order boxplot() reports in its `names` component.
par(mfrow = c(2, 1))
local({
  groups <- boxplot(
    Global_Sales ~ Genre, data = vgame2016,
    horizontal = TRUE, yaxt = "n",
    ylab = "Genre", xlab = "Global sales", col = "yellow",
    main = "Comparison of Global sales based on Genre of the video game in 2016"
  )
  axis(side = 2, at = seq_along(groups$names))
})
local({
  groups <- boxplot(
    Global_Sales ~ Genre, data = vgame2015,
    horizontal = TRUE, yaxt = "n",
    ylab = "Genre", xlab = "Global sales", col = "yellow",
    main = "Comparison of Global sales based on Genre of the video game in 2015"
  )
  axis(side = 2, at = seq_along(groups$names))
})
# Global sales split by Rating: 2016 on top, 2015 below.
# The axis() call used to be passed as an extra argument to with(), which never
# evaluates it, so the plots had no group ticks at all (yaxt = "n" suppresses
# the default axis). It now runs after each boxplot, with one tick per group
# actually drawn instead of a hard-coded 1:12. Tick k is the k-th Rating group
# in the order boxplot() reports in its `names` component.
par(mfrow = c(2, 1))
local({
  groups <- boxplot(
    Global_Sales ~ Rating, data = vgame2016,
    horizontal = TRUE, yaxt = "n",
    ylab = "Rating", xlab = "Global sales", col = "yellow",
    main = "Comparison of Global sales on rating in 2016"
  )
  axis(side = 2, at = seq_along(groups$names))
})
local({
  groups <- boxplot(
    Global_Sales ~ Rating, data = vgame2015,
    horizontal = TRUE, yaxt = "n",
    ylab = "Rating", xlab = "Global sales", col = "yellow",
    main = "Comparison of Global sales on rating in 2015"
  )
  axis(side = 2, at = seq_along(groups$names))
})
# Critic score split by Rating: 2016 on top, 2015 below.
# The axis() call used to be passed as an extra argument to with(), which never
# evaluates it, so the plots had no group ticks at all (yaxt = "n" suppresses
# the default axis). It now runs after each boxplot, with one tick per group
# actually drawn instead of a hard-coded 1:12. Tick k is the k-th Rating group
# in the order boxplot() reports in its `names` component.
par(mfrow = c(2, 1))
local({
  groups <- boxplot(
    Critic_Score ~ Rating, data = vgame2016,
    horizontal = TRUE, yaxt = "n",
    ylab = "Rating", xlab = "Critic score", col = "yellow",
    main = "Comparison of Critic score based on Rating in 2016"
  )
  axis(side = 2, at = seq_along(groups$names))
})
local({
  groups <- boxplot(
    Critic_Score ~ Rating, data = vgame2015,
    horizontal = TRUE, yaxt = "n",
    ylab = "Rating", xlab = "Critic score", col = "yellow",
    main = "Comparison of Critic score based on Rating in 2015"
  )
  axis(side = 2, at = seq_along(groups$names))
})
# User score split by Rating: 2016 on top, 2015 below.
# The axis() call used to be passed as an extra argument to with(), which never
# evaluates it, so the plots had no group ticks at all (yaxt = "n" suppresses
# the default axis). It now runs after each boxplot, with one tick per group
# actually drawn instead of a hard-coded 1:12. Tick k is the k-th Rating group
# in the order boxplot() reports in its `names` component.
# The 2015 title also had a missing space ("basedon").
par(mfrow = c(2, 1))
local({
  groups <- boxplot(
    User_Score ~ Rating, data = vgame2016,
    horizontal = TRUE, yaxt = "n",
    ylab = "Rating", xlab = "User score", col = "yellow",
    main = "Comparison of User score based on Rating in 2016"
  )
  axis(side = 2, at = seq_along(groups$names))
})
local({
  groups <- boxplot(
    User_Score ~ Rating, data = vgame2015,
    horizontal = TRUE, yaxt = "n",
    ylab = "Rating", xlab = "User score", col = "yellow",
    main = "Comparison of User score based on Rating in 2015"
  )
  axis(side = 2, at = seq_along(groups$names))
})
-> Histograms of different variables correlated pair-wise in year 2016
library(lattice)
# Conditioned lattice histograms for 2016: the variable left of `|` is binned
# separately within each level of the variable on the right.
lattice::histogram(~ Genre | Rating, data = vgame2016)
lattice::histogram(~ Genre | Platform, data = vgame2016)
lattice::histogram(~ Platform | Rating, data = vgame2016)
-> Histograms of different variables correlated pair-wise in year 2015
library(lattice)
# Conditioned lattice histograms for 2015: the variable left of `|` is binned
# separately within each level of the variable on the right.
lattice::histogram(~ Genre | Rating, data = vgame2015)
lattice::histogram(~ Genre | Platform, data = vgame2015)
lattice::histogram(~ Platform | Rating, data = vgame2015)
-> Scatterplots of variables showing comparison in sales in years 2016 & 2015
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Global sales against critic score for 2016 (car::scatterplot).
scatterplot(
  Global_Sales ~ Critic_Score,
  data = vgame2016,
  main = "Scatterplot of Global Sales vs. Critic score in 2016",
  xlab = "Critic score",
  ylab = "Global sales ",
  pch = 19,
  cex = 0.6,
  spread = FALSE,
  smoother.args = list(lty = 2)
)
# Global sales against user score for 2016 (car::scatterplot).
# The x-range is taken from the data instead of the former hard-coded
# c(1, 100), which only suited one particular encoding of User_Score; this
# keeps the points filling the panel whatever scale the column is on.
scatterplot(
  Global_Sales ~ User_Score,
  data = vgame2016,
  main = "Scatterplot of Global sales vs. User SCore in 2016",
  xlab = "User Score",
  ylab = "Global sales ",
  xlim = range(vgame2016$User_Score, na.rm = TRUE),
  pch = 19,
  cex = 0.6,
  spread = FALSE,
  smoother.args = list(lty = 2)
)
# Global sales against critic score for 2015 (car::scatterplot).
scatterplot(
  Global_Sales ~ Critic_Score,
  data = vgame2015,
  main = "Scatterplot of Global sales vs. Critic score in 2015",
  xlab = "Critic score",
  ylab = "Global sales ",
  pch = 19,
  cex = 0.6,
  spread = FALSE,
  smoother.args = list(lty = 2)
)
# Global sales against user score for 2015 (car::scatterplot).
# The x-range is taken from the data instead of the former hard-coded
# c(1, 100), which only suited one particular encoding of User_Score; this
# keeps the points filling the panel whatever scale the column is on.
scatterplot(
  Global_Sales ~ User_Score,
  data = vgame2015,
  main = "Scatterplot of Global sales vs. User SCore in 2015",
  xlab = "User score",
  ylab = "Global sales ",
  xlim = range(vgame2015$User_Score, na.rm = TRUE),
  pch = 19,
  cex = 0.6,
  spread = FALSE,
  smoother.args = list(lty = 2)
)
–Changing the Rating variable from a factor vector to a numeric vector (level codes) for year 2016
# Inspect the column types of the 2016 subset before converting Rating.
str(vgame2016)
## 'data.frame': 502 obs. of 16 variables:
## $ Name : Factor w/ 11563 levels "","'98 Koshien",..: 3121 7423 10726 1238 727 10403 3121 1238 3053 727 ...
## $ Platform : Factor w/ 31 levels "2600","3DO","3DS",..: 19 3 19 19 19 19 31 31 19 31 ...
## $ Year_of_Release: Factor w/ 40 levels "1980","1981",..: 37 37 37 37 37 37 37 37 37 37 ...
## $ Genre : Factor w/ 13 levels "","Action","Adventure",..: 12 9 10 10 10 10 12 10 2 10 ...
## $ Publisher : Factor w/ 582 levels "10TACLE Studios",..: 140 371 467 17 140 536 140 17 536 140 ...
## $ NA_Sales : num 0.66 2.98 1.85 1.61 1.1 1.35 0.43 1.46 0.6 1.28 ...
## $ EU_Sales : num 5.75 1.45 2.5 2 2.15 1.7 2.05 0.74 1.25 0.77 ...
## $ JP_Sales : num 0.08 2.26 0.19 0.15 0.21 0.15 0 0 0.06 0 ...
## $ Other_Sales : num 1.11 0.45 0.85 0.71 0.61 0.6 0.17 0.22 0.35 0.2 ...
## $ Global_Sales : num 7.59 7.14 5.38 4.46 4.08 3.8 2.65 2.42 2.26 2.25 ...
## $ Critic_Score : int 85 NA 93 77 88 80 84 78 76 87 ...
## $ Critic_Count : int 41 NA 113 82 31 64 50 17 91 37 ...
## $ User_Score : int 49 1 78 33 83 69 54 30 62 81 ...
## $ User_Count : int 398 NA 7064 1129 809 2219 201 290 635 440 ...
## $ Developer : Factor w/ 1697 levels "","10tacle Studios",..: 455 1 1002 733 440 910 455 733 1561 440 ...
## $ Rating : Factor w/ 9 levels "","AO","E","E10+",..: 3 1 9 7 7 7 3 7 7 7 ...
# Replace the Rating factor by its numeric level codes (1 = first level, ...).
# NOTE(review): the codes are category indices, not an ordered scale -- keep
# that in mind when Rating appears in correlation-based plots.
vgame2016[["Rating"]] <- as.numeric(vgame2016[["Rating"]])
str(vgame2016[["Rating"]])
## num [1:502] 3 1 9 7 7 7 3 7 7 7 ...
–Changing the Rating variable from a factor vector to a numeric vector (level codes) for year 2015
# Inspect the column types of the 2015 subset before converting Rating.
str(vgame2015)
## 'data.frame': 606 obs. of 16 variables:
## $ Name : Factor w/ 11563 levels "","'98 Koshien",..: 1234 3120 9144 1234 2986 10729 3935 9043 2986 10206 ...
## $ Platform : Factor w/ 31 levels "2600","3DO","3DS",..: 19 19 19 31 19 19 31 27 31 19 ...
## $ Year_of_Release: Factor w/ 40 levels "1980","1981",..: 36 36 36 36 36 36 36 36 36 36 ...
## $ Genre : Factor w/ 13 levels "","Action","Adventure",..: 10 12 10 10 9 2 10 10 9 9 ...
## $ Publisher : Factor w/ 582 levels "10TACLE Studios",..: 17 140 140 17 65 467 330 371 65 354 ...
## $ NA_Sales : num 6.03 1.12 2.99 4.59 2.53 2.07 2.78 1.54 2.51 1.02 ...
## $ EU_Sales : num 5.86 6.12 3.49 2.11 3.27 1.71 1.27 1.18 1.32 2.13 ...
## $ JP_Sales : num 0.36 0.06 0.22 0.01 0.24 0.08 0.03 1.46 0.01 0.23 ...
## $ Other_Sales : num 2.38 1.28 1.28 0.68 1.13 0.76 0.41 0.26 0.38 0.59 ...
## $ Global_Sales : num 14.63 8.57 7.98 7.39 7.16 ...
## $ Critic_Score : int NA 82 NA NA 87 86 84 81 88 92 ...
## $ Critic_Count : int NA 42 NA NA 58 78 101 88 39 79 ...
## $ User_Score : int 1 42 1 1 64 80 63 84 61 91 ...
## $ User_Count : int NA 896 NA NA 4228 1264 2438 1184 1749 10179 ...
## $ Developer : Factor w/ 1697 levels "","10tacle Studios",..: 1 452 1 1 176 233 20 1035 176 282 ...
## $ Rating : Factor w/ 9 levels "","AO","E","E10+",..: 1 3 1 1 7 9 9 4 7 7 ...
# Replace the Rating factor by its numeric level codes (1 = first level, ...).
# NOTE(review): the codes are category indices, not an ordered scale -- keep
# that in mind when Rating appears in correlation-based plots.
vgame2015[["Rating"]] <- as.numeric(vgame2015[["Rating"]])
str(vgame2015[["Rating"]])
## num [1:606] 1 3 1 1 7 9 9 4 7 7 ...
->Correlation Matrix visualization
library(corrplot)
## corrplot 0.84 loaded
# Correlation matrix for 2016 over columns 6 to 14, drawn as ellipses.
# Rows containing any NA are dropped (use = "complete.obs").
corrplot(
  corr = cor(vgame2016[, 6:14], use = "complete.obs"),
  main = "correlation matrix of variables in 2016",
  method = "ellipse"
)
# Correlation matrix for 2015 over columns 6 to 14, drawn as ellipses.
# Rows containing any NA are dropped (use = "complete.obs").
library(corrplot)
corrplot(
  corr = cor(vgame2015[, 6:14], use = "complete.obs"),
  main = "correlation matrix of variables in 2015",
  method = "ellipse"
)
->Corrgram
library(corrgram)
# Corrgram for 2016: shaded cells below the diagonal, pies above it, min/max
# on the diagonal; variables stay in data-frame order (order = FALSE).
corrgram(
  vgame2016,
  main = "Corrgram of all the variables in 2016",
  order = FALSE,
  text.panel = panel.txt,
  diag.panel = panel.minmax,
  upper.panel = panel.pie,
  lower.panel = panel.shade
)
library(corrgram)
# Corrgram for 2015: shaded cells below the diagonal, pies above it, min/max
# on the diagonal; variables stay in data-frame order (order = FALSE).
corrgram(
  vgame2015,
  main = "Corrgram of all the variables in 2015",
  order = FALSE,
  text.panel = panel.txt,
  diag.panel = panel.minmax,
  upper.panel = panel.pie,
  lower.panel = panel.shade
)
->Scatterplot matrix
library(car)
# Scatterplot matrix for 2016 of the review variables and global sales, with
# histograms on the diagonal.
scatterplotMatrix(
  formula = ~ Critic_Score + Critic_Count + User_Score + User_Count +
    Global_Sales,
  data = vgame2016,
  main = "scatterplot matrix in 2016",
  diagonal = "histogram",
  pch = 19,
  cex = 0.6,
  spread = FALSE,
  smoother.args = list(lty = 2)
)
# Scatterplot matrix for 2015 of the review variables and global sales, with
# histograms on the diagonal.
scatterplotMatrix(
  formula = ~ Critic_Score + Critic_Count + User_Score + User_Count +
    Global_Sales,
  data = vgame2015,
  main = "scatterplot matrix in 2015",
  diagonal = "histogram",
  pch = 19,
  cex = 0.6,
  spread = FALSE,
  smoother.args = list(lty = 2)
)
-> Pearson’s chi-squared test not applied due to lack of definite categorical variables
->Appropriate dependent (paired) t-tests can be carried out to decide the statistical significance of the dependency, as follows
->NULL HYPOTHESIS: Global sales is independent of Critic score, Critic count, NA sales, EU sales, JP sales, Other sales and User count
# NOTE(review): attach() puts the columns of vgame2016 on the search path.
# It is kept in case other code relies on bare column names; prefer with() or
# vgame2016$col and remove it once nothing depends on it.
attach(vgame2016)
# Paired t-test of Critic_Score against Global_Sales for 2016.
# The columns are now taken from vgame2016 via with(). The former `data =`
# argument was silently ignored, because t.test()'s default (x, y) method has
# no such parameter.
# NOTE(review): a paired t-test compares the means of the two columns; it does
# not test independence. cor.test() would address the stated hypothesis.
with(vgame2016, t.test(Critic_Score, Global_Sales, paired = TRUE))
##
## Paired t-test
##
## data: Critic_Score and Global_Sales
## t = 96.234, df = 231, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 71.26568 74.24484
## sample estimates:
## mean of the differences
## 72.75526
# Paired t-test of Critic_Count against Global_Sales for 2016.
# The columns are now taken from vgame2016 via with(). The former `data =`
# argument was silently ignored, because t.test()'s default (x, y) method has
# no such parameter.
# NOTE(review): a paired t-test compares means; it does not test independence.
with(vgame2016, t.test(Critic_Count, Global_Sales, paired = TRUE))
##
## Paired t-test
##
## data: Critic_Count and Global_Sales
## t = 19.309, df = 231, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 26.80807 32.90072
## sample estimates:
## mean of the differences
## 29.8544
# Paired t-test of Other_Sales against Global_Sales for 2016.
# The columns are now taken from vgame2016 via with(). The former `data =`
# argument was silently ignored, because t.test()'s default (x, y) method has
# no such parameter.
# NOTE(review): a paired t-test compares means; it does not test independence.
with(vgame2016, t.test(Other_Sales, Global_Sales, paired = TRUE))
##
## Paired t-test
##
## data: Other_Sales and Global_Sales
## t = -8.4585, df = 501, p-value = 2.989e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2838165 -0.1768209
## sample estimates:
## mean of the differences
## -0.2303187
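-Since the p-value is very low (<0.001), there does appear to be a significant relationship between the two variables.
-Hence, the t-test rejects the null hypothesis that the two variables are independent, and the result is statistically significant. (Note: a paired t-test compares the means of the two variables; a correlation test such as cor.test() would assess their dependence directly.)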