->Reading dataset into R

# Load the raw sales data. stringsAsFactors = TRUE is spelled out because the
# rest of the script treats the text columns as factors (level-aware tables,
# factor-to-number conversions); R >= 4.0 would otherwise read them as plain
# character vectors.
vgame1.df <- read.csv("Video Game Sales Data.csv.csv", stringsAsFactors = TRUE)
# Open the full dataset in the spreadsheet-style viewer.
View(vgame1.df)

-Dimensions of the dataset

# Number of rows and columns in the full dataset.
dim(vgame1.df)
## [1] 16719    16

-Visualizing the data: frequency count based on the year of release of a video game

# Count games per release year; in the recorded output, unknown years show up
# as the literal level "N/A".
table(vgame1.df$Year_of_Release)
## 
## 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 
##    9   46   36   17   14   14   21   16   15   17   16   41   43   62  121 
## 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 
##  219  263  289  379  338  350  482  829  775  762  939 1006 1197 1427 1426 
## 2010 2011 2012 2013 2014 2015 2016 2017 2020  N/A 
## 1255 1136  653  544  581  606  502    3    1  269

->Cleaning the data

-1) Viewing the subset-1 containing data of only year 2016

# Keep only the rows released in 2016, then open them in the viewer.
released_2016 <- vgame1.df$Year_of_Release %in% "2016"
vgame2016 <- vgame1.df[released_2016, ]
View(vgame2016)

-Dimensions of subset-1(year 2016)

# Number of rows and columns in the 2016 subset.
dim(vgame2016)
## [1] 502  16

-Summarizing the subset-1 (year 2016)

library(psych)
# Summary statistics for every column of the 2016 subset.
# NOTE(review): the starred rows in the recorded output (Name*, Platform*, ...)
# appear to be factor columns summarised via their integer codes -- confirm
# before interpreting their means.
describe(vgame2016)
##                  vars   n    mean      sd  median trimmed     mad   min
## Name*               1 502 5739.26 3456.23 5894.00 5739.69 4452.25  3.00
## Platform*           2 502   19.83    7.47   19.00   20.43    2.97  3.00
## Year_of_Release*    3 502   37.00    0.00   37.00   37.00    0.00 37.00
## Genre*              4 502    5.85    3.89    5.00    5.53    4.45  2.00
## Publisher*          5 502  309.33  178.72  354.00  315.46  231.29  7.00
## NA_Sales            6 502    0.09    0.26    0.01    0.03    0.01  0.00
## EU_Sales            7 502    0.10    0.36    0.01    0.03    0.01  0.00
## JP_Sales            8 502    0.04    0.13    0.00    0.02    0.00  0.00
## Other_Sales         9 502    0.03    0.09    0.00    0.01    0.00  0.00
## Global_Sales       10 502    0.26    0.70    0.06    0.11    0.07  0.01
## Critic_Score       11 232   73.16   11.74   74.50   74.28   11.12 31.00
## Critic_Count       12 232   30.25   23.89   22.00   26.98   19.27  4.00
## User_Score*        13 502   41.07   36.42   49.50   39.55   52.63  1.00
## User_Count         14 262  264.97  671.24   57.00  126.51   69.68  5.00
## Developer*         15 502  545.22  580.25  378.00  484.34  558.94  1.00
## Rating*            16 502    3.86    3.13    3.00    3.58    2.97  1.00
##                       max    range  skew kurtosis     se
## Name*            11536.00 11533.00 -0.04    -1.25 154.26
## Platform*           31.00    28.00 -0.45     0.36   0.33
## Year_of_Release*    37.00     0.00   NaN      NaN   0.00
## Genre*              13.00    11.00  0.41    -1.45   0.17
## Publisher*         576.00   569.00 -0.26    -1.32   7.98
## NA_Sales             2.98     2.98  5.69    42.87   0.01
## EU_Sales             5.75     5.75  9.76   128.76   0.02
## JP_Sales             2.26     2.26 11.51   169.13   0.01
## Other_Sales          1.11     1.11  6.80    57.74   0.00
## Global_Sales         7.59     7.58  6.52    52.98   0.03
## Critic_Score        93.00    62.00 -0.85     0.64   0.77
## Critic_Count       113.00   109.00  1.12     0.66   1.57
## User_Score*         97.00    96.00  0.01    -1.69   1.63
## User_Count        7064.00  7059.00  6.43    52.11  41.47
## Developer*        1671.00  1670.00  0.60    -1.15  25.90
## Rating*              9.00     8.00  0.57    -1.29   0.14

-One-way Contingency tables of subset-1( year 2016)

# Games per genre in 2016 (unused factor levels are kept and show as 0).
mytable1 <- table(Genre = vgame2016$Genre)
mytable1
## Genre
##                    Action    Adventure     Fighting         Misc 
##            0          178           56           16           32 
##     Platform       Puzzle       Racing Role-Playing      Shooter 
##           15            1           24           54           47 
##   Simulation       Sports     Strategy 
##           18           48           13
# Games per platform in 2016.
mytable2 <- table(Platform = vgame2016$Platform)
mytable2
## Platform
## 2600  3DO  3DS   DC   DS   GB  GBA   GC  GEN   GG  N64  NES   NG   PC PCFX 
##    0    0   46    0    0    0    0    0    0    0    0    0    0   54    0 
##   PS  PS2  PS3  PS4  PSP  PSV  SAT  SCD SNES TG16  Wii WiiU   WS X360   XB 
##    0    0   38  164    0   85    0    0    0    0    1   14    0   13    0 
## XOne 
##   87
# Games per rating in 2016; the unnamed first column is the empty-string level.
mytable3 <- table(Rating = vgame2016$Rating)
mytable3
## Rating
##        AO    E E10+   EC  K-A    M   RP    T 
##  222    0   66   50    0    0   78    0   86

-Two-way Contingency tables of subset-1( year 2016)

# Genre x Rating cross-tabulation for 2016.
mytable <- xtabs(formula = ~ Genre + Rating, data = vgame2016)
mytable
##               Rating
## Genre             AO  E E10+ EC K-A  M RP  T
##                 0  0  0    0  0   0  0  0  0
##   Action       88  0  9   20  0   0 34  0 27
##   Adventure    38  0  0    2  0   0 10  0  6
##   Fighting      5  0  0    1  0   0  0  0 10
##   Misc         16  0  5    6  0   0  1  0  4
##   Platform      2  0  2    8  0   0  0  0  3
##   Puzzle        0  0  0    1  0   0  0  0  0
##   Racing        7  0 17    0  0   0  0  0  0
##   Role-Playing 31  0  1    2  0   0  8  0 12
##   Shooter       5  0  0    7  0   0 25  0 10
##   Simulation   10  0  6    0  0   0  0  0  2
##   Sports       12  0 26    2  0   0  0  0  8
##   Strategy      8  0  0    1  0   0  0  0  4
# Genre x Platform cross-tabulation for 2016.
mytable11 <- xtabs(formula = ~ Genre + Platform, data = vgame2016)
mytable11
##               Platform
## Genre          2600 3DO 3DS DC DS GB GBA GC GEN GG N64 NES NG PC PCFX PS
##                   0   0   0  0  0  0   0  0   0  0   0   0  0  0    0  0
##   Action          0   0  22  0  0  0   0  0   0  0   0   0  0  7    0  0
##   Adventure       0   0   5  0  0  0   0  0   0  0   0   0  0  5    0  0
##   Fighting        0   0   1  0  0  0   0  0   0  0   0   0  0  1    0  0
##   Misc            0   0   5  0  0  0   0  0   0  0   0   0  0  0    0  0
##   Platform        0   0   2  0  0  0   0  0   0  0   0   0  0  1    0  0
##   Puzzle          0   0   0  0  0  0   0  0   0  0   0   0  0  0    0  0
##   Racing          0   0   0  0  0  0   0  0   0  0   0   0  0  6    0  0
##   Role-Playing    0   0   7  0  0  0   0  0   0  0   0   0  0  4    0  0
##   Shooter         0   0   0  0  0  0   0  0   0  0   0   0  0  9    0  0
##   Simulation      0   0   3  0  0  0   0  0   0  0   0   0  0  8    0  0
##   Sports          0   0   0  0  0  0   0  0   0  0   0   0  0  5    0  0
##   Strategy        0   0   1  0  0  0   0  0   0  0   0   0  0  8    0  0
##               Platform
## Genre          PS2 PS3 PS4 PSP PSV SAT SCD SNES TG16 Wii WiiU WS X360 XB
##                  0   0   0   0   0   0   0    0    0   0    0  0    0  0
##   Action         0  13  59   0  35   0   0    0    0   0    6  0    3  0
##   Adventure      0   2  14   0  25   0   0    0    0   0    1  0    1  0
##   Fighting       0   2   7   0   2   0   0    0    0   0    1  0    0  0
##   Misc           0   6  10   0   3   0   0    0    0   1    2  0    1  0
##   Platform       0   1   5   0   0   0   0    0    0   0    2  0    1  0
##   Puzzle         0   0   0   0   1   0   0    0    0   0    0  0    0  0
##   Racing         0   0   9   0   0   0   0    0    0   0    0  0    0  0
##   Role-Playing   0   5  18   0  15   0   0    0    0   0    1  0    0  0
##   Shooter        0   1  20   0   0   0   0    0    0   0    1  0    1  0
##   Simulation     0   0   4   0   1   0   0    0    0   0    0  0    0  0
##   Sports         0   8  16   0   2   0   0    0    0   0    0  0    6  0
##   Strategy       0   0   2   0   1   0   0    0    0   0    0  0    0  0
##               Platform
## Genre          XOne
##                   0
##   Action         33
##   Adventure       3
##   Fighting        2
##   Misc            4
##   Platform        3
##   Puzzle          0
##   Racing          9
##   Role-Playing    4
##   Shooter        15
##   Simulation      2
##   Sports         11
##   Strategy        1
# Rating x Platform cross-tabulation for 2016.
mytable12 <- xtabs(formula = ~ Rating + Platform, data = vgame2016)
mytable12
##       Platform
## Rating 2600 3DO 3DS DC DS GB GBA GC GEN GG N64 NES NG PC PCFX PS PS2 PS3
##           0   0  32  0  0  0   0  0   0  0   0   0  0 17    0  0   0  24
##   AO      0   0   0  0  0  0   0  0   0  0   0   0  0  0    0  0   0   0
##   E       0   0   5  0  0  0   0  0   0  0   0   0  0 12    0  0   0   4
##   E10+    0   0   4  0  0  0   0  0   0  0   0   0  0  2    0  0   0   4
##   EC      0   0   0  0  0  0   0  0   0  0   0   0  0  0    0  0   0   0
##   K-A     0   0   0  0  0  0   0  0   0  0   0   0  0  0    0  0   0   0
##   M       0   0   1  0  0  0   0  0   0  0   0   0  0 14    0  0   0   1
##   RP      0   0   0  0  0  0   0  0   0  0   0   0  0  0    0  0   0   0
##   T       0   0   4  0  0  0   0  0   0  0   0   0  0  9    0  0   0   5
##       Platform
## Rating PS4 PSP PSV SAT SCD SNES TG16 Wii WiiU WS X360 XB XOne
##         61   0  69   0   0    0    0   0    2  0    1  0   16
##   AO     0   0   0   0   0    0    0   0    0  0    0  0    0
##   E     20   0   0   0   0    0    0   0    2  0    5  0   18
##   E10+  16   0   4   0   0    0    0   1    7  0    4  0    8
##   EC     0   0   0   0   0    0    0   0    0  0    0  0    0
##   K-A    0   0   0   0   0    0    0   0    0  0    0  0    0
##   M     32   0   2   0   0    0    0   0    0  0    1  0   27
##   RP     0   0   0   0   0    0    0   0    0  0    0  0    0
##   T     35   0  10   0   0    0    0   0    3  0    2  0   18

-2) Viewing the subset-2 containing data of year 2015

# Keep only the rows released in 2015, then open them in the viewer.
released_2015 <- vgame1.df$Year_of_Release %in% "2015"
vgame2015 <- vgame1.df[released_2015, ]
View(vgame2015)

-Dimensions of subset-2( year 2015)

# Number of rows and columns in the 2015 subset.
dim(vgame2015)
## [1] 606  16

-Summarizing the subset-2 (year 2015)

library(psych)
# Summary statistics for every column of the 2015 subset.
# NOTE(review): the starred rows in the recorded output (Name*, Platform*, ...)
# appear to be factor columns summarised via their integer codes -- confirm
# before interpreting their means.
describe(vgame2015)
##                  vars   n    mean      sd  median trimmed     mad   min
## Name*               1 606 5816.44 3367.04 5832.00 5832.54 4175.00  4.00
## Platform*           2 606   19.14    8.21   19.00   19.67    2.97  3.00
## Year_of_Release*    3 606   36.00    0.00   36.00   36.00    0.00 36.00
## Genre*              4 606    5.50    3.88    3.00    5.10    1.48  2.00
## Publisher*          5 606  309.81  178.25  354.00  316.18  231.29  7.00
## NA_Sales            6 606    0.18    0.49    0.02    0.07    0.03  0.00
## EU_Sales            7 606    0.16    0.48    0.02    0.06    0.03  0.00
## JP_Sales            8 606    0.06    0.19    0.01    0.02    0.01  0.00
## Other_Sales         9 606    0.05    0.16    0.01    0.02    0.01  0.00
## Global_Sales       10 606    0.44    1.10    0.09    0.20    0.10  0.01
## Critic_Score       11 225   72.87   12.44   74.00   74.05   10.38 19.00
## Critic_Count       12 225   32.31   24.35   26.00   29.15   22.24  4.00
## User_Score*        13 606   38.71   36.44   42.00   36.61   60.79  1.00
## User_Count         14 297  393.37 1166.83   65.00  139.76   78.58  4.00
## Developer*         15 606  522.15  603.88  180.00  453.34  265.39  1.00
## Rating*            16 606    3.64    3.13    3.00    3.31    2.97  1.00
##                       max    range  skew kurtosis     se
## Name*            11534.00 11530.00 -0.02    -1.20 136.78
## Platform*           31.00    28.00 -0.56    -0.13   0.33
## Year_of_Release*    36.00     0.00   NaN      NaN   0.00
## Genre*              13.00    11.00  0.58    -1.28   0.16
## Publisher*         574.00   567.00 -0.28    -1.23   7.24
## NA_Sales             6.03     6.03  6.17    51.98   0.02
## EU_Sales             6.12     6.12  7.64    76.86   0.02
## JP_Sales             2.79     2.79  9.42   109.84   0.01
## Other_Sales          2.38     2.38  8.20    93.11   0.01
## Global_Sales        14.63    14.62  6.40    58.93   0.04
## Critic_Score        96.00    77.00 -1.36     3.33   0.83
## Critic_Count       103.00    99.00  1.00     0.21   1.62
## User_Score*         97.00    96.00  0.12    -1.68   1.48
## User_Count       10665.00 10661.00  6.02    42.34  67.71
## Developer*        1677.00  1676.00  0.67    -1.15  24.53
## Rating*              9.00     8.00  0.73    -1.09   0.13

-One-way Contingency tables of subset-2( year 2015)

# Games per genre in 2015. This overwrites the 2016 table of the same name.
mytable1 <- table(Genre = vgame2015$Genre)
mytable1
## Genre
##                    Action    Adventure     Fighting         Misc 
##            0          253           54           21           39 
##     Platform       Puzzle       Racing Role-Playing      Shooter 
##           13            6           18           78           34 
##   Simulation       Sports     Strategy 
##           15           59           16
# Games per platform in 2015. This overwrites the 2016 table of the same name.
mytable2 <- table(Platform = vgame2015$Platform)
mytable2
## Platform
## 2600  3DO  3DS   DC   DS   GB  GBA   GC  GEN   GG  N64  NES   NG   PC PCFX 
##    0    0   86    0    0    0    0    0    0    0    0    0    0   50    0 
##   PS  PS2  PS3  PS4  PSP  PSV  SAT  SCD SNES TG16  Wii WiiU   WS X360   XB 
##    0    0   73  137    3  110    0    0    0    0    4   28    0   35    0 
## XOne 
##   80
# Games per rating in 2015; the unnamed first column is the empty-string level.
mytable3 <- table(Rating = vgame2015$Rating)
mytable3
## Rating
##        AO    E E10+   EC  K-A    M   RP    T 
##  291    0   87   51    0    0   71    0  106

-Two-way Contingency tables of subset-2(year 2015)

# Genre x Rating cross-tabulation for 2015.
mytable <- xtabs(formula = ~ Genre + Rating, data = vgame2015)
mytable
##               Rating
## Genre               AO   E E10+  EC K-A   M  RP   T
##                  0   0   0    0   0   0   0   0   0
##   Action       132   0  19   29   0   0  35   0  38
##   Adventure     40   0   0    1   0   0   7   0   6
##   Fighting       7   0   0    0   0   0   3   0  11
##   Misc          19   0   3    8   0   0   1   0   8
##   Platform       2   0  10    1   0   0   0   0   0
##   Puzzle         3   0   2    1   0   0   0   0   0
##   Racing         7   0  10    1   0   0   0   0   0
##   Role-Playing  45   0   1    2   0   0   9   0  21
##   Shooter       12   0   0    1   0   0  16   0   5
##   Simulation     6   0   4    1   0   0   0   0   4
##   Sports         8   0  38    5   0   0   0   0   8
##   Strategy      10   0   0    1   0   0   0   0   5
# Genre x Platform cross-tabulation for 2015.
mytable11 <- xtabs(formula = ~ Genre + Platform, data = vgame2015)
mytable11
##               Platform
## Genre          2600 3DO 3DS DC DS GB GBA GC GEN GG N64 NES NG PC PCFX PS
##                   0   0   0  0  0  0   0  0   0  0   0   0  0  0    0  0
##   Action          0   0  39  0  0  0   0  0   0  0   0   0  0 16    0  0
##   Adventure       0   0   4  0  0  0   0  0   0  0   0   0  0  2    0  0
##   Fighting        0   0   2  0  0  0   0  0   0  0   0   0  0  1    0  0
##   Misc            0   0  10  0  0  0   0  0   0  0   0   0  0  2    0  0
##   Platform        0   0   4  0  0  0   0  0   0  0   0   0  0  0    0  0
##   Puzzle          0   0   4  0  0  0   0  0   0  0   0   0  0  0    0  0
##   Racing          0   0   0  0  0  0   0  0   0  0   0   0  0  3    0  0
##   Role-Playing    0   0  15  0  0  0   0  0   0  0   0   0  0  3    0  0
##   Shooter         0   0   0  0  0  0   0  0   0  0   0   0  0  5    0  0
##   Simulation      0   0   3  0  0  0   0  0   0  0   0   0  0  6    0  0
##   Sports          0   0   1  0  0  0   0  0   0  0   0   0  0  4    0  0
##   Strategy        0   0   4  0  0  0   0  0   0  0   0   0  0  8    0  0
##               Platform
## Genre          PS2 PS3 PS4 PSP PSV SAT SCD SNES TG16 Wii WiiU WS X360 XB
##                  0   0   0   0   0   0   0    0    0   0    0  0    0  0
##   Action         0  35  52   3  54   0   0    0    0   3   11  0   13  0
##   Adventure      0   7  10   0  21   0   0    0    0   0    0  0    4  0
##   Fighting       0   5   9   0   0   0   0    0    0   0    0  0    1  0
##   Misc           0   3   4   0   5   0   0    0    0   1    7  0    2  0
##   Platform       0   0   2   0   2   0   0    0    0   0    4  0    1  0
##   Puzzle         0   0   1   0   0   0   0    0    0   0    1  0    0  0
##   Racing         0   2   5   0   1   0   0    0    0   0    0  0    1  0
##   Role-Playing   0   5  25   0  20   0   0    0    0   0    3  0    0  0
##   Shooter        0   3  11   0   0   0   0    0    0   0    1  0    3  0
##   Simulation     0   1   2   0   1   0   0    0    0   0    0  0    1  0
##   Sports         0  12  15   0   4   0   0    0    0   0    1  0    9  0
##   Strategy       0   0   1   0   2   0   0    0    0   0    0  0    0  0
##               Platform
## Genre          XOne
##                   0
##   Action         27
##   Adventure       6
##   Fighting        3
##   Misc            5
##   Platform        0
##   Puzzle          0
##   Racing          6
##   Role-Playing    7
##   Shooter        11
##   Simulation      1
##   Sports         13
##   Strategy        1
# Rating x Platform cross-tabulation for 2015.
mytable12 <- xtabs(formula = ~ Rating + Platform, data = vgame2015)
mytable12
##       Platform
## Rating 2600 3DO 3DS DC DS GB GBA GC GEN GG N64 NES NG PC PCFX PS PS2 PS3
##           0   0  57  0  0  0   0  0   0  0   0   0  0 14    0  0   0  39
##   AO      0   0   0  0  0  0   0  0   0  0   0   0  0  0    0  0   0   0
##   E       0   0  10  0  0  0   0  0   0  0   0   0  0  9    0  0   0   9
##   E10+    0   0  13  0  0  0   0  0   0  0   0   0  0  3    0  0   0   5
##   EC      0   0   0  0  0  0   0  0   0  0   0   0  0  0    0  0   0   0
##   K-A     0   0   0  0  0  0   0  0   0  0   0   0  0  0    0  0   0   0
##   M       0   0   0  0  0  0   0  0   0  0   0   0  0 15    0  0   0   4
##   RP      0   0   0  0  0  0   0  0   0  0   0   0  0  0    0  0   0   0
##   T       0   0   6  0  0  0   0  0   0  0   0   0  0  9    0  0   0  16
##       Platform
## Rating PS4 PSP PSV SAT SCD SNES TG16 Wii WiiU WS X360 XB XOne
##         52   3  82   0   0    0    0   1    7  0   10  0   26
##   AO     0   0   0   0   0    0    0   0    0  0    0  0    0
##   E     18   0   4   0   0    0    0   2   11  0   11  0   13
##   E10+   8   0   3   0   0    0    0   1    7  0    4  0    7
##   EC     0   0   0   0   0    0    0   0    0  0    0  0    0
##   K-A    0   0   0   0   0    0    0   0    0  0    0  0    0
##   M     21   0   6   0   0    0    0   0    1  0    4  0   20
##   RP     0   0   0   0   0    0    0   0    0  0    0  0    0
##   T     38   0  15   0   0    0    0   0    2  0    6  0   14

-> Box plots of individual variables, comparing 2016 and 2015

# Sales boxplots by region: each two-row layout shows 2016 on top and 2015
# underneath so the two years can be compared by eye.

# North America
par(mfrow = c(2, 1))
boxplot(vgame2016$NA_Sales,
        horizontal = TRUE, col = "yellow",
        xlab = "NA sales",
        main = "Boxplot of  North America sales in 2016")
boxplot(vgame2015$NA_Sales,
        horizontal = TRUE, col = "yellow",
        xlab = "NA sales",
        main = "Boxplot of  North America sales in 2015")

# Europe
# NOTE(review): with horizontal = TRUE, boxplot() appears to apply xlim to the
# box-position axis rather than the sales axis -- confirm whether ylim was
# intended here.
par(mfrow = c(2, 1))
boxplot(vgame2016$EU_Sales,
        horizontal = TRUE, col = "yellow", xlim = c(0, 3),
        xlab = "EU sales",
        main = "Boxplot of  Europe sales in 2016")
boxplot(vgame2015$EU_Sales,
        horizontal = TRUE, col = "yellow", xlim = c(0, 3),
        xlab = "EU sales",
        main = "Boxplot of  Europe sales in 2015")

# Japan
par(mfrow = c(2, 1))
boxplot(vgame2016$JP_Sales,
        horizontal = TRUE, col = "yellow",
        xlab = "JP sales",
        main = "Boxplot of  Japan sales in 2016")
boxplot(vgame2015$JP_Sales,
        horizontal = TRUE, col = "yellow",
        xlab = "JP sales",
        main = "Boxplot of  Japan sales in 2015")

# Global
par(mfrow = c(2, 1))
boxplot(vgame2016$Global_Sales,
        horizontal = TRUE, col = "yellow",
        xlab = "Global sales",
        main = "Boxplot of  Global sales in 2016")
boxplot(vgame2015$Global_Sales,
        horizontal = TRUE, col = "yellow",
        xlab = "Global sales",
        main = "Boxplot of  Global sales in 2015")

# Other regions
par(mfrow = c(2, 1))
boxplot(vgame2016$Other_Sales,
        horizontal = TRUE, col = "yellow",
        xlab = "Other sales",
        main = "Boxplot of Other sales in 2016")
boxplot(vgame2015$Other_Sales,
        horizontal = TRUE, col = "yellow",
        xlab = "Other sales",
        main = "Boxplot of  Other sales in 2015")

-Changing the User_Score variable from a factor vector to a numeric vector for year 2016

# Inspect column types of the 2016 subset before converting User_Score.
str(vgame2016)
## 'data.frame':    502 obs. of  16 variables:
##  $ Name           : Factor w/ 11563 levels "","'98 Koshien",..: 3121 7423 10726 1238 727 10403 3121 1238 3053 727 ...
##  $ Platform       : Factor w/ 31 levels "2600","3DO","3DS",..: 19 3 19 19 19 19 31 31 19 31 ...
##  $ Year_of_Release: Factor w/ 40 levels "1980","1981",..: 37 37 37 37 37 37 37 37 37 37 ...
##  $ Genre          : Factor w/ 13 levels "","Action","Adventure",..: 12 9 10 10 10 10 12 10 2 10 ...
##  $ Publisher      : Factor w/ 582 levels "10TACLE Studios",..: 140 371 467 17 140 536 140 17 536 140 ...
##  $ NA_Sales       : num  0.66 2.98 1.85 1.61 1.1 1.35 0.43 1.46 0.6 1.28 ...
##  $ EU_Sales       : num  5.75 1.45 2.5 2 2.15 1.7 2.05 0.74 1.25 0.77 ...
##  $ JP_Sales       : num  0.08 2.26 0.19 0.15 0.21 0.15 0 0 0.06 0 ...
##  $ Other_Sales    : num  1.11 0.45 0.85 0.71 0.61 0.6 0.17 0.22 0.35 0.2 ...
##  $ Global_Sales   : num  7.59 7.14 5.38 4.46 4.08 3.8 2.65 2.42 2.26 2.25 ...
##  $ Critic_Score   : int  85 NA 93 77 88 80 84 78 76 87 ...
##  $ Critic_Count   : int  41 NA 113 82 31 64 50 17 91 37 ...
##  $ User_Score     : Factor w/ 97 levels "","0","0.2","0.3",..: 49 1 78 33 83 69 54 30 62 81 ...
##  $ User_Count     : int  398 NA 7064 1129 809 2219 201 290 635 440 ...
##  $ Developer      : Factor w/ 1697 levels "","10tacle Studios",..: 455 1 1002 733 440 910 455 733 1561 440 ...
##  $ Rating         : Factor w/ 9 levels "","AO","E","E10+",..: 3 1 9 7 7 7 3 7 7 7 ...
# Convert User_Score to the numeric scores it actually contains.
# Calling as.integer() directly on a factor returns the internal level codes
# (1, 2, 3, ...), not the values printed as labels, so go through the labels.
# Output recorded from the earlier as.integer() version (e.g. 49, 1, 78, ...)
# shows level codes, not scores.
score_text_2016 <- trimws(as.character(vgame2016$User_Score))
# Anything that is not a plain non-negative number (such as the empty-string
# level) becomes NA explicitly, so as.numeric() raises no coercion warning.
score_text_2016[!grepl("^[0-9]+(\\.[0-9]+)?$", score_text_2016)] <- NA_character_
vgame2016$User_Score <- as.numeric(score_text_2016)
str(vgame2016$User_Score)
##  int [1:502] 49 1 78 33 83 69 54 30 62 81 ...

-Changing the User_Score variable from a factor vector to a numeric vector for year 2015

# Inspect column types of the 2015 subset before converting User_Score.
str(vgame2015)
## 'data.frame':    606 obs. of  16 variables:
##  $ Name           : Factor w/ 11563 levels "","'98 Koshien",..: 1234 3120 9144 1234 2986 10729 3935 9043 2986 10206 ...
##  $ Platform       : Factor w/ 31 levels "2600","3DO","3DS",..: 19 19 19 31 19 19 31 27 31 19 ...
##  $ Year_of_Release: Factor w/ 40 levels "1980","1981",..: 36 36 36 36 36 36 36 36 36 36 ...
##  $ Genre          : Factor w/ 13 levels "","Action","Adventure",..: 10 12 10 10 9 2 10 10 9 9 ...
##  $ Publisher      : Factor w/ 582 levels "10TACLE Studios",..: 17 140 140 17 65 467 330 371 65 354 ...
##  $ NA_Sales       : num  6.03 1.12 2.99 4.59 2.53 2.07 2.78 1.54 2.51 1.02 ...
##  $ EU_Sales       : num  5.86 6.12 3.49 2.11 3.27 1.71 1.27 1.18 1.32 2.13 ...
##  $ JP_Sales       : num  0.36 0.06 0.22 0.01 0.24 0.08 0.03 1.46 0.01 0.23 ...
##  $ Other_Sales    : num  2.38 1.28 1.28 0.68 1.13 0.76 0.41 0.26 0.38 0.59 ...
##  $ Global_Sales   : num  14.63 8.57 7.98 7.39 7.16 ...
##  $ Critic_Score   : int  NA 82 NA NA 87 86 84 81 88 92 ...
##  $ Critic_Count   : int  NA 42 NA NA 58 78 101 88 39 79 ...
##  $ User_Score     : Factor w/ 97 levels "","0","0.2","0.3",..: 1 42 1 1 64 80 63 84 61 91 ...
##  $ User_Count     : int  NA 896 NA NA 4228 1264 2438 1184 1749 10179 ...
##  $ Developer      : Factor w/ 1697 levels "","10tacle Studios",..: 1 452 1 1 176 233 20 1035 176 282 ...
##  $ Rating         : Factor w/ 9 levels "","AO","E","E10+",..: 1 3 1 1 7 9 9 4 7 7 ...
# Convert User_Score to the numeric scores it actually contains.
# Calling as.integer() directly on a factor returns the internal level codes
# (1, 2, 3, ...), not the values printed as labels, so go through the labels.
# Output recorded from the earlier as.integer() version (e.g. 1, 42, 1, ...)
# shows level codes, not scores.
score_text_2015 <- trimws(as.character(vgame2015$User_Score))
# Anything that is not a plain non-negative number (such as the empty-string
# level) becomes NA explicitly, so as.numeric() raises no coercion warning.
score_text_2015[!grepl("^[0-9]+(\\.[0-9]+)?$", score_text_2015)] <- NA_character_
vgame2015$User_Score <- as.numeric(score_text_2015)
str(vgame2015$User_Score)
##  int [1:606] 1 42 1 1 64 80 63 84 61 91 ...
# Critic vs user scores on a 2x2 grid: top row 2016, bottom row 2015.
par(mfrow = c(2, 2))
boxplot(vgame2016$Critic_Score,
        horizontal = TRUE, col = "yellow",
        xlab = "Critic score",
        main = "Boxplot of  Critic score in 2016")
boxplot(vgame2016$User_Score,
        horizontal = TRUE, col = "yellow",
        xlab = "User score",
        main = "Boxplot of  User score in 2016")
boxplot(vgame2015$Critic_Score,
        horizontal = TRUE, col = "yellow",
        xlab = "Critic score",
        main = "Boxplot of  Critic score in 2015")
boxplot(vgame2015$User_Score,
        horizontal = TRUE, col = "yellow",
        xlab = "User score",
        main = "Boxplot of  User score in 2015")

# Number of critic vs user reviews, same 2x2 arrangement.
par(mfrow = c(2, 2))
boxplot(vgame2016$Critic_Count,
        horizontal = TRUE, col = "yellow",
        xlab = "Critic count",
        main = "Boxplot of  Critic count in 2016")
boxplot(vgame2016$User_Count,
        horizontal = TRUE, col = "yellow",
        xlab = "User count",
        main = "Boxplot of  User count in 2016")
boxplot(vgame2015$Critic_Count,
        horizontal = TRUE, col = "yellow",
        xlab = "Critic count",
        main = "Boxplot of  Critic count in 2015")
boxplot(vgame2015$User_Count,
        horizontal = TRUE, col = "yellow",
        xlab = "User count",
        main = "Boxplot of  User count in 2015")

-> Boxplots of one variable grouped by another, compared across years 2016 & 2015

# Draw a horizontal boxplot of a value split by a grouping variable and label
# the group axis with the group names.
#
# The earlier version passed axis(...) as an extra argument to with(); with()
# only evaluates its `expr` argument, so the axis call never ran and the group
# axis (suppressed by yaxt = "n") stayed blank. The axis is now drawn as its
# own statement, with one tick per group and the group names as labels.
#
# formula     value ~ group, evaluated in `data`
# data        data frame holding both variables
# group_label label for the group (vertical) axis
# value_label label for the value (horizontal) axis
# title       main title of the plot
# Returns the boxplot statistics invisibly.
grouped_boxplot <- function(formula, data, group_label, value_label, title) {
  stats <- boxplot(formula, data = data, horizontal = TRUE, yaxt = "n",
                   ylab = group_label, xlab = value_label, col = "yellow",
                   main = title)
  # Take positions and labels from what boxplot() actually drew, so the axis
  # fits any number of groups (Genre and Rating have different level counts).
  axis(side = 2, at = seq_along(stats$names), labels = stats$names,
       las = 1, cex.axis = 0.6)
  invisible(stats)
}

# Global sales by genre: 2016 on top, 2015 below.
par(mfrow = c(2, 1))
grouped_boxplot(Global_Sales ~ Genre, vgame2016, "Genre", "Global sales",
                "Comparison of Global sales based on Genre of the video game in 2016")
grouped_boxplot(Global_Sales ~ Genre, vgame2015, "Genre", "Global sales",
                "Comparison of Global sales based on Genre of the video game in 2015")

# Global sales by rating.
par(mfrow = c(2, 1))
grouped_boxplot(Global_Sales ~ Rating, vgame2016, "Rating", "Global sales",
                "Comparison of Global sales on rating in 2016")
grouped_boxplot(Global_Sales ~ Rating, vgame2015, "Rating", "Global sales",
                "Comparison of Global sales on rating in 2015")

# Critic score by rating.
par(mfrow = c(2, 1))
grouped_boxplot(Critic_Score ~ Rating, vgame2016, "Rating", "Critic score",
                "Comparison of Critic score based on Rating in 2016")
grouped_boxplot(Critic_Score ~ Rating, vgame2015, "Rating", "Critic score",
                "Comparison of Critic score based on Rating in 2015")

# User score by rating.
par(mfrow = c(2, 1))
grouped_boxplot(User_Score ~ Rating, vgame2016, "Rating", "User score",
                "Comparison of User score based on Rating in 2016")
grouped_boxplot(User_Score ~ Rating, vgame2015, "Rating", "User score",
                "Comparison of User score basedon Rating in 2015")

-> Histograms of different variables correlated pair-wise in year 2016

library(lattice)
# Genre distribution within each rating (one lattice panel per rating), 2016.
histogram(~Genre | Rating, data=vgame2016)

# Genre distribution within each platform, 2016.
histogram(~Genre | Platform, data=vgame2016)

# Platform distribution within each rating, 2016.
histogram(~Platform | Rating, data=vgame2016)

-> histograms of different variables correlated pair-wise in year 2015

library(lattice)
# Genre distribution within each rating (one lattice panel per rating), 2015.
histogram(~Genre | Rating, data=vgame2015)

# Genre distribution within each platform, 2015.
histogram(~Genre | Platform, data=vgame2015)

# Platform distribution within each rating, 2015.
histogram(~Platform | Rating, data=vgame2015)

-> Scatterplots of variables showing comparison in sales in years 2016 & 2015

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Global sales against critic and user scores, per year.
# The user-score plots used a fixed xlim = c(1, 100), which only suits one
# particular scale of User_Score; the limits are now taken from the data so
# the points fill the plot whatever scale the column is on.
# NOTE(review): `spread` and `smoother.args` look like the car 2.x interface;
# car >= 3.0 documents a single `smooth = list(...)` argument instead --
# confirm against the installed car version.
scatterplot(Global_Sales ~ Critic_Score, data = vgame2016, spread = FALSE,
            smoother.args = list(lty = 2), pch = 19,
            main = "Scatterplot of Global Sales vs. Critic score in 2016",
            xlab = "Critic score",
            ylab = "Global sales ", cex = 0.6)

scatterplot(Global_Sales ~ User_Score, data = vgame2016, spread = FALSE,
            smoother.args = list(lty = 2), pch = 19,
            xlim = range(vgame2016$User_Score, na.rm = TRUE),
            main = "Scatterplot of Global sales vs. User SCore in 2016",
            xlab = "User Score",
            ylab = "Global sales ", cex = 0.6)

scatterplot(Global_Sales ~ Critic_Score, data = vgame2015, spread = FALSE,
            smoother.args = list(lty = 2), pch = 19,
            main = "Scatterplot of Global sales vs. Critic score in 2015",
            xlab = "Critic score",
            ylab = "Global sales ", cex = 0.6)

scatterplot(Global_Sales ~ User_Score, data = vgame2015, spread = FALSE,
            smoother.args = list(lty = 2), pch = 19,
            xlim = range(vgame2015$User_Score, na.rm = TRUE),
            main = "Scatterplot of Global sales vs. User SCore in 2015",
            xlab = "User score",
            ylab = "Global sales ", cex = 0.6)

-Changing the Rating variable from a factor vector to a numeric vector for year 2016

# Inspect column types of the 2016 subset before converting Rating.
str(vgame2016)
## 'data.frame':    502 obs. of  16 variables:
##  $ Name           : Factor w/ 11563 levels "","'98 Koshien",..: 3121 7423 10726 1238 727 10403 3121 1238 3053 727 ...
##  $ Platform       : Factor w/ 31 levels "2600","3DO","3DS",..: 19 3 19 19 19 19 31 31 19 31 ...
##  $ Year_of_Release: Factor w/ 40 levels "1980","1981",..: 37 37 37 37 37 37 37 37 37 37 ...
##  $ Genre          : Factor w/ 13 levels "","Action","Adventure",..: 12 9 10 10 10 10 12 10 2 10 ...
##  $ Publisher      : Factor w/ 582 levels "10TACLE Studios",..: 140 371 467 17 140 536 140 17 536 140 ...
##  $ NA_Sales       : num  0.66 2.98 1.85 1.61 1.1 1.35 0.43 1.46 0.6 1.28 ...
##  $ EU_Sales       : num  5.75 1.45 2.5 2 2.15 1.7 2.05 0.74 1.25 0.77 ...
##  $ JP_Sales       : num  0.08 2.26 0.19 0.15 0.21 0.15 0 0 0.06 0 ...
##  $ Other_Sales    : num  1.11 0.45 0.85 0.71 0.61 0.6 0.17 0.22 0.35 0.2 ...
##  $ Global_Sales   : num  7.59 7.14 5.38 4.46 4.08 3.8 2.65 2.42 2.26 2.25 ...
##  $ Critic_Score   : int  85 NA 93 77 88 80 84 78 76 87 ...
##  $ Critic_Count   : int  41 NA 113 82 31 64 50 17 91 37 ...
##  $ User_Score     : int  49 1 78 33 83 69 54 30 62 81 ...
##  $ User_Count     : int  398 NA 7064 1129 809 2219 201 290 635 440 ...
##  $ Developer      : Factor w/ 1697 levels "","10tacle Studios",..: 455 1 1002 733 440 910 455 733 1561 440 ...
##  $ Rating         : Factor w/ 9 levels "","AO","E","E10+",..: 3 1 9 7 7 7 3 7 7 7 ...
# Replace the Rating factor by its numeric level codes.
# NOTE(review): per the str() listing the levels are in alphabetical order
# ("", "AO", "E", "E10+", ...), so the codes carry no age ordering -- confirm
# this is acceptable for the analyses that follow.
vgame2016[["Rating"]] <- as.numeric(vgame2016[["Rating"]])
str(vgame2016[["Rating"]])
##  num [1:502] 3 1 9 7 7 7 3 7 7 7 ...

-Changing the Rating variable from a factor vector to a numeric vector for year 2015

# Inspect column types of the 2015 subset before converting Rating.
str(vgame2015)
## 'data.frame':    606 obs. of  16 variables:
##  $ Name           : Factor w/ 11563 levels "","'98 Koshien",..: 1234 3120 9144 1234 2986 10729 3935 9043 2986 10206 ...
##  $ Platform       : Factor w/ 31 levels "2600","3DO","3DS",..: 19 19 19 31 19 19 31 27 31 19 ...
##  $ Year_of_Release: Factor w/ 40 levels "1980","1981",..: 36 36 36 36 36 36 36 36 36 36 ...
##  $ Genre          : Factor w/ 13 levels "","Action","Adventure",..: 10 12 10 10 9 2 10 10 9 9 ...
##  $ Publisher      : Factor w/ 582 levels "10TACLE Studios",..: 17 140 140 17 65 467 330 371 65 354 ...
##  $ NA_Sales       : num  6.03 1.12 2.99 4.59 2.53 2.07 2.78 1.54 2.51 1.02 ...
##  $ EU_Sales       : num  5.86 6.12 3.49 2.11 3.27 1.71 1.27 1.18 1.32 2.13 ...
##  $ JP_Sales       : num  0.36 0.06 0.22 0.01 0.24 0.08 0.03 1.46 0.01 0.23 ...
##  $ Other_Sales    : num  2.38 1.28 1.28 0.68 1.13 0.76 0.41 0.26 0.38 0.59 ...
##  $ Global_Sales   : num  14.63 8.57 7.98 7.39 7.16 ...
##  $ Critic_Score   : int  NA 82 NA NA 87 86 84 81 88 92 ...
##  $ Critic_Count   : int  NA 42 NA NA 58 78 101 88 39 79 ...
##  $ User_Score     : int  1 42 1 1 64 80 63 84 61 91 ...
##  $ User_Count     : int  NA 896 NA NA 4228 1264 2438 1184 1749 10179 ...
##  $ Developer      : Factor w/ 1697 levels "","10tacle Studios",..: 1 452 1 1 176 233 20 1035 176 282 ...
##  $ Rating         : Factor w/ 9 levels "","AO","E","E10+",..: 1 3 1 1 7 9 9 4 7 7 ...
# Replace the Rating factor with its underlying level codes
# (1 = "", 2 = "AO", 3 = "E", 4 = "E10+", ...). as.integer() is used so the
# result really is an integer vector; as.numeric() would store doubles.
# The codes only reflect level order, not an ordinal scale.
vgame2015$Rating <- as.integer(vgame2015$Rating)
str(vgame2015$Rating)
##  int [1:606] 1 3 1 1 7 9 9 4 7 7 ...

->Correlation Matrix visualization

# Correlation plot for the 2016 subset. Columns 6 to 14 are the sales
# figures (NA, EU, JP, Other, Global) followed by Critic_Score,
# Critic_Count, User_Score and User_Count; rows containing NA are dropped.
library(corrplot)
## corrplot 0.84 loaded
corrplot(
  corr = cor(vgame2016[, 6:14], use = "complete.obs"),
  method = "ellipse",
  main = "correlation matrix of variables in 2016"
)

# Correlation plot for the 2015 subset. Columns 6 to 14 are the sales
# figures (NA, EU, JP, Other, Global) followed by Critic_Score,
# Critic_Count, User_Score and User_Count; rows containing NA are dropped.
library(corrplot)
corrplot(
  corr = cor(vgame2015[, 6:14], use = "complete.obs"),
  method = "ellipse",
  main = "correlation matrix of variables in 2015"
)

->Corrgram

# Corrgram of the 2016 subset in its original column order:
# shaded cells below the diagonal, pie glyphs above it, and the
# min/max of each variable on the diagonal.
library(corrgram)
corrgram(
  vgame2016,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Corrgram of all the variables in 2016"
)

# Corrgram of the 2015 subset in its original column order:
# shaded cells below the diagonal, pie glyphs above it, and the
# min/max of each variable on the diagonal.
library(corrgram)
corrgram(
  vgame2015,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Corrgram of all the  variables in 2015"
)

->Scatterplot matrix

# Scatterplot matrix of the review metrics against global sales for 2016.
# NOTE(review): spread, smoother.args and a character value for diagonal
# look like car 2.x-style arguments; confirm against the installed car version.
library(car)
scatterplotMatrix(
  formula = ~ Critic_Score + Critic_Count + User_Score + User_Count +
    Global_Sales,
  data = vgame2016,
  diagonal = "histogram",
  spread = FALSE,
  smoother.args = list(lty = 2),
  pch = 19,
  cex = 0.6,
  main = "scatterplot matrix in 2016"
)

# Scatterplot matrix of the review metrics against global sales for 2015.
# NOTE(review): spread, smoother.args and a character value for diagonal
# look like car 2.x-style arguments; confirm against the installed car version.
scatterplotMatrix(
  formula = ~ Critic_Score + Critic_Count + User_Score + User_Count +
    Global_Sales,
  data = vgame2015,
  diagonal = "histogram",
  spread = FALSE,
  smoother.args = list(lty = 2),
  pch = 19,
  cex = 0.6,
  main = "scatterplot matrix in 2015"
)

-> Pearson’s chi-squared test is not applied because the variables of interest are not clearly categorical

->Appropriate dependent (paired) t-tests can be carried out to assess the statistical significance of the dependency, as follows

->NULL HYPOTHESIS: Global sales is independent of Critic score, Critic count, NA sales, EU sales, JP sales, Other sales and User count

# NOTE(review): attach() is kept only in case code later in the file relies
# on the attached columns; the test below reads from vgame2016 explicitly.
attach(vgame2016)
# Paired t-test on the per-game difference Critic_Score - Global_Sales.
# The default t.test method has no data argument (it was silently ignored),
# so the columns are supplied through with() instead.
with(vgame2016, t.test(Critic_Score, Global_Sales, paired = TRUE))
## 
##  Paired t-test
## 
## data:  Critic_Score and Global_Sales
## t = 96.234, df = 231, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  71.26568 74.24484
## sample estimates:
## mean of the differences 
##                72.75526
# Paired t-test on the per-game difference Critic_Count - Global_Sales.
# The default t.test method has no data argument (it was silently ignored),
# so the columns are supplied through with() instead.
with(vgame2016, t.test(Critic_Count, Global_Sales, paired = TRUE))
## 
##  Paired t-test
## 
## data:  Critic_Count and Global_Sales
## t = 19.309, df = 231, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  26.80807 32.90072
## sample estimates:
## mean of the differences 
##                 29.8544
# Paired t-test on the per-game difference Other_Sales - Global_Sales.
# The default t.test method has no data argument (it was silently ignored),
# so the columns are supplied through with() instead.
with(vgame2016, t.test(Other_Sales, Global_Sales, paired = TRUE))
## 
##  Paired t-test
## 
## data:  Other_Sales and Global_Sales
## t = -8.4585, df = 501, p-value = 2.989e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2838165 -0.1768209
## sample estimates:
## mean of the differences 
##              -0.2303187

-Since the p-value is very low (<0.001) in each test, there appears to be a significant relationship between each pair of variables.

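-Hence, the t-test rejects the null hypothesis that the two variables are independent, and the result is statistically significant. (Note: a paired t-test strictly tests whether the mean difference between the two variables is zero; a correlation test such as cor.test() would assess dependence directly.)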