library(ggplot2)
library(plotly) 
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr) 
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
##- Load the video game sales table from the working directory and preview it
lbb2 <- read.csv("vgsales.csv")
head(lbb2, n = 6L)
##   Rank                     Name Platform Year        Genre Publisher NA_Sales
## 1    1               Wii Sports      Wii 2006       Sports  Nintendo    41.49
## 2    2        Super Mario Bros.      NES 1985     Platform  Nintendo    29.08
## 3    3           Mario Kart Wii      Wii 2008       Racing  Nintendo    15.85
## 4    4        Wii Sports Resort      Wii 2009       Sports  Nintendo    15.75
## 5    5 Pokemon Red/Pokemon Blue       GB 1996 Role-Playing  Nintendo    11.27
## 6    6                   Tetris       GB 1989       Puzzle  Nintendo    23.20
##   EU_Sales JP_Sales Other_Sales Global_Sales
## 1    29.02     3.77        8.46        82.74
## 2     3.58     6.81        0.77        40.24
## 3    12.88     3.79        3.31        35.82
## 4    11.01     3.28        2.96        33.00
## 5     8.89    10.22        1.00        31.37
## 6     2.26     4.22        0.58        30.26

##- Count the rows (observations) in the data set

nrow(lbb2)
## [1] 16598

##- Count the columns (variables) in the data set

ncol(lbb2)
## [1] 11

##- Show both dimensions at once: number of rows first, then number of columns

dim(lbb2)
## [1] 16598    11

##- List the column names

names(lbb2)
##  [1] "Rank"         "Name"         "Platform"     "Year"         "Genre"       
##  [6] "Publisher"    "NA_Sales"     "EU_Sales"     "JP_Sales"     "Other_Sales" 
## [11] "Global_Sales"

##- Inspect the structure of the data frame: the type and first values of each column

str(lbb2)
## 'data.frame':    16598 obs. of  11 variables:
##  $ Rank        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Name        : chr  "Wii Sports" "Super Mario Bros." "Mario Kart Wii" "Wii Sports Resort" ...
##  $ Platform    : chr  "Wii" "NES" "Wii" "Wii" ...
##  $ Year        : chr  "2006" "1985" "2008" "2009" ...
##  $ Genre       : chr  "Sports" "Platform" "Racing" "Sports" ...
##  $ Publisher   : chr  "Nintendo" "Nintendo" "Nintendo" "Nintendo" ...
##  $ NA_Sales    : num  41.5 29.1 15.8 15.8 11.3 ...
##  $ EU_Sales    : num  29.02 3.58 12.88 11.01 8.89 ...
##  $ JP_Sales    : num  3.77 6.81 3.79 3.28 10.22 ...
##  $ Other_Sales : num  8.46 0.77 3.31 2.96 1 0.58 2.9 2.85 2.26 0.47 ...
##  $ Global_Sales: num  82.7 40.2 35.8 33 31.4 ...
##- NOTE(review): this repeats the str(lbb2) call above and prints the same structure; it looks like a leftover duplicate
str(lbb2)
## 'data.frame':    16598 obs. of  11 variables:
##  $ Rank        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Name        : chr  "Wii Sports" "Super Mario Bros." "Mario Kart Wii" "Wii Sports Resort" ...
##  $ Platform    : chr  "Wii" "NES" "Wii" "Wii" ...
##  $ Year        : chr  "2006" "1985" "2008" "2009" ...
##  $ Genre       : chr  "Sports" "Platform" "Racing" "Sports" ...
##  $ Publisher   : chr  "Nintendo" "Nintendo" "Nintendo" "Nintendo" ...
##  $ NA_Sales    : num  41.5 29.1 15.8 15.8 11.3 ...
##  $ EU_Sales    : num  29.02 3.58 12.88 11.01 8.89 ...
##  $ JP_Sales    : num  3.77 6.81 3.79 3.28 10.22 ...
##  $ Other_Sales : num  8.46 0.77 3.31 2.96 1 0.58 2.9 2.85 2.26 0.47 ...
##  $ Global_Sales: num  82.7 40.2 35.8 33 31.4 ...

##- Print summary statistics for every column (quartiles and mean for numeric columns, length and class for character columns)

summary(lbb2)
##       Rank           Name             Platform             Year          
##  Min.   :    1   Length:16598       Length:16598       Length:16598      
##  1st Qu.: 4151   Class :character   Class :character   Class :character  
##  Median : 8300   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 8301                                                           
##  3rd Qu.:12450                                                           
##  Max.   :16600                                                           
##     Genre            Publisher            NA_Sales          EU_Sales      
##  Length:16598       Length:16598       Min.   : 0.0000   Min.   : 0.0000  
##  Class :character   Class :character   1st Qu.: 0.0000   1st Qu.: 0.0000  
##  Mode  :character   Mode  :character   Median : 0.0800   Median : 0.0200  
##                                        Mean   : 0.2647   Mean   : 0.1467  
##                                        3rd Qu.: 0.2400   3rd Qu.: 0.1100  
##                                        Max.   :41.4900   Max.   :29.0200  
##     JP_Sales         Other_Sales        Global_Sales    
##  Min.   : 0.00000   Min.   : 0.00000   Min.   : 0.0100  
##  1st Qu.: 0.00000   1st Qu.: 0.00000   1st Qu.: 0.0600  
##  Median : 0.00000   Median : 0.01000   Median : 0.1700  
##  Mean   : 0.07778   Mean   : 0.04806   Mean   : 0.5374  
##  3rd Qu.: 0.04000   3rd Qu.: 0.04000   3rd Qu.: 0.4700  
##  Max.   :10.22000   Max.   :10.57000   Max.   :82.7400

##- Count the missing values (NA) in each column
##- NOTE(review): Year is read as character (see the str() output), so the file may mark
##- missing years with a text placeholder that is.na() does not detect - confirm against the CSV

colSums(is.na(lbb2))
##         Rank         Name     Platform         Year        Genre    Publisher 
##            0            0            0            0            0            0 
##     NA_Sales     EU_Sales     JP_Sales  Other_Sales Global_Sales 
##            0            0            0            0            0

##- Check whether the data contains any missing value (NA) at all

anyNA(lbb2)
## [1] FALSE

Subsetting and Practical Statistics

##- Keep the analysis on a separate copy named `fir` and preview its first rows
fir <- lbb2
head(fir, n = 6L)
##   Rank                     Name Platform Year        Genre Publisher NA_Sales
## 1    1               Wii Sports      Wii 2006       Sports  Nintendo    41.49
## 2    2        Super Mario Bros.      NES 1985     Platform  Nintendo    29.08
## 3    3           Mario Kart Wii      Wii 2008       Racing  Nintendo    15.85
## 4    4        Wii Sports Resort      Wii 2009       Sports  Nintendo    15.75
## 5    5 Pokemon Red/Pokemon Blue       GB 1996 Role-Playing  Nintendo    11.27
## 6    6                   Tetris       GB 1989       Puzzle  Nintendo    23.20
##   EU_Sales JP_Sales Other_Sales Global_Sales
## 1    29.02     3.77        8.46        82.74
## 2     3.58     6.81        0.77        40.24
## 3    12.88     3.79        3.31        35.82
## 4    11.01     3.28        2.96        33.00
## 5     8.89    10.22        1.00        31.37
## 6     2.26     4.22        0.58        30.26
##- Global sales per genre: raw points with a box plot drawn on top,
##- rendered as an interactive plotly widget
plot2 <- ggplot(fir, aes(Genre, Global_Sales)) +
  geom_point() +
  geom_boxplot()

ggplotly(plot2)
##- Group the rows by platform; printing the grouped tibble reports the number of groups
a <- group_by(fir, Platform)
a
## # A tibble: 16,598 × 11
## # Groups:   Platform [31]
##     Rank Name        Platf…¹ Year  Genre Publi…² NA_Sa…³ EU_Sa…⁴ JP_Sa…⁵ Other…⁶
##    <int> <chr>       <chr>   <chr> <chr> <chr>     <dbl>   <dbl>   <dbl>   <dbl>
##  1     1 Wii Sports  Wii     2006  Spor… Ninten…    41.5   29.0     3.77    8.46
##  2     2 Super Mari… NES     1985  Plat… Ninten…    29.1    3.58    6.81    0.77
##  3     3 Mario Kart… Wii     2008  Raci… Ninten…    15.8   12.9     3.79    3.31
##  4     4 Wii Sports… Wii     2009  Spor… Ninten…    15.8   11.0     3.28    2.96
##  5     5 Pokemon Re… GB      1996  Role… Ninten…    11.3    8.89   10.2     1   
##  6     6 Tetris      GB      1989  Puzz… Ninten…    23.2    2.26    4.22    0.58
##  7     7 New Super … DS      2006  Plat… Ninten…    11.4    9.23    6.5     2.9 
##  8     8 Wii Play    Wii     2006  Misc  Ninten…    14.0    9.2     2.93    2.85
##  9     9 New Super … Wii     2009  Plat… Ninten…    14.6    7.06    4.7     2.26
## 10    10 Duck Hunt   NES     1984  Shoo… Ninten…    26.9    0.63    0.28    0.47
## # … with 16,588 more rows, 1 more variable: Global_Sales <dbl>, and abbreviated
## #   variable names ¹​Platform, ²​Publisher, ³​NA_Sales, ⁴​EU_Sales, ⁵​JP_Sales,
## #   ⁶​Other_Sales
##- List the distinct platform codes in order of first appearance
unique(fir[["Platform"]])
##  [1] "Wii"  "NES"  "GB"   "DS"   "X360" "PS3"  "PS2"  "SNES" "GBA"  "3DS" 
## [11] "PS4"  "N64"  "PS"   "XB"   "PC"   "2600" "PSP"  "XOne" "GC"   "WiiU"
## [21] "GEN"  "DC"   "PSV"  "SAT"  "SCD"  "WS"   "NG"   "TG16" "3DO"  "GG"  
## [31] "PCFX"
##- Scatter plot of Other_Sales (y) against Global_Sales (x), one point per row
plot3 <- ggplot(data = fir, mapping = aes(x = Global_Sales, y = Other_Sales)) +
  geom_point()
plot3

##- Bar chart of the number of rows (games) per genre, shown as an interactive widget.
##- The full data frame is used on purpose: `fir[fir$Global_Sales, ]` would treat the
##- sales figures as row indices (truncated to integers, with values below 1 selecting
##- nothing), so the chart would count an arbitrary subset of rows.
plot4 <- ggplot(fir, aes(x = Genre)) +
  geom_bar(width = 0.5, fill = "darkblue")
ggplotly(plot4)
# ggplot(fir, aes(x = "", y=Genre, fill = Genre)) +
#   geom_bar(width = 1, stat = "identity") +coord_polar(theta = "y", start=0)+
#   # scale_fill_brewer(palette="Blues")+
#   # labs(fill="",
#   #      x=NULL,
#   #      y=NULL,
#   #      title="Pie Chart of patient severeness ")
#
#
##- Horizontal bars of North America sales per genre; each row contributes one
##- stacked segment, so the bar length is the genre total
plot5 <- ggplot(fir, aes(x = NA_Sales, y = Genre)) +
  geom_col(width = 0.5, fill = "darkblue") +
  labs(
    x = "NA Sales",
    y = "Genre",
    title = "Genre to NA_Sales"
  )
plot5

##- Horizontal bars of Europe sales per genre; each row contributes one
##- stacked segment, so the bar length is the genre total.
##- The x-axis label names EU sales to match the plotted column.
plot6 <- ggplot(fir, aes(x = EU_Sales, y = Genre)) +
  geom_col(width = 0.5, fill = "darkblue") +
  labs(
    x = "EU Sales",
    y = "Genre",
    title = "Genre to EU_Sales"
  )
plot6

##- Horizontal bars of Japan sales per genre; each row contributes one
##- stacked segment, so the bar length is the genre total.
##- The x-axis label names JP sales to match the plotted column.
plot7 <- ggplot(fir, aes(x = JP_Sales, y = Genre)) +
  geom_col(width = 0.5, fill = "darkblue") +
  labs(
    x = "JP Sales",
    y = "Genre",
    title = "Genre to JP_Sales"
  )
plot7

##- Horizontal bars of Other_Sales per genre; each row contributes one
##- stacked segment, so the bar length is the genre total.
##- The x-axis label names other-region sales to match the plotted column.
plot8 <- ggplot(fir, aes(x = Other_Sales, y = Genre)) +
  geom_col(width = 0.5, fill = "darkblue") +
  labs(
    x = "Other Sales",
    y = "Genre",
    title = "Genre to Other_Sales"
  )
plot8

##- Horizontal bars of Global_Sales per genre; each row contributes one
##- stacked segment, so the bar length is the genre total.
##- The x-axis label names global sales to match the plotted column.
plot9 <- ggplot(fir, aes(x = Global_Sales, y = Genre)) +
  geom_col(width = 0.5, fill = "darkblue") +
  labs(
    x = "Global Sales",
    y = "Genre",
    title = "Genre to Global_Sales"
  )
plot9

##- Interactive horizontal bar chart of the number of rows (games) per platform,
##- sorted by that count.
##- The bars are row counts, not sales totals, so the title and x-axis say so.
##- `categoryarray` receives the platform names in their sorted order, because it
##- must list the axis categories themselves rather than the counts.
fir %>%
  count(Platform, name = "Count") %>%
  arrange(Count) %>%
  plot_ly(
    x = ~Count,
    y = ~Platform,
    type = "bar",
    orientation = "h"
  ) %>%
  layout(
    title = "Number of Games by Platform",
    xaxis = list(title = "Number of Games"),
    yaxis = list(
      title = "Platform",
      categoryorder = "array",
      categoryarray = ~Platform
    )
  )