library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the video-game sales table from the working directory and preview the
# first six records.
lbb2 <- read.csv("vgsales.csv")
head(lbb2, n = 6L)
## Rank Name Platform Year Genre Publisher NA_Sales
## 1 1 Wii Sports Wii 2006 Sports Nintendo 41.49
## 2 2 Super Mario Bros. NES 1985 Platform Nintendo 29.08
## 3 3 Mario Kart Wii Wii 2008 Racing Nintendo 15.85
## 4 4 Wii Sports Resort Wii 2009 Sports Nintendo 15.75
## 5 5 Pokemon Red/Pokemon Blue GB 1996 Role-Playing Nintendo 11.27
## 6 6 Tetris GB 1989 Puzzle Nintendo 23.20
## EU_Sales JP_Sales Other_Sales Global_Sales
## 1 29.02 3.77 8.46 82.74
## 2 3.58 6.81 0.77 40.24
## 3 12.88 3.79 3.31 35.82
## 4 11.01 3.28 2.96 33.00
## 5 8.89 10.22 1.00 31.37
## 6 2.26 4.22 0.58 30.26
# Basic inspection of the loaded data frame: size, column names, column
# types, summary statistics and missing-value checks.

# Number of rows (observations)
nrow(lbb2)
## [1] 16598
# Number of columns (variables)
ncol(lbb2)
## [1] 11
# Rows and columns together
dim(lbb2)
## [1] 16598 11
# Column names
names(lbb2)
## [1] "Rank" "Name" "Platform" "Year" "Genre"
## [6] "Publisher" "NA_Sales" "EU_Sales" "JP_Sales" "Other_Sales"
## [11] "Global_Sales"
# Structure of the data frame: type and first values of every column
str(lbb2)
## 'data.frame': 16598 obs. of 11 variables:
## $ Rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Name : chr "Wii Sports" "Super Mario Bros." "Mario Kart Wii" "Wii Sports Resort" ...
## $ Platform : chr "Wii" "NES" "Wii" "Wii" ...
## $ Year : chr "2006" "1985" "2008" "2009" ...
## $ Genre : chr "Sports" "Platform" "Racing" "Sports" ...
## $ Publisher : chr "Nintendo" "Nintendo" "Nintendo" "Nintendo" ...
## $ NA_Sales : num 41.5 29.1 15.8 15.8 11.3 ...
## $ EU_Sales : num 29.02 3.58 12.88 11.01 8.89 ...
## $ JP_Sales : num 3.77 6.81 3.79 3.28 10.22 ...
## $ Other_Sales : num 8.46 0.77 3.31 2.96 1 0.58 2.9 2.85 2.26 0.47 ...
## $ Global_Sales: num 82.7 40.2 35.8 33 31.4 ...
# Summary statistics for every column
summary(lbb2)
## Rank Name Platform Year
## Min. : 1 Length:16598 Length:16598 Length:16598
## 1st Qu.: 4151 Class :character Class :character Class :character
## Median : 8300 Mode :character Mode :character Mode :character
## Mean : 8301
## 3rd Qu.:12450
## Max. :16600
## Genre Publisher NA_Sales EU_Sales
## Length:16598 Length:16598 Min. : 0.0000 Min. : 0.0000
## Class :character Class :character 1st Qu.: 0.0000 1st Qu.: 0.0000
## Mode :character Mode :character Median : 0.0800 Median : 0.0200
## Mean : 0.2647 Mean : 0.1467
## 3rd Qu.: 0.2400 3rd Qu.: 0.1100
## Max. :41.4900 Max. :29.0200
## JP_Sales Other_Sales Global_Sales
## Min. : 0.00000 Min. : 0.00000 Min. : 0.0100
## 1st Qu.: 0.00000 1st Qu.: 0.00000 1st Qu.: 0.0600
## Median : 0.00000 Median : 0.01000 Median : 0.1700
## Mean : 0.07778 Mean : 0.04806 Mean : 0.5374
## 3rd Qu.: 0.04000 3rd Qu.: 0.04000 3rd Qu.: 0.4700
## Max. :10.22000 Max. :10.57000 Max. :82.7400
# Number of missing values (NA) in each column
colSums(is.na(lbb2))
## Rank Name Platform Year Genre Publisher
## 0 0 0 0 0 0
## NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales
## 0 0 0 0 0
# Whether any NA occurs anywhere in the data
# NOTE(review): Year is read as character (see the str() output), which
# suggests it holds some non-numeric placeholder text; is.na()/anyNA() only
# detect real NA values and would not flag such strings -- confirm.
anyNA(lbb2)
## [1] FALSE
# Keep the loaded table under a second name, `fir`, which the plotting code
# below works on, and preview its first six rows.
fir <- lbb2
head(fir, n = 6L)
## Rank Name Platform Year Genre Publisher NA_Sales
## 1 1 Wii Sports Wii 2006 Sports Nintendo 41.49
## 2 2 Super Mario Bros. NES 1985 Platform Nintendo 29.08
## 3 3 Mario Kart Wii Wii 2008 Racing Nintendo 15.85
## 4 4 Wii Sports Resort Wii 2009 Sports Nintendo 15.75
## 5 5 Pokemon Red/Pokemon Blue GB 1996 Role-Playing Nintendo 11.27
## 6 6 Tetris GB 1989 Puzzle Nintendo 23.20
## EU_Sales JP_Sales Other_Sales Global_Sales
## 1 29.02 3.77 8.46 82.74
## 2 3.58 6.81 0.77 40.24
## 3 12.88 3.79 3.31 35.82
## 4 11.01 3.28 2.96 33.00
## 5 8.89 10.22 1.00 31.37
## 6 2.26 4.22 0.58 30.26
# Global sales per genre: raw points with a box plot drawn over them,
# rendered as an interactive plotly widget.
plot2 <- ggplot(fir, aes(Genre, Global_Sales)) +
  geom_point() +
  geom_boxplot()
ggplotly(plot2)
# Group the table by platform (no aggregation yet) and print the grouped
# tibble, then list the distinct platform codes.
a <- group_by(fir, Platform)
a
## # A tibble: 16,598 × 11
## # Groups: Platform [31]
## Rank Name Platf…¹ Year Genre Publi…² NA_Sa…³ EU_Sa…⁴ JP_Sa…⁵ Other…⁶
## <int> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 Wii Sports Wii 2006 Spor… Ninten… 41.5 29.0 3.77 8.46
## 2 2 Super Mari… NES 1985 Plat… Ninten… 29.1 3.58 6.81 0.77
## 3 3 Mario Kart… Wii 2008 Raci… Ninten… 15.8 12.9 3.79 3.31
## 4 4 Wii Sports… Wii 2009 Spor… Ninten… 15.8 11.0 3.28 2.96
## 5 5 Pokemon Re… GB 1996 Role… Ninten… 11.3 8.89 10.2 1
## 6 6 Tetris GB 1989 Puzz… Ninten… 23.2 2.26 4.22 0.58
## 7 7 New Super … DS 2006 Plat… Ninten… 11.4 9.23 6.5 2.9
## 8 8 Wii Play Wii 2006 Misc Ninten… 14.0 9.2 2.93 2.85
## 9 9 New Super … Wii 2009 Plat… Ninten… 14.6 7.06 4.7 2.26
## 10 10 Duck Hunt NES 1984 Shoo… Ninten… 26.9 0.63 0.28 0.47
## # … with 16,588 more rows, 1 more variable: Global_Sales <dbl>, and abbreviated
## # variable names ¹Platform, ²Publisher, ³NA_Sales, ⁴EU_Sales, ⁵JP_Sales,
## # ⁶Other_Sales
unique(fir[["Platform"]])
## [1] "Wii" "NES" "GB" "DS" "X360" "PS3" "PS2" "SNES" "GBA" "3DS"
## [11] "PS4" "N64" "PS" "XB" "PC" "2600" "PSP" "XOne" "GC" "WiiU"
## [21] "GEN" "DC" "PSV" "SAT" "SCD" "WS" "NG" "TG16" "3DO" "GG"
## [31] "PCFX"
# Scatter plot of sales in other regions against global sales, one point per
# row of `fir`.
plot3 <- ggplot(data = fir, mapping = aes(x = Global_Sales, y = Other_Sales)) +
  geom_point()
plot3
# Interactive bar chart of the number of titles in each genre.
# The full data frame is plotted on purpose: subsetting rows with
# `fir[fir$Global_Sales, ]` would treat the sales figures as row positions
# (doubles are truncated when used as indices, and anything below 1 becomes
# index 0, which selects no row), so the counts would describe an arbitrary
# subset of rows rather than the data set.
plot4 <- ggplot(data = fir, mapping = aes(x = Genre)) +
  geom_bar(stat = "count", width = 0.5, fill = "darkblue")
ggplotly(plot4)
# ggplot(fir, aes(x = "", y=Genre, fill = Genre)) +
# geom_bar(width = 1, stat = "identity") +coord_polar(theta = "y", start=0)+
# # scale_fill_brewer(palette="Blues")+
# # labs(fill="",
# # x=NULL,
# # y=NULL,
# # title="Pie Chart of patient severeness ")
#
#
# Build a horizontal bar chart of one sales column broken down by genre.
#
# data:      data frame with a `Genre` column and the column named by
#            `sales_col`.
# sales_col: name of the sales column (a string) mapped to the x axis.
# x_label:   label for the x axis.
# title:     plot title.
#
# Returns the ggplot object. With stat = "identity" every row contributes one
# stacked segment, so the length of a bar is the genre's total for that
# column.
plot_genre_sales <- function(data, sales_col, x_label, title) {
  ggplot(data, aes(x = .data[[sales_col]], y = Genre)) +
    geom_bar(stat = "identity", width = 0.5, fill = "darkblue") +
    labs(x = x_label, y = "Genre", title = title)
}

# One chart per sales region; each x-axis label names the region that is
# actually plotted.
plot5 <- plot_genre_sales(fir, "NA_Sales", "NA Sales", "Genre to Na_Sales")
plot5
plot6 <- plot_genre_sales(fir, "EU_Sales", "EU Sales", "Genre to EU_Sales")
plot6
plot7 <- plot_genre_sales(fir, "JP_Sales", "JP Sales", "Genre to JP_Sales")
plot7
plot8 <- plot_genre_sales(
  fir, "Other_Sales", "Other Sales", "Genre to Other_Sales"
)
plot8
plot9 <- plot_genre_sales(
  fir, "Global_Sales", "Global Sales", "Genre to Global_Sales"
)
plot9
# Interactive horizontal bar chart of the number of titles per platform,
# sorted by that number. The chart shows row counts (n()), not sales, so the
# title and x-axis label say so.
fir %>%
  group_by(Platform) %>%
  summarise(Count = n()) %>%
  arrange(Count) %>%
  plot_ly(
    x = ~Count,
    y = ~Platform,
    type = "bar",
    orientation = "h"
  ) %>%
  layout(
    title = "Number of Games by Platform",
    yaxis = list(
      title = "Platform",
      # categoryarray must list the axis categories themselves (the platform
      # names) in display order; the rows are already sorted by Count.
      categoryorder = "array",
      categoryarray = ~Platform
    ),
    xaxis = list(title = "Number of Games")
  )