Packages
#Upload Packages
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(treemap)
library(streamgraph)
library(pillar)
##
## Attaching package: 'pillar'
## The following object is masked from 'package:dplyr':
##
## dim_desc
library(RColorBrewer)
#Dataset
# Point the R session at the folder that holds the course datasets.
# NOTE(review): this is a machine-specific absolute path; the script will
# only run unchanged on a computer that has this exact folder.
setwd(dir = "C:/Users/mivul/OneDrive/Desktop/Data 110/Datasets")
# Read the video game sales table from the working directory.
game_sales <- read_csv(file = "vgsales.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## Rank = col_double(),
## Name = col_character(),
## Platform = col_character(),
## Year = col_double(),
## Genre = col_character(),
## Publisher = col_character(),
## NA_Sales = col_double(),
## EU_Sales = col_double(),
## JP_Sales = col_double(),
## Other_Sales = col_double(),
## Global_Sales = col_double()
## )
# Store the release year as a numeric column.
game_sales[["Year"]] <- as.numeric(game_sales[["Year"]])
# Print a compact, column-by-column overview of the table.
glimpse(game_sales)
## Rows: 16,598
## Columns: 11
## $ Rank <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17~
## $ Name <chr> "Wii Sports", "Super Mario Bros.", "Mario Kart Wii", "Wii~
## $ Platform <chr> "Wii", "NES", "Wii", "Wii", "GB", "GB", "DS", "Wii", "Wii~
## $ Year <dbl> 2006, 1985, 2008, 2009, 1996, 1989, 2006, 2006, 2009, 198~
## $ Genre <chr> "Sports", "Platform", "Racing", "Sports", "Role-Playing",~
## $ Publisher <chr> "Nintendo", "Nintendo", "Nintendo", "Nintendo", "Nintendo~
## $ NA_Sales <dbl> 41.49, 29.08, 15.85, 15.75, 11.27, 23.20, 11.38, 14.03, 1~
## $ EU_Sales <dbl> 29.02, 3.58, 12.88, 11.01, 8.89, 2.26, 9.23, 9.20, 7.06, ~
## $ JP_Sales <dbl> 3.77, 6.81, 3.79, 3.28, 10.22, 4.22, 6.50, 2.93, 4.70, 0.~
## $ Other_Sales <dbl> 8.46, 0.77, 3.31, 2.96, 1.00, 0.58, 2.90, 2.85, 2.26, 0.4~
## $ Global_Sales <dbl> 82.74, 40.24, 35.82, 33.00, 31.37, 30.26, 30.01, 29.02, 2~
# Top 10 Xbox One games by global sales.
# The platform filter runs first so only Xbox One rows are aggregated, and
# `.groups = "drop"` returns a plain (ungrouped) tibble, so no grouping is
# carried into the sort, the subset, or the treemap built from this table.
top_XOne <- game_sales %>%
  # Keep Xbox One titles only
  filter(Platform == "XOne") %>%
  # One row per game release
  group_by(Name, Platform, Year, Genre, Publisher) %>%
  # Total the sales of each region
  summarise(
    North_America = sum(NA_Sales),
    Europe = sum(EU_Sales),
    Japan = sum(JP_Sales),
    Other = sum(Other_Sales),
    Global = sum(Global_Sales),
    .groups = "drop"
  ) %>%
  # Best sellers first
  arrange(desc(Global)) %>%
  # Keep the first 10 rows
  slice_head(n = 10)
# Top 10 PlayStation 4 games by global sales.
# The platform filter runs first so only PlayStation 4 rows are aggregated,
# and `.groups = "drop"` returns a plain (ungrouped) tibble, so no grouping
# is carried into the sort, the subset, or the treemap built from this table.
top_ps4 <- game_sales %>%
  # Keep PlayStation 4 titles only
  filter(Platform == "PS4") %>%
  # One row per game release
  group_by(Name, Platform, Year, Genre, Publisher) %>%
  # Total the sales of each region
  summarise(
    North_America = sum(NA_Sales),
    Europe = sum(EU_Sales),
    Japan = sum(JP_Sales),
    Other = sum(Other_Sales),
    Global = sum(Global_Sales),
    .groups = "drop"
  ) %>%
  # Best sellers first
  arrange(desc(Global)) %>%
  # Keep the first 10 rows
  slice_head(n = 10)
# Top 10 WiiU games by global sales.
# The platform filter runs first so only WiiU rows are aggregated, and
# `.groups = "drop"` returns a plain (ungrouped) tibble, so no grouping is
# carried into the sort, the subset, or the treemap built from this table.
top_wiiu <- game_sales %>%
  # Keep WiiU titles only
  filter(Platform == "WiiU") %>%
  # One row per game release
  group_by(Name, Platform, Year, Genre, Publisher) %>%
  # Total the sales of each region
  summarise(
    North_America = sum(NA_Sales),
    Europe = sum(EU_Sales),
    Japan = sum(JP_Sales),
    Other = sum(Other_Sales),
    Global = sum(Global_Sales),
    .groups = "drop"
  ) %>%
  # Best sellers first
  arrange(desc(Global)) %>%
  # Keep the first 10 rows
  slice_head(n = 10)
# Treemap of the PlayStation 4 top 10: one tile per game, tile area from
# global sales, tile colour from North America sales.
treemap(
  dtf = top_ps4,
  index = "Name",
  vSize = "Global",
  vColor = "North_America",
  type = "value",
  palette = "Spectral",
  title = " Top 10 PlayStation 4 Global Sales Ranking",
  title.legend = "North America Sales"
)
# Treemap of the Xbox One top 10: one tile per game, tile area from
# global sales, tile colour from North America sales.
treemap(
  dtf = top_XOne,
  index = "Name",
  vSize = "Global",
  vColor = "North_America",
  type = "value",
  palette = "Spectral",
  title = "Top 10 XBox One Global Sales Ranking",
  title.legend = "North America Sales "
)
# Treemap of the WiiU top 10: one tile per game, tile area from
# global sales, tile colour from North America sales.
treemap(
  dtf = top_wiiu,
  index = "Name",
  vSize = "Global",
  vColor = "North_America",
  type = "value",
  palette = "Spectral",
  title = "Top 10 WiiU Global Sales Ranking",
  title.legend = "North America Sales"
)
# Regional sales totals for the ten publishers with the highest global sales.
g3 <- game_sales %>%
  # One group per publisher
  group_by(Publisher) %>%
  # Total the sales of each region
  summarise(
    North_America = sum(NA_Sales),
    Europe = sum(EU_Sales),
    Japan = sum(JP_Sales),
    Other = sum(Other_Sales),
    Global = sum(Global_Sales)
  ) %>%
  # Best-selling publishers first
  arrange(desc(Global)) %>%
  # Keep the top 10
  head(10)
# Re-sort ascending by global sales; this is the row order handed to heatmap()
g3 <- g3[order(g3$Global), ]
# Convert the tibble to a data frame so it can carry row names
g3 <- as.data.frame(g3)
# Label each row with its publisher
row.names(g3) <- g3$Publisher
# Regional columns that go into the matrix, selected by name rather than by
# position so a change in column order cannot silently pick the wrong ones
region_cols <- c("North_America", "Europe", "Japan", "Other")
g3_subset <- g3[, region_cols]
# Create Matrix
g3_matrix <- data.matrix(g3_subset)
# "RdPu" offers at most 9 colours, so request all 9 and let colorRampPalette()
# interpolate one colour per publisher (asking brewer.pal() for more than 9
# only triggers a warning and returns the same 9 colours).
sales_palette <- colorRampPalette(brewer.pal(9, "RdPu"))(nrow(g3_matrix))
# parameter for RowSideColors: one named colour per publisher row
varcols <- setNames(sales_palette, rownames(g3_matrix))
# Create Heatmap: no dendrograms (Rowv/Colv = NA), values scaled within each
# region column. Only arguments heatmap() actually defines are passed; extra
# positional or abbreviated arguments would be matched to unrelated
# parameters such as `distfun`, `symm` or `verbose`.
heatmap(
  g3_matrix,
  Rowv = NA,
  Colv = NA,
  col = sales_palette,
  scale = "column",
  margins = c(10, 15),
  main = "Publishers Regional Sales",
  xlab = "Regions",
  ylab = "Publishers",
  cexCol = 1,
  cexRow = 1,
  RowSideColors = varcols
)
# Count the games per genre for every publisher whose name matches a pattern.
#   data              table with `Publisher` and `Genre` columns
#   publisher_pattern regular expression matched against `Publisher`,
#                     ignoring case
# Returns a table with one row per genre and the number of games in `n`.
count_genre_by_publisher <- function(data, publisher_pattern) {
  data %>%
    filter(grepl(publisher_pattern, Publisher, ignore.case = TRUE)) %>%
    count(Genre)
}

# Draw a horizontal bar chart of games per genre.
#   genre_counts table with `Genre` and `n` columns
#   chart_title  title shown above the chart
# Returns the ggplot object; it is drawn when printed at top level.
plot_genre_counts <- function(genre_counts, chart_title) {
  ggplot(genre_counts, aes(x = Genre, y = n, fill = Genre)) +
    # Bar length comes straight from the precomputed counts
    geom_col() +
    # Flip the axes so the genre names read horizontally
    coord_flip() +
    ylab("Number of Games") +
    ggtitle(chart_title) +
    scale_fill_brewer(palette = "Set3")
}

# Nintendo
Nintendo_games <- count_genre_by_publisher(game_sales, "Nintendo")
plot_genre_counts(Nintendo_games, "Count of Nintendo Games by Genre")

# Electronic Arts
EA_games <- count_genre_by_publisher(game_sales, "Electronic Arts")
plot_genre_counts(EA_games, "Count of Electronic Arts Games by Genre")

# Activision
Activision_games <- count_genre_by_publisher(game_sales, "Activision")
plot_genre_counts(Activision_games, "Count of Activision Games by Genre")

# Sony (the title is deliberately split over two lines)
Sony <- count_genre_by_publisher(game_sales, "Sony")
plot_genre_counts(Sony, "Count of Sony Computer Entertainment\nGames by Genre")

# Ubisoft
ubisoft <- count_genre_by_publisher(game_sales, "Ubisoft")
plot_genre_counts(ubisoft, "Count of Ubisoft Games by Genre")

# Take-Two
Take_two <- count_genre_by_publisher(game_sales, "Take-Two")
plot_genre_counts(Take_two, "Count of Take-Two Interactive Games by Genre")

# THQ
THQ <- count_genre_by_publisher(game_sales, "THQ")
plot_genre_counts(THQ, "Count of THQ Games by Genre")

# Konami
Konami <- count_genre_by_publisher(game_sales, "Konami")
plot_genre_counts(Konami, "Count of Konami Digital Entertainment Games by Genre")

# Sega
Sega <- count_genre_by_publisher(game_sales, "Sega")
plot_genre_counts(Sega, "Count of Sega Games by Genre")

# Namco
Namco <- count_genre_by_publisher(game_sales, "Namco")
plot_genre_counts(Namco, "Count of Namco Bandai Games by Genre")
# Regional sales of Sonic games, tagged with Series = "Sonic".
sonic_full <- game_sales %>%
  filter(
    # Title mentions Sonic (case-insensitive)
    grepl("Sonic", Name, ignore.case = TRUE),
    # Leave out sports titles
    Genre != "Sports",
    # Leave out games published by Kadokawa, Atari and Midas
    Publisher != "Kadokawa Games",
    Publisher != "Atari",
    Publisher != "Midas Interactive Entertainment",
    # Leave out this specific title
    Name != "Mario & Sonic at the Rio 2016 Olympic Games"
  ) %>%
  # Keep the identifying and regional sales columns and add the series label
  transmute(
    Name, Platform, Year, Publisher,
    NA_Sales, EU_Sales, JP_Sales, Other_Sales,
    Series = "Sonic"
  )
# Regional sales of Mario games, tagged with Series = "Mario".
mario_full <- game_sales %>%
  filter(
    # Title mentions Mario (case-insensitive)
    grepl("Mario", Name, ignore.case = TRUE),
    # Leave out games published by Sega and Namco
    Publisher != "Sega",
    Publisher != "Namco Bandai Games",
    # Leave out these specific titles
    Name != "Mario & Sonic at the Rio 2016 Olympic Games",
    Name != "Mario & Sonic at the Sochi 2014 Olympic Winter Games"
  ) %>%
  # Keep the identifying and regional sales columns and add the series label
  transmute(
    Name, Platform, Year, Publisher,
    NA_Sales, EU_Sales, JP_Sales, Other_Sales,
    Series = "Mario"
  )
# Stack the Mario rows on top of the Sonic rows, then reshape to long form:
# the four regional sales columns become a `Region` column holding the
# original column name and a `Sales` column holding the value, giving one
# row per game and region.
classic_full <- bind_rows(mario_full, sonic_full) %>%
  pivot_longer(
    cols = c(NA_Sales, EU_Sales, JP_Sales, Other_Sales),
    names_to = "Region",
    values_to = "Sales"
  )
# Bar chart of Mario vs Sonic sales per release year, one panel per region.
classic_full %>%
  # Year 0 is the placeholder used for a missing release year; drop those
  # rows so they do not stretch the x axis down to zero (the genre-by-year
  # table built later in this script excludes them the same way).
  filter(Year != 0) %>%
  ggplot(aes(x = Year, y = Sales, fill = Series)) +
  # Bar height is the summed sales of each series in that year
  geom_col() +
  # Label Y axis
  ylab("Sales in the Millions") +
  ggtitle("Mario Game Sales Vs Sonic Game Sales by Region") +
  # Create separate panels by region
  facet_wrap(~Region)
# Total global sales for every genre and release year.
game_genre <- game_sales %>%
  # Skip observations whose Year is 0
  filter(Year != 0) %>%
  # One group per genre and year
  group_by(Genre, Year) %>%
  # Add up the global sales within each group
  summarise(tot_gobal = sum(Global_Sales)) %>%
  # Return a plain, ungrouped table
  ungroup()
## `summarise()` has grouped output by 'Genre'. You can override using the `.groups` argument.
# Streamgraph of yearly global sales with one stream per genre.
game_genre %>%
  streamgraph(
    key = "Genre",
    value = "tot_gobal",
    date = "Year",
    interpolate = "cardinal"
  ) %>%
  # x-axis ticks every 10 years, labelled with the four-digit year
  sg_axis_x(10, "year", "%Y") %>%
  # Fill the streams with the "PuOr" brewer palette
  sg_fill_brewer("PuOr")
## Warning in widget_html(name = class(x)[1], package = attr(x, "package"), :
## streamgraph_html returned an object of class `list` instead of a `shiny.tag`.
The data in the dataset was scraped from vgchartz.com and acquired through Kaggle.com. The dataset lists video games from various publishers with sales greater than one hundred thousand copies. The table contains eleven variables and sixteen thousand five hundred and ninety-eight observations. The eleven variables are the rank of the game based on global sales, the name of the video game, the game platform, the year the game was released, the genre of the game, the publisher of the game, North America sales in the millions, Europe sales in the millions, Japan sales in the millions, other regions' sales in the millions, and total global sales in the millions. The number of unique publishers identified in the dataset is five hundred seventy-nine. The release years of the games range from 1980 to 2020. The csv file for the dataset from Kaggle had missing information, so the dataset needed to be edited before the analysis could be conducted.
I decided to replace the missing quantitative information with zeros by using the replace function in Excel. I also had to clean up the dataset to conduct a comparison analysis of the sales of Mario games versus Sonic games. I utilized the pivot_longer function on the dataset to move the names of the regional sales columns into a new variable column called Region. The sales numbers were moved to a new variable column called Sales. The manipulation of the table resulted in each row observation containing the sales number of a game for one region. The bar chart showed that Mario game sales beat Sonic game sales. As a Sonic fan, I was slightly disappointed by the results.
The three treemaps were created to show the top ten games for the PlayStation 4, Xbox One, and the WiiU. The size of the box for each game title represents the global sales ranking among the top ten games. The colors of the treemap represent the North American sales. The PlayStation 4 and Xbox One treemaps contain some of the same games because certain games are cross-platform. Call of Duty: Black Ops 3 was the number one game for both global sales and North America sales on both platforms. The games in second place for global and North America sales on the PlayStation 4 and Xbox One were Grand Theft Auto V and Call of Duty: Advanced Warfare, respectively. The difference in the number two game gave some insight into the purchasing habits of each platform's owners. Grand Theft Auto V and Call of Duty: Advanced Warfare are both cross-platform games, and I would have expected them to have the same ranks on both platforms. The WiiU treemap shows that Mario games are the most popular games. After analyzing the top games on each platform, I wanted to analyze the top publishers in the dataset.
The heatmap displays the top ten publishers in the dataset by global sales on the right side of the graph, and the observed regions on the bottom of the graph. The color gradient of the heatmap refers to the total sales of each publisher in each region. Nintendo is the top publisher in Japan, North America, and Europe. In other regions, Electronic Arts has higher sales than Nintendo. It is interesting to observe that non-Japanese publishers seem to have low sales in Japan, signifying that they may have trouble capturing the Japanese market. I constructed bar charts to learn more about the identified publishers. The charts display the number of games each publisher released by genre. The charts show that certain publishers have a preference for the genres of games they publish. I decided to focus on the game genres and visualize the popularity of games by their genres.
I constructed an interactive streamgraph to show the global sales of games for each genre throughout the years covered in the dataset. In the year 2000, the sales of sports and action games started to increase dramatically. Also, around 2010, shooter games started to experience an increase in sales. The rise in popularity of these three genres can be seen in the games highlighted in the top ten treemaps for the PlayStation 4 and Xbox One. The genres of the games in the PlayStation 4 treemap are four shooter games, two action games, two sports games, and one role-playing game. The genres of the games in the Xbox One treemap are six shooter games, two action games, one sports game, and one role-playing game. The WiiU is the exception with one shooter game, one fighting game, one action game, two miscellaneous games, four platform games, and one racing game. I initially wanted to create an alluvial diagram to display this information. The alluvial diagram seemed disjointed and looked like splattered paint because a game for each genre was not released every year. I decided to go with the streamgraph because it was more aesthetically pleasing.