Data Source: https://www.kaggle.com/ashaheedq/video-games-sales-2019
We’re going to analyze video game data from Kaggle, which includes game genres, sales figures, critic and user ratings, and more.
Note: some games have no user rating, possibly because they are not commercial releases (e.g. genre ‘education’). We’re going to remove these games from our analysis.
# URL of the raw video game sales CSV used throughout this analysis.
file_path <- "https://raw.githubusercontent.com/devinteran/Data607-Project2/master/vgsales-12-4-2019.csv"
# Read the full data set; column types are guessed by read_csv() and reported
# in the parsing message it prints.
video_game_raw <- read_csv(file = file_path)
## Parsed with column specification:
## cols(
## .default = col_double(),
## Name = col_character(),
## basename = col_character(),
## Genre = col_character(),
## ESRB_Rating = col_character(),
## Platform = col_character(),
## Publisher = col_character(),
## Developer = col_character(),
## VGChartz_Score = col_logical(),
## Last_Update = col_character(),
## url = col_character(),
## img_url = col_character()
## )
## See spec(...) for full column specifications.
# Keep only the columns used in this analysis.
video_game <- select(
  video_game_raw,
  Rank, basename, Genre, ESRB_Rating, Platform, Publisher,
  Critic_Score, User_Score, Total_Shipped,
  Global_Sales, NA_Sales, PAL_Sales, JP_Sales, Other_Sales, Year
)
# Drop games that are missing a user score, a critic score, or global sales,
# so every remaining row can be used for both the score and sales comparisons.
video_game <- filter(
  video_game,
  !is.na(User_Score),
  !is.na(Critic_Score),
  !is.na(Global_Sales)
)
Next, let’s look at whether sales correlate with critic or user scores. We start with the average critic and user scores for each genre.
# Average critic score and average user score for each genre.
genre_score <- video_game %>%
  group_by(Genre) %>%
  summarize(
    Critic_Score = mean(Critic_Score, na.rm = TRUE),
    User_Score = mean(User_Score, na.rm = TRUE)
  )
# Reshape to long format: one row per (Genre, Type) pair, where Type is
# "Critic_Score" or "User_Score" and Score holds the corresponding average.
genre_score <- pivot_longer(
  genre_score,
  cols = ends_with("Score"),
  names_to = "Type",
  values_to = "Score"
)
# Side-by-side bars per genre, one bar per score type. reorder() sorts the
# genres by the mean of -Score within each genre, i.e. from the highest to the
# lowest combined average score.
ggplot(genre_score, aes(x = reorder(Genre, -Score), y = Score, fill = Type)) +
  geom_col(position = "dodge") +
  theme(
    axis.text.x = element_text(angle = 90),  # vertical genre labels
    plot.title = element_text(hjust = 0.5)   # centered title
  ) +
  xlab("Genre") +
  ggtitle("Average Score for Video Games by Genre")
In the chart below we see that, on average, the highest-selling game genres globally are Racing, Action, and Shooter.
* Racing games are both highly rated and high sellers.
# Mean global sales for each genre.
game_sales <- video_game %>%
  group_by(Genre) %>%
  summarize(Global_Sales = mean(Global_Sales, na.rm = TRUE))
# Print the Action games that remain after filtering, for inspection.
video_game %>%
  filter(Genre == 'Action')
## # A tibble: 36 x 15
## Rank basename Genre ESRB_Rating Platform Publisher Critic_Score User_Score
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 80 grand-t… Acti… M PS3 Rockstar… 10 9
## 2 108 grand-t… Acti… M XOne Rockstar… 9 9
## 3 148 unchart… Acti… T PS3 Sony Com… 9.3 6.3
## 4 152 unchart… Acti… T PS3 Sony Com… 9.5 9.6
## 5 161 red-dea… Acti… M X360 Rockstar… 9.5 10
## 6 177 metal-g… Acti… M PS2 Konami 9.5 7
## 7 181 metal-g… Acti… M PS3 Konami 9.3 9.8
## 8 203 assassi… Acti… M X360 Ubisoft 8.2 8
## 9 244 residen… Acti… M PS3 Capcom 8.6 8.8
## 10 262 unchart… Acti… T PS3 Sony Com… 8.7 8.6
## # … with 26 more rows, and 7 more variables: Total_Shipped <dbl>,
## # Global_Sales <dbl>, NA_Sales <dbl>, PAL_Sales <dbl>, JP_Sales <dbl>,
## # Other_Sales <dbl>, Year <dbl>
# Bar chart of mean global sales per genre, ordered from the highest-selling
# genre to the lowest. Rows with missing values are dropped before plotting.
# game_sales holds a single row per genre, so no dodging is needed.
ggplot(
  drop_na(game_sales),
  aes(x = reorder(Genre, -Global_Sales), y = Global_Sales)
) +
  geom_col() +
  theme(
    axis.text.x = element_text(angle = 90),  # vertical genre labels
    plot.title = element_text(hjust = 0.5)   # centered title
  ) +
  xlab("Genre") +
  ggtitle("Average Sales (in millions) for Video Games by Genre")
# Per-genre averages, recomputed here so this section can be run on its own.
# genre_score: long format, one row per (Genre, Type) with the average Score.
genre_score <- video_game %>%
  group_by(Genre) %>%
  summarize(
    Critic_Score = mean(Critic_Score, na.rm = TRUE),
    User_Score = mean(User_Score, na.rm = TRUE)
  )
genre_score <- pivot_longer(
  genre_score,
  cols = ends_with("Score"),
  names_to = "Type",
  values_to = "Score"
)
# game_sales: mean global sales per genre.
game_sales <- video_game %>%
  group_by(Genre) %>%
  summarize(Global_Sales = mean(Global_Sales, na.rm = TRUE))

# Attach each genre's mean sales to its average scores, then split by score
# type (dropping incomplete rows) so critics and users get separate plots.
sales_plus_scores <- genre_score %>% merge(game_sales, by = "Genre")
sales_plus_scores_critic <- sales_plus_scores %>%
  drop_na() %>%
  filter(Type == "Critic_Score")
sales_plus_scores_user <- sales_plus_scores %>%
  drop_na() %>%
  filter(Type == "User_Score")

# One point per genre at its true (average score, average sales) position.
# The extra geom_jitter() layer was removed: combined with geom_point() it drew
# every genre a second time at a random offset, which doubled the points and
# made the figure change from run to run.
critic <- ggplot(
  sales_plus_scores_critic,
  aes(fill = Genre, x = Score, y = Global_Sales, color = Genre)
) +
  geom_point(size = 3) +
  xlab("Critic Score") +
  ylab("Game Sales") +
  ggtitle("Critic Ratings vs. Game Sales (in millions)") +
  guides(fill = guide_legend(ncol = 3))
user <- ggplot(
  sales_plus_scores_user,
  aes(fill = Genre, x = Score, y = Global_Sales, color = Genre)
) +
  geom_point(size = 3) +
  xlab("User Score") +
  ylab("Game Sales") +
  ggtitle("User Ratings vs. Game Sales (in millions)") +
  guides(fill = guide_legend(ncol = 3))

# Stack the critic plot above the user plot.
grid.arrange(critic, user)