Project

Toan Phan

2022-11-29

Library

library(ggplot2)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggthemes)

Video Games Data

# Load the raw video game table from the CSV file in the working directory.
raw_data <- read_csv(file = "video_game.csv")
## Rows: 26688 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): game, release_date, owners, developer, publisher
## dbl (5): number, price, average_playtime, median_playtime, metascore
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the column types and leading values of the raw table.
raw_data %>% glimpse()
## Rows: 26,688
## Columns: 10
## $ number           <dbl> 1, 3, 21, 47, 36, 52, 2, 4, 14, 40, 9, 17, 43, 49, 53…
## $ game             <chr> "Half-Life 2", "Counter-Strike: Source", "Counter-Str…
## $ release_date     <chr> "16-Nov-04", "1-Nov-04", "1-Mar-04", "1-Nov-04", "1-J…
## $ price            <dbl> 9.99, 9.99, 9.99, 4.99, 9.99, 0.00, 14.99, 4.99, 5.99…
## $ owners           <chr> "10,000,000 .. 20,000,000", "10,000,000 .. 20,000,000…
## $ developer        <chr> "Valve", "Valve", "Valve", "Valve", "Valve", "Unreal …
## $ publisher        <chr> "Valve", "Valve", "Valve", "Valve", "Valve", "Unreal …
## $ average_playtime <dbl> 110, 236, 10, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 371, 0, …
## $ median_playtime  <dbl> 66, 128, 3, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 270, 0, 0,…
## $ metascore        <dbl> 96, 88, 65, 0, 0, 0, 93, 87, 73, 0, 80, 69, 0, 0, 0, …

Video Games Data

# Add `release_year`: the last two characters of `release_date` converted to a
# number (e.g. "16-Nov-04" -> 4), i.e. the year as an offset from 2000.
# Dates whose last two characters are not digits become NA, which is what
# triggers the coercion warning.
video_game <- raw_data %>%
  mutate(
    release_year = as.numeric(
      substring(release_date, first = nchar(release_date) - 1)
    )
  )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# Confirm that the derived release_year column is present.
video_game %>% glimpse()
## Rows: 26,688
## Columns: 11
## $ number           <dbl> 1, 3, 21, 47, 36, 52, 2, 4, 14, 40, 9, 17, 43, 49, 53…
## $ game             <chr> "Half-Life 2", "Counter-Strike: Source", "Counter-Str…
## $ release_date     <chr> "16-Nov-04", "1-Nov-04", "1-Mar-04", "1-Nov-04", "1-J…
## $ price            <dbl> 9.99, 9.99, 9.99, 4.99, 9.99, 0.00, 14.99, 4.99, 5.99…
## $ owners           <chr> "10,000,000 .. 20,000,000", "10,000,000 .. 20,000,000…
## $ developer        <chr> "Valve", "Valve", "Valve", "Valve", "Valve", "Unreal …
## $ publisher        <chr> "Valve", "Valve", "Valve", "Valve", "Valve", "Unreal …
## $ average_playtime <dbl> 110, 236, 10, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 371, 0, …
## $ median_playtime  <dbl> 66, 128, 3, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 270, 0, 0,…
## $ metascore        <dbl> 96, 88, 65, 0, 0, 0, 93, 87, 73, 0, 80, 69, 0, 0, 0, …
## $ release_year     <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,…

Number of Video Games Released per Year

# Tally the games per (two-digit) release year, then draw the totals as a bar
# chart with each total printed above its bar.
number_of_game_over_year <- count(video_game, release_year, name = "count")

ggplot(number_of_game_over_year, aes(x = release_year, y = count)) +
  geom_col(fill = "#f68060", alpha = 0.7, width = 0.7) +
  # Shift the label up by 300 so the number sits above the bar, not on its top.
  geom_text(aes(y = count + 300, label = count), position = "stack") +
  labs(
    title = "Number Of Games Over Year",
    x = "Years(2000s)",
    y = "Number Of Games"
  ) +
  theme_light()
## Warning: Removed 1 rows containing missing values (position_stack).
## Removed 1 rows containing missing values (position_stack).

Developers with The Most Games

# Count how many games each developer has in the data set.
developer <- video_game %>%
  group_by(developer) %>%
  summarize(count = n())

# Keep the ten developers with the most games (ties at the cut-off are kept).
# Missing developers and the "0" entry (presumably a placeholder for an
# unknown developer) are removed *before* ranking: filtering after `top_n()`
# let such an entry occupy one of the ten slots, so the "Top 10" chart could
# end up with fewer than ten developers.
top_10_developer <- developer %>%
  filter(!is.na(developer), developer != "0") %>%
  top_n(n = 10, wt = count)

# Horizontal bar chart, developers ordered by number of games.
top_10_developer %>%
  ggplot(aes(x = reorder(developer, -count), y = count)) +
  geom_col(fill = "#f68060", alpha = 0.7, width = 0.7) +
  ggtitle("Top 10 Developers with The Most Games (2004-2018)") +
  xlab("Developers") +
  ylab("Number of Games") +
  coord_flip() +
  theme_light()

Radar Chart

library(fmsb)
## Warning: package 'fmsb' was built under R version 4.2.2
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
# Axis range and number of grid rings used for the radar chart.
# NOTE(review): a count above `radar_max` would extend past the outer ring --
# confirm 100 is a suitable ceiling for this data.
radar_max <- 100
radar_min <- 0
radar_segments <- 4
n_developers <- nrow(top_10_developer)

# radarchart() expects a data frame with one column per variable where
# row 1 is the axis maximum, row 2 the axis minimum and row 3 the values.
radar_data <- as.data.frame(
  rbind(
    rep(radar_max, n_developers),
    rep(radar_min, n_developers),
    top_10_developer$count
  )
)
colnames(radar_data) <- top_10_developer$developer

radarchart(
  radar_data,
  axistype = 1,
  seg = radar_segments,

  # custom polygon
  pcol = rgb(0.08, 0.16, 0.48, 0.6),
  pfcol = rgb(0.56, 0.63, 0.92, 0.5),
  plwd = 3,

  # custom the grid
  cglcol = "grey",
  cglty = 1,
  axislabcol = "grey",
  cglwd = 0.8,
  # One label per grid ring, from the centre (minimum) to the outer ring
  # (maximum): 0, 25, 50, 75, 100. The previous seq(20, 100, 20) labelled the
  # centre as 20 and so mislabelled every ring.
  caxislabels = seq(radar_min, radar_max, length.out = radar_segments + 1),

  # custom labels
  vlcex = 0.8
)

Top Games with The Highest Average Playtime

# The ten games with the highest average playtime (ties at the cut-off kept).
top_10_playtime <- video_game %>% top_n(n = 10, wt = average_playtime)

# Horizontal bars ordered and coloured by release year. The fill uses
# factor() rather than as.character() so the legend is sorted numerically
# (4, 5, ..., 18) instead of lexicographically ("10" before "4").
top_10_playtime %>%
  ggplot(aes(
    x = reorder(game, release_year),
    y = average_playtime,
    fill = factor(release_year)
  )) +
  geom_col() +
  ggtitle("Top 10 Average Playtime Games") +
  xlab("Games") +
  ylab("Average Play Time") +
  labs(fill = "Release Year (2000s)") +
  scale_fill_hue(c = 40) +
  coord_flip() +
  theme_minimal()

Top Games with The Most Owners

# The three largest ownership brackets, listed from most to fewest owners.
top_owner_brackets <- c(
  "100,000,000 .. 200,000,000",
  "50,000,000 .. 100,000,000",
  "20,000,000 .. 50,000,000"
)

# Games that fall in one of those brackets. No grouping is applied: the plot
# does not use it, and it would leave the tibble grouped afterwards.
top_game_owners <- video_game %>%
  filter(owners %in% top_owner_brackets)

# Metascore per game, coloured by ownership bracket, with the price printed on
# each bar. The fill is an explicitly ordered factor so the legend runs from
# the largest bracket to the smallest instead of in lexicographic order
# (which would place "20,000,000 .." between the 100M and 50M brackets).
top_game_owners %>%
  ggplot(aes(
    x = game,
    y = metascore,
    fill = factor(owners, levels = top_owner_brackets)
  )) +
  geom_col() +
  # Price label drawn at a fixed height of 50 on the metascore axis.
  geom_text(aes(y = 50, label = paste0("$", price)), position = "stack") +
  ggtitle("Games with The Most Owners") +
  xlab("Games") +
  ylab("Meta Score") +
  labs(fill = "Owners") +
  scale_fill_hue(c = 40) +
  coord_flip() +
  theme_minimal()