data <- read.csv("C:\\Users\\gajaw\\OneDrive\\Desktop\\STATS\\vgsales.csv")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
grouped_df1 <- data %>%
group_by(Genre) %>%
summarize(mean_global_sales = mean(Global_Sales, na.rm = TRUE), count = n())
print(grouped_df1)
## # A tibble: 12 × 3
## Genre mean_global_sales count
## <chr> <dbl> <int>
## 1 Action 0.528 3316
## 2 Adventure 0.186 1286
## 3 Fighting 0.529 848
## 4 Misc 0.466 1739
## 5 Platform 0.938 886
## 6 Puzzle 0.421 582
## 7 Racing 0.586 1249
## 8 Role-Playing 0.623 1488
## 9 Shooter 0.792 1310
## 10 Simulation 0.452 867
## 11 Sports 0.567 2346
## 12 Strategy 0.257 681
grouped_df2 <- data %>%
group_by(Platform) %>%
summarize(mean_na_sales = mean(NA_Sales, na.rm = TRUE), count = n())
print(grouped_df2)
## # A tibble: 31 × 3
## Platform mean_na_sales count
## <chr> <dbl> <int>
## 1 2600 0.681 133
## 2 3DO 0 3
## 3 3DS 0.155 509
## 4 DC 0.104 52
## 5 DS 0.181 2163
## 6 GB 1.17 98
## 7 GBA 0.228 822
## 8 GC 0.240 556
## 9 GEN 0.714 27
## 10 GG 0 1
## # ℹ 21 more rows
grouped_df3 <- data %>%
group_by(Publisher) %>%
summarize(mean_eu_sales = mean(EU_Sales, na.rm = TRUE), count = n())
print(grouped_df3)
## # A tibble: 579 × 3
## Publisher mean_eu_sales count
## <chr> <dbl> <int>
## 1 10TACLE Studios 0.0133 3
## 2 1C Company 0.0233 3
## 3 20th Century Fox Video Games 0.02 5
## 4 2D Boy 0.03 1
## 5 3DO 0.0844 36
## 6 49Games 0.04 1
## 7 505 Games 0.0856 192
## 8 5pb 0 61
## 9 7G//AMES 0.015 4
## 10 989 Sports 0.14 1
## # ℹ 569 more rows
grouped_df1$group_tag <- ifelse(grouped_df1$count == min(grouped_df1$count), "Lowest Probability", "")
grouped_df2$group_tag <- ifelse(grouped_df2$count == min(grouped_df2$count), "Lowest Probability", "")
grouped_df3$group_tag <- ifelse(grouped_df3$count == min(grouped_df3$count), "Lowest Probability", "")
print(grouped_df1)
## # A tibble: 12 × 4
## Genre mean_global_sales count group_tag
## <chr> <dbl> <int> <chr>
## 1 Action 0.528 3316 ""
## 2 Adventure 0.186 1286 ""
## 3 Fighting 0.529 848 ""
## 4 Misc 0.466 1739 ""
## 5 Platform 0.938 886 ""
## 6 Puzzle 0.421 582 "Lowest Probability"
## 7 Racing 0.586 1249 ""
## 8 Role-Playing 0.623 1488 ""
## 9 Shooter 0.792 1310 ""
## 10 Simulation 0.452 867 ""
## 11 Sports 0.567 2346 ""
## 12 Strategy 0.257 681 ""
print(grouped_df2)
## # A tibble: 31 × 4
## Platform mean_na_sales count group_tag
## <chr> <dbl> <int> <chr>
## 1 2600 0.681 133 ""
## 2 3DO 0 3 ""
## 3 3DS 0.155 509 ""
## 4 DC 0.104 52 ""
## 5 DS 0.181 2163 ""
## 6 GB 1.17 98 ""
## 7 GBA 0.228 822 ""
## 8 GC 0.240 556 ""
## 9 GEN 0.714 27 ""
## 10 GG 0 1 "Lowest Probability"
## # ℹ 21 more rows
print(grouped_df3)
## # A tibble: 579 × 4
## Publisher mean_eu_sales count group_tag
## <chr> <dbl> <int> <chr>
## 1 10TACLE Studios 0.0133 3 ""
## 2 1C Company 0.0233 3 ""
## 3 20th Century Fox Video Games 0.02 5 ""
## 4 2D Boy 0.03 1 "Lowest Probability"
## 5 3DO 0.0844 36 ""
## 6 49Games 0.04 1 "Lowest Probability"
## 7 505 Games 0.0856 192 ""
## 8 5pb 0 61 ""
## 9 7G//AMES 0.015 4 ""
## 10 989 Sports 0.14 1 "Lowest Probability"
## # ℹ 569 more rows
Bar plot for Grouped DataFrame 1 : Global Sales by Genre
barplot(grouped_df1$mean_global_sales,
names.arg = grouped_df1$Genre,
main = "Mean Global Sales by Genre",
col = "skyblue",
las = 2,
cex.names = 0.8)
Insights:
Highest Sales: Platform genre has the highest mean global sales.
Lowest Sales: Adventure and strategy genres have the lowest mean sales.
Moderate Sales: Other genres like Action, Shooter, and Role-Playing show moderate sales.
Significance:
Further Questions:
What drives the success of the Platform genre?
Why do Adventure
and Strategy genres have lower
sales?
Scatterplot for Grouped Dataframe 2: NA_Sales by Platform
plot(grouped_df2$count,
grouped_df2$mean_na_sales,
main = "Mean NA Sales by Number of Games per Platform",
xlab = "Number of Games (Count)",
ylab = "Mean NA Sales",
col = "green",
pch = 16,
cex = 1.2)
Insights:
Higher Sales with Fewer Games: Platforms with fewer games tend to have higher average NA sales, suggesting less competition among titles.
Lower Sales with More Games: Platforms with many games generally show lower average sales, likely due to market saturation.
Significance:
Further Questions:
What factors drive higher sales on platforms with fewer games?
How do game genres or exclusive titles impact these sales trends?
Barplot for Grouped Dataframe 3: EU Sales by Publisher
boxplot(mean_eu_sales ~ Publisher,
data = grouped_df3,
main = "Mean EU Sales Distribution by Publisher",
xlab = "Publisher",
ylab = "Mean EU Sales",
col = "lightpink",
las = 2,
cex.axis = 0.7)
Insights:
Low Mean Sales Across Publishers: Most publishers have mean EU sales close to zero, indicating that many games have low sales in the European market.
Few Outliers: A small number of publishers have significantly higher mean sales, suggesting that only a few companies dominate the market.
Significance:
Further Questions:
Which publishers are the outliers with high mean sales?
What strategies or game types are contributing to their success?
Dataframe 1: Grouped by Genre
Hypothesis: Production costs, developer specialisation,
and differing degrees of customer demand may all have an impact on how
uncommon a certain game genre becomes.
Observations: Certain genres are less commonly represented than others in video games, according to an analysis of the distribution of these genres. This suggests a possible link between genre rarity and several influencing factors. Further investigation could explore
Certain genres may attract a smaller audience, leading to lower production volumes.
Some genres might be more expensive or technically challenging to develop, discouraging production.
Game developers and studios might focus on specific genres based on their expertise or past success, resulting in fewer games in less familiar genres.
Dataframe 2: Grouped by Platform
Hypothesis: Platform popularity, market share, or
developer exclusivity agreements may be the main factors influencing a
game’s rarity on a certain platform.
Observation: Examining how games are distributed among
various platforms reveals that some have a notably less selection of
games than others. This may be related to a number of things,
including:
Due to lower potential sales, platforms with a smaller user base might draw fewer developers.
Due to exclusive contracts with publishers or developers, certain platforms may restrict the number of games that are available.
Platforms that are less appealing for game creation may be differentiated by differences in hardware capabilities or development tools.
Dataframe 3: Grouped by and EU sales and Publisher
Hypothesis: Market conditions, budgetary restraints, or shifts in corporate strategy may have an impact on the scarcity of games released by particular publishers in particular years.
Observations: Certain combinations are less prevalent when the quantity of games published by publishers throughout various years is analysed. This may point to a number of influential factors, including:
A decline in the number of games launched in a given year could be caused by changes in market trends or economic downturns.
Publishers may experience financial setbacks that restrict their ability to deliver games at particular times.
The frequency of game publication may be affected by changes in company strategy, such as concentrating on different game genres or limiting the number of releases.
unique_platforms <- unique(data$Platform)
unique_genres <- unique(data$Genre)
all_combinations <- expand.grid(Platform = unique_platforms, Genre = unique_genres)
print(head(all_combinations,25))
## Platform Genre
## 1 Wii Sports
## 2 NES Sports
## 3 GB Sports
## 4 DS Sports
## 5 X360 Sports
## 6 PS3 Sports
## 7 PS2 Sports
## 8 SNES Sports
## 9 GBA Sports
## 10 3DS Sports
## 11 PS4 Sports
## 12 N64 Sports
## 13 PS Sports
## 14 XB Sports
## 15 PC Sports
## 16 2600 Sports
## 17 PSP Sports
## 18 XOne Sports
## 19 GC Sports
## 20 WiiU Sports
## 21 GEN Sports
## 22 DC Sports
## 23 PSV Sports
## 24 SAT Sports
## 25 SCD Sports
Finding missing combination
actual_combinations <- data %>%
select(Platform, Genre) %>%
distinct()
missing_combinations <- anti_join(all_combinations, actual_combinations, by = c("Platform", "Genre"))
print(head(missing_combinations,25))
## Platform Genre
## 1 SCD Sports
## 2 WS Sports
## 3 TG16 Sports
## 4 3DO Sports
## 5 GG Sports
## 6 PCFX Sports
## 7 WS Platform
## 8 NG Platform
## 9 TG16 Platform
## 10 3DO Platform
## 11 PCFX Platform
## 12 WS Racing
## 13 NG Racing
## 14 TG16 Racing
## 15 3DO Racing
## 16 GG Racing
## 17 PCFX Racing
## 18 2600 Role-Playing
## 19 NG Role-Playing
## 20 TG16 Role-Playing
## 21 3DO Role-Playing
## 22 GG Role-Playing
## 23 XOne Puzzle
## 24 GEN Puzzle
## 25 DC Puzzle
Why Might These Combinations Be Missing?
Owing to hardware or technical issues, some platforms might not be able to support a particular genre.
On some platforms, some genres can not be well-liked or in high demand.
Game publishers and developers frequently concentrate on
particular platforms where they anticipate the greatest levels of
interaction or sales.
Counting the frequency of the combination
combination_counts <- data %>%
group_by(Platform, Genre) %>%
summarize(count = n()) %>%
arrange(desc(count))
## `summarise()` has grouped output by 'Platform'. You can override using the
## `.groups` argument.
print(head(combination_counts,25))
## # A tibble: 25 × 3
## # Groups: Platform [8]
## Platform Genre count
## <chr> <chr> <int>
## 1 PS2 Sports 400
## 2 DS Misc 393
## 3 PS3 Action 380
## 4 PS2 Action 348
## 5 DS Action 343
## 6 X360 Action 324
## 7 DS Simulation 285
## 8 Wii Misc 280
## 9 Wii Sports 261
## 10 DS Adventure 240
## # ℹ 15 more rows
Find Most and Least Common Combinations:
most_common <- combination_counts %>% filter(count == max(count))
least_common <- combination_counts %>% filter(count == min(count))
print("Most common combinations:")
## [1] "Most common combinations:"
print(head(most_common,15))
## # A tibble: 15 × 3
## # Groups: Platform [15]
## Platform Genre count
## <chr> <chr> <int>
## 1 PS2 Sports 400
## 2 DS Misc 393
## 3 PS3 Action 380
## 4 X360 Action 324
## 5 Wii Misc 280
## 6 PS Sports 222
## 7 PSP Action 222
## 8 PC Strategy 188
## 9 3DS Action 182
## 10 XB Sports 170
## 11 GBA Action 167
## 12 PSV Action 142
## 13 PS4 Action 122
## 14 GC Sports 110
## 15 N64 Sports 80
print("Least common combinations:")
## [1] "Least common combinations:"
print(head(least_common,15))
## # A tibble: 15 × 3
## # Groups: Platform [14]
## Platform Genre count
## <chr> <chr> <int>
## 1 DS Fighting 36
## 2 PS Puzzle 32
## 3 PSP Simulation 29
## 4 Wii Strategy 25
## 5 GBA Simulation 18
## 6 GBA Strategy 18
## 7 PS2 Puzzle 18
## 8 GC Strategy 11
## 9 3DS Shooter 7
## 10 X360 Puzzle 7
## 11 XB Puzzle 7
## 12 PC Fighting 6
## 13 N64 Adventure 4
## 14 SNES Adventure 4
## 15 PS3 Puzzle 3
Most Common Combinations:
Games from many different genres can be found on popular
platforms like PlayStation and Xbox, which have big user bases. This
leads to high counts for popular combinations, such as “Platformer” or
“Action” games on PlayStation.
Some genres, such as “Action,” “Adventure,” or “Sports,” have a tendency to be more well-liked on several platforms, which results in greater tallies.
Least Common Combinations:
Because of hardware limitations, some systems might not be able to support certain genres as well. For instance, compared to PCs, “Simulation” and “Strategy” games may be less popular on consoles.
Some publishers or developers might concentrate on particular
genres or platforms, making other pairings uncommon or
nonexistent.
Visualization for the Game Combinations by Platform and Genre
library(ggplot2)
ggplot(combination_counts, aes(x = Platform, y = count, fill = Genre)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Bar Graph of Game Combinations by Platform and Genre",
x = "Platform",
y = "Frequency of Combinations",
fill = "Genre") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Insight :
PS2, PS3, PS4, Wii, DS, and X360 account for over 60% of all genre-platform combinations, indicating their broad appeal and game diversity.
Top Genres: Action, Adventure, Sports, and Role-Playing games together make up more than 70% of all combinations, highlighting their popularity across multiple platforms.
Specialized Platforms: Wii and DS dominate Puzzle and Sports genres, contributing to over 40% of the games in these genres, suggesting a focus on family and casual gaming.
Underrepresented Platforms: Platforms like PCFX and Neo Geo (NG) have less than 1% of all combinations, indicating limited market reach or developer interest.
Significance :
Developers can use this data to decide which platforms to release their games on, based on the popularity of certain genres.
Console makers can use these insights to attract more developers or to focus on certain genres that are popular on their platform.
Further Questions :
Why are some genres less common on certain platforms?
What factors drive the popularity of certain genres on specific platforms?
How do market trends or technological constraints influence these patterns?