# Load the video game sales data set from a local CSV file.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
data <- read.csv(file = "C:\\Users\\91814\\Desktop\\stats\\vgsales.csv")

# Numeric summary (min, quartiles, median, mean, max) of Global_Sales.
col1_summary <- summary(data[["Global_Sales"]])
print(col1_summary)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0100  0.0600  0.1700  0.5374  0.4700 82.7400
# Numeric summary (min, quartiles, median, mean, max) of Other_Sales.
col2_summary <- summary(data[["Other_Sales"]])
print(col2_summary)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.00000  0.00000  0.01000  0.04806  0.04000 10.57000
# Distinct Genre values, in order of first appearance in the data.
col3_unique_val <- unique(data$Genre)
# Number of rows per Genre; table() orders its entries by sorted label.
col3_val_count <- table(data$Genre)

# Categorical summary for column Genre.
# Labels and counts are both taken from the same table object. unique() and
# table() order their results differently, so pairing them row by row
# attaches each count to the wrong genre.
print(data.frame(
  Value = names(col3_val_count),
  Count = as.vector(col3_val_count)
))
##           Value Count
## 1        Action  3316
## 2     Adventure  1286
## 3      Fighting   848
## 4          Misc  1739
## 5      Platform   886
## 6        Puzzle   582
## 7        Racing  1249
## 8  Role-Playing  1488
## 9       Shooter  1310
## 10   Simulation   867
## 11       Sports  2346
## 12     Strategy   681

Novel Questions to Investigate

Aggregate function for question 1: total global sales by genre

 # Total (summed) Global_Sales for each Genre.
total_global_sales <- aggregate(Global_Sales ~ Genre, data = data, FUN = sum)
# The original name says "mean" although the values are sums; it is kept as an
# alias so any later code that refers to it keeps working.
mean_global_sales <- total_global_sales
print(total_global_sales)
##           Genre Global_Sales
## 1        Action      1751.18
## 2     Adventure       239.04
## 3      Fighting       448.91
## 4          Misc       809.96
## 5      Platform       831.37
## 6        Puzzle       244.95
## 7        Racing       732.04
## 8  Role-Playing       927.37
## 9       Shooter      1037.37
## 10   Simulation       392.20
## 11       Sports      1330.93
## 12     Strategy       175.12

Insights:

Top-Performing Genres Globally:

Moderate-Performing Genres:

Lower-Performing Genres:

Significance:

Further Questions:

Visual Summaries

Box plot visualisation between 2 columns - Global sales & Genre

library(ggplot2)

# Box plot of Global_Sales for every Genre. The x-axis labels are rotated by
# 45 degrees and right-justified so neighbouring genre names do not overlap.
ggplot(data = data, mapping = aes(x = Genre, y = Global_Sales)) +
  geom_boxplot(colour = "black", fill = "skyblue") +
  ggtitle("Distribution of global sales by genre") +
  xlab("Genre") +
  ylab("Global sales in millions") +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

Scatter Plot for correlation between columns - Global sales and other sales by Genre

# Scatter plot of Other_Sales against Global_Sales, one point per row of
# `data`, coloured by Genre. Points are semi-transparent so that overlapping
# observations remain visible.
ggplot(data, aes(x = Global_Sales, y = Other_Sales, color = Genre)) +
  geom_point(alpha = 0.7) +
  labs(
    title = "Correlation between global sales and other sales by Genre",
    # The axis labels previously ended in an unmatched ")".
    x = "Global sales (in millions)",
    y = "Other sales (in millions)"
  ) +
  theme_minimal()

Line plot for trend of global sales over time by genre

# Trend of total global sales per year, drawn as one line per Genre.
# Sales are summed within each Year/Genre pair first. Drawing geom_line() on
# the raw rows would join every individual row of a genre, giving several
# points for the same year instead of one yearly value.
yearly_genre_sales <- aggregate(
  Global_Sales ~ Year + Genre,
  data = data,
  FUN = sum
)

# Coerce Year to a number so the x-axis is continuous. Entries that are not
# valid years become NA (the coercion warning is expected) and are dropped.
# NOTE(review): assumes Year may contain non-numeric placeholders - confirm
# against the CSV.
yearly_genre_sales$Year <- suppressWarnings(
  as.integer(as.character(yearly_genre_sales$Year))
)
yearly_genre_sales <- yearly_genre_sales[!is.na(yearly_genre_sales$Year), ]

ggplot(
  yearly_genre_sales,
  aes(x = Year, y = Global_Sales, color = Genre, group = Genre)
) +
  geom_line() +
  labs(
    title = "Trend of global sales over time by Genre",
    x = "Year",
    y = "Total global sales in millions"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))

Stacked bar plot showing the interaction between variables: sales distribution by genre and publisher:

# Stacked bar chart: bar height is the Global_Sales of the rows in each Genre,
# stacked and filled by Publisher. The x-axis labels are rotated 45 degrees.
# NOTE(review): if Publisher has many distinct values the legend may dwarf the
# plot - consider restricting to the largest publishers.
ggplot(data = data, mapping = aes(x = Genre, y = Global_Sales, fill = Publisher)) +
  geom_col() +
  ggtitle("Sales distribution by Genre and Publisher") +
  xlab("Genre") +
  ylab("Total global sales in millions") +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))