Analisis Statistik Pemilihan Hero dalam Rank Mobile Legends

Memuat Library

library(ggplot2)
library(readxl)
library(dplyr)  
library(tidyr) 
library(corrplot)
library(kableExtra)

Input Data

Mengambil dataset dari Excel yang berisi variabel kategorik (HERO) dan variabel numerik (PICK RATE, WIN RATE, BAN RATE).

# Load the hero statistics workbook.
# The path is built with file.path() instead of calling setwd(), so the
# session's working directory is left untouched and the data location
# lives in a single variable that is easy to change on another machine.
data_dir <- "C:/Users/Advan/OneDrive/Documents/Semester 2"
data_path <- file.path(data_dir, "DATA_PICK_PROSTAT.xlsx")

# Fail early with a readable message if the workbook is missing.
if (!file.exists(data_path)) {
  stop("Data file not found: ", data_path, call. = FALSE)
}

data <- read_excel(data_path)
head(data)

## # A tibble: 6 × 5
##   RANKING HERO           `PICK RATE` `WIN RATE` `BAN RATE`
##     <dbl> <chr>                <dbl>      <dbl>      <dbl>
## 1       1 Lolita              0.0016      0.581     0.0066
## 2       2 Floryn              0.0071      0.566     0.0452
## 3       3 Julian              0.0168      0.564     0.495 
## 4       4 Melissa             0.0127      0.556     0.0518
## 5       5 Argus               0.0063      0.549     0.0456
## 6       6 Popol and Kupa      0.0055      0.549     0.0081

1. Pie Chart

Memvisualisasikan distribusi hero yang paling sering digunakan dalam game.

# Pie chart of each hero's share of the total pick rate.
# Mapping y to `PICK RATE` sizes every slice by how often the hero is
# picked; counting rows instead gives all heroes an identical slice
# whenever each hero occupies a single row of the data.
pie_chart <- ggplot(data, aes(x = "", y = `PICK RATE`, fill = HERO)) +
  geom_col(width = 1) +
  # Wrapping the stacked bar around the y axis turns it into a pie.
  coord_polar("y", start = 0) +
  theme_minimal() +
  # Axis titles carry no information on a pie chart, so they are dropped.
  labs(title = "Distribution of Heroes", x = NULL, y = NULL) +
  theme(axis.text.x = element_blank(), plot.title = element_text(hjust = 0.5))

pie_chart

2. Bar Chart untuk PICK RATE, WIN RATE, dan BAN RATE

# Reshape the three rate columns into long format: one row per hero and
# rate type, with the source column name stored in RATE_TYPE and its
# value in VALUE.
data_long <- pivot_longer(
  data,
  cols = c(`PICK RATE`, `WIN RATE`, `BAN RATE`),
  names_to = "RATE_TYPE",
  values_to = "VALUE"
)

Buat Bar Chart

Memvisualisasikan perbandingan tiga metrik utama untuk setiap hero.

# Grouped bar chart: one cluster of bars per hero, one bar per rate type.
bar_chart <- ggplot(
  data = data_long,
  mapping = aes(x = HERO, y = VALUE, fill = RATE_TYPE)
) +
  # stat = "identity" takes bar heights straight from VALUE; dodging places
  # the rate types side by side instead of stacking them.
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Pick, Win, and Ban Rates by Hero",
    x = "Hero",
    y = "Rate",
    fill = "Rate Type"
  ) +
  theme_minimal() +
  # Slant the hero names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

bar_chart

3. Histogram untuk WIN RATE

Menunjukkan distribusi win rate di seluruh hero.

# Histogram of win rates across heroes, using bins 0.005 wide (half a
# percentage point) and an x axis labelled in percent.
histogram <- ggplot(data, mapping = aes(x = `WIN RATE`)) +
  geom_histogram(
    binwidth = 0.005,
    fill = "steelblue",
    color = "black",
    alpha = 0.7
  ) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 0.1)) +
  labs(
    title = "Distribution of Win Rates",
    subtitle = "Showing frequency of different win rate values",
    x = "Win Rate",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold")
  )

print(histogram)

4. Density plot untuk WIN RATE

Menunjukkan pola distribusi win rate dengan pendekatan yang lebih halus dibanding histogram.

# Kernel density estimate of win rates: a smoothed counterpart to the
# histogram, with the x axis labelled in percent.
density_plot <- ggplot(data, mapping = aes(x = `WIN RATE`)) +
  geom_density(fill = "steelblue", alpha = 0.7) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 0.1)) +
  labs(
    title = "Density of Win Rates",
    subtitle = "Showing distribution pattern of win rates",
    x = "Win Rate",
    y = "Density"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold")
  )

print(density_plot)

5. Boxplot untuk PICK RATE, WIN RATE, dan BAN RATE

Membandingkan sebaran ketiga metrik utama dan mendeteksi outlier.

# Box plots of the three rate types with every hero overlaid as a point.
hero_points <- ggplot(
  data_long,
  aes(x = RATE_TYPE, y = VALUE, fill = RATE_TYPE)
) +
  # Outlier glyphs are suppressed because every observation is already
  # drawn by the jitter layer below; showing both would plot them twice.
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  # Spread the points horizontally only. height = 0 keeps each point at
  # its true rate (geom_jitter() adds vertical noise by default), and the
  # fixed seed makes the figure identical between runs.
  geom_jitter(
    aes(color = HERO),
    position = position_jitter(width = 0.2, height = 0, seed = 123),
    size = 3,
    alpha = 0.8
  ) +
  theme_minimal() +
  labs(title = "Hero Statistics by Rate Type",
       subtitle = "With individual heroes highlighted",
       x = "Rate Type",
       y = "Value") +
  theme(plot.title = element_text(face = "bold"),
        axis.title = element_text(face = "bold")) +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 0.1))

print(hero_points)

6. Statistik Deskriptif

Menghitung statistik deskriptif untuk PICK RATE

# Descriptive statistics for PICK RATE, returned as a one-row tibble.
# Missing values are dropped (na.rm = TRUE) so that a blank spreadsheet
# cell neither turns the statistics into NA nor makes quantile() fail.
pick_stats <- data %>%
  summarise(
    mean = mean(`PICK RATE`, na.rm = TRUE),
    median = median(`PICK RATE`, na.rm = TRUE),
    # Mode = most frequent value. table() orders the distinct values and
    # which.max() returns the first maximum, so ties (including the case
    # where no value repeats) resolve to the smallest value. The result
    # is converted back to numeric so it has the same type as the other
    # statistics.
    mode = as.numeric(names(which.max(table(`PICK RATE`)))),
    Q1 = quantile(`PICK RATE`, 0.25, na.rm = TRUE, names = FALSE),
    Q3 = quantile(`PICK RATE`, 0.75, na.rm = TRUE, names = FALSE),
    range = max(`PICK RATE`, na.rm = TRUE) - min(`PICK RATE`, na.rm = TRUE),
    variance = var(`PICK RATE`, na.rm = TRUE),
    std_dev = sd(`PICK RATE`, na.rm = TRUE)
  )

Menghitung statistik deskriptif untuk WIN RATE

# Descriptive statistics for WIN RATE, returned as a one-row tibble.
# Missing values are dropped (na.rm = TRUE) so that a blank spreadsheet
# cell neither turns the statistics into NA nor makes quantile() fail.
win_stats <- data %>%
  summarise(
    mean = mean(`WIN RATE`, na.rm = TRUE),
    median = median(`WIN RATE`, na.rm = TRUE),
    # Mode = most frequent value. table() orders the distinct values and
    # which.max() returns the first maximum, so ties (including the case
    # where no value repeats) resolve to the smallest value. The result
    # is converted back to numeric so it has the same type as the other
    # statistics.
    mode = as.numeric(names(which.max(table(`WIN RATE`)))),
    Q1 = quantile(`WIN RATE`, 0.25, na.rm = TRUE, names = FALSE),
    Q3 = quantile(`WIN RATE`, 0.75, na.rm = TRUE, names = FALSE),
    range = max(`WIN RATE`, na.rm = TRUE) - min(`WIN RATE`, na.rm = TRUE),
    variance = var(`WIN RATE`, na.rm = TRUE),
    std_dev = sd(`WIN RATE`, na.rm = TRUE)
  )

Menghitung statistik deskriptif untuk BAN RATE

# Descriptive statistics for BAN RATE, returned as a one-row tibble.
# Missing values are dropped (na.rm = TRUE) so that a blank spreadsheet
# cell neither turns the statistics into NA nor makes quantile() fail.
ban_stats <- data %>%
  summarise(
    mean = mean(`BAN RATE`, na.rm = TRUE),
    median = median(`BAN RATE`, na.rm = TRUE),
    # Mode = most frequent value. table() orders the distinct values and
    # which.max() returns the first maximum, so ties (including the case
    # where no value repeats) resolve to the smallest value. The result
    # is converted back to numeric so it has the same type as the other
    # statistics.
    mode = as.numeric(names(which.max(table(`BAN RATE`)))),
    Q1 = quantile(`BAN RATE`, 0.25, na.rm = TRUE, names = FALSE),
    Q3 = quantile(`BAN RATE`, 0.75, na.rm = TRUE, names = FALSE),
    range = max(`BAN RATE`, na.rm = TRUE) - min(`BAN RATE`, na.rm = TRUE),
    variance = var(`BAN RATE`, na.rm = TRUE),
    std_dev = sd(`BAN RATE`, na.rm = TRUE)
  )

Format descriptive statistics

# Round every numeric column of a statistics table to six decimal places;
# non-numeric columns pass through unchanged.
#
# stats_df: a data frame of summary statistics.
# Returns stats_df with its numeric columns rounded.
format_stats <- function(stats_df) {
  round_six <- function(column) round(column, digits = 6)
  mutate(stats_df, across(where(is.numeric), round_six))
}

pick_stats_formatted <- format_stats(pick_stats)
win_stats_formatted <- format_stats(win_stats)
ban_stats_formatted <- format_stats(ban_stats)

Print descriptive statistics

Memenuhi syarat perhitungan statistik deskriptif yang diminta dalam tugas.

# Heading for the PICK RATE table. cat() does not append a newline on its
# own, so one is added to keep the following output on a separate line.
cat("Statistik Deskriptif untuk PICK_RATE:\n")

## Statistik Deskriptif untuk PICK_RATE:

# Show the PICK RATE statistics as a striped, hover-highlighted, condensed
# table that is not stretched to the full page width.
kable_styling(
  kable(pick_stats_formatted),
  bootstrap_options = c("striped", "hover", "condensed"),
  full_width = FALSE
)

mean	median	mode	Q1	Q3	range	variance	std_dev
0.008244	0.0063	0.0016	0.0042	0.0127	0.0152	3.1e-05	0.005609

# Heading for the WIN RATE table. cat() does not append a newline on its
# own, so one is added to keep the following output on a separate line.
cat("Statistik Deskriptif untuk WIN_RATE:\n")

## Statistik Deskriptif untuk WIN_RATE:

# Show the WIN RATE statistics as a striped, hover-highlighted, condensed
# table that is not stretched to the full page width.
kable_styling(
  kable(win_stats_formatted),
  bootstrap_options = c("striped", "hover", "condensed"),
  full_width = FALSE
)

mean	median	mode	Q1	Q3	range	variance	std_dev
0.555056	0.5488	0.5412	0.5474	0.5639	0.04	0.000173	0.013145

# Heading for the BAN RATE table. cat() does not append a newline on its
# own, so one is added to keep the following output on a separate line.
cat("Statistik Deskriptif untuk BAN_RATE:\n")

## Statistik Deskriptif untuk BAN_RATE:

# Show the BAN RATE statistics as a striped, hover-highlighted, condensed
# table that is not stretched to the full page width.
kable_styling(
  kable(ban_stats_formatted),
  bootstrap_options = c("striped", "hover", "condensed"),
  full_width = FALSE
)

mean	median	mode	Q1	Q3	range	variance	std_dev
0.108278	0.0452	0.0051	0.0081	0.0518	0.4895	0.02989	0.172886

7. Korelasi antara PICK RATE, WIN RATE, dan BAN RATE

Melihat apakah ada hubungan antara pemilihan hero, kemenangan hero, dan pemblokiran hero.

# Correlation matrix (cor()'s default method, Pearson) of the three rate
# columns. use = "complete.obs" drops any row with a missing value in one
# of them before the coefficients are computed.
correlation <- data %>%
  select(`PICK RATE`, `WIN RATE`, `BAN RATE`) %>%
  cor(use = "complete.obs")

# cat() does not append a newline on its own; without one, the header row
# of the matrix printed next would start on the same line as this message.
cat("Korelasi antara variabel:\n")

## Korelasi antara variabel:

# Display the correlation matrix in the console / report output.
print(x = correlation)

##             PICK RATE    WIN RATE  BAN RATE
## PICK RATE  1.00000000 -0.04448753 0.8554308
## WIN RATE  -0.04448753  1.00000000 0.1202186
## BAN RATE   0.85543082  0.12021857 1.0000000

Visualisasi korelasi

Grafik korelasi ini digunakan untuk memahami hubungan antara tiga variabel numerik utama dalam dataset, yaitu PICK RATE, WIN RATE, dan BAN RATE.

# Upper-triangle correlation plot: each coefficient is drawn as a circle
# and its numeric value is printed on top in black. The palette runs from
# red through pale yellow to green across 100 interpolated steps.
corrplot(
  correlation,
  method = "circle",
  type = "upper",
  col = colorRampPalette(c("#D73027", "#FFFFBF", "#1A9850"))(100),
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black",
  number.cex = 0.8
)