Memuat Library

library(ggplot2)
library(readxl)
library(dplyr)  
library(tidyr) 
library(corrplot)
library(kableExtra)

Input Data

Mengambil dataset dari Excel yang berisi variabel kategorik (HERO) dan variabel numerik (PICK RATE, WIN RATE, BAN RATE).

# Switch to the folder holding the workbook, read it into `data`, and show
# the first six rows as a quick check of the imported columns.
# NOTE(review): setwd() with an absolute, machine-specific path makes the
# script non-portable; consider a project-relative path instead.
setwd("C:/Users/Advan/OneDrive/Documents/Semester 2")
data <- read_excel(path = "DATA_PICK_PROSTAT.xlsx")
head(data, n = 6)
## # A tibble: 6 × 5
##   RANKING HERO           `PICK RATE` `WIN RATE` `BAN RATE`
##     <dbl> <chr>                <dbl>      <dbl>      <dbl>
## 1       1 Lolita              0.0016      0.581     0.0066
## 2       2 Floryn              0.0071      0.566     0.0452
## 3       3 Julian              0.0168      0.564     0.495 
## 4       4 Melissa             0.0127      0.556     0.0518
## 5       5 Argus               0.0063      0.549     0.0456
## 6       6 Popol and Kupa      0.0055      0.549     0.0081

1. Pie Chart

Memvisualisasikan distribusi hero yang paling sering digunakan dalam game.

# Pie chart of hero usage: each slice is sized by the hero's PICK RATE.
# Counting rows (the default stat of geom_bar) gives every hero an equally
# sized slice whenever the table holds one row per hero, so the rate is
# mapped to y explicitly and drawn with geom_col().
pie_chart <- ggplot(data, aes(x = "", y = `PICK RATE`, fill = HERO)) +
  geom_col(width = 1) +
  # Polar coordinates on y turn the single stacked bar into a pie.
  coord_polar("y", start = 0) +
  theme_minimal() +
  labs(title = "Distribution of Heroes") +
  theme(
    axis.text.x = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

pie_chart

2. Bar Chart untuk PICK RATE, WIN RATE, dan BAN RATE

# Reshape the three rate columns into long form: each original row becomes
# three rows, with the source column name in RATE_TYPE and its number in
# VALUE. The plots that compare the three rates read this table.
data_long <- pivot_longer(
  data,
  cols = c(`PICK RATE`, `WIN RATE`, `BAN RATE`),
  names_to = "RATE_TYPE",
  values_to = "VALUE"
)

Buat Bar Chart

Memvisualisasikan perbandingan tiga metrik utama untuk setiap hero.

# Grouped bar chart: for every hero, one bar per rate type, side by side.
bar_chart <- ggplot(
  data_long,
  aes(x = HERO, y = VALUE, fill = RATE_TYPE)
) +
  # stat = "identity" plots VALUE as-is instead of counting rows.
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Pick, Win, and Ban Rates by Hero",
    x = "Hero",
    y = "Rate",
    fill = "Rate Type"
  ) +
  theme_minimal() +
  # Tilt the hero names so they stay readable along the x axis.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

bar_chart

3. Histogram untuk WIN RATE

Menunjukkan distribusi win rate di seluruh hero.

# Histogram of WIN RATE using bins 0.005 wide (half a percentage point);
# the x axis is labelled as percentages with one decimal place.
histogram <- ggplot(data, aes(x = `WIN RATE`)) +
  geom_histogram(
    binwidth = 0.005,
    fill = "steelblue",
    color = "black",
    alpha = 0.7
  ) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 0.1)) +
  labs(
    title = "Distribution of Win Rates",
    subtitle = "Showing frequency of different win rate values",
    x = "Win Rate",
    y = "Frequency"
  ) +
  theme_minimal() +
  # Bold title and axis titles; must come after theme_minimal(), which
  # would otherwise reset them.
  theme(
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold")
  )

print(histogram)

4. Density plot untuk WIN RATE

Menunjukkan pola distribusi win rate dengan pendekatan yang lebih halus dibanding histogram.

# Kernel density estimate of WIN RATE, a smoothed counterpart of the
# histogram; the x axis is labelled as percentages with one decimal place.
density_plot <- ggplot(data, aes(x = `WIN RATE`)) +
  geom_density(fill = "steelblue", alpha = 0.7) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 0.1)) +
  ggtitle(
    "Density of Win Rates",
    subtitle = "Showing distribution pattern of win rates"
  ) +
  xlab("Win Rate") +
  ylab("Density") +
  theme_minimal() +
  # Bold title and axis titles, applied on top of the minimal theme.
  theme(
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold")
  )

print(density_plot)

5. Boxplot untuk PICK RATE, WIN RATE, dan BAN RATE

Membandingkan sebaran ketiga metrik utama dan mendeteksi outlier.

# Box plots of VALUE for each rate type, overlaid with one point per row
# coloured by hero, so the spread and the extreme heroes are both visible.
hero_points <- ggplot(
  data_long,
  aes(x = RATE_TYPE, y = VALUE, fill = RATE_TYPE)
) +
  # The box plot's own outlier glyphs are hidden because every observation
  # is already drawn as a point by the jitter layer.
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  # Jitter horizontally only: height = 0 keeps each point at its true rate
  # (by default geom_jitter also adds a small vertical offset).
  geom_jitter(
    aes(color = HERO),
    width = 0.2,
    height = 0,
    size = 3,
    alpha = 0.8
  ) +
  theme_minimal() +
  labs(
    title = "Hero Statistics by Rate Type",
    subtitle = "With individual heroes highlighted",
    x = "Rate Type",
    y = "Value"
  ) +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 0.1))

print(hero_points)

6. Statistik Deskriptif

Menghitung statistik deskriptif untuk PICK RATE

# Descriptive statistics for PICK RATE, collected into a one-row summary.
# Missing values are dropped (na.rm = TRUE) so a single NA neither turns the
# results into NA nor makes quantile() fail; this matches the correlation
# step, which also works on complete observations only.
pick_stats <- data %>%
  summarise(
    mean = mean(`PICK RATE`, na.rm = TRUE),
    median = median(`PICK RATE`, na.rm = TRUE),
    # Most frequent value, taken from the frequency table's labels, so it is
    # stored as text; table() already ignores NA.
    mode = as.character(names(which.max(table(`PICK RATE`)))),
    Q1 = quantile(`PICK RATE`, 0.25, na.rm = TRUE),
    Q3 = quantile(`PICK RATE`, 0.75, na.rm = TRUE),
    range = max(`PICK RATE`, na.rm = TRUE) - min(`PICK RATE`, na.rm = TRUE),
    variance = var(`PICK RATE`, na.rm = TRUE),
    std_dev = sd(`PICK RATE`, na.rm = TRUE)
  )

Menghitung statistik deskriptif untuk WIN RATE

# Descriptive statistics for WIN RATE, collected into a one-row summary.
# Missing values are dropped (na.rm = TRUE) so a single NA neither turns the
# results into NA nor makes quantile() fail; this matches the correlation
# step, which also works on complete observations only.
win_stats <- data %>%
  summarise(
    mean = mean(`WIN RATE`, na.rm = TRUE),
    median = median(`WIN RATE`, na.rm = TRUE),
    # Most frequent value, taken from the frequency table's labels, so it is
    # stored as text; table() already ignores NA.
    mode = as.character(names(which.max(table(`WIN RATE`)))),
    Q1 = quantile(`WIN RATE`, 0.25, na.rm = TRUE),
    Q3 = quantile(`WIN RATE`, 0.75, na.rm = TRUE),
    range = max(`WIN RATE`, na.rm = TRUE) - min(`WIN RATE`, na.rm = TRUE),
    variance = var(`WIN RATE`, na.rm = TRUE),
    std_dev = sd(`WIN RATE`, na.rm = TRUE)
  )

Menghitung statistik deskriptif untuk BAN RATE

# Descriptive statistics for BAN RATE, collected into a one-row summary.
# Missing values are dropped (na.rm = TRUE) so a single NA neither turns the
# results into NA nor makes quantile() fail; this matches the correlation
# step, which also works on complete observations only.
ban_stats <- data %>%
  summarise(
    mean = mean(`BAN RATE`, na.rm = TRUE),
    median = median(`BAN RATE`, na.rm = TRUE),
    # Most frequent value, taken from the frequency table's labels, so it is
    # stored as text; table() already ignores NA.
    mode = as.character(names(which.max(table(`BAN RATE`)))),
    Q1 = quantile(`BAN RATE`, 0.25, na.rm = TRUE),
    Q3 = quantile(`BAN RATE`, 0.75, na.rm = TRUE),
    range = max(`BAN RATE`, na.rm = TRUE) - min(`BAN RATE`, na.rm = TRUE),
    variance = var(`BAN RATE`, na.rm = TRUE),
    std_dev = sd(`BAN RATE`, na.rm = TRUE)
  )

Format descriptive statistics

# Round every numeric column of a summary table to six decimal places.
#
# stats_df: a data frame of statistics; non-numeric columns (such as a
#   character `mode` column) are passed through unchanged.
# Returns the same table with its numeric columns rounded.
format_stats <- function(stats_df) {
  round_six <- function(column) round(column, digits = 6)
  mutate(stats_df, across(where(is.numeric), round_six))
}
# Apply the six-decimal rounding to each of the three summary tables.
pick_stats_formatted <- pick_stats %>% format_stats()
win_stats_formatted <- win_stats %>% format_stats()
ban_stats_formatted <- ban_stats %>% format_stats()

7. Korelasi antara PICK RATE, WIN RATE, dan BAN RATE

Melihat apakah ada hubungan antara pemilihan hero, kemenangan hero, dan pemblokiran hero.

# Correlation matrix of the three rate columns, computed only on rows where
# all three values are present (use = "complete.obs").
correlation <- data %>%
  select(`PICK RATE`, `WIN RATE`, `BAN RATE`) %>%
  cor(use = "complete.obs")
# End the heading with a newline so that, in a console or Rscript run, the
# matrix printed next starts on its own line instead of after the heading.
cat("Korelasi antara variabel:\n")
## Korelasi antara variabel:
print(correlation)
##             PICK RATE    WIN RATE  BAN RATE
## PICK RATE  1.00000000 -0.04448753 0.8554308
## WIN RATE  -0.04448753  1.00000000 0.1202186
## BAN RATE   0.85543082  0.12021857 1.0000000

Visualisasi korelasi

Grafik korelasi ini digunakan untuk memahami hubungan antara tiga variabel numerik utama dalam dataset, yaitu PICK RATE, WIN RATE, dan BAN RATE.

# Draw the upper triangle of the correlation matrix as circles, with each
# coefficient printed in its cell, on a 100-step red-yellow-green colour ramp.
corrplot(
  correlation,
  method = "circle",
  type = "upper",
  col = colorRampPalette(c("#D73027", "#FFFFBF", "#1A9850"))(100),
  addCoef.col = "black",
  number.cex = 0.8,
  tl.col = "black",
  tl.srt = 45
)