library(ggplot2)
library(readxl)
library(dplyr)
library(tidyr)
library(corrplot)
library(kableExtra)
Mengambil dataset dari Excel yang berisi variabel kategorik (HERO) dan variabel numerik (PICK RATE, WIN RATE, BAN RATE).
# Load the hero statistics workbook and preview its first rows.
# The file is addressed through an explicit path instead of setwd(), so the
# session's working directory is left untouched. Change `data_dir` if the
# workbook lives somewhere else.
data_dir <- "C:/Users/Advan/OneDrive/Documents/Semester 2"
data <- read_excel(file.path(data_dir, "DATA_PICK_PROSTAT.xlsx"))
head(data)
## # A tibble: 6 × 5
## RANKING HERO `PICK RATE` `WIN RATE` `BAN RATE`
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1 Lolita 0.0016 0.581 0.0066
## 2 2 Floryn 0.0071 0.566 0.0452
## 3 3 Julian 0.0168 0.564 0.495
## 4 4 Melissa 0.0127 0.556 0.0518
## 5 5 Argus 0.0063 0.549 0.0456
## 6 6 Popol and Kupa 0.0055 0.549 0.0081
Memvisualisasikan distribusi hero yang paling sering digunakan dalam game.
# Pie chart of each hero's share of the summed pick rate.
# geom_bar() would count rows per hero; the table appears to hold a single
# row per hero (see the head() output), which would draw every slice the same
# size. geom_col() with y = `PICK RATE` sizes each slice by how often the hero
# is actually picked.
pie_chart <- ggplot(data, aes(x = "", y = `PICK RATE`, fill = HERO)) +
  geom_col(width = 1) +
  # Wrapping the stacked bar around the y axis turns it into a pie.
  coord_polar("y", start = 0) +
  theme_minimal() +
  # The axis titles carry no information on a pie chart, so they are dropped.
  labs(title = "Distribution of Heroes", x = NULL, y = NULL) +
  theme(axis.text.x = element_blank(), plot.title = element_text(hjust = 0.5))
pie_chart
# Reshape the three rate columns into long form: one row per hero and rate
# type, with the metric name in RATE_TYPE and its value in VALUE.
data_long <- pivot_longer(
  data,
  cols = c(`PICK RATE`, `WIN RATE`, `BAN RATE`),
  names_to = "RATE_TYPE",
  values_to = "VALUE"
)
Memvisualisasikan perbandingan tiga metrik utama untuk setiap hero.
# Grouped bar chart: for every hero, one bar per rate type drawn side by side.
bar_chart <- ggplot(
  data = data_long,
  mapping = aes(x = HERO, y = VALUE, fill = RATE_TYPE)
) +
  # geom_col() plots the values as given (same as geom_bar(stat = "identity")).
  geom_col(position = "dodge") +
  labs(
    x = "Hero",
    y = "Rate",
    fill = "Rate Type",
    title = "Pick, Win, and Ban Rates by Hero"
  ) +
  theme_minimal() +
  # Slanted labels keep the hero names from overlapping.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
bar_chart
Menunjukkan distribusi win rate di seluruh hero.
# Histogram of win rates using 0.005-wide bins; the x axis is labelled in
# percent with one decimal place.
histogram <- ggplot(data = data, mapping = aes(x = `WIN RATE`)) +
  geom_histogram(
    binwidth = 0.005,
    colour = "black",
    fill = "steelblue",
    alpha = 0.7
  ) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 0.1)) +
  labs(
    x = "Win Rate",
    y = "Frequency",
    title = "Distribution of Win Rates",
    subtitle = "Showing frequency of different win rate values"
  ) +
  theme_minimal() +
  # Applied after theme_minimal() so the bold faces are not overwritten.
  theme(
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold")
  )
print(histogram)
Menunjukkan pola distribusi win rate dengan pendekatan yang lebih halus dibanding histogram.
# Kernel density estimate of win rates; the x axis is labelled in percent
# with one decimal place.
density_plot <- ggplot(data = data, mapping = aes(x = `WIN RATE`)) +
  geom_density(alpha = 0.7, fill = "steelblue") +
  scale_x_continuous(labels = scales::percent_format(accuracy = 0.1)) +
  labs(
    x = "Win Rate",
    y = "Density",
    title = "Density of Win Rates",
    subtitle = "Showing distribution pattern of win rates"
  ) +
  theme_minimal() +
  # Applied after theme_minimal() so the bold faces are not overwritten.
  theme(
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold")
  )
print(density_plot)
Membandingkan sebaran ketiga metrik utama dan mendeteksi outlier.
# Box plot of each rate type with every hero overlaid as a coloured point.
hero_points <- ggplot(
  data_long,
  aes(x = RATE_TYPE, y = VALUE, fill = RATE_TYPE)
) +
  # Outlier glyphs are suppressed because every observation is already drawn
  # by the point layer below.
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  # Jitter horizontally only (height = 0) so each point stays at its true
  # value, and fix the seed so the figure is identical on every run.
  geom_point(
    aes(color = HERO),
    position = position_jitter(width = 0.2, height = 0, seed = 42),
    size = 3,
    alpha = 0.8
  ) +
  theme_minimal() +
  labs(
    title = "Hero Statistics by Rate Type",
    subtitle = "With individual heroes highlighted",
    x = "Rate Type",
    y = "Value"
  ) +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 0.1))
print(hero_points)
#' Most frequent value of a vector, returned as a string.
#'
#' @param x A vector of observations (NA values are ignored by `table()`).
#' @return The most frequent value as a character string; ties go to the
#'   smallest value. `NA_character_` when `x` is empty or no value occurs more
#'   than once, because such data has no meaningful mode (the previous code
#'   silently reported the smallest value in that case).
rate_mode <- function(x) {
  counts <- table(x)
  if (length(counts) == 0L || max(counts) < 2L) {
    return(NA_character_)
  }
  names(counts)[which.max(counts)]
}

#' One-row table of descriptive statistics for a numeric vector.
#'
#' @param x A numeric vector; missing values are dropped before computing.
#' @return A one-row tibble with columns `mean`, `median`, `mode` (character),
#'   `Q1`, `Q3`, `range`, `variance` and `std_dev`.
describe_rate <- function(x) {
  x <- x[!is.na(x)]
  tibble(
    mean = mean(x),
    median = median(x),
    mode = rate_mode(x),
    Q1 = quantile(x, 0.25),
    Q3 = quantile(x, 0.75),
    range = max(x) - min(x),
    variance = var(x),
    std_dev = sd(x)
  )
}

# The same set of statistics for each of the three rate columns.
pick_stats <- describe_rate(data[["PICK RATE"]])
win_stats <- describe_rate(data[["WIN RATE"]])
ban_stats <- describe_rate(data[["BAN RATE"]])
#' Round every numeric column of a statistics table.
#'
#' Non-numeric columns (such as a character `mode`) are returned unchanged,
#' and the class of `stats_df` (data.frame or tibble) is preserved.
#'
#' @param stats_df A data frame of summary statistics.
#' @param digits Number of decimal places to keep; defaults to 6.
#' @return `stats_df` with its numeric columns rounded to `digits` places.
format_stats <- function(stats_df, digits = 6) {
  stopifnot(is.numeric(digits), length(digits) == 1L)
  # Assigning into stats_df[] replaces the columns but keeps the container.
  stats_df[] <- lapply(stats_df, function(column) {
    if (is.numeric(column)) round(column, digits) else column
  })
  stats_df
}
# Rounded copies of the three summary tables, ready for display.
pick_stats_formatted <- pick_stats %>% format_stats()
win_stats_formatted <- win_stats %>% format_stats()
ban_stats_formatted <- ban_stats %>% format_stats()
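Tabel-tabel berikut menyajikan statistik deskriptif (mean, median, modus, Q1, Q3, range, varians, dan simpangan baku) untuk memenuhi syarat perhitungan yang diminta dalam tugas.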
# Print a heading, then render the pick-rate statistics as a styled table.
cat("Statistik Deskriptif untuk PICK_RATE:")
## Statistik Deskriptif untuk PICK_RATE:
kable_styling(
  kable(pick_stats_formatted),
  bootstrap_options = c("striped", "hover", "condensed"),
  full_width = FALSE
)
| mean | median | mode | Q1 | Q3 | range | variance | std_dev |
|---|---|---|---|---|---|---|---|
| 0.008244 | 0.0063 | 0.0016 | 0.0042 | 0.0127 | 0.0152 | 3.1e-05 | 0.005609 |
# Print a heading, then render the win-rate statistics as a styled table.
cat("Statistik Deskriptif untuk WIN_RATE:")
## Statistik Deskriptif untuk WIN_RATE:
kable_styling(
  kable(win_stats_formatted),
  bootstrap_options = c("striped", "hover", "condensed"),
  full_width = FALSE
)
| mean | median | mode | Q1 | Q3 | range | variance | std_dev |
|---|---|---|---|---|---|---|---|
| 0.555056 | 0.5488 | 0.5412 | 0.5474 | 0.5639 | 0.04 | 0.000173 | 0.013145 |
# Print a heading, then render the ban-rate statistics as a styled table.
cat("Statistik Deskriptif untuk BAN_RATE:")
## Statistik Deskriptif untuk BAN_RATE:
kable_styling(
  kable(ban_stats_formatted),
  bootstrap_options = c("striped", "hover", "condensed"),
  full_width = FALSE
)
| mean | median | mode | Q1 | Q3 | range | variance | std_dev |
|---|---|---|---|---|---|---|---|
| 0.108278 | 0.0452 | 0.0051 | 0.0081 | 0.0518 | 0.4895 | 0.02989 | 0.172886 |
Melihat apakah ada hubungan antara pemilihan hero, kemenangan hero, dan pemblokiran hero.
# Correlation matrix of the three rate columns; use = "complete.obs" makes
# cor() drop any row that has a missing value in one of them.
correlation <- data %>%
  select(`PICK RATE`, `WIN RATE`, `BAN RATE`) %>%
  cor(use = "complete.obs")
cat("Korelasi antara variabel:")
## Korelasi antara variabel:
print(correlation)
## PICK RATE WIN RATE BAN RATE
## PICK RATE 1.00000000 -0.04448753 0.8554308
## WIN RATE -0.04448753 1.00000000 0.1202186
## BAN RATE 0.85543082 0.12021857 1.0000000
Grafik korelasi ini digunakan untuk memahami hubungan antara tiga variabel numerik utama dalam dataset: PICK RATE, WIN RATE, dan BAN RATE.
# Draw the upper triangle of the correlation matrix as circles, with the
# coefficient printed in each cell and a 100-step red-yellow-green palette.
corrplot(
  correlation,
  type = "upper",
  method = "circle",
  col = colorRampPalette(c("#D73027", "#FFFFBF", "#1A9850"))(100),
  addCoef.col = "black",
  number.cex = 0.8,
  tl.col = "black",
  tl.srt = 45
)