Memuat library yang dibutuhkan
library(tidyverse)
library(lubridate)
library(scales)
Persiapan Data
# Read the raw OWID COVID-19 export; the path is relative to the working
# directory.
covid_data <- read.csv("owid-covid-data.csv")

# Build the analysis table:
#   * parse `date` and coerce the three count columns to numeric,
#   * keep 1-9 April 2020 (both endpoints inclusive),
#   * drop rows whose `continent` is the empty string
#     (presumably aggregate rows such as world totals; confirm against the
#     dataset),
#   * keep only the columns used by the plots and statistics below.
df_filtered <- covid_data %>%
  mutate(
    date = as.Date(date),
    new_cases = as.numeric(new_cases),
    new_deaths = as.numeric(new_deaths),
    total_deaths = as.numeric(total_deaths)
  ) %>%
  filter(
    date >= as.Date("2020-04-01"),
    date <= as.Date("2020-04-09"),
    continent != ""
  ) %>%
  select(
    iso_code, location, continent, date,
    new_cases, new_deaths, total_deaths
  )

# Preview the first rows of the filtered table.
head(df_filtered)
## iso_code location continent date new_cases new_deaths total_deaths
## 1 AFG Afghanistan Asia 2020-04-01 25 0 4
## 2 AFG Afghanistan Asia 2020-04-02 26 0 4
## 3 AFG Afghanistan Asia 2020-04-03 43 0 4
## 4 AFG Afghanistan Asia 2020-04-04 0 0 4
## 5 AFG Afghanistan Asia 2020-04-05 35 1 5
## 6 AFG Afghanistan Asia 2020-04-06 29 2 7
Pie Chart
# Total new deaths per continent over the window, plus each continent's
# share of the overall total expressed in percent.
df_pie <- df_filtered %>%
  group_by(continent) %>%
  summarise(
    sum_new_deaths = sum(new_deaths, na.rm = TRUE)
  ) %>%
  mutate(
    prop = sum_new_deaths / sum(sum_new_deaths) * 100
  )

# A pie chart is a single stacked bar (constant x) drawn in polar
# coordinates, with the angle mapped to the stacked y values.
ggplot(data = df_pie, mapping = aes(x = "", y = prop, fill = continent)) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  labs(
    title = "Proporsi Kematian Baru per Benua (1-9 April 2020)",
    fill = "Benua"
  ) +
  theme_void()

Bar Chart
# Sum new cases per location over the window and keep the ten locations
# with the largest totals (rows are sorted in descending order first).
df_bar <- df_filtered %>%
  group_by(location) %>%
  summarise(sum_cases = sum(new_cases, na.rm = TRUE)) %>%
  arrange(desc(sum_cases)) %>%
  slice_head(n = 10)

# Horizontal bar chart; reorder() sorts the bars by their total so the
# largest ends up at the top after coord_flip().
ggplot(
  data = df_bar,
  mapping = aes(
    x = reorder(location, sum_cases),
    y = sum_cases,
    fill = location
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Negara dengan Penambahan Kasus Tertinggi (1-9 April 2020)",
    x = "Negara (Location)",
    y = "Total Kasus Baru",
    fill = "Negara"
  ) +
  theme_minimal() +
  # After the flip the location axis is vertical on screen; its major grid
  # lines are removed.
  theme(panel.grid.major.y = element_blank())

Histogram
# Histogram of daily new cases, stacked by continent.
#
# The x axis uses a pseudo-log transform rather than scale_x_log10().
# log10() maps 0 to -Inf, so a plain log scale discards every zero-case
# observation before binning; the descriptive statistics of this same
# column report a mode and first quartile of 0, i.e. a large share of the
# data would be missing from the plot. The pseudo-log transform is defined
# at 0 (and for negative values), is roughly linear near 0 and approaches
# log10 for large values, so all finite observations are binned.
# `trans` is used (rather than the newer `transform` alias) so the call
# also works on ggplot2 releases that predate that alias.
# Rows where new_cases is NA are still dropped by ggplot2.
ggplot(df_filtered, aes(x = new_cases, fill = continent)) +
  geom_histogram(
    color = "white",
    bins = 50,
    alpha = 0.8,
    position = "stack"
  ) +
  theme_minimal() +
  scale_x_continuous(
    trans = pseudo_log_trans(sigma = 1, base = 10),
    # Explicit breaks: default breaks are poorly spaced on a pseudo-log
    # axis. Breaks outside the data range are simply not drawn.
    breaks = c(0, 10, 100, 1000, 10000, 100000),
    labels = comma
  ) +
  labs(
    title = "Distribusi Frekuensi Kasus Baru Harian per Benua",
    x = "Penambahan Kasus Baru",
    y = "Frekuensi Observasi Harian",
    fill = "Benua"
  )

Density Plot
# Density of cumulative deaths, one semi-transparent curve per continent.
#
# The x axis uses a pseudo-log transform rather than scale_x_log10():
# log10(0) is -Inf, so with a plain log scale any row whose total_deaths
# is 0 would be removed before the density is estimated. The pseudo-log
# transform is defined at 0 and approaches log10 for large values, so such
# rows stay in the estimate. The density is computed on the transformed
# scale, as it was with the log10 scale.
# `trans` is used (rather than the newer `transform` alias) so the call
# also works on ggplot2 releases that predate that alias.
# Rows where total_deaths is NA are still dropped by ggplot2.
ggplot(df_filtered, aes(x = total_deaths, fill = continent)) +
  geom_density(alpha = 0.5, color = "white") +
  scale_x_continuous(
    trans = pseudo_log_trans(sigma = 1, base = 10),
    # Explicit breaks: default breaks are poorly spaced on a pseudo-log
    # axis. Breaks outside the data range are simply not drawn.
    breaks = c(0, 10, 100, 1000, 10000, 100000),
    labels = comma
  ) +
  theme_minimal() +
  labs(
    title = "Density Plot: Total Kematian per Benua",
    x = "Total Kematian",
    y = "Kepadatan",
    fill = "Benua"
  )

Box Plot
# Box plot of daily new deaths per continent, continents ordered by their
# median.
#
# The value axis uses a pseudo-log transform rather than scale_y_log10().
# Scale transformations are applied before the box statistics are
# computed, and log10(0) is -Inf, so a plain log scale removes every
# zero-death day from the boxes. reorder() below, however, computes the
# median on the untransformed column (zeros included), so the ordering of
# the continents could disagree with the medians actually drawn. The
# pseudo-log transform is defined at 0, so the boxes and the ordering are
# based on the same observations.
# `trans` is used (rather than the newer `transform` alias) so the call
# also works on ggplot2 releases that predate that alias.
# Rows where new_deaths is NA are still dropped by ggplot2.
ggplot(
  df_filtered,
  aes(
    x = reorder(continent, new_deaths, FUN = median, na.rm = TRUE),
    y = new_deaths,
    fill = continent
  )
) +
  geom_boxplot(outlier.color = "red", outlier.alpha = 0.6) +
  scale_y_continuous(
    trans = pseudo_log_trans(sigma = 1, base = 10),
    # Explicit breaks: default breaks are poorly spaced on a pseudo-log
    # axis. Breaks outside the data range are simply not drawn.
    breaks = c(0, 1, 10, 100, 1000, 10000),
    labels = comma
  ) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Persebaran Kematian Baru Harian per Benua",
    x = "Benua (Continent)",
    y = "Penambahan Kematian Baru",
    fill = "Benua"
  )

Statistika Deskriptif
# Daily new-case counts with missing values removed, so the summary
# functions below do not each need na.rm = TRUE.
kasus_baru <- na.omit(df_filtered[["new_cases"]])
# Return the most frequent value (statistical mode) of a vector.
#
# v: an atomic vector.
# Returns a length-one vector of the same type as `v`. When several values
# share the highest count, the one that appears first in `v` is returned
# (which.max() picks the first maximum and unique() keeps first-appearance
# order). NA is counted like any other value.
get_mode <- function(v) {
  distinct_values <- unique(v)
  # match() maps every element to the index of its distinct value;
  # tabulate() then counts how often each index occurs.
  occurrences <- tabulate(match(v, distinct_values))
  distinct_values[which.max(occurrences)]
}
# Descriptive statistics of the NA-free new-case counts.

# Central tendency.
stat_mean <- mean(kasus_baru)
stat_median <- median(kasus_baru)
stat_mode <- get_mode(kasus_baru)

# Quartiles (quantile() default method).
stat_q1 <- quantile(kasus_baru, probs = 0.25)
stat_q3 <- quantile(kasus_baru, probs = 0.75)

# Spread.
stat_range <- max(kasus_baru) - min(kasus_baru)
stat_variance <- var(kasus_baru)
stat_sd <- sd(kasus_baru)

# One row per indicator; only mean, variance and standard deviation are
# rounded here, the remaining values are stored as computed.
df_stats <- data.frame(
  Indikator = c(
    "Mean (Rata-rata)",
    "Median (Nilai Tengah)",
    "Modus (Nilai Sering Muncul)",
    "Q1 (Kuartil 1)",
    "Q3 (Kuartil 3)",
    "Range (Jangkauan)",
    "Variance (Varians)",
    "Standard Deviation"
  ),
  Nilai = c(
    round(stat_mean, 2),
    stat_median,
    stat_mode,
    stat_q1,
    stat_q3,
    stat_range,
    round(stat_variance, 2),
    round(stat_sd, 2)
  )
)

# Render the statistics as an HTML table.
knitr::kable(
  df_stats,
  format = "html",
  caption = "Tabel Statistik Deskriptif untuk new_cases (1-9 April 2020)"
)
Tabel Statistik Deskriptif untuk new_cases (1-9 April 2020)
| Indikator | Nilai |
|:--|--:|
| Mean (Rata-rata) | 384.64 |
| Median (Nilai Tengah) | 10.00 |
| Modus (Nilai Sering Muncul) | 0.00 |
| Q1 (Kuartil 1) | 0.00 |
| Q3 (Kuartil 3) | 83.00 |
| Range (Jangkauan) | 34272.00 |
| Variance (Varians) | 5055647.05 |
| Standard Deviation | 2248.48 |