Memuat library yang dibutuhkan

library(tidyverse)
library(lubridate)
library(scales)

Persiapan Data

# Read the dataset from the CSV file in the working directory
covid_data <- read.csv("owid-covid-data.csv")

# Build the analysis subset: parse the date, coerce the three count columns to
# numeric, keep 1-9 April 2020, drop rows whose continent is an empty string,
# and retain only the listed columns.
df_filtered <- covid_data %>%
  mutate(
    date = as.Date(date),
    across(c(new_cases, new_deaths, total_deaths), as.numeric)
  ) %>%
  filter(
    date >= as.Date("2020-04-01"),
    date <= as.Date("2020-04-09"),
    continent != ""
  ) %>%
  select(
    iso_code, location, continent, date,
    new_cases, new_deaths, total_deaths
  )

# Preview the first rows
head(df_filtered)
##   iso_code    location continent       date new_cases new_deaths total_deaths
## 1      AFG Afghanistan      Asia 2020-04-01        25          0            4
## 2      AFG Afghanistan      Asia 2020-04-02        26          0            4
## 3      AFG Afghanistan      Asia 2020-04-03        43          0            4
## 4      AFG Afghanistan      Asia 2020-04-04         0          0            4
## 5      AFG Afghanistan      Asia 2020-04-05        35          1            5
## 6      AFG Afghanistan      Asia 2020-04-06        29          2            7

Pie Chart

# Total new deaths per continent, plus each continent's percentage share
df_pie <- df_filtered %>%
  group_by(continent) %>%
  summarise(sum_new_deaths = sum(new_deaths, na.rm = TRUE)) %>%
  mutate(prop = prop.table(sum_new_deaths) * 100)

# A pie chart is a single stacked bar drawn in polar coordinates
ggplot(df_pie, aes(x = "", y = prop, fill = continent)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  labs(
    title = "Proporsi Kematian Baru per Benua (1-9 April 2020)",
    fill = "Benua"
  )

Bar Chart

# Prepare the top 10: sum new cases per location, sort descending, keep the
# first ten rows
df_bar <- df_filtered %>%
  group_by(location) %>%
  summarise(sum_cases = sum(new_cases, na.rm = TRUE)) %>%
  arrange(desc(sum_cases)) %>%
  slice_head(n = 10)

# Horizontal bars ordered by total, so the largest value ends up at the top
ggplot(
  df_bar,
  aes(x = reorder(location, sum_cases), y = sum_cases, fill = location)
) +
  geom_col() +
  coord_flip() +
  theme_minimal() +
  theme(panel.grid.major.y = element_blank()) +
  labs(
    title = "Top 10 Negara dengan Penambahan Kasus Tertinggi (1-9 April 2020)",
    x = "Negara (Location)",
    y = "Total Kasus Baru",
    fill = "Negara"
  )

Histogram

# Histogram of daily new cases, stacked by continent, on a log10 x-axis.
# log10() is not finite for values <= 0, so ggplot2 would drop those rows and
# only emit a warning. Filter them out explicitly (filter() also drops NA) and
# state the exclusion in the caption so readers know zero-case days are absent.
df_filtered %>%
  filter(new_cases > 0) %>%
  ggplot(aes(x = new_cases, fill = continent)) +
  geom_histogram(color = "white", bins = 50, alpha = 0.8, position = "stack") +
  scale_x_log10(labels = comma) +
  theme_minimal() +
  labs(
    title = "Distribusi Frekuensi Kasus Baru Harian per Benua",
    x = "Penambahan Kasus Baru",
    y = "Frekuensi Observasi Harian",
    fill = "Benua",
    caption = "Catatan: hanya observasi dengan kasus baru > 0 (skala log10)"
  )

Density Plot

# Density of total deaths per continent on a log10 x-axis.
# log10() is not finite for values <= 0, so ggplot2 would drop those rows and
# only emit a warning. Filter them out explicitly (filter() also drops NA) and
# state the exclusion in the caption.
df_filtered %>%
  filter(total_deaths > 0) %>%
  ggplot(aes(x = total_deaths, fill = continent)) +
  geom_density(alpha = 0.5, color = "white") +
  scale_x_log10(labels = comma) +
  theme_minimal() +
  labs(
    title = "Density Plot: Total Kematian per Benua",
    x = "Total Kematian",
    y = "Kepadatan",
    fill = "Benua",
    caption = "Catatan: hanya observasi dengan total kematian > 0 (skala log10)"
  )

Box Plot

# Box plot of daily new deaths per continent on a log10 axis, continents
# ordered by their median.
# The log10 scale cannot represent values <= 0, so ggplot2 would drop them
# before computing the boxes, while reorder() would still include them in the
# medians used for ordering. Filtering first (filter() also drops NA) makes the
# ordering and the drawn boxes use the same observations.
df_filtered %>%
  filter(new_deaths > 0) %>%
  ggplot(aes(
    x = reorder(continent, new_deaths, FUN = median, na.rm = TRUE),
    y = new_deaths,
    fill = continent
  )) +
  geom_boxplot(outlier.color = "red", outlier.alpha = 0.6) +
  scale_y_log10(labels = comma) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Persebaran Kematian Baru Harian per Benua",
    x = "Benua (Continent)",
    y = "Penambahan Kematian Baru",
    fill = "Benua",
    caption = "Catatan: hanya observasi dengan kematian baru > 0 (skala log10)"
  )

Statistika Deskriptif

# Drop NA values before computing the statistics
kasus_baru <- df_filtered %>%
  pull(new_cases) %>%
  na.omit()

# Helper that returns the mode (most frequent value) of `v`. When several
# values tie for the highest count, the one that appears first in `v` wins.
get_mode <- function(v) {
  candidates <- unique(v)
  counts <- tabulate(match(v, candidates))
  candidates[which.max(counts)]
}

# Descriptive statistics computed from kasus_baru
stat_mean <- mean(kasus_baru)
stat_median <- median(kasus_baru)
stat_mode <- get_mode(kasus_baru)
stat_q1 <- quantile(kasus_baru, probs = 0.25)
stat_q3 <- quantile(kasus_baru, probs = 0.75)
stat_range <- diff(range(kasus_baru))
stat_variance <- var(kasus_baru)
stat_sd <- sd(kasus_baru)

# One row per indicator; mean, variance and standard deviation are rounded to
# two decimals, the remaining values are stored as computed
df_stats <- data.frame(
  Indikator = c(
    "Mean (Rata-rata)",
    "Median (Nilai Tengah)",
    "Modus (Nilai Sering Muncul)",
    "Q1 (Kuartil 1)",
    "Q3 (Kuartil 3)",
    "Range (Jangkauan)",
    "Variance (Varians)",
    "Standard Deviation"
  ),
  Nilai = c(
    round(stat_mean, 2),
    stat_median,
    stat_mode,
    stat_q1,
    stat_q3,
    stat_range,
    round(stat_variance, 2),
    round(stat_sd, 2)
  )
)

# Render the statistics table
knitr::kable(
  df_stats,
  format = "html",
  caption = "Tabel Statistik Deskriptif untuk new_cases (1-9 April 2020)"
)
Tabel Statistik Deskriptif untuk new_cases (1-9 April 2020)

| Indikator                   |      Nilai |
|:----------------------------|-----------:|
| Mean (Rata-rata)            |     384.64 |
| Median (Nilai Tengah)       |      10.00 |
| Modus (Nilai Sering Muncul) |       0.00 |
| Q1 (Kuartil 1)              |       0.00 |
| Q3 (Kuartil 3)              |      83.00 |
| Range (Jangkauan)           |   34272.00 |
| Variance (Varians)          | 5055647.05 |
| Standard Deviation          |    2248.48 |