Install dan Load Library

Berikut ini adalah library yang digunakan untuk mengolah data :

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(readxl)
library(ggthemes)
library(gridExtra)
library(scales)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
## 
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
## 
##     col_factor
# Strongly favour fixed over scientific notation when printing numbers.
options(scipen = 999)

# Shared ggplot2 theme for the charts in this report: bold centred title,
# bold axis titles, x tick labels rotated 45 degrees, white panel with
# light-grey major grid lines only, and the legend placed below the plot.
custom_theme <- local({
  bold_axis_title <- element_text(size = 14, face = "bold")
  tick_label_size <- 12

  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title.x = bold_axis_title,
    axis.title.y = bold_axis_title,
    axis.text.x = element_text(size = tick_label_size, angle = 45, hjust = 1),
    axis.text.y = element_text(size = tick_label_size),
    panel.background = element_rect(fill = "white"),
    panel.grid.major = element_line(color = "gray80"),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10)
  )
})
# Blank theme without a legend title.
# The expression used to be left unassigned, so it only printed the complete
# theme structure into the report and affected no plot. It is now stored
# under a name so that a plot can add it with `+`.
# NOTE(review): presumably intended for the pie chart at the end of the
# report - confirm and apply it there if so.
void_theme <- theme_void() +
  theme(legend.title = element_blank())
## List of 136
##  $ line                            : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ rect                            : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ text                            :List of 11
##   ..$ family       : chr ""
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 11
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : num 0
##   ..$ lineheight   : num 0.9
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ title                           : NULL
##  $ aspect.ratio                    : NULL
##  $ axis.title                      : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.title.x                    : NULL
##  $ axis.title.x.top                : NULL
##  $ axis.title.x.bottom             : NULL
##  $ axis.title.y                    : NULL
##  $ axis.title.y.left               : NULL
##  $ axis.title.y.right              : NULL
##  $ axis.text                       : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.text.x                     : NULL
##  $ axis.text.x.top                 : NULL
##  $ axis.text.x.bottom              : NULL
##  $ axis.text.y                     : NULL
##  $ axis.text.y.left                : NULL
##  $ axis.text.y.right               : NULL
##  $ axis.text.theta                 : NULL
##  $ axis.text.r                     : NULL
##  $ axis.ticks                      : NULL
##  $ axis.ticks.x                    : NULL
##  $ axis.ticks.x.top                : NULL
##  $ axis.ticks.x.bottom             : NULL
##  $ axis.ticks.y                    : NULL
##  $ axis.ticks.y.left               : NULL
##  $ axis.ticks.y.right              : NULL
##  $ axis.ticks.theta                : NULL
##  $ axis.ticks.r                    : NULL
##  $ axis.minor.ticks.x.top          : NULL
##  $ axis.minor.ticks.x.bottom       : NULL
##  $ axis.minor.ticks.y.left         : NULL
##  $ axis.minor.ticks.y.right        : NULL
##  $ axis.minor.ticks.theta          : NULL
##  $ axis.minor.ticks.r              : NULL
##  $ axis.ticks.length               : 'simpleUnit' num 0points
##   ..- attr(*, "unit")= int 8
##  $ axis.ticks.length.x             : NULL
##  $ axis.ticks.length.x.top         : NULL
##  $ axis.ticks.length.x.bottom      : NULL
##  $ axis.ticks.length.y             : NULL
##  $ axis.ticks.length.y.left        : NULL
##  $ axis.ticks.length.y.right       : NULL
##  $ axis.ticks.length.theta         : NULL
##  $ axis.ticks.length.r             : NULL
##  $ axis.minor.ticks.length         : 'simpleUnit' num 0points
##   ..- attr(*, "unit")= int 8
##  $ axis.minor.ticks.length.x       : NULL
##  $ axis.minor.ticks.length.x.top   : NULL
##  $ axis.minor.ticks.length.x.bottom: NULL
##  $ axis.minor.ticks.length.y       : NULL
##  $ axis.minor.ticks.length.y.left  : NULL
##  $ axis.minor.ticks.length.y.right : NULL
##  $ axis.minor.ticks.length.theta   : NULL
##  $ axis.minor.ticks.length.r       : NULL
##  $ axis.line                       : NULL
##  $ axis.line.x                     : NULL
##  $ axis.line.x.top                 : NULL
##  $ axis.line.x.bottom              : NULL
##  $ axis.line.y                     : NULL
##  $ axis.line.y.left                : NULL
##  $ axis.line.y.right               : NULL
##  $ axis.line.theta                 : NULL
##  $ axis.line.r                     : NULL
##  $ legend.background               : NULL
##  $ legend.margin                   : NULL
##  $ legend.spacing                  : NULL
##  $ legend.spacing.x                : NULL
##  $ legend.spacing.y                : NULL
##  $ legend.key                      : NULL
##  $ legend.key.size                 : 'simpleUnit' num 1.2lines
##   ..- attr(*, "unit")= int 3
##  $ legend.key.height               : NULL
##  $ legend.key.width                : NULL
##  $ legend.key.spacing              : 'simpleUnit' num 5.5points
##   ..- attr(*, "unit")= int 8
##  $ legend.key.spacing.x            : NULL
##  $ legend.key.spacing.y            : NULL
##  $ legend.frame                    : NULL
##  $ legend.ticks                    : NULL
##  $ legend.ticks.length             : 'rel' num 0.2
##  $ legend.axis.line                : NULL
##  $ legend.text                     :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.text.position            : NULL
##  $ legend.title                    : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ legend.title.position           : NULL
##  $ legend.position                 : chr "right"
##  $ legend.position.inside          : NULL
##  $ legend.direction                : NULL
##  $ legend.byrow                    : NULL
##  $ legend.justification            : NULL
##  $ legend.justification.top        : NULL
##  $ legend.justification.bottom     : NULL
##  $ legend.justification.left       : NULL
##  $ legend.justification.right      : NULL
##  $ legend.justification.inside     : NULL
##  $ legend.location                 : NULL
##  $ legend.box                      : NULL
##  $ legend.box.just                 : NULL
##  $ legend.box.margin               : NULL
##  $ legend.box.background           : NULL
##  $ legend.box.spacing              : NULL
##   [list output truncated]
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi TRUE
##  - attr(*, "validate")= logi TRUE

Import Data

Data berikut diinput dari file :

# Load the per-country case and death totals from the Excel workbook.
# The project folder is machine-specific, so the working directory is changed
# only when that folder exists; otherwise the script assumes it is already
# being run from the project folder and relative paths resolve from there.
project_dir <- "C:/Users/Acer/Pictures/Probstat/Pertemuan Ketiga (2-3-2025)"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
data <- read_excel("data_tugas.xlsx")
head(data)
## # A tibble: 6 × 4
##   Negara    Hari                Total_Kasus Total_Kematian
##   <chr>     <dttm>                    <dbl>          <dbl>
## 1 Brunei    2025-02-23 00:00:00      350550            182
## 2 Cambodia  2025-02-23 00:00:00      139326           3056
## 3 Indonesia 2025-02-23 00:00:00     6830212         162059
## 4 Laos      2025-02-23 00:00:00      219060            671
## 5 Malaysia  2025-02-23 00:00:00     5329836          37351
## 6 Myanmar   2025-02-23 00:00:00      643241          19494
# Read the vaccination time series and preview its first six rows.
df <- read.csv(file = "Total Kasus & Kematian/vaccinations.csv")
head(df, n = 6L)
##       country       date total_vaccinations people_vaccinated
## 1 Afghanistan 2021-02-22                  0                 0
## 2 Afghanistan 2021-02-23                 NA                NA
## 3 Afghanistan 2021-02-24                 NA                NA
## 4 Afghanistan 2021-02-25                 NA                NA
## 5 Afghanistan 2021-02-26                 NA                NA
## 6 Afghanistan 2021-02-27                 NA                NA
##   people_fully_vaccinated total_boosters daily_vaccinations
## 1                      NA             NA                 NA
## 2                      NA             NA                 NA
## 3                      NA             NA                 NA
## 4                      NA             NA                 NA
## 5                      NA             NA                 NA
## 6                      NA             NA                 NA
##   daily_vaccinations_smoothed daily_people_vaccinated_smoothed
## 1                          NA                               NA
## 2                    1366.667                         1366.667
## 3                    1366.667                         1366.667
## 4                    1366.667                         1366.667
## 5                    1366.667                         1366.667
## 6                    1366.667                         1366.667
##   total_vaccinations_per_hundred people_vaccinated_per_hundred
## 1                              0                             0
## 2                             NA                            NA
## 3                             NA                            NA
## 4                             NA                            NA
## 5                             NA                            NA
## 6                             NA                            NA
##   people_fully_vaccinated_per_hundred total_boosters_per_hundred
## 1                                  NA                         NA
## 2                                  NA                         NA
## 3                                  NA                         NA
## 4                                  NA                         NA
## 5                                  NA                         NA
## 6                                  NA                         NA
##   daily_people_vaccinated_smoothed_per_hundred
## 1                                           NA
## 2                                  0.003367929
## 3                                  0.003367929
## 4                                  0.003367929
## 5                                  0.003367929
## 6                                  0.003367929
##   daily_vaccinations_smoothed_per_million people_unvaccinated share_of_boosters
## 1                                      NA            39671220                NA
## 2                                33.67929                  NA                NA
## 3                                33.67929                  NA                NA
## 4                                33.67929                  NA                NA
## 5                                33.67929                  NA                NA
## 6                                33.67929                  NA                NA
##   total_vaccinations_interpolated people_vaccinated_interpolated
## 1                           0.000                          0.000
## 2                        1366.667                       1366.667
## 3                        2733.333                       2733.333
## 4                        4100.000                       4100.000
## 5                        5466.667                       5466.667
## 6                        6833.333                       6833.333
##   people_fully_vaccinated_interpolated total_boosters_interpolated
## 1                                55624                           0
## 2                                55624                           0
## 3                                55624                           0
## 4                                55624                           0
## 5                                55624                           0
## 6                                55624                           0
##   total_vaccinations_no_boosters_interpolated
## 1                                       0.000
## 2                                    1366.667
## 3                                    2733.333
## 4                                    4100.000
## 5                                    5466.667
## 6                                    6833.333
##   total_vaccinations_no_boosters_per_hundred_interpolated
## 1                                                      NA
## 2                                                      NA
## 3                                                      NA
## 4                                                      NA
## 5                                                      NA
## 6                                                      NA
##   rolling_vaccinations_6m rolling_vaccinations_6m_per_hundred
## 1                      NA                                  NA
## 2                1366.667                         0.003367929
## 3                2733.333                         0.006735858
## 4                4100.000                         0.010103787
## 5                5466.667                         0.013471715
## 6                6833.333                         0.016839644
##   rolling_vaccinations_9m rolling_vaccinations_9m_per_hundred
## 1                      NA                                  NA
## 2                1366.667                         0.003367929
## 3                2733.333                         0.006735858
## 4                4100.000                         0.010103787
## 5                5466.667                         0.013471715
## 6                6833.333                         0.016839644
##   rolling_vaccinations_12m rolling_vaccinations_12m_per_hundred
## 1                       NA                                   NA
## 2                 1366.667                          0.003367929
## 3                 2733.333                          0.006735858
## 4                 4100.000                          0.010103787
## 5                 5466.667                          0.013471715
## 6                 6833.333                          0.016839644
# Read the cumulative doses per manufacturer and preview the first six rows.
# This rebinds `df`, replacing the table that was loaded before it.
df <- read.csv(file = "Total Kasus & Kematian/varian.csv")
head(df, n = 6L)
##      Entity Code        Day
## 1 Argentina  ARG 2020-12-04
## 2 Argentina  ARG 2020-12-05
## 3 Argentina  ARG 2020-12-06
## 4 Argentina  ARG 2020-12-07
## 5 Argentina  ARG 2020-12-08
## 6 Argentina  ARG 2020-12-09
##   COVID.19.doses..cumulative....Manufacturer.Pfizer.BioNTech
## 1                                                          1
## 2                                                          1
## 3                                                          1
## 4                                                          1
## 5                                                          1
## 6                                                          1
##   COVID.19.doses..cumulative....Manufacturer.Moderna
## 1                                                  1
## 2                                                  1
## 3                                                  1
## 4                                                  1
## 5                                                  1
## 6                                                  1
##   COVID.19.doses..cumulative....Manufacturer.Oxford.AstraZeneca
## 1                                                             1
## 2                                                             1
## 3                                                             1
## 4                                                             1
## 5                                                             1
## 6                                                             1
##   COVID.19.doses..cumulative....Manufacturer.Johnson.Johnson
## 1                                                          0
## 2                                                          0
## 3                                                          0
## 4                                                          0
## 5                                                          0
## 6                                                          0
##   COVID.19.doses..cumulative....Manufacturer.Sputnik.V
## 1                                                20493
## 2                                                20493
## 3                                                20493
## 4                                                20493
## 5                                                20493
## 6                                                20493
##   COVID.19.doses..cumulative....Manufacturer.Sinovac
## 1                                                  0
## 2                                                  0
## 3                                                  0
## 4                                                  0
## 5                                                  0
## 6                                                  0
##   COVID.19.doses..cumulative....Manufacturer.Sinopharm.Beijing
## 1                                                            1
## 2                                                            1
## 3                                                            1
## 4                                                            1
## 5                                                            1
## 6                                                            1
##   COVID.19.doses..cumulative....Manufacturer.CanSino
## 1                                                  1
## 2                                                  1
## 3                                                  1
## 4                                                  1
## 5                                                  1
## 6                                                  1
##   COVID.19.doses..cumulative....Manufacturer.Novavax
## 1                                                  0
## 2                                                  0
## 3                                                  0
## 4                                                  0
## 5                                                  0
## 6                                                  0
##   COVID.19.doses..cumulative....Manufacturer.Covaxin
## 1                                                  0
## 2                                                  0
## 3                                                  0
## 4                                                  0
## 5                                                  0
## 6                                                  0
##   COVID.19.doses..cumulative....Manufacturer.Medicago
## 1                                                   0
## 2                                                   0
## 3                                                   0
## 4                                                   0
## 5                                                   0
## 6                                                   0
##   COVID.19.doses..cumulative....Manufacturer.Sanofi.GSK
## 1                                                     0
## 2                                                     0
## 3                                                     0
## 4                                                     0
## 5                                                     0
## 6                                                     0
##   COVID.19.doses..cumulative....Manufacturer.SKYCovione
## 1                                                     0
## 2                                                     0
## 3                                                     0
## 4                                                     0
## 5                                                     0
## 6                                                     0
##   COVID.19.doses..cumulative....Manufacturer.Valneva
## 1                                                  0
## 2                                                  0
## 3                                                  0
## 4                                                  0
## 5                                                  0
## 6                                                  0

Datasets diambil dari situs Our World in Data “COVID-19 Pandemic” dengan link berikut : https://ourworldindata.org/coronavirus#all-charts

Visualisasi Data Total Kasus dan Total Kematian COVID-19

1. Bar Chart

Berdasarkan data COVID-19 dengan tujuan untuk menunjukkan Total Kasus dan Total Kematian per Negara. Dilampirkan visualisasi data dalam bentuk Bar Chart:

# Bar chart of total cases per country; countries are ordered along the
# x axis by ascending case count.
bar_chart <- ggplot(
  data = data,
  mapping = aes(x = reorder(Negara, Total_Kasus), y = Total_Kasus)
) +
  geom_col(fill = "blue") +
  labs(
    title = "Total Kasus per Negara",
    x = "Negara",
    y = "Total Kasus"
  ) +
  custom_theme

print(bar_chart)

# Bar chart of total deaths per country; countries are ordered along the
# x axis by ascending death count. Rebinds `bar_chart`.
bar_chart <- ggplot(
  data = data,
  mapping = aes(x = reorder(Negara, Total_Kematian), y = Total_Kematian)
) +
  geom_col(fill = "red") +
  labs(
    title = "Total Kematian per Negara",
    x = "Negara",
    y = "Total Kematian"
  ) +
  custom_theme

print(bar_chart)

Interpretasi : Berdasarkan kedua Bar Chart di atas, Vietnam memiliki Total Kasus tertinggi, diikuti oleh Indonesia lalu Malaysia, sedangkan Total Kematian tertinggi ditempati oleh Indonesia dengan selisih yang sangat jauh dari negara ASEAN lainnya.

2. Histogram

Berdasarkan data COVID-19 dengan tujuan menunjukkan Total Kasus per Negara. Dilampirkan visualisasi data dalam bentuk Histogram :

# Histogram of total cases using bins that are one million cases wide.
histogram <- ggplot(data = data, mapping = aes(x = Total_Kasus)) +
  geom_histogram(binwidth = 1e6, colour = "black", fill = "blue") +
  labs(
    title = "Distribusi Total Kasus",
    x = "Total Kasus",
    y = "Frekuensi"
  ) +
  custom_theme

print(histogram)

Interpretasi : Berdasarkan Histogram di atas, mayoritas negara ASEAN memiliki Total Kasus antara 0 hingga 5 juta, dengan beberapa outlier seperti Vietnam yang memiliki total kasus yang sangat tinggi.

3. Density Plot

Berdasarkan data COVID-19 dengan tujuan menunjukkan Total Kasus dan Total Kematian per Negara. Dilampirkan visualisasi data dalam bentuk Density Plot :

# Kernel density estimate of total cases, drawn as a half-transparent area.
density_plot <- ggplot(data = data, mapping = aes(x = Total_Kasus)) +
  geom_density(alpha = 0.5, fill = "blue") +
  labs(
    title = "Density Plot Total Kasus",
    x = "Total Kasus",
    y = "Density"
  ) +
  custom_theme

print(density_plot)

# Kernel density estimate of total deaths, drawn as a half-transparent area.
# Rebinds `density_plot`.
density_plot <- ggplot(data = data, mapping = aes(x = Total_Kematian)) +
  geom_density(alpha = 0.5, fill = "blue") +
  labs(
    title = "Density Plot Total Kematian",
    x = "Total Kematian",
    y = "Density"
  ) +
  custom_theme

print(density_plot)

Interpretasi : Berdasarkan Density Plot, puncak kurva berada di sisi kiri dengan ekor yang memanjang ke kanan (menceng kanan/right-skewed). Artinya mayoritas negara ASEAN memiliki nilai Kasus dan Kematian akibat COVID-19 yang relatif rendah, dengan beberapa negara memiliki nilai yang sangat tinggi.

4. Box Plot

Berdasarkan data COVID-19 dengan tujuan menunjukkan Total Kasus dan Total Kematian per Negara. Dilampirkan visualisasi data dalam bentuk Box Plot :

# Single box plot of total cases; the empty x value puts every row in one box.
boxplot <- ggplot(data = data, mapping = aes(x = "", y = Total_Kasus)) +
  geom_boxplot(alpha = 0.5, fill = "blue") +
  labs(
    title = "Boxplot Total Kasus",
    x = "",
    y = "Total Kasus"
  ) +
  custom_theme

print(boxplot)

# Single box plot of total deaths; the empty x value puts every row in one
# box. Rebinds `boxplot`.
boxplot <- ggplot(data = data, mapping = aes(x = "", y = Total_Kematian)) +
  geom_boxplot(alpha = 0.5, fill = "blue") +
  labs(
    title = "Boxplot Total Kematian",
    x = "",
    y = "Total Kematian"
  ) +
  custom_theme

print(boxplot)

Interpretasi : Berdasarkan Box Plot dapat dikatakan mayoritas negara Asean memiliki nilai Kasus dan Kematian yang rendah. Terdapat beberapa outlier, terutama Vietnam dan Indonesia, yang memiliki total nilai jauh lebih tinggi dari yang lainnya.

Mean (Rata-Rata)

Mean untuk Total Kasus :

# Arithmetic mean of total cases across all rows.
mean(data[["Total_Kasus"]])
## [1] 3709310

Rata-rata total kasus COVID-19 pada negara ASEAN sekitar 3,7 juta.

Mean untuk Total Kematian :

# Arithmetic mean of total deaths across all rows.
mean(data[["Total_Kematian"]])
## [1] 36965

Rata-rata total kematian akibat COVID-19 pada negara ASEAN sekitar 37 ribu.

Median (Nilai Tengah)

Median atau nilai tengah digunakan untuk menentukan nilai yang terletak di tengah jika data diurutkan. Median untuk Total Kasus :

# Median of total cases.
median(data[["Total_Kasus"]])
## [1] 3573269

Median (sekitar 3,57 juta) hanya sedikit lebih kecil daripada mean (sekitar 3,71 juta), sehingga distribusi total kasus di ASEAN hampir simetris.

Median untuk Total Kematian :

# Median of total deaths.
median(data[["Total_Kematian"]])
## [1] 27118.5

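Median (sekitar 27 ribu) jauh lebih kecil daripada mean (sekitar 37 ribu), sehingga distribusi total kematian di ASEAN condong ke nilai yang rendah (menceng kanan).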

Modus (Nilai yang sering muncul)

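Modus untuk Total Kasus (catatan: jika beberapa nilai memiliki frekuensi tertinggi yang sama — misalnya ketika semua nilai berbeda — fungsi di bawah mengembalikan nilai yang muncul pertama dalam data) :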

# Return the most frequent value of `x`.
# When several values share the highest count, the one that appears first in
# `x` is returned; if every value is distinct this is simply the first element.
modus <- function(x) {
  distinct_values <- unique(x)
  occurrences <- tabulate(match(x, distinct_values))
  most_common <- which.max(occurrences)
  distinct_values[most_common]
}

# Mode of total cases.
modus(data[["Total_Kasus"]])
## [1] 350550

Modus untuk Total Kematian :

# Mode of total deaths.
modus(data[["Total_Kematian"]])
## [1] 182

Ukuran Letak Data

(Q0/Min, Q1, Q2/Median, Q3, Q4/Max) Digunakan untuk memberikan gambaran atau rangkuman dari data. Letak Data untuk Total Kasus :

# Minimum, quartiles and maximum of total cases.
quantile(data[["Total_Kasus"]], probs = seq(0, 1, by = 0.25))
##         0%        25%        50%        75%       100% 
##   139326.0   423722.8  3573269.0  5199961.5 11624000.0

Letak Data untuk Total Kematian :

# Minimum, quartiles and maximum of total deaths.
quantile(data[["Total_Kematian"]], probs = seq(0, 1, by = 0.25))
##        0%       25%       50%       75%      100% 
##    182.00   2282.00  27118.50  41742.25 162059.00

Ukuran Penyebaran Data

  1. Jangkauan: jangkauan atau range adalah selisih antara nilai terbesar dan nilai terkecil dari data. Range untuk Total Kasus :
# Range of total cases: largest value minus smallest value.
max(data$Total_Kasus) - min(data$Total_Kasus)
## [1] 11484674

Range untuk Total Kematian :

# Range of total deaths: largest value minus smallest value.
max(data$Total_Kematian) - min(data$Total_Kematian)
## [1] 161877
  2. Ragam: ragam atau variance mengukur rata-rata kuadrat simpangan data terhadap nilai rata-ratanya. Ragam untuk Total Kasus :
# Sample variance of total cases.
var(data[["Total_Kasus"]])
## [1] 13612182646848

Ragam untuk Total Kematian :

# Sample variance of total deaths.
var(data[["Total_Kematian"]])
## [1] 2436963170
  3. Standar Deviasi: deviasi standar atau standard deviation adalah akar kuadrat dari ragam, sehingga mengukur sebaran dalam satuan yang sama dengan data awalnya. Standar Deviasi untuk Total Kasus :
# Sample standard deviation of total cases.
sd(data[["Total_Kasus"]])
## [1] 3689469

Standar Deviasi untuk Total Kematian :

# Sample standard deviation of total deaths.
sd(data[["Total_Kematian"]])
## [1] 49365.61

Visualisasi Data Status Vaksinasi dan Varian Vaksin

# Reload the vaccination table, then zero-fill `people_unvaccinated` when the
# column is present and report which branch was taken.
df <- read.csv(file = "Total Kasus & Kematian/vaccinations.csv")
if (!("people_unvaccinated" %in% names(df))) {
  print("Kolom 'people_unvaccinated' tidak ditemukan dalam dataset.")
} else {
  # Missing entries become 0; every other value is left untouched.
  df$people_unvaccinated[is.na(df$people_unvaccinated)] <- 0

  print("Kolom 'people_unvaccinated' berhasil diproses.")
}
## [1] "Kolom 'people_unvaccinated' berhasil diproses."
# Inspect the structure of the dataset after the change.
str(object = df)
## 'data.frame':    203057 obs. of  29 variables:
##  $ country                                                : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ date                                                   : chr  "2021-02-22" "2021-02-23" "2021-02-24" "2021-02-25" ...
##  $ total_vaccinations                                     : num  0 NA NA NA NA NA 8200 NA NA NA ...
##  $ people_vaccinated                                      : num  0 NA NA NA NA NA 8200 NA NA NA ...
##  $ people_fully_vaccinated                                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_boosters                                         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ daily_vaccinations                                     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ daily_vaccinations_smoothed                            : num  NA 1367 1367 1367 1367 ...
##  $ daily_people_vaccinated_smoothed                       : num  NA 1367 1367 1367 1367 ...
##  $ total_vaccinations_per_hundred                         : num  0 NA NA NA NA ...
##  $ people_vaccinated_per_hundred                          : num  0 NA NA NA NA ...
##  $ people_fully_vaccinated_per_hundred                    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_boosters_per_hundred                             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ daily_people_vaccinated_smoothed_per_hundred           : num  NA 0.00337 0.00337 0.00337 0.00337 ...
##  $ daily_vaccinations_smoothed_per_million                : num  NA 33.7 33.7 33.7 33.7 ...
##  $ people_unvaccinated                                    : num  39671220 0 0 0 0 ...
##  $ share_of_boosters                                      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_vaccinations_interpolated                        : num  0 1367 2733 4100 5467 ...
##  $ people_vaccinated_interpolated                         : num  0 1367 2733 4100 5467 ...
##  $ people_fully_vaccinated_interpolated                   : num  55624 55624 55624 55624 55624 ...
##  $ total_boosters_interpolated                            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ total_vaccinations_no_boosters_interpolated            : num  0 1367 2733 4100 5467 ...
##  $ total_vaccinations_no_boosters_per_hundred_interpolated: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ rolling_vaccinations_6m                                : num  NA 1367 2733 4100 5467 ...
##  $ rolling_vaccinations_6m_per_hundred                    : num  NA 0.00337 0.00674 0.0101 0.01347 ...
##  $ rolling_vaccinations_9m                                : num  NA 1367 2733 4100 5467 ...
##  $ rolling_vaccinations_9m_per_hundred                    : num  NA 0.00337 0.00674 0.0101 0.01347 ...
##  $ rolling_vaccinations_12m                               : num  NA 1367 2733 4100 5467 ...
##  $ rolling_vaccinations_12m_per_hundred                   : num  NA 0.00337 0.00674 0.0101 0.01347 ...
# Replace every NA in the dataset with 0.
# NOTE(review): this also zero-fills the cumulative and per-hundred columns,
# so a 0 afterwards may mean "not reported" rather than a true zero - confirm
# this is intended before aggregating those columns.
df <- replace(df, is.na(df), 0)

# Inspect the result after the replacement.
str(object = df)
## 'data.frame':    203057 obs. of  29 variables:
##  $ country                                                : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ date                                                   : chr  "2021-02-22" "2021-02-23" "2021-02-24" "2021-02-25" ...
##  $ total_vaccinations                                     : num  0 0 0 0 0 0 8200 0 0 0 ...
##  $ people_vaccinated                                      : num  0 0 0 0 0 0 8200 0 0 0 ...
##  $ people_fully_vaccinated                                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ total_boosters                                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ daily_vaccinations                                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ daily_vaccinations_smoothed                            : num  0 1367 1367 1367 1367 ...
##  $ daily_people_vaccinated_smoothed                       : num  0 1367 1367 1367 1367 ...
##  $ total_vaccinations_per_hundred                         : num  0 0 0 0 0 ...
##  $ people_vaccinated_per_hundred                          : num  0 0 0 0 0 ...
##  $ people_fully_vaccinated_per_hundred                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ total_boosters_per_hundred                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ daily_people_vaccinated_smoothed_per_hundred           : num  0 0.00337 0.00337 0.00337 0.00337 ...
##  $ daily_vaccinations_smoothed_per_million                : num  0 33.7 33.7 33.7 33.7 ...
##  $ people_unvaccinated                                    : num  39671220 0 0 0 0 ...
##  $ share_of_boosters                                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ total_vaccinations_interpolated                        : num  0 1367 2733 4100 5467 ...
##  $ people_vaccinated_interpolated                         : num  0 1367 2733 4100 5467 ...
##  $ people_fully_vaccinated_interpolated                   : num  55624 55624 55624 55624 55624 ...
##  $ total_boosters_interpolated                            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ total_vaccinations_no_boosters_interpolated            : num  0 1367 2733 4100 5467 ...
##  $ total_vaccinations_no_boosters_per_hundred_interpolated: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ rolling_vaccinations_6m                                : num  0 1367 2733 4100 5467 ...
##  $ rolling_vaccinations_6m_per_hundred                    : num  0 0.00337 0.00674 0.0101 0.01347 ...
##  $ rolling_vaccinations_9m                                : num  0 1367 2733 4100 5467 ...
##  $ rolling_vaccinations_9m_per_hundred                    : num  0 0.00337 0.00674 0.0101 0.01347 ...
##  $ rolling_vaccinations_12m                               : num  0 1367 2733 4100 5467 ...
##  $ rolling_vaccinations_12m_per_hundred                   : num  0 0.00337 0.00674 0.0101 0.01347 ...
# Latest *reported* row per country.
# Earlier in the script every NA is replaced with 0, and the cumulative
# vaccination columns are only reported on some days. The row at a country's
# final date therefore often holds an imputed 0 rather than a real figure,
# which would drag the global mean towards zero. Restricting to rows with a
# positive value first makes `max(date)` pick the most recent real report.
# Countries that never reported a value are left out of the average.
# `date` is stored as "YYYY-MM-DD" text, so max() on it is chronological.
df_global <- df %>%
  filter(people_vaccinated_per_hundred > 0) %>%
  group_by(country) %>%
  filter(date == max(date)) %>%
  ungroup()

# Unweighted mean of the per-country share of people vaccinated.
# NOTE(review): this averages entries without population weighting, and any
# aggregate rows present in `country` are averaged too - confirm this is the
# intended definition of the "global" figure.
global_vaccinated <- mean(df_global$people_vaccinated_per_hundred, na.rm = TRUE)
global_unvaccinated <- 100 - global_vaccinated

# Two-row data frame (one row per status) that feeds the charts below
df_status_global <- data.frame(
  status = c("Vaccinated", "Unvaccinated"),
  percentage = unname(c(
    Vaccinated = global_vaccinated,
    Unvaccinated = global_unvaccinated
  ))
)

# Pie chart of the global vaccination status: a single stacked bar drawn in
# polar coordinates
ggplot(
  data = df_status_global,
  mapping = aes(x = "", y = percentage, fill = status)
) +
  geom_col(width = 0.5) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = c(Vaccinated = "blue", Unvaccinated = "red")) +
  labs(title = "Status Vaksinasi Global", fill = "Status") +
  theme_minimal()

# Bar chart of the same global vaccination shares, one bar per status
ggplot(
  data = df_status_global,
  mapping = aes(x = status, y = percentage, fill = status)
) +
  geom_col(width = 0.5) +
  scale_fill_manual(values = c(Vaccinated = "blue", Unvaccinated = "red")) +
  labs(
    title = "Status Vaksinasi Global",
    x = "Status",
    y = "Persentase (%)",
    fill = "Status"
  ) +
  theme_minimal()

# ASEAN countries included in the chart. The last entry previously read
# "Philippines ETC", which matches no country name, so the Philippines was
# silently excluded from the filter below.
negara_asean <- c("Indonesia", "Malaysia", "Singapore", "Thailand", "Vietnam",
                  "Philippines")

# Total vaccinated / unvaccinated people across the selected countries.
# Both source columns are running totals reported on a subset of days (all
# other days hold 0 after the NA replacement), so adding up every daily row
# would count the same people many times. Instead, take each country's most
# recent reported (positive) value and sum those across countries.
# `date` is "YYYY-MM-DD" text, so sorting by it is chronological.
df_asean <- df %>%
  filter(country %in% negara_asean) %>%
  arrange(country, date) %>%
  group_by(country) %>%
  summarise(
    latest_vaccinated = dplyr::last(
      people_vaccinated[which(people_vaccinated > 0)],
      default = 0
    ),
    latest_unvaccinated = dplyr::last(
      people_unvaccinated[which(people_unvaccinated > 0)],
      default = 0
    ),
    .groups = "drop"
  ) %>%
  summarise(
    total_vaccinated = sum(latest_vaccinated, na.rm = TRUE),
    total_unvaccinated = sum(latest_unvaccinated, na.rm = TRUE)
  ) %>%
  # Long format: one row per status, as expected by the pie chart
  pivot_longer(cols = everything(), names_to = "status", values_to = "jumlah")

# Human-readable status labels (these are the keys of the fill palette)
df_asean$status <- recode(df_asean$status,
                          "total_vaccinated" = "Vaccinated",
                          "total_unvaccinated" = "Unvaccinated")

# Pie chart of the ASEAN totals; the title lists the countries that were
# included
ggplot(data = df_asean, mapping = aes(x = "", y = jumlah, fill = status)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = c(Vaccinated = "blue", Unvaccinated = "red")) +
  labs(
    title = sprintf(
      "Distribusi Status Vaksinasi di ASEAN\n(%s)",
      toString(negara_asean)
    ),
    fill = "Status"
  ) +
  theme_void()

# Load the doses-by-manufacturer dataset.
# NOTE: this overwrites the `df` object used by the earlier sections.
df <- read.csv("Total Kasus & Kematian/varian.csv")

# Dose columns only (identifier columns Entity, Code and Day removed)
vaksin_data <- df %>% select(-c(Entity, Code, Day))

# Total doses per vaccine.
# The dose columns are cumulative, so summing every daily row would count the
# same doses once per day. Take each entity's peak value per vaccine instead,
# then add the peaks up. `c(0, .x)` makes an all-NA column yield 0 rather
# than -Inf with a warning.
# NOTE(review): if `Entity` contains aggregate rows (regions/unions) next to
# their member countries, those doses are still counted twice - verify.
dosis_per_entity <- df %>%
  select(-c(Code, Day)) %>%
  group_by(Entity) %>%
  summarise(
    across(everything(), ~ max(c(0, .x), na.rm = TRUE)),
    .groups = "drop"
  )
total_dosis <- colSums(select(dosis_per_entity, -Entity))

# Data frame for plotting: one row per vaccine column
df_vaksin <- data.frame(Vaksin = names(total_dosis), Jumlah = total_dosis)

# Ten vaccines with the most doses (slice_max replaces the superseded top_n)
df_vaksin_top10 <- df_vaksin %>% slice_max(Jumlah, n = 10)
# Pie chart of the ten vaccines with the most doses
ggplot(
  data = df_vaksin_top10,
  mapping = aes(x = "", y = Jumlah, fill = Vaksin)
) +
  geom_col(width = 1) +
  coord_polar("y") +
  ggtitle("Distribusi 10 Vaksin COVID-19 Terbanyak") +
  theme_void()

# Keep only the rows whose Entity is one of the ASEAN countries
asean_countries <- c(
  "Indonesia", "Malaysia", "Thailand", "Singapore", "Vietnam",
  "Philippines", "Myanmar", "Cambodia", "Laos", "Brunei"
)
varian_asean <- filter(df, Entity %in% asean_countries)

# Candidate vaccine columns: one cumulative-dose column per manufacturer,
# all sharing the same column-name prefix
vaksin_cols <- paste0(
  "COVID.19.doses..cumulative....Manufacturer.",
  c(
    "Pfizer.BioNTech",
    "Moderna",
    "Oxford.AstraZeneca",
    "Johnson.Johnson",
    "Sputnik.V",
    "Sinovac",
    "Sinopharm.Beijing",
    "CanSino",
    "Novavax",
    "Covaxin",
    "Medicago",
    "Sanofi.GSK",
    "SKYCovione",
    "Valneva"
  )
)

# Keep only the candidates that actually exist as columns of the dataset
vaksin_cols <- vaksin_cols[vaksin_cols %in% colnames(varian_asean)]

# Abort when none of the candidate columns is present
if (length(vaksin_cols) < 1L) {
  stop("Tidak ada data vaksin yang sesuai.")
}

# Total doses per vaccine across the ASEAN countries.
# The selected columns are cumulative, so summing every daily row would count
# the same doses once per day. Take each country's peak value per vaccine
# and add those peaks up. `c(0, .x)` makes an all-NA column yield 0 instead
# of -Inf with a warning.
peak_by_country <- varian_asean %>%
  group_by(Entity) %>%
  summarise(
    across(all_of(vaksin_cols), ~ max(c(0, .x), na.rm = TRUE)),
    .groups = "drop"
  )

# drop = FALSE keeps a data frame even when only one vaccine column is left,
# which colSums() requires
total_vaksin <- colSums(peak_by_country[, vaksin_cols, drop = FALSE])

# Data frame for the pie chart: one row per vaccine column
varian_vaksin <- data.frame(
  Vaksin = names(total_vaksin),
  Dosis = as.numeric(total_vaksin)
)

# Pie chart of doses by vaccine type in ASEAN, legend on the right
ggplot(data = varian_vaksin, mapping = aes(x = "", y = Dosis, fill = Vaksin)) +
  geom_col(width = 1) +
  coord_polar("y") +
  ggtitle("Distribusi Jenis Vaksin di ASEAN") +
  theme_void() +
  theme(legend.position = "right")