Install Library

Beberapa library yang perlu diinstal dan dimuat adalah sebagai berikut:

library(ggplot2)
library(readxl)
library(ggthemes)
library(gridExtra)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Input Data

Data diinput dari Microsoft Excel dengan sintaks sebagai berikut:

# Read the life-expectancy workbook through its full path instead of calling
# setwd(), so the session's working directory is left untouched.
data_dir <- "D:/FOLDER TUGAS/SEMESTER 2/PROSTAT"
data <- read_excel(file.path(data_dir, "life_expectancy.xlsx"))

# Preview the first rows to check that the columns were imported as expected.
head(data)
## # A tibble: 6 × 4
##   Country   Sum of Females  Life…¹ Sum of Life Expectan…² Sum of Males  Life E…³
##   <chr>                      <dbl>                  <dbl>                  <dbl>
## 1 Chad                        57.2                   55.2                   53.4
## 2 Nigeria                     54.9                   54.6                   54.3
## 3 South Su…                   60.8                   57.7                   54.8
## 4 Lesotho                     60.4                   57.8                   55.0
## 5 Central …                   59.6                   57.7                   55.5
## 6 Somalia                     61.6                   59.0                   56.5
## # ℹ abbreviated names: ¹​`Sum of Females  Life Expectancy`,
## #   ²​`Sum of Life Expectancy  (both sexes)`, ³​`Sum of Males  Life Expectancy`

Pie Chart

Dari data tersebut akan disajikan visualisasi data dalam bentuk Pie Chart:

# Average life expectancy per sex (missing values ignored), one row per sex.
life_expectancy_avg <- data.frame(
  Category = c("Female", "Male"),
  Life_Expectancy = vapply(
    c("Sum of Females  Life Expectancy", "Sum of Males  Life Expectancy"),
    function(column) mean(data[[column]], na.rm = TRUE),
    numeric(1),
    # Keep the result unnamed so the data frame gets default row names.
    USE.NAMES = FALSE
  )
)

# Pie chart: a single stacked column bent into a circle by mapping the
# y axis onto the angle of a polar coordinate system.
ggplot(
  data = life_expectancy_avg,
  mapping = aes(x = "", y = Life_Expectancy, fill = Category)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = c(Female = "pink", Male = "blue")) +
  labs(title = "Rata-rata Harapan Hidup Berdasarkan Jenis Kelamin") +
  theme_void()

Box Plot

Box plot digunakan untuk melihat distribusi data harapan hidup berdasarkan jenis kelamin:

# Side-by-side box plots of female and male life expectancy.
#
# `fill` is only mapped inside aes(); it is no longer also set as a constant
# on each layer. A constant fill overrides the mapping, which left
# scale_fill_manual() without any "Female"/"Male" values and produced the
# "No shared levels found" warning. The legend is hidden because the x axis
# already labels each box.
ggplot(data) +
  geom_boxplot(
    aes(x = "Female", y = `Sum of Females  Life Expectancy`, fill = "Female"),
    color = "black",
    show.legend = FALSE
  ) +
  geom_boxplot(
    aes(x = "Male", y = `Sum of Males  Life Expectancy`, fill = "Male"),
    color = "black",
    show.legend = FALSE
  ) +
  scale_fill_manual(values = c("Female" = "pink", "Male" = "blue")) +
  labs(
    title = "Box Plot Harapan Hidup Berdasarkan Jenis Kelamin",
    x = "Jenis Kelamin",
    y = "Harapan Hidup"
  ) +
  theme_minimal()

Density Plot

Density plot digunakan untuk melihat distribusi kepadatan data:

# Density curve of life expectancy for both sexes combined.
# The fill colour is a fixed layer setting, so no `fill` aesthetic is mapped:
# a mapped fill would be overridden by the constant and never shown.
ggplot(data, aes(x = `Sum of Life Expectancy  (both sexes)`)) +
  geom_density(alpha = 0.5, color = "black", fill = "purple") +
  labs(
    title = "Density Plot Harapan Hidup",
    x = "Harapan Hidup (Semua Jenis Kelamin)",
    y = "Density"
  ) +
  theme_minimal()

Dot Plot

Dot plot digunakan untuk melihat distribusi nilai-nilai harapan hidup:

# One-dimensional dot plot: every point sits at y = 1, so only the x position
# (and the matching blue-to-red colour gradient) carries information.
ggplot(
  data,
  aes(
    x = `Sum of Life Expectancy  (both sexes)`,
    y = 1,
    color = `Sum of Life Expectancy  (both sexes)`
  )
) +
  geom_point(size = 3) +
  scale_color_gradient(low = "blue", high = "red") +
  labs(title = "Dot Plot Harapan Hidup", x = "Harapan Hidup", y = "") +
  theme_minimal() +
  # The constant y axis is meaningless, so its text and ticks are removed.
  # This must come after theme_minimal(), which would otherwise reset it.
  theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())

Histogram

Histogram digunakan untuk melihat distribusi frekuensi harapan hidup:

# Frequency histogram of life expectancy (both sexes), using 2-unit-wide bins.
ggplot(data) +
  geom_histogram(
    mapping = aes(x = `Sum of Life Expectancy  (both sexes)`),
    binwidth = 2,
    colour = "black",
    fill = "blue",
    alpha = 0.7
  ) +
  theme_minimal() +
  labs(title = "Histogram Harapan Hidup", x = "Harapan Hidup", y = "Frekuensi")

Summary

Dari data tersebut akan disajikan statistik deskriptif numerik untuk “life expectancy”. Berikut hasil perhitungannya:

# Mode (most frequent value) of a vector.
#
# Arguments:
#   x      A vector of values.
#   na.rm  If TRUE, missing values are dropped before counting. Defaults to
#          FALSE, in which case NA is counted like any other value and can
#          itself be returned as the mode.
#
# Returns:
#   A length-one vector holding the most frequent value. Ties are resolved in
#   favour of the value that appears first in `x`.
modus <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  uniq_x <- unique(x)
  # match() turns each element into its index within uniq_x, tabulate() counts
  # how often every index occurs, and which.max() picks the first maximum.
  uniq_x[which.max(tabulate(match(x, uniq_x)))]
}

# Descriptive statistics for female and male life expectancy.
#
# Each statistic is defined once and applied to both columns with across(),
# instead of being spelled out separately per sex. Every function ignores
# missing values; the mode does so by dropping NA before calling modus().
summary_fns <- list(
  Mean = function(x) mean(x, na.rm = TRUE),
  Median = function(x) median(x, na.rm = TRUE),
  Mode = function(x) modus(x[!is.na(x)]),
  Q1 = function(x) quantile(x, 0.25, na.rm = TRUE),
  Q3 = function(x) quantile(x, 0.75, na.rm = TRUE),
  Range = function(x) max(x, na.rm = TRUE) - min(x, na.rm = TRUE),
  Variance = function(x) var(x, na.rm = TRUE),
  SD = function(x) sd(x, na.rm = TRUE)
)

# The columns are renamed to short labels first so that the "{.fn}_{.col}"
# pattern yields Mean_Female, ..., SD_Female, Mean_Male, ..., SD_Male.
stats_summary <- data %>%
  select(
    Female = `Sum of Females  Life Expectancy`,
    Male = `Sum of Males  Life Expectancy`
  ) %>%
  summarise(across(c(Female, Male), summary_fns, .names = "{.fn}_{.col}"))

# Show the result.
# NOTE(review): the rendered output reports Range_Female = 94.3 while Q1 and Q3
# are 71.3 and 82.0, which suggests an extreme or placeholder value in the
# data -- confirm against the source workbook.
stats_summary
## # A tibble: 1 × 16
##   Mean_Female Median_Female Mode_Female Q1_Female Q3_Female Range_Female
##         <dbl>         <dbl>       <dbl>     <dbl>     <dbl>        <dbl>
## 1        76.8          78.0        76.7      71.3      82.0         94.3
## # ℹ 10 more variables: Variance_Female <dbl>, SD_Female <dbl>, Mean_Male <dbl>,
## #   Median_Male <dbl>, Mode_Male <dbl>, Q1_Male <dbl>, Q3_Male <dbl>,
## #   Range_Male <dbl>, Variance_Male <dbl>, SD_Male <dbl>