library

Beberapa library yang perlu diinstal adalah sebagai berikut:

library(ggplot2)
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(ggthemes)
library(gridExtra)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

input data

# NOTE(review): setwd() ties this script to one machine's folder layout; the
# relative file name below is resolved against this directory.
setwd("D:/Document/TUGAS KULIAH/SEMESTER 2/PROSTA")
rawdata <- read_excel(path = "life_expectancy.xlsx")
# Replace every run of whitespace in the column names with "_", then print
# the resulting names as a check.
names(rawdata) <- gsub(pattern = "\\s+", replacement = "_", x = names(rawdata))
names(rawdata)
## [1] "Country"                             "Sum_of_Females_Life_Expectancy"     
## [3] "Sum_of_Life_Expectancy_(both_sexes)" "Sum_of_Males_Life_Expectancy"
# Preview the first six rows of the loaded table.
head(rawdata, n = 6L)
## # A tibble: 6 × 4
##   Country   Sum_of_Females_Life_…¹ Sum_of_Life_Expectan…² Sum_of_Males_Life_Ex…³
##   <chr>                      <dbl>                  <dbl>                  <dbl>
## 1 Chad                        57.2                   55.2                   53.4
## 2 Nigeria                     54.9                   54.6                   54.3
## 3 South Su…                   60.8                   57.7                   54.8
## 4 Lesotho                     60.4                   57.8                   55.0
## 5 Central …                   59.6                   57.7                   55.5
## 6 Somalia                     61.6                   59.0                   56.5
## # ℹ abbreviated names: ¹​Sum_of_Females_Life_Expectancy,
## #   ²​`Sum_of_Life_Expectancy_(both_sexes)`, ³​Sum_of_Males_Life_Expectancy

Box Plot

Box plot digunakan untuk melihat distribusi data harapan hidup berdasarkan jenis kelamin:

# Side-by-side box plots of the female and male life-expectancy columns.
# Each layer maps x to a constant label so both columns share one axis.
ggplot(data = rawdata) +
  geom_boxplot(
    mapping = aes(x = "Female", y = Sum_of_Females_Life_Expectancy),
    colour = "black", fill = "pink"
  ) +
  geom_boxplot(
    mapping = aes(x = "Male", y = Sum_of_Males_Life_Expectancy),
    colour = "black", fill = "blue"
  ) +
  theme_minimal() +
  ggtitle("Box Plot Harapan Hidup Berdasarkan Jenis Kelamin") +
  xlab("Jenis Kelamin") +
  ylab("Harapan Hidup")

pie chart

Dari data tersebut akan disajikan visualisasi data dalam bentuk Pie Chart:

# Mean life expectancy per sex (missing values ignored), one row per sex.
life_expectancy_avg <- data.frame(
  Category = c("Female", "Male"),
  Life_Expectancy = vapply(
    c("Sum_of_Females_Life_Expectancy", "Sum_of_Males_Life_Expectancy"),
    function(column) mean(rawdata[[column]], na.rm = TRUE),
    numeric(1),
    USE.NAMES = FALSE
  )
)

# Pie chart: one stacked column holding both means, wrapped onto polar
# coordinates along the y axis.
ggplot(
  data = life_expectancy_avg,
  mapping = aes(x = "", y = Life_Expectancy, fill = Category)
) +
  geom_col(width = 1) +
  scale_fill_manual(values = c("Female" = "pink", "Male" = "blue")) +
  coord_polar(theta = "y", start = 0) +
  ggtitle("Rata-rata Harapan Hidup Berdasarkan Jenis Kelamin") +
  theme_void()

bar chart

Bar chart digunakan untuk melihat perbandingan rata-rata harapan hidup antara laki-laki dan perempuan:

# Bar chart comparing the two mean values, one bar per sex.
ggplot(
  data = life_expectancy_avg,
  mapping = aes(x = Category, y = Life_Expectancy, fill = Category)
) +
  geom_col(width = 0.5) +
  scale_fill_manual(values = c("Female" = "pink", "Male" = "blue")) +
  theme_minimal() +
  ggtitle("Bar Chart Rata-rata Harapan Hidup Berdasarkan Jenis Kelamin") +
  xlab("Jenis Kelamin") +
  ylab("Rata-rata Harapan Hidup")

histogram

Histogram digunakan untuk melihat distribusi frekuensi harapan hidup:

# Histogram of the female life-expectancy column with a bin width of 2.
ggplot(data = rawdata) +
  geom_histogram(
    mapping = aes(x = Sum_of_Females_Life_Expectancy),
    binwidth = 2, colour = "black", fill = "pink", alpha = 0.7
  ) +
  theme_minimal() +
  ggtitle("Histogram Harapan Hidup (Female)") +
  xlab("Harapan Hidup") +
  ylab("Frekuensi")

# Histogram of the male life-expectancy column with a bin width of 2.
ggplot(data = rawdata) +
  geom_histogram(
    mapping = aes(x = Sum_of_Males_Life_Expectancy),
    binwidth = 2, colour = "black", fill = "blue", alpha = 0.7
  ) +
  theme_minimal() +
  ggtitle("Histogram Harapan Hidup (Male)") +
  xlab("Harapan Hidup") +
  ylab("Frekuensi")

Density

Density plot digunakan untuk melihat distribusi kepadatan data:

# Kernel density curve of the female life-expectancy column.
ggplot(data = rawdata) +
  geom_density(
    mapping = aes(x = Sum_of_Females_Life_Expectancy),
    colour = "red", fill = "pink", alpha = 0.5
  ) +
  theme_minimal() +
  ggtitle("Density Plot Harapan Hidup (Female)") +
  xlab("Harapan Hidup") +
  ylab("Density")

# Kernel density curve of the male life-expectancy column.
ggplot(data = rawdata) +
  geom_density(
    mapping = aes(x = Sum_of_Males_Life_Expectancy),
    colour = "darkblue", fill = "blue", alpha = 0.5
  ) +
  theme_minimal() +
  ggtitle("Density Plot Harapan Hidup (Male)") +
  xlab("Harapan Hidup") +
  ylab("Density")

Summary

Dari data tersebut akan disajikan ringkasan statistik deskriptif numerik untuk “life expectancy”; berikut terlampir hasilnya:

# Replace whitespace runs in the column names with "_"; names that already
# contain no whitespace are left unchanged.
names(rawdata) <- gsub(pattern = "\\s+", replacement = "_", x = names(rawdata))

#' Most frequent value (mode) of a vector
#'
#' Missing values are dropped before counting. When several values share the
#' highest count, the one that appears first in `x` is returned. If nothing
#' remains after dropping missing values, the result is `NA`.
#'
#' @param x A vector.
#' @return A length-one vector holding the most frequent non-missing value.
modus <- function(x) {
  values <- na.omit(x)
  candidates <- unique(values)
  # Count how often each distinct value occurs, in first-appearance order.
  counts <- tabulate(match(values, candidates))
  candidates[which.max(counts)]
}

# Descriptive statistics for the female and male columns, collected in a
# one-row table whose columns are named <Statistic>_<Sex>. The statistics are
# listed once and applied to both columns.
stats_summary <- rawdata %>%
  select(
    Female = Sum_of_Females_Life_Expectancy,
    Male = Sum_of_Males_Life_Expectancy
  ) %>%
  summarise(
    across(
      everything(),
      list(
        Mean = \(v) mean(v, na.rm = TRUE),
        Median = \(v) median(v, na.rm = TRUE),
        Mode = \(v) modus(v),
        Q1 = \(v) quantile(v, 0.25, na.rm = TRUE),
        Q3 = \(v) quantile(v, 0.75, na.rm = TRUE),
        Range = \(v) diff(range(v, na.rm = TRUE)),
        Variance = \(v) var(v, na.rm = TRUE),
        SD = \(v) sd(v, na.rm = TRUE)
      ),
      .names = "{.fn}_{.col}"
    )
  )

# Show the results. width = Inf makes the tibble printer display every
# statistic column instead of truncating the table to the console width.
print(stats_summary, width = Inf)
## # A tibble: 1 × 16
##   Mean_Female Median_Female Mode_Female Q1_Female Q3_Female Range_Female
##         <dbl>         <dbl>       <dbl>     <dbl>     <dbl>        <dbl>
## 1        76.8          78.0        76.7      71.3      82.0         94.3
## # ℹ 10 more variables: Variance_Female <dbl>, SD_Female <dbl>, Mean_Male <dbl>,
## #   Median_Male <dbl>, Mode_Male <dbl>, Q1_Male <dbl>, Q3_Male <dbl>,
## #   Range_Male <dbl>, Variance_Male <dbl>, SD_Male <dbl>