Beberapa library yang perlu diinstal dan dimuat adalah sebagai berikut:
library(ggplot2)
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(ggthemes)
library(gridExtra)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Switch to the folder that holds the workbook, then load it.
# NOTE(review): this absolute path is machine-specific; consider a
# project-relative path instead of setwd().
setwd("D:/Document/TUGAS KULIAH/SEMESTER 2/PROSTA")
rawdata <- read_excel(path = "life_expectancy.xlsx")
# Replace each run of whitespace in the column names with an underscore,
# then print the cleaned names as a sanity check.
names(rawdata) <- gsub("\\s+", "_", names(rawdata))
names(rawdata)
## [1] "Country" "Sum_of_Females_Life_Expectancy"
## [3] "Sum_of_Life_Expectancy_(both_sexes)" "Sum_of_Males_Life_Expectancy"
head(rawdata)
## # A tibble: 6 × 4
## Country Sum_of_Females_Life_…¹ Sum_of_Life_Expectan…² Sum_of_Males_Life_Ex…³
## <chr> <dbl> <dbl> <dbl>
## 1 Chad 57.2 55.2 53.4
## 2 Nigeria 54.9 54.6 54.3
## 3 South Su… 60.8 57.7 54.8
## 4 Lesotho 60.4 57.8 55.0
## 5 Central … 59.6 57.7 55.5
## 6 Somalia 61.6 59.0 56.5
## # ℹ abbreviated names: ¹Sum_of_Females_Life_Expectancy,
## # ²`Sum_of_Life_Expectancy_(both_sexes)`, ³Sum_of_Males_Life_Expectancy
Box plot digunakan untuk melihat distribusi data harapan hidup berdasarkan jenis kelamin:
# Side-by-side box plots of female and male life expectancy. Each layer
# maps a constant label to x so the two columns appear as separate boxes.
ggplot(rawdata) +
  geom_boxplot(
    aes(x = "Female", y = Sum_of_Females_Life_Expectancy),
    fill = "pink", color = "black"
  ) +
  geom_boxplot(
    aes(x = "Male", y = Sum_of_Males_Life_Expectancy),
    fill = "blue", color = "black"
  ) +
  ggtitle("Box Plot Harapan Hidup Berdasarkan Jenis Kelamin") +
  xlab("Jenis Kelamin") +
  ylab("Harapan Hidup") +
  theme_minimal()
Dari data tersebut akan disajikan visualisasi data dalam bentuk Pie Chart:
# Mean life expectancy per sex (missing values ignored), one row per
# category. This table also feeds the bar chart further down.
life_expectancy_avg <- data.frame(
  Category = c("Female", "Male"),
  Life_Expectancy = c(
    mean(rawdata[["Sum_of_Females_Life_Expectancy"]], na.rm = TRUE),
    mean(rawdata[["Sum_of_Males_Life_Expectancy"]], na.rm = TRUE)
  )
)
# Pie chart: a single stacked bar of the two means, wrapped into polar
# coordinates along the y axis.
ggplot(life_expectancy_avg) +
  geom_col(aes(x = "", y = Life_Expectancy, fill = Category), width = 1) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = c(Female = "pink", Male = "blue")) +
  ggtitle("Rata-rata Harapan Hidup Berdasarkan Jenis Kelamin") +
  theme_void()
Bar chart digunakan untuk melihat perbandingan rata-rata harapan hidup antara laki-laki dan perempuan:
# Bar chart comparing the mean life expectancy of the two categories;
# bars are drawn at the stored values and coloured per category.
ggplot(life_expectancy_avg, aes(x = Category, y = Life_Expectancy)) +
  geom_col(aes(fill = Category), width = 0.5) +
  scale_fill_manual(values = c(Female = "pink", Male = "blue")) +
  ggtitle("Bar Chart Rata-rata Harapan Hidup Berdasarkan Jenis Kelamin") +
  xlab("Jenis Kelamin") +
  ylab("Rata-rata Harapan Hidup") +
  theme_minimal()
Histogram digunakan untuk melihat distribusi frekuensi harapan hidup:
# Histogram of female life expectancy, using bins two units wide.
ggplot(rawdata) +
  geom_histogram(
    aes(x = Sum_of_Females_Life_Expectancy),
    binwidth = 2, alpha = 0.7, color = "black", fill = "pink"
  ) +
  ggtitle("Histogram Harapan Hidup (Female)") +
  xlab("Harapan Hidup") +
  ylab("Frekuensi") +
  theme_minimal()
# Histogram of male life expectancy, same bin width for comparability.
ggplot(rawdata) +
  geom_histogram(
    aes(x = Sum_of_Males_Life_Expectancy),
    binwidth = 2, alpha = 0.7, color = "black", fill = "blue"
  ) +
  ggtitle("Histogram Harapan Hidup (Male)") +
  xlab("Harapan Hidup") +
  ylab("Frekuensi") +
  theme_minimal()
# Density
Density plot digunakan untuk melihat distribusi kepadatan data:
# Kernel density estimate of female life expectancy.
ggplot(rawdata) +
  geom_density(
    aes(x = Sum_of_Females_Life_Expectancy),
    color = "red", fill = "pink", alpha = 0.5
  ) +
  ggtitle("Density Plot Harapan Hidup (Female)") +
  xlab("Harapan Hidup") +
  ylab("Density") +
  theme_minimal()
# Kernel density estimate of male life expectancy.
ggplot(rawdata) +
  geom_density(
    aes(x = Sum_of_Males_Life_Expectancy),
    color = "darkblue", fill = "blue", alpha = 0.5
  ) +
  ggtitle("Density Plot Harapan Hidup (Male)") +
  xlab("Harapan Hidup") +
  ylab("Density") +
  theme_minimal()
## Summary
Dari data tersebut akan disajikan ringkasan statistik deskriptif numerik berdasarkan “life expectancy”. Berikut terlampir hasilnya:
# Make sure no column name contains whitespace: every run of whitespace
# becomes a single underscore. Running this on already-clean names is a no-op.
names(rawdata) <- gsub("\\s+", "_", names(rawdata))
# Return the mode (most frequent value) of `x`, ignoring missing values.
#
# x: a vector; NA entries are dropped before counting.
# Returns a length-one vector holding the most frequent value. When several
# values share the top count, the one that appears first in `x` is returned.
modus <- function(x) {
  observed <- na.omit(x)
  candidates <- unique(observed)
  # match() maps each observation to the index of its distinct value, so
  # tabulate() yields one frequency per candidate, in first-seen order.
  frequencies <- tabulate(match(observed, candidates))
  candidates[which.max(frequencies)]
}
# Descriptive statistics for female and male life expectancy, collected in a
# one-row table: mean, median, mode, first and third quartile, range
# (max minus min), variance and standard deviation. Missing values are
# ignored throughout.
stats_summary <- summarise(
  rawdata,
  # Female statistics
  Mean_Female = mean(Sum_of_Females_Life_Expectancy, na.rm = TRUE),
  Median_Female = median(Sum_of_Females_Life_Expectancy, na.rm = TRUE),
  Mode_Female = modus(Sum_of_Females_Life_Expectancy),
  Q1_Female = quantile(
    Sum_of_Females_Life_Expectancy, probs = 0.25, na.rm = TRUE
  ),
  Q3_Female = quantile(
    Sum_of_Females_Life_Expectancy, probs = 0.75, na.rm = TRUE
  ),
  Range_Female = max(Sum_of_Females_Life_Expectancy, na.rm = TRUE) -
    min(Sum_of_Females_Life_Expectancy, na.rm = TRUE),
  Variance_Female = var(Sum_of_Females_Life_Expectancy, na.rm = TRUE),
  SD_Female = sd(Sum_of_Females_Life_Expectancy, na.rm = TRUE),
  # Male statistics
  Mean_Male = mean(Sum_of_Males_Life_Expectancy, na.rm = TRUE),
  Median_Male = median(Sum_of_Males_Life_Expectancy, na.rm = TRUE),
  Mode_Male = modus(Sum_of_Males_Life_Expectancy),
  Q1_Male = quantile(
    Sum_of_Males_Life_Expectancy, probs = 0.25, na.rm = TRUE
  ),
  Q3_Male = quantile(
    Sum_of_Males_Life_Expectancy, probs = 0.75, na.rm = TRUE
  ),
  Range_Male = max(Sum_of_Males_Life_Expectancy, na.rm = TRUE) -
    min(Sum_of_Males_Life_Expectancy, na.rm = TRUE),
  Variance_Male = var(Sum_of_Males_Life_Expectancy, na.rm = TRUE),
  SD_Male = sd(Sum_of_Males_Life_Expectancy, na.rm = TRUE)
)
# Show the result
print(stats_summary)
## # A tibble: 1 × 16
## Mean_Female Median_Female Mode_Female Q1_Female Q3_Female Range_Female
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 76.8 78.0 76.7 71.3 82.0 94.3
## # ℹ 10 more variables: Variance_Female <dbl>, SD_Female <dbl>, Mean_Male <dbl>,
## # Median_Male <dbl>, Mode_Male <dbl>, Q1_Male <dbl>, Q3_Male <dbl>,
## # Range_Male <dbl>, Variance_Male <dbl>, SD_Male <dbl>