Beberapa library yang perlu diinstal dan dimuat adalah sebagai berikut:
library(ggplot2)
library(readxl)
library(ggthemes)
library(gridExtra)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Data diimpor dari Microsoft Excel dengan sintaks sebagai berikut:
# Switch the session's working directory to the coursework folder so the
# workbook below can be opened by its bare file name.
# NOTE(review): this absolute path only exists on the author's machine;
# consider a project-relative path before sharing the script.
setwd(dir = "D:/FOLDER TUGAS/SEMESTER 2/PROSTAT")

# Import the Excel workbook and keep the result as `data`.
data <- read_excel(path = "life_expectancy.xlsx")

# Preview the top rows to confirm the import worked.
head(x = data)
## # A tibble: 6 × 4
## Country Sum of Females Life…¹ Sum of Life Expectan…² Sum of Males Life E…³
## <chr> <dbl> <dbl> <dbl>
## 1 Chad 57.2 55.2 53.4
## 2 Nigeria 54.9 54.6 54.3
## 3 South Su… 60.8 57.7 54.8
## 4 Lesotho 60.4 57.8 55.0
## 5 Central … 59.6 57.7 55.5
## 6 Somalia 61.6 59.0 56.5
## # ℹ abbreviated names: ¹`Sum of Females Life Expectancy`,
## # ²`Sum of Life Expectancy (both sexes)`, ³`Sum of Males Life Expectancy`
Dari data tersebut akan disajikan visualisasi data dalam bentuk Pie Chart:
# Mean life expectancy per sex, skipping missing values, collected into a
# two-row data frame (one row per category) ready for plotting.
life_expectancy_avg <- data.frame(
  Category = c("Female", "Male"),
  Life_Expectancy = c(
    mean(data[["Sum of Females Life Expectancy"]], na.rm = TRUE),
    mean(data[["Sum of Males Life Expectancy"]], na.rm = TRUE)
  )
)
# Pie chart: a single stacked column wrapped onto polar coordinates, so each
# category's mean becomes one slice.
ggplot(
  data = life_expectancy_avg,
  mapping = aes(x = "", y = Life_Expectancy, fill = Category)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = c("Female" = "pink", "Male" = "blue")) +
  labs(title = "Rata-rata Harapan Hidup Berdasarkan Jenis Kelamin") +
  theme_void()
# Box Plot
Box plot digunakan untuk melihat distribusi data harapan hidup
berdasarkan jenis kelamin:
# Box plot comparing the female and male life-expectancy distributions.
#
# The two wide columns are stacked into a long data frame (one row per
# country and sex) so that a single layer can map `fill` to the sex category.
# Mapping `fill` as a real aesthetic lets scale_fill_manual() find its
# "Female"/"Male" levels; hard-coding the fill on each layer instead leaves
# the scale with no data and triggers the "No shared levels found" warning.
ggplot(
  data.frame(
    Sex = rep(c("Female", "Male"), each = nrow(data)),
    Life_Expectancy = c(
      data$`Sum of Females Life Expectancy`,
      data$`Sum of Males Life Expectancy`
    )
  ),
  aes(x = Sex, y = Life_Expectancy, fill = Sex)
) +
  # The x axis already names each box, so the fill legend is suppressed.
  geom_boxplot(color = "black", show.legend = FALSE) +
  labs(
    title = "Box Plot Harapan Hidup Berdasarkan Jenis Kelamin",
    x = "Jenis Kelamin",
    y = "Harapan Hidup"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("Female" = "pink", "Male" = "blue"))
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
# Density Plot
Density plot digunakan untuk melihat distribusi kepadatan data:
# Density plot of overall (both sexes) life expectancy.
# The fill is one fixed colour, so it is set directly on the layer; no `fill`
# aesthetic is mapped because the layer-level constant would override it.
ggplot(data, aes(x = `Sum of Life Expectancy (both sexes)`)) +
  geom_density(alpha = 0.5, color = "black", fill = "purple") +
  labs(
    title = "Density Plot Harapan Hidup",
    x = "Harapan Hidup (Semua Jenis Kelamin)",
    y = "Density"
  ) +
  theme_minimal()
# Dot Plot
Dot plot digunakan untuk melihat distribusi nilai-nilai harapan hidup:
# Dot plot: every observation is drawn on one horizontal strip (y fixed
# at 1) and coloured on a blue-to-red gradient by its life expectancy.
ggplot(
  data = data,
  mapping = aes(
    x = `Sum of Life Expectancy (both sexes)`,
    y = 1,
    color = `Sum of Life Expectancy (both sexes)`
  )
) +
  geom_point(size = 3) +
  scale_color_gradient(low = "blue", high = "red") +
  labs(title = "Dot Plot Harapan Hidup", x = "Harapan Hidup", y = "") +
  theme_minimal() +
  # The y position carries no information, so its text and ticks are hidden.
  theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())
# Histogram
Histogram digunakan untuk melihat distribusi frekuensi harapan hidup:
# Histogram of overall life expectancy, using bins two units wide.
ggplot(data = data) +
  geom_histogram(
    mapping = aes(x = `Sum of Life Expectancy (both sexes)`),
    binwidth = 2,
    color = "black",
    fill = "blue",
    alpha = 0.7
  ) +
  theme_minimal() +
  labs(title = "Histogram Harapan Hidup", x = "Harapan Hidup", y = "Frekuensi")
# Summary
Dari data tersebut akan disajikan ringkasan statistik deskriptif numerik
untuk “life expectancy”; berikut terlampir hasilnya:
# Return the mode (most frequent value) of a vector.
#
# Arguments:
#   x      A vector (numeric, character, factor, ...).
#   na.rm  If TRUE (the default), missing values are dropped before counting,
#          so NA is never reported as the mode. If FALSE, NA is counted like
#          any other value.
#
# Returns a length-one vector of the same type as `x`. Ties go to the value
# that appears first in `x`. If nothing is left to count (empty input, or
# all NA with na.rm = TRUE), a single NA is returned.
#
# Values are compared for exact equality, so on continuous doubles most
# values are unique and the result is simply the first repeated (or first)
# value.
modus <- function(x, na.rm = TRUE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  if (length(x) == 0L) {
    # Indexing with NA yields a typed, length-one missing value.
    return(x[NA_integer_])
  }
  uniq_x <- unique(x)
  # match() maps each element to the index of its distinct value and
  # tabulate() counts those indices; which.max() picks the first maximum.
  uniq_x[which.max(tabulate(match(x, uniq_x)))]
}
# Descriptive statistics for the female and male life-expectancy columns.
# The two columns are given short names first so that across() can apply the
# same list of statistics to each one; `.names` then assembles the output
# column names as <Statistic>_<Sex> (Mean_Female, ..., SD_Male).
stats_summary <- data %>%
  select(
    Female = `Sum of Females Life Expectancy`,
    Male = `Sum of Males Life Expectancy`
  ) %>%
  summarise(
    across(
      c(Female, Male),
      list(
        Mean = function(v) mean(v, na.rm = TRUE),
        Median = function(v) median(v, na.rm = TRUE),
        Mode = function(v) modus(v),
        Q1 = function(v) quantile(v, 0.25, na.rm = TRUE),
        Q3 = function(v) quantile(v, 0.75, na.rm = TRUE),
        Range = function(v) max(v, na.rm = TRUE) - min(v, na.rm = TRUE),
        Variance = function(v) var(v, na.rm = TRUE),
        SD = function(v) sd(v, na.rm = TRUE)
      ),
      .names = "{.fn}_{.col}"
    )
  )

# Print the one-row summary table.
stats_summary
## # A tibble: 1 × 16
## Mean_Female Median_Female Mode_Female Q1_Female Q3_Female Range_Female
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 76.8 78.0 76.7 71.3 82.0 94.3
## # ℹ 10 more variables: Variance_Female <dbl>, SD_Female <dbl>, Mean_Male <dbl>,
## # Median_Male <dbl>, Mode_Male <dbl>, Q1_Male <dbl>, Q3_Male <dbl>,
## # Range_Male <dbl>, Variance_Male <dbl>, SD_Male <dbl>