# =====================================================
# TUGAS 2 - ANALISIS HARAPAN HIDUP MAHASISWA
# =====================================================
# -----------------------------
# 1. SETUP & DATA
# -----------------------------
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tibble' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.2
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
# Simulate a reproducible sample of 50 students.
# The random columns are drawn in the order usia -> gender -> merokok ->
# harapan_hidup right after seeding, so the generated values are stable
# from run to run.
set.seed(123)
data_mahasiswa <- local({
  n_mahasiswa <- 50L

  # Age: integer years drawn uniformly from 18..30 (with replacement).
  usia <- sample(18:30, n_mahasiswa, replace = TRUE)
  # Gender code: "L" or "P", equally likely.
  gender <- sample(c("L", "P"), n_mahasiswa, replace = TRUE)
  # Smoking status: "Ya" with probability 0.3, "Tidak" with probability 0.7.
  merokok <- sample(
    c("Ya", "Tidak"),
    n_mahasiswa,
    replace = TRUE,
    prob = c(0.3, 0.7)
  )
  # Life expectancy: normal draws with mean 75 and standard deviation 5.
  harapan_hidup <- rnorm(n_mahasiswa, mean = 75, sd = 5)

  data.frame(
    nama = paste0("Mahasiswa_", seq_len(n_mahasiswa)),
    usia = usia,
    gender = gender,
    merokok = merokok,
    harapan_hidup = harapan_hidup
  )
})
# -----------------------------
# 2. COMPLETE & CURTATE LIFE EXPECTANCY
# -----------------------------
# Add two remaining-lifetime columns per student:
#   complete_life_expectancy - simulated life expectancy minus current age
#   curtate_life_expectancy  - the complete value rounded down to whole years
data_mahasiswa <- mutate(
  data_mahasiswa,
  complete_life_expectancy = harapan_hidup - usia,
  curtate_life_expectancy = floor(complete_life_expectancy)
)
# -----------------------------
# 3. RATA-RATA HARAPAN HIDUP BERDASARKAN GENDER
# -----------------------------
# Per-gender summary table: group size, mean life expectancy, and mean
# remaining lifetime. `.groups = "drop"` makes the result an ungrouped tibble.
rata_gender <- summarise(
  group_by(data_mahasiswa, gender),
  jumlah = n(),
  rata_harapan_hidup = mean(harapan_hidup),
  rata_sisa_hidup = mean(complete_life_expectancy),
  .groups = "drop"
)
# Print the section title followed by the per-gender summary table.
# Printing only the title emits just the first output line; the table shown
# in the recorded output requires `rata_gender` to be printed explicitly.
print("RATA-RATA HARAPAN HIDUP BERDASARKAN GENDER")
## [1] "RATA-RATA HARAPAN HIDUP BERDASARKAN GENDER"
print(rata_gender)
## # A tibble: 2 × 4
## gender jumlah rata_harapan_hidup rata_sisa_hidup
## <chr> <int> <dbl> <dbl>
## 1 L 26 75.0 51.0
## 2 P 24 74.8 49.3
# -----------------------------
# 4. UJI HIPOTESIS (L vs P)
# -----------------------------
# Two-sample t-test on simulated life expectancy: female ("P") as the first
# sample, male ("L") as the second. `t.test()` is called with its default
# settings.
laki <- with(data_mahasiswa, harapan_hidup[gender == "L"])
perempuan <- with(data_mahasiswa, harapan_hidup[gender == "P"])
uji_t <- t.test(x = perempuan, y = laki)

# Report both group means and the p-value.
cat("\n=== UJI HIPOTESIS GENDER ===\n")
##
## === UJI HIPOTESIS GENDER ===
cat("Rata-rata Laki-laki :", mean(laki), "\n")
## Rata-rata Laki-laki : 75.03551
cat("Rata-rata Perempuan:", mean(perempuan), "\n")
## Rata-rata Perempuan: 74.79693
cat("p-value:", uji_t[["p.value"]], "\n")
## p-value: 0.8414225

# Conclusion at the 5% significance level.
cat(
  if (uji_t[["p.value"]] < 0.05) {
    "KESIMPULAN: Ada perbedaan signifikan\n"
  } else {
    "KESIMPULAN: Tidak ada perbedaan signifikan\n"
  }
)
## KESIMPULAN: Tidak ada perbedaan signifikan
# -----------------------------
# 5. HISTOGRAM COMPLETE LIFE EXPECTANCY
# -----------------------------
# Histogram of the remaining lifetime (complete life expectancy) of all
# students, split into 15 bins.
ggplot(
  data = data_mahasiswa,
  mapping = aes(x = complete_life_expectancy)
) +
  geom_histogram(bins = 15, fill = "steelblue", alpha = 0.7) +
  theme_minimal() +
  ggtitle("Histogram Complete Life Expectancy") +
  xlab("Complete Life Expectancy (tahun)") +
  ylab("Frekuensi")

# -----------------------------
# 6. BOXPLOT HARAPAN HIDUP vs MEROKOK
# -----------------------------
# Box plot of simulated life expectancy for each smoking status; boxes are
# filled by the same status variable that defines the x axis.
ggplot(
  data = data_mahasiswa,
  mapping = aes(x = merokok, y = harapan_hidup, fill = merokok)
) +
  geom_boxplot(alpha = 0.7) +
  theme_minimal() +
  ggtitle("Harapan Hidup Berdasarkan Status Merokok") +
  xlab("Status Merokok") +
  ylab("Harapan Hidup (tahun)")

# -----------------------------
# 7. SCATTER PLOT USIA vs SISA HARAPAN HIDUP
# -----------------------------
# Scatter plot of age against remaining lifetime, coloured by gender, with a
# linear trend line (no confidence band). Because `color = gender` is set in
# the plot-level aesthetics, the smoother is drawn separately for each gender.
ggplot(
  data_mahasiswa,
  aes(x = usia, y = complete_life_expectancy, color = gender)
) +
  geom_point(size = 3, alpha = 0.7) +
  # The formula is passed explicitly so ggplot2 does not print its
  # "`geom_smooth()` using formula = 'y ~ x'" message on every render.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(
    title = "Hubungan Usia dan Sisa Harapan Hidup",
    x = "Usia (tahun)",
    y = "Sisa Harapan Hidup (tahun)"
  ) +
  theme_minimal()

# -----------------------------
# 8. RINGKASAN AKHIR
# -----------------------------
# Final summary: mean complete and curtate life expectancy and the gap
# between them. Each mean is computed once inside `local()` so no extra
# variables are left in the global environment.
local({
  rata_complete <- mean(data_mahasiswa$complete_life_expectancy)
  rata_curtate <- mean(data_mahasiswa$curtate_life_expectancy)

  cat("\n=== RINGKASAN ===\n")
  ##
  ## === RINGKASAN ===
  cat("Rata-rata Complete Life Expectancy:", rata_complete, "\n")
  ## Rata-rata Complete Life Expectancy: 50.18099
  cat("Rata-rata Curtate Life Expectancy:", rata_curtate, "\n")
  ## Rata-rata Curtate Life Expectancy: 49.64
  cat("Selisih (Complete - Curtate):", rata_complete - rata_curtate, "\n")
  ## Selisih (Complete - Curtate): 0.5409902
})