# Pertemuan 3 Matematika Aktuaria

# =====================================================
# TUGAS 2 - ANALISIS HARAPAN HIDUP MAHASISWA
# =====================================================

# -----------------------------
# 1. SETUP & DATA
# -----------------------------
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tibble' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.2
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)

# Seed the RNG so the simulated cohort is identical on every run.
set.seed(123)

# Number of simulated students.
n_mahasiswa <- 50

# Simulated student data. The random columns are drawn in the order listed
# (usia, gender, merokok, harapan_hidup); changing that order would change
# the values generated under the fixed seed.
data_mahasiswa <- data.frame(
  nama = paste0("Mahasiswa_", seq_len(n_mahasiswa)),
  usia = sample(x = 18:30, size = n_mahasiswa, replace = TRUE),
  gender = sample(x = c("L", "P"), size = n_mahasiswa, replace = TRUE),
  merokok = sample(
    x = c("Ya", "Tidak"),
    size = n_mahasiswa,
    replace = TRUE,
    prob = c(0.3, 0.7)
  ),
  harapan_hidup = rnorm(n = n_mahasiswa, mean = 75, sd = 5)
)

# -----------------------------
# 2. COMPLETE & CURTATE LIFE EXPECTANCY
# -----------------------------
# Derive the remaining-lifetime columns from the simulated data:
#   complete_life_expectancy - harapan_hidup minus the current usia
#   curtate_life_expectancy  - the complete value rounded down with floor()
# The second column refers to the first; mutate() allows this because columns
# created earlier in the same call are visible to later expressions.
data_mahasiswa <- mutate(
  data_mahasiswa,
  complete_life_expectancy = harapan_hidup - usia,
  curtate_life_expectancy = floor(complete_life_expectancy)
)

# -----------------------------
# 3. RATA-RATA HARAPAN HIDUP BERDASARKAN GENDER
# -----------------------------
# Per-gender summary: group size, mean harapan_hidup and mean remaining years
# (complete_life_expectancy). `.groups = "drop"` returns an ungrouped tibble.
per_gender <- group_by(data_mahasiswa, gender)
rata_gender <- summarise(
  per_gender,
  jumlah = n(),
  rata_harapan_hidup = mean(harapan_hidup),
  rata_sisa_hidup = mean(complete_life_expectancy),
  .groups = "drop"
)

# Print a heading followed by the summary table.
print("RATA-RATA HARAPAN HIDUP BERDASARKAN GENDER")
## [1] "RATA-RATA HARAPAN HIDUP BERDASARKAN GENDER"
print(rata_gender)
## # A tibble: 2 × 4
##   gender jumlah rata_harapan_hidup rata_sisa_hidup
##   <chr>   <int>              <dbl>           <dbl>
## 1 L          26               75.0            51.0
## 2 P          24               74.8            49.3
# -----------------------------
# 4. UJI HIPOTESIS (L vs P)
# -----------------------------
# Split harapan_hidup into one numeric vector per gender.
laki <- data_mahasiswa |>
  filter(gender == "L") |>
  pull(harapan_hidup)
perempuan <- data_mahasiswa |>
  filter(gender == "P") |>
  pull(harapan_hidup)

# Two-sample t-test comparing the two groups, using t.test() defaults
# (two-sided alternative, unequal variances assumed).
uji_t <- t.test(x = perempuan, y = laki)

# Report the group means and the p-value.
cat("\n=== UJI HIPOTESIS GENDER ===\n")
## 
## === UJI HIPOTESIS GENDER ===
cat("Rata-rata Laki-laki :", mean(laki), "\n")
## Rata-rata Laki-laki : 75.03551
cat("Rata-rata Perempuan:", mean(perempuan), "\n")
## Rata-rata Perempuan: 74.79693
cat("p-value:", uji_t$p.value, "\n")
## p-value: 0.8414225

# State the conclusion at the 5% significance level.
kesimpulan <- if (uji_t$p.value < 0.05) {
  "KESIMPULAN: Ada perbedaan signifikan\n"
} else {
  "KESIMPULAN: Tidak ada perbedaan signifikan\n"
}
cat(kesimpulan)
## KESIMPULAN: Tidak ada perbedaan signifikan
# -----------------------------
# 5. HISTOGRAM COMPLETE LIFE EXPECTANCY
# -----------------------------
# Histogram of complete_life_expectancy using 15 bins. The expression is left
# unassigned at top level so the plot is drawn via auto-printing.
ggplot(
  data = data_mahasiswa,
  mapping = aes(x = complete_life_expectancy)
) +
  geom_histogram(bins = 15, fill = "steelblue", alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Histogram Complete Life Expectancy",
    x = "Complete Life Expectancy (tahun)",
    y = "Frekuensi"
  )

# -----------------------------
# 6. BOXPLOT HARAPAN HIDUP vs MEROKOK
# -----------------------------
# Box plot of harapan_hidup for each merokok category; the same variable also
# drives the fill colour, so each box gets its own colour and a legend entry.
ggplot(
  data = data_mahasiswa,
  mapping = aes(x = merokok, y = harapan_hidup, fill = merokok)
) +
  geom_boxplot(alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Harapan Hidup Berdasarkan Status Merokok",
    x = "Status Merokok",
    y = "Harapan Hidup (tahun)"
  )

# -----------------------------
# 7. SCATTER PLOT USIA vs SISA HARAPAN HIDUP
# -----------------------------
# Scatter plot of usia against complete_life_expectancy, coloured by gender.
# Because colour is mapped in the top-level aes(), geom_smooth() inherits it
# and fits one linear model per gender; se = FALSE hides the confidence bands.
ggplot(
  data = data_mahasiswa,
  mapping = aes(x = usia, y = complete_life_expectancy, color = gender)
) +
  geom_point(size = 3, alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Hubungan Usia dan Sisa Harapan Hidup",
    x = "Usia (tahun)",
    y = "Sisa Harapan Hidup (tahun)"
  )
## `geom_smooth()` using formula = 'y ~ x'

# -----------------------------
# 8. RINGKASAN AKHIR
# -----------------------------
# Final summary: mean complete and curtate life expectancy and their
# difference. Each mean is computed once and reused for the difference line.
rata_complete <- mean(data_mahasiswa$complete_life_expectancy)
rata_curtate <- mean(data_mahasiswa$curtate_life_expectancy)

cat("\n=== RINGKASAN ===\n")
## 
## === RINGKASAN ===
cat("Rata-rata Complete Life Expectancy:", rata_complete, "\n")
## Rata-rata Complete Life Expectancy: 50.18099
cat("Rata-rata Curtate Life Expectancy:", rata_curtate, "\n")
## Rata-rata Curtate Life Expectancy: 49.64
cat("Selisih (Complete - Curtate):", rata_complete - rata_curtate, "\n")
## Selisih (Complete - Curtate): 0.5409902