Data yang digunakan merupakan pengamatan nyata terhadap Geyser Old Faithful di Yellowstone National Park, USA Data ini mencatat: durasi letusan (eruption duration) waktu tunggu ke letusan berikutnya (waiting time) yang diukur langsung di lapangan oleh peneliti geologi. Data Built-in pada R
#Membaca data
data(faithful)
y <- faithful$waiting
n <- length(y)
#Mixture Dua Distribusi Diketahui
# Initial value
p <- 0.5
mu1 <- 55
mu2 <- 80
s1 <- 5
s2 <- 5
max_iter <- 200
tol <- 1e-6
loglik <- function(p,y){
sum(log(p*dnorm(y,mu1,s1)+
(1-p)*dnorm(y,mu2,s2)))
}
LL <- numeric(max_iter)
for(k in 1:max_iter){
tau <- (p*dnorm(y,mu1,s1))/
(p*dnorm(y,mu1,s1)+
(1-p)*dnorm(y,mu2,s2))
p_new <- mean(tau)
LL[k] <- loglik(p_new,y)
if(k>1 && abs(LL[k]-LL[k-1])<tol) break
p <- p_new
}
p
## [1] 0.3641023
#EM: Mixture Distribusi Dua Normal Tidak Diketahui
# Initial value
p <- 0.5
mu1 <- mean(y) - 10
mu2 <- mean(y) + 10
s1 <- var(y)
s2 <- var(y)
loglik <- function(p,mu1,mu2,s1,s2,y){
sum(log(p*dnorm(y,mu1,sqrt(s1)) +
(1-p)*dnorm(y,mu2,sqrt(s2))))
}
LL <- numeric(max_iter)
for(k in 1:max_iter){
tau <- (p*dnorm(y,mu1,sqrt(s1))) /
(p*dnorm(y,mu1,sqrt(s1)) +
(1-p)*dnorm(y,mu2,sqrt(s2)))
p_new <- mean(tau)
mu1_new <- sum(tau*y)/sum(tau)
mu2_new <- sum((1-tau)*y)/sum(1-tau)
s1_new <- sum(tau*(y-mu1_new)^2)/sum(tau)
s2_new <- sum((1-tau)*(y-mu2_new)^2)/sum(1-tau)
LL[k] <- loglik(p_new,mu1_new,mu2_new,s1_new,s2_new,y)
if(k>1 && abs(LL[k]-LL[k-1]) < tol) break
p <- p_new
mu1<- mu1_new
mu2<- mu2_new
s1 <- s1_new
s2 <- s2_new
}
result <- data.frame(
Parameter=c("p","mu1","sigma1^2","mu2","sigma2^2"),
Estimate=c(p,mu1,s1,mu2,s2)
)
result
## Parameter Estimate
## 1 p 0.3609059
## 2 mu1 54.6155142
## 3 sigma1^2 34.4778259
## 4 mu2 80.0914862
## 5 sigma2^2 34.4254231
#EM Menggunakan Package mixtools
library(mixtools)
## mixtools package, version 2.0.0.1, Released 2022-12-04
## This package is based upon work supported by the National Science Foundation under Grant No. SES-0518772 and the Chan Zuckerberg Initiative: Essential Open Source Software for Science (Grant No. 2020-255193).
fit <- normalmixEM(y,
lambda=c(0.5,0.5),
mu=c(55,80),
sigma=c(5,5))
## number of iterations= 16
fit$lambda
## [1] 0.360887 0.639113
fit$mu
## [1] 54.61490 80.09109
fit$sigma
## [1] 5.871247 5.867714
#Visualisasi
plot(fit, which=2)
Install dan load package yang diperlukan
packages <- c("icdGLM", "dplyr", "ggplot2", "DataExplorer",
"gridExtra", "tidyr", "knitr", "kableExtra")
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(packages[!installed_packages])
}
# Load packages
library(icdGLM)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(DataExplorer)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(tidyr)
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
Deskripsi Data TLI
Data TLI berasal dari sebuah studi medis yang dilakukan oleh Colice, Stukel, dan Dain (1989) dan dipublikasikan di jurnal Chest . Tujuan utama dari studi ini adalah untuk mengidentifikasi pasien yang mengalami intubasi berkepanjangan dan mengevaluasi secara prospektif insiden serta jenis komplikasi laring yang mungkin mereka alami .
Secara spesifik, berikut adalah poin-poin penting terkait data tersebut:
Subjek Penelitian: Data dikumpulkan dari 82 pasien yang menjalani intubasi translaring (translyangeal intubation - TLI) selama lebih dari empat hari .
Tujuan Analisis (dalam Konteks Jurnal Ibrahim): Dalam jurnal Ibrahim (1990), data ini digunakan sebagai contoh pertama untuk mengilustrasikan penerapan Algoritma EM dengan metode pembobotan (method of weights) pada model linear umum (GLM) dengan data tidak lengkap. Tujuannya adalah untuk mendapatkan estimasi parameter model yang lebih efisien dibandingkan hanya menghapus observasi yang tidak lengkap (case deletion) .
Karakteristik Data Asli: Pada saat awal studi, data dikumpulkan mengenai 13 variabel penjelas (kovariat) baseline dari para pasien. Dari 13 kovariat ini, 3 di antaranya bersifat kontinu dan 10 bersifat dikotomus (biner). Terdapat 4 variabel yang tidak lengkap (memiliki nilai hilang/missing). Variabel respons aslinya adalah variabel diskrit dengan empat level yang merepresentasikan tingkat kerusakan laring pada awal penelitian .
Definisi Variabel (dalam Contoh Ibrahim, 1990) Untuk keperluan ilustrasi dalam jurnal, Ibrahim menyederhanakan model dengan hanya menggunakan tiga kovariat. Berikut adalah definisi dari masing-masing variabel seperti yang tercantom pada Tabel 1 di halaman 768 PDF:
y (variabel respon):
Merupakan variabel dependen (respons) biner. Variabel asli dengan 4 level diringkas menjadi dua kategori untuk memudahkan analisis .
0: Tidak ada kerusakan (no damage) pada laring di awal penelitian .
1: Ada kerusakan (damage) pada laring di awal penelitian .
x1 (Kovariat 1): Serum Albumin
Variabel penjelas pertama yang telah didikotomisasi.
0: Jika kadar Serum Albumin < 30 SI (Standard International units) .
1: Jika kadar Serum Albumin ≥ 30 SI .
x2 (Kovariat 2): Serum Creatinine
Variabel penjelas kedua, juga bersifat dikotomus.
0: Jika kadar Serum Creatinine < 200 SI .
1: Jika kadar Serum Creatinine ≥ 200 SI .
x3 (Kovariat 3): Rasio Ukuran Laring terhadap Ukuran Tabung Trakea
Variabel penjelas ketiga, juga didikotomisasi.
0: Jika rasio kurang dari 0.45 .
1: Jika rasio lebih dari atau sama dengan 0.45 .
# Load data ke environment (opsional, tapi disarankan)}
data("TLI.data", package = "icdGLM")
data
## function (..., list = character(), package = NULL, lib.loc = NULL,
## verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE)
## {
## fileExt <- function(x) {
## db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
## ans <- sub(".*\\.", "", x)
## ans[db] <- sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2",
## x[db])
## ans
## }
## my_read_table <- function(...) {
## lcc <- Sys.getlocale("LC_COLLATE")
## on.exit(Sys.setlocale("LC_COLLATE", lcc))
## Sys.setlocale("LC_COLLATE", "C")
## read.table(...)
## }
## stopifnot(is.character(list))
## names <- c(as.character(substitute(list(...))[-1L]), list)
## if (!is.null(package)) {
## if (!is.character(package))
## stop("'package' must be a character vector or NULL")
## }
## paths <- find.package(package, lib.loc, verbose = verbose)
## if (is.null(lib.loc))
## paths <- c(path.package(package, TRUE), if (!length(package)) getwd(),
## paths)
## paths <- unique(normalizePath(paths[file.exists(paths)]))
## paths <- paths[dir.exists(file.path(paths, "data"))]
## dataExts <- tools:::.make_file_exts("data")
## if (length(names) == 0L) {
## db <- matrix(character(), nrow = 0L, ncol = 4L)
## for (path in paths) {
## entries <- NULL
## packageName <- if (file_test("-f", file.path(path,
## "DESCRIPTION")))
## basename(path)
## else "."
## if (file_test("-f", INDEX <- file.path(path, "Meta",
## "data.rds"))) {
## entries <- readRDS(INDEX)
## }
## else {
## dataDir <- file.path(path, "data")
## entries <- tools::list_files_with_type(dataDir,
## "data")
## if (length(entries)) {
## entries <- unique(tools::file_path_sans_ext(basename(entries)))
## entries <- cbind(entries, "")
## }
## }
## if (NROW(entries)) {
## if (is.matrix(entries) && ncol(entries) == 2L)
## db <- rbind(db, cbind(packageName, dirname(path),
## entries))
## else warning(gettextf("data index for package %s is invalid and will be ignored",
## sQuote(packageName)), domain = NA, call. = FALSE)
## }
## }
## colnames(db) <- c("Package", "LibPath", "Item", "Title")
## footer <- if (missing(package))
## paste0("Use ", sQuote(paste("data(package =", ".packages(all.available = TRUE))")),
## "\n", "to list the data sets in all *available* packages.")
## else NULL
## y <- list(title = "Data sets", header = NULL, results = db,
## footer = footer)
## class(y) <- "packageIQR"
## return(y)
## }
## paths <- file.path(paths, "data")
## for (name in names) {
## found <- FALSE
## for (p in paths) {
## tmp_env <- if (overwrite)
## envir
## else new.env()
## if (file_test("-f", file.path(p, "Rdata.rds"))) {
## rds <- readRDS(file.path(p, "Rdata.rds"))
## if (name %in% names(rds)) {
## found <- TRUE
## if (verbose)
## message(sprintf("name=%s:\t found in Rdata.rds",
## name), domain = NA)
## thispkg <- sub(".*/([^/]*)/data$", "\\1", p)
## thispkg <- sub("_.*$", "", thispkg)
## thispkg <- paste0("package:", thispkg)
## objs <- rds[[name]]
## lazyLoad(file.path(p, "Rdata"), envir = tmp_env,
## filter = function(x) x %in% objs)
## break
## }
## else if (verbose)
## message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n",
## name, paste(names(rds), collapse = ",")),
## domain = NA)
## }
## files <- list.files(p, full.names = TRUE)
## files <- files[grep(name, files, fixed = TRUE)]
## if (length(files) > 1L) {
## o <- match(fileExt(files), dataExts, nomatch = 100L)
## paths0 <- dirname(files)
## paths0 <- factor(paths0, levels = unique(paths0))
## files <- files[order(paths0, o)]
## }
## if (length(files)) {
## for (file in files) {
## if (verbose)
## message("name=", name, ":\t file= ...", .Platform$file.sep,
## basename(file), "::\t", appendLF = FALSE,
## domain = NA)
## ext <- fileExt(file)
## if (basename(file) != paste0(name, ".", ext))
## found <- FALSE
## else {
## found <- TRUE
## switch(ext, R = , r = {
## library("utils")
## sys.source(file, chdir = TRUE, envir = tmp_env)
## }, RData = , rdata = , rda = load(file, envir = tmp_env),
## TXT = , txt = , tab = , tab.gz = , tab.bz2 = ,
## tab.xz = , txt.gz = , txt.bz2 = , txt.xz = assign(name,
## my_read_table(file, header = TRUE, as.is = FALSE),
## envir = tmp_env), CSV = , csv = , csv.gz = ,
## csv.bz2 = , csv.xz = assign(name, my_read_table(file,
## header = TRUE, sep = ";", as.is = FALSE),
## envir = tmp_env), found <- FALSE)
## }
## if (found)
## break
## }
## if (verbose)
## message(if (!found)
## "*NOT* ", "found", domain = NA)
## }
## if (found)
## break
## }
## if (!found) {
## warning(gettextf("data set %s not found", sQuote(name)),
## domain = NA)
## }
## else if (!overwrite) {
## for (o in ls(envir = tmp_env, all.names = TRUE)) {
## if (exists(o, envir = envir, inherits = FALSE))
## warning(gettextf("an object named %s already exists and will not be overwritten",
## sQuote(o)))
## else assign(o, get(o, envir = tmp_env, inherits = FALSE),
## envir = envir)
## }
## rm(tmp_env)
## }
## }
## invisible(names)
## }
## <bytecode: 0x0000023bb1595a70>
## <environment: namespace:utils>
# Sekarang TLI_data siap digunakan (sesuaikan nama variabel dengan data asli di package)
TLI_data <- TLI.data
cat("\n=== RINGKASAN MISSING VALUES ===\n")
##
## === RINGKASAN MISSING VALUES ===
# Cek missing values
cat("\n=== RINGKASAN MISSING VALUES ===\n")
##
## === RINGKASAN MISSING VALUES ===
missing_count <- sapply(TLI_data, function(x) sum(is.na(x)))
print(missing_count)
## x1 x2 x3 y
## 3 2 11 0
cat("\nPersentase Missing Values:\n")
##
## Persentase Missing Values:
missing_pct <- sapply(TLI_data, function(x) round(mean(is.na(x)) * 100, 2))
print(missing_pct)
## x1 x2 x3 y
## 3.66 2.44 13.41 0.00
##Eksplorasi Data
# ============================================
# EKSPLORASI DATA
# ============================================
# 1. Plot intro dengan DataExplorer
plot_intro(TLI_data) +
ggtitle("Ringkasan Data TLI") +
theme_minimal()
# 2. Diagram lingkaran berdasarkan Y (tanpa missing)
pie_data <- as.data.frame(table(TLI_data$y))
colnames(pie_data) <- c("y", "Frekuensi")
pie_data$y <- ifelse(pie_data$y == 0, "Tidak Rusak (0)", "Rusak (1)")
pie_data$Persentase <- round(pie_data$Frekuensi/sum(pie_data$Frekuensi)*100, 1)
pie_data$label <- paste0(pie_data$y, "\n", pie_data$Frekuensi, " (", pie_data$Persentase, "%)")
pie_chart <- ggplot(pie_data, aes(x = "", y = Frekuensi, fill = y)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0) +
scale_fill_manual(values = c("Tidak Rusak (0)" = "skyblue",
"Rusak (1)" = "salmon")) +
theme_void() +
geom_text(aes(label = label), position = position_stack(vjust = 0.5)) +
labs(title = "Distribusi Status Kerusakan Laring (y)",
fill = "Status Kerusakan") +
theme(plot.title = element_text(hjust = 0.5, face = "bold"))
print(pie_chart)
# 3. Bar plot berdasarkan Y untuk variabel prediktor
# Membuat data dalam format long
TLI_long <- TLI_data %>%
mutate(y_label = case_when(
y == 0 ~ "Tidak Rusak",
y == 1 ~ "Rusak",
TRUE ~ NA_character_
)) %>%
pivot_longer(cols = c(x1, x2, x3),
names_to = "Variabel",
values_to = "Nilai") %>%
filter(!is.na(Nilai) & !is.na(y_label))
# Bar plot dengan label sederhana
bar_plot <- ggplot(TLI_long, aes(x = as.factor(Nilai), fill = y_label)) +
geom_bar(position = "dodge", alpha = 0.7) +
facet_wrap(~Variabel, scales = "free_x",
labeller = labeller(Variabel = c(x1 = "Albumin Serum",
x2 = "Kreatinin Serum",
x3 = "Rasio Laring/Trakea"))) +
scale_fill_manual(values = c("Tidak Rusak" = "skyblue",
"Rusak" = "salmon")) +
scale_x_discrete(labels = c("0", "1")) +
labs(title = "Distribusi Variabel Prediktor berdasarkan Status Kerusakan",
x = "Kategori", y = "Frekuensi",
fill = "Status Laring") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
strip.text = element_text(face = "bold"),
legend.position = "bottom")
print(bar_plot)
# Statistik deskriptif
cat("\n=== STATISTIK DESKRIPTIF ===\n")
##
## === STATISTIK DESKRIPTIF ===
summary_stats <- data.frame(
Statistik = c("Total Observasi", "Missing x1", "Missing x2", "Missing x3",
"Missing y", "y=0 (Tidak Rusak)", "y=1 (Rusak)"),
Jumlah = c(nrow(TLI_data),
sum(is.na(TLI_data$x1)),
sum(is.na(TLI_data$x2)),
sum(is.na(TLI_data$x3)),
sum(is.na(TLI_data$y)),
sum(TLI_data$y == 0, na.rm = TRUE),
sum(TLI_data$y == 1, na.rm = TRUE)),
Persentase = c("100%",
paste0(round(mean(is.na(TLI_data$x1))*100, 2), "%"),
paste0(round(mean(is.na(TLI_data$x2))*100, 2), "%"),
paste0(round(mean(is.na(TLI_data$x3))*100, 2), "%"),
paste0(round(mean(is.na(TLI_data$y))*100, 2), "%"),
paste0(round(mean(TLI_data$y == 0, na.rm = TRUE)*100, 2), "% dari non-missing"),
paste0(round(mean(TLI_data$y == 1, na.rm = TRUE)*100, 2), "% dari non-missing"))
)
print(summary_stats)
## Statistik Jumlah Persentase
## 1 Total Observasi 82 100%
## 2 Missing x1 3 3.66%
## 3 Missing x2 2 2.44%
## 4 Missing x3 11 13.41%
## 5 Missing y 0 0%
## 6 y=0 (Tidak Rusak) 39 47.56% dari non-missing
## 7 y=1 (Rusak) 43 52.44% dari non-missing
# ============================================
# ANALISIS MODEL
# ============================================
# METODE 1: Complete Case Analysis dengan Fisher Scoring
# Hanya menggunakan observasi dengan semua variabel lengkap (termasuk y tidak missing)
data_complete <- TLI_data[complete.cases(TLI_data), ]
cat("\n\n=== COMPLETE CASE ANALYSIS ===\n")
##
##
## === COMPLETE CASE ANALYSIS ===
cat("Jumlah complete cases:", nrow(data_complete), "dari", nrow(TLI_data), "\n")
## Jumlah complete cases: 69 dari 82
cat("Persentase data digunakan:", round(nrow(data_complete)/nrow(TLI_data)*100, 2), "%\n")
## Persentase data digunakan: 84.15 %
model_cca <- glm(y ~ x1 + x2 + x3,
data = data_complete,
family = binomial(link = "logit"))
summary_cca <- summary(model_cca)
cat("\n=== HASIL COMPLETE CASE ANALYSIS (FISHER SCORING) ===\n")
##
## === HASIL COMPLETE CASE ANALYSIS (FISHER SCORING) ===
print(summary_cca)
##
## Call:
## glm(formula = y ~ x1 + x2 + x3, family = binomial(link = "logit"),
## data = data_complete)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1642 0.4093 -0.401 0.688
## x1 -0.4013 0.6443 -0.623 0.533
## x2 0.3554 0.5637 0.630 0.528
## x3 0.1093 0.4885 0.224 0.823
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 95.524 on 68 degrees of freedom
## Residual deviance: 94.514 on 65 degrees of freedom
## AIC: 102.51
##
## Number of Fisher Scoring iterations: 4
# METODE 2: Analisis dengan icdGLM (EM Algorithm)
# Menggunakan expand_data sesuai dokumentasi package
cat("\n\n=== PREPARASI DATA DENGAN EXPAND_DATA ===\n")
##
##
## === PREPARASI DATA DENGAN EXPAND_DATA ===
# Gunakan expand_data untuk mempersiapkan data
expanded_data <- expand_data(
data = TLI.data[, 1:3], # x1, x2, x3
y = TLI.data[, 4], # y
missing.x = 1:3, # semua prediktor bisa missing
value.set = 0:1 # nilai yang mungkin (0 atau 1)
)
cat("Struktur hasil expand_data:\n")
## Struktur hasil expand_data:
cat("- Nama komponen:", names(expanded_data), "\n")
## - Nama komponen: data weights indicator
cat("- Dimensi data:", dim(expanded_data$data), "\n")
## - Dimensi data: 103 4
cat("- Jumlah weights:", length(expanded_data$weights), "\n")
## - Jumlah weights: 103
cat("- Jumlah indicator:", length(expanded_data$indicator), "\n")
## - Jumlah indicator: 103
cat("- Range weights:", range(expanded_data$weights), "\n")
## - Range weights: 0.125 1
# Tampilkan beberapa baris pertama data yang sudah di-expand
cat("\nBeberapa baris pertama data setelah expand:\n")
##
## Beberapa baris pertama data setelah expand:
print(head(expanded_data$data, 10))
## x1 x2 x3 y
## 1 0 0 0 0
## 2 1 0 0 0
## 3 0 0 0 1
## 4 0 1 0 1
## 5 0 1 0 0
## 6 1 1 0 0
## 7 0 0 0 1
## 8 0 0 0 1
## 9 1 0 0 1
## 10 1 1 0 1
# Jalankan icdGLM dengan data yang sudah di-expand
model_icd <- icdglm(
formula = y ~ x1 + x2 + x3,
family = binomial(link = "logit"),
data = expanded_data$data,
weights = expanded_data$weights,
indicator = expanded_data$indicator,
# Menambahkan kontrol optimasi
control = list(maxit = 500, epsilon = 1e-6)
)
## [1] 9
##
## Family: binomial
## Link function: logit
##
## [1] 0 0 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1
## [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1] 0.829590346 0.170409654 0.673318645 0.326681355 0.479650701 0.896396879
## [7] 0.507001597 0.507001597 0.343366198 0.896976712 0.896976712 0.896976712
## [13] 0.896976712 0.385040067 0.082598294 0.253696695 0.054422685 0.123088794
## [19] 0.076536833 0.520349299 0.103603121 0.492998403 0.492998403 0.656633802
## [25] 0.103023288 0.103023288 0.103023288 0.103023288 0.374405405 0.157956234
## [31] 0.246689687 0.104074817 0.132699763 0.008790726 1.000000000 1.000000000
## [37] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [43] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [49] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [55] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [61] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [67] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [73] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [79] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [85] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [91] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [97] 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000 1.000000000
## [103] 1.000000000
## [1] 0.207353778 0.042596988 0.168243640 0.078910565 0.116008719 0.209781981
## [7] 0.126523101 0.126523101 0.085656488 0.209797226 0.209797226 0.209797226
## [13] 0.209797226 0.096075313 0.020613236 0.063301970 0.013581621 0.029690241
## [19] 0.017889076 0.125547155 0.029626290 0.123406881 0.123406881 0.164337879
## [25] 0.029611025 0.029611025 0.029611025 0.029611025 0.093709011 0.039547915
## [31] 0.061742865 0.026057276 0.032112534 0.002524885 0.249942868 0.249942868
## [37] 0.249942868 0.249942868 0.249942868 0.249942868 0.249942868 0.249942868
## [43] 0.249942868 0.249942868 0.249916772 0.249916772 0.249916772 0.249916772
## [49] 0.249916772 0.249916772 0.249916772 0.249916772 0.249916772 0.249916772
## [55] 0.241411498 0.241411498 0.241411498 0.241411498 0.241411498 0.241411498
## [61] 0.241689435 0.241689435 0.249997048 0.249997048 0.249997048 0.249997048
## [67] 0.249997048 0.249997048 0.249997048 0.249989223 0.249942868 0.249942868
## [73] 0.249942868 0.249942868 0.249942868 0.249942868 0.249942868 0.249942868
## [79] 0.249942868 0.249916772 0.249916772 0.249916772 0.249916772 0.249916772
## [85] 0.249916772 0.249916772 0.249916772 0.249916772 0.249916772 0.241411498
## [91] 0.241411498 0.241689435 0.241689435 0.241689435 0.241689435 0.241689435
## [97] 0.241689435 0.241689435 0.249989223 0.249989223 0.249989223 0.249997048
## [103] 0.239370337
## [1] 0.4924414 0.5032829 0.4924414 0.5926742 0.5926742 0.6031003 0.4924414
## [8] 0.4924414 0.5032829 0.6031003 0.6031003 0.6031003 0.6031003 0.4924414
## [15] 0.5032829 0.4924414 0.5032829 0.5926742 0.6031003 0.5911623 0.6016011
## [22] 0.4908771 0.4908771 0.5017182 0.6016011 0.6016011 0.6016011 0.6016011
## [29] 0.4908771 0.5017182 0.4908771 0.5017182 0.5911623 0.6016011 0.4924414
## [36] 0.4924414 0.4924414 0.4924414 0.4924414 0.4924414 0.4924414 0.4924414
## [43] 0.4924414 0.4924414 0.4908771 0.4908771 0.4908771 0.4908771 0.4908771
## [50] 0.4908771 0.4908771 0.4908771 0.4908771 0.4908771 0.5926742 0.5926742
## [57] 0.5926742 0.5926742 0.5926742 0.5926742 0.5911623 0.5911623 0.5017182
## [64] 0.5017182 0.5017182 0.5017182 0.5017182 0.5017182 0.5017182 0.5032829
## [71] 0.4924414 0.4924414 0.4924414 0.4924414 0.4924414 0.4924414 0.4924414
## [78] 0.4924414 0.4924414 0.4908771 0.4908771 0.4908771 0.4908771 0.4908771
## [85] 0.4908771 0.4908771 0.4908771 0.4908771 0.4908771 0.5926742 0.5926742
## [92] 0.5911623 0.5911623 0.5911623 0.5911623 0.5911623 0.5911623 0.5911623
## [99] 0.5032829 0.5032829 0.5032829 0.5017182 0.6031003
## (Intercept) x1 x2 x3
## (Intercept) 18.991301 4.381520 5.464266 9.521921
## x1 4.381520 4.381520 1.224123 2.133865
## x2 5.464266 1.224123 5.464266 2.243046
## x3 9.521921 2.133865 2.243046 9.521921
cat("\n=== HASIL icdGLM (EM ALGORITHM) ===\n")
##
## === HASIL icdGLM (EM ALGORITHM) ===
cat("Jumlah iterasi EM:", model_icd$iter, "\n")
## Jumlah iterasi EM: 9
cat("Konvergen:", ifelse(model_icd$converged, "Ya", "Tidak"), "\n")
## Konvergen: Ya
# Lihat struktur model_icd untuk mengetahui di mana logLik disimpan
cat("\n=== STRUKTUR MODEL ICD ===\n")
##
## === STRUKTUR MODEL ICD ===
cat("Nama-nama komponen model_icd:\n")
## Nama-nama komponen model_icd:
print(names(model_icd))
## [1] "x" "y" "new.weights"
## [4] "indicator" "glm.fit.data" "coefficients"
## [7] "qr" "residuals" "fitted.values"
## [10] "effects" "R" "rank"
## [13] "family" "linear.predictors" "deviance"
## [16] "aic" "null.deviance" "iter"
## [19] "weights" "prior.weights" "df.residual"
## [22] "df.null" "converged" "boundary"
## [25] "model" "call" "formula"
## [28] "terms" "data" "control"
# Coba akses logLik dari berbagai kemungkinan lokasi
logLik_icd <- NA
if (!is.null(model_icd$logLik)) {
logLik_icd <- as.numeric(model_icd$logLik)
cat("logLik ditemukan di model_icd$logLik\n")
} else if (!is.null(model_icd$deviance)) {
# Deviance = -2 * logLik, sehingga logLik = -deviance/2
logLik_icd <- -as.numeric(model_icd$deviance)/2
cat("logLik dihitung dari deviance\n")
} else if (!is.null(model_icd$aic)) {
# AIC = -2*logLik + 2k, tapi ini rumit
cat("Menggunakan AIC tidak langsung memberikan logLik\n")
}
## logLik dihitung dari deviance
cat("\nNilai logLik icdGLM:", logLik_icd, "\n")
##
## Nilai logLik icdGLM: -56.38626
# Tampilkan ringkasan model
cat("\nRingkasan model icdGLM:\n")
##
## Ringkasan model icdGLM:
print(summary(model_icd))
##
## Call:
## icdglm(formula = y ~ x1 + x2 + x3, family = binomial(link = "logit"),
## data = expanded_data$data, weights = expanded_data$weights,
## indicator = expanded_data$indicator, control = list(maxit = 500,
## epsilon = 1e-06))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.030237 0.391154 -0.077 0.938
## x1 0.043368 0.544789 0.080 0.937
## x2 0.405268 0.510340 0.794 0.427
## x3 -0.006259 0.462099 -0.014 0.989
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 113.48 on 81 degrees of freedom
## Residual deviance: 112.77 on 78 degrees of freedom
## AIC: 118.18
##
## Number of Fisher Scoring iterations: 9
# 1. Pastikan library yang dibutuhkan sudah ter-load
library(knitr)
library(kableExtra)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
# 2. Cek apakah model berhasil dibuat
if (!exists("model_icd") || is.null(model_icd)) {
stop("Model icdGLM tidak ditemukan. Periksa apakah proses estimasi di chunk sebelumnya berhasil.")
}
# 3. Ambil nilai dengan proteksi jika NULL
dev_icd <- if(!is.null(model_icd$deviance)) round(model_icd$deviance, 4) else "N/A"
aic_icd <- if(!is.null(model_icd$aic)) round(model_icd$aic, 4) else "N/A"
# 4. Buat Summary Data Frame
final_summary <- data.frame(
Aspek = c("Metode Estimasi", "Penanganan Missing", "Algoritma",
"Jumlah Observasi Awal", "Jumlah Observasi Analisis",
"Jumlah Data Setelah Expand", "Deviance", "AIC"),
Complete_Case = c("Fisher Scoring", "Listwise Deletion", "IWLS",
nrow(TLI_data),
if(exists("data_complete")) nrow(data_complete) else "N/A",
"-",
if(exists("model_cca")) round(model_cca$deviance, 4) else "N/A",
if(exists("model_cca")) round(AIC(model_cca), 4) else "N/A"),
icdGLM = c("EM Algorithm", "Method of Weights (Ibrahim, 1990)", "EM + IWLS",
nrow(TLI_data), nrow(TLI_data),
if(exists("expanded_data")) nrow(expanded_data$data) else "N/A",
dev_icd, aic_icd)
)
# Cetak Tabel
kable(final_summary, format = "html", caption = "Ringkasan Perbandingan Metode Analisis Data TLI") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Aspek | Complete_Case | icdGLM |
|---|---|---|
| Metode Estimasi | Fisher Scoring | EM Algorithm |
| Penanganan Missing | Listwise Deletion | Method of Weights (Ibrahim, 1990) |
| Algoritma | IWLS | EM + IWLS |
| Jumlah Observasi Awal | 82 | 82 |
| Jumlah Observasi Analisis | 69 | 82 |
| Jumlah Data Setelah Expand |
|
103 |
| Deviance | 94.514 | 112.7725 |
| AIC | 102.514 | 118.1839 |
# 5. Proteksi Perbandingan Koefisien
if (exists("coef_cca") && exists("coef_icd")) {
cat("\n\n=== KOEFISIEN MODEL BERDAMPINGAN ===\n")
# Pastikan panjangnya sama
common_params <- intersect(names(coef_cca), names(coef_icd))
coef_comparison <- data.frame(
Parameter = common_params,
Complete_Case = round(coef_cca[common_params], 4),
icdGLM_EM = round(coef_icd[common_params], 4)
)
print(coef_comparison)
}
# ============================================
# PERBANDINGAN HASIL
# ============================================
# Membuat dataframe untuk koefisien kedua model
coef_cca <- coef(model_cca)
coef_icd <- model_icd$coefficients
se_cca <- summary_cca$coefficients[, 2]
se_icd <- sqrt(diag(vcov(model_icd)))
z_cca <- summary_cca$coefficients[, 3]
z_icd <- coef_icd / se_icd
p_cca <- summary_cca$coefficients[, 4]
p_icd <- 2 * (1 - pnorm(abs(z_icd)))
# Tabel perbandingan koefisien
comparison_table <- data.frame(
Variabel = names(coef_cca),
CCA_Coef = round(coef_cca, 4),
CCA_SE = round(se_cca, 4),
CCA_Pvalue = round(p_cca, 4),
ICD_Coef = round(coef_icd, 4),
ICD_SE = round(se_icd, 4),
ICD_Pvalue = round(p_icd, 4)
)
cat("\n\n=== TABEL PERBANDINGAN KOEFISIEN ===\n")
##
##
## === TABEL PERBANDINGAN KOEFISIEN ===
print(comparison_table)
## Variabel CCA_Coef CCA_SE CCA_Pvalue ICD_Coef ICD_SE ICD_Pvalue
## (Intercept) (Intercept) -0.1642 0.4093 0.6884 -0.0302 0.3912 0.9384
## x1 x1 -0.4013 0.6443 0.5334 0.0434 0.5448 0.9366
## x2 x2 0.3554 0.5637 0.5284 0.4053 0.5103 0.4271
## x3 x3 0.1093 0.4885 0.8230 -0.0063 0.4621 0.9892
# Informasi tambahan - gunakan deviance sebagai ukuran kebaikan model
info_summary <- data.frame(
Metode = c("Complete Case", "icdGLM (EM)"),
N_Obs_Awal = c(nrow(TLI_data), nrow(TLI_data)),
N_Obs_Digunakan = c(nrow(data_complete), nrow(expanded_data$data)),
N_Unique_Weights = c(NA, length(unique(expanded_data$weights))),
Deviance = c(round(model_cca$deviance, 4), round(model_icd$deviance, 4)),
AIC = c(round(AIC(model_cca), 4), round(model_icd$aic, 4)),
Iterasi = c(model_cca$iter, model_icd$iter)
)
cat("\n\n=== RINGKASAN INFORMASI MODEL ===\n")
##
##
## === RINGKASAN INFORMASI MODEL ===
print(info_summary)
## Metode N_Obs_Awal N_Obs_Digunakan N_Unique_Weights Deviance AIC
## 1 Complete Case 82 69 NA 94.5140 102.5140
## 2 icdGLM (EM) 82 103 4 112.7725 118.1839
## Iterasi
## 1 4
## 2 9
# ============================================
# VISUALISASI PERBANDINGAN
# ============================================
# Plot perbandingan koefisien
coef_plot_data <- data.frame(
Variabel = rep(names(coef_cca), 2),
Metode = c(rep("Complete Case", 4), rep("icdGLM (EM)", 4)),
Koefisien = c(coef_cca, coef_icd),
SE = c(se_cca, se_icd)
)
coef_plot_data$lower <- coef_plot_data$Koefisien - 1.96 * coef_plot_data$SE
coef_plot_data$upper <- coef_plot_data$Koefisien + 1.96 * coef_plot_data$SE
coef_plot_data$Variabel_desc <- factor(coef_plot_data$Variabel,
levels = c("(Intercept)", "x1", "x2", "x3"),
labels = c("Intercept", "Albumin (x1)",
"Kreatinin (x2)", "Rasio (x3)"))
coef_plot <- ggplot(coef_plot_data, aes(x = Variabel_desc, y = Koefisien, color = Metode)) +
geom_point(position = position_dodge(width = 0.5), size = 3) +
geom_errorbar(aes(ymin = lower, ymax = upper),
width = 0.2, position = position_dodge(width = 0.5)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
scale_color_manual(values = c("Complete Case" = "blue", "icdGLM (EM)" = "red")) +
labs(title = "Perbandingan Koefisien Model",
subtitle = paste("Complete Case: n =", nrow(data_complete),
"| icdGLM: data expand =", nrow(expanded_data$data),
"observasi dengan pembobotan"),
x = "Variabel", y = "Koefisien (log-odds)") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
legend.position = "bottom")
print(coef_plot)
# ============================================
# DIAGNOSTIK TAMBAHAN
# ============================================
# Untuk prediksi, kita perlu menggunakan data complete cases
# dan memastikan kolom sesuai
data_complete$pred_cca <- predict(model_cca, type = "response")
data_complete$pred_icd <- predict(model_icd, newdata = data_complete[, c("x1", "x2", "x3")], type = "response")
# Korelasi prediksi
cor_pred <- cor(data_complete$pred_cca, data_complete$pred_icd)
cat("\n\n=== DIAGNOSTIK PREDIKSI ===\n")
##
##
## === DIAGNOSTIK PREDIKSI ===
cat("Korelasi prediksi kedua model:", round(cor_pred, 4), "\n")
## Korelasi prediksi kedua model: 0.6978
# Plot prediksi
pred_plot <- ggplot(data_complete, aes(x = pred_cca, y = pred_icd, color = as.factor(y))) +
geom_point(size = 2, alpha = 0.7) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
scale_color_manual(values = c("0" = "skyblue", "1" = "salmon"),
labels = c("0" = "Tidak Rusak", "1" = "Rusak")) +
labs(title = "Perbandingan Prediksi Probabilitas",
x = "Prediksi Complete Case", y = "Prediksi icdGLM",
color = "Aktual") +
theme_minimal() +
theme(legend.position = "bottom")
print(pred_plot)
## Ringkasan Akhir
# ============================================
# RINGKASAN AKHIR
# ============================================
final_summary <- data.frame(
Aspek = c("Metode Estimasi", "Penanganan Missing", "Algoritma",
"Jumlah Observasi Awal", "Jumlah Observasi Analisis",
"Jumlah Data Setelah Expand", "Deviance", "AIC"),
Complete_Case = c("Fisher Scoring", "Listwise Deletion", "IWLS",
nrow(TLI_data), nrow(data_complete), "-",
round(model_cca$deviance, 4), round(AIC(model_cca), 4)),
icdGLM = c("EM Algorithm", "Method of Weights (Ibrahim, 1990)", "EM + IWLS",
nrow(TLI.data), nrow(TLI_data), nrow(expanded_data$data),
round(model_icd$deviance, 4), round(model_icd$aic, 4))
)
cat("\n\n=== RINGKASAN FINAL ===\n")
##
##
## === RINGKASAN FINAL ===
print(final_summary)
## Aspek Complete_Case
## 1 Metode Estimasi Fisher Scoring
## 2 Penanganan Missing Listwise Deletion
## 3 Algoritma IWLS
## 4 Jumlah Observasi Awal 82
## 5 Jumlah Observasi Analisis 69
## 6 Jumlah Data Setelah Expand -
## 7 Deviance 94.514
## 8 AIC 102.514
## icdGLM
## 1 EM Algorithm
## 2 Method of Weights (Ibrahim, 1990)
## 3 EM + IWLS
## 4 82
## 5 82
## 6 103
## 7 112.7725
## 8 118.1839
final_summary %>%
kable(format = "html", caption = "Ringkasan Perbandingan Metode Analisis Data TLI") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Aspek | Complete_Case | icdGLM |
|---|---|---|
| Metode Estimasi | Fisher Scoring | EM Algorithm |
| Penanganan Missing | Listwise Deletion | Method of Weights (Ibrahim, 1990) |
| Algoritma | IWLS | EM + IWLS |
| Jumlah Observasi Awal | 82 | 82 |
| Jumlah Observasi Analisis | 69 | 82 |
| Jumlah Data Setelah Expand |
|
103 |
| Deviance | 94.514 | 112.7725 |
| AIC | 102.514 | 118.1839 |
# Informasi tambahan tentang pembobotan
cat("\n\n=== INFORMASI PEMBOBOTAN EM ALGORITHM ===\n")
##
##
## === INFORMASI PEMBOBOTAN EM ALGORITHM ===
cat("Ringkasan weights:\n")
## Ringkasan weights:
print(summary(expanded_data$weights))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1250 0.5000 1.0000 0.7961 1.0000 1.0000
cat("\nDistribusi weights:\n")
##
## Distribusi weights:
print(table(cut(expanded_data$weights, breaks = c(0, 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1))))
##
## (0,0.01] (0.01,0.05] (0.05,0.1] (0.1,0.25] (0.25,0.5] (0.5,0.75]
## 0 0 0 12 22 0
## (0.75,1]
## 69
# Tampilkan koefisien kedua model berdampingan
cat("\n\n=== KOEFISIEN MODEL BERDAMPINGAN ===\n")
##
##
## === KOEFISIEN MODEL BERDAMPINGAN ===
coef_comparison <- data.frame(
Parameter = names(coef_cca),
Complete_Case = round(coef_cca, 4),
icdGLM_EM = round(coef_icd, 4),
Perbedaan = round(coef_icd - coef_cca, 4),
Perbedaan_Persen = paste0(round((coef_icd - coef_cca)/abs(coef_cca)*100, 2), "%")
)
print(coef_comparison)
## Parameter Complete_Case icdGLM_EM Perbedaan Perbedaan_Persen
## (Intercept) (Intercept) -0.1642 -0.0302 0.1339 81.58%
## x1 x1 -0.4013 0.0434 0.4447 110.81%
## x2 x2 0.3554 0.4053 0.0499 14.04%
## x3 x3 0.1093 -0.0063 -0.1156 -105.73%
# Bandingkan juga standard error
se_comparison <- data.frame(
Parameter = names(coef_cca),
CCA_SE = round(se_cca, 4),
ICD_SE = round(se_icd, 4),
Rasio_SE = round(se_icd/se_cca, 4)
)
cat("\n\n=== PERBANDINGAN STANDARD ERROR ===\n")
##
##
## === PERBANDINGAN STANDARD ERROR ===
print(se_comparison)
## Parameter CCA_SE ICD_SE Rasio_SE
## (Intercept) (Intercept) 0.4093 0.3912 0.9556
## x1 x1 0.6443 0.5448 0.8456
## x2 x2 0.5637 0.5103 0.9054
## x3 x3 0.4885 0.4621 0.9459
#===========================================================
# ILUSTRASI PROFILE LIKELIHOOD - Data Model Pertumbuhan VBGM
#===========================================================
# Load packages
library(stats)
library(ggplot2)
# 1. SIMULASI DATA
set.seed(123)
age <- seq(1, 20, length.out = 30)
beta1_true <- 100
beta2_true <- 90
beta3_true <- 0.2
y_true <- beta1_true - beta2_true * exp(-beta3_true * age)
y_obs <- y_true + rnorm(30, 0, 5)
data_ikan <- data.frame(age = age, length = y_obs)
# 2. PLOT DATA
p1 <- ggplot(data_ikan, aes(x = age, y = length)) +
geom_point(color = "blue", size = 2) +
geom_line(aes(y = y_true), color = "red", linetype = "dashed") +
labs(title = "Data Pertumbuhan Ikan", x = "Umur", y = "Panjang") +
theme_minimal()
print(p1)
# 3. METODE 1: nls dengan plinear
cat("\n=== METODE PLINEAR ===\n")
##
## === METODE PLINEAR ===
fit_plinear <- nls(length ~ cbind(1, exp(-beta3 * age)),
data = data_ikan,
start = list(beta3 = 0.2),
algorithm = "plinear")
coef_plinear <- coef(fit_plinear)
beta3_est <- coef_plinear["beta3"]
beta1_est <- coef_plinear[".lin1"]
beta2_est <- -coef_plinear[".lin2"]
cat("beta1 =", round(beta1_est, 2), "\n")
## beta1 = 97.02
cat("beta2 =", round(beta2_est, 2), "\n")
## beta2 = 89.15
cat("beta3 =", round(beta3_est, 4), "\n")
## beta3 = 0.2279
# 4. METODE 2: nls biasa
cat("\n=== METODE STANDAR ===\n")
##
## === METODE STANDAR ===
fit_std <- nls(length ~ beta1 - beta2 * exp(-beta3 * age),
data = data_ikan,
start = list(beta1 = 90, beta2 = 80, beta3 = 0.2))
print(coef(fit_std))
## beta1 beta2 beta3
## 97.0201437 89.1481905 0.2279258
# 5. PROFILE LIKELIHOOD
cat("\n=== PROFILE LIKELIHOOD ===\n")
##
## === PROFILE LIKELIHOOD ===
# Manual grid search
beta3_grid <- seq(0.15, 0.25, length.out = 30)
rss <- numeric(length(beta3_grid))
for(i in seq_along(beta3_grid)) {
b3 <- beta3_grid[i]
# Hitung x = exp(-b3 * age)
x <- exp(-b3 * age)
# Regresi linear: y ~ x
fit_lm <- lm(y_obs ~ x)
# Ambil koefisien
beta1_opt <- coef(fit_lm)[1] # intercept
beta2_opt <- -coef(fit_lm)[2] # karena model: y = beta1 - beta2 * exp(-b3*age)
# Hitung prediksi
y_pred <- beta1_opt - beta2_opt * exp(-b3 * age)
# Hitung Residual Sum of Squares (RSS)
rss[i] <- sum((y_obs - y_pred)^2)
}
# Cek hasil grid search
best_idx <- which.min(rss)
best_beta3 <- beta3_grid[best_idx]
best_rss <- rss[best_idx]
cat("\nHasil grid search:\n")
##
## Hasil grid search:
cat(" Beta3 optimal =", round(best_beta3, 4), "\n")
## Beta3 optimal = 0.2293
cat(" RSS minimal =", round(best_rss, 2), "\n")
## RSS minimal = 646.84
# Buat dataframe untuk profile plot
profile_df <- data.frame(beta3 = beta3_grid, rss = rss)
# Buat dataframe terpisah untuk titik optimal (HANYA 1 BARIS)
optimal_point <- data.frame(beta3 = best_beta3, rss = best_rss)
# Plot profile
p2 <- ggplot(profile_df, aes(x = beta3, y = rss)) +
geom_line(color = "blue", linewidth = 1) +
# CARA 1: Menggunakan data frame terpisah untuk titik optimal
geom_point(data = optimal_point,
aes(x = beta3, y = rss),
color = "red", size = 3) +
# CARA 2: Alternatif dengan annotate (juga benar)
# annotate("point", x = best_beta3, y = best_rss,
# color = "red", size = 3) +
geom_vline(xintercept = best_beta3,
linetype = "dashed", color = "red", alpha = 0.5) +
labs(title = "Profile Residual Sum of Squares (RSS) untuk Beta3",
subtitle = paste("Nilai optimal beta3 =", round(best_beta3, 4)),
x = expression(beta[3] ~ "(laju pertumbuhan)"),
y = "Residual Sum of Squares (RSS)") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, color = "red"))
print(p2)
# Alternatif dengan annotate (lebih sederhana)
p2_alternatif <- ggplot(profile_df, aes(x = beta3, y = rss)) +
geom_line(color = "blue", linewidth = 1) +
annotate("point", x = best_beta3, y = best_rss,
color = "red", size = 3) +
geom_vline(xintercept = best_beta3,
linetype = "dashed", color = "red", alpha = 0.5) +
labs(title = "Profile RSS untuk Beta3 (dengan annotate)",
x = expression(beta[3]),
y = "RSS") +
theme_minimal()
print(p2_alternatif)
### FIT KURVA DENGAN PARAMETER OPTIMAL
# 6. FIT KURVA DENGAN PARAMETER OPTIMAL
cat("\n=== FIT KURVA DENGAN PARAMETER OPTIMAL ===\n")
##
## === FIT KURVA DENGAN PARAMETER OPTIMAL ===
# Hitung parameter optimal
x_opt <- exp(-best_beta3 * age)
fit_lm_opt <- lm(y_obs ~ x_opt)
beta1_opt <- coef(fit_lm_opt)[1]
beta2_opt <- -coef(fit_lm_opt)[2]
cat("\nParameter optimal dari profile likelihood:\n")
##
## Parameter optimal dari profile likelihood:
cat(" beta1 =", round(beta1_opt, 2), "\n")
## beta1 = 96.94
cat(" beta2 =", round(beta2_opt, 2), "\n")
## beta2 = 89.32
cat(" beta3 =", round(best_beta3, 4), "\n")
## beta3 = 0.2293
# Buat prediksi untuk kurva halus
age_pred <- seq(min(age), max(age), length.out = 100)
y_pred_opt <- beta1_opt - beta2_opt * exp(-best_beta3 * age_pred)
y_pred_true <- beta1_true - beta2_true * exp(-beta3_true * age_pred)
# Prediksi dari metode plinear (jika sudah dihitung sebelumnya)
if(exists("beta1_est") && exists("beta2_est") && exists("beta3_est")) {
y_pred_plinear <- beta1_est - beta2_est * exp(-beta3_est * age_pred)
# Plot kurva fit dengan semua metode
p3 <- ggplot() +
# Data points
geom_point(data = data_ikan, aes(x = age, y = length),
size = 2, alpha = 0.6, color = "gray30") +
# Kurva true
geom_line(aes(x = age_pred, y = y_pred_true,
color = "Nilai True", linetype = "Nilai True"),
linewidth = 1) +
# Kurva fitted dari profile likelihood
geom_line(aes(x = age_pred, y = y_pred_opt,
color = "Profile Likelihood", linetype = "Profile Likelihood"),
linewidth = 1) +
# Kurva fitted dari metode plinear
geom_line(aes(x = age_pred, y = y_pred_plinear,
color = "Metode Plinear", linetype = "Metode Plinear"),
linewidth = 1) +
scale_color_manual(name = "Kurva",
values = c("Nilai True" = "red",
"Profile Likelihood" = "blue",
"Metode Plinear" = "darkgreen")) +
scale_linetype_manual(name = "Kurva",
values = c("Nilai True" = "dashed",
"Profile Likelihood" = "solid",
"Metode Plinear" = "dotted")) +
labs(title = "Perbandingan Kurva Von Bertalanffy",
subtitle = paste("Beta3 optimal =", round(best_beta3, 3)),
x = "Umur (tahun)",
y = "Panjang (cm)") +
theme_minimal() +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
} else {
# Plot sederhana jika metode plinear belum dihitung
p3 <- ggplot() +
geom_point(data = data_ikan, aes(x = age, y = length),
size = 2, alpha = 0.6, color = "gray30") +
geom_line(aes(x = age_pred, y = y_pred_true,
color = "Nilai True", linetype = "Nilai True"),
linewidth = 1) +
geom_line(aes(x = age_pred, y = y_pred_opt,
color = "Profile Likelihood", linetype = "Profile Likelihood"),
linewidth = 1) +
scale_color_manual(name = "Kurva",
values = c("Nilai True" = "red",
"Profile Likelihood" = "blue")) +
scale_linetype_manual(name = "Kurva",
values = c("Nilai True" = "dashed",
"Profile Likelihood" = "solid")) +
labs(title = "Kurva Von Bertalanffy dengan Profile Likelihood",
x = "Umur (tahun)", y = "Panjang (cm)") +
theme_minimal() +
theme(legend.position = "bottom")
}
print(p3)
# 7. PLOT DIAGNOSTIK: Nilai beta1 dan beta2 untuk setiap beta3
cat("\n=== PLOT DIAGNOSTIK PARAMETER ===\n")
##
## === PLOT DIAGNOSTIK PARAMETER ===
# Hitung beta1 dan beta2 untuk setiap nilai beta3 dalam grid
beta1_grid <- numeric(length(beta3_grid))
beta2_grid <- numeric(length(beta3_grid))
for(i in seq_along(beta3_grid)) {
b3 <- beta3_grid[i]
x <- exp(-b3 * age)
fit_lm <- lm(y_obs ~ x)
beta1_grid[i] <- coef(fit_lm)[1]
beta2_grid[i] <- -coef(fit_lm)[2]
}
# Plot beta1 vs beta3
param_df1 <- data.frame(beta3 = beta3_grid, beta1 = beta1_grid)
p4 <- ggplot(param_df1, aes(x = beta3, y = beta1)) +
geom_line(color = "darkgreen", linewidth = 1) +
geom_point(data = data.frame(beta3 = best_beta3, beta1 = beta1_opt),
color = "red", size = 3) +
labs(title = expression(beta[1] ~ "vs" ~ beta[3]),
x = expression(beta[3]), y = expression(beta[1])) +
theme_minimal()
print(p4)